xref: /llvm-project/clang-tools-extra/clang-tidy/modernize/LoopConvertCheck.cpp (revision e43295209bb86e93008363c66c1277c7d8bb148c)
1 //===--- LoopConvertCheck.cpp - clang-tidy---------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "LoopConvertCheck.h"
10 #include "clang/AST/ASTContext.h"
11 #include "clang/ASTMatchers/ASTMatchFinder.h"
12 #include "clang/Basic/LLVM.h"
13 #include "clang/Basic/LangOptions.h"
14 #include "clang/Basic/SourceLocation.h"
15 #include "clang/Basic/SourceManager.h"
16 #include "clang/Lex/Lexer.h"
17 #include "llvm/ADT/ArrayRef.h"
18 #include "llvm/ADT/SmallVector.h"
19 #include "llvm/ADT/StringRef.h"
20 #include "llvm/Support/Casting.h"
21 #include "llvm/Support/raw_ostream.h"
22 #include <cassert>
23 #include <cstring>
24 #include <optional>
25 #include <utility>
26 
27 using namespace clang::ast_matchers;
28 using namespace llvm;
29 
30 namespace clang {
31 namespace tidy {
32 
33 template <> struct OptionEnumMapping<modernize::Confidence::Level> {
34   static llvm::ArrayRef<std::pair<modernize::Confidence::Level, StringRef>>
35   getEnumMapping() {
36     static constexpr std::pair<modernize::Confidence::Level, StringRef>
37         Mapping[] = {{modernize::Confidence::CL_Reasonable, "reasonable"},
38                      {modernize::Confidence::CL_Safe, "safe"},
39                      {modernize::Confidence::CL_Risky, "risky"}};
40     return ArrayRef(Mapping);
41   }
42 };
43 
44 template <> struct OptionEnumMapping<modernize::VariableNamer::NamingStyle> {
45   static llvm::ArrayRef<
46       std::pair<modernize::VariableNamer::NamingStyle, StringRef>>
47   getEnumMapping() {
48     static constexpr std::pair<modernize::VariableNamer::NamingStyle, StringRef>
49         Mapping[] = {{modernize::VariableNamer::NS_CamelCase, "CamelCase"},
50                      {modernize::VariableNamer::NS_CamelBack, "camelBack"},
51                      {modernize::VariableNamer::NS_LowerCase, "lower_case"},
52                      {modernize::VariableNamer::NS_UpperCase, "UPPER_CASE"}};
53     return ArrayRef(Mapping);
54   }
55 };
56 
57 namespace modernize {
58 
59 static const char LoopNameArray[] = "forLoopArray";
60 static const char LoopNameIterator[] = "forLoopIterator";
61 static const char LoopNameReverseIterator[] = "forLoopReverseIterator";
62 static const char LoopNamePseudoArray[] = "forLoopPseudoArray";
63 static const char ConditionBoundName[] = "conditionBound";
64 static const char InitVarName[] = "initVar";
65 static const char BeginCallName[] = "beginCall";
66 static const char EndCallName[] = "endCall";
67 static const char EndVarName[] = "endVar";
68 static const char DerefByValueResultName[] = "derefByValueResult";
69 static const char DerefByRefResultName[] = "derefByRefResult";
70 
71 static const StatementMatcher integerComparisonMatcher() {
72   return expr(ignoringParenImpCasts(
73       declRefExpr(to(varDecl(equalsBoundNode(InitVarName))))));
74 }
75 
76 static const DeclarationMatcher initToZeroMatcher() {
77   return varDecl(
78              hasInitializer(ignoringParenImpCasts(integerLiteral(equals(0)))))
79       .bind(InitVarName);
80 }
81 
82 static const StatementMatcher incrementVarMatcher() {
83   return declRefExpr(to(varDecl(equalsBoundNode(InitVarName))));
84 }
85 
86 static StatementMatcher
87 arrayConditionMatcher(internal::Matcher<Expr> LimitExpr) {
88   return binaryOperator(
89       anyOf(allOf(hasOperatorName("<"), hasLHS(integerComparisonMatcher()),
90                   hasRHS(LimitExpr)),
91             allOf(hasOperatorName(">"), hasLHS(LimitExpr),
92                   hasRHS(integerComparisonMatcher())),
93             allOf(hasOperatorName("!="),
94                   hasOperands(integerComparisonMatcher(), LimitExpr))));
95 }
96 
97 /// The matcher for loops over arrays.
98 /// \code
99 ///   for (int i = 0; i < 3 + 2; ++i) { ... }
100 /// \endcode
101 /// The following string identifiers are bound to these parts of the AST:
102 ///   ConditionBoundName: '3 + 2' (as an Expr)
103 ///   InitVarName: 'i' (as a VarDecl)
104 ///   LoopName: The entire for loop (as a ForStmt)
105 ///
106 /// Client code will need to make sure that:
107 ///   - The index variable is only used as an array index.
108 ///   - All arrays indexed by the loop are the same.
109 StatementMatcher makeArrayLoopMatcher() {
110   StatementMatcher ArrayBoundMatcher =
111       expr(hasType(isInteger())).bind(ConditionBoundName);
112 
113   return forStmt(unless(isInTemplateInstantiation()),
114                  hasLoopInit(declStmt(hasSingleDecl(initToZeroMatcher()))),
115                  hasCondition(arrayConditionMatcher(ArrayBoundMatcher)),
116                  hasIncrement(
117                      unaryOperator(hasOperatorName("++"),
118                                    hasUnaryOperand(incrementVarMatcher()))))
119       .bind(LoopNameArray);
120 }
121 
122 /// The matcher used for iterator-based for loops.
123 ///
124 /// This matcher is more flexible than array-based loops. It will match
125 /// catch loops of the following textual forms (regardless of whether the
126 /// iterator type is actually a pointer type or a class type):
127 ///
128 /// \code
129 ///   for (containerType::iterator it = container.begin(),
130 ///        e = createIterator(); it != e; ++it) { ... }
131 ///   for (containerType::iterator it = container.begin();
132 ///        it != anotherContainer.end(); ++it) { ... }
133 /// \endcode
134 /// The following string identifiers are bound to the parts of the AST:
135 ///   InitVarName: 'it' (as a VarDecl)
136 ///   LoopName: The entire for loop (as a ForStmt)
137 ///   In the first example only:
138 ///     EndVarName: 'e' (as a VarDecl)
139 ///   In the second example only:
140 ///     EndCallName: 'container.end()' (as a CXXMemberCallExpr)
141 ///
142 /// Client code will need to make sure that:
143 ///   - The two containers on which 'begin' and 'end' are called are the same.
144 StatementMatcher makeIteratorLoopMatcher(bool IsReverse) {
145 
146   auto BeginNameMatcher = IsReverse ? hasAnyName("rbegin", "crbegin")
147                                     : hasAnyName("begin", "cbegin");
148 
149   auto EndNameMatcher =
150       IsReverse ? hasAnyName("rend", "crend") : hasAnyName("end", "cend");
151 
152   StatementMatcher BeginCallMatcher =
153       cxxMemberCallExpr(argumentCountIs(0),
154                         callee(cxxMethodDecl(BeginNameMatcher)))
155           .bind(BeginCallName);
156 
157   DeclarationMatcher InitDeclMatcher =
158       varDecl(hasInitializer(anyOf(ignoringParenImpCasts(BeginCallMatcher),
159                                    materializeTemporaryExpr(
160                                        ignoringParenImpCasts(BeginCallMatcher)),
161                                    hasDescendant(BeginCallMatcher))))
162           .bind(InitVarName);
163 
164   DeclarationMatcher EndDeclMatcher =
165       varDecl(hasInitializer(anything())).bind(EndVarName);
166 
167   StatementMatcher EndCallMatcher = cxxMemberCallExpr(
168       argumentCountIs(0), callee(cxxMethodDecl(EndNameMatcher)));
169 
170   StatementMatcher IteratorBoundMatcher =
171       expr(anyOf(ignoringParenImpCasts(
172                      declRefExpr(to(varDecl(equalsBoundNode(EndVarName))))),
173                  ignoringParenImpCasts(expr(EndCallMatcher).bind(EndCallName)),
174                  materializeTemporaryExpr(ignoringParenImpCasts(
175                      expr(EndCallMatcher).bind(EndCallName)))));
176 
177   StatementMatcher IteratorComparisonMatcher = expr(ignoringParenImpCasts(
178       declRefExpr(to(varDecl(equalsBoundNode(InitVarName))))));
179 
180   // This matcher tests that a declaration is a CXXRecordDecl that has an
181   // overloaded operator*(). If the operator*() returns by value instead of by
182   // reference then the return type is tagged with DerefByValueResultName.
183   internal::Matcher<VarDecl> TestDerefReturnsByValue =
184       hasType(hasUnqualifiedDesugaredType(
185           recordType(hasDeclaration(cxxRecordDecl(hasMethod(cxxMethodDecl(
186               hasOverloadedOperatorName("*"),
187               anyOf(
188                   // Tag the return type if it's by value.
189                   returns(qualType(unless(hasCanonicalType(referenceType())))
190                               .bind(DerefByValueResultName)),
191                   returns(
192                       // Skip loops where the iterator's operator* returns an
193                       // rvalue reference. This is just weird.
194                       qualType(unless(hasCanonicalType(rValueReferenceType())))
195                           .bind(DerefByRefResultName))))))))));
196 
197   return forStmt(
198              unless(isInTemplateInstantiation()),
199              hasLoopInit(anyOf(declStmt(declCountIs(2),
200                                         containsDeclaration(0, InitDeclMatcher),
201                                         containsDeclaration(1, EndDeclMatcher)),
202                                declStmt(hasSingleDecl(InitDeclMatcher)))),
203              hasCondition(ignoringImplicit(binaryOperation(
204                  hasOperatorName("!="), hasOperands(IteratorComparisonMatcher,
205                                                     IteratorBoundMatcher)))),
206              hasIncrement(anyOf(
207                  unaryOperator(hasOperatorName("++"),
208                                hasUnaryOperand(declRefExpr(
209                                    to(varDecl(equalsBoundNode(InitVarName)))))),
210                  cxxOperatorCallExpr(
211                      hasOverloadedOperatorName("++"),
212                      hasArgument(0, declRefExpr(to(
213                                         varDecl(equalsBoundNode(InitVarName),
214                                                 TestDerefReturnsByValue))))))))
215       .bind(IsReverse ? LoopNameReverseIterator : LoopNameIterator);
216 }
217 
218 /// The matcher used for array-like containers (pseudoarrays).
219 ///
220 /// This matcher is more flexible than array-based loops. It will match
221 /// loops of the following textual forms (regardless of whether the
222 /// iterator type is actually a pointer type or a class type):
223 ///
224 /// \code
225 ///   for (int i = 0, j = container.size(); i < j; ++i) { ... }
226 ///   for (int i = 0; i < container.size(); ++i) { ... }
227 /// \endcode
228 /// The following string identifiers are bound to the parts of the AST:
229 ///   InitVarName: 'i' (as a VarDecl)
230 ///   LoopName: The entire for loop (as a ForStmt)
231 ///   In the first example only:
232 ///     EndVarName: 'j' (as a VarDecl)
233 ///   In the second example only:
234 ///     EndCallName: 'container.size()' (as a CXXMemberCallExpr)
235 ///
236 /// Client code will need to make sure that:
237 ///   - The containers on which 'size()' is called is the container indexed.
238 ///   - The index variable is only used in overloaded operator[] or
239 ///     container.at().
240 ///   - The container's iterators would not be invalidated during the loop.
241 StatementMatcher makePseudoArrayLoopMatcher() {
242   // Test that the incoming type has a record declaration that has methods
243   // called 'begin' and 'end'. If the incoming type is const, then make sure
244   // these methods are also marked const.
245   //
246   // FIXME: To be completely thorough this matcher should also ensure the
247   // return type of begin/end is an iterator that dereferences to the same as
248   // what operator[] or at() returns. Such a test isn't likely to fail except
249   // for pathological cases.
250   //
251   // FIXME: Also, a record doesn't necessarily need begin() and end(). Free
252   // functions called begin() and end() taking the container as an argument
253   // are also allowed.
254   TypeMatcher RecordWithBeginEnd = qualType(anyOf(
255       qualType(isConstQualified(),
256                hasUnqualifiedDesugaredType(recordType(hasDeclaration(
257                    cxxRecordDecl(isSameOrDerivedFrom(cxxRecordDecl(
258                        hasMethod(cxxMethodDecl(hasName("begin"), isConst())),
259                        hasMethod(cxxMethodDecl(hasName("end"),
260                                                isConst())))))) // hasDeclaration
261                                                       ))),     // qualType
262       qualType(unless(isConstQualified()),
263                hasUnqualifiedDesugaredType(recordType(hasDeclaration(
264                    cxxRecordDecl(isSameOrDerivedFrom(cxxRecordDecl(
265                        hasMethod(hasName("begin")),
266                        hasMethod(hasName("end"))))))))) // qualType
267       ));
268 
269   StatementMatcher SizeCallMatcher = cxxMemberCallExpr(
270       argumentCountIs(0), callee(cxxMethodDecl(hasAnyName("size", "length"))),
271       on(anyOf(hasType(pointsTo(RecordWithBeginEnd)),
272                hasType(RecordWithBeginEnd))));
273 
274   StatementMatcher EndInitMatcher =
275       expr(anyOf(ignoringParenImpCasts(expr(SizeCallMatcher).bind(EndCallName)),
276                  explicitCastExpr(hasSourceExpression(ignoringParenImpCasts(
277                      expr(SizeCallMatcher).bind(EndCallName))))));
278 
279   DeclarationMatcher EndDeclMatcher =
280       varDecl(hasInitializer(EndInitMatcher)).bind(EndVarName);
281 
282   StatementMatcher IndexBoundMatcher =
283       expr(anyOf(ignoringParenImpCasts(
284                      declRefExpr(to(varDecl(equalsBoundNode(EndVarName))))),
285                  EndInitMatcher));
286 
287   return forStmt(unless(isInTemplateInstantiation()),
288                  hasLoopInit(
289                      anyOf(declStmt(declCountIs(2),
290                                     containsDeclaration(0, initToZeroMatcher()),
291                                     containsDeclaration(1, EndDeclMatcher)),
292                            declStmt(hasSingleDecl(initToZeroMatcher())))),
293                  hasCondition(arrayConditionMatcher(IndexBoundMatcher)),
294                  hasIncrement(
295                      unaryOperator(hasOperatorName("++"),
296                                    hasUnaryOperand(incrementVarMatcher()))))
297       .bind(LoopNamePseudoArray);
298 }
299 
300 /// Determine whether Init appears to be an initializing an iterator.
301 ///
302 /// If it is, returns the object whose begin() or end() method is called, and
303 /// the output parameter isArrow is set to indicate whether the initialization
304 /// is called via . or ->.
305 static const Expr *getContainerFromBeginEndCall(const Expr *Init, bool IsBegin,
306                                                 bool *IsArrow, bool IsReverse) {
307   // FIXME: Maybe allow declaration/initialization outside of the for loop.
308   const auto *TheCall = dyn_cast_or_null<CXXMemberCallExpr>(
309       digThroughConstructorsConversions(Init));
310   if (!TheCall || TheCall->getNumArgs() != 0)
311     return nullptr;
312 
313   const auto *Member = dyn_cast<MemberExpr>(TheCall->getCallee());
314   if (!Member)
315     return nullptr;
316   StringRef Name = Member->getMemberDecl()->getName();
317   if (!Name.consume_back(IsBegin ? "begin" : "end"))
318     return nullptr;
319   if (IsReverse && !Name.consume_back("r"))
320     return nullptr;
321   if (!Name.empty() && !Name.equals("c"))
322     return nullptr;
323 
324   const Expr *SourceExpr = Member->getBase();
325   if (!SourceExpr)
326     return nullptr;
327 
328   *IsArrow = Member->isArrow();
329   return SourceExpr;
330 }
331 
332 /// Determines the container whose begin() and end() functions are called
333 /// for an iterator-based loop.
334 ///
335 /// BeginExpr must be a member call to a function named "begin()", and EndExpr
336 /// must be a member.
337 static const Expr *findContainer(ASTContext *Context, const Expr *BeginExpr,
338                                  const Expr *EndExpr,
339                                  bool *ContainerNeedsDereference,
340                                  bool IsReverse) {
341   // Now that we know the loop variable and test expression, make sure they are
342   // valid.
343   bool BeginIsArrow = false;
344   bool EndIsArrow = false;
345   const Expr *BeginContainerExpr = getContainerFromBeginEndCall(
346       BeginExpr, /*IsBegin=*/true, &BeginIsArrow, IsReverse);
347   if (!BeginContainerExpr)
348     return nullptr;
349 
350   const Expr *EndContainerExpr = getContainerFromBeginEndCall(
351       EndExpr, /*IsBegin=*/false, &EndIsArrow, IsReverse);
352   // Disallow loops that try evil things like this (note the dot and arrow):
353   //  for (IteratorType It = Obj.begin(), E = Obj->end(); It != E; ++It) { }
354   if (!EndContainerExpr || BeginIsArrow != EndIsArrow ||
355       !areSameExpr(Context, EndContainerExpr, BeginContainerExpr))
356     return nullptr;
357 
358   *ContainerNeedsDereference = BeginIsArrow;
359   return BeginContainerExpr;
360 }
361 
362 /// Obtain the original source code text from a SourceRange.
363 static StringRef getStringFromRange(SourceManager &SourceMgr,
364                                     const LangOptions &LangOpts,
365                                     SourceRange Range) {
366   if (SourceMgr.getFileID(Range.getBegin()) !=
367       SourceMgr.getFileID(Range.getEnd())) {
368     return StringRef(); // Empty string.
369   }
370 
371   return Lexer::getSourceText(CharSourceRange(Range, true), SourceMgr,
372                               LangOpts);
373 }
374 
375 /// If the given expression is actually a DeclRefExpr or a MemberExpr,
376 /// find and return the underlying ValueDecl; otherwise, return NULL.
377 static const ValueDecl *getReferencedVariable(const Expr *E) {
378   if (const DeclRefExpr *DRE = getDeclRef(E))
379     return dyn_cast<VarDecl>(DRE->getDecl());
380   if (const auto *Mem = dyn_cast<MemberExpr>(E->IgnoreParenImpCasts()))
381     return dyn_cast<FieldDecl>(Mem->getMemberDecl());
382   return nullptr;
383 }
384 
385 /// Returns true when the given expression is a member expression
386 /// whose base is `this` (implicitly or not).
387 static bool isDirectMemberExpr(const Expr *E) {
388   if (const auto *Member = dyn_cast<MemberExpr>(E->IgnoreParenImpCasts()))
389     return isa<CXXThisExpr>(Member->getBase()->IgnoreParenImpCasts());
390   return false;
391 }
392 
393 /// Given an expression that represents an usage of an element from the
394 /// containter that we are iterating over, returns false when it can be
395 /// guaranteed this element cannot be modified as a result of this usage.
396 static bool canBeModified(ASTContext *Context, const Expr *E) {
397   if (E->getType().isConstQualified())
398     return false;
399   auto Parents = Context->getParents(*E);
400   if (Parents.size() != 1)
401     return true;
402   if (const auto *Cast = Parents[0].get<ImplicitCastExpr>()) {
403     if ((Cast->getCastKind() == CK_NoOp &&
404          Context->hasSameType(Cast->getType(), E->getType().withConst())) ||
405         (Cast->getCastKind() == CK_LValueToRValue &&
406          !Cast->getType().isNull() && Cast->getType()->isFundamentalType()))
407       return false;
408   }
409   // FIXME: Make this function more generic.
410   return true;
411 }
412 
413 /// Returns true when it can be guaranteed that the elements of the
414 /// container are not being modified.
415 static bool usagesAreConst(ASTContext *Context, const UsageResult &Usages) {
416   for (const Usage &U : Usages) {
417     // Lambda captures are just redeclarations (VarDecl) of the same variable,
418     // not expressions. If we want to know if a variable that is captured by
419     // reference can be modified in an usage inside the lambda's body, we need
420     // to find the expression corresponding to that particular usage, later in
421     // this loop.
422     if (U.Kind != Usage::UK_CaptureByCopy && U.Kind != Usage::UK_CaptureByRef &&
423         canBeModified(Context, U.Expression))
424       return false;
425   }
426   return true;
427 }
428 
429 /// Returns true if the elements of the container are never accessed
430 /// by reference.
431 static bool usagesReturnRValues(const UsageResult &Usages) {
432   for (const auto &U : Usages) {
433     if (U.Expression && !U.Expression->isPRValue())
434       return false;
435   }
436   return true;
437 }
438 
439 /// Returns true if the container is const-qualified.
440 static bool containerIsConst(const Expr *ContainerExpr, bool Dereference) {
441   if (const auto *VDec = getReferencedVariable(ContainerExpr)) {
442     QualType CType = VDec->getType();
443     if (Dereference) {
444       if (!CType->isPointerType())
445         return false;
446       CType = CType->getPointeeType();
447     }
448     // If VDec is a reference to a container, Dereference is false,
449     // but we still need to check the const-ness of the underlying container
450     // type.
451     CType = CType.getNonReferenceType();
452     return CType.isConstQualified();
453   }
454   return false;
455 }
456 
457 LoopConvertCheck::RangeDescriptor::RangeDescriptor()
458     : ContainerNeedsDereference(false), DerefByConstRef(false),
459       DerefByValue(false), NeedsReverseCall(false) {}
460 
461 LoopConvertCheck::LoopConvertCheck(StringRef Name, ClangTidyContext *Context)
462     : ClangTidyCheck(Name, Context), TUInfo(new TUTrackingInfo),
463       MaxCopySize(Options.get("MaxCopySize", 16ULL)),
464       MinConfidence(Options.get("MinConfidence", Confidence::CL_Reasonable)),
465       NamingStyle(Options.get("NamingStyle", VariableNamer::NS_CamelCase)),
466       Inserter(Options.getLocalOrGlobal("IncludeStyle",
467                                         utils::IncludeSorter::IS_LLVM),
468                areDiagsSelfContained()),
469       UseCxx20IfAvailable(Options.get("UseCxx20ReverseRanges", true)),
470       ReverseFunction(Options.get("MakeReverseRangeFunction", "")),
471       ReverseHeader(Options.get("MakeReverseRangeHeader", "")) {
472 
473   if (ReverseFunction.empty() && !ReverseHeader.empty()) {
474     configurationDiag(
475         "modernize-loop-convert: 'MakeReverseRangeHeader' is set but "
476         "'MakeReverseRangeFunction' is not, disabling reverse loop "
477         "transformation");
478     UseReverseRanges = false;
479   } else if (ReverseFunction.empty()) {
480     UseReverseRanges = UseCxx20IfAvailable && getLangOpts().CPlusPlus20;
481   } else {
482     UseReverseRanges = true;
483   }
484 }
485 
486 void LoopConvertCheck::storeOptions(ClangTidyOptions::OptionMap &Opts) {
487   Options.store(Opts, "MaxCopySize", MaxCopySize);
488   Options.store(Opts, "MinConfidence", MinConfidence);
489   Options.store(Opts, "NamingStyle", NamingStyle);
490   Options.store(Opts, "IncludeStyle", Inserter.getStyle());
491   Options.store(Opts, "UseCxx20ReverseRanges", UseCxx20IfAvailable);
492   Options.store(Opts, "MakeReverseRangeFunction", ReverseFunction);
493   Options.store(Opts, "MakeReverseRangeHeader", ReverseHeader);
494 }
495 
496 void LoopConvertCheck::registerPPCallbacks(const SourceManager &SM,
497                                            Preprocessor *PP,
498                                            Preprocessor *ModuleExpanderPP) {
499   Inserter.registerPreprocessor(PP);
500 }
501 
502 void LoopConvertCheck::registerMatchers(MatchFinder *Finder) {
503   Finder->addMatcher(traverse(TK_AsIs, makeArrayLoopMatcher()), this);
504   Finder->addMatcher(traverse(TK_AsIs, makeIteratorLoopMatcher(false)), this);
505   Finder->addMatcher(traverse(TK_AsIs, makePseudoArrayLoopMatcher()), this);
506   if (UseReverseRanges)
507     Finder->addMatcher(traverse(TK_AsIs, makeIteratorLoopMatcher(true)), this);
508 }
509 
510 /// Given the range of a single declaration, such as:
511 /// \code
512 ///   unsigned &ThisIsADeclarationThatCanSpanSeveralLinesOfCode =
513 ///       InitializationValues[I];
514 ///   next_instruction;
515 /// \endcode
516 /// Finds the range that has to be erased to remove this declaration without
517 /// leaving empty lines, by extending the range until the beginning of the
518 /// next instruction.
519 ///
520 /// We need to delete a potential newline after the deleted alias, as
521 /// clang-format will leave empty lines untouched. For all other formatting we
522 /// rely on clang-format to fix it.
523 void LoopConvertCheck::getAliasRange(SourceManager &SM, SourceRange &Range) {
524   bool Invalid = false;
525   const char *TextAfter =
526       SM.getCharacterData(Range.getEnd().getLocWithOffset(1), &Invalid);
527   if (Invalid)
528     return;
529   unsigned Offset = std::strspn(TextAfter, " \t\r\n");
530   Range =
531       SourceRange(Range.getBegin(), Range.getEnd().getLocWithOffset(Offset));
532 }
533 
534 /// Computes the changes needed to convert a given for loop, and
535 /// applies them.
536 void LoopConvertCheck::doConversion(
537     ASTContext *Context, const VarDecl *IndexVar,
538     const ValueDecl *MaybeContainer, const UsageResult &Usages,
539     const DeclStmt *AliasDecl, bool AliasUseRequired, bool AliasFromForInit,
540     const ForStmt *Loop, RangeDescriptor Descriptor) {
541   std::string VarName;
542   bool VarNameFromAlias = (Usages.size() == 1) && AliasDecl;
543   bool AliasVarIsRef = false;
544   bool CanCopy = true;
545   std::vector<FixItHint> FixIts;
546   if (VarNameFromAlias) {
547     const auto *AliasVar = cast<VarDecl>(AliasDecl->getSingleDecl());
548     VarName = AliasVar->getName().str();
549 
550     // Use the type of the alias if it's not the same
551     QualType AliasVarType = AliasVar->getType();
552     assert(!AliasVarType.isNull() && "Type in VarDecl is null");
553     if (AliasVarType->isReferenceType()) {
554       AliasVarType = AliasVarType.getNonReferenceType();
555       AliasVarIsRef = true;
556     }
557     if (Descriptor.ElemType.isNull() ||
558         !Context->hasSameUnqualifiedType(AliasVarType, Descriptor.ElemType))
559       Descriptor.ElemType = AliasVarType;
560 
561     // We keep along the entire DeclStmt to keep the correct range here.
562     SourceRange ReplaceRange = AliasDecl->getSourceRange();
563 
564     std::string ReplacementText;
565     if (AliasUseRequired) {
566       ReplacementText = VarName;
567     } else if (AliasFromForInit) {
568       // FIXME: Clang includes the location of the ';' but only for DeclStmt's
569       // in a for loop's init clause. Need to put this ';' back while removing
570       // the declaration of the alias variable. This is probably a bug.
571       ReplacementText = ";";
572     } else {
573       // Avoid leaving empty lines or trailing whitespaces.
574       getAliasRange(Context->getSourceManager(), ReplaceRange);
575     }
576 
577     FixIts.push_back(FixItHint::CreateReplacement(
578         CharSourceRange::getTokenRange(ReplaceRange), ReplacementText));
579     // No further replacements are made to the loop, since the iterator or index
580     // was used exactly once - in the initialization of AliasVar.
581   } else {
582     VariableNamer Namer(&TUInfo->getGeneratedDecls(),
583                         &TUInfo->getParentFinder().getStmtToParentStmtMap(),
584                         Loop, IndexVar, MaybeContainer, Context, NamingStyle);
585     VarName = Namer.createIndexName();
586     // First, replace all usages of the array subscript expression with our new
587     // variable.
588     for (const auto &Usage : Usages) {
589       std::string ReplaceText;
590       SourceRange Range = Usage.Range;
591       if (Usage.Expression) {
592         // If this is an access to a member through the arrow operator, after
593         // the replacement it must be accessed through the '.' operator.
594         ReplaceText = Usage.Kind == Usage::UK_MemberThroughArrow ? VarName + "."
595                                                                  : VarName;
596         auto Parents = Context->getParents(*Usage.Expression);
597         if (Parents.size() == 1) {
598           if (const auto *Paren = Parents[0].get<ParenExpr>()) {
599             // Usage.Expression will be replaced with the new index variable,
600             // and parenthesis around a simple DeclRefExpr can always be
601             // removed.
602             Range = Paren->getSourceRange();
603           } else if (const auto *UOP = Parents[0].get<UnaryOperator>()) {
604             // If we are taking the address of the loop variable, then we must
605             // not use a copy, as it would mean taking the address of the loop's
606             // local index instead.
607             // FIXME: This won't catch cases where the address is taken outside
608             // of the loop's body (for instance, in a function that got the
609             // loop's index as a const reference parameter), or where we take
610             // the address of a member (like "&Arr[i].A.B.C").
611             if (UOP->getOpcode() == UO_AddrOf)
612               CanCopy = false;
613           }
614         }
615       } else {
616         // The Usage expression is only null in case of lambda captures (which
617         // are VarDecl). If the index is captured by value, add '&' to capture
618         // by reference instead.
619         ReplaceText =
620             Usage.Kind == Usage::UK_CaptureByCopy ? "&" + VarName : VarName;
621       }
622       TUInfo->getReplacedVars().insert(std::make_pair(Loop, IndexVar));
623       FixIts.push_back(FixItHint::CreateReplacement(
624           CharSourceRange::getTokenRange(Range), ReplaceText));
625     }
626   }
627 
628   // Now, we need to construct the new range expression.
629   SourceRange ParenRange(Loop->getLParenLoc(), Loop->getRParenLoc());
630 
631   QualType Type = Context->getAutoDeductType();
632   if (!Descriptor.ElemType.isNull() && Descriptor.ElemType->isFundamentalType())
633     Type = Descriptor.ElemType.getUnqualifiedType();
634   Type = Type.getDesugaredType(*Context);
635 
636   // If the new variable name is from the aliased variable, then the reference
637   // type for the new variable should only be used if the aliased variable was
638   // declared as a reference.
639   bool IsCheapToCopy =
640       !Descriptor.ElemType.isNull() &&
641       Descriptor.ElemType.isTriviallyCopyableType(*Context) &&
642       // TypeInfo::Width is in bits.
643       Context->getTypeInfo(Descriptor.ElemType).Width <= 8 * MaxCopySize;
644   bool UseCopy = CanCopy && ((VarNameFromAlias && !AliasVarIsRef) ||
645                              (Descriptor.DerefByConstRef && IsCheapToCopy));
646 
647   if (!UseCopy) {
648     if (Descriptor.DerefByConstRef) {
649       Type = Context->getLValueReferenceType(Context->getConstType(Type));
650     } else if (Descriptor.DerefByValue) {
651       if (!IsCheapToCopy)
652         Type = Context->getRValueReferenceType(Type);
653     } else {
654       Type = Context->getLValueReferenceType(Type);
655     }
656   }
657 
658   SmallString<128> Range;
659   llvm::raw_svector_ostream Output(Range);
660   Output << '(';
661   Type.print(Output, getLangOpts());
662   Output << ' ' << VarName << " : ";
663   if (Descriptor.NeedsReverseCall)
664     Output << getReverseFunction() << '(';
665   if (Descriptor.ContainerNeedsDereference)
666     Output << '*';
667   Output << Descriptor.ContainerString;
668   if (Descriptor.NeedsReverseCall)
669     Output << "))";
670   else
671     Output << ')';
672   FixIts.push_back(FixItHint::CreateReplacement(
673       CharSourceRange::getTokenRange(ParenRange), Range));
674 
675   if (Descriptor.NeedsReverseCall && !getReverseHeader().empty()) {
676     if (std::optional<FixItHint> Insertion = Inserter.createIncludeInsertion(
677             Context->getSourceManager().getFileID(Loop->getBeginLoc()),
678             getReverseHeader()))
679       FixIts.push_back(*Insertion);
680   }
681   diag(Loop->getForLoc(), "use range-based for loop instead") << FixIts;
682   TUInfo->getGeneratedDecls().insert(make_pair(Loop, VarName));
683 }
684 
685 /// Returns a string which refers to the container iterated over.
686 StringRef LoopConvertCheck::getContainerString(ASTContext *Context,
687                                                const ForStmt *Loop,
688                                                const Expr *ContainerExpr) {
689   StringRef ContainerString;
690   ContainerExpr = ContainerExpr->IgnoreParenImpCasts();
691   if (isa<CXXThisExpr>(ContainerExpr)) {
692     ContainerString = "this";
693   } else {
694     // For CXXOperatorCallExpr such as vector_ptr->size() we want the class
695     // object vector_ptr, but for vector[2] we need the whole expression.
696     if (const auto* E = dyn_cast<CXXOperatorCallExpr>(ContainerExpr))
697       if (E->getOperator() != OO_Subscript)
698         ContainerExpr = E->getArg(0);
699     ContainerString =
700         getStringFromRange(Context->getSourceManager(), Context->getLangOpts(),
701                            ContainerExpr->getSourceRange());
702   }
703 
704   return ContainerString;
705 }
706 
707 /// Determines what kind of 'auto' must be used after converting a for
708 /// loop that iterates over an array or pseudoarray.
709 void LoopConvertCheck::getArrayLoopQualifiers(ASTContext *Context,
710                                               const BoundNodes &Nodes,
711                                               const Expr *ContainerExpr,
712                                               const UsageResult &Usages,
713                                               RangeDescriptor &Descriptor) {
714   // On arrays and pseudoarrays, we must figure out the qualifiers from the
715   // usages.
716   if (usagesAreConst(Context, Usages) ||
717       containerIsConst(ContainerExpr, Descriptor.ContainerNeedsDereference)) {
718     Descriptor.DerefByConstRef = true;
719   }
720   if (usagesReturnRValues(Usages)) {
721     // If the index usages (dereference, subscript, at, ...) return rvalues,
722     // then we should not use a reference, because we need to keep the code
723     // correct if it mutates the returned objects.
724     Descriptor.DerefByValue = true;
725   }
726   // Try to find the type of the elements on the container, to check if
727   // they are trivially copyable.
728   for (const Usage &U : Usages) {
729     if (!U.Expression || U.Expression->getType().isNull())
730       continue;
731     QualType Type = U.Expression->getType().getCanonicalType();
732     if (U.Kind == Usage::UK_MemberThroughArrow) {
733       if (!Type->isPointerType()) {
734         continue;
735       }
736       Type = Type->getPointeeType();
737     }
738     Descriptor.ElemType = Type;
739   }
740 }
741 
742 /// Determines what kind of 'auto' must be used after converting an
743 /// iterator based for loop.
744 void LoopConvertCheck::getIteratorLoopQualifiers(ASTContext *Context,
745                                                  const BoundNodes &Nodes,
746                                                  RangeDescriptor &Descriptor) {
747   // The matchers for iterator loops provide bound nodes to obtain this
748   // information.
749   const auto *InitVar = Nodes.getNodeAs<VarDecl>(InitVarName);
750   QualType CanonicalInitVarType = InitVar->getType().getCanonicalType();
751   const auto *DerefByValueType =
752       Nodes.getNodeAs<QualType>(DerefByValueResultName);
753   Descriptor.DerefByValue = DerefByValueType;
754 
755   if (Descriptor.DerefByValue) {
756     // If the dereference operator returns by value then test for the
757     // canonical const qualification of the init variable type.
758     Descriptor.DerefByConstRef = CanonicalInitVarType.isConstQualified();
759     Descriptor.ElemType = *DerefByValueType;
760   } else {
761     if (const auto *DerefType =
762             Nodes.getNodeAs<QualType>(DerefByRefResultName)) {
763       // A node will only be bound with DerefByRefResultName if we're dealing
764       // with a user-defined iterator type. Test the const qualification of
765       // the reference type.
766       auto ValueType = DerefType->getNonReferenceType();
767 
768       Descriptor.DerefByConstRef = ValueType.isConstQualified();
769       Descriptor.ElemType = ValueType;
770     } else {
771       // By nature of the matcher this case is triggered only for built-in
772       // iterator types (i.e. pointers).
773       assert(isa<PointerType>(CanonicalInitVarType) &&
774              "Non-class iterator type is not a pointer type");
775 
776       // We test for const qualification of the pointed-at type.
777       Descriptor.DerefByConstRef =
778           CanonicalInitVarType->getPointeeType().isConstQualified();
779       Descriptor.ElemType = CanonicalInitVarType->getPointeeType();
780     }
781   }
782 }
783 
784 /// Determines the parameters needed to build the range replacement.
785 void LoopConvertCheck::determineRangeDescriptor(
786     ASTContext *Context, const BoundNodes &Nodes, const ForStmt *Loop,
787     LoopFixerKind FixerKind, const Expr *ContainerExpr,
788     const UsageResult &Usages, RangeDescriptor &Descriptor) {
789   Descriptor.ContainerString =
790       std::string(getContainerString(Context, Loop, ContainerExpr));
791   Descriptor.NeedsReverseCall = (FixerKind == LFK_ReverseIterator);
792 
793   if (FixerKind == LFK_Iterator || FixerKind == LFK_ReverseIterator)
794     getIteratorLoopQualifiers(Context, Nodes, Descriptor);
795   else
796     getArrayLoopQualifiers(Context, Nodes, ContainerExpr, Usages, Descriptor);
797 }
798 
799 /// Check some of the conditions that must be met for the loop to be
800 /// convertible.
801 bool LoopConvertCheck::isConvertible(ASTContext *Context,
802                                      const ast_matchers::BoundNodes &Nodes,
803                                      const ForStmt *Loop,
804                                      LoopFixerKind FixerKind) {
805   // In self contained diagnosics mode we don't want dependancies on other
806   // loops, otherwise, If we already modified the range of this for loop, don't
807   // do any further updates on this iteration.
808   if (areDiagsSelfContained())
809     TUInfo = std::make_unique<TUTrackingInfo>();
810   else if (TUInfo->getReplacedVars().count(Loop))
811     return false;
812 
813   // Check that we have exactly one index variable and at most one end variable.
814   const auto *InitVar = Nodes.getNodeAs<VarDecl>(InitVarName);
815 
816   // FIXME: Try to put most of this logic inside a matcher.
817   if (FixerKind == LFK_Iterator || FixerKind == LFK_ReverseIterator) {
818     QualType InitVarType = InitVar->getType();
819     QualType CanonicalInitVarType = InitVarType.getCanonicalType();
820 
821     const auto *BeginCall = Nodes.getNodeAs<CXXMemberCallExpr>(BeginCallName);
822     assert(BeginCall && "Bad Callback. No begin call expression");
823     QualType CanonicalBeginType =
824         BeginCall->getMethodDecl()->getReturnType().getCanonicalType();
825     if (CanonicalBeginType->isPointerType() &&
826         CanonicalInitVarType->isPointerType()) {
827       // If the initializer and the variable are both pointers check if the
828       // un-qualified pointee types match, otherwise we don't use auto.
829       if (!Context->hasSameUnqualifiedType(
830               CanonicalBeginType->getPointeeType(),
831               CanonicalInitVarType->getPointeeType()))
832         return false;
833     }
834   } else if (FixerKind == LFK_PseudoArray) {
835     // This call is required to obtain the container.
836     const auto *EndCall = Nodes.getNodeAs<CXXMemberCallExpr>(EndCallName);
837     if (!EndCall || !isa<MemberExpr>(EndCall->getCallee()))
838       return false;
839   }
840   return true;
841 }
842 
843 void LoopConvertCheck::check(const MatchFinder::MatchResult &Result) {
844   const BoundNodes &Nodes = Result.Nodes;
845   Confidence ConfidenceLevel(Confidence::CL_Safe);
846   ASTContext *Context = Result.Context;
847 
848   const ForStmt *Loop;
849   LoopFixerKind FixerKind;
850   RangeDescriptor Descriptor;
851 
852   if ((Loop = Nodes.getNodeAs<ForStmt>(LoopNameArray))) {
853     FixerKind = LFK_Array;
854   } else if ((Loop = Nodes.getNodeAs<ForStmt>(LoopNameIterator))) {
855     FixerKind = LFK_Iterator;
856   } else if ((Loop = Nodes.getNodeAs<ForStmt>(LoopNameReverseIterator))) {
857     FixerKind = LFK_ReverseIterator;
858   } else {
859     Loop = Nodes.getNodeAs<ForStmt>(LoopNamePseudoArray);
860     assert(Loop && "Bad Callback. No for statement");
861     FixerKind = LFK_PseudoArray;
862   }
863 
864   if (!isConvertible(Context, Nodes, Loop, FixerKind))
865     return;
866 
867   const auto *LoopVar = Nodes.getNodeAs<VarDecl>(InitVarName);
868   const auto *EndVar = Nodes.getNodeAs<VarDecl>(EndVarName);
869 
870   // If the loop calls end()/size() after each iteration, lower our confidence
871   // level.
872   if (FixerKind != LFK_Array && !EndVar)
873     ConfidenceLevel.lowerTo(Confidence::CL_Reasonable);
874 
875   // If the end comparison isn't a variable, we can try to work with the
876   // expression the loop variable is being tested against instead.
877   const auto *EndCall = Nodes.getNodeAs<CXXMemberCallExpr>(EndCallName);
878   const auto *BoundExpr = Nodes.getNodeAs<Expr>(ConditionBoundName);
879 
880   // Find container expression of iterators and pseudoarrays, and determine if
881   // this expression needs to be dereferenced to obtain the container.
882   // With array loops, the container is often discovered during the
883   // ForLoopIndexUseVisitor traversal.
884   const Expr *ContainerExpr = nullptr;
885   if (FixerKind == LFK_Iterator || FixerKind == LFK_ReverseIterator) {
886     ContainerExpr = findContainer(
887         Context, LoopVar->getInit(), EndVar ? EndVar->getInit() : EndCall,
888         &Descriptor.ContainerNeedsDereference,
889         /*IsReverse=*/FixerKind == LFK_ReverseIterator);
890   } else if (FixerKind == LFK_PseudoArray) {
891     ContainerExpr = EndCall->getImplicitObjectArgument();
892     Descriptor.ContainerNeedsDereference =
893         dyn_cast<MemberExpr>(EndCall->getCallee())->isArrow();
894   }
895 
896   // We must know the container or an array length bound.
897   if (!ContainerExpr && !BoundExpr)
898     return;
899 
900   ForLoopIndexUseVisitor Finder(Context, LoopVar, EndVar, ContainerExpr,
901                                 BoundExpr,
902                                 Descriptor.ContainerNeedsDereference);
903 
904   // Find expressions and variables on which the container depends.
905   if (ContainerExpr) {
906     ComponentFinderASTVisitor ComponentFinder;
907     ComponentFinder.findExprComponents(ContainerExpr->IgnoreParenImpCasts());
908     Finder.addComponents(ComponentFinder.getComponents());
909   }
910 
911   // Find usages of the loop index. If they are not used in a convertible way,
912   // stop here.
913   if (!Finder.findAndVerifyUsages(Loop->getBody()))
914     return;
915   ConfidenceLevel.lowerTo(Finder.getConfidenceLevel());
916 
917   // Obtain the container expression, if we don't have it yet.
918   if (FixerKind == LFK_Array) {
919     ContainerExpr = Finder.getContainerIndexed()->IgnoreParenImpCasts();
920 
921     // Very few loops are over expressions that generate arrays rather than
922     // array variables. Consider loops over arrays that aren't just represented
923     // by a variable to be risky conversions.
924     if (!getReferencedVariable(ContainerExpr) &&
925         !isDirectMemberExpr(ContainerExpr))
926       ConfidenceLevel.lowerTo(Confidence::CL_Risky);
927   }
928 
929   // Find out which qualifiers we have to use in the loop range.
930   TraversalKindScope RAII(*Context, TK_AsIs);
931   const UsageResult &Usages = Finder.getUsages();
932   determineRangeDescriptor(Context, Nodes, Loop, FixerKind, ContainerExpr,
933                            Usages, Descriptor);
934 
935   // Ensure that we do not try to move an expression dependent on a local
936   // variable declared inside the loop outside of it.
937   // FIXME: Determine when the external dependency isn't an expression converted
938   // by another loop.
939   TUInfo->getParentFinder().gatherAncestors(*Context);
940   DependencyFinderASTVisitor DependencyFinder(
941       &TUInfo->getParentFinder().getStmtToParentStmtMap(),
942       &TUInfo->getParentFinder().getDeclToParentStmtMap(),
943       &TUInfo->getReplacedVars(), Loop);
944 
945   if (DependencyFinder.dependsOnInsideVariable(ContainerExpr) ||
946       Descriptor.ContainerString.empty() || Usages.empty() ||
947       ConfidenceLevel.getLevel() < MinConfidence)
948     return;
949 
950   doConversion(Context, LoopVar, getReferencedVariable(ContainerExpr), Usages,
951                Finder.getAliasDecl(), Finder.aliasUseRequired(),
952                Finder.aliasFromForInit(), Loop, Descriptor);
953 }
954 
955 llvm::StringRef LoopConvertCheck::getReverseFunction() const {
956   if (!ReverseFunction.empty())
957     return ReverseFunction;
958   if (UseReverseRanges)
959     return "std::ranges::reverse_view";
960   return "";
961 }
962 
963 llvm::StringRef LoopConvertCheck::getReverseHeader() const {
964   if (!ReverseHeader.empty())
965     return ReverseHeader;
966   if (UseReverseRanges && ReverseFunction.empty()) {
967     return "<ranges>";
968   }
969   return "";
970 }
971 
972 } // namespace modernize
973 } // namespace tidy
974 } // namespace clang
975