1 //===--- LoopConvertCheck.cpp - clang-tidy---------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "LoopConvertCheck.h" 10 #include "clang/AST/ASTContext.h" 11 #include "clang/ASTMatchers/ASTMatchFinder.h" 12 #include "clang/Basic/LLVM.h" 13 #include "clang/Basic/LangOptions.h" 14 #include "clang/Basic/SourceLocation.h" 15 #include "clang/Basic/SourceManager.h" 16 #include "clang/Lex/Lexer.h" 17 #include "llvm/ADT/ArrayRef.h" 18 #include "llvm/ADT/SmallVector.h" 19 #include "llvm/ADT/StringRef.h" 20 #include "llvm/Support/Casting.h" 21 #include "llvm/Support/raw_ostream.h" 22 #include <cassert> 23 #include <cstring> 24 #include <optional> 25 #include <utility> 26 27 using namespace clang::ast_matchers; 28 using namespace llvm; 29 30 namespace clang { 31 namespace tidy { 32 33 template <> struct OptionEnumMapping<modernize::Confidence::Level> { 34 static llvm::ArrayRef<std::pair<modernize::Confidence::Level, StringRef>> 35 getEnumMapping() { 36 static constexpr std::pair<modernize::Confidence::Level, StringRef> 37 Mapping[] = {{modernize::Confidence::CL_Reasonable, "reasonable"}, 38 {modernize::Confidence::CL_Safe, "safe"}, 39 {modernize::Confidence::CL_Risky, "risky"}}; 40 return ArrayRef(Mapping); 41 } 42 }; 43 44 template <> struct OptionEnumMapping<modernize::VariableNamer::NamingStyle> { 45 static llvm::ArrayRef< 46 std::pair<modernize::VariableNamer::NamingStyle, StringRef>> 47 getEnumMapping() { 48 static constexpr std::pair<modernize::VariableNamer::NamingStyle, StringRef> 49 Mapping[] = {{modernize::VariableNamer::NS_CamelCase, "CamelCase"}, 50 {modernize::VariableNamer::NS_CamelBack, "camelBack"}, 51 {modernize::VariableNamer::NS_LowerCase, "lower_case"}, 52 {modernize::VariableNamer::NS_UpperCase, "UPPER_CASE"}}; 53 return ArrayRef(Mapping); 54 } 55 }; 56 57 namespace modernize { 58 59 static const char LoopNameArray[] = "forLoopArray"; 60 static const char LoopNameIterator[] = "forLoopIterator"; 61 static const char LoopNameReverseIterator[] = "forLoopReverseIterator"; 62 static const char LoopNamePseudoArray[] = "forLoopPseudoArray"; 63 static const char ConditionBoundName[] = "conditionBound"; 64 static const char InitVarName[] = "initVar"; 65 static const char BeginCallName[] = "beginCall"; 66 static const char EndCallName[] = "endCall"; 67 static const char EndVarName[] = "endVar"; 68 static const char DerefByValueResultName[] = "derefByValueResult"; 69 static const char DerefByRefResultName[] = "derefByRefResult"; 70 71 static const StatementMatcher integerComparisonMatcher() { 72 return expr(ignoringParenImpCasts( 73 declRefExpr(to(varDecl(equalsBoundNode(InitVarName)))))); 74 } 75 76 static const DeclarationMatcher initToZeroMatcher() { 77 return varDecl( 78 hasInitializer(ignoringParenImpCasts(integerLiteral(equals(0))))) 79 .bind(InitVarName); 80 } 81 82 static const StatementMatcher incrementVarMatcher() { 83 return declRefExpr(to(varDecl(equalsBoundNode(InitVarName)))); 84 } 85 86 static StatementMatcher 87 arrayConditionMatcher(internal::Matcher<Expr> LimitExpr) { 88 return binaryOperator( 89 anyOf(allOf(hasOperatorName("<"), hasLHS(integerComparisonMatcher()), 90 hasRHS(LimitExpr)), 91 allOf(hasOperatorName(">"), hasLHS(LimitExpr), 92 hasRHS(integerComparisonMatcher())), 93 allOf(hasOperatorName("!="), 94 hasOperands(integerComparisonMatcher(), LimitExpr)))); 95 } 96 97 /// The matcher for loops over arrays. 98 /// \code 99 /// for (int i = 0; i < 3 + 2; ++i) { ... } 100 /// \endcode 101 /// The following string identifiers are bound to these parts of the AST: 102 /// ConditionBoundName: '3 + 2' (as an Expr) 103 /// InitVarName: 'i' (as a VarDecl) 104 /// LoopName: The entire for loop (as a ForStmt) 105 /// 106 /// Client code will need to make sure that: 107 /// - The index variable is only used as an array index. 108 /// - All arrays indexed by the loop are the same. 109 StatementMatcher makeArrayLoopMatcher() { 110 StatementMatcher ArrayBoundMatcher = 111 expr(hasType(isInteger())).bind(ConditionBoundName); 112 113 return forStmt(unless(isInTemplateInstantiation()), 114 hasLoopInit(declStmt(hasSingleDecl(initToZeroMatcher()))), 115 hasCondition(arrayConditionMatcher(ArrayBoundMatcher)), 116 hasIncrement( 117 unaryOperator(hasOperatorName("++"), 118 hasUnaryOperand(incrementVarMatcher())))) 119 .bind(LoopNameArray); 120 } 121 122 /// The matcher used for iterator-based for loops. 123 /// 124 /// This matcher is more flexible than array-based loops. It will match 125 /// catch loops of the following textual forms (regardless of whether the 126 /// iterator type is actually a pointer type or a class type): 127 /// 128 /// \code 129 /// for (containerType::iterator it = container.begin(), 130 /// e = createIterator(); it != e; ++it) { ... } 131 /// for (containerType::iterator it = container.begin(); 132 /// it != anotherContainer.end(); ++it) { ... } 133 /// \endcode 134 /// The following string identifiers are bound to the parts of the AST: 135 /// InitVarName: 'it' (as a VarDecl) 136 /// LoopName: The entire for loop (as a ForStmt) 137 /// In the first example only: 138 /// EndVarName: 'e' (as a VarDecl) 139 /// In the second example only: 140 /// EndCallName: 'container.end()' (as a CXXMemberCallExpr) 141 /// 142 /// Client code will need to make sure that: 143 /// - The two containers on which 'begin' and 'end' are called are the same. 144 StatementMatcher makeIteratorLoopMatcher(bool IsReverse) { 145 146 auto BeginNameMatcher = IsReverse ? hasAnyName("rbegin", "crbegin") 147 : hasAnyName("begin", "cbegin"); 148 149 auto EndNameMatcher = 150 IsReverse ? hasAnyName("rend", "crend") : hasAnyName("end", "cend"); 151 152 StatementMatcher BeginCallMatcher = 153 cxxMemberCallExpr(argumentCountIs(0), 154 callee(cxxMethodDecl(BeginNameMatcher))) 155 .bind(BeginCallName); 156 157 DeclarationMatcher InitDeclMatcher = 158 varDecl(hasInitializer(anyOf(ignoringParenImpCasts(BeginCallMatcher), 159 materializeTemporaryExpr( 160 ignoringParenImpCasts(BeginCallMatcher)), 161 hasDescendant(BeginCallMatcher)))) 162 .bind(InitVarName); 163 164 DeclarationMatcher EndDeclMatcher = 165 varDecl(hasInitializer(anything())).bind(EndVarName); 166 167 StatementMatcher EndCallMatcher = cxxMemberCallExpr( 168 argumentCountIs(0), callee(cxxMethodDecl(EndNameMatcher))); 169 170 StatementMatcher IteratorBoundMatcher = 171 expr(anyOf(ignoringParenImpCasts( 172 declRefExpr(to(varDecl(equalsBoundNode(EndVarName))))), 173 ignoringParenImpCasts(expr(EndCallMatcher).bind(EndCallName)), 174 materializeTemporaryExpr(ignoringParenImpCasts( 175 expr(EndCallMatcher).bind(EndCallName))))); 176 177 StatementMatcher IteratorComparisonMatcher = expr(ignoringParenImpCasts( 178 declRefExpr(to(varDecl(equalsBoundNode(InitVarName)))))); 179 180 // This matcher tests that a declaration is a CXXRecordDecl that has an 181 // overloaded operator*(). If the operator*() returns by value instead of by 182 // reference then the return type is tagged with DerefByValueResultName. 183 internal::Matcher<VarDecl> TestDerefReturnsByValue = 184 hasType(hasUnqualifiedDesugaredType( 185 recordType(hasDeclaration(cxxRecordDecl(hasMethod(cxxMethodDecl( 186 hasOverloadedOperatorName("*"), 187 anyOf( 188 // Tag the return type if it's by value. 189 returns(qualType(unless(hasCanonicalType(referenceType()))) 190 .bind(DerefByValueResultName)), 191 returns( 192 // Skip loops where the iterator's operator* returns an 193 // rvalue reference. This is just weird. 194 qualType(unless(hasCanonicalType(rValueReferenceType()))) 195 .bind(DerefByRefResultName)))))))))); 196 197 return forStmt( 198 unless(isInTemplateInstantiation()), 199 hasLoopInit(anyOf(declStmt(declCountIs(2), 200 containsDeclaration(0, InitDeclMatcher), 201 containsDeclaration(1, EndDeclMatcher)), 202 declStmt(hasSingleDecl(InitDeclMatcher)))), 203 hasCondition(ignoringImplicit(binaryOperation( 204 hasOperatorName("!="), hasOperands(IteratorComparisonMatcher, 205 IteratorBoundMatcher)))), 206 hasIncrement(anyOf( 207 unaryOperator(hasOperatorName("++"), 208 hasUnaryOperand(declRefExpr( 209 to(varDecl(equalsBoundNode(InitVarName)))))), 210 cxxOperatorCallExpr( 211 hasOverloadedOperatorName("++"), 212 hasArgument(0, declRefExpr(to( 213 varDecl(equalsBoundNode(InitVarName), 214 TestDerefReturnsByValue)))))))) 215 .bind(IsReverse ? LoopNameReverseIterator : LoopNameIterator); 216 } 217 218 /// The matcher used for array-like containers (pseudoarrays). 219 /// 220 /// This matcher is more flexible than array-based loops. It will match 221 /// loops of the following textual forms (regardless of whether the 222 /// iterator type is actually a pointer type or a class type): 223 /// 224 /// \code 225 /// for (int i = 0, j = container.size(); i < j; ++i) { ... } 226 /// for (int i = 0; i < container.size(); ++i) { ... } 227 /// \endcode 228 /// The following string identifiers are bound to the parts of the AST: 229 /// InitVarName: 'i' (as a VarDecl) 230 /// LoopName: The entire for loop (as a ForStmt) 231 /// In the first example only: 232 /// EndVarName: 'j' (as a VarDecl) 233 /// In the second example only: 234 /// EndCallName: 'container.size()' (as a CXXMemberCallExpr) 235 /// 236 /// Client code will need to make sure that: 237 /// - The containers on which 'size()' is called is the container indexed. 238 /// - The index variable is only used in overloaded operator[] or 239 /// container.at(). 240 /// - The container's iterators would not be invalidated during the loop. 241 StatementMatcher makePseudoArrayLoopMatcher() { 242 // Test that the incoming type has a record declaration that has methods 243 // called 'begin' and 'end'. If the incoming type is const, then make sure 244 // these methods are also marked const. 245 // 246 // FIXME: To be completely thorough this matcher should also ensure the 247 // return type of begin/end is an iterator that dereferences to the same as 248 // what operator[] or at() returns. Such a test isn't likely to fail except 249 // for pathological cases. 250 // 251 // FIXME: Also, a record doesn't necessarily need begin() and end(). Free 252 // functions called begin() and end() taking the container as an argument 253 // are also allowed. 254 TypeMatcher RecordWithBeginEnd = qualType(anyOf( 255 qualType(isConstQualified(), 256 hasUnqualifiedDesugaredType(recordType(hasDeclaration( 257 cxxRecordDecl(isSameOrDerivedFrom(cxxRecordDecl( 258 hasMethod(cxxMethodDecl(hasName("begin"), isConst())), 259 hasMethod(cxxMethodDecl(hasName("end"), 260 isConst())))))) // hasDeclaration 261 ))), // qualType 262 qualType(unless(isConstQualified()), 263 hasUnqualifiedDesugaredType(recordType(hasDeclaration( 264 cxxRecordDecl(isSameOrDerivedFrom(cxxRecordDecl( 265 hasMethod(hasName("begin")), 266 hasMethod(hasName("end"))))))))) // qualType 267 )); 268 269 StatementMatcher SizeCallMatcher = cxxMemberCallExpr( 270 argumentCountIs(0), callee(cxxMethodDecl(hasAnyName("size", "length"))), 271 on(anyOf(hasType(pointsTo(RecordWithBeginEnd)), 272 hasType(RecordWithBeginEnd)))); 273 274 StatementMatcher EndInitMatcher = 275 expr(anyOf(ignoringParenImpCasts(expr(SizeCallMatcher).bind(EndCallName)), 276 explicitCastExpr(hasSourceExpression(ignoringParenImpCasts( 277 expr(SizeCallMatcher).bind(EndCallName)))))); 278 279 DeclarationMatcher EndDeclMatcher = 280 varDecl(hasInitializer(EndInitMatcher)).bind(EndVarName); 281 282 StatementMatcher IndexBoundMatcher = 283 expr(anyOf(ignoringParenImpCasts( 284 declRefExpr(to(varDecl(equalsBoundNode(EndVarName))))), 285 EndInitMatcher)); 286 287 return forStmt(unless(isInTemplateInstantiation()), 288 hasLoopInit( 289 anyOf(declStmt(declCountIs(2), 290 containsDeclaration(0, initToZeroMatcher()), 291 containsDeclaration(1, EndDeclMatcher)), 292 declStmt(hasSingleDecl(initToZeroMatcher())))), 293 hasCondition(arrayConditionMatcher(IndexBoundMatcher)), 294 hasIncrement( 295 unaryOperator(hasOperatorName("++"), 296 hasUnaryOperand(incrementVarMatcher())))) 297 .bind(LoopNamePseudoArray); 298 } 299 300 /// Determine whether Init appears to be an initializing an iterator. 301 /// 302 /// If it is, returns the object whose begin() or end() method is called, and 303 /// the output parameter isArrow is set to indicate whether the initialization 304 /// is called via . or ->. 305 static const Expr *getContainerFromBeginEndCall(const Expr *Init, bool IsBegin, 306 bool *IsArrow, bool IsReverse) { 307 // FIXME: Maybe allow declaration/initialization outside of the for loop. 308 const auto *TheCall = dyn_cast_or_null<CXXMemberCallExpr>( 309 digThroughConstructorsConversions(Init)); 310 if (!TheCall || TheCall->getNumArgs() != 0) 311 return nullptr; 312 313 const auto *Member = dyn_cast<MemberExpr>(TheCall->getCallee()); 314 if (!Member) 315 return nullptr; 316 StringRef Name = Member->getMemberDecl()->getName(); 317 if (!Name.consume_back(IsBegin ? "begin" : "end")) 318 return nullptr; 319 if (IsReverse && !Name.consume_back("r")) 320 return nullptr; 321 if (!Name.empty() && !Name.equals("c")) 322 return nullptr; 323 324 const Expr *SourceExpr = Member->getBase(); 325 if (!SourceExpr) 326 return nullptr; 327 328 *IsArrow = Member->isArrow(); 329 return SourceExpr; 330 } 331 332 /// Determines the container whose begin() and end() functions are called 333 /// for an iterator-based loop. 334 /// 335 /// BeginExpr must be a member call to a function named "begin()", and EndExpr 336 /// must be a member. 337 static const Expr *findContainer(ASTContext *Context, const Expr *BeginExpr, 338 const Expr *EndExpr, 339 bool *ContainerNeedsDereference, 340 bool IsReverse) { 341 // Now that we know the loop variable and test expression, make sure they are 342 // valid. 343 bool BeginIsArrow = false; 344 bool EndIsArrow = false; 345 const Expr *BeginContainerExpr = getContainerFromBeginEndCall( 346 BeginExpr, /*IsBegin=*/true, &BeginIsArrow, IsReverse); 347 if (!BeginContainerExpr) 348 return nullptr; 349 350 const Expr *EndContainerExpr = getContainerFromBeginEndCall( 351 EndExpr, /*IsBegin=*/false, &EndIsArrow, IsReverse); 352 // Disallow loops that try evil things like this (note the dot and arrow): 353 // for (IteratorType It = Obj.begin(), E = Obj->end(); It != E; ++It) { } 354 if (!EndContainerExpr || BeginIsArrow != EndIsArrow || 355 !areSameExpr(Context, EndContainerExpr, BeginContainerExpr)) 356 return nullptr; 357 358 *ContainerNeedsDereference = BeginIsArrow; 359 return BeginContainerExpr; 360 } 361 362 /// Obtain the original source code text from a SourceRange. 363 static StringRef getStringFromRange(SourceManager &SourceMgr, 364 const LangOptions &LangOpts, 365 SourceRange Range) { 366 if (SourceMgr.getFileID(Range.getBegin()) != 367 SourceMgr.getFileID(Range.getEnd())) { 368 return StringRef(); // Empty string. 369 } 370 371 return Lexer::getSourceText(CharSourceRange(Range, true), SourceMgr, 372 LangOpts); 373 } 374 375 /// If the given expression is actually a DeclRefExpr or a MemberExpr, 376 /// find and return the underlying ValueDecl; otherwise, return NULL. 377 static const ValueDecl *getReferencedVariable(const Expr *E) { 378 if (const DeclRefExpr *DRE = getDeclRef(E)) 379 return dyn_cast<VarDecl>(DRE->getDecl()); 380 if (const auto *Mem = dyn_cast<MemberExpr>(E->IgnoreParenImpCasts())) 381 return dyn_cast<FieldDecl>(Mem->getMemberDecl()); 382 return nullptr; 383 } 384 385 /// Returns true when the given expression is a member expression 386 /// whose base is `this` (implicitly or not). 387 static bool isDirectMemberExpr(const Expr *E) { 388 if (const auto *Member = dyn_cast<MemberExpr>(E->IgnoreParenImpCasts())) 389 return isa<CXXThisExpr>(Member->getBase()->IgnoreParenImpCasts()); 390 return false; 391 } 392 393 /// Given an expression that represents an usage of an element from the 394 /// containter that we are iterating over, returns false when it can be 395 /// guaranteed this element cannot be modified as a result of this usage. 396 static bool canBeModified(ASTContext *Context, const Expr *E) { 397 if (E->getType().isConstQualified()) 398 return false; 399 auto Parents = Context->getParents(*E); 400 if (Parents.size() != 1) 401 return true; 402 if (const auto *Cast = Parents[0].get<ImplicitCastExpr>()) { 403 if ((Cast->getCastKind() == CK_NoOp && 404 Context->hasSameType(Cast->getType(), E->getType().withConst())) || 405 (Cast->getCastKind() == CK_LValueToRValue && 406 !Cast->getType().isNull() && Cast->getType()->isFundamentalType())) 407 return false; 408 } 409 // FIXME: Make this function more generic. 410 return true; 411 } 412 413 /// Returns true when it can be guaranteed that the elements of the 414 /// container are not being modified. 415 static bool usagesAreConst(ASTContext *Context, const UsageResult &Usages) { 416 for (const Usage &U : Usages) { 417 // Lambda captures are just redeclarations (VarDecl) of the same variable, 418 // not expressions. If we want to know if a variable that is captured by 419 // reference can be modified in an usage inside the lambda's body, we need 420 // to find the expression corresponding to that particular usage, later in 421 // this loop. 422 if (U.Kind != Usage::UK_CaptureByCopy && U.Kind != Usage::UK_CaptureByRef && 423 canBeModified(Context, U.Expression)) 424 return false; 425 } 426 return true; 427 } 428 429 /// Returns true if the elements of the container are never accessed 430 /// by reference. 431 static bool usagesReturnRValues(const UsageResult &Usages) { 432 for (const auto &U : Usages) { 433 if (U.Expression && !U.Expression->isPRValue()) 434 return false; 435 } 436 return true; 437 } 438 439 /// Returns true if the container is const-qualified. 440 static bool containerIsConst(const Expr *ContainerExpr, bool Dereference) { 441 if (const auto *VDec = getReferencedVariable(ContainerExpr)) { 442 QualType CType = VDec->getType(); 443 if (Dereference) { 444 if (!CType->isPointerType()) 445 return false; 446 CType = CType->getPointeeType(); 447 } 448 // If VDec is a reference to a container, Dereference is false, 449 // but we still need to check the const-ness of the underlying container 450 // type. 451 CType = CType.getNonReferenceType(); 452 return CType.isConstQualified(); 453 } 454 return false; 455 } 456 457 LoopConvertCheck::RangeDescriptor::RangeDescriptor() 458 : ContainerNeedsDereference(false), DerefByConstRef(false), 459 DerefByValue(false), NeedsReverseCall(false) {} 460 461 LoopConvertCheck::LoopConvertCheck(StringRef Name, ClangTidyContext *Context) 462 : ClangTidyCheck(Name, Context), TUInfo(new TUTrackingInfo), 463 MaxCopySize(Options.get("MaxCopySize", 16ULL)), 464 MinConfidence(Options.get("MinConfidence", Confidence::CL_Reasonable)), 465 NamingStyle(Options.get("NamingStyle", VariableNamer::NS_CamelCase)), 466 Inserter(Options.getLocalOrGlobal("IncludeStyle", 467 utils::IncludeSorter::IS_LLVM), 468 areDiagsSelfContained()), 469 UseCxx20IfAvailable(Options.get("UseCxx20ReverseRanges", true)), 470 ReverseFunction(Options.get("MakeReverseRangeFunction", "")), 471 ReverseHeader(Options.get("MakeReverseRangeHeader", "")) { 472 473 if (ReverseFunction.empty() && !ReverseHeader.empty()) { 474 configurationDiag( 475 "modernize-loop-convert: 'MakeReverseRangeHeader' is set but " 476 "'MakeReverseRangeFunction' is not, disabling reverse loop " 477 "transformation"); 478 UseReverseRanges = false; 479 } else if (ReverseFunction.empty()) { 480 UseReverseRanges = UseCxx20IfAvailable && getLangOpts().CPlusPlus20; 481 } else { 482 UseReverseRanges = true; 483 } 484 } 485 486 void LoopConvertCheck::storeOptions(ClangTidyOptions::OptionMap &Opts) { 487 Options.store(Opts, "MaxCopySize", MaxCopySize); 488 Options.store(Opts, "MinConfidence", MinConfidence); 489 Options.store(Opts, "NamingStyle", NamingStyle); 490 Options.store(Opts, "IncludeStyle", Inserter.getStyle()); 491 Options.store(Opts, "UseCxx20ReverseRanges", UseCxx20IfAvailable); 492 Options.store(Opts, "MakeReverseRangeFunction", ReverseFunction); 493 Options.store(Opts, "MakeReverseRangeHeader", ReverseHeader); 494 } 495 496 void LoopConvertCheck::registerPPCallbacks(const SourceManager &SM, 497 Preprocessor *PP, 498 Preprocessor *ModuleExpanderPP) { 499 Inserter.registerPreprocessor(PP); 500 } 501 502 void LoopConvertCheck::registerMatchers(MatchFinder *Finder) { 503 Finder->addMatcher(traverse(TK_AsIs, makeArrayLoopMatcher()), this); 504 Finder->addMatcher(traverse(TK_AsIs, makeIteratorLoopMatcher(false)), this); 505 Finder->addMatcher(traverse(TK_AsIs, makePseudoArrayLoopMatcher()), this); 506 if (UseReverseRanges) 507 Finder->addMatcher(traverse(TK_AsIs, makeIteratorLoopMatcher(true)), this); 508 } 509 510 /// Given the range of a single declaration, such as: 511 /// \code 512 /// unsigned &ThisIsADeclarationThatCanSpanSeveralLinesOfCode = 513 /// InitializationValues[I]; 514 /// next_instruction; 515 /// \endcode 516 /// Finds the range that has to be erased to remove this declaration without 517 /// leaving empty lines, by extending the range until the beginning of the 518 /// next instruction. 519 /// 520 /// We need to delete a potential newline after the deleted alias, as 521 /// clang-format will leave empty lines untouched. For all other formatting we 522 /// rely on clang-format to fix it. 523 void LoopConvertCheck::getAliasRange(SourceManager &SM, SourceRange &Range) { 524 bool Invalid = false; 525 const char *TextAfter = 526 SM.getCharacterData(Range.getEnd().getLocWithOffset(1), &Invalid); 527 if (Invalid) 528 return; 529 unsigned Offset = std::strspn(TextAfter, " \t\r\n"); 530 Range = 531 SourceRange(Range.getBegin(), Range.getEnd().getLocWithOffset(Offset)); 532 } 533 534 /// Computes the changes needed to convert a given for loop, and 535 /// applies them. 536 void LoopConvertCheck::doConversion( 537 ASTContext *Context, const VarDecl *IndexVar, 538 const ValueDecl *MaybeContainer, const UsageResult &Usages, 539 const DeclStmt *AliasDecl, bool AliasUseRequired, bool AliasFromForInit, 540 const ForStmt *Loop, RangeDescriptor Descriptor) { 541 std::string VarName; 542 bool VarNameFromAlias = (Usages.size() == 1) && AliasDecl; 543 bool AliasVarIsRef = false; 544 bool CanCopy = true; 545 std::vector<FixItHint> FixIts; 546 if (VarNameFromAlias) { 547 const auto *AliasVar = cast<VarDecl>(AliasDecl->getSingleDecl()); 548 VarName = AliasVar->getName().str(); 549 550 // Use the type of the alias if it's not the same 551 QualType AliasVarType = AliasVar->getType(); 552 assert(!AliasVarType.isNull() && "Type in VarDecl is null"); 553 if (AliasVarType->isReferenceType()) { 554 AliasVarType = AliasVarType.getNonReferenceType(); 555 AliasVarIsRef = true; 556 } 557 if (Descriptor.ElemType.isNull() || 558 !Context->hasSameUnqualifiedType(AliasVarType, Descriptor.ElemType)) 559 Descriptor.ElemType = AliasVarType; 560 561 // We keep along the entire DeclStmt to keep the correct range here. 562 SourceRange ReplaceRange = AliasDecl->getSourceRange(); 563 564 std::string ReplacementText; 565 if (AliasUseRequired) { 566 ReplacementText = VarName; 567 } else if (AliasFromForInit) { 568 // FIXME: Clang includes the location of the ';' but only for DeclStmt's 569 // in a for loop's init clause. Need to put this ';' back while removing 570 // the declaration of the alias variable. This is probably a bug. 571 ReplacementText = ";"; 572 } else { 573 // Avoid leaving empty lines or trailing whitespaces. 574 getAliasRange(Context->getSourceManager(), ReplaceRange); 575 } 576 577 FixIts.push_back(FixItHint::CreateReplacement( 578 CharSourceRange::getTokenRange(ReplaceRange), ReplacementText)); 579 // No further replacements are made to the loop, since the iterator or index 580 // was used exactly once - in the initialization of AliasVar. 581 } else { 582 VariableNamer Namer(&TUInfo->getGeneratedDecls(), 583 &TUInfo->getParentFinder().getStmtToParentStmtMap(), 584 Loop, IndexVar, MaybeContainer, Context, NamingStyle); 585 VarName = Namer.createIndexName(); 586 // First, replace all usages of the array subscript expression with our new 587 // variable. 588 for (const auto &Usage : Usages) { 589 std::string ReplaceText; 590 SourceRange Range = Usage.Range; 591 if (Usage.Expression) { 592 // If this is an access to a member through the arrow operator, after 593 // the replacement it must be accessed through the '.' operator. 594 ReplaceText = Usage.Kind == Usage::UK_MemberThroughArrow ? VarName + "." 595 : VarName; 596 auto Parents = Context->getParents(*Usage.Expression); 597 if (Parents.size() == 1) { 598 if (const auto *Paren = Parents[0].get<ParenExpr>()) { 599 // Usage.Expression will be replaced with the new index variable, 600 // and parenthesis around a simple DeclRefExpr can always be 601 // removed. 602 Range = Paren->getSourceRange(); 603 } else if (const auto *UOP = Parents[0].get<UnaryOperator>()) { 604 // If we are taking the address of the loop variable, then we must 605 // not use a copy, as it would mean taking the address of the loop's 606 // local index instead. 607 // FIXME: This won't catch cases where the address is taken outside 608 // of the loop's body (for instance, in a function that got the 609 // loop's index as a const reference parameter), or where we take 610 // the address of a member (like "&Arr[i].A.B.C"). 611 if (UOP->getOpcode() == UO_AddrOf) 612 CanCopy = false; 613 } 614 } 615 } else { 616 // The Usage expression is only null in case of lambda captures (which 617 // are VarDecl). If the index is captured by value, add '&' to capture 618 // by reference instead. 619 ReplaceText = 620 Usage.Kind == Usage::UK_CaptureByCopy ? "&" + VarName : VarName; 621 } 622 TUInfo->getReplacedVars().insert(std::make_pair(Loop, IndexVar)); 623 FixIts.push_back(FixItHint::CreateReplacement( 624 CharSourceRange::getTokenRange(Range), ReplaceText)); 625 } 626 } 627 628 // Now, we need to construct the new range expression. 629 SourceRange ParenRange(Loop->getLParenLoc(), Loop->getRParenLoc()); 630 631 QualType Type = Context->getAutoDeductType(); 632 if (!Descriptor.ElemType.isNull() && Descriptor.ElemType->isFundamentalType()) 633 Type = Descriptor.ElemType.getUnqualifiedType(); 634 Type = Type.getDesugaredType(*Context); 635 636 // If the new variable name is from the aliased variable, then the reference 637 // type for the new variable should only be used if the aliased variable was 638 // declared as a reference. 639 bool IsCheapToCopy = 640 !Descriptor.ElemType.isNull() && 641 Descriptor.ElemType.isTriviallyCopyableType(*Context) && 642 // TypeInfo::Width is in bits. 643 Context->getTypeInfo(Descriptor.ElemType).Width <= 8 * MaxCopySize; 644 bool UseCopy = CanCopy && ((VarNameFromAlias && !AliasVarIsRef) || 645 (Descriptor.DerefByConstRef && IsCheapToCopy)); 646 647 if (!UseCopy) { 648 if (Descriptor.DerefByConstRef) { 649 Type = Context->getLValueReferenceType(Context->getConstType(Type)); 650 } else if (Descriptor.DerefByValue) { 651 if (!IsCheapToCopy) 652 Type = Context->getRValueReferenceType(Type); 653 } else { 654 Type = Context->getLValueReferenceType(Type); 655 } 656 } 657 658 SmallString<128> Range; 659 llvm::raw_svector_ostream Output(Range); 660 Output << '('; 661 Type.print(Output, getLangOpts()); 662 Output << ' ' << VarName << " : "; 663 if (Descriptor.NeedsReverseCall) 664 Output << getReverseFunction() << '('; 665 if (Descriptor.ContainerNeedsDereference) 666 Output << '*'; 667 Output << Descriptor.ContainerString; 668 if (Descriptor.NeedsReverseCall) 669 Output << "))"; 670 else 671 Output << ')'; 672 FixIts.push_back(FixItHint::CreateReplacement( 673 CharSourceRange::getTokenRange(ParenRange), Range)); 674 675 if (Descriptor.NeedsReverseCall && !getReverseHeader().empty()) { 676 if (std::optional<FixItHint> Insertion = Inserter.createIncludeInsertion( 677 Context->getSourceManager().getFileID(Loop->getBeginLoc()), 678 getReverseHeader())) 679 FixIts.push_back(*Insertion); 680 } 681 diag(Loop->getForLoc(), "use range-based for loop instead") << FixIts; 682 TUInfo->getGeneratedDecls().insert(make_pair(Loop, VarName)); 683 } 684 685 /// Returns a string which refers to the container iterated over. 686 StringRef LoopConvertCheck::getContainerString(ASTContext *Context, 687 const ForStmt *Loop, 688 const Expr *ContainerExpr) { 689 StringRef ContainerString; 690 ContainerExpr = ContainerExpr->IgnoreParenImpCasts(); 691 if (isa<CXXThisExpr>(ContainerExpr)) { 692 ContainerString = "this"; 693 } else { 694 // For CXXOperatorCallExpr such as vector_ptr->size() we want the class 695 // object vector_ptr, but for vector[2] we need the whole expression. 696 if (const auto* E = dyn_cast<CXXOperatorCallExpr>(ContainerExpr)) 697 if (E->getOperator() != OO_Subscript) 698 ContainerExpr = E->getArg(0); 699 ContainerString = 700 getStringFromRange(Context->getSourceManager(), Context->getLangOpts(), 701 ContainerExpr->getSourceRange()); 702 } 703 704 return ContainerString; 705 } 706 707 /// Determines what kind of 'auto' must be used after converting a for 708 /// loop that iterates over an array or pseudoarray. 709 void LoopConvertCheck::getArrayLoopQualifiers(ASTContext *Context, 710 const BoundNodes &Nodes, 711 const Expr *ContainerExpr, 712 const UsageResult &Usages, 713 RangeDescriptor &Descriptor) { 714 // On arrays and pseudoarrays, we must figure out the qualifiers from the 715 // usages. 716 if (usagesAreConst(Context, Usages) || 717 containerIsConst(ContainerExpr, Descriptor.ContainerNeedsDereference)) { 718 Descriptor.DerefByConstRef = true; 719 } 720 if (usagesReturnRValues(Usages)) { 721 // If the index usages (dereference, subscript, at, ...) return rvalues, 722 // then we should not use a reference, because we need to keep the code 723 // correct if it mutates the returned objects. 724 Descriptor.DerefByValue = true; 725 } 726 // Try to find the type of the elements on the container, to check if 727 // they are trivially copyable. 728 for (const Usage &U : Usages) { 729 if (!U.Expression || U.Expression->getType().isNull()) 730 continue; 731 QualType Type = U.Expression->getType().getCanonicalType(); 732 if (U.Kind == Usage::UK_MemberThroughArrow) { 733 if (!Type->isPointerType()) { 734 continue; 735 } 736 Type = Type->getPointeeType(); 737 } 738 Descriptor.ElemType = Type; 739 } 740 } 741 742 /// Determines what kind of 'auto' must be used after converting an 743 /// iterator based for loop. 744 void LoopConvertCheck::getIteratorLoopQualifiers(ASTContext *Context, 745 const BoundNodes &Nodes, 746 RangeDescriptor &Descriptor) { 747 // The matchers for iterator loops provide bound nodes to obtain this 748 // information. 749 const auto *InitVar = Nodes.getNodeAs<VarDecl>(InitVarName); 750 QualType CanonicalInitVarType = InitVar->getType().getCanonicalType(); 751 const auto *DerefByValueType = 752 Nodes.getNodeAs<QualType>(DerefByValueResultName); 753 Descriptor.DerefByValue = DerefByValueType; 754 755 if (Descriptor.DerefByValue) { 756 // If the dereference operator returns by value then test for the 757 // canonical const qualification of the init variable type. 758 Descriptor.DerefByConstRef = CanonicalInitVarType.isConstQualified(); 759 Descriptor.ElemType = *DerefByValueType; 760 } else { 761 if (const auto *DerefType = 762 Nodes.getNodeAs<QualType>(DerefByRefResultName)) { 763 // A node will only be bound with DerefByRefResultName if we're dealing 764 // with a user-defined iterator type. Test the const qualification of 765 // the reference type. 766 auto ValueType = DerefType->getNonReferenceType(); 767 768 Descriptor.DerefByConstRef = ValueType.isConstQualified(); 769 Descriptor.ElemType = ValueType; 770 } else { 771 // By nature of the matcher this case is triggered only for built-in 772 // iterator types (i.e. pointers). 773 assert(isa<PointerType>(CanonicalInitVarType) && 774 "Non-class iterator type is not a pointer type"); 775 776 // We test for const qualification of the pointed-at type. 777 Descriptor.DerefByConstRef = 778 CanonicalInitVarType->getPointeeType().isConstQualified(); 779 Descriptor.ElemType = CanonicalInitVarType->getPointeeType(); 780 } 781 } 782 } 783 784 /// Determines the parameters needed to build the range replacement. 785 void LoopConvertCheck::determineRangeDescriptor( 786 ASTContext *Context, const BoundNodes &Nodes, const ForStmt *Loop, 787 LoopFixerKind FixerKind, const Expr *ContainerExpr, 788 const UsageResult &Usages, RangeDescriptor &Descriptor) { 789 Descriptor.ContainerString = 790 std::string(getContainerString(Context, Loop, ContainerExpr)); 791 Descriptor.NeedsReverseCall = (FixerKind == LFK_ReverseIterator); 792 793 if (FixerKind == LFK_Iterator || FixerKind == LFK_ReverseIterator) 794 getIteratorLoopQualifiers(Context, Nodes, Descriptor); 795 else 796 getArrayLoopQualifiers(Context, Nodes, ContainerExpr, Usages, Descriptor); 797 } 798 799 /// Check some of the conditions that must be met for the loop to be 800 /// convertible. 801 bool LoopConvertCheck::isConvertible(ASTContext *Context, 802 const ast_matchers::BoundNodes &Nodes, 803 const ForStmt *Loop, 804 LoopFixerKind FixerKind) { 805 // In self contained diagnosics mode we don't want dependancies on other 806 // loops, otherwise, If we already modified the range of this for loop, don't 807 // do any further updates on this iteration. 808 if (areDiagsSelfContained()) 809 TUInfo = std::make_unique<TUTrackingInfo>(); 810 else if (TUInfo->getReplacedVars().count(Loop)) 811 return false; 812 813 // Check that we have exactly one index variable and at most one end variable. 814 const auto *InitVar = Nodes.getNodeAs<VarDecl>(InitVarName); 815 816 // FIXME: Try to put most of this logic inside a matcher. 817 if (FixerKind == LFK_Iterator || FixerKind == LFK_ReverseIterator) { 818 QualType InitVarType = InitVar->getType(); 819 QualType CanonicalInitVarType = InitVarType.getCanonicalType(); 820 821 const auto *BeginCall = Nodes.getNodeAs<CXXMemberCallExpr>(BeginCallName); 822 assert(BeginCall && "Bad Callback. No begin call expression"); 823 QualType CanonicalBeginType = 824 BeginCall->getMethodDecl()->getReturnType().getCanonicalType(); 825 if (CanonicalBeginType->isPointerType() && 826 CanonicalInitVarType->isPointerType()) { 827 // If the initializer and the variable are both pointers check if the 828 // un-qualified pointee types match, otherwise we don't use auto. 829 if (!Context->hasSameUnqualifiedType( 830 CanonicalBeginType->getPointeeType(), 831 CanonicalInitVarType->getPointeeType())) 832 return false; 833 } 834 } else if (FixerKind == LFK_PseudoArray) { 835 // This call is required to obtain the container. 836 const auto *EndCall = Nodes.getNodeAs<CXXMemberCallExpr>(EndCallName); 837 if (!EndCall || !isa<MemberExpr>(EndCall->getCallee())) 838 return false; 839 } 840 return true; 841 } 842 843 void LoopConvertCheck::check(const MatchFinder::MatchResult &Result) { 844 const BoundNodes &Nodes = Result.Nodes; 845 Confidence ConfidenceLevel(Confidence::CL_Safe); 846 ASTContext *Context = Result.Context; 847 848 const ForStmt *Loop; 849 LoopFixerKind FixerKind; 850 RangeDescriptor Descriptor; 851 852 if ((Loop = Nodes.getNodeAs<ForStmt>(LoopNameArray))) { 853 FixerKind = LFK_Array; 854 } else if ((Loop = Nodes.getNodeAs<ForStmt>(LoopNameIterator))) { 855 FixerKind = LFK_Iterator; 856 } else if ((Loop = Nodes.getNodeAs<ForStmt>(LoopNameReverseIterator))) { 857 FixerKind = LFK_ReverseIterator; 858 } else { 859 Loop = Nodes.getNodeAs<ForStmt>(LoopNamePseudoArray); 860 assert(Loop && "Bad Callback. No for statement"); 861 FixerKind = LFK_PseudoArray; 862 } 863 864 if (!isConvertible(Context, Nodes, Loop, FixerKind)) 865 return; 866 867 const auto *LoopVar = Nodes.getNodeAs<VarDecl>(InitVarName); 868 const auto *EndVar = Nodes.getNodeAs<VarDecl>(EndVarName); 869 870 // If the loop calls end()/size() after each iteration, lower our confidence 871 // level. 872 if (FixerKind != LFK_Array && !EndVar) 873 ConfidenceLevel.lowerTo(Confidence::CL_Reasonable); 874 875 // If the end comparison isn't a variable, we can try to work with the 876 // expression the loop variable is being tested against instead. 877 const auto *EndCall = Nodes.getNodeAs<CXXMemberCallExpr>(EndCallName); 878 const auto *BoundExpr = Nodes.getNodeAs<Expr>(ConditionBoundName); 879 880 // Find container expression of iterators and pseudoarrays, and determine if 881 // this expression needs to be dereferenced to obtain the container. 882 // With array loops, the container is often discovered during the 883 // ForLoopIndexUseVisitor traversal. 884 const Expr *ContainerExpr = nullptr; 885 if (FixerKind == LFK_Iterator || FixerKind == LFK_ReverseIterator) { 886 ContainerExpr = findContainer( 887 Context, LoopVar->getInit(), EndVar ? EndVar->getInit() : EndCall, 888 &Descriptor.ContainerNeedsDereference, 889 /*IsReverse=*/FixerKind == LFK_ReverseIterator); 890 } else if (FixerKind == LFK_PseudoArray) { 891 ContainerExpr = EndCall->getImplicitObjectArgument(); 892 Descriptor.ContainerNeedsDereference = 893 dyn_cast<MemberExpr>(EndCall->getCallee())->isArrow(); 894 } 895 896 // We must know the container or an array length bound. 897 if (!ContainerExpr && !BoundExpr) 898 return; 899 900 ForLoopIndexUseVisitor Finder(Context, LoopVar, EndVar, ContainerExpr, 901 BoundExpr, 902 Descriptor.ContainerNeedsDereference); 903 904 // Find expressions and variables on which the container depends. 905 if (ContainerExpr) { 906 ComponentFinderASTVisitor ComponentFinder; 907 ComponentFinder.findExprComponents(ContainerExpr->IgnoreParenImpCasts()); 908 Finder.addComponents(ComponentFinder.getComponents()); 909 } 910 911 // Find usages of the loop index. If they are not used in a convertible way, 912 // stop here. 913 if (!Finder.findAndVerifyUsages(Loop->getBody())) 914 return; 915 ConfidenceLevel.lowerTo(Finder.getConfidenceLevel()); 916 917 // Obtain the container expression, if we don't have it yet. 918 if (FixerKind == LFK_Array) { 919 ContainerExpr = Finder.getContainerIndexed()->IgnoreParenImpCasts(); 920 921 // Very few loops are over expressions that generate arrays rather than 922 // array variables. Consider loops over arrays that aren't just represented 923 // by a variable to be risky conversions. 924 if (!getReferencedVariable(ContainerExpr) && 925 !isDirectMemberExpr(ContainerExpr)) 926 ConfidenceLevel.lowerTo(Confidence::CL_Risky); 927 } 928 929 // Find out which qualifiers we have to use in the loop range. 930 TraversalKindScope RAII(*Context, TK_AsIs); 931 const UsageResult &Usages = Finder.getUsages(); 932 determineRangeDescriptor(Context, Nodes, Loop, FixerKind, ContainerExpr, 933 Usages, Descriptor); 934 935 // Ensure that we do not try to move an expression dependent on a local 936 // variable declared inside the loop outside of it. 937 // FIXME: Determine when the external dependency isn't an expression converted 938 // by another loop. 939 TUInfo->getParentFinder().gatherAncestors(*Context); 940 DependencyFinderASTVisitor DependencyFinder( 941 &TUInfo->getParentFinder().getStmtToParentStmtMap(), 942 &TUInfo->getParentFinder().getDeclToParentStmtMap(), 943 &TUInfo->getReplacedVars(), Loop); 944 945 if (DependencyFinder.dependsOnInsideVariable(ContainerExpr) || 946 Descriptor.ContainerString.empty() || Usages.empty() || 947 ConfidenceLevel.getLevel() < MinConfidence) 948 return; 949 950 doConversion(Context, LoopVar, getReferencedVariable(ContainerExpr), Usages, 951 Finder.getAliasDecl(), Finder.aliasUseRequired(), 952 Finder.aliasFromForInit(), Loop, Descriptor); 953 } 954 955 llvm::StringRef LoopConvertCheck::getReverseFunction() const { 956 if (!ReverseFunction.empty()) 957 return ReverseFunction; 958 if (UseReverseRanges) 959 return "std::ranges::reverse_view"; 960 return ""; 961 } 962 963 llvm::StringRef LoopConvertCheck::getReverseHeader() const { 964 if (!ReverseHeader.empty()) 965 return ReverseHeader; 966 if (UseReverseRanges && ReverseFunction.empty()) { 967 return "<ranges>"; 968 } 969 return ""; 970 } 971 972 } // namespace modernize 973 } // namespace tidy 974 } // namespace clang 975