1 //===- CalledOnceCheck.cpp - Check 'called once' parameters ---------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "clang/Analysis/Analyses/CalledOnceCheck.h" 10 #include "clang/AST/ASTContext.h" 11 #include "clang/AST/Attr.h" 12 #include "clang/AST/Decl.h" 13 #include "clang/AST/DeclBase.h" 14 #include "clang/AST/Expr.h" 15 #include "clang/AST/ExprObjC.h" 16 #include "clang/AST/OperationKinds.h" 17 #include "clang/AST/ParentMap.h" 18 #include "clang/AST/RecursiveASTVisitor.h" 19 #include "clang/AST/Stmt.h" 20 #include "clang/AST/StmtObjC.h" 21 #include "clang/AST/StmtVisitor.h" 22 #include "clang/AST/Type.h" 23 #include "clang/Analysis/AnalysisDeclContext.h" 24 #include "clang/Analysis/CFG.h" 25 #include "clang/Analysis/FlowSensitive/DataflowWorklist.h" 26 #include "clang/Basic/Builtins.h" 27 #include "clang/Basic/IdentifierTable.h" 28 #include "clang/Basic/LLVM.h" 29 #include "llvm/ADT/BitVector.h" 30 #include "llvm/ADT/BitmaskEnum.h" 31 #include "llvm/ADT/Optional.h" 32 #include "llvm/ADT/PointerIntPair.h" 33 #include "llvm/ADT/STLExtras.h" 34 #include "llvm/ADT/Sequence.h" 35 #include "llvm/ADT/SmallVector.h" 36 #include "llvm/ADT/StringRef.h" 37 #include "llvm/Support/Casting.h" 38 #include "llvm/Support/Compiler.h" 39 #include "llvm/Support/ErrorHandling.h" 40 #include <memory> 41 #include <optional> 42 43 using namespace clang; 44 45 namespace { 46 static constexpr unsigned EXPECTED_MAX_NUMBER_OF_PARAMS = 2; 47 template <class T> 48 using ParamSizedVector = llvm::SmallVector<T, EXPECTED_MAX_NUMBER_OF_PARAMS>; 49 static constexpr unsigned EXPECTED_NUMBER_OF_BASIC_BLOCKS = 8; 50 template <class T> 51 using CFGSizedVector = llvm::SmallVector<T, EXPECTED_NUMBER_OF_BASIC_BLOCKS>; 52 constexpr llvm::StringLiteral CONVENTIONAL_NAMES[] = { 53 "completionHandler", "completion", "withCompletionHandler", 54 "withCompletion", "completionBlock", "withCompletionBlock", 55 "replyTo", "reply", "withReplyTo"}; 56 constexpr llvm::StringLiteral CONVENTIONAL_SUFFIXES[] = { 57 "WithCompletionHandler", "WithCompletion", "WithCompletionBlock", 58 "WithReplyTo", "WithReply"}; 59 constexpr llvm::StringLiteral CONVENTIONAL_CONDITIONS[] = { 60 "error", "cancel", "shouldCall", "done", "OK", "success"}; 61 62 struct KnownCalledOnceParameter { 63 llvm::StringLiteral FunctionName; 64 unsigned ParamIndex; 65 }; 66 constexpr KnownCalledOnceParameter KNOWN_CALLED_ONCE_PARAMETERS[] = { 67 {llvm::StringLiteral{"dispatch_async"}, 1}, 68 {llvm::StringLiteral{"dispatch_async_and_wait"}, 1}, 69 {llvm::StringLiteral{"dispatch_after"}, 2}, 70 {llvm::StringLiteral{"dispatch_sync"}, 1}, 71 {llvm::StringLiteral{"dispatch_once"}, 1}, 72 {llvm::StringLiteral{"dispatch_barrier_async"}, 1}, 73 {llvm::StringLiteral{"dispatch_barrier_async_and_wait"}, 1}, 74 {llvm::StringLiteral{"dispatch_barrier_sync"}, 1}}; 75 76 class ParameterStatus { 77 public: 78 // Status kind is basically the main part of parameter's status. 79 // The kind represents our knowledge (so far) about a tracked parameter 80 // in the context of this analysis. 81 // 82 // Since we want to report on missing and extraneous calls, we need to 83 // track the fact whether paramater was called or not. This automatically 84 // decides two kinds: `NotCalled` and `Called`. 85 // 86 // One of the erroneous situations is the case when parameter is called only 87 // on some of the paths. We could've considered it `NotCalled`, but we want 88 // to report double call warnings even if these two calls are not guaranteed 89 // to happen in every execution. We also don't want to have it as `Called` 90 // because not calling tracked parameter on all of the paths is an error 91 // on its own. For these reasons, we need to have a separate kind, 92 // `MaybeCalled`, and change `Called` to `DefinitelyCalled` to avoid 93 // confusion. 94 // 95 // Two violations of calling parameter more than once and not calling it on 96 // every path are not, however, mutually exclusive. In situations where both 97 // violations take place, we prefer to report ONLY double call. It's always 98 // harder to pinpoint a bug that has arisen when a user neglects to take the 99 // right action (and therefore, no action is taken), than when a user takes 100 // the wrong action. And, in order to remember that we already reported 101 // a double call, we need another kind: `Reported`. 102 // 103 // Our analysis is intra-procedural and, while in the perfect world, 104 // developers only use tracked parameters to call them, in the real world, 105 // the picture might be different. Parameters can be stored in global 106 // variables or leaked into other functions that we know nothing about. 107 // We try to be lenient and trust users. Another kind `Escaped` reflects 108 // such situations. We don't know if it gets called there or not, but we 109 // should always think of `Escaped` as the best possible option. 110 // 111 // Some of the paths in the analyzed functions might end with a call 112 // to noreturn functions. Such paths are not required to have parameter 113 // calls and we want to track that. For the purposes of better diagnostics, 114 // we don't want to reuse `Escaped` and, thus, have another kind `NoReturn`. 115 // 116 // Additionally, we have `NotVisited` kind that tells us nothing about 117 // a tracked parameter, but is used for tracking analyzed (aka visited) 118 // basic blocks. 119 // 120 // If we consider `|` to be a JOIN operation of two kinds coming from 121 // two different paths, the following properties must hold: 122 // 123 // 1. for any Kind K: K | K == K 124 // Joining two identical kinds should result in the same kind. 125 // 126 // 2. for any Kind K: Reported | K == Reported 127 // Doesn't matter on which path it was reported, it still is. 128 // 129 // 3. for any Kind K: NoReturn | K == K 130 // We can totally ignore noreturn paths during merges. 131 // 132 // 4. DefinitelyCalled | NotCalled == MaybeCalled 133 // Called on one path, not called on another - that's simply 134 // a definition for MaybeCalled. 135 // 136 // 5. for any Kind K in [DefinitelyCalled, NotCalled, MaybeCalled]: 137 // Escaped | K == K 138 // Escaped mirrors other statuses after joins. 139 // Every situation, when we join any of the listed kinds K, 140 // is a violation. For this reason, in order to assume the 141 // best outcome for this escape, we consider it to be the 142 // same as the other path. 143 // 144 // 6. for any Kind K in [DefinitelyCalled, NotCalled]: 145 // MaybeCalled | K == MaybeCalled 146 // MaybeCalled should basically stay after almost every join. 147 enum Kind { 148 // No-return paths should be absolutely transparent for the analysis. 149 // 0x0 is the identity element for selected join operation (binary or). 150 NoReturn = 0x0, /* 0000 */ 151 // Escaped marks situations when marked parameter escaped into 152 // another function (so we can assume that it was possibly called there). 153 Escaped = 0x1, /* 0001 */ 154 // Parameter was definitely called once at this point. 155 DefinitelyCalled = 0x3, /* 0011 */ 156 // Kinds less or equal to NON_ERROR_STATUS are not considered errors. 157 NON_ERROR_STATUS = DefinitelyCalled, 158 // Parameter was not yet called. 159 NotCalled = 0x5, /* 0101 */ 160 // Parameter was not called at least on one path leading to this point, 161 // while there is also at least one path that it gets called. 162 MaybeCalled = 0x7, /* 0111 */ 163 // Parameter was not yet analyzed. 164 NotVisited = 0x8, /* 1000 */ 165 // We already reported a violation and stopped tracking calls for this 166 // parameter. 167 Reported = 0x15, /* 1111 */ 168 LLVM_MARK_AS_BITMASK_ENUM(/* LargestValue = */ Reported) 169 }; 170 171 constexpr ParameterStatus() = default; 172 /* implicit */ ParameterStatus(Kind K) : StatusKind(K) { 173 assert(!seenAnyCalls(K) && "Can't initialize status without a call"); 174 } 175 ParameterStatus(Kind K, const Expr *Call) : StatusKind(K), Call(Call) { 176 assert(seenAnyCalls(K) && "This kind is not supposed to have a call"); 177 } 178 179 const Expr &getCall() const { 180 assert(seenAnyCalls(getKind()) && "ParameterStatus doesn't have a call"); 181 return *Call; 182 } 183 static bool seenAnyCalls(Kind K) { 184 return (K & DefinitelyCalled) == DefinitelyCalled && K != Reported; 185 } 186 bool seenAnyCalls() const { return seenAnyCalls(getKind()); } 187 188 static bool isErrorStatus(Kind K) { return K > NON_ERROR_STATUS; } 189 bool isErrorStatus() const { return isErrorStatus(getKind()); } 190 191 Kind getKind() const { return StatusKind; } 192 193 void join(const ParameterStatus &Other) { 194 // If we have a pointer already, let's keep it. 195 // For the purposes of the analysis, it doesn't really matter 196 // which call we report. 197 // 198 // If we don't have a pointer, let's take whatever gets joined. 199 if (!Call) { 200 Call = Other.Call; 201 } 202 // Join kinds. 203 StatusKind |= Other.getKind(); 204 } 205 206 bool operator==(const ParameterStatus &Other) const { 207 // We compare only kinds, pointers on their own is only additional 208 // information. 209 return getKind() == Other.getKind(); 210 } 211 212 private: 213 // It would've been a perfect place to use llvm::PointerIntPair, but 214 // unfortunately NumLowBitsAvailable for clang::Expr had been reduced to 2. 215 Kind StatusKind = NotVisited; 216 const Expr *Call = nullptr; 217 }; 218 219 /// State aggregates statuses of all tracked parameters. 220 class State { 221 public: 222 State(unsigned Size, ParameterStatus::Kind K = ParameterStatus::NotVisited) 223 : ParamData(Size, K) {} 224 225 /// Return status of a parameter with the given index. 226 /// \{ 227 ParameterStatus &getStatusFor(unsigned Index) { return ParamData[Index]; } 228 const ParameterStatus &getStatusFor(unsigned Index) const { 229 return ParamData[Index]; 230 } 231 /// \} 232 233 /// Return true if parameter with the given index can be called. 234 bool seenAnyCalls(unsigned Index) const { 235 return getStatusFor(Index).seenAnyCalls(); 236 } 237 /// Return a reference that we consider a call. 238 /// 239 /// Should only be used for parameters that can be called. 240 const Expr &getCallFor(unsigned Index) const { 241 return getStatusFor(Index).getCall(); 242 } 243 /// Return status kind of parameter with the given index. 244 ParameterStatus::Kind getKindFor(unsigned Index) const { 245 return getStatusFor(Index).getKind(); 246 } 247 248 bool isVisited() const { 249 return llvm::all_of(ParamData, [](const ParameterStatus &S) { 250 return S.getKind() != ParameterStatus::NotVisited; 251 }); 252 } 253 254 // Join other state into the current state. 255 void join(const State &Other) { 256 assert(ParamData.size() == Other.ParamData.size() && 257 "Couldn't join statuses with different sizes"); 258 for (auto Pair : llvm::zip(ParamData, Other.ParamData)) { 259 std::get<0>(Pair).join(std::get<1>(Pair)); 260 } 261 } 262 263 using iterator = ParamSizedVector<ParameterStatus>::iterator; 264 using const_iterator = ParamSizedVector<ParameterStatus>::const_iterator; 265 266 iterator begin() { return ParamData.begin(); } 267 iterator end() { return ParamData.end(); } 268 269 const_iterator begin() const { return ParamData.begin(); } 270 const_iterator end() const { return ParamData.end(); } 271 272 bool operator==(const State &Other) const { 273 return ParamData == Other.ParamData; 274 } 275 276 private: 277 ParamSizedVector<ParameterStatus> ParamData; 278 }; 279 280 /// A simple class that finds DeclRefExpr in the given expression. 281 /// 282 /// However, we don't want to find ANY nested DeclRefExpr skipping whatever 283 /// expressions on our way. Only certain expressions considered "no-op" 284 /// for our task are indeed skipped. 285 class DeclRefFinder 286 : public ConstStmtVisitor<DeclRefFinder, const DeclRefExpr *> { 287 public: 288 /// Find a DeclRefExpr in the given expression. 289 /// 290 /// In its most basic form (ShouldRetrieveFromComparisons == false), 291 /// this function can be simply reduced to the following question: 292 /// 293 /// - If expression E is used as a function argument, could we say 294 /// that DeclRefExpr nested in E is used as an argument? 295 /// 296 /// According to this rule, we can say that parens, casts and dereferencing 297 /// (dereferencing only applied to function pointers, but this is our case) 298 /// can be skipped. 299 /// 300 /// When we should look into comparisons the question changes to: 301 /// 302 /// - If expression E is used as a condition, could we say that 303 /// DeclRefExpr is being checked? 304 /// 305 /// And even though, these are two different questions, they have quite a lot 306 /// in common. Actually, we can say that whatever expression answers 307 /// positively the first question also fits the second question as well. 308 /// 309 /// In addition, we skip binary operators == and !=, and unary opeartor !. 310 static const DeclRefExpr *find(const Expr *E, 311 bool ShouldRetrieveFromComparisons = false) { 312 return DeclRefFinder(ShouldRetrieveFromComparisons).Visit(E); 313 } 314 315 const DeclRefExpr *VisitDeclRefExpr(const DeclRefExpr *DR) { return DR; } 316 317 const DeclRefExpr *VisitUnaryOperator(const UnaryOperator *UO) { 318 switch (UO->getOpcode()) { 319 case UO_LNot: 320 // We care about logical not only if we care about comparisons. 321 if (!ShouldRetrieveFromComparisons) 322 return nullptr; 323 [[fallthrough]]; 324 // Function pointer/references can be dereferenced before a call. 325 // That doesn't make it, however, any different from a regular call. 326 // For this reason, dereference operation is a "no-op". 327 case UO_Deref: 328 return Visit(UO->getSubExpr()); 329 default: 330 return nullptr; 331 } 332 } 333 334 const DeclRefExpr *VisitBinaryOperator(const BinaryOperator *BO) { 335 if (!ShouldRetrieveFromComparisons) 336 return nullptr; 337 338 switch (BO->getOpcode()) { 339 case BO_EQ: 340 case BO_NE: { 341 const DeclRefExpr *LHS = Visit(BO->getLHS()); 342 return LHS ? LHS : Visit(BO->getRHS()); 343 } 344 default: 345 return nullptr; 346 } 347 } 348 349 const DeclRefExpr *VisitOpaqueValueExpr(const OpaqueValueExpr *OVE) { 350 return Visit(OVE->getSourceExpr()); 351 } 352 353 const DeclRefExpr *VisitCallExpr(const CallExpr *CE) { 354 if (!ShouldRetrieveFromComparisons) 355 return nullptr; 356 357 // We want to see through some of the boolean builtin functions 358 // that we are likely to see in conditions. 359 switch (CE->getBuiltinCallee()) { 360 case Builtin::BI__builtin_expect: 361 case Builtin::BI__builtin_expect_with_probability: { 362 assert(CE->getNumArgs() >= 2); 363 364 const DeclRefExpr *Candidate = Visit(CE->getArg(0)); 365 return Candidate != nullptr ? Candidate : Visit(CE->getArg(1)); 366 } 367 368 case Builtin::BI__builtin_unpredictable: 369 return Visit(CE->getArg(0)); 370 371 default: 372 return nullptr; 373 } 374 } 375 376 const DeclRefExpr *VisitExpr(const Expr *E) { 377 // It is a fallback method that gets called whenever the actual type 378 // of the given expression is not covered. 379 // 380 // We first check if we have anything to skip. And then repeat the whole 381 // procedure for a nested expression instead. 382 const Expr *DeclutteredExpr = E->IgnoreParenCasts(); 383 return E != DeclutteredExpr ? Visit(DeclutteredExpr) : nullptr; 384 } 385 386 private: 387 DeclRefFinder(bool ShouldRetrieveFromComparisons) 388 : ShouldRetrieveFromComparisons(ShouldRetrieveFromComparisons) {} 389 390 bool ShouldRetrieveFromComparisons; 391 }; 392 393 const DeclRefExpr *findDeclRefExpr(const Expr *In, 394 bool ShouldRetrieveFromComparisons = false) { 395 return DeclRefFinder::find(In, ShouldRetrieveFromComparisons); 396 } 397 398 const ParmVarDecl * 399 findReferencedParmVarDecl(const Expr *In, 400 bool ShouldRetrieveFromComparisons = false) { 401 if (const DeclRefExpr *DR = 402 findDeclRefExpr(In, ShouldRetrieveFromComparisons)) { 403 return dyn_cast<ParmVarDecl>(DR->getDecl()); 404 } 405 406 return nullptr; 407 } 408 409 /// Return conditions expression of a statement if it has one. 410 const Expr *getCondition(const Stmt *S) { 411 if (!S) { 412 return nullptr; 413 } 414 415 if (const auto *If = dyn_cast<IfStmt>(S)) { 416 return If->getCond(); 417 } 418 if (const auto *Ternary = dyn_cast<AbstractConditionalOperator>(S)) { 419 return Ternary->getCond(); 420 } 421 422 return nullptr; 423 } 424 425 /// A small helper class that collects all named identifiers in the given 426 /// expression. It traverses it recursively, so names from deeper levels 427 /// of the AST will end up in the results. 428 /// Results might have duplicate names, if this is a problem, convert to 429 /// string sets afterwards. 430 class NamesCollector : public RecursiveASTVisitor<NamesCollector> { 431 public: 432 static constexpr unsigned EXPECTED_NUMBER_OF_NAMES = 5; 433 using NameCollection = 434 llvm::SmallVector<llvm::StringRef, EXPECTED_NUMBER_OF_NAMES>; 435 436 static NameCollection collect(const Expr *From) { 437 NamesCollector Impl; 438 Impl.TraverseStmt(const_cast<Expr *>(From)); 439 return Impl.Result; 440 } 441 442 bool VisitDeclRefExpr(const DeclRefExpr *E) { 443 Result.push_back(E->getDecl()->getName()); 444 return true; 445 } 446 447 bool VisitObjCPropertyRefExpr(const ObjCPropertyRefExpr *E) { 448 llvm::StringRef Name; 449 450 if (E->isImplicitProperty()) { 451 ObjCMethodDecl *PropertyMethodDecl = nullptr; 452 if (E->isMessagingGetter()) { 453 PropertyMethodDecl = E->getImplicitPropertyGetter(); 454 } else { 455 PropertyMethodDecl = E->getImplicitPropertySetter(); 456 } 457 assert(PropertyMethodDecl && 458 "Implicit property must have associated declaration"); 459 Name = PropertyMethodDecl->getSelector().getNameForSlot(0); 460 } else { 461 assert(E->isExplicitProperty()); 462 Name = E->getExplicitProperty()->getName(); 463 } 464 465 Result.push_back(Name); 466 return true; 467 } 468 469 private: 470 NamesCollector() = default; 471 NameCollection Result; 472 }; 473 474 /// Check whether the given expression mentions any of conventional names. 475 bool mentionsAnyOfConventionalNames(const Expr *E) { 476 NamesCollector::NameCollection MentionedNames = NamesCollector::collect(E); 477 478 return llvm::any_of(MentionedNames, [](llvm::StringRef ConditionName) { 479 return llvm::any_of( 480 CONVENTIONAL_CONDITIONS, 481 [ConditionName](const llvm::StringLiteral &Conventional) { 482 return ConditionName.contains_insensitive(Conventional); 483 }); 484 }); 485 } 486 487 /// Clarification is a simple pair of a reason why parameter is not called 488 /// on every path and a statement to blame. 489 struct Clarification { 490 NeverCalledReason Reason; 491 const Stmt *Location; 492 }; 493 494 /// A helper class that can produce a clarification based on the given pair 495 /// of basic blocks. 496 class NotCalledClarifier 497 : public ConstStmtVisitor<NotCalledClarifier, 498 llvm::Optional<Clarification>> { 499 public: 500 /// The main entrypoint for the class, the function that tries to find the 501 /// clarification of how to explain which sub-path starts with a CFG edge 502 /// from Conditional to SuccWithoutCall. 503 /// 504 /// This means that this function has one precondition: 505 /// SuccWithoutCall should be a successor block for Conditional. 506 /// 507 /// Because clarification is not needed for non-trivial pairs of blocks 508 /// (i.e. SuccWithoutCall is not the only successor), it returns meaningful 509 /// results only for such cases. For this very reason, the parent basic 510 /// block, Conditional, is named that way, so it is clear what kind of 511 /// block is expected. 512 static llvm::Optional<Clarification> 513 clarify(const CFGBlock *Conditional, const CFGBlock *SuccWithoutCall) { 514 if (const Stmt *Terminator = Conditional->getTerminatorStmt()) { 515 return NotCalledClarifier{Conditional, SuccWithoutCall}.Visit(Terminator); 516 } 517 return std::nullopt; 518 } 519 520 llvm::Optional<Clarification> VisitIfStmt(const IfStmt *If) { 521 return VisitBranchingBlock(If, NeverCalledReason::IfThen); 522 } 523 524 llvm::Optional<Clarification> 525 VisitAbstractConditionalOperator(const AbstractConditionalOperator *Ternary) { 526 return VisitBranchingBlock(Ternary, NeverCalledReason::IfThen); 527 } 528 529 llvm::Optional<Clarification> VisitSwitchStmt(const SwitchStmt *Switch) { 530 const Stmt *CaseToBlame = SuccInQuestion->getLabel(); 531 if (!CaseToBlame) { 532 // If interesting basic block is not labeled, it means that this 533 // basic block does not represent any of the cases. 534 return Clarification{NeverCalledReason::SwitchSkipped, Switch}; 535 } 536 537 for (const SwitchCase *Case = Switch->getSwitchCaseList(); Case; 538 Case = Case->getNextSwitchCase()) { 539 if (Case == CaseToBlame) { 540 return Clarification{NeverCalledReason::Switch, Case}; 541 } 542 } 543 544 llvm_unreachable("Found unexpected switch structure"); 545 } 546 547 llvm::Optional<Clarification> VisitForStmt(const ForStmt *For) { 548 return VisitBranchingBlock(For, NeverCalledReason::LoopEntered); 549 } 550 551 llvm::Optional<Clarification> VisitWhileStmt(const WhileStmt *While) { 552 return VisitBranchingBlock(While, NeverCalledReason::LoopEntered); 553 } 554 555 llvm::Optional<Clarification> 556 VisitBranchingBlock(const Stmt *Terminator, NeverCalledReason DefaultReason) { 557 assert(Parent->succ_size() == 2 && 558 "Branching block should have exactly two successors"); 559 unsigned SuccessorIndex = getSuccessorIndex(Parent, SuccInQuestion); 560 NeverCalledReason ActualReason = 561 updateForSuccessor(DefaultReason, SuccessorIndex); 562 return Clarification{ActualReason, Terminator}; 563 } 564 565 llvm::Optional<Clarification> VisitBinaryOperator(const BinaryOperator *) { 566 // We don't want to report on short-curcuit logical operations. 567 return std::nullopt; 568 } 569 570 llvm::Optional<Clarification> VisitStmt(const Stmt *Terminator) { 571 // If we got here, we didn't have a visit function for more derived 572 // classes of statement that this terminator actually belongs to. 573 // 574 // This is not a good scenario and should not happen in practice, but 575 // at least we'll warn the user. 576 return Clarification{NeverCalledReason::FallbackReason, Terminator}; 577 } 578 579 static unsigned getSuccessorIndex(const CFGBlock *Parent, 580 const CFGBlock *Child) { 581 CFGBlock::const_succ_iterator It = llvm::find(Parent->succs(), Child); 582 assert(It != Parent->succ_end() && 583 "Given blocks should be in parent-child relationship"); 584 return It - Parent->succ_begin(); 585 } 586 587 static NeverCalledReason 588 updateForSuccessor(NeverCalledReason ReasonForTrueBranch, 589 unsigned SuccessorIndex) { 590 assert(SuccessorIndex <= 1); 591 unsigned RawReason = 592 static_cast<unsigned>(ReasonForTrueBranch) + SuccessorIndex; 593 assert(RawReason <= 594 static_cast<unsigned>(NeverCalledReason::LARGEST_VALUE)); 595 return static_cast<NeverCalledReason>(RawReason); 596 } 597 598 private: 599 NotCalledClarifier(const CFGBlock *Parent, const CFGBlock *SuccInQuestion) 600 : Parent(Parent), SuccInQuestion(SuccInQuestion) {} 601 602 const CFGBlock *Parent, *SuccInQuestion; 603 }; 604 605 class CalledOnceChecker : public ConstStmtVisitor<CalledOnceChecker> { 606 public: 607 static void check(AnalysisDeclContext &AC, CalledOnceCheckHandler &Handler, 608 bool CheckConventionalParameters) { 609 CalledOnceChecker(AC, Handler, CheckConventionalParameters).check(); 610 } 611 612 private: 613 CalledOnceChecker(AnalysisDeclContext &AC, CalledOnceCheckHandler &Handler, 614 bool CheckConventionalParameters) 615 : FunctionCFG(*AC.getCFG()), AC(AC), Handler(Handler), 616 CheckConventionalParameters(CheckConventionalParameters), 617 CurrentState(0) { 618 initDataStructures(); 619 assert((size() == 0 || !States.empty()) && 620 "Data structures are inconsistent"); 621 } 622 623 //===----------------------------------------------------------------------===// 624 // Initializing functions 625 //===----------------------------------------------------------------------===// 626 627 void initDataStructures() { 628 const Decl *AnalyzedDecl = AC.getDecl(); 629 630 if (const auto *Function = dyn_cast<FunctionDecl>(AnalyzedDecl)) { 631 findParamsToTrack(Function); 632 } else if (const auto *Method = dyn_cast<ObjCMethodDecl>(AnalyzedDecl)) { 633 findParamsToTrack(Method); 634 } else if (const auto *Block = dyn_cast<BlockDecl>(AnalyzedDecl)) { 635 findCapturesToTrack(Block); 636 findParamsToTrack(Block); 637 } 638 639 // Have something to track, let's init states for every block from the CFG. 640 if (size() != 0) { 641 States = 642 CFGSizedVector<State>(FunctionCFG.getNumBlockIDs(), State(size())); 643 } 644 } 645 646 void findCapturesToTrack(const BlockDecl *Block) { 647 for (const auto &Capture : Block->captures()) { 648 if (const auto *P = dyn_cast<ParmVarDecl>(Capture.getVariable())) { 649 // Parameter DeclContext is its owning function or method. 650 const DeclContext *ParamContext = P->getDeclContext(); 651 if (shouldBeCalledOnce(ParamContext, P)) { 652 TrackedParams.push_back(P); 653 } 654 } 655 } 656 } 657 658 template <class FunctionLikeDecl> 659 void findParamsToTrack(const FunctionLikeDecl *Function) { 660 for (unsigned Index : llvm::seq<unsigned>(0u, Function->param_size())) { 661 if (shouldBeCalledOnce(Function, Index)) { 662 TrackedParams.push_back(Function->getParamDecl(Index)); 663 } 664 } 665 } 666 667 //===----------------------------------------------------------------------===// 668 // Main logic 'check' functions 669 //===----------------------------------------------------------------------===// 670 671 void check() { 672 // Nothing to check here: we don't have marked parameters. 673 if (size() == 0 || isPossiblyEmptyImpl()) 674 return; 675 676 assert( 677 llvm::none_of(States, [](const State &S) { return S.isVisited(); }) && 678 "None of the blocks should be 'visited' before the analysis"); 679 680 // For our task, both backward and forward approaches suite well. 681 // However, in order to report better diagnostics, we decided to go with 682 // backward analysis. 683 // 684 // Let's consider the following CFG and how forward and backward analyses 685 // will work for it. 686 // 687 // FORWARD: | BACKWARD: 688 // #1 | #1 689 // +---------+ | +-----------+ 690 // | if | | |MaybeCalled| 691 // +---------+ | +-----------+ 692 // |NotCalled| | | if | 693 // +---------+ | +-----------+ 694 // / \ | / \ 695 // #2 / \ #3 | #2 / \ #3 696 // +----------------+ +---------+ | +----------------+ +---------+ 697 // | foo() | | ... | | |DefinitelyCalled| |NotCalled| 698 // +----------------+ +---------+ | +----------------+ +---------+ 699 // |DefinitelyCalled| |NotCalled| | | foo() | | ... | 700 // +----------------+ +---------+ | +----------------+ +---------+ 701 // \ / | \ / 702 // \ #4 / | \ #4 / 703 // +-----------+ | +---------+ 704 // | ... | | |NotCalled| 705 // +-----------+ | +---------+ 706 // |MaybeCalled| | | ... | 707 // +-----------+ | +---------+ 708 // 709 // The most natural way to report lacking call in the block #3 would be to 710 // message that the false branch of the if statement in the block #1 doesn't 711 // have a call. And while with the forward approach we'll need to find a 712 // least common ancestor or something like that to find the 'if' to blame, 713 // backward analysis gives it to us out of the box. 714 BackwardDataflowWorklist Worklist(FunctionCFG, AC); 715 716 // Let's visit EXIT. 717 const CFGBlock *Exit = &FunctionCFG.getExit(); 718 assignState(Exit, State(size(), ParameterStatus::NotCalled)); 719 Worklist.enqueuePredecessors(Exit); 720 721 while (const CFGBlock *BB = Worklist.dequeue()) { 722 assert(BB && "Worklist should filter out null blocks"); 723 check(BB); 724 assert(CurrentState.isVisited() && 725 "After the check, basic block should be visited"); 726 727 // Traverse successor basic blocks if the status of this block 728 // has changed. 729 if (assignState(BB, CurrentState)) { 730 Worklist.enqueuePredecessors(BB); 731 } 732 } 733 734 // Check that we have all tracked parameters at the last block. 735 // As we are performing a backward version of the analysis, 736 // it should be the ENTRY block. 737 checkEntry(&FunctionCFG.getEntry()); 738 } 739 740 void check(const CFGBlock *BB) { 741 // We start with a state 'inherited' from all the successors. 742 CurrentState = joinSuccessors(BB); 743 assert(CurrentState.isVisited() && 744 "Shouldn't start with a 'not visited' state"); 745 746 // This is the 'exit' situation, broken promises are probably OK 747 // in such scenarios. 748 if (BB->hasNoReturnElement()) { 749 markNoReturn(); 750 // This block still can have calls (even multiple calls) and 751 // for this reason there is no early return here. 752 } 753 754 // We use a backward dataflow propagation and for this reason we 755 // should traverse basic blocks bottom-up. 756 for (const CFGElement &Element : llvm::reverse(*BB)) { 757 if (Optional<CFGStmt> S = Element.getAs<CFGStmt>()) { 758 check(S->getStmt()); 759 } 760 } 761 } 762 void check(const Stmt *S) { Visit(S); } 763 764 void checkEntry(const CFGBlock *Entry) { 765 // We finalize this algorithm with the ENTRY block because 766 // we use a backward version of the analysis. This is where 767 // we can judge that some of the tracked parameters are not called on 768 // every path from ENTRY to EXIT. 769 770 const State &EntryStatus = getState(Entry); 771 llvm::BitVector NotCalledOnEveryPath(size(), false); 772 llvm::BitVector NotUsedOnEveryPath(size(), false); 773 774 // Check if there are no calls of the marked parameter at all 775 for (const auto &IndexedStatus : llvm::enumerate(EntryStatus)) { 776 const ParmVarDecl *Parameter = getParameter(IndexedStatus.index()); 777 778 switch (IndexedStatus.value().getKind()) { 779 case ParameterStatus::NotCalled: 780 // If there were places where this parameter escapes (aka being used), 781 // we can provide a more useful diagnostic by pointing at the exact 782 // branches where it is not even mentioned. 783 if (!hasEverEscaped(IndexedStatus.index())) { 784 // This parameter is was not used at all, so we should report the 785 // most generic version of the warning. 786 if (isCaptured(Parameter)) { 787 // We want to specify that it was captured by the block. 788 Handler.handleCapturedNeverCalled(Parameter, AC.getDecl(), 789 !isExplicitlyMarked(Parameter)); 790 } else { 791 Handler.handleNeverCalled(Parameter, 792 !isExplicitlyMarked(Parameter)); 793 } 794 } else { 795 // Mark it as 'interesting' to figure out which paths don't even 796 // have escapes. 797 NotUsedOnEveryPath[IndexedStatus.index()] = true; 798 } 799 800 break; 801 case ParameterStatus::MaybeCalled: 802 // If we have 'maybe called' at this point, we have an error 803 // that there is at least one path where this parameter 804 // is not called. 805 // 806 // However, reporting the warning with only that information can be 807 // too vague for the users. For this reason, we mark such parameters 808 // as "interesting" for further analysis. 809 NotCalledOnEveryPath[IndexedStatus.index()] = true; 810 break; 811 default: 812 break; 813 } 814 } 815 816 // Early exit if we don't have parameters for extra analysis... 817 if (NotCalledOnEveryPath.none() && NotUsedOnEveryPath.none() && 818 // ... or if we've seen variables with cleanup functions. 819 // We can't reason that we've seen every path in this case, 820 // and thus abandon reporting any warnings that imply that. 821 !FunctionHasCleanupVars) 822 return; 823 824 // We are looking for a pair of blocks A, B so that the following is true: 825 // * A is a predecessor of B 826 // * B is marked as NotCalled 827 // * A has at least one successor marked as either 828 // Escaped or DefinitelyCalled 829 // 830 // In that situation, it is guaranteed that B is the first block of the path 831 // where the user doesn't call or use parameter in question. 832 // 833 // For this reason, branch A -> B can be used for reporting. 834 // 835 // This part of the algorithm is guarded by a condition that the function 836 // does indeed have a violation of contract. For this reason, we can 837 // spend more time to find a good spot to place the warning. 838 // 839 // The following algorithm has the worst case complexity of O(V + E), 840 // where V is the number of basic blocks in FunctionCFG, 841 // E is the number of edges between blocks in FunctionCFG. 842 for (const CFGBlock *BB : FunctionCFG) { 843 if (!BB) 844 continue; 845 846 const State &BlockState = getState(BB); 847 848 for (unsigned Index : llvm::seq(0u, size())) { 849 // We don't want to use 'isLosingCall' here because we want to report 850 // the following situation as well: 851 // 852 // MaybeCalled 853 // | ... | 854 // MaybeCalled NotCalled 855 // 856 // Even though successor is not 'DefinitelyCalled', it is still useful 857 // to report it, it is still a path without a call. 858 if (NotCalledOnEveryPath[Index] && 859 BlockState.getKindFor(Index) == ParameterStatus::MaybeCalled) { 860 861 findAndReportNotCalledBranches(BB, Index); 862 } else if (NotUsedOnEveryPath[Index] && 863 isLosingEscape(BlockState, BB, Index)) { 864 865 findAndReportNotCalledBranches(BB, Index, /* IsEscape = */ true); 866 } 867 } 868 } 869 } 870 871 /// Check potential call of a tracked parameter. 872 void checkDirectCall(const CallExpr *Call) { 873 if (auto Index = getIndexOfCallee(Call)) { 874 processCallFor(*Index, Call); 875 } 876 } 877 878 /// Check the call expression for being an indirect call of one of the tracked 879 /// parameters. It is indirect in the sense that this particular call is not 880 /// calling the parameter itself, but rather uses it as the argument. 881 template <class CallLikeExpr> 882 void checkIndirectCall(const CallLikeExpr *CallOrMessage) { 883 // CallExpr::arguments does not interact nicely with llvm::enumerate. 884 llvm::ArrayRef<const Expr *> Arguments = 885 llvm::ArrayRef(CallOrMessage->getArgs(), CallOrMessage->getNumArgs()); 886 887 // Let's check if any of the call arguments is a point of interest. 888 for (const auto &Argument : llvm::enumerate(Arguments)) { 889 if (auto Index = getIndexOfExpression(Argument.value())) { 890 if (shouldBeCalledOnce(CallOrMessage, Argument.index())) { 891 // If the corresponding parameter is marked as 'called_once' we should 892 // consider it as a call. 893 processCallFor(*Index, CallOrMessage); 894 } else { 895 // Otherwise, we mark this parameter as escaped, which can be 896 // interpreted both as called or not called depending on the context. 897 processEscapeFor(*Index); 898 } 899 // Otherwise, let's keep the state as it is. 900 } 901 } 902 } 903 904 /// Process call of the parameter with the given index 905 void processCallFor(unsigned Index, const Expr *Call) { 906 ParameterStatus &CurrentParamStatus = CurrentState.getStatusFor(Index); 907 908 if (CurrentParamStatus.seenAnyCalls()) { 909 910 // At this point, this parameter was called, so this is a second call. 911 const ParmVarDecl *Parameter = getParameter(Index); 912 Handler.handleDoubleCall( 913 Parameter, &CurrentState.getCallFor(Index), Call, 914 !isExplicitlyMarked(Parameter), 915 // We are sure that the second call is definitely 916 // going to happen if the status is 'DefinitelyCalled'. 917 CurrentParamStatus.getKind() == ParameterStatus::DefinitelyCalled); 918 919 // Mark this parameter as already reported on, so we don't repeat 920 // warnings. 921 CurrentParamStatus = ParameterStatus::Reported; 922 923 } else if (CurrentParamStatus.getKind() != ParameterStatus::Reported) { 924 // If we didn't report anything yet, let's mark this parameter 925 // as called. 926 ParameterStatus Called(ParameterStatus::DefinitelyCalled, Call); 927 CurrentParamStatus = Called; 928 } 929 } 930 931 /// Process escape of the parameter with the given index 932 void processEscapeFor(unsigned Index) { 933 ParameterStatus &CurrentParamStatus = CurrentState.getStatusFor(Index); 934 935 // Escape overrides whatever error we think happened. 936 if (CurrentParamStatus.isErrorStatus()) { 937 CurrentParamStatus = ParameterStatus::Escaped; 938 } 939 } 940 941 void findAndReportNotCalledBranches(const CFGBlock *Parent, unsigned Index, 942 bool IsEscape = false) { 943 for (const CFGBlock *Succ : Parent->succs()) { 944 if (!Succ) 945 continue; 946 947 if (getState(Succ).getKindFor(Index) == ParameterStatus::NotCalled) { 948 assert(Parent->succ_size() >= 2 && 949 "Block should have at least two successors at this point"); 950 if (auto Clarification = NotCalledClarifier::clarify(Parent, Succ)) { 951 const ParmVarDecl *Parameter = getParameter(Index); 952 Handler.handleNeverCalled( 953 Parameter, AC.getDecl(), Clarification->Location, 954 Clarification->Reason, !IsEscape, !isExplicitlyMarked(Parameter)); 955 } 956 } 957 } 958 } 959 960 //===----------------------------------------------------------------------===// 961 // Predicate functions to check parameters 962 //===----------------------------------------------------------------------===// 963 964 /// Return true if parameter is explicitly marked as 'called_once'. 965 static bool isExplicitlyMarked(const ParmVarDecl *Parameter) { 966 return Parameter->hasAttr<CalledOnceAttr>(); 967 } 968 969 /// Return true if the given name matches conventional pattens. 970 static bool isConventional(llvm::StringRef Name) { 971 return llvm::count(CONVENTIONAL_NAMES, Name) != 0; 972 } 973 974 /// Return true if the given name has conventional suffixes. 975 static bool hasConventionalSuffix(llvm::StringRef Name) { 976 return llvm::any_of(CONVENTIONAL_SUFFIXES, [Name](llvm::StringRef Suffix) { 977 return Name.endswith(Suffix); 978 }); 979 } 980 981 /// Return true if the given type can be used for conventional parameters. 982 static bool isConventional(QualType Ty) { 983 if (!Ty->isBlockPointerType()) { 984 return false; 985 } 986 987 QualType BlockType = Ty->castAs<BlockPointerType>()->getPointeeType(); 988 // Completion handlers should have a block type with void return type. 989 return BlockType->castAs<FunctionType>()->getReturnType()->isVoidType(); 990 } 991 992 /// Return true if the only parameter of the function is conventional. 993 static bool isOnlyParameterConventional(const FunctionDecl *Function) { 994 IdentifierInfo *II = Function->getIdentifier(); 995 return Function->getNumParams() == 1 && II && 996 hasConventionalSuffix(II->getName()); 997 } 998 999 /// Return true/false if 'swift_async' attribute states that the given 1000 /// parameter is conventionally called once. 1001 /// Return std::nullopt if the given declaration doesn't have 'swift_async' 1002 /// attribute. 1003 static llvm::Optional<bool> isConventionalSwiftAsync(const Decl *D, 1004 unsigned ParamIndex) { 1005 if (const SwiftAsyncAttr *A = D->getAttr<SwiftAsyncAttr>()) { 1006 if (A->getKind() == SwiftAsyncAttr::None) { 1007 return false; 1008 } 1009 1010 return A->getCompletionHandlerIndex().getASTIndex() == ParamIndex; 1011 } 1012 return std::nullopt; 1013 } 1014 1015 /// Return true if the specified selector represents init method. 1016 static bool isInitMethod(Selector MethodSelector) { 1017 return MethodSelector.getMethodFamily() == OMF_init; 1018 } 1019 1020 /// Return true if the specified selector piece matches conventions. 1021 static bool isConventionalSelectorPiece(Selector MethodSelector, 1022 unsigned PieceIndex, 1023 QualType PieceType) { 1024 if (!isConventional(PieceType) || isInitMethod(MethodSelector)) { 1025 return false; 1026 } 1027 1028 if (MethodSelector.getNumArgs() == 1) { 1029 assert(PieceIndex == 0); 1030 return hasConventionalSuffix(MethodSelector.getNameForSlot(0)); 1031 } 1032 1033 llvm::StringRef PieceName = MethodSelector.getNameForSlot(PieceIndex); 1034 return isConventional(PieceName) || hasConventionalSuffix(PieceName); 1035 } 1036 1037 bool shouldBeCalledOnce(const ParmVarDecl *Parameter) const { 1038 return isExplicitlyMarked(Parameter) || 1039 (CheckConventionalParameters && 1040 (isConventional(Parameter->getName()) || 1041 hasConventionalSuffix(Parameter->getName())) && 1042 isConventional(Parameter->getType())); 1043 } 1044 1045 bool shouldBeCalledOnce(const DeclContext *ParamContext, 1046 const ParmVarDecl *Param) { 1047 unsigned ParamIndex = Param->getFunctionScopeIndex(); 1048 if (const auto *Function = dyn_cast<FunctionDecl>(ParamContext)) { 1049 return shouldBeCalledOnce(Function, ParamIndex); 1050 } 1051 if (const auto *Method = dyn_cast<ObjCMethodDecl>(ParamContext)) { 1052 return shouldBeCalledOnce(Method, ParamIndex); 1053 } 1054 return shouldBeCalledOnce(Param); 1055 } 1056 1057 bool shouldBeCalledOnce(const BlockDecl *Block, unsigned ParamIndex) const { 1058 return shouldBeCalledOnce(Block->getParamDecl(ParamIndex)); 1059 } 1060 1061 bool shouldBeCalledOnce(const FunctionDecl *Function, 1062 unsigned ParamIndex) const { 1063 if (ParamIndex >= Function->getNumParams()) { 1064 return false; 1065 } 1066 // 'swift_async' goes first and overrides anything else. 1067 if (auto ConventionalAsync = 1068 isConventionalSwiftAsync(Function, ParamIndex)) { 1069 return *ConventionalAsync; 1070 } 1071 1072 return shouldBeCalledOnce(Function->getParamDecl(ParamIndex)) || 1073 (CheckConventionalParameters && 1074 isOnlyParameterConventional(Function)); 1075 } 1076 1077 bool shouldBeCalledOnce(const ObjCMethodDecl *Method, 1078 unsigned ParamIndex) const { 1079 Selector MethodSelector = Method->getSelector(); 1080 if (ParamIndex >= MethodSelector.getNumArgs()) { 1081 return false; 1082 } 1083 1084 // 'swift_async' goes first and overrides anything else. 1085 if (auto ConventionalAsync = isConventionalSwiftAsync(Method, ParamIndex)) { 1086 return *ConventionalAsync; 1087 } 1088 1089 const ParmVarDecl *Parameter = Method->getParamDecl(ParamIndex); 1090 return shouldBeCalledOnce(Parameter) || 1091 (CheckConventionalParameters && 1092 isConventionalSelectorPiece(MethodSelector, ParamIndex, 1093 Parameter->getType())); 1094 } 1095 1096 bool shouldBeCalledOnce(const CallExpr *Call, unsigned ParamIndex) const { 1097 const FunctionDecl *Function = Call->getDirectCallee(); 1098 return Function && shouldBeCalledOnce(Function, ParamIndex); 1099 } 1100 1101 bool shouldBeCalledOnce(const ObjCMessageExpr *Message, 1102 unsigned ParamIndex) const { 1103 const ObjCMethodDecl *Method = Message->getMethodDecl(); 1104 return Method && ParamIndex < Method->param_size() && 1105 shouldBeCalledOnce(Method, ParamIndex); 1106 } 1107 1108 //===----------------------------------------------------------------------===// 1109 // Utility methods 1110 //===----------------------------------------------------------------------===// 1111 1112 bool isCaptured(const ParmVarDecl *Parameter) const { 1113 if (const BlockDecl *Block = dyn_cast<BlockDecl>(AC.getDecl())) { 1114 return Block->capturesVariable(Parameter); 1115 } 1116 return false; 1117 } 1118 1119 // Return a call site where the block is called exactly once or null otherwise 1120 const Expr *getBlockGuaraneedCallSite(const BlockExpr *Block) const { 1121 ParentMap &PM = AC.getParentMap(); 1122 1123 // We don't want to track the block through assignments and so on, instead 1124 // we simply see how the block used and if it's used directly in a call, 1125 // we decide based on call to what it is. 1126 // 1127 // In order to do this, we go up the parents of the block looking for 1128 // a call or a message expressions. These might not be immediate parents 1129 // of the actual block expression due to casts and parens, so we skip them. 1130 for (const Stmt *Prev = Block, *Current = PM.getParent(Block); 1131 Current != nullptr; Prev = Current, Current = PM.getParent(Current)) { 1132 // Skip no-op (for our case) operations. 1133 if (isa<CastExpr>(Current) || isa<ParenExpr>(Current)) 1134 continue; 1135 1136 // At this point, Prev represents our block as an immediate child of the 1137 // call. 1138 if (const auto *Call = dyn_cast<CallExpr>(Current)) { 1139 // It might be the call of the Block itself... 1140 if (Call->getCallee() == Prev) 1141 return Call; 1142 1143 // ...or it can be an indirect call of the block. 1144 return shouldBlockArgumentBeCalledOnce(Call, Prev) ? Call : nullptr; 1145 } 1146 if (const auto *Message = dyn_cast<ObjCMessageExpr>(Current)) { 1147 return shouldBlockArgumentBeCalledOnce(Message, Prev) ? Message 1148 : nullptr; 1149 } 1150 1151 break; 1152 } 1153 1154 return nullptr; 1155 } 1156 1157 template <class CallLikeExpr> 1158 bool shouldBlockArgumentBeCalledOnce(const CallLikeExpr *CallOrMessage, 1159 const Stmt *BlockArgument) const { 1160 // CallExpr::arguments does not interact nicely with llvm::enumerate. 1161 llvm::ArrayRef<const Expr *> Arguments = 1162 llvm::ArrayRef(CallOrMessage->getArgs(), CallOrMessage->getNumArgs()); 1163 1164 for (const auto &Argument : llvm::enumerate(Arguments)) { 1165 if (Argument.value() == BlockArgument) { 1166 return shouldBlockArgumentBeCalledOnce(CallOrMessage, Argument.index()); 1167 } 1168 } 1169 1170 return false; 1171 } 1172 1173 bool shouldBlockArgumentBeCalledOnce(const CallExpr *Call, 1174 unsigned ParamIndex) const { 1175 const FunctionDecl *Function = Call->getDirectCallee(); 1176 return shouldBlockArgumentBeCalledOnce(Function, ParamIndex) || 1177 shouldBeCalledOnce(Call, ParamIndex); 1178 } 1179 1180 bool shouldBlockArgumentBeCalledOnce(const ObjCMessageExpr *Message, 1181 unsigned ParamIndex) const { 1182 // At the moment, we don't have any Obj-C methods we want to specifically 1183 // check in here. 1184 return shouldBeCalledOnce(Message, ParamIndex); 1185 } 1186 1187 static bool shouldBlockArgumentBeCalledOnce(const FunctionDecl *Function, 1188 unsigned ParamIndex) { 1189 // There is a list of important API functions that while not following 1190 // conventions nor being directly annotated, still guarantee that the 1191 // callback parameter will be called exactly once. 1192 // 1193 // Here we check if this is the case. 1194 return Function && 1195 llvm::any_of(KNOWN_CALLED_ONCE_PARAMETERS, 1196 [Function, ParamIndex]( 1197 const KnownCalledOnceParameter &Reference) { 1198 return Reference.FunctionName == 1199 Function->getName() && 1200 Reference.ParamIndex == ParamIndex; 1201 }); 1202 } 1203 1204 /// Return true if the analyzed function is actually a default implementation 1205 /// of the method that has to be overriden. 1206 /// 1207 /// These functions can have tracked parameters, but wouldn't call them 1208 /// because they are not designed to perform any meaningful actions. 1209 /// 1210 /// There are a couple of flavors of such default implementations: 1211 /// 1. Empty methods or methods with a single return statement 1212 /// 2. Methods that have one block with a call to no return function 1213 /// 3. Methods with only assertion-like operations 1214 bool isPossiblyEmptyImpl() const { 1215 if (!isa<ObjCMethodDecl>(AC.getDecl())) { 1216 // We care only about functions that are not supposed to be called. 1217 // Only methods can be overriden. 1218 return false; 1219 } 1220 1221 // Case #1 (without return statements) 1222 if (FunctionCFG.size() == 2) { 1223 // Method has only two blocks: ENTRY and EXIT. 1224 // This is equivalent to empty function. 1225 return true; 1226 } 1227 1228 // Case #2 1229 if (FunctionCFG.size() == 3) { 1230 const CFGBlock &Entry = FunctionCFG.getEntry(); 1231 if (Entry.succ_empty()) { 1232 return false; 1233 } 1234 1235 const CFGBlock *OnlyBlock = *Entry.succ_begin(); 1236 // Method has only one block, let's see if it has a no-return 1237 // element. 1238 if (OnlyBlock && OnlyBlock->hasNoReturnElement()) { 1239 return true; 1240 } 1241 // Fallthrough, CFGs with only one block can fall into #1 and #3 as well. 1242 } 1243 1244 // Cases #1 (return statements) and #3. 1245 // 1246 // It is hard to detect that something is an assertion or came 1247 // from assertion. Here we use a simple heuristic: 1248 // 1249 // - If it came from a macro, it can be an assertion. 1250 // 1251 // Additionally, we can't assume a number of basic blocks or the CFG's 1252 // structure because assertions might include loops and conditions. 1253 return llvm::all_of(FunctionCFG, [](const CFGBlock *BB) { 1254 if (!BB) { 1255 // Unreachable blocks are totally fine. 1256 return true; 1257 } 1258 1259 // Return statements can have sub-expressions that are represented as 1260 // separate statements of a basic block. We should allow this. 1261 // This parent map will be initialized with a parent tree for all 1262 // subexpressions of the block's return statement (if it has one). 1263 std::unique_ptr<ParentMap> ReturnChildren; 1264 1265 return llvm::all_of( 1266 llvm::reverse(*BB), // we should start with return statements, if we 1267 // have any, i.e. from the bottom of the block 1268 [&ReturnChildren](const CFGElement &Element) { 1269 if (Optional<CFGStmt> S = Element.getAs<CFGStmt>()) { 1270 const Stmt *SuspiciousStmt = S->getStmt(); 1271 1272 if (isa<ReturnStmt>(SuspiciousStmt)) { 1273 // Let's initialize this structure to test whether 1274 // some further statement is a part of this return. 1275 ReturnChildren = std::make_unique<ParentMap>( 1276 const_cast<Stmt *>(SuspiciousStmt)); 1277 // Return statements are allowed as part of #1. 1278 return true; 1279 } 1280 1281 return SuspiciousStmt->getBeginLoc().isMacroID() || 1282 (ReturnChildren && 1283 ReturnChildren->hasParent(SuspiciousStmt)); 1284 } 1285 return true; 1286 }); 1287 }); 1288 } 1289 1290 /// Check if parameter with the given index has ever escaped. 1291 bool hasEverEscaped(unsigned Index) const { 1292 return llvm::any_of(States, [Index](const State &StateForOneBB) { 1293 return StateForOneBB.getKindFor(Index) == ParameterStatus::Escaped; 1294 }); 1295 } 1296 1297 /// Return status stored for the given basic block. 1298 /// \{ 1299 State &getState(const CFGBlock *BB) { 1300 assert(BB); 1301 return States[BB->getBlockID()]; 1302 } 1303 const State &getState(const CFGBlock *BB) const { 1304 assert(BB); 1305 return States[BB->getBlockID()]; 1306 } 1307 /// \} 1308 1309 /// Assign status to the given basic block. 1310 /// 1311 /// Returns true when the stored status changed. 1312 bool assignState(const CFGBlock *BB, const State &ToAssign) { 1313 State &Current = getState(BB); 1314 if (Current == ToAssign) { 1315 return false; 1316 } 1317 1318 Current = ToAssign; 1319 return true; 1320 } 1321 1322 /// Join all incoming statuses for the given basic block. 1323 State joinSuccessors(const CFGBlock *BB) const { 1324 auto Succs = 1325 llvm::make_filter_range(BB->succs(), [this](const CFGBlock *Succ) { 1326 return Succ && this->getState(Succ).isVisited(); 1327 }); 1328 // We came to this block from somewhere after all. 1329 assert(!Succs.empty() && 1330 "Basic block should have at least one visited successor"); 1331 1332 State Result = getState(*Succs.begin()); 1333 1334 for (const CFGBlock *Succ : llvm::drop_begin(Succs, 1)) { 1335 Result.join(getState(Succ)); 1336 } 1337 1338 if (const Expr *Condition = getCondition(BB->getTerminatorStmt())) { 1339 handleConditional(BB, Condition, Result); 1340 } 1341 1342 return Result; 1343 } 1344 1345 void handleConditional(const CFGBlock *BB, const Expr *Condition, 1346 State &ToAlter) const { 1347 handleParameterCheck(BB, Condition, ToAlter); 1348 if (SuppressOnConventionalErrorPaths) { 1349 handleConventionalCheck(BB, Condition, ToAlter); 1350 } 1351 } 1352 1353 void handleParameterCheck(const CFGBlock *BB, const Expr *Condition, 1354 State &ToAlter) const { 1355 // In this function, we try to deal with the following pattern: 1356 // 1357 // if (parameter) 1358 // parameter(...); 1359 // 1360 // It's not good to show a warning here because clearly 'parameter' 1361 // couldn't and shouldn't be called on the 'else' path. 1362 // 1363 // Let's check if this if statement has a check involving one of 1364 // the tracked parameters. 1365 if (const ParmVarDecl *Parameter = findReferencedParmVarDecl( 1366 Condition, 1367 /* ShouldRetrieveFromComparisons = */ true)) { 1368 if (const auto Index = getIndex(*Parameter)) { 1369 ParameterStatus &CurrentStatus = ToAlter.getStatusFor(*Index); 1370 1371 // We don't want to deep dive into semantics of the check and 1372 // figure out if that check was for null or something else. 1373 // We simply trust the user that they know what they are doing. 1374 // 1375 // For this reason, in the following loop we look for the 1376 // best-looking option. 1377 for (const CFGBlock *Succ : BB->succs()) { 1378 if (!Succ) 1379 continue; 1380 1381 const ParameterStatus &StatusInSucc = 1382 getState(Succ).getStatusFor(*Index); 1383 1384 if (StatusInSucc.isErrorStatus()) { 1385 continue; 1386 } 1387 1388 // Let's use this status instead. 1389 CurrentStatus = StatusInSucc; 1390 1391 if (StatusInSucc.getKind() == ParameterStatus::DefinitelyCalled) { 1392 // This is the best option to have and we already found it. 1393 break; 1394 } 1395 1396 // If we found 'Escaped' first, we still might find 'DefinitelyCalled' 1397 // on the other branch. And we prefer the latter. 1398 } 1399 } 1400 } 1401 } 1402 1403 void handleConventionalCheck(const CFGBlock *BB, const Expr *Condition, 1404 State &ToAlter) const { 1405 // Even when the analysis is technically correct, it is a widespread pattern 1406 // not to call completion handlers in some scenarios. These usually have 1407 // typical conditional names, such as 'error' or 'cancel'. 1408 if (!mentionsAnyOfConventionalNames(Condition)) { 1409 return; 1410 } 1411 1412 for (const auto &IndexedStatus : llvm::enumerate(ToAlter)) { 1413 const ParmVarDecl *Parameter = getParameter(IndexedStatus.index()); 1414 // Conventions do not apply to explicitly marked parameters. 1415 if (isExplicitlyMarked(Parameter)) { 1416 continue; 1417 } 1418 1419 ParameterStatus &CurrentStatus = IndexedStatus.value(); 1420 // If we did find that on one of the branches the user uses the callback 1421 // and doesn't on the other path, we believe that they know what they are 1422 // doing and trust them. 1423 // 1424 // There are two possible scenarios for that: 1425 // 1. Current status is 'MaybeCalled' and one of the branches is 1426 // 'DefinitelyCalled' 1427 // 2. Current status is 'NotCalled' and one of the branches is 'Escaped' 1428 if (isLosingCall(ToAlter, BB, IndexedStatus.index()) || 1429 isLosingEscape(ToAlter, BB, IndexedStatus.index())) { 1430 CurrentStatus = ParameterStatus::Escaped; 1431 } 1432 } 1433 } 1434 1435 bool isLosingCall(const State &StateAfterJoin, const CFGBlock *JoinBlock, 1436 unsigned ParameterIndex) const { 1437 // Let's check if the block represents DefinitelyCalled -> MaybeCalled 1438 // transition. 1439 return isLosingJoin(StateAfterJoin, JoinBlock, ParameterIndex, 1440 ParameterStatus::MaybeCalled, 1441 ParameterStatus::DefinitelyCalled); 1442 } 1443 1444 bool isLosingEscape(const State &StateAfterJoin, const CFGBlock *JoinBlock, 1445 unsigned ParameterIndex) const { 1446 // Let's check if the block represents Escaped -> NotCalled transition. 1447 return isLosingJoin(StateAfterJoin, JoinBlock, ParameterIndex, 1448 ParameterStatus::NotCalled, ParameterStatus::Escaped); 1449 } 1450 1451 bool isLosingJoin(const State &StateAfterJoin, const CFGBlock *JoinBlock, 1452 unsigned ParameterIndex, ParameterStatus::Kind AfterJoin, 1453 ParameterStatus::Kind BeforeJoin) const { 1454 assert(!ParameterStatus::isErrorStatus(BeforeJoin) && 1455 ParameterStatus::isErrorStatus(AfterJoin) && 1456 "It's not a losing join if statuses do not represent " 1457 "correct-to-error transition"); 1458 1459 const ParameterStatus &CurrentStatus = 1460 StateAfterJoin.getStatusFor(ParameterIndex); 1461 1462 return CurrentStatus.getKind() == AfterJoin && 1463 anySuccessorHasStatus(JoinBlock, ParameterIndex, BeforeJoin); 1464 } 1465 1466 /// Return true if any of the successors of the given basic block has 1467 /// a specified status for the given parameter. 1468 bool anySuccessorHasStatus(const CFGBlock *Parent, unsigned ParameterIndex, 1469 ParameterStatus::Kind ToFind) const { 1470 return llvm::any_of( 1471 Parent->succs(), [this, ParameterIndex, ToFind](const CFGBlock *Succ) { 1472 return Succ && getState(Succ).getKindFor(ParameterIndex) == ToFind; 1473 }); 1474 } 1475 1476 /// Check given expression that was discovered to escape. 1477 void checkEscapee(const Expr *E) { 1478 if (const ParmVarDecl *Parameter = findReferencedParmVarDecl(E)) { 1479 checkEscapee(*Parameter); 1480 } 1481 } 1482 1483 /// Check given parameter that was discovered to escape. 1484 void checkEscapee(const ParmVarDecl &Parameter) { 1485 if (auto Index = getIndex(Parameter)) { 1486 processEscapeFor(*Index); 1487 } 1488 } 1489 1490 /// Mark all parameters in the current state as 'no-return'. 1491 void markNoReturn() { 1492 for (ParameterStatus &PS : CurrentState) { 1493 PS = ParameterStatus::NoReturn; 1494 } 1495 } 1496 1497 /// Check if the given assignment represents suppression and act on it. 1498 void checkSuppression(const BinaryOperator *Assignment) { 1499 // Suppression has the following form: 1500 // parameter = 0; 1501 // 0 can be of any form (NULL, nil, etc.) 1502 if (auto Index = getIndexOfExpression(Assignment->getLHS())) { 1503 1504 // We don't care what is written in the RHS, it could be whatever 1505 // we can interpret as 0. 1506 if (auto Constant = 1507 Assignment->getRHS()->IgnoreParenCasts()->getIntegerConstantExpr( 1508 AC.getASTContext())) { 1509 1510 ParameterStatus &CurrentParamStatus = CurrentState.getStatusFor(*Index); 1511 1512 if (0 == *Constant && CurrentParamStatus.seenAnyCalls()) { 1513 // Even though this suppression mechanism is introduced to tackle 1514 // false positives for multiple calls, the fact that the user has 1515 // to use suppression can also tell us that we couldn't figure out 1516 // how different paths cancel each other out. And if that is true, 1517 // we will most certainly have false positives about parameters not 1518 // being called on certain paths. 1519 // 1520 // For this reason, we abandon tracking this parameter altogether. 1521 CurrentParamStatus = ParameterStatus::Reported; 1522 } 1523 } 1524 } 1525 } 1526 1527 public: 1528 //===----------------------------------------------------------------------===// 1529 // Tree traversal methods 1530 //===----------------------------------------------------------------------===// 1531 1532 void VisitCallExpr(const CallExpr *Call) { 1533 // This call might be a direct call, i.e. a parameter call... 1534 checkDirectCall(Call); 1535 // ... or an indirect call, i.e. when parameter is an argument. 1536 checkIndirectCall(Call); 1537 } 1538 1539 void VisitObjCMessageExpr(const ObjCMessageExpr *Message) { 1540 // The most common situation that we are defending against here is 1541 // copying a tracked parameter. 1542 if (const Expr *Receiver = Message->getInstanceReceiver()) { 1543 checkEscapee(Receiver); 1544 } 1545 // Message expressions unlike calls, could not be direct. 1546 checkIndirectCall(Message); 1547 } 1548 1549 void VisitBlockExpr(const BlockExpr *Block) { 1550 // Block expressions are tricky. It is a very common practice to capture 1551 // completion handlers by blocks and use them there. 1552 // For this reason, it is important to analyze blocks and report warnings 1553 // for completion handler misuse in blocks. 1554 // 1555 // However, it can be quite difficult to track how the block itself is being 1556 // used. The full precise anlysis of that will be similar to alias analysis 1557 // for completion handlers and can be too heavyweight for a compile-time 1558 // diagnostic. Instead, we judge about the immediate use of the block. 1559 // 1560 // Here, we try to find a call expression where we know due to conventions, 1561 // annotations, or other reasons that the block is called once and only 1562 // once. 1563 const Expr *CalledOnceCallSite = getBlockGuaraneedCallSite(Block); 1564 1565 // We need to report this information to the handler because in the 1566 // situation when we know that the block is called exactly once, we can be 1567 // stricter in terms of reported diagnostics. 1568 if (CalledOnceCallSite) { 1569 Handler.handleBlockThatIsGuaranteedToBeCalledOnce(Block->getBlockDecl()); 1570 } else { 1571 Handler.handleBlockWithNoGuarantees(Block->getBlockDecl()); 1572 } 1573 1574 for (const auto &Capture : Block->getBlockDecl()->captures()) { 1575 if (const auto *Param = dyn_cast<ParmVarDecl>(Capture.getVariable())) { 1576 if (auto Index = getIndex(*Param)) { 1577 if (CalledOnceCallSite) { 1578 // The call site of a block can be considered a call site of the 1579 // captured parameter we track. 1580 processCallFor(*Index, CalledOnceCallSite); 1581 } else { 1582 // We still should consider this block as an escape for parameter, 1583 // if we don't know about its call site or the number of time it 1584 // can be invoked. 1585 processEscapeFor(*Index); 1586 } 1587 } 1588 } 1589 } 1590 } 1591 1592 void VisitBinaryOperator(const BinaryOperator *Op) { 1593 if (Op->getOpcode() == clang::BO_Assign) { 1594 // Let's check if one of the tracked parameters is assigned into 1595 // something, and if it is we don't want to track extra variables, so we 1596 // consider it as an escapee. 1597 checkEscapee(Op->getRHS()); 1598 1599 // Let's check whether this assignment is a suppression. 1600 checkSuppression(Op); 1601 } 1602 } 1603 1604 void VisitDeclStmt(const DeclStmt *DS) { 1605 // Variable initialization is not assignment and should be handled 1606 // separately. 1607 // 1608 // Multiple declarations can be a part of declaration statement. 1609 for (const auto *Declaration : DS->getDeclGroup()) { 1610 if (const auto *Var = dyn_cast<VarDecl>(Declaration)) { 1611 if (Var->getInit()) { 1612 checkEscapee(Var->getInit()); 1613 } 1614 1615 if (Var->hasAttr<CleanupAttr>()) { 1616 FunctionHasCleanupVars = true; 1617 } 1618 } 1619 } 1620 } 1621 1622 void VisitCStyleCastExpr(const CStyleCastExpr *Cast) { 1623 // We consider '(void)parameter' as a manual no-op escape. 1624 // It should be used to explicitly tell the analysis that this parameter 1625 // is intentionally not called on this path. 1626 if (Cast->getType().getCanonicalType()->isVoidType()) { 1627 checkEscapee(Cast->getSubExpr()); 1628 } 1629 } 1630 1631 void VisitObjCAtThrowStmt(const ObjCAtThrowStmt *) { 1632 // It is OK not to call marked parameters on exceptional paths. 1633 markNoReturn(); 1634 } 1635 1636 private: 1637 unsigned size() const { return TrackedParams.size(); } 1638 1639 llvm::Optional<unsigned> getIndexOfCallee(const CallExpr *Call) const { 1640 return getIndexOfExpression(Call->getCallee()); 1641 } 1642 1643 llvm::Optional<unsigned> getIndexOfExpression(const Expr *E) const { 1644 if (const ParmVarDecl *Parameter = findReferencedParmVarDecl(E)) { 1645 return getIndex(*Parameter); 1646 } 1647 1648 return std::nullopt; 1649 } 1650 1651 llvm::Optional<unsigned> getIndex(const ParmVarDecl &Parameter) const { 1652 // Expected number of parameters that we actually track is 1. 1653 // 1654 // Also, the maximum number of declared parameters could not be on a scale 1655 // of hundreds of thousands. 1656 // 1657 // In this setting, linear search seems reasonable and even performs better 1658 // than bisection. 1659 ParamSizedVector<const ParmVarDecl *>::const_iterator It = 1660 llvm::find(TrackedParams, &Parameter); 1661 1662 if (It != TrackedParams.end()) { 1663 return It - TrackedParams.begin(); 1664 } 1665 1666 return std::nullopt; 1667 } 1668 1669 const ParmVarDecl *getParameter(unsigned Index) const { 1670 assert(Index < TrackedParams.size()); 1671 return TrackedParams[Index]; 1672 } 1673 1674 const CFG &FunctionCFG; 1675 AnalysisDeclContext &AC; 1676 CalledOnceCheckHandler &Handler; 1677 bool CheckConventionalParameters; 1678 // As of now, we turn this behavior off. So, we still are going to report 1679 // missing calls on paths that look like it was intentional. 1680 // Technically such reports are true positives, but they can make some users 1681 // grumpy because of the sheer number of warnings. 1682 // It can be turned back on if we decide that we want to have the other way 1683 // around. 1684 bool SuppressOnConventionalErrorPaths = false; 1685 1686 // The user can annotate variable declarations with cleanup functions, which 1687 // essentially imposes a custom destructor logic on that variable. 1688 // It is possible to use it, however, to call tracked parameters on all exits 1689 // from the function. For this reason, we track the fact that the function 1690 // actually has these. 1691 bool FunctionHasCleanupVars = false; 1692 1693 State CurrentState; 1694 ParamSizedVector<const ParmVarDecl *> TrackedParams; 1695 CFGSizedVector<State> States; 1696 }; 1697 1698 } // end anonymous namespace 1699 1700 namespace clang { 1701 void checkCalledOnceParameters(AnalysisDeclContext &AC, 1702 CalledOnceCheckHandler &Handler, 1703 bool CheckConventionalParameters) { 1704 CalledOnceChecker::check(AC, Handler, CheckConventionalParameters); 1705 } 1706 } // end namespace clang 1707