1 //===- ObjCARCOpts.cpp - ObjC ARC Optimization ----------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 /// \file 10 /// This file defines ObjC ARC optimizations. ARC stands for Automatic 11 /// Reference Counting and is a system for managing reference counts for objects 12 /// in Objective C. 13 /// 14 /// The optimizations performed include elimination of redundant, partially 15 /// redundant, and inconsequential reference count operations, elimination of 16 /// redundant weak pointer operations, and numerous minor simplifications. 17 /// 18 /// WARNING: This file knows about certain library functions. It recognizes them 19 /// by name, and hardwires knowledge of their semantics. 20 /// 21 /// WARNING: This file knows about how certain Objective-C library functions are 22 /// used. Naive LLVM IR transformations which would otherwise be 23 /// behavior-preserving may break these assumptions. 24 // 25 //===----------------------------------------------------------------------===// 26 27 #include "ARCRuntimeEntryPoints.h" 28 #include "BlotMapVector.h" 29 #include "DependencyAnalysis.h" 30 #include "ObjCARC.h" 31 #include "ProvenanceAnalysis.h" 32 #include "PtrState.h" 33 #include "llvm/ADT/DenseMap.h" 34 #include "llvm/ADT/STLExtras.h" 35 #include "llvm/ADT/SmallPtrSet.h" 36 #include "llvm/ADT/SmallVector.h" 37 #include "llvm/ADT/Statistic.h" 38 #include "llvm/Analysis/AliasAnalysis.h" 39 #include "llvm/Analysis/ObjCARCAnalysisUtils.h" 40 #include "llvm/Analysis/ObjCARCInstKind.h" 41 #include "llvm/Analysis/ObjCARCUtil.h" 42 #include "llvm/IR/BasicBlock.h" 43 #include "llvm/IR/CFG.h" 44 #include "llvm/IR/Constant.h" 45 #include "llvm/IR/Constants.h" 46 #include "llvm/IR/DerivedTypes.h" 47 #include "llvm/IR/EHPersonalities.h" 48 #include "llvm/IR/Function.h" 49 #include "llvm/IR/GlobalVariable.h" 50 #include "llvm/IR/InstIterator.h" 51 #include "llvm/IR/InstrTypes.h" 52 #include "llvm/IR/Instruction.h" 53 #include "llvm/IR/Instructions.h" 54 #include "llvm/IR/LLVMContext.h" 55 #include "llvm/IR/Metadata.h" 56 #include "llvm/IR/Type.h" 57 #include "llvm/IR/User.h" 58 #include "llvm/IR/Value.h" 59 #include "llvm/Support/Casting.h" 60 #include "llvm/Support/CommandLine.h" 61 #include "llvm/Support/Compiler.h" 62 #include "llvm/Support/Debug.h" 63 #include "llvm/Support/ErrorHandling.h" 64 #include "llvm/Support/raw_ostream.h" 65 #include "llvm/Transforms/ObjCARC.h" 66 #include <cassert> 67 #include <iterator> 68 #include <utility> 69 70 using namespace llvm; 71 using namespace llvm::objcarc; 72 73 #define DEBUG_TYPE "objc-arc-opts" 74 75 static cl::opt<unsigned> MaxPtrStates("arc-opt-max-ptr-states", 76 cl::Hidden, 77 cl::desc("Maximum number of ptr states the optimizer keeps track of"), 78 cl::init(4095)); 79 80 /// \defgroup ARCUtilities Utility declarations/definitions specific to ARC. 81 /// @{ 82 83 /// This is similar to GetRCIdentityRoot but it stops as soon 84 /// as it finds a value with multiple uses. 85 static const Value *FindSingleUseIdentifiedObject(const Value *Arg) { 86 // ConstantData (like ConstantPointerNull and UndefValue) is used across 87 // modules. It's never a single-use value. 
  if (isa<ConstantData>(Arg))
    return nullptr;

  if (Arg->hasOneUse()) {
    if (const BitCastInst *BC = dyn_cast<BitCastInst>(Arg))
      return FindSingleUseIdentifiedObject(BC->getOperand(0));
    if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Arg))
      if (GEP->hasAllZeroIndices())
        return FindSingleUseIdentifiedObject(GEP->getPointerOperand());
    if (IsForwarding(GetBasicARCInstKind(Arg)))
      return FindSingleUseIdentifiedObject(
          cast<CallInst>(Arg)->getArgOperand(0));
    if (!IsObjCIdentifiedObject(Arg))
      return nullptr;
    return Arg;
  }

  // If we found an identifiable object that has multiple uses, but all of
  // those uses are trivial, we can still consider this to be a single-use
  // value.
  if (IsObjCIdentifiedObject(Arg)) {
    for (const User *U : Arg->users())
      if (!U->use_empty() || GetRCIdentityRoot(U) != Arg)
        return nullptr;

    return Arg;
  }

  return nullptr;
}

/// @}
///
/// \defgroup ARCOpt ARC Optimization.
/// @{

// TODO: On code like this:
//
// objc_retain(%x)
// stuff_that_cannot_release()
// objc_autorelease(%x)
// stuff_that_cannot_release()
// objc_retain(%x)
// stuff_that_cannot_release()
// objc_autorelease(%x)
//
// The second retain and autorelease can be deleted.

// TODO: It should be possible to delete
// objc_autoreleasePoolPush and objc_autoreleasePoolPop
// pairs if nothing is actually autoreleased between them. Also, autorelease
// calls followed by objc_autoreleasePoolPop calls (perhaps in ObjC++ code
// after inlining) can be turned into plain release calls.

// TODO: Critical-edge splitting. If the optimal insertion point is
// a critical edge, the current algorithm has to fail, because it doesn't
// know how to split edges. It should be possible to make the optimizer
// think in terms of edges, rather than blocks, and then split critical
// edges on demand.

// TODO: OptimizeSequences could be generalized to be interprocedural.

// TODO: Recognize that a bunch of other objc runtime calls have
// non-escaping arguments and non-releasing arguments, and may be
// non-autoreleasing.

// TODO: Sink autorelease calls as far as possible. Unfortunately we
// usually can't sink them past other calls, which would be the main
// case where it would be useful.

// TODO: The pointer returned from objc_loadWeakRetained is retained.

// TODO: Delete release+retain pairs (rare).

STATISTIC(NumNoops, "Number of no-op objc calls eliminated");
STATISTIC(NumPartialNoops, "Number of partially no-op objc calls eliminated");
STATISTIC(NumAutoreleases, "Number of autoreleases converted to releases");
STATISTIC(NumRets, "Number of return value forwarding "
                   "retain+autoreleases eliminated");
STATISTIC(NumRRs, "Number of retain+release paths eliminated");
STATISTIC(NumPeeps, "Number of calls peephole-optimized");
#ifndef NDEBUG
STATISTIC(NumRetainsBeforeOpt,
          "Number of retains before optimization");
STATISTIC(NumReleasesBeforeOpt,
          "Number of releases before optimization");
STATISTIC(NumRetainsAfterOpt,
          "Number of retains after optimization");
STATISTIC(NumReleasesAfterOpt,
          "Number of releases after optimization");
#endif

namespace {

/// Per-BasicBlock state.
class BBState {
  /// The number of unique control paths from the entry which can reach this
  /// block.
185 unsigned TopDownPathCount = 0; 186 187 /// The number of unique control paths to exits from this block. 188 unsigned BottomUpPathCount = 0; 189 190 /// The top-down traversal uses this to record information known about a 191 /// pointer at the bottom of each block. 192 BlotMapVector<const Value *, TopDownPtrState> PerPtrTopDown; 193 194 /// The bottom-up traversal uses this to record information known about a 195 /// pointer at the top of each block. 196 BlotMapVector<const Value *, BottomUpPtrState> PerPtrBottomUp; 197 198 /// Effective predecessors of the current block ignoring ignorable edges and 199 /// ignored backedges. 200 SmallVector<BasicBlock *, 2> Preds; 201 202 /// Effective successors of the current block ignoring ignorable edges and 203 /// ignored backedges. 204 SmallVector<BasicBlock *, 2> Succs; 205 206 public: 207 static const unsigned OverflowOccurredValue; 208 209 BBState() = default; 210 211 using top_down_ptr_iterator = decltype(PerPtrTopDown)::iterator; 212 using const_top_down_ptr_iterator = decltype(PerPtrTopDown)::const_iterator; 213 214 top_down_ptr_iterator top_down_ptr_begin() { return PerPtrTopDown.begin(); } 215 top_down_ptr_iterator top_down_ptr_end() { return PerPtrTopDown.end(); } 216 const_top_down_ptr_iterator top_down_ptr_begin() const { 217 return PerPtrTopDown.begin(); 218 } 219 const_top_down_ptr_iterator top_down_ptr_end() const { 220 return PerPtrTopDown.end(); 221 } 222 bool hasTopDownPtrs() const { 223 return !PerPtrTopDown.empty(); 224 } 225 226 unsigned top_down_ptr_list_size() const { 227 return std::distance(top_down_ptr_begin(), top_down_ptr_end()); 228 } 229 230 using bottom_up_ptr_iterator = decltype(PerPtrBottomUp)::iterator; 231 using const_bottom_up_ptr_iterator = 232 decltype(PerPtrBottomUp)::const_iterator; 233 234 bottom_up_ptr_iterator bottom_up_ptr_begin() { 235 return PerPtrBottomUp.begin(); 236 } 237 bottom_up_ptr_iterator bottom_up_ptr_end() { return PerPtrBottomUp.end(); } 238 const_bottom_up_ptr_iterator bottom_up_ptr_begin() const { 239 return PerPtrBottomUp.begin(); 240 } 241 const_bottom_up_ptr_iterator bottom_up_ptr_end() const { 242 return PerPtrBottomUp.end(); 243 } 244 bool hasBottomUpPtrs() const { 245 return !PerPtrBottomUp.empty(); 246 } 247 248 unsigned bottom_up_ptr_list_size() const { 249 return std::distance(bottom_up_ptr_begin(), bottom_up_ptr_end()); 250 } 251 252 /// Mark this block as being an entry block, which has one path from the 253 /// entry by definition. 254 void SetAsEntry() { TopDownPathCount = 1; } 255 256 /// Mark this block as being an exit block, which has one path to an exit by 257 /// definition. 258 void SetAsExit() { BottomUpPathCount = 1; } 259 260 /// Attempt to find the PtrState object describing the top down state for 261 /// pointer Arg. Return a new initialized PtrState describing the top down 262 /// state for Arg if we do not find one. 263 TopDownPtrState &getPtrTopDownState(const Value *Arg) { 264 return PerPtrTopDown[Arg]; 265 } 266 267 /// Attempt to find the PtrState object describing the bottom up state for 268 /// pointer Arg. Return a new initialized PtrState describing the bottom up 269 /// state for Arg if we do not find one. 270 BottomUpPtrState &getPtrBottomUpState(const Value *Arg) { 271 return PerPtrBottomUp[Arg]; 272 } 273 274 /// Attempt to find the PtrState object describing the bottom up state for 275 /// pointer Arg. 
276 bottom_up_ptr_iterator findPtrBottomUpState(const Value *Arg) { 277 return PerPtrBottomUp.find(Arg); 278 } 279 280 void clearBottomUpPointers() { 281 PerPtrBottomUp.clear(); 282 } 283 284 void clearTopDownPointers() { 285 PerPtrTopDown.clear(); 286 } 287 288 void InitFromPred(const BBState &Other); 289 void InitFromSucc(const BBState &Other); 290 void MergePred(const BBState &Other); 291 void MergeSucc(const BBState &Other); 292 293 /// Compute the number of possible unique paths from an entry to an exit 294 /// which pass through this block. This is only valid after both the 295 /// top-down and bottom-up traversals are complete. 296 /// 297 /// Returns true if overflow occurred. Returns false if overflow did not 298 /// occur. 299 bool GetAllPathCountWithOverflow(unsigned &PathCount) const { 300 if (TopDownPathCount == OverflowOccurredValue || 301 BottomUpPathCount == OverflowOccurredValue) 302 return true; 303 unsigned long long Product = 304 (unsigned long long)TopDownPathCount*BottomUpPathCount; 305 // Overflow occurred if any of the upper bits of Product are set or if all 306 // the lower bits of Product are all set. 307 return (Product >> 32) || 308 ((PathCount = Product) == OverflowOccurredValue); 309 } 310 311 // Specialized CFG utilities. 312 using edge_iterator = SmallVectorImpl<BasicBlock *>::const_iterator; 313 314 edge_iterator pred_begin() const { return Preds.begin(); } 315 edge_iterator pred_end() const { return Preds.end(); } 316 edge_iterator succ_begin() const { return Succs.begin(); } 317 edge_iterator succ_end() const { return Succs.end(); } 318 319 void addSucc(BasicBlock *Succ) { Succs.push_back(Succ); } 320 void addPred(BasicBlock *Pred) { Preds.push_back(Pred); } 321 322 bool isExit() const { return Succs.empty(); } 323 }; 324 325 } // end anonymous namespace 326 327 const unsigned BBState::OverflowOccurredValue = 0xffffffff; 328 329 namespace llvm { 330 331 raw_ostream &operator<<(raw_ostream &OS, 332 BBState &BBState) LLVM_ATTRIBUTE_UNUSED; 333 334 } // end namespace llvm 335 336 void BBState::InitFromPred(const BBState &Other) { 337 PerPtrTopDown = Other.PerPtrTopDown; 338 TopDownPathCount = Other.TopDownPathCount; 339 } 340 341 void BBState::InitFromSucc(const BBState &Other) { 342 PerPtrBottomUp = Other.PerPtrBottomUp; 343 BottomUpPathCount = Other.BottomUpPathCount; 344 } 345 346 /// The top-down traversal uses this to merge information about predecessors to 347 /// form the initial state for a new block. 348 void BBState::MergePred(const BBState &Other) { 349 if (TopDownPathCount == OverflowOccurredValue) 350 return; 351 352 // Other.TopDownPathCount can be 0, in which case it is either dead or a 353 // loop backedge. Loop backedges are special. 354 TopDownPathCount += Other.TopDownPathCount; 355 356 // In order to be consistent, we clear the top down pointers when by adding 357 // TopDownPathCount becomes OverflowOccurredValue even though "true" overflow 358 // has not occurred. 359 if (TopDownPathCount == OverflowOccurredValue) { 360 clearTopDownPointers(); 361 return; 362 } 363 364 // Check for overflow. If we have overflow, fall back to conservative 365 // behavior. 366 if (TopDownPathCount < Other.TopDownPathCount) { 367 TopDownPathCount = OverflowOccurredValue; 368 clearTopDownPointers(); 369 return; 370 } 371 372 // For each entry in the other set, if our set has an entry with the same key, 373 // merge the entries. Otherwise, copy the entry and merge it with an empty 374 // entry. 
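  //
  // Illustrative sketch (not from the original source): if this block already
  // tracks { %a, %c } and the predecessor being merged in tracks { %a, %b },
  // the first loop below merges the two states for %a and inserts %b merged
  // against a default-constructed TopDownPtrState, while the second loop
  // merges %c (which the predecessor does not track) with an empty state,
  // conservatively weakening what is known about it.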
  for (auto MI = Other.top_down_ptr_begin(), ME = Other.top_down_ptr_end();
       MI != ME; ++MI) {
    auto Pair = PerPtrTopDown.insert(*MI);
    Pair.first->second.Merge(Pair.second ? TopDownPtrState() : MI->second,
                             /*TopDown=*/true);
  }

  // For each entry in our set, if the other set doesn't have an entry with the
  // same key, force it to merge with an empty entry.
  for (auto MI = top_down_ptr_begin(), ME = top_down_ptr_end(); MI != ME; ++MI)
    if (Other.PerPtrTopDown.find(MI->first) == Other.PerPtrTopDown.end())
      MI->second.Merge(TopDownPtrState(), /*TopDown=*/true);
}

/// The bottom-up traversal uses this to merge information about successors to
/// form the initial state for a new block.
void BBState::MergeSucc(const BBState &Other) {
  if (BottomUpPathCount == OverflowOccurredValue)
    return;

  // Other.BottomUpPathCount can be 0, in which case it is either dead or a
  // loop backedge. Loop backedges are special.
  BottomUpPathCount += Other.BottomUpPathCount;

  // To stay consistent, we clear the bottom-up pointers when the addition
  // makes BottomUpPathCount equal to OverflowOccurredValue, even though "true"
  // overflow has not occurred.
  if (BottomUpPathCount == OverflowOccurredValue) {
    clearBottomUpPointers();
    return;
  }

  // Check for overflow. If we have overflow, fall back to conservative
  // behavior.
  if (BottomUpPathCount < Other.BottomUpPathCount) {
    BottomUpPathCount = OverflowOccurredValue;
    clearBottomUpPointers();
    return;
  }

  // For each entry in the other set, if our set has an entry with the
  // same key, merge the entries. Otherwise, copy the entry and merge
  // it with an empty entry.
  for (auto MI = Other.bottom_up_ptr_begin(), ME = Other.bottom_up_ptr_end();
       MI != ME; ++MI) {
    auto Pair = PerPtrBottomUp.insert(*MI);
    Pair.first->second.Merge(Pair.second ? BottomUpPtrState() : MI->second,
                             /*TopDown=*/false);
  }

  // For each entry in our set, if the other set doesn't have an entry
  // with the same key, force it to merge with an empty entry.
  for (auto MI = bottom_up_ptr_begin(), ME = bottom_up_ptr_end(); MI != ME;
       ++MI)
    if (Other.PerPtrBottomUp.find(MI->first) == Other.PerPtrBottomUp.end())
      MI->second.Merge(BottomUpPtrState(), /*TopDown=*/false);
}

raw_ostream &llvm::operator<<(raw_ostream &OS, BBState &BBInfo) {
  // Dump the pointers we are tracking.
435 OS << " TopDown State:\n"; 436 if (!BBInfo.hasTopDownPtrs()) { 437 LLVM_DEBUG(dbgs() << " NONE!\n"); 438 } else { 439 for (auto I = BBInfo.top_down_ptr_begin(), E = BBInfo.top_down_ptr_end(); 440 I != E; ++I) { 441 const PtrState &P = I->second; 442 OS << " Ptr: " << *I->first 443 << "\n KnownSafe: " << (P.IsKnownSafe()?"true":"false") 444 << "\n ImpreciseRelease: " 445 << (P.IsTrackingImpreciseReleases()?"true":"false") << "\n" 446 << " HasCFGHazards: " 447 << (P.IsCFGHazardAfflicted()?"true":"false") << "\n" 448 << " KnownPositive: " 449 << (P.HasKnownPositiveRefCount()?"true":"false") << "\n" 450 << " Seq: " 451 << P.GetSeq() << "\n"; 452 } 453 } 454 455 OS << " BottomUp State:\n"; 456 if (!BBInfo.hasBottomUpPtrs()) { 457 LLVM_DEBUG(dbgs() << " NONE!\n"); 458 } else { 459 for (auto I = BBInfo.bottom_up_ptr_begin(), E = BBInfo.bottom_up_ptr_end(); 460 I != E; ++I) { 461 const PtrState &P = I->second; 462 OS << " Ptr: " << *I->first 463 << "\n KnownSafe: " << (P.IsKnownSafe()?"true":"false") 464 << "\n ImpreciseRelease: " 465 << (P.IsTrackingImpreciseReleases()?"true":"false") << "\n" 466 << " HasCFGHazards: " 467 << (P.IsCFGHazardAfflicted()?"true":"false") << "\n" 468 << " KnownPositive: " 469 << (P.HasKnownPositiveRefCount()?"true":"false") << "\n" 470 << " Seq: " 471 << P.GetSeq() << "\n"; 472 } 473 } 474 475 return OS; 476 } 477 478 namespace { 479 480 /// The main ARC optimization pass. 481 class ObjCARCOpt { 482 bool Changed = false; 483 bool CFGChanged = false; 484 ProvenanceAnalysis PA; 485 486 /// A cache of references to runtime entry point constants. 487 ARCRuntimeEntryPoints EP; 488 489 /// A cache of MDKinds that can be passed into other functions to propagate 490 /// MDKind identifiers. 491 ARCMDKindCache MDKindCache; 492 493 BundledRetainClaimRVs *BundledInsts = nullptr; 494 495 /// A flag indicating whether the optimization that removes or moves 496 /// retain/release pairs should be performed. 497 bool DisableRetainReleasePairing = false; 498 499 /// Flags which determine whether each of the interesting runtime functions 500 /// is in fact used in the current function. 501 unsigned UsedInThisFunction; 502 503 DenseMap<BasicBlock *, ColorVector> BlockEHColors; 504 505 bool OptimizeRetainRVCall(Function &F, Instruction *RetainRV); 506 void OptimizeAutoreleaseRVCall(Function &F, Instruction *AutoreleaseRV, 507 ARCInstKind &Class); 508 void OptimizeIndividualCalls(Function &F); 509 510 /// Optimize an individual call, optionally passing the 511 /// GetArgRCIdentityRoot if it has already been computed. 512 void OptimizeIndividualCallImpl(Function &F, Instruction *Inst, 513 ARCInstKind Class, const Value *Arg); 514 515 /// Try to optimize an AutoreleaseRV with a RetainRV or UnsafeClaimRV. If the 516 /// optimization occurs, returns true to indicate that the caller should 517 /// assume the instructions are dead. 
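  ///
  /// Illustrative sketch of the pattern (pseudo-IR, not taken from the
  /// source), as it typically appears after the callee has been inlined:
  ///
  ///   call objc_autoreleaseReturnValue(%x)         ; from the inlined callee
  ///   call objc_retainAutoreleasedReturnValue(%x)  ; from the caller
  ///
  /// The autorelease hands ownership back to the caller and the retainRV
  /// immediately reclaims it, so the pair cancels out. When the second call
  /// is objc_unsafeClaimAutoreleasedReturnValue instead, the pair is reduced
  /// to a single objc_release.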
518 bool OptimizeInlinedAutoreleaseRVCall(Function &F, Instruction *Inst, 519 const Value *&Arg, ARCInstKind Class, 520 Instruction *AutoreleaseRV, 521 const Value *&AutoreleaseRVArg); 522 523 void CheckForCFGHazards(const BasicBlock *BB, 524 DenseMap<const BasicBlock *, BBState> &BBStates, 525 BBState &MyStates) const; 526 bool VisitInstructionBottomUp(Instruction *Inst, BasicBlock *BB, 527 BlotMapVector<Value *, RRInfo> &Retains, 528 BBState &MyStates); 529 bool VisitBottomUp(BasicBlock *BB, 530 DenseMap<const BasicBlock *, BBState> &BBStates, 531 BlotMapVector<Value *, RRInfo> &Retains); 532 bool VisitInstructionTopDown( 533 Instruction *Inst, DenseMap<Value *, RRInfo> &Releases, BBState &MyStates, 534 const DenseMap<const Instruction *, SmallPtrSet<const Value *, 2>> 535 &ReleaseInsertPtToRCIdentityRoots); 536 bool VisitTopDown( 537 BasicBlock *BB, DenseMap<const BasicBlock *, BBState> &BBStates, 538 DenseMap<Value *, RRInfo> &Releases, 539 const DenseMap<const Instruction *, SmallPtrSet<const Value *, 2>> 540 &ReleaseInsertPtToRCIdentityRoots); 541 bool Visit(Function &F, DenseMap<const BasicBlock *, BBState> &BBStates, 542 BlotMapVector<Value *, RRInfo> &Retains, 543 DenseMap<Value *, RRInfo> &Releases); 544 545 void MoveCalls(Value *Arg, RRInfo &RetainsToMove, RRInfo &ReleasesToMove, 546 BlotMapVector<Value *, RRInfo> &Retains, 547 DenseMap<Value *, RRInfo> &Releases, 548 SmallVectorImpl<Instruction *> &DeadInsts, Module *M); 549 550 bool PairUpRetainsAndReleases(DenseMap<const BasicBlock *, BBState> &BBStates, 551 BlotMapVector<Value *, RRInfo> &Retains, 552 DenseMap<Value *, RRInfo> &Releases, Module *M, 553 Instruction *Retain, 554 SmallVectorImpl<Instruction *> &DeadInsts, 555 RRInfo &RetainsToMove, RRInfo &ReleasesToMove, 556 Value *Arg, bool KnownSafe, 557 bool &AnyPairsCompletelyEliminated); 558 559 bool PerformCodePlacement(DenseMap<const BasicBlock *, BBState> &BBStates, 560 BlotMapVector<Value *, RRInfo> &Retains, 561 DenseMap<Value *, RRInfo> &Releases, Module *M); 562 563 void OptimizeWeakCalls(Function &F); 564 565 bool OptimizeSequences(Function &F); 566 567 void OptimizeReturns(Function &F); 568 569 template <typename PredicateT> 570 static void cloneOpBundlesIf(CallBase *CI, 571 SmallVectorImpl<OperandBundleDef> &OpBundles, 572 PredicateT Predicate) { 573 for (unsigned I = 0, E = CI->getNumOperandBundles(); I != E; ++I) { 574 OperandBundleUse B = CI->getOperandBundleAt(I); 575 if (Predicate(B)) 576 OpBundles.emplace_back(B); 577 } 578 } 579 580 void addOpBundleForFunclet(BasicBlock *BB, 581 SmallVectorImpl<OperandBundleDef> &OpBundles) { 582 if (!BlockEHColors.empty()) { 583 const ColorVector &CV = BlockEHColors.find(BB)->second; 584 assert(CV.size() > 0 && "Uncolored block"); 585 for (BasicBlock *EHPadBB : CV) 586 if (auto *EHPad = 587 dyn_cast<FuncletPadInst>(EHPadBB->getFirstNonPHIIt())) { 588 OpBundles.emplace_back("funclet", EHPad); 589 return; 590 } 591 } 592 } 593 594 #ifndef NDEBUG 595 void GatherStatistics(Function &F, bool AfterOptimization = false); 596 #endif 597 598 public: 599 void init(Function &F); 600 bool run(Function &F, AAResults &AA); 601 bool hasCFGChanged() const { return CFGChanged; } 602 }; 603 } // end anonymous namespace 604 605 /// Turn objc_retainAutoreleasedReturnValue into objc_retain if the operand is 606 /// not a return value. 607 bool 608 ObjCARCOpt::OptimizeRetainRVCall(Function &F, Instruction *RetainRV) { 609 // Check for the argument being from an immediately preceding call or invoke. 
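  //
  // Illustrative sketch (pseudo-IR, not from the source): in
  //
  //   %x = call @getObject()
  //   call objc_retainAutoreleasedReturnValue(%x)
  //
  // the operand really is the return value of the preceding call (allowing
  // only no-op instructions in between), so the retainRV is left alone;
  // otherwise it is rewritten to a plain objc_retain below.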
610 const Value *Arg = GetArgRCIdentityRoot(RetainRV); 611 if (const Instruction *Call = dyn_cast<CallBase>(Arg)) { 612 if (Call->getParent() == RetainRV->getParent()) { 613 BasicBlock::const_iterator I(Call); 614 ++I; 615 while (IsNoopInstruction(&*I)) 616 ++I; 617 if (&*I == RetainRV) 618 return false; 619 } else if (const InvokeInst *II = dyn_cast<InvokeInst>(Call)) { 620 BasicBlock *RetainRVParent = RetainRV->getParent(); 621 if (II->getNormalDest() == RetainRVParent) { 622 BasicBlock::const_iterator I = RetainRVParent->begin(); 623 while (IsNoopInstruction(&*I)) 624 ++I; 625 if (&*I == RetainRV) 626 return false; 627 } 628 } 629 } 630 631 assert(!BundledInsts->contains(RetainRV) && 632 "a bundled retainRV's argument should be a call"); 633 634 // Turn it to a plain objc_retain. 635 Changed = true; 636 ++NumPeeps; 637 638 LLVM_DEBUG(dbgs() << "Transforming objc_retainAutoreleasedReturnValue => " 639 "objc_retain since the operand is not a return value.\n" 640 "Old = " 641 << *RetainRV << "\n"); 642 643 Function *NewDecl = EP.get(ARCRuntimeEntryPointKind::Retain); 644 cast<CallInst>(RetainRV)->setCalledFunction(NewDecl); 645 646 LLVM_DEBUG(dbgs() << "New = " << *RetainRV << "\n"); 647 648 return false; 649 } 650 651 bool ObjCARCOpt::OptimizeInlinedAutoreleaseRVCall( 652 Function &F, Instruction *Inst, const Value *&Arg, ARCInstKind Class, 653 Instruction *AutoreleaseRV, const Value *&AutoreleaseRVArg) { 654 if (BundledInsts->contains(Inst)) 655 return false; 656 657 // Must be in the same basic block. 658 assert(Inst->getParent() == AutoreleaseRV->getParent()); 659 660 // Must operate on the same root. 661 Arg = GetArgRCIdentityRoot(Inst); 662 AutoreleaseRVArg = GetArgRCIdentityRoot(AutoreleaseRV); 663 if (Arg != AutoreleaseRVArg) { 664 // If there isn't an exact match, check if we have equivalent PHIs. 665 const PHINode *PN = dyn_cast<PHINode>(Arg); 666 if (!PN) 667 return false; 668 669 SmallVector<const Value *, 4> ArgUsers; 670 getEquivalentPHIs(*PN, ArgUsers); 671 if (!llvm::is_contained(ArgUsers, AutoreleaseRVArg)) 672 return false; 673 } 674 675 // Okay, this is a match. Merge them. 676 ++NumPeeps; 677 LLVM_DEBUG(dbgs() << "Found inlined objc_autoreleaseReturnValue '" 678 << *AutoreleaseRV << "' paired with '" << *Inst << "'\n"); 679 680 // Delete the RV pair, starting with the AutoreleaseRV. 681 AutoreleaseRV->replaceAllUsesWith( 682 cast<CallInst>(AutoreleaseRV)->getArgOperand(0)); 683 Changed = true; 684 EraseInstruction(AutoreleaseRV); 685 if (Class == ARCInstKind::RetainRV) { 686 // AutoreleaseRV and RetainRV cancel out. Delete the RetainRV. 687 Inst->replaceAllUsesWith(cast<CallInst>(Inst)->getArgOperand(0)); 688 EraseInstruction(Inst); 689 return true; 690 } 691 692 // UnsafeClaimRV is a frontend peephole for RetainRV + Release. Since the 693 // AutoreleaseRV and RetainRV cancel out, replace UnsafeClaimRV with Release. 694 assert(Class == ARCInstKind::UnsafeClaimRV); 695 Value *CallArg = cast<CallInst>(Inst)->getArgOperand(0); 696 CallInst *Release = 697 CallInst::Create(EP.get(ARCRuntimeEntryPointKind::Release), CallArg, "", 698 Inst->getIterator()); 699 assert(IsAlwaysTail(ARCInstKind::UnsafeClaimRV) && 700 "Expected UnsafeClaimRV to be safe to tail call"); 701 Release->setTailCall(); 702 Inst->replaceAllUsesWith(CallArg); 703 EraseInstruction(Inst); 704 705 // Run the normal optimizations on Release. 
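  //
  // Illustrative sketch (pseudo-IR, not from the source): by this point
  //
  //   call objc_unsafeClaimAutoreleasedReturnValue(%x)
  //
  // has been rewritten to a tail call to objc_release(%x), which the call
  // below may simplify further (for example, erasing it when %x is null).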
706 OptimizeIndividualCallImpl(F, Release, ARCInstKind::Release, Arg); 707 return true; 708 } 709 710 /// Turn objc_autoreleaseReturnValue into objc_autorelease if the result is not 711 /// used as a return value. 712 void ObjCARCOpt::OptimizeAutoreleaseRVCall(Function &F, 713 Instruction *AutoreleaseRV, 714 ARCInstKind &Class) { 715 // Check for a return of the pointer value. 716 const Value *Ptr = GetArgRCIdentityRoot(AutoreleaseRV); 717 718 // If the argument is ConstantPointerNull or UndefValue, its other users 719 // aren't actually interesting to look at. 720 if (isa<ConstantData>(Ptr)) 721 return; 722 723 SmallVector<const Value *, 2> Users; 724 Users.push_back(Ptr); 725 726 // Add PHIs that are equivalent to Ptr to Users. 727 if (const PHINode *PN = dyn_cast<PHINode>(Ptr)) 728 getEquivalentPHIs(*PN, Users); 729 730 do { 731 Ptr = Users.pop_back_val(); 732 for (const User *U : Ptr->users()) { 733 if (isa<ReturnInst>(U) || GetBasicARCInstKind(U) == ARCInstKind::RetainRV) 734 return; 735 if (isa<BitCastInst>(U)) 736 Users.push_back(U); 737 } 738 } while (!Users.empty()); 739 740 Changed = true; 741 ++NumPeeps; 742 743 LLVM_DEBUG( 744 dbgs() << "Transforming objc_autoreleaseReturnValue => " 745 "objc_autorelease since its operand is not used as a return " 746 "value.\n" 747 "Old = " 748 << *AutoreleaseRV << "\n"); 749 750 CallInst *AutoreleaseRVCI = cast<CallInst>(AutoreleaseRV); 751 Function *NewDecl = EP.get(ARCRuntimeEntryPointKind::Autorelease); 752 AutoreleaseRVCI->setCalledFunction(NewDecl); 753 AutoreleaseRVCI->setTailCall(false); // Never tail call objc_autorelease. 754 Class = ARCInstKind::Autorelease; 755 756 LLVM_DEBUG(dbgs() << "New: " << *AutoreleaseRV << "\n"); 757 } 758 759 /// Visit each call, one at a time, and make simplifications without doing any 760 /// additional analysis. 761 void ObjCARCOpt::OptimizeIndividualCalls(Function &F) { 762 LLVM_DEBUG(dbgs() << "\n== ObjCARCOpt::OptimizeIndividualCalls ==\n"); 763 // Reset all the flags in preparation for recomputing them. 764 UsedInThisFunction = 0; 765 766 // Store any delayed AutoreleaseRV intrinsics, so they can be easily paired 767 // with RetainRV and UnsafeClaimRV. 768 Instruction *DelayedAutoreleaseRV = nullptr; 769 const Value *DelayedAutoreleaseRVArg = nullptr; 770 auto setDelayedAutoreleaseRV = [&](Instruction *AutoreleaseRV) { 771 assert(!DelayedAutoreleaseRV || !AutoreleaseRV); 772 DelayedAutoreleaseRV = AutoreleaseRV; 773 DelayedAutoreleaseRVArg = nullptr; 774 }; 775 auto optimizeDelayedAutoreleaseRV = [&]() { 776 if (!DelayedAutoreleaseRV) 777 return; 778 OptimizeIndividualCallImpl(F, DelayedAutoreleaseRV, 779 ARCInstKind::AutoreleaseRV, 780 DelayedAutoreleaseRVArg); 781 setDelayedAutoreleaseRV(nullptr); 782 }; 783 auto shouldDelayAutoreleaseRV = [&](Instruction *NonARCInst) { 784 // Nothing to delay, but we may as well skip the logic below. 785 if (!DelayedAutoreleaseRV) 786 return true; 787 788 // If we hit the end of the basic block we're not going to find an RV-pair. 789 // Stop delaying. 790 if (NonARCInst->isTerminator()) 791 return false; 792 793 // Given the frontend rules for emitting AutoreleaseRV, RetainRV, and 794 // UnsafeClaimRV, it's probably safe to skip over even opaque function calls 795 // here since OptimizeInlinedAutoreleaseRVCall will confirm that they 796 // have the same RCIdentityRoot. 
However, what really matters is 797 // skipping instructions or intrinsics that the inliner could leave behind; 798 // be conservative for now and don't skip over opaque calls, which could 799 // potentially include other ARC calls. 800 auto *CB = dyn_cast<CallBase>(NonARCInst); 801 if (!CB) 802 return true; 803 return CB->getIntrinsicID() != Intrinsic::not_intrinsic; 804 }; 805 806 // Visit all objc_* calls in F. 807 for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) { 808 Instruction *Inst = &*I++; 809 810 if (auto *CI = dyn_cast<CallInst>(Inst)) 811 if (objcarc::hasAttachedCallOpBundle(CI)) { 812 BundledInsts->insertRVCall(I->getIterator(), CI); 813 Changed = true; 814 } 815 816 ARCInstKind Class = GetBasicARCInstKind(Inst); 817 818 // Skip this loop if this instruction isn't itself an ARC intrinsic. 819 const Value *Arg = nullptr; 820 switch (Class) { 821 default: 822 optimizeDelayedAutoreleaseRV(); 823 break; 824 case ARCInstKind::CallOrUser: 825 case ARCInstKind::User: 826 case ARCInstKind::None: 827 // This is a non-ARC instruction. If we're delaying an AutoreleaseRV, 828 // check if it's safe to skip over it; if not, optimize the AutoreleaseRV 829 // now. 830 if (!shouldDelayAutoreleaseRV(Inst)) 831 optimizeDelayedAutoreleaseRV(); 832 continue; 833 case ARCInstKind::AutoreleaseRV: 834 optimizeDelayedAutoreleaseRV(); 835 setDelayedAutoreleaseRV(Inst); 836 continue; 837 case ARCInstKind::RetainRV: 838 case ARCInstKind::UnsafeClaimRV: 839 if (DelayedAutoreleaseRV) { 840 // We have a potential RV pair. Check if they cancel out. 841 if (OptimizeInlinedAutoreleaseRVCall(F, Inst, Arg, Class, 842 DelayedAutoreleaseRV, 843 DelayedAutoreleaseRVArg)) { 844 setDelayedAutoreleaseRV(nullptr); 845 continue; 846 } 847 optimizeDelayedAutoreleaseRV(); 848 } 849 break; 850 } 851 852 OptimizeIndividualCallImpl(F, Inst, Class, Arg); 853 } 854 855 // Catch the final delayed AutoreleaseRV. 856 optimizeDelayedAutoreleaseRV(); 857 } 858 859 /// This function returns true if the value is inert. An ObjC ARC runtime call 860 /// taking an inert operand can be safely deleted. 861 static bool isInertARCValue(Value *V, SmallPtrSet<Value *, 1> &VisitedPhis) { 862 V = V->stripPointerCasts(); 863 864 if (IsNullOrUndef(V)) 865 return true; 866 867 // See if this is a global attribute annotated with an 'objc_arc_inert'. 868 if (auto *GV = dyn_cast<GlobalVariable>(V)) 869 if (GV->hasAttribute("objc_arc_inert")) 870 return true; 871 872 if (auto PN = dyn_cast<PHINode>(V)) { 873 // Ignore this phi if it has already been discovered. 874 if (!VisitedPhis.insert(PN).second) 875 return true; 876 // Look through phis's operands. 877 for (Value *Opnd : PN->incoming_values()) 878 if (!isInertARCValue(Opnd, VisitedPhis)) 879 return false; 880 return true; 881 } 882 883 return false; 884 } 885 886 void ObjCARCOpt::OptimizeIndividualCallImpl(Function &F, Instruction *Inst, 887 ARCInstKind Class, 888 const Value *Arg) { 889 LLVM_DEBUG(dbgs() << "Visiting: Class: " << Class << "; " << *Inst << "\n"); 890 891 // We can delete this call if it takes an inert value. 
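  //
  // Illustrative sketch (pseudo-IR, not from the source): a retain or release
  // of null/undef, or of a global annotated with the "objc_arc_inert"
  // attribute (possibly reached through phis of such values), has no effect:
  //
  //   objc_retain(null)            ; deletable
  //   objc_release(@inert_global)  ; deletable if marked "objc_arc_inert"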
892 SmallPtrSet<Value *, 1> VisitedPhis; 893 894 if (BundledInsts->contains(Inst)) { 895 UsedInThisFunction |= 1 << unsigned(Class); 896 return; 897 } 898 899 if (IsNoopOnGlobal(Class)) 900 if (isInertARCValue(Inst->getOperand(0), VisitedPhis)) { 901 if (!Inst->getType()->isVoidTy()) 902 Inst->replaceAllUsesWith(Inst->getOperand(0)); 903 Inst->eraseFromParent(); 904 Changed = true; 905 return; 906 } 907 908 switch (Class) { 909 default: 910 break; 911 912 // Delete no-op casts. These function calls have special semantics, but 913 // the semantics are entirely implemented via lowering in the front-end, 914 // so by the time they reach the optimizer, they are just no-op calls 915 // which return their argument. 916 // 917 // There are gray areas here, as the ability to cast reference-counted 918 // pointers to raw void* and back allows code to break ARC assumptions, 919 // however these are currently considered to be unimportant. 920 case ARCInstKind::NoopCast: 921 Changed = true; 922 ++NumNoops; 923 LLVM_DEBUG(dbgs() << "Erasing no-op cast: " << *Inst << "\n"); 924 EraseInstruction(Inst); 925 return; 926 927 // If the pointer-to-weak-pointer is null, it's undefined behavior. 928 case ARCInstKind::StoreWeak: 929 case ARCInstKind::LoadWeak: 930 case ARCInstKind::LoadWeakRetained: 931 case ARCInstKind::InitWeak: 932 case ARCInstKind::DestroyWeak: { 933 CallInst *CI = cast<CallInst>(Inst); 934 if (IsNullOrUndef(CI->getArgOperand(0))) { 935 Changed = true; 936 new StoreInst(ConstantInt::getTrue(CI->getContext()), 937 PoisonValue::get(PointerType::getUnqual(CI->getContext())), 938 CI->getIterator()); 939 Value *NewValue = PoisonValue::get(CI->getType()); 940 LLVM_DEBUG( 941 dbgs() << "A null pointer-to-weak-pointer is undefined behavior." 942 "\nOld = " 943 << *CI << "\nNew = " << *NewValue << "\n"); 944 CI->replaceAllUsesWith(NewValue); 945 CI->eraseFromParent(); 946 return; 947 } 948 break; 949 } 950 case ARCInstKind::CopyWeak: 951 case ARCInstKind::MoveWeak: { 952 CallInst *CI = cast<CallInst>(Inst); 953 if (IsNullOrUndef(CI->getArgOperand(0)) || 954 IsNullOrUndef(CI->getArgOperand(1))) { 955 Changed = true; 956 new StoreInst(ConstantInt::getTrue(CI->getContext()), 957 PoisonValue::get(PointerType::getUnqual(CI->getContext())), 958 CI->getIterator()); 959 960 Value *NewValue = PoisonValue::get(CI->getType()); 961 LLVM_DEBUG( 962 dbgs() << "A null pointer-to-weak-pointer is undefined behavior." 963 "\nOld = " 964 << *CI << "\nNew = " << *NewValue << "\n"); 965 966 CI->replaceAllUsesWith(NewValue); 967 CI->eraseFromParent(); 968 return; 969 } 970 break; 971 } 972 case ARCInstKind::RetainRV: 973 if (OptimizeRetainRVCall(F, Inst)) 974 return; 975 break; 976 case ARCInstKind::AutoreleaseRV: 977 OptimizeAutoreleaseRVCall(F, Inst, Class); 978 break; 979 } 980 981 // objc_autorelease(x) -> objc_release(x) if x is otherwise unused. 982 if (IsAutorelease(Class) && Inst->use_empty()) { 983 CallInst *Call = cast<CallInst>(Inst); 984 const Value *Arg = Call->getArgOperand(0); 985 Arg = FindSingleUseIdentifiedObject(Arg); 986 if (Arg) { 987 Changed = true; 988 ++NumAutoreleases; 989 990 // Create the declaration lazily. 
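      //
      // Illustrative before/after (pseudo-IR, not from the source):
      //
      //   objc_autorelease(%x)   ; %x has no other users
      //
      // becomes
      //
      //   objc_release(%x) with !clang.imprecise_release metadata attached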
991 LLVMContext &C = Inst->getContext(); 992 993 Function *Decl = EP.get(ARCRuntimeEntryPointKind::Release); 994 CallInst *NewCall = CallInst::Create(Decl, Call->getArgOperand(0), "", 995 Call->getIterator()); 996 NewCall->setMetadata(MDKindCache.get(ARCMDKindID::ImpreciseRelease), 997 MDNode::get(C, {})); 998 999 LLVM_DEBUG(dbgs() << "Replacing autorelease{,RV}(x) with objc_release(x) " 1000 "since x is otherwise unused.\nOld: " 1001 << *Call << "\nNew: " << *NewCall << "\n"); 1002 1003 EraseInstruction(Call); 1004 Inst = NewCall; 1005 Class = ARCInstKind::Release; 1006 } 1007 } 1008 1009 // For functions which can never be passed stack arguments, add 1010 // a tail keyword. 1011 if (IsAlwaysTail(Class) && !cast<CallInst>(Inst)->isNoTailCall()) { 1012 Changed = true; 1013 LLVM_DEBUG( 1014 dbgs() << "Adding tail keyword to function since it can never be " 1015 "passed stack args: " 1016 << *Inst << "\n"); 1017 cast<CallInst>(Inst)->setTailCall(); 1018 } 1019 1020 // Ensure that functions that can never have a "tail" keyword due to the 1021 // semantics of ARC truly do not do so. 1022 if (IsNeverTail(Class)) { 1023 Changed = true; 1024 LLVM_DEBUG(dbgs() << "Removing tail keyword from function: " << *Inst 1025 << "\n"); 1026 cast<CallInst>(Inst)->setTailCall(false); 1027 } 1028 1029 // Set nounwind as needed. 1030 if (IsNoThrow(Class)) { 1031 Changed = true; 1032 LLVM_DEBUG(dbgs() << "Found no throw class. Setting nounwind on: " << *Inst 1033 << "\n"); 1034 cast<CallInst>(Inst)->setDoesNotThrow(); 1035 } 1036 1037 // Note: This catches instructions unrelated to ARC. 1038 if (!IsNoopOnNull(Class)) { 1039 UsedInThisFunction |= 1 << unsigned(Class); 1040 return; 1041 } 1042 1043 // If we haven't already looked up the root, look it up now. 1044 if (!Arg) 1045 Arg = GetArgRCIdentityRoot(Inst); 1046 1047 // ARC calls with null are no-ops. Delete them. 1048 if (IsNullOrUndef(Arg)) { 1049 Changed = true; 1050 ++NumNoops; 1051 LLVM_DEBUG(dbgs() << "ARC calls with null are no-ops. Erasing: " << *Inst 1052 << "\n"); 1053 EraseInstruction(Inst); 1054 return; 1055 } 1056 1057 // Keep track of which of retain, release, autorelease, and retain_block 1058 // are actually present in this function. 1059 UsedInThisFunction |= 1 << unsigned(Class); 1060 1061 // If Arg is a PHI, and one or more incoming values to the 1062 // PHI are null, and the call is control-equivalent to the PHI, and there 1063 // are no relevant side effects between the PHI and the call, and the call 1064 // is not a release that doesn't have the clang.imprecise_release tag, the 1065 // call could be pushed up to just those paths with non-null incoming 1066 // values. For now, don't bother splitting critical edges for this. 1067 if (Class == ARCInstKind::Release && 1068 !Inst->getMetadata(MDKindCache.get(ARCMDKindID::ImpreciseRelease))) 1069 return; 1070 1071 SmallVector<std::pair<Instruction *, const Value *>, 4> Worklist; 1072 Worklist.push_back(std::make_pair(Inst, Arg)); 1073 do { 1074 std::pair<Instruction *, const Value *> Pair = Worklist.pop_back_val(); 1075 Inst = Pair.first; 1076 Arg = Pair.second; 1077 1078 const PHINode *PN = dyn_cast<PHINode>(Arg); 1079 if (!PN) 1080 continue; 1081 1082 // Determine if the PHI has any null operands, or any incoming 1083 // critical edges. 
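    //
    // Illustrative sketch (pseudo-IR, not from the source): given
    //
    //   %p = phi [ null, %bb1 ], [ %obj, %bb2 ]
    //   objc_release(%p)
    //
    // the release is a no-op on the %bb1 path, so (absent critical edges and
    // anything that cares about the reference count in between) it can be
    // cloned into %bb2 alone and the original call erased.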
1084 bool HasNull = false; 1085 bool HasCriticalEdges = false; 1086 for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { 1087 Value *Incoming = GetRCIdentityRoot(PN->getIncomingValue(i)); 1088 if (IsNullOrUndef(Incoming)) 1089 HasNull = true; 1090 else if (PN->getIncomingBlock(i)->getTerminator()->getNumSuccessors() != 1091 1) { 1092 HasCriticalEdges = true; 1093 break; 1094 } 1095 } 1096 // If we have null operands and no critical edges, optimize. 1097 if (HasCriticalEdges) 1098 continue; 1099 if (!HasNull) 1100 continue; 1101 1102 Instruction *DepInst = nullptr; 1103 1104 // Check that there is nothing that cares about the reference 1105 // count between the call and the phi. 1106 switch (Class) { 1107 case ARCInstKind::Retain: 1108 case ARCInstKind::RetainBlock: 1109 // These can always be moved up. 1110 break; 1111 case ARCInstKind::Release: 1112 // These can't be moved across things that care about the retain 1113 // count. 1114 DepInst = findSingleDependency(NeedsPositiveRetainCount, Arg, 1115 Inst->getParent(), Inst, PA); 1116 break; 1117 case ARCInstKind::Autorelease: 1118 // These can't be moved across autorelease pool scope boundaries. 1119 DepInst = findSingleDependency(AutoreleasePoolBoundary, Arg, 1120 Inst->getParent(), Inst, PA); 1121 break; 1122 case ARCInstKind::UnsafeClaimRV: 1123 case ARCInstKind::RetainRV: 1124 case ARCInstKind::AutoreleaseRV: 1125 // Don't move these; the RV optimization depends on the autoreleaseRV 1126 // being tail called, and the retainRV being immediately after a call 1127 // (which might still happen if we get lucky with codegen layout, but 1128 // it's not worth taking the chance). 1129 continue; 1130 default: 1131 llvm_unreachable("Invalid dependence flavor"); 1132 } 1133 1134 if (DepInst != PN) 1135 continue; 1136 1137 Changed = true; 1138 ++NumPartialNoops; 1139 // Clone the call into each predecessor that has a non-null value. 1140 CallInst *CInst = cast<CallInst>(Inst); 1141 Type *ParamTy = CInst->getArgOperand(0)->getType(); 1142 for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { 1143 Value *Incoming = GetRCIdentityRoot(PN->getIncomingValue(i)); 1144 if (IsNullOrUndef(Incoming)) 1145 continue; 1146 Value *Op = PN->getIncomingValue(i); 1147 BasicBlock::iterator InsertPos = 1148 PN->getIncomingBlock(i)->back().getIterator(); 1149 SmallVector<OperandBundleDef, 1> OpBundles; 1150 cloneOpBundlesIf(CInst, OpBundles, [](const OperandBundleUse &B) { 1151 return B.getTagID() != LLVMContext::OB_funclet; 1152 }); 1153 addOpBundleForFunclet(InsertPos->getParent(), OpBundles); 1154 CallInst *Clone = CallInst::Create(CInst, OpBundles); 1155 if (Op->getType() != ParamTy) 1156 Op = new BitCastInst(Op, ParamTy, "", InsertPos); 1157 Clone->setArgOperand(0, Op); 1158 Clone->insertBefore(*InsertPos->getParent(), InsertPos); 1159 1160 LLVM_DEBUG(dbgs() << "Cloning " << *CInst << "\n" 1161 "And inserting clone at " 1162 << *InsertPos << "\n"); 1163 Worklist.push_back(std::make_pair(Clone, Incoming)); 1164 } 1165 // Erase the original call. 1166 LLVM_DEBUG(dbgs() << "Erasing: " << *CInst << "\n"); 1167 EraseInstruction(CInst); 1168 } while (!Worklist.empty()); 1169 } 1170 1171 /// If we have a top down pointer in the S_Use state, make sure that there are 1172 /// no CFG hazards by checking the states of various bottom up pointers. 
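///
/// Illustrative sketch (not from the original source): if the top-down state
/// for %x in this block is S_Use but one successor's bottom-up state is
/// S_Stop or S_MovableRelease and is not known safe, moving code toward that
/// successor could execute a release on a path where it did not originally
/// run, so the sequence is either cleared or marked as CFG-hazard afflicted.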
1173 static void CheckForUseCFGHazard(const Sequence SuccSSeq, 1174 const bool SuccSRRIKnownSafe, 1175 TopDownPtrState &S, 1176 bool &SomeSuccHasSame, 1177 bool &AllSuccsHaveSame, 1178 bool &NotAllSeqEqualButKnownSafe, 1179 bool &ShouldContinue) { 1180 switch (SuccSSeq) { 1181 case S_CanRelease: { 1182 if (!S.IsKnownSafe() && !SuccSRRIKnownSafe) { 1183 S.ClearSequenceProgress(); 1184 break; 1185 } 1186 S.SetCFGHazardAfflicted(true); 1187 ShouldContinue = true; 1188 break; 1189 } 1190 case S_Use: 1191 SomeSuccHasSame = true; 1192 break; 1193 case S_Stop: 1194 case S_MovableRelease: 1195 if (!S.IsKnownSafe() && !SuccSRRIKnownSafe) 1196 AllSuccsHaveSame = false; 1197 else 1198 NotAllSeqEqualButKnownSafe = true; 1199 break; 1200 case S_Retain: 1201 llvm_unreachable("bottom-up pointer in retain state!"); 1202 case S_None: 1203 llvm_unreachable("This should have been handled earlier."); 1204 } 1205 } 1206 1207 /// If we have a Top Down pointer in the S_CanRelease state, make sure that 1208 /// there are no CFG hazards by checking the states of various bottom up 1209 /// pointers. 1210 static void CheckForCanReleaseCFGHazard(const Sequence SuccSSeq, 1211 const bool SuccSRRIKnownSafe, 1212 TopDownPtrState &S, 1213 bool &SomeSuccHasSame, 1214 bool &AllSuccsHaveSame, 1215 bool &NotAllSeqEqualButKnownSafe) { 1216 switch (SuccSSeq) { 1217 case S_CanRelease: 1218 SomeSuccHasSame = true; 1219 break; 1220 case S_Stop: 1221 case S_MovableRelease: 1222 case S_Use: 1223 if (!S.IsKnownSafe() && !SuccSRRIKnownSafe) 1224 AllSuccsHaveSame = false; 1225 else 1226 NotAllSeqEqualButKnownSafe = true; 1227 break; 1228 case S_Retain: 1229 llvm_unreachable("bottom-up pointer in retain state!"); 1230 case S_None: 1231 llvm_unreachable("This should have been handled earlier."); 1232 } 1233 } 1234 1235 /// Check for critical edges, loop boundaries, irreducible control flow, or 1236 /// other CFG structures where moving code across the edge would result in it 1237 /// being executed more. 1238 void 1239 ObjCARCOpt::CheckForCFGHazards(const BasicBlock *BB, 1240 DenseMap<const BasicBlock *, BBState> &BBStates, 1241 BBState &MyStates) const { 1242 // If any top-down local-use or possible-dec has a succ which is earlier in 1243 // the sequence, forget it. 1244 for (auto I = MyStates.top_down_ptr_begin(), E = MyStates.top_down_ptr_end(); 1245 I != E; ++I) { 1246 TopDownPtrState &S = I->second; 1247 const Sequence Seq = I->second.GetSeq(); 1248 1249 // We only care about S_Retain, S_CanRelease, and S_Use. 1250 if (Seq == S_None) 1251 continue; 1252 1253 // Make sure that if extra top down states are added in the future that this 1254 // code is updated to handle it. 1255 assert((Seq == S_Retain || Seq == S_CanRelease || Seq == S_Use) && 1256 "Unknown top down sequence state."); 1257 1258 const Value *Arg = I->first; 1259 bool SomeSuccHasSame = false; 1260 bool AllSuccsHaveSame = true; 1261 bool NotAllSeqEqualButKnownSafe = false; 1262 1263 for (const BasicBlock *Succ : successors(BB)) { 1264 // If VisitBottomUp has pointer information for this successor, take 1265 // what we know about it. 
      const DenseMap<const BasicBlock *, BBState>::iterator BBI =
          BBStates.find(Succ);
      assert(BBI != BBStates.end());
      const BottomUpPtrState &SuccS = BBI->second.getPtrBottomUpState(Arg);
      const Sequence SuccSSeq = SuccS.GetSeq();

      // If the pointer is in an S_None state in the bottom-up direction, clear
      // the sequence progress, since the bottom-up sequence has finished,
      // suggesting a mismatch between retains and releases. This is true for
      // all three cases that we are handling here: S_Retain, S_Use, and
      // S_CanRelease.
      if (SuccSSeq == S_None) {
        S.ClearSequenceProgress();
        continue;
      }

      // If we have S_Use or S_CanRelease, perform our CFG hazard checks.
      const bool SuccSRRIKnownSafe = SuccS.IsKnownSafe();

      // *NOTE* We do not use Seq from above here since we are allowing for
      // S.GetSeq() to change while we are visiting basic blocks.
      switch(S.GetSeq()) {
      case S_Use: {
        bool ShouldContinue = false;
        CheckForUseCFGHazard(SuccSSeq, SuccSRRIKnownSafe, S, SomeSuccHasSame,
                             AllSuccsHaveSame, NotAllSeqEqualButKnownSafe,
                             ShouldContinue);
        if (ShouldContinue)
          continue;
        break;
      }
      case S_CanRelease:
        CheckForCanReleaseCFGHazard(SuccSSeq, SuccSRRIKnownSafe, S,
                                    SomeSuccHasSame, AllSuccsHaveSame,
                                    NotAllSeqEqualButKnownSafe);
        break;
      case S_Retain:
      case S_None:
      case S_Stop:
      case S_MovableRelease:
        break;
      }
    }

    // If the state at the other end of any of the successor edges
    // matches the current state, require all edges to match. This
    // guards against loops in the middle of a sequence.
    if (SomeSuccHasSame && !AllSuccsHaveSame) {
      S.ClearSequenceProgress();
    } else if (NotAllSeqEqualButKnownSafe) {
      // If we would have cleared the state had we not been known safe, stop
      // code motion. This is because whether or not it is safe to remove RR
      // pairs via KnownSafe is an orthogonal concept to whether we are
      // allowed to perform code motion.
      S.SetCFGHazardAfflicted(true);
    }
  }
}

bool ObjCARCOpt::VisitInstructionBottomUp(
    Instruction *Inst, BasicBlock *BB, BlotMapVector<Value *, RRInfo> &Retains,
    BBState &MyStates) {
  bool NestingDetected = false;
  ARCInstKind Class = GetARCInstKind(Inst);
  const Value *Arg = nullptr;

  LLVM_DEBUG(dbgs() << " Class: " << Class << "\n");

  switch (Class) {
  case ARCInstKind::Release: {
    Arg = GetArgRCIdentityRoot(Inst);

    BottomUpPtrState &S = MyStates.getPtrBottomUpState(Arg);
    NestingDetected |= S.InitBottomUp(MDKindCache, Inst);
    break;
  }
  case ARCInstKind::RetainBlock:
    // In OptimizeIndividualCalls, we have strength reduced all optimizable
    // objc_retainBlocks to objc_retains. Thus at this point any
    // objc_retainBlocks that we see are not optimizable.
    break;
  case ARCInstKind::Retain:
  case ARCInstKind::RetainRV: {
    Arg = GetArgRCIdentityRoot(Inst);
    BottomUpPtrState &S = MyStates.getPtrBottomUpState(Arg);
    if (S.MatchWithRetain()) {
      // Don't do retain+release tracking for ARCInstKind::RetainRV, because
      // it's better to let it remain as the first instruction after a call.
1355 if (Class != ARCInstKind::RetainRV) { 1356 LLVM_DEBUG(dbgs() << " Matching with: " << *Inst << "\n"); 1357 Retains[Inst] = S.GetRRInfo(); 1358 } 1359 S.ClearSequenceProgress(); 1360 } 1361 // A retain moving bottom up can be a use. 1362 break; 1363 } 1364 case ARCInstKind::AutoreleasepoolPop: 1365 // Conservatively, clear MyStates for all known pointers. 1366 MyStates.clearBottomUpPointers(); 1367 return NestingDetected; 1368 case ARCInstKind::AutoreleasepoolPush: 1369 case ARCInstKind::None: 1370 // These are irrelevant. 1371 return NestingDetected; 1372 default: 1373 break; 1374 } 1375 1376 // Consider any other possible effects of this instruction on each 1377 // pointer being tracked. 1378 for (auto MI = MyStates.bottom_up_ptr_begin(), 1379 ME = MyStates.bottom_up_ptr_end(); 1380 MI != ME; ++MI) { 1381 const Value *Ptr = MI->first; 1382 if (Ptr == Arg) 1383 continue; // Handled above. 1384 BottomUpPtrState &S = MI->second; 1385 1386 if (S.HandlePotentialAlterRefCount(Inst, Ptr, PA, Class)) 1387 continue; 1388 1389 S.HandlePotentialUse(BB, Inst, Ptr, PA, Class); 1390 } 1391 1392 return NestingDetected; 1393 } 1394 1395 bool ObjCARCOpt::VisitBottomUp(BasicBlock *BB, 1396 DenseMap<const BasicBlock *, BBState> &BBStates, 1397 BlotMapVector<Value *, RRInfo> &Retains) { 1398 LLVM_DEBUG(dbgs() << "\n== ObjCARCOpt::VisitBottomUp ==\n"); 1399 1400 bool NestingDetected = false; 1401 BBState &MyStates = BBStates[BB]; 1402 1403 // Merge the states from each successor to compute the initial state 1404 // for the current block. 1405 BBState::edge_iterator SI(MyStates.succ_begin()), 1406 SE(MyStates.succ_end()); 1407 if (SI != SE) { 1408 const BasicBlock *Succ = *SI; 1409 DenseMap<const BasicBlock *, BBState>::iterator I = BBStates.find(Succ); 1410 assert(I != BBStates.end()); 1411 MyStates.InitFromSucc(I->second); 1412 ++SI; 1413 for (; SI != SE; ++SI) { 1414 Succ = *SI; 1415 I = BBStates.find(Succ); 1416 assert(I != BBStates.end()); 1417 MyStates.MergeSucc(I->second); 1418 } 1419 } 1420 1421 LLVM_DEBUG(dbgs() << "Before:\n" 1422 << BBStates[BB] << "\n" 1423 << "Performing Dataflow:\n"); 1424 1425 // Visit all the instructions, bottom-up. 1426 for (BasicBlock::iterator I = BB->end(), E = BB->begin(); I != E; --I) { 1427 Instruction *Inst = &*std::prev(I); 1428 1429 // Invoke instructions are visited as part of their successors (below). 1430 if (isa<InvokeInst>(Inst)) 1431 continue; 1432 1433 LLVM_DEBUG(dbgs() << " Visiting " << *Inst << "\n"); 1434 1435 NestingDetected |= VisitInstructionBottomUp(Inst, BB, Retains, MyStates); 1436 1437 // Bail out if the number of pointers being tracked becomes too large so 1438 // that this pass can complete in a reasonable amount of time. 1439 if (MyStates.bottom_up_ptr_list_size() > MaxPtrStates) { 1440 DisableRetainReleasePairing = true; 1441 return false; 1442 } 1443 } 1444 1445 // If there's a predecessor with an invoke, visit the invoke as if it were 1446 // part of this block, since we can't insert code after an invoke in its own 1447 // block, and we don't want to split critical edges. 
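  //
  // Illustrative sketch (pseudo-IR, not from the source): if a predecessor
  // ends in
  //
  //   invoke @getObject() to label %this_block unwind label %lpad
  //
  // that invoke is processed here as part of %this_block's bottom-up visit,
  // because no instruction can be placed after it inside its own block.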
1448 for (BBState::edge_iterator PI(MyStates.pred_begin()), 1449 PE(MyStates.pred_end()); PI != PE; ++PI) { 1450 BasicBlock *Pred = *PI; 1451 if (InvokeInst *II = dyn_cast<InvokeInst>(&Pred->back())) 1452 NestingDetected |= VisitInstructionBottomUp(II, BB, Retains, MyStates); 1453 } 1454 1455 LLVM_DEBUG(dbgs() << "\nFinal State:\n" << BBStates[BB] << "\n"); 1456 1457 return NestingDetected; 1458 } 1459 1460 // Fill ReleaseInsertPtToRCIdentityRoots, which is a map from insertion points 1461 // to the set of RC identity roots that would be released by the release calls 1462 // moved to the insertion points. 1463 static void collectReleaseInsertPts( 1464 const BlotMapVector<Value *, RRInfo> &Retains, 1465 DenseMap<const Instruction *, SmallPtrSet<const Value *, 2>> 1466 &ReleaseInsertPtToRCIdentityRoots) { 1467 for (const auto &P : Retains) { 1468 // Retains is a map from an objc_retain call to a RRInfo of the RC identity 1469 // root of the call. Get the RC identity root of the objc_retain call. 1470 Instruction *Retain = cast<Instruction>(P.first); 1471 Value *Root = GetRCIdentityRoot(Retain->getOperand(0)); 1472 // Collect all the insertion points of the objc_release calls that release 1473 // the RC identity root of the objc_retain call. 1474 for (const Instruction *InsertPt : P.second.ReverseInsertPts) 1475 ReleaseInsertPtToRCIdentityRoots[InsertPt].insert(Root); 1476 } 1477 } 1478 1479 // Get the RC identity roots from an insertion point of an objc_release call. 1480 // Return nullptr if the passed instruction isn't an insertion point. 1481 static const SmallPtrSet<const Value *, 2> * 1482 getRCIdentityRootsFromReleaseInsertPt( 1483 const Instruction *InsertPt, 1484 const DenseMap<const Instruction *, SmallPtrSet<const Value *, 2>> 1485 &ReleaseInsertPtToRCIdentityRoots) { 1486 auto I = ReleaseInsertPtToRCIdentityRoots.find(InsertPt); 1487 if (I == ReleaseInsertPtToRCIdentityRoots.end()) 1488 return nullptr; 1489 return &I->second; 1490 } 1491 1492 bool ObjCARCOpt::VisitInstructionTopDown( 1493 Instruction *Inst, DenseMap<Value *, RRInfo> &Releases, BBState &MyStates, 1494 const DenseMap<const Instruction *, SmallPtrSet<const Value *, 2>> 1495 &ReleaseInsertPtToRCIdentityRoots) { 1496 bool NestingDetected = false; 1497 ARCInstKind Class = GetARCInstKind(Inst); 1498 const Value *Arg = nullptr; 1499 1500 // Make sure a call to objc_retain isn't moved past insertion points of calls 1501 // to objc_release. 1502 if (const SmallPtrSet<const Value *, 2> *Roots = 1503 getRCIdentityRootsFromReleaseInsertPt( 1504 Inst, ReleaseInsertPtToRCIdentityRoots)) 1505 for (const auto *Root : *Roots) { 1506 TopDownPtrState &S = MyStates.getPtrTopDownState(Root); 1507 // Disable code motion if the current position is S_Retain to prevent 1508 // moving the objc_retain call past objc_release calls. If it's 1509 // S_CanRelease or larger, it's not necessary to disable code motion as 1510 // the insertion points that prevent the objc_retain call from moving down 1511 // should have been set already. 1512 if (S.GetSeq() == S_Retain) 1513 S.SetCFGHazardAfflicted(true); 1514 } 1515 1516 LLVM_DEBUG(dbgs() << " Class: " << Class << "\n"); 1517 1518 switch (Class) { 1519 case ARCInstKind::RetainBlock: 1520 // In OptimizeIndividualCalls, we have strength reduced all optimizable 1521 // objc_retainBlocks to objc_retains. Thus at this point any 1522 // objc_retainBlocks that we see are not optimizable. We need to break since 1523 // a retain can be a potential use. 
    break;
  case ARCInstKind::Retain:
  case ARCInstKind::RetainRV: {
    Arg = GetArgRCIdentityRoot(Inst);
    TopDownPtrState &S = MyStates.getPtrTopDownState(Arg);
    NestingDetected |= S.InitTopDown(Class, Inst);
    // A retain can be a potential use; proceed to the generic checking
    // code below.
    break;
  }
  case ARCInstKind::Release: {
    Arg = GetArgRCIdentityRoot(Inst);
    TopDownPtrState &S = MyStates.getPtrTopDownState(Arg);
    // Try to form a tentative pair between this release instruction and the
    // top-down pointers that we are tracking.
    if (S.MatchWithRelease(MDKindCache, Inst)) {
      // If we succeed, copy S's RRInfo into the Release -> {Retain Set
      // Map}. Then we clear S.
      LLVM_DEBUG(dbgs() << " Matching with: " << *Inst << "\n");
      Releases[Inst] = S.GetRRInfo();
      S.ClearSequenceProgress();
    }
    break;
  }
  case ARCInstKind::AutoreleasepoolPop:
    // Conservatively, clear MyStates for all known pointers.
    MyStates.clearTopDownPointers();
    return false;
  case ARCInstKind::AutoreleasepoolPush:
  case ARCInstKind::None:
    // These cannot be uses of any pointer we are tracking.
    return false;
  default:
    break;
  }

  // Consider any other possible effects of this instruction on each
  // pointer being tracked.
  for (auto MI = MyStates.top_down_ptr_begin(),
            ME = MyStates.top_down_ptr_end();
       MI != ME; ++MI) {
    const Value *Ptr = MI->first;
    if (Ptr == Arg)
      continue; // Handled above.
    TopDownPtrState &S = MI->second;
    if (S.HandlePotentialAlterRefCount(Inst, Ptr, PA, Class, *BundledInsts))
      continue;

    S.HandlePotentialUse(Inst, Ptr, PA, Class);
  }

  return NestingDetected;
}

bool ObjCARCOpt::VisitTopDown(
    BasicBlock *BB, DenseMap<const BasicBlock *, BBState> &BBStates,
    DenseMap<Value *, RRInfo> &Releases,
    const DenseMap<const Instruction *, SmallPtrSet<const Value *, 2>>
        &ReleaseInsertPtToRCIdentityRoots) {
  LLVM_DEBUG(dbgs() << "\n== ObjCARCOpt::VisitTopDown ==\n");
  bool NestingDetected = false;
  BBState &MyStates = BBStates[BB];

  // Merge the states from each predecessor to compute the initial state
  // for the current block.
  BBState::edge_iterator PI(MyStates.pred_begin()),
      PE(MyStates.pred_end());
  if (PI != PE) {
    const BasicBlock *Pred = *PI;
    DenseMap<const BasicBlock *, BBState>::iterator I = BBStates.find(Pred);
    assert(I != BBStates.end());
    MyStates.InitFromPred(I->second);
    ++PI;
    for (; PI != PE; ++PI) {
      Pred = *PI;
      I = BBStates.find(Pred);
      assert(I != BBStates.end());
      MyStates.MergePred(I->second);
    }
  }

  // Check that BB and MyStates have the same number of predecessors. This
  // prevents retain calls that live outside a loop from being moved into the
  // loop.
  if (!BB->hasNPredecessors(MyStates.pred_end() - MyStates.pred_begin()))
    for (auto I = MyStates.top_down_ptr_begin(),
              E = MyStates.top_down_ptr_end();
         I != E; ++I)
      I->second.SetCFGHazardAfflicted(true);

  LLVM_DEBUG(dbgs() << "Before:\n"
                    << BBStates[BB] << "\n"
                    << "Performing Dataflow:\n");

  // Visit all the instructions, top-down.
1619 for (Instruction &Inst : *BB) {
1620 LLVM_DEBUG(dbgs() << " Visiting " << Inst << "\n");
1621
1622 NestingDetected |= VisitInstructionTopDown(
1623 &Inst, Releases, MyStates, ReleaseInsertPtToRCIdentityRoots);
1624
1625 // Bail out if the number of pointers being tracked becomes too large so
1626 // that this pass can complete in a reasonable amount of time.
1627 if (MyStates.top_down_ptr_list_size() > MaxPtrStates) {
1628 DisableRetainReleasePairing = true;
1629 return false;
1630 }
1631 }
1632
1633 LLVM_DEBUG(dbgs() << "\nState Before Checking for CFG Hazards:\n"
1634 << BBStates[BB] << "\n\n");
1635 CheckForCFGHazards(BB, BBStates, MyStates);
1636 LLVM_DEBUG(dbgs() << "Final State:\n" << BBStates[BB] << "\n");
1637 return NestingDetected;
1638 }
1639
1640 static void
1641 ComputePostOrders(Function &F,
1642 SmallVectorImpl<BasicBlock *> &PostOrder,
1643 SmallVectorImpl<BasicBlock *> &ReverseCFGPostOrder,
1644 unsigned NoObjCARCExceptionsMDKind,
1645 DenseMap<const BasicBlock *, BBState> &BBStates) {
1646 /// The visited set, for doing DFS walks.
1647 SmallPtrSet<BasicBlock *, 16> Visited;
1648
1649 // Do DFS, computing the PostOrder.
1650 SmallPtrSet<BasicBlock *, 16> OnStack;
1651 SmallVector<std::pair<BasicBlock *, succ_iterator>, 16> SuccStack;
1652
1653 // Functions always have exactly one entry block, and we don't have
1654 // any other block that we treat like an entry block.
1655 BasicBlock *EntryBB = &F.getEntryBlock();
1656 BBState &MyStates = BBStates[EntryBB];
1657 MyStates.SetAsEntry();
1658 Instruction *EntryTI = EntryBB->getTerminator();
1659 SuccStack.push_back(std::make_pair(EntryBB, succ_iterator(EntryTI)));
1660 Visited.insert(EntryBB);
1661 OnStack.insert(EntryBB);
1662 do {
1663 dfs_next_succ:
1664 BasicBlock *CurrBB = SuccStack.back().first;
1665 succ_iterator SE(CurrBB->getTerminator(), false);
1666
1667 while (SuccStack.back().second != SE) {
1668 BasicBlock *SuccBB = *SuccStack.back().second++;
1669 if (Visited.insert(SuccBB).second) {
1670 SuccStack.push_back(
1671 std::make_pair(SuccBB, succ_iterator(SuccBB->getTerminator())));
1672 BBStates[CurrBB].addSucc(SuccBB);
1673 BBState &SuccStates = BBStates[SuccBB];
1674 SuccStates.addPred(CurrBB);
1675 OnStack.insert(SuccBB);
1676 goto dfs_next_succ;
1677 }
1678
1679 if (!OnStack.count(SuccBB)) {
1680 BBStates[CurrBB].addSucc(SuccBB);
1681 BBStates[SuccBB].addPred(CurrBB);
1682 }
1683 }
1684 OnStack.erase(CurrBB);
1685 PostOrder.push_back(CurrBB);
1686 SuccStack.pop_back();
1687 } while (!SuccStack.empty());
1688
1689 Visited.clear();
1690
1691 // Do reverse-CFG DFS, computing the reverse-CFG PostOrder.
1692 // Functions may have many exits, and there are also blocks which we treat
1693 // as exits due to ignored edges.
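// For illustration, in a CFG like
//
//   entry -> bb1 -> ret1
//   entry -> bb2 -> ret2
//
// a reverse-CFG DFS seeded only from ret1 would visit bb1 and entry but never
// bb2 or ret2, so the loop below starts a walk from every block that BBState
// considers an exit.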
1694 SmallVector<std::pair<BasicBlock *, BBState::edge_iterator>, 16> PredStack; 1695 for (BasicBlock &ExitBB : F) { 1696 BBState &MyStates = BBStates[&ExitBB]; 1697 if (!MyStates.isExit()) 1698 continue; 1699 1700 MyStates.SetAsExit(); 1701 1702 PredStack.push_back(std::make_pair(&ExitBB, MyStates.pred_begin())); 1703 Visited.insert(&ExitBB); 1704 while (!PredStack.empty()) { 1705 reverse_dfs_next_succ: 1706 BBState::edge_iterator PE = BBStates[PredStack.back().first].pred_end(); 1707 while (PredStack.back().second != PE) { 1708 BasicBlock *BB = *PredStack.back().second++; 1709 if (Visited.insert(BB).second) { 1710 PredStack.push_back(std::make_pair(BB, BBStates[BB].pred_begin())); 1711 goto reverse_dfs_next_succ; 1712 } 1713 } 1714 ReverseCFGPostOrder.push_back(PredStack.pop_back_val().first); 1715 } 1716 } 1717 } 1718 1719 // Visit the function both top-down and bottom-up. 1720 bool ObjCARCOpt::Visit(Function &F, 1721 DenseMap<const BasicBlock *, BBState> &BBStates, 1722 BlotMapVector<Value *, RRInfo> &Retains, 1723 DenseMap<Value *, RRInfo> &Releases) { 1724 // Use reverse-postorder traversals, because we magically know that loops 1725 // will be well behaved, i.e. they won't repeatedly call retain on a single 1726 // pointer without doing a release. We can't use the ReversePostOrderTraversal 1727 // class here because we want the reverse-CFG postorder to consider each 1728 // function exit point, and we want to ignore selected cycle edges. 1729 SmallVector<BasicBlock *, 16> PostOrder; 1730 SmallVector<BasicBlock *, 16> ReverseCFGPostOrder; 1731 ComputePostOrders(F, PostOrder, ReverseCFGPostOrder, 1732 MDKindCache.get(ARCMDKindID::NoObjCARCExceptions), 1733 BBStates); 1734 1735 // Use reverse-postorder on the reverse CFG for bottom-up. 1736 bool BottomUpNestingDetected = false; 1737 for (BasicBlock *BB : llvm::reverse(ReverseCFGPostOrder)) { 1738 BottomUpNestingDetected |= VisitBottomUp(BB, BBStates, Retains); 1739 if (DisableRetainReleasePairing) 1740 return false; 1741 } 1742 1743 DenseMap<const Instruction *, SmallPtrSet<const Value *, 2>> 1744 ReleaseInsertPtToRCIdentityRoots; 1745 collectReleaseInsertPts(Retains, ReleaseInsertPtToRCIdentityRoots); 1746 1747 // Use reverse-postorder for top-down. 1748 bool TopDownNestingDetected = false; 1749 for (BasicBlock *BB : llvm::reverse(PostOrder)) { 1750 TopDownNestingDetected |= 1751 VisitTopDown(BB, BBStates, Releases, ReleaseInsertPtToRCIdentityRoots); 1752 if (DisableRetainReleasePairing) 1753 return false; 1754 } 1755 1756 return TopDownNestingDetected && BottomUpNestingDetected; 1757 } 1758 1759 /// Move the calls in RetainsToMove and ReleasesToMove. 1760 void ObjCARCOpt::MoveCalls(Value *Arg, RRInfo &RetainsToMove, 1761 RRInfo &ReleasesToMove, 1762 BlotMapVector<Value *, RRInfo> &Retains, 1763 DenseMap<Value *, RRInfo> &Releases, 1764 SmallVectorImpl<Instruction *> &DeadInsts, 1765 Module *M) { 1766 LLVM_DEBUG(dbgs() << "== ObjCARCOpt::MoveCalls ==\n"); 1767 1768 // Insert the new retain and release calls. 
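// A brief sketch of what follows: new retains are created at
// ReleasesToMove.ReverseInsertPts and new releases at
// RetainsToMove.ReverseInsertPts, i.e. each side is re-emitted at the
// insertion points computed for its partner during the dataflow walks, and
// the original calls are queued on DeadInsts for the caller to erase. If both
// ReverseInsertPts sets are empty, no new calls are emitted and the original
// pair is simply deleted.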
1769 for (Instruction *InsertPt : ReleasesToMove.ReverseInsertPts) { 1770 Function *Decl = EP.get(ARCRuntimeEntryPointKind::Retain); 1771 SmallVector<OperandBundleDef, 1> BundleList; 1772 addOpBundleForFunclet(InsertPt->getParent(), BundleList); 1773 CallInst *Call = 1774 CallInst::Create(Decl, Arg, BundleList, "", InsertPt->getIterator()); 1775 Call->setDoesNotThrow(); 1776 Call->setTailCall(); 1777 1778 LLVM_DEBUG(dbgs() << "Inserting new Retain: " << *Call 1779 << "\n" 1780 "At insertion point: " 1781 << *InsertPt << "\n"); 1782 } 1783 for (Instruction *InsertPt : RetainsToMove.ReverseInsertPts) { 1784 Function *Decl = EP.get(ARCRuntimeEntryPointKind::Release); 1785 SmallVector<OperandBundleDef, 1> BundleList; 1786 addOpBundleForFunclet(InsertPt->getParent(), BundleList); 1787 CallInst *Call = 1788 CallInst::Create(Decl, Arg, BundleList, "", InsertPt->getIterator()); 1789 // Attach a clang.imprecise_release metadata tag, if appropriate. 1790 if (MDNode *M = ReleasesToMove.ReleaseMetadata) 1791 Call->setMetadata(MDKindCache.get(ARCMDKindID::ImpreciseRelease), M); 1792 Call->setDoesNotThrow(); 1793 if (ReleasesToMove.IsTailCallRelease) 1794 Call->setTailCall(); 1795 1796 LLVM_DEBUG(dbgs() << "Inserting new Release: " << *Call 1797 << "\n" 1798 "At insertion point: " 1799 << *InsertPt << "\n"); 1800 } 1801 1802 // Delete the original retain and release calls. 1803 for (Instruction *OrigRetain : RetainsToMove.Calls) { 1804 Retains.blot(OrigRetain); 1805 DeadInsts.push_back(OrigRetain); 1806 LLVM_DEBUG(dbgs() << "Deleting retain: " << *OrigRetain << "\n"); 1807 } 1808 for (Instruction *OrigRelease : ReleasesToMove.Calls) { 1809 Releases.erase(OrigRelease); 1810 DeadInsts.push_back(OrigRelease); 1811 LLVM_DEBUG(dbgs() << "Deleting release: " << *OrigRelease << "\n"); 1812 } 1813 } 1814 1815 bool ObjCARCOpt::PairUpRetainsAndReleases( 1816 DenseMap<const BasicBlock *, BBState> &BBStates, 1817 BlotMapVector<Value *, RRInfo> &Retains, 1818 DenseMap<Value *, RRInfo> &Releases, Module *M, 1819 Instruction *Retain, 1820 SmallVectorImpl<Instruction *> &DeadInsts, RRInfo &RetainsToMove, 1821 RRInfo &ReleasesToMove, Value *Arg, bool KnownSafe, 1822 bool &AnyPairsCompletelyEliminated) { 1823 // If a pair happens in a region where it is known that the reference count 1824 // is already incremented, we can similarly ignore possible decrements unless 1825 // we are dealing with a retainable object with multiple provenance sources. 1826 bool KnownSafeTD = true, KnownSafeBU = true; 1827 bool CFGHazardAfflicted = false; 1828 1829 // Connect the dots between the top-down-collected RetainsToMove and 1830 // bottom-up-collected ReleasesToMove to form sets of related calls. 1831 // This is an iterative process so that we connect multiple releases 1832 // to multiple retains if needed. 
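// Purely illustrative example of why this is iterative: with
//
//            objc_retain(%x)
//             /          \
//   objc_release(%x)   objc_release(%x)
//
// the initial retain pulls in both releases; if one of those releases was in
// turn matched (bottom-up) with a second retain on another path, that retain
// is pulled in on the next pass, and so on until neither set grows.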
1833 unsigned OldDelta = 0; 1834 unsigned NewDelta = 0; 1835 unsigned OldCount = 0; 1836 unsigned NewCount = 0; 1837 bool FirstRelease = true; 1838 for (SmallVector<Instruction *, 4> NewRetains{Retain};;) { 1839 SmallVector<Instruction *, 4> NewReleases; 1840 for (Instruction *NewRetain : NewRetains) { 1841 auto It = Retains.find(NewRetain); 1842 assert(It != Retains.end()); 1843 const RRInfo &NewRetainRRI = It->second; 1844 KnownSafeTD &= NewRetainRRI.KnownSafe; 1845 CFGHazardAfflicted |= NewRetainRRI.CFGHazardAfflicted; 1846 for (Instruction *NewRetainRelease : NewRetainRRI.Calls) { 1847 auto Jt = Releases.find(NewRetainRelease); 1848 if (Jt == Releases.end()) 1849 return false; 1850 const RRInfo &NewRetainReleaseRRI = Jt->second; 1851 1852 // If the release does not have a reference to the retain as well, 1853 // something happened which is unaccounted for. Do not do anything. 1854 // 1855 // This can happen if we catch an additive overflow during path count 1856 // merging. 1857 if (!NewRetainReleaseRRI.Calls.count(NewRetain)) 1858 return false; 1859 1860 if (ReleasesToMove.Calls.insert(NewRetainRelease).second) { 1861 // If we overflow when we compute the path count, don't remove/move 1862 // anything. 1863 const BBState &NRRBBState = BBStates[NewRetainRelease->getParent()]; 1864 unsigned PathCount = BBState::OverflowOccurredValue; 1865 if (NRRBBState.GetAllPathCountWithOverflow(PathCount)) 1866 return false; 1867 assert(PathCount != BBState::OverflowOccurredValue && 1868 "PathCount at this point can not be " 1869 "OverflowOccurredValue."); 1870 OldDelta -= PathCount; 1871 1872 // Merge the ReleaseMetadata and IsTailCallRelease values. 1873 if (FirstRelease) { 1874 ReleasesToMove.ReleaseMetadata = 1875 NewRetainReleaseRRI.ReleaseMetadata; 1876 ReleasesToMove.IsTailCallRelease = 1877 NewRetainReleaseRRI.IsTailCallRelease; 1878 FirstRelease = false; 1879 } else { 1880 if (ReleasesToMove.ReleaseMetadata != 1881 NewRetainReleaseRRI.ReleaseMetadata) 1882 ReleasesToMove.ReleaseMetadata = nullptr; 1883 if (ReleasesToMove.IsTailCallRelease != 1884 NewRetainReleaseRRI.IsTailCallRelease) 1885 ReleasesToMove.IsTailCallRelease = false; 1886 } 1887 1888 // Collect the optimal insertion points. 1889 if (!KnownSafe) 1890 for (Instruction *RIP : NewRetainReleaseRRI.ReverseInsertPts) { 1891 if (ReleasesToMove.ReverseInsertPts.insert(RIP).second) { 1892 // If we overflow when we compute the path count, don't 1893 // remove/move anything. 1894 const BBState &RIPBBState = BBStates[RIP->getParent()]; 1895 PathCount = BBState::OverflowOccurredValue; 1896 if (RIPBBState.GetAllPathCountWithOverflow(PathCount)) 1897 return false; 1898 assert(PathCount != BBState::OverflowOccurredValue && 1899 "PathCount at this point can not be " 1900 "OverflowOccurredValue."); 1901 NewDelta -= PathCount; 1902 } 1903 } 1904 NewReleases.push_back(NewRetainRelease); 1905 } 1906 } 1907 } 1908 NewRetains.clear(); 1909 if (NewReleases.empty()) break; 1910 1911 // Back the other way. 
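// That is, for every release just added above, pull in the retains it was
// matched with during the bottom-up walk, accumulating their path counts with
// the opposite sign so that a balanced set of calls nets out to zero.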
1912 for (Instruction *NewRelease : NewReleases) { 1913 auto It = Releases.find(NewRelease); 1914 assert(It != Releases.end()); 1915 const RRInfo &NewReleaseRRI = It->second; 1916 KnownSafeBU &= NewReleaseRRI.KnownSafe; 1917 CFGHazardAfflicted |= NewReleaseRRI.CFGHazardAfflicted; 1918 for (Instruction *NewReleaseRetain : NewReleaseRRI.Calls) { 1919 auto Jt = Retains.find(NewReleaseRetain); 1920 if (Jt == Retains.end()) 1921 return false; 1922 const RRInfo &NewReleaseRetainRRI = Jt->second; 1923 1924 // If the retain does not have a reference to the release as well, 1925 // something happened which is unaccounted for. Do not do anything. 1926 // 1927 // This can happen if we catch an additive overflow during path count 1928 // merging. 1929 if (!NewReleaseRetainRRI.Calls.count(NewRelease)) 1930 return false; 1931 1932 if (RetainsToMove.Calls.insert(NewReleaseRetain).second) { 1933 // If we overflow when we compute the path count, don't remove/move 1934 // anything. 1935 const BBState &NRRBBState = BBStates[NewReleaseRetain->getParent()]; 1936 unsigned PathCount = BBState::OverflowOccurredValue; 1937 if (NRRBBState.GetAllPathCountWithOverflow(PathCount)) 1938 return false; 1939 assert(PathCount != BBState::OverflowOccurredValue && 1940 "PathCount at this point can not be " 1941 "OverflowOccurredValue."); 1942 OldDelta += PathCount; 1943 OldCount += PathCount; 1944 1945 // Collect the optimal insertion points. 1946 if (!KnownSafe) 1947 for (Instruction *RIP : NewReleaseRetainRRI.ReverseInsertPts) { 1948 if (RetainsToMove.ReverseInsertPts.insert(RIP).second) { 1949 // If we overflow when we compute the path count, don't 1950 // remove/move anything. 1951 const BBState &RIPBBState = BBStates[RIP->getParent()]; 1952 1953 PathCount = BBState::OverflowOccurredValue; 1954 if (RIPBBState.GetAllPathCountWithOverflow(PathCount)) 1955 return false; 1956 assert(PathCount != BBState::OverflowOccurredValue && 1957 "PathCount at this point can not be " 1958 "OverflowOccurredValue."); 1959 NewDelta += PathCount; 1960 NewCount += PathCount; 1961 } 1962 } 1963 NewRetains.push_back(NewReleaseRetain); 1964 } 1965 } 1966 } 1967 if (NewRetains.empty()) break; 1968 } 1969 1970 // We can only remove pointers if we are known safe in both directions. 1971 bool UnconditionallySafe = KnownSafeTD && KnownSafeBU; 1972 if (UnconditionallySafe) { 1973 RetainsToMove.ReverseInsertPts.clear(); 1974 ReleasesToMove.ReverseInsertPts.clear(); 1975 NewCount = 0; 1976 } else { 1977 // Determine whether the new insertion points we computed preserve the 1978 // balance of retain and release calls through the program. 1979 // TODO: If the fully aggressive solution isn't valid, try to find a 1980 // less aggressive solution which is. 1981 if (NewDelta != 0) 1982 return false; 1983 1984 // At this point, we are not going to remove any RR pairs, but we still are 1985 // able to move RR pairs. If one of our pointers is afflicted with 1986 // CFGHazards, we cannot perform such code motion so exit early. 1987 const bool WillPerformCodeMotion = 1988 !RetainsToMove.ReverseInsertPts.empty() || 1989 !ReleasesToMove.ReverseInsertPts.empty(); 1990 if (CFGHazardAfflicted && WillPerformCodeMotion) 1991 return false; 1992 } 1993 1994 // Determine whether the original call points are balanced in the retain and 1995 // release calls through the program. If not, conservatively don't touch 1996 // them. 1997 // TODO: It's theoretically possible to do code motion in this case, as 1998 // long as the existing imbalances are maintained. 
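// A worked illustration of the arithmetic (path counts made up): if the
// matched retain sits in a block with path count 2 and its two matched
// releases sit in blocks with path count 1 each, the running total is
// +2 - 1 - 1 = 0 and the set is balanced; any mismatch leaves OldDelta
// nonzero and we conservatively bail out below.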
1999 if (OldDelta != 0) 2000 return false; 2001 2002 Changed = true; 2003 assert(OldCount != 0 && "Unreachable code?"); 2004 NumRRs += OldCount - NewCount; 2005 // Set to true if we completely removed any RR pairs. 2006 AnyPairsCompletelyEliminated = NewCount == 0; 2007 2008 // We can move calls! 2009 return true; 2010 } 2011 2012 /// Identify pairings between the retains and releases, and delete and/or move 2013 /// them. 2014 bool ObjCARCOpt::PerformCodePlacement( 2015 DenseMap<const BasicBlock *, BBState> &BBStates, 2016 BlotMapVector<Value *, RRInfo> &Retains, 2017 DenseMap<Value *, RRInfo> &Releases, Module *M) { 2018 LLVM_DEBUG(dbgs() << "\n== ObjCARCOpt::PerformCodePlacement ==\n"); 2019 2020 bool AnyPairsCompletelyEliminated = false; 2021 SmallVector<Instruction *, 8> DeadInsts; 2022 2023 // Visit each retain. 2024 for (BlotMapVector<Value *, RRInfo>::const_iterator I = Retains.begin(), 2025 E = Retains.end(); 2026 I != E; ++I) { 2027 Value *V = I->first; 2028 if (!V) continue; // blotted 2029 2030 Instruction *Retain = cast<Instruction>(V); 2031 2032 LLVM_DEBUG(dbgs() << "Visiting: " << *Retain << "\n"); 2033 2034 Value *Arg = GetArgRCIdentityRoot(Retain); 2035 2036 // If the object being released is in static or stack storage, we know it's 2037 // not being managed by ObjC reference counting, so we can delete pairs 2038 // regardless of what possible decrements or uses lie between them. 2039 bool KnownSafe = isa<Constant>(Arg) || isa<AllocaInst>(Arg); 2040 2041 // A constant pointer can't be pointing to an object on the heap. It may 2042 // be reference-counted, but it won't be deleted. 2043 if (const LoadInst *LI = dyn_cast<LoadInst>(Arg)) 2044 if (const GlobalVariable *GV = 2045 dyn_cast<GlobalVariable>( 2046 GetRCIdentityRoot(LI->getPointerOperand()))) 2047 if (GV->isConstant()) 2048 KnownSafe = true; 2049 2050 // Connect the dots between the top-down-collected RetainsToMove and 2051 // bottom-up-collected ReleasesToMove to form sets of related calls. 2052 RRInfo RetainsToMove, ReleasesToMove; 2053 2054 bool PerformMoveCalls = PairUpRetainsAndReleases( 2055 BBStates, Retains, Releases, M, Retain, DeadInsts, 2056 RetainsToMove, ReleasesToMove, Arg, KnownSafe, 2057 AnyPairsCompletelyEliminated); 2058 2059 if (PerformMoveCalls) { 2060 // Ok, everything checks out and we're all set. Let's move/delete some 2061 // code! 2062 MoveCalls(Arg, RetainsToMove, ReleasesToMove, 2063 Retains, Releases, DeadInsts, M); 2064 } 2065 } 2066 2067 // Now that we're done moving everything, we can delete the newly dead 2068 // instructions, as we no longer need them as insert points. 2069 while (!DeadInsts.empty()) 2070 EraseInstruction(DeadInsts.pop_back_val()); 2071 2072 return AnyPairsCompletelyEliminated; 2073 } 2074 2075 /// Weak pointer optimizations. 2076 void ObjCARCOpt::OptimizeWeakCalls(Function &F) { 2077 LLVM_DEBUG(dbgs() << "\n== ObjCARCOpt::OptimizeWeakCalls ==\n"); 2078 2079 // First, do memdep-style RLE and S2L optimizations. We can't use memdep 2080 // itself because it uses AliasAnalysis and we need to do provenance 2081 // queries instead. 2082 for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) { 2083 Instruction *Inst = &*I++; 2084 2085 LLVM_DEBUG(dbgs() << "Visiting: " << *Inst << "\n"); 2086 2087 ARCInstKind Class = GetBasicARCInstKind(Inst); 2088 if (Class != ARCInstKind::LoadWeak && 2089 Class != ARCInstKind::LoadWeakRetained) 2090 continue; 2091 2092 // Delete objc_loadWeak calls with no users. 
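// e.g. (illustrative):
//   %tmp = call i8* @objc_loadWeak(i8* %weakptr)   ; %tmp has no uses
// is erased outright by the check below.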
2093 if (Class == ARCInstKind::LoadWeak && Inst->use_empty()) {
2094 Inst->eraseFromParent();
2095 Changed = true;
2096 continue;
2097 }
2098
2099 // TODO: For now, just look for an earlier available version of this value
2100 // within the same block. Theoretically, we could do memdep-style non-local
2101 // analysis too, but that would want caching. A better approach would be to
2102 // use the technique that EarlyCSE uses.
2103 inst_iterator Current = std::prev(I);
2104 BasicBlock *CurrentBB = &*Current.getBasicBlockIterator();
2105 for (BasicBlock::iterator B = CurrentBB->begin(),
2106 J = Current.getInstructionIterator();
2107 J != B; --J) {
2108 Instruction *EarlierInst = &*std::prev(J);
2109 ARCInstKind EarlierClass = GetARCInstKind(EarlierInst);
2110 switch (EarlierClass) {
2111 case ARCInstKind::LoadWeak:
2112 case ARCInstKind::LoadWeakRetained: {
2113 // If this is loading from the same pointer, replace this load's value
2114 // with that one.
2115 CallInst *Call = cast<CallInst>(Inst);
2116 CallInst *EarlierCall = cast<CallInst>(EarlierInst);
2117 Value *Arg = Call->getArgOperand(0);
2118 Value *EarlierArg = EarlierCall->getArgOperand(0);
2119 switch (PA.getAA()->alias(Arg, EarlierArg)) {
2120 case AliasResult::MustAlias:
2121 Changed = true;
2122 // If the load has a builtin retain, insert a plain retain for it.
2123 if (Class == ARCInstKind::LoadWeakRetained) {
2124 Function *Decl = EP.get(ARCRuntimeEntryPointKind::Retain);
2125 CallInst *CI =
2126 CallInst::Create(Decl, EarlierCall, "", Call->getIterator());
2127 CI->setTailCall();
2128 }
2129 // Zap the fully redundant load.
2130 Call->replaceAllUsesWith(EarlierCall);
2131 Call->eraseFromParent();
2132 goto clobbered;
2133 case AliasResult::MayAlias:
2134 case AliasResult::PartialAlias:
2135 goto clobbered;
2136 case AliasResult::NoAlias:
2137 break;
2138 }
2139 break;
2140 }
2141 case ARCInstKind::StoreWeak:
2142 case ARCInstKind::InitWeak: {
2143 // If this is storing to the same pointer and has the same size etc.,
2144 // replace this load's value with the stored value.
2145 CallInst *Call = cast<CallInst>(Inst);
2146 CallInst *EarlierCall = cast<CallInst>(EarlierInst);
2147 Value *Arg = Call->getArgOperand(0);
2148 Value *EarlierArg = EarlierCall->getArgOperand(0);
2149 switch (PA.getAA()->alias(Arg, EarlierArg)) {
2150 case AliasResult::MustAlias:
2151 Changed = true;
2152 // If the load has a builtin retain, insert a plain retain for it.
2153 if (Class == ARCInstKind::LoadWeakRetained) {
2154 Function *Decl = EP.get(ARCRuntimeEntryPointKind::Retain);
2155 CallInst *CI =
2156 CallInst::Create(Decl, EarlierCall, "", Call->getIterator());
2157 CI->setTailCall();
2158 }
2159 // Zap the fully redundant load.
2160 Call->replaceAllUsesWith(EarlierCall->getArgOperand(1));
2161 Call->eraseFromParent();
2162 goto clobbered;
2163 case AliasResult::MayAlias:
2164 case AliasResult::PartialAlias:
2165 goto clobbered;
2166 case AliasResult::NoAlias:
2167 break;
2168 }
2169 break;
2170 }
2171 case ARCInstKind::MoveWeak:
2172 case ARCInstKind::CopyWeak:
2173 // TODO: Grab the copied value.
2174 goto clobbered;
2175 case ARCInstKind::AutoreleasepoolPush:
2176 case ARCInstKind::None:
2177 case ARCInstKind::IntrinsicUser:
2178 case ARCInstKind::User:
2179 // Weak pointers are only modified through the weak entry points
2180 // (and arbitrary calls, which could call the weak entry points).
2181 break;
2182 default:
2183 // Anything else could modify the weak pointer.
2184 goto clobbered; 2185 } 2186 } 2187 clobbered:; 2188 } 2189 2190 // Then, for each destroyWeak with an alloca operand, check to see if 2191 // the alloca and all its users can be zapped. 2192 for (Instruction &Inst : llvm::make_early_inc_range(instructions(F))) { 2193 ARCInstKind Class = GetBasicARCInstKind(&Inst); 2194 if (Class != ARCInstKind::DestroyWeak) 2195 continue; 2196 2197 CallInst *Call = cast<CallInst>(&Inst); 2198 Value *Arg = Call->getArgOperand(0); 2199 if (AllocaInst *Alloca = dyn_cast<AllocaInst>(Arg)) { 2200 for (User *U : Alloca->users()) { 2201 const Instruction *UserInst = cast<Instruction>(U); 2202 switch (GetBasicARCInstKind(UserInst)) { 2203 case ARCInstKind::InitWeak: 2204 case ARCInstKind::StoreWeak: 2205 case ARCInstKind::DestroyWeak: 2206 continue; 2207 default: 2208 goto done; 2209 } 2210 } 2211 Changed = true; 2212 for (User *U : llvm::make_early_inc_range(Alloca->users())) { 2213 CallInst *UserInst = cast<CallInst>(U); 2214 switch (GetBasicARCInstKind(UserInst)) { 2215 case ARCInstKind::InitWeak: 2216 case ARCInstKind::StoreWeak: 2217 // These functions return their second argument. 2218 UserInst->replaceAllUsesWith(UserInst->getArgOperand(1)); 2219 break; 2220 case ARCInstKind::DestroyWeak: 2221 // No return value. 2222 break; 2223 default: 2224 llvm_unreachable("alloca really is used!"); 2225 } 2226 UserInst->eraseFromParent(); 2227 } 2228 Alloca->eraseFromParent(); 2229 done:; 2230 } 2231 } 2232 } 2233 2234 /// Identify program paths which execute sequences of retains and releases which 2235 /// can be eliminated. 2236 bool ObjCARCOpt::OptimizeSequences(Function &F) { 2237 // Releases, Retains - These are used to store the results of the main flow 2238 // analysis. These use Value* as the key instead of Instruction* so that the 2239 // map stays valid when we get around to rewriting code and calls get 2240 // replaced by arguments. 2241 DenseMap<Value *, RRInfo> Releases; 2242 BlotMapVector<Value *, RRInfo> Retains; 2243 2244 // This is used during the traversal of the function to track the 2245 // states for each identified object at each block. 2246 DenseMap<const BasicBlock *, BBState> BBStates; 2247 2248 // Analyze the CFG of the function, and all instructions. 2249 bool NestingDetected = Visit(F, BBStates, Retains, Releases); 2250 2251 if (DisableRetainReleasePairing) 2252 return false; 2253 2254 // Transform. 2255 bool AnyPairsCompletelyEliminated = PerformCodePlacement(BBStates, Retains, 2256 Releases, 2257 F.getParent()); 2258 2259 return AnyPairsCompletelyEliminated && NestingDetected; 2260 } 2261 2262 /// Check if there is a dependent call earlier that does not have anything in 2263 /// between the Retain and the call that can affect the reference count of their 2264 /// shared pointer argument. Note that Retain need not be in BB. 2265 static CallInst *HasSafePathToPredecessorCall(const Value *Arg, 2266 Instruction *Retain, 2267 ProvenanceAnalysis &PA) { 2268 auto *Call = dyn_cast_or_null<CallInst>(findSingleDependency( 2269 CanChangeRetainCount, Arg, Retain->getParent(), Retain, PA)); 2270 2271 // Check that the pointer is the return value of the call. 2272 if (!Call || Arg != Call) 2273 return nullptr; 2274 2275 // Check that the call is a regular call. 2276 ARCInstKind Class = GetBasicARCInstKind(Call); 2277 return Class == ARCInstKind::CallOrUser || Class == ARCInstKind::Call 2278 ? 
Call 2279 : nullptr; 2280 } 2281 2282 /// Find a dependent retain that precedes the given autorelease for which there 2283 /// is nothing in between the two instructions that can affect the ref count of 2284 /// Arg. 2285 static CallInst * 2286 FindPredecessorRetainWithSafePath(const Value *Arg, BasicBlock *BB, 2287 Instruction *Autorelease, 2288 ProvenanceAnalysis &PA) { 2289 auto *Retain = dyn_cast_or_null<CallInst>( 2290 findSingleDependency(CanChangeRetainCount, Arg, BB, Autorelease, PA)); 2291 2292 // Check that we found a retain with the same argument. 2293 if (!Retain || !IsRetain(GetBasicARCInstKind(Retain)) || 2294 GetArgRCIdentityRoot(Retain) != Arg) { 2295 return nullptr; 2296 } 2297 2298 return Retain; 2299 } 2300 2301 /// Look for an ``autorelease'' instruction dependent on Arg such that there are 2302 /// no instructions dependent on Arg that need a positive ref count in between 2303 /// the autorelease and the ret. 2304 static CallInst * 2305 FindPredecessorAutoreleaseWithSafePath(const Value *Arg, BasicBlock *BB, 2306 ReturnInst *Ret, 2307 ProvenanceAnalysis &PA) { 2308 SmallPtrSet<Instruction *, 4> DepInsts; 2309 auto *Autorelease = dyn_cast_or_null<CallInst>( 2310 findSingleDependency(NeedsPositiveRetainCount, Arg, BB, Ret, PA)); 2311 2312 if (!Autorelease) 2313 return nullptr; 2314 ARCInstKind AutoreleaseClass = GetBasicARCInstKind(Autorelease); 2315 if (!IsAutorelease(AutoreleaseClass)) 2316 return nullptr; 2317 if (GetArgRCIdentityRoot(Autorelease) != Arg) 2318 return nullptr; 2319 2320 return Autorelease; 2321 } 2322 2323 /// Look for this pattern: 2324 /// \code 2325 /// %call = call i8* @something(...) 2326 /// %2 = call i8* @objc_retain(i8* %call) 2327 /// %3 = call i8* @objc_autorelease(i8* %2) 2328 /// ret i8* %3 2329 /// \endcode 2330 /// And delete the retain and autorelease. 2331 void ObjCARCOpt::OptimizeReturns(Function &F) { 2332 if (!F.getReturnType()->isPointerTy()) 2333 return; 2334 2335 LLVM_DEBUG(dbgs() << "\n== ObjCARCOpt::OptimizeReturns ==\n"); 2336 2337 for (BasicBlock &BB: F) { 2338 ReturnInst *Ret = dyn_cast<ReturnInst>(&BB.back()); 2339 if (!Ret) 2340 continue; 2341 2342 LLVM_DEBUG(dbgs() << "Visiting: " << *Ret << "\n"); 2343 2344 const Value *Arg = GetRCIdentityRoot(Ret->getOperand(0)); 2345 2346 // Look for an ``autorelease'' instruction that is a predecessor of Ret and 2347 // dependent on Arg such that there are no instructions dependent on Arg 2348 // that need a positive ref count in between the autorelease and Ret. 2349 CallInst *Autorelease = 2350 FindPredecessorAutoreleaseWithSafePath(Arg, &BB, Ret, PA); 2351 2352 if (!Autorelease) 2353 continue; 2354 2355 CallInst *Retain = FindPredecessorRetainWithSafePath( 2356 Arg, Autorelease->getParent(), Autorelease, PA); 2357 2358 if (!Retain) 2359 continue; 2360 2361 // Check that there is nothing that can affect the reference count 2362 // between the retain and the call. Note that Retain need not be in BB. 2363 CallInst *Call = HasSafePathToPredecessorCall(Arg, Retain, PA); 2364 2365 // Don't remove retainRV/autoreleaseRV pairs if the call isn't a tail call. 2366 if (!Call || 2367 (!Call->isTailCall() && 2368 GetBasicARCInstKind(Retain) == ARCInstKind::RetainRV && 2369 GetBasicARCInstKind(Autorelease) == ARCInstKind::AutoreleaseRV)) 2370 continue; 2371 2372 // If so, we can zap the retain and autorelease. 
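// Continuing the pattern shown in the function comment, the block then
// reduces to (illustrative):
//
//   %call = call i8* @something(...)
//   ret i8* %call
//
// because both erased calls forward their argument, so their uses are
// rewritten to %call as they are removed.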
2373 Changed = true; 2374 ++NumRets; 2375 LLVM_DEBUG(dbgs() << "Erasing: " << *Retain << "\nErasing: " << *Autorelease 2376 << "\n"); 2377 BundledInsts->eraseInst(Retain); 2378 EraseInstruction(Autorelease); 2379 } 2380 } 2381 2382 #ifndef NDEBUG 2383 void 2384 ObjCARCOpt::GatherStatistics(Function &F, bool AfterOptimization) { 2385 Statistic &NumRetains = 2386 AfterOptimization ? NumRetainsAfterOpt : NumRetainsBeforeOpt; 2387 Statistic &NumReleases = 2388 AfterOptimization ? NumReleasesAfterOpt : NumReleasesBeforeOpt; 2389 2390 for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) { 2391 Instruction *Inst = &*I++; 2392 switch (GetBasicARCInstKind(Inst)) { 2393 default: 2394 break; 2395 case ARCInstKind::Retain: 2396 ++NumRetains; 2397 break; 2398 case ARCInstKind::Release: 2399 ++NumReleases; 2400 break; 2401 } 2402 } 2403 } 2404 #endif 2405 2406 void ObjCARCOpt::init(Function &F) { 2407 if (!EnableARCOpts) 2408 return; 2409 2410 // Intuitively, objc_retain and others are nocapture, however in practice 2411 // they are not, because they return their argument value. And objc_release 2412 // calls finalizers which can have arbitrary side effects. 2413 MDKindCache.init(F.getParent()); 2414 2415 // Initialize our runtime entry point cache. 2416 EP.init(F.getParent()); 2417 2418 // Compute which blocks are in which funclet. 2419 if (F.hasPersonalityFn() && 2420 isScopedEHPersonality(classifyEHPersonality(F.getPersonalityFn()))) 2421 BlockEHColors = colorEHFunclets(F); 2422 } 2423 2424 bool ObjCARCOpt::run(Function &F, AAResults &AA) { 2425 if (!EnableARCOpts) 2426 return false; 2427 2428 Changed = CFGChanged = false; 2429 BundledRetainClaimRVs BRV(/*ContractPass=*/false); 2430 BundledInsts = &BRV; 2431 2432 LLVM_DEBUG(dbgs() << "<<< ObjCARCOpt: Visiting Function: " << F.getName() 2433 << " >>>" 2434 "\n"); 2435 2436 std::pair<bool, bool> R = BundledInsts->insertAfterInvokes(F, nullptr); 2437 Changed |= R.first; 2438 CFGChanged |= R.second; 2439 2440 PA.setAA(&AA); 2441 2442 #ifndef NDEBUG 2443 if (AreStatisticsEnabled()) { 2444 GatherStatistics(F, false); 2445 } 2446 #endif 2447 2448 // This pass performs several distinct transformations. As a compile-time aid 2449 // when compiling code that isn't ObjC, skip these if the relevant ObjC 2450 // library functions aren't declared. 2451 2452 // Preliminary optimizations. This also computes UsedInThisFunction. 2453 OptimizeIndividualCalls(F); 2454 2455 // Optimizations for weak pointers. 2456 if (UsedInThisFunction & ((1 << unsigned(ARCInstKind::LoadWeak)) | 2457 (1 << unsigned(ARCInstKind::LoadWeakRetained)) | 2458 (1 << unsigned(ARCInstKind::StoreWeak)) | 2459 (1 << unsigned(ARCInstKind::InitWeak)) | 2460 (1 << unsigned(ARCInstKind::CopyWeak)) | 2461 (1 << unsigned(ARCInstKind::MoveWeak)) | 2462 (1 << unsigned(ARCInstKind::DestroyWeak)))) 2463 OptimizeWeakCalls(F); 2464 2465 // Optimizations for retain+release pairs. 2466 if (UsedInThisFunction & ((1 << unsigned(ARCInstKind::Retain)) | 2467 (1 << unsigned(ARCInstKind::RetainRV)) | 2468 (1 << unsigned(ARCInstKind::RetainBlock)))) 2469 if (UsedInThisFunction & (1 << unsigned(ARCInstKind::Release))) 2470 // Run OptimizeSequences until it either stops making changes or 2471 // no retain+release pair nesting is detected. 2472 while (OptimizeSequences(F)) {} 2473 2474 // Optimizations if objc_autorelease is used. 
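// As with the guards above, this tests the UsedInThisFunction bit mask (one
// bit per ARCInstKind, populated during OptimizeIndividualCalls) so the whole
// phase is skipped when no autorelease calls appear in the function.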
2475 if (UsedInThisFunction & ((1 << unsigned(ARCInstKind::Autorelease)) | 2476 (1 << unsigned(ARCInstKind::AutoreleaseRV)))) 2477 OptimizeReturns(F); 2478 2479 // Gather statistics after optimization. 2480 #ifndef NDEBUG 2481 if (AreStatisticsEnabled()) { 2482 GatherStatistics(F, true); 2483 } 2484 #endif 2485 2486 LLVM_DEBUG(dbgs() << "\n"); 2487 2488 return Changed; 2489 } 2490 2491 /// @} 2492 /// 2493 2494 PreservedAnalyses ObjCARCOptPass::run(Function &F, 2495 FunctionAnalysisManager &AM) { 2496 ObjCARCOpt OCAO; 2497 OCAO.init(F); 2498 2499 bool Changed = OCAO.run(F, AM.getResult<AAManager>(F)); 2500 bool CFGChanged = OCAO.hasCFGChanged(); 2501 if (Changed) { 2502 PreservedAnalyses PA; 2503 if (!CFGChanged) 2504 PA.preserveSet<CFGAnalyses>(); 2505 return PA; 2506 } 2507 return PreservedAnalyses::all(); 2508 } 2509