1 //===- GuardWidening.cpp - ---- Guard widening ----------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements the guard widening pass. The semantics of the 10 // @llvm.experimental.guard intrinsic lets LLVM transform it so that it fails 11 // more often that it did before the transform. This optimization is called 12 // "widening" and can be used hoist and common runtime checks in situations like 13 // these: 14 // 15 // %cmp0 = 7 u< Length 16 // call @llvm.experimental.guard(i1 %cmp0) [ "deopt"(...) ] 17 // call @unknown_side_effects() 18 // %cmp1 = 9 u< Length 19 // call @llvm.experimental.guard(i1 %cmp1) [ "deopt"(...) ] 20 // ... 21 // 22 // => 23 // 24 // %cmp0 = 9 u< Length 25 // call @llvm.experimental.guard(i1 %cmp0) [ "deopt"(...) ] 26 // call @unknown_side_effects() 27 // ... 28 // 29 // If %cmp0 is false, @llvm.experimental.guard will "deoptimize" back to a 30 // generic implementation of the same function, which will have the correct 31 // semantics from that point onward. It is always _legal_ to deoptimize (so 32 // replacing %cmp0 with false is "correct"), though it may not always be 33 // profitable to do so. 34 // 35 // NB! This pass is a work in progress. It hasn't been tuned to be "production 36 // ready" yet. It is known to have quadriatic running time and will not scale 37 // to large numbers of guards 38 // 39 //===----------------------------------------------------------------------===// 40 41 #include "llvm/Transforms/Scalar/GuardWidening.h" 42 #include "llvm/ADT/DenseMap.h" 43 #include "llvm/ADT/DepthFirstIterator.h" 44 #include "llvm/ADT/Statistic.h" 45 #include "llvm/Analysis/AssumptionCache.h" 46 #include "llvm/Analysis/GuardUtils.h" 47 #include "llvm/Analysis/LoopInfo.h" 48 #include "llvm/Analysis/MemorySSAUpdater.h" 49 #include "llvm/Analysis/PostDominators.h" 50 #include "llvm/Analysis/ValueTracking.h" 51 #include "llvm/IR/ConstantRange.h" 52 #include "llvm/IR/Dominators.h" 53 #include "llvm/IR/IRBuilder.h" 54 #include "llvm/IR/IntrinsicInst.h" 55 #include "llvm/IR/PatternMatch.h" 56 #include "llvm/Support/CommandLine.h" 57 #include "llvm/Support/Debug.h" 58 #include "llvm/Support/KnownBits.h" 59 #include "llvm/Transforms/Scalar.h" 60 #include "llvm/Transforms/Utils/GuardUtils.h" 61 #include "llvm/Transforms/Utils/LoopUtils.h" 62 #include <functional> 63 64 using namespace llvm; 65 66 #define DEBUG_TYPE "guard-widening" 67 68 STATISTIC(GuardsEliminated, "Number of eliminated guards"); 69 STATISTIC(CondBranchEliminated, "Number of eliminated conditional branches"); 70 STATISTIC(FreezeAdded, "Number of freeze instruction introduced"); 71 72 static cl::opt<bool> 73 WidenBranchGuards("guard-widening-widen-branch-guards", cl::Hidden, 74 cl::desc("Whether or not we should widen guards " 75 "expressed as branches by widenable conditions"), 76 cl::init(true)); 77 78 namespace { 79 80 // Get the condition of \p I. It can either be a guard or a conditional branch. 81 static Value *getCondition(Instruction *I) { 82 if (IntrinsicInst *GI = dyn_cast<IntrinsicInst>(I)) { 83 assert(GI->getIntrinsicID() == Intrinsic::experimental_guard && 84 "Bad guard intrinsic?"); 85 return GI->getArgOperand(0); 86 } 87 Value *Cond, *WC; 88 BasicBlock *IfTrueBB, *IfFalseBB; 89 if (parseWidenableBranch(I, Cond, WC, IfTrueBB, IfFalseBB)) 90 return Cond; 91 92 return cast<BranchInst>(I)->getCondition(); 93 } 94 95 // Set the condition for \p I to \p NewCond. \p I can either be a guard or a 96 // conditional branch. 97 static void setCondition(Instruction *I, Value *NewCond) { 98 if (IntrinsicInst *GI = dyn_cast<IntrinsicInst>(I)) { 99 assert(GI->getIntrinsicID() == Intrinsic::experimental_guard && 100 "Bad guard intrinsic?"); 101 GI->setArgOperand(0, NewCond); 102 return; 103 } 104 cast<BranchInst>(I)->setCondition(NewCond); 105 } 106 107 // Eliminates the guard instruction properly. 108 static void eliminateGuard(Instruction *GuardInst, MemorySSAUpdater *MSSAU) { 109 GuardInst->eraseFromParent(); 110 if (MSSAU) 111 MSSAU->removeMemoryAccess(GuardInst); 112 ++GuardsEliminated; 113 } 114 115 /// Find a point at which the widened condition of \p Guard should be inserted. 116 /// When it is represented as intrinsic call, we can do it right before the call 117 /// instruction. However, when we are dealing with widenable branch, we must 118 /// account for the following situation: widening should not turn a 119 /// loop-invariant condition into a loop-variant. It means that if 120 /// widenable.condition() call is invariant (w.r.t. any loop), the new wide 121 /// condition should stay invariant. Otherwise there can be a miscompile, like 122 /// the one described at https://github.com/llvm/llvm-project/issues/60234. The 123 /// safest way to do it is to expand the new condition at WC's block. 124 static std::optional<BasicBlock::iterator> 125 findInsertionPointForWideCondition(Instruction *WCOrGuard) { 126 if (isGuard(WCOrGuard)) 127 return WCOrGuard->getIterator(); 128 if (auto WC = extractWidenableCondition(WCOrGuard)) 129 return cast<Instruction>(WC)->getIterator(); 130 return std::nullopt; 131 } 132 133 class GuardWideningImpl { 134 DominatorTree &DT; 135 PostDominatorTree *PDT; 136 LoopInfo &LI; 137 AssumptionCache &AC; 138 MemorySSAUpdater *MSSAU; 139 140 /// Together, these describe the region of interest. This might be all of 141 /// the blocks within a function, or only a given loop's blocks and preheader. 142 DomTreeNode *Root; 143 std::function<bool(BasicBlock*)> BlockFilter; 144 145 /// The set of guards and conditional branches whose conditions have been 146 /// widened into dominating guards. 147 SmallVector<Instruction *, 16> EliminatedGuardsAndBranches; 148 149 /// The set of guards which have been widened to include conditions to other 150 /// guards. 151 DenseSet<Instruction *> WidenedGuards; 152 153 /// Try to eliminate instruction \p Instr by widening it into an earlier 154 /// dominating guard. \p DFSI is the DFS iterator on the dominator tree that 155 /// is currently visiting the block containing \p Guard, and \p GuardsPerBlock 156 /// maps BasicBlocks to the set of guards seen in that block. 157 bool eliminateInstrViaWidening( 158 Instruction *Instr, const df_iterator<DomTreeNode *> &DFSI, 159 const DenseMap<BasicBlock *, SmallVector<Instruction *, 8>> 160 &GuardsPerBlock); 161 162 /// Used to keep track of which widening potential is more effective. 163 enum WideningScore { 164 /// Don't widen. 165 WS_IllegalOrNegative, 166 167 /// Widening is performance neutral as far as the cycles spent in check 168 /// conditions goes (but can still help, e.g., code layout, having less 169 /// deopt state). 170 WS_Neutral, 171 172 /// Widening is profitable. 173 WS_Positive, 174 175 /// Widening is very profitable. Not significantly different from \c 176 /// WS_Positive, except by the order. 177 WS_VeryPositive 178 }; 179 180 static StringRef scoreTypeToString(WideningScore WS); 181 182 /// Compute the score for widening the condition in \p DominatedInstr 183 /// into \p WideningPoint. 184 WideningScore computeWideningScore(Instruction *DominatedInstr, 185 Instruction *ToWiden, 186 BasicBlock::iterator WideningPoint, 187 SmallVectorImpl<Value *> &ChecksToHoist, 188 SmallVectorImpl<Value *> &ChecksToWiden); 189 190 /// Helper to check if \p V can be hoisted to \p InsertPos. 191 bool canBeHoistedTo(const Value *V, BasicBlock::iterator InsertPos) const { 192 SmallPtrSet<const Instruction *, 8> Visited; 193 return canBeHoistedTo(V, InsertPos, Visited); 194 } 195 196 bool canBeHoistedTo(const Value *V, BasicBlock::iterator InsertPos, 197 SmallPtrSetImpl<const Instruction *> &Visited) const; 198 199 bool canBeHoistedTo(const SmallVectorImpl<Value *> &Checks, 200 BasicBlock::iterator InsertPos) const { 201 return all_of(Checks, 202 [&](const Value *V) { return canBeHoistedTo(V, InsertPos); }); 203 } 204 /// Helper to hoist \p V to \p InsertPos. Guaranteed to succeed if \c 205 /// canBeHoistedTo returned true. 206 void makeAvailableAt(Value *V, BasicBlock::iterator InsertPos) const; 207 208 void makeAvailableAt(const SmallVectorImpl<Value *> &Checks, 209 BasicBlock::iterator InsertPos) const { 210 for (Value *V : Checks) 211 makeAvailableAt(V, InsertPos); 212 } 213 214 /// Common helper used by \c widenGuard and \c isWideningCondProfitable. Try 215 /// to generate an expression computing the logical AND of \p ChecksToHoist 216 /// and \p ChecksToWiden. Return true if the expression computing the AND is 217 /// only as expensive as computing one of the set of expressions. If \p 218 /// InsertPt is true then actually generate the resulting expression, make it 219 /// available at \p InsertPt and return it in \p Result (else no change to the 220 /// IR is made). 221 std::optional<Value *> 222 mergeChecks(SmallVectorImpl<Value *> &ChecksToHoist, 223 SmallVectorImpl<Value *> &ChecksToWiden, 224 std::optional<BasicBlock::iterator> InsertPt); 225 226 /// Generate the logical AND of \p ChecksToHoist and \p OldCondition and make 227 /// it available at InsertPt 228 Value *hoistChecks(SmallVectorImpl<Value *> &ChecksToHoist, 229 Value *OldCondition, BasicBlock::iterator InsertPt); 230 231 /// Adds freeze to Orig and push it as far as possible very aggressively. 232 /// Also replaces all uses of frozen instruction with frozen version. 233 Value *freezeAndPush(Value *Orig, BasicBlock::iterator InsertPt); 234 235 /// Represents a range check of the form \c Base + \c Offset u< \c Length, 236 /// with the constraint that \c Length is not negative. \c CheckInst is the 237 /// pre-existing instruction in the IR that computes the result of this range 238 /// check. 239 class RangeCheck { 240 const Value *Base; 241 const ConstantInt *Offset; 242 const Value *Length; 243 ICmpInst *CheckInst; 244 245 public: 246 explicit RangeCheck(const Value *Base, const ConstantInt *Offset, 247 const Value *Length, ICmpInst *CheckInst) 248 : Base(Base), Offset(Offset), Length(Length), CheckInst(CheckInst) {} 249 250 void setBase(const Value *NewBase) { Base = NewBase; } 251 void setOffset(const ConstantInt *NewOffset) { Offset = NewOffset; } 252 253 const Value *getBase() const { return Base; } 254 const ConstantInt *getOffset() const { return Offset; } 255 const APInt &getOffsetValue() const { return getOffset()->getValue(); } 256 const Value *getLength() const { return Length; }; 257 ICmpInst *getCheckInst() const { return CheckInst; } 258 259 void print(raw_ostream &OS, bool PrintTypes = false) { 260 OS << "Base: "; 261 Base->printAsOperand(OS, PrintTypes); 262 OS << " Offset: "; 263 Offset->printAsOperand(OS, PrintTypes); 264 OS << " Length: "; 265 Length->printAsOperand(OS, PrintTypes); 266 } 267 268 LLVM_DUMP_METHOD void dump() { 269 print(dbgs()); 270 dbgs() << "\n"; 271 } 272 }; 273 274 /// Parse \p ToParse into a conjunction (logical-and) of range checks; and 275 /// append them to \p Checks. Returns true on success, may clobber \c Checks 276 /// on failure. 277 bool parseRangeChecks(SmallVectorImpl<Value *> &ToParse, 278 SmallVectorImpl<RangeCheck> &Checks) { 279 for (auto CheckCond : ToParse) { 280 if (!parseRangeChecks(CheckCond, Checks)) 281 return false; 282 } 283 return true; 284 } 285 286 bool parseRangeChecks(Value *CheckCond, SmallVectorImpl<RangeCheck> &Checks); 287 288 /// Combine the checks in \p Checks into a smaller set of checks and append 289 /// them into \p CombinedChecks. Return true on success (i.e. all of checks 290 /// in \p Checks were combined into \p CombinedChecks). Clobbers \p Checks 291 /// and \p CombinedChecks on success and on failure. 292 bool combineRangeChecks(SmallVectorImpl<RangeCheck> &Checks, 293 SmallVectorImpl<RangeCheck> &CombinedChecks) const; 294 295 /// Can we compute the logical AND of \p ChecksToHoist and \p ChecksToWiden 296 /// for the price of computing only one of the set of expressions? 297 bool isWideningCondProfitable(SmallVectorImpl<Value *> &ChecksToHoist, 298 SmallVectorImpl<Value *> &ChecksToWiden) { 299 return mergeChecks(ChecksToHoist, ChecksToWiden, /*InsertPt=*/std::nullopt) 300 .has_value(); 301 } 302 303 /// Widen \p ChecksToWiden to fail if any of \p ChecksToHoist is false 304 void widenGuard(SmallVectorImpl<Value *> &ChecksToHoist, 305 SmallVectorImpl<Value *> &ChecksToWiden, 306 Instruction *ToWiden) { 307 auto InsertPt = findInsertionPointForWideCondition(ToWiden); 308 auto MergedCheck = mergeChecks(ChecksToHoist, ChecksToWiden, InsertPt); 309 Value *Result = MergedCheck ? *MergedCheck 310 : hoistChecks(ChecksToHoist, 311 getCondition(ToWiden), *InsertPt); 312 313 if (isGuardAsWidenableBranch(ToWiden)) { 314 setWidenableBranchCond(cast<BranchInst>(ToWiden), Result); 315 return; 316 } 317 setCondition(ToWiden, Result); 318 } 319 320 public: 321 explicit GuardWideningImpl(DominatorTree &DT, PostDominatorTree *PDT, 322 LoopInfo &LI, AssumptionCache &AC, 323 MemorySSAUpdater *MSSAU, DomTreeNode *Root, 324 std::function<bool(BasicBlock *)> BlockFilter) 325 : DT(DT), PDT(PDT), LI(LI), AC(AC), MSSAU(MSSAU), Root(Root), 326 BlockFilter(BlockFilter) {} 327 328 /// The entry point for this pass. 329 bool run(); 330 }; 331 } 332 333 static bool isSupportedGuardInstruction(const Instruction *Insn) { 334 if (isGuard(Insn)) 335 return true; 336 if (WidenBranchGuards && isGuardAsWidenableBranch(Insn)) 337 return true; 338 return false; 339 } 340 341 bool GuardWideningImpl::run() { 342 DenseMap<BasicBlock *, SmallVector<Instruction *, 8>> GuardsInBlock; 343 bool Changed = false; 344 for (auto DFI = df_begin(Root), DFE = df_end(Root); 345 DFI != DFE; ++DFI) { 346 auto *BB = (*DFI)->getBlock(); 347 if (!BlockFilter(BB)) 348 continue; 349 350 auto &CurrentList = GuardsInBlock[BB]; 351 352 for (auto &I : *BB) 353 if (isSupportedGuardInstruction(&I)) 354 CurrentList.push_back(cast<Instruction>(&I)); 355 356 for (auto *II : CurrentList) 357 Changed |= eliminateInstrViaWidening(II, DFI, GuardsInBlock); 358 } 359 360 assert(EliminatedGuardsAndBranches.empty() || Changed); 361 for (auto *I : EliminatedGuardsAndBranches) 362 if (!WidenedGuards.count(I)) { 363 assert(isa<ConstantInt>(getCondition(I)) && "Should be!"); 364 if (isSupportedGuardInstruction(I)) 365 eliminateGuard(I, MSSAU); 366 else { 367 assert(isa<BranchInst>(I) && 368 "Eliminated something other than guard or branch?"); 369 ++CondBranchEliminated; 370 } 371 } 372 373 return Changed; 374 } 375 376 bool GuardWideningImpl::eliminateInstrViaWidening( 377 Instruction *Instr, const df_iterator<DomTreeNode *> &DFSI, 378 const DenseMap<BasicBlock *, SmallVector<Instruction *, 8>> 379 &GuardsInBlock) { 380 SmallVector<Value *> ChecksToHoist; 381 parseWidenableGuard(Instr, ChecksToHoist); 382 // Ignore trivial true or false conditions. These instructions will be 383 // trivially eliminated by any cleanup pass. Do not erase them because other 384 // guards can possibly be widened into them. 385 if (ChecksToHoist.empty() || 386 (ChecksToHoist.size() == 1 && isa<ConstantInt>(ChecksToHoist.front()))) 387 return false; 388 389 Instruction *BestSoFar = nullptr; 390 auto BestScoreSoFar = WS_IllegalOrNegative; 391 392 // In the set of dominating guards, find the one we can merge GuardInst with 393 // for the most profit. 394 for (unsigned i = 0, e = DFSI.getPathLength(); i != e; ++i) { 395 auto *CurBB = DFSI.getPath(i)->getBlock(); 396 if (!BlockFilter(CurBB)) 397 break; 398 assert(GuardsInBlock.count(CurBB) && "Must have been populated by now!"); 399 const auto &GuardsInCurBB = GuardsInBlock.find(CurBB)->second; 400 401 auto I = GuardsInCurBB.begin(); 402 auto E = Instr->getParent() == CurBB ? find(GuardsInCurBB, Instr) 403 : GuardsInCurBB.end(); 404 405 #ifndef NDEBUG 406 { 407 unsigned Index = 0; 408 for (auto &I : *CurBB) { 409 if (Index == GuardsInCurBB.size()) 410 break; 411 if (GuardsInCurBB[Index] == &I) 412 Index++; 413 } 414 assert(Index == GuardsInCurBB.size() && 415 "Guards expected to be in order!"); 416 } 417 #endif 418 419 assert((i == (e - 1)) == (Instr->getParent() == CurBB) && "Bad DFS?"); 420 421 for (auto *Candidate : make_range(I, E)) { 422 auto WideningPoint = findInsertionPointForWideCondition(Candidate); 423 if (!WideningPoint) 424 continue; 425 SmallVector<Value *> CandidateChecks; 426 parseWidenableGuard(Candidate, CandidateChecks); 427 auto Score = computeWideningScore(Instr, Candidate, *WideningPoint, 428 ChecksToHoist, CandidateChecks); 429 LLVM_DEBUG(dbgs() << "Score between " << *Instr << " and " << *Candidate 430 << " is " << scoreTypeToString(Score) << "\n"); 431 if (Score > BestScoreSoFar) { 432 BestScoreSoFar = Score; 433 BestSoFar = Candidate; 434 } 435 } 436 } 437 438 if (BestScoreSoFar == WS_IllegalOrNegative) { 439 LLVM_DEBUG(dbgs() << "Did not eliminate guard " << *Instr << "\n"); 440 return false; 441 } 442 443 assert(BestSoFar != Instr && "Should have never visited same guard!"); 444 assert(DT.dominates(BestSoFar, Instr) && "Should be!"); 445 446 LLVM_DEBUG(dbgs() << "Widening " << *Instr << " into " << *BestSoFar 447 << " with score " << scoreTypeToString(BestScoreSoFar) 448 << "\n"); 449 SmallVector<Value *> ChecksToWiden; 450 parseWidenableGuard(BestSoFar, ChecksToWiden); 451 widenGuard(ChecksToHoist, ChecksToWiden, BestSoFar); 452 auto NewGuardCondition = ConstantInt::getTrue(Instr->getContext()); 453 setCondition(Instr, NewGuardCondition); 454 EliminatedGuardsAndBranches.push_back(Instr); 455 WidenedGuards.insert(BestSoFar); 456 return true; 457 } 458 459 GuardWideningImpl::WideningScore GuardWideningImpl::computeWideningScore( 460 Instruction *DominatedInstr, Instruction *ToWiden, 461 BasicBlock::iterator WideningPoint, SmallVectorImpl<Value *> &ChecksToHoist, 462 SmallVectorImpl<Value *> &ChecksToWiden) { 463 Loop *DominatedInstrLoop = LI.getLoopFor(DominatedInstr->getParent()); 464 Loop *DominatingGuardLoop = LI.getLoopFor(WideningPoint->getParent()); 465 bool HoistingOutOfLoop = false; 466 467 if (DominatingGuardLoop != DominatedInstrLoop) { 468 // Be conservative and don't widen into a sibling loop. TODO: If the 469 // sibling is colder, we should consider allowing this. 470 if (DominatingGuardLoop && 471 !DominatingGuardLoop->contains(DominatedInstrLoop)) 472 return WS_IllegalOrNegative; 473 474 HoistingOutOfLoop = true; 475 } 476 477 if (!canBeHoistedTo(ChecksToHoist, WideningPoint)) 478 return WS_IllegalOrNegative; 479 // Further in the GuardWideningImpl::hoistChecks the entire condition might be 480 // widened, not the parsed list of checks. So we need to check the possibility 481 // of that condition hoisting. 482 if (!canBeHoistedTo(getCondition(ToWiden), WideningPoint)) 483 return WS_IllegalOrNegative; 484 485 // If the guard was conditional executed, it may never be reached 486 // dynamically. There are two potential downsides to hoisting it out of the 487 // conditionally executed region: 1) we may spuriously deopt without need and 488 // 2) we have the extra cost of computing the guard condition in the common 489 // case. At the moment, we really only consider the second in our heuristic 490 // here. TODO: evaluate cost model for spurious deopt 491 // NOTE: As written, this also lets us hoist right over another guard which 492 // is essentially just another spelling for control flow. 493 if (isWideningCondProfitable(ChecksToHoist, ChecksToWiden)) 494 return HoistingOutOfLoop ? WS_VeryPositive : WS_Positive; 495 496 if (HoistingOutOfLoop) 497 return WS_Positive; 498 499 // For a given basic block \p BB, return its successor which is guaranteed or 500 // highly likely will be taken as its successor. 501 auto GetLikelySuccessor = [](const BasicBlock * BB)->const BasicBlock * { 502 if (auto *UniqueSucc = BB->getUniqueSuccessor()) 503 return UniqueSucc; 504 auto *Term = BB->getTerminator(); 505 Value *Cond = nullptr; 506 const BasicBlock *IfTrue = nullptr, *IfFalse = nullptr; 507 using namespace PatternMatch; 508 if (!match(Term, m_Br(m_Value(Cond), m_BasicBlock(IfTrue), 509 m_BasicBlock(IfFalse)))) 510 return nullptr; 511 // For constant conditions, only one dynamical successor is possible 512 if (auto *ConstCond = dyn_cast<ConstantInt>(Cond)) 513 return ConstCond->isAllOnesValue() ? IfTrue : IfFalse; 514 // If one of successors ends with deopt, another one is likely. 515 if (IfFalse->getPostdominatingDeoptimizeCall()) 516 return IfTrue; 517 if (IfTrue->getPostdominatingDeoptimizeCall()) 518 return IfFalse; 519 // TODO: Use branch frequency metatada to allow hoisting through non-deopt 520 // branches? 521 return nullptr; 522 }; 523 524 // Returns true if we might be hoisting above explicit control flow into a 525 // considerably hotter block. Note that this completely ignores implicit 526 // control flow (guards, calls which throw, etc...). That choice appears 527 // arbitrary (we assume that implicit control flow exits are all rare). 528 auto MaybeHoistingToHotterBlock = [&]() { 529 const auto *DominatingBlock = WideningPoint->getParent(); 530 const auto *DominatedBlock = DominatedInstr->getParent(); 531 532 // Descend as low as we can, always taking the likely successor. 533 assert(DT.isReachableFromEntry(DominatingBlock) && "Unreached code"); 534 assert(DT.isReachableFromEntry(DominatedBlock) && "Unreached code"); 535 assert(DT.dominates(DominatingBlock, DominatedBlock) && "No dominance"); 536 while (DominatedBlock != DominatingBlock) { 537 auto *LikelySucc = GetLikelySuccessor(DominatingBlock); 538 // No likely successor? 539 if (!LikelySucc) 540 break; 541 // Only go down the dominator tree. 542 if (!DT.properlyDominates(DominatingBlock, LikelySucc)) 543 break; 544 DominatingBlock = LikelySucc; 545 } 546 547 // Found? 548 if (DominatedBlock == DominatingBlock) 549 return false; 550 // We followed the likely successor chain and went past the dominated 551 // block. It means that the dominated guard is in dead/very cold code. 552 if (!DT.dominates(DominatingBlock, DominatedBlock)) 553 return true; 554 // TODO: diamond, triangle cases 555 if (!PDT) 556 return true; 557 return !PDT->dominates(DominatedBlock, DominatingBlock); 558 }; 559 560 return MaybeHoistingToHotterBlock() ? WS_IllegalOrNegative : WS_Neutral; 561 } 562 563 bool GuardWideningImpl::canBeHoistedTo( 564 const Value *V, BasicBlock::iterator Loc, 565 SmallPtrSetImpl<const Instruction *> &Visited) const { 566 auto *Inst = dyn_cast<Instruction>(V); 567 if (!Inst || DT.dominates(Inst, Loc) || Visited.count(Inst)) 568 return true; 569 570 if (!isSafeToSpeculativelyExecute(Inst, Loc, &AC, &DT) || 571 Inst->mayReadFromMemory()) 572 return false; 573 574 Visited.insert(Inst); 575 576 // We only want to go _up_ the dominance chain when recursing. 577 assert(!isa<PHINode>(Loc) && 578 "PHIs should return false for isSafeToSpeculativelyExecute"); 579 assert(DT.isReachableFromEntry(Inst->getParent()) && 580 "We did a DFS from the block entry!"); 581 return all_of(Inst->operands(), 582 [&](Value *Op) { return canBeHoistedTo(Op, Loc, Visited); }); 583 } 584 585 void GuardWideningImpl::makeAvailableAt(Value *V, 586 BasicBlock::iterator Loc) const { 587 auto *Inst = dyn_cast<Instruction>(V); 588 if (!Inst || DT.dominates(Inst, Loc)) 589 return; 590 591 assert(isSafeToSpeculativelyExecute(Inst, Loc, &AC, &DT) && 592 !Inst->mayReadFromMemory() && 593 "Should've checked with canBeHoistedTo!"); 594 595 for (Value *Op : Inst->operands()) 596 makeAvailableAt(Op, Loc); 597 598 Inst->moveBefore(*Loc->getParent(), Loc); 599 } 600 601 // Return Instruction before which we can insert freeze for the value V as close 602 // to def as possible. If there is no place to add freeze, return empty. 603 static std::optional<BasicBlock::iterator> 604 getFreezeInsertPt(Value *V, const DominatorTree &DT) { 605 auto *I = dyn_cast<Instruction>(V); 606 if (!I) 607 return DT.getRoot()->getFirstNonPHIOrDbgOrAlloca()->getIterator(); 608 609 std::optional<BasicBlock::iterator> Res = I->getInsertionPointAfterDef(); 610 // If there is no place to add freeze - return nullptr. 611 if (!Res || !DT.dominates(I, &**Res)) 612 return std::nullopt; 613 614 Instruction *ResInst = &**Res; 615 616 // If there is a User dominated by original I, then it should be dominated 617 // by Freeze instruction as well. 618 if (any_of(I->users(), [&](User *U) { 619 Instruction *User = cast<Instruction>(U); 620 return ResInst != User && DT.dominates(I, User) && 621 !DT.dominates(ResInst, User); 622 })) 623 return std::nullopt; 624 return Res; 625 } 626 627 Value *GuardWideningImpl::freezeAndPush(Value *Orig, 628 BasicBlock::iterator InsertPt) { 629 if (isGuaranteedNotToBePoison(Orig, nullptr, InsertPt, &DT)) 630 return Orig; 631 std::optional<BasicBlock::iterator> InsertPtAtDef = 632 getFreezeInsertPt(Orig, DT); 633 if (!InsertPtAtDef) { 634 FreezeInst *FI = new FreezeInst(Orig, "gw.freeze"); 635 FI->insertBefore(*InsertPt->getParent(), InsertPt); 636 return FI; 637 } 638 if (isa<Constant>(Orig) || isa<GlobalValue>(Orig)) { 639 BasicBlock::iterator InsertPt = *InsertPtAtDef; 640 FreezeInst *FI = new FreezeInst(Orig, "gw.freeze"); 641 FI->insertBefore(*InsertPt->getParent(), InsertPt); 642 return FI; 643 } 644 645 SmallSet<Value *, 16> Visited; 646 SmallVector<Value *, 16> Worklist; 647 SmallSet<Instruction *, 16> DropPoisonFlags; 648 SmallVector<Value *, 16> NeedFreeze; 649 DenseMap<Value *, FreezeInst *> CacheOfFreezes; 650 651 // A bit overloaded data structures. Visited contains constant/GV 652 // if we already met it. In this case CacheOfFreezes has a freeze if it is 653 // required. 654 auto handleConstantOrGlobal = [&](Use &U) { 655 Value *Def = U.get(); 656 if (!isa<Constant>(Def) && !isa<GlobalValue>(Def)) 657 return false; 658 659 if (Visited.insert(Def).second) { 660 if (isGuaranteedNotToBePoison(Def, nullptr, InsertPt, &DT)) 661 return true; 662 BasicBlock::iterator InsertPt = *getFreezeInsertPt(Def, DT); 663 FreezeInst *FI = new FreezeInst(Def, Def->getName() + ".gw.fr"); 664 FI->insertBefore(*InsertPt->getParent(), InsertPt); 665 CacheOfFreezes[Def] = FI; 666 } 667 668 if (CacheOfFreezes.count(Def)) 669 U.set(CacheOfFreezes[Def]); 670 return true; 671 }; 672 673 Worklist.push_back(Orig); 674 while (!Worklist.empty()) { 675 Value *V = Worklist.pop_back_val(); 676 if (!Visited.insert(V).second) 677 continue; 678 679 if (isGuaranteedNotToBePoison(V, nullptr, InsertPt, &DT)) 680 continue; 681 682 Instruction *I = dyn_cast<Instruction>(V); 683 if (!I || canCreateUndefOrPoison(cast<Operator>(I), 684 /*ConsiderFlagsAndMetadata*/ false)) { 685 NeedFreeze.push_back(V); 686 continue; 687 } 688 // Check all operands. If for any of them we cannot insert Freeze, 689 // stop here. Otherwise, iterate. 690 if (any_of(I->operands(), [&](Value *Op) { 691 return isa<Instruction>(Op) && !getFreezeInsertPt(Op, DT); 692 })) { 693 NeedFreeze.push_back(I); 694 continue; 695 } 696 DropPoisonFlags.insert(I); 697 for (Use &U : I->operands()) 698 if (!handleConstantOrGlobal(U)) 699 Worklist.push_back(U.get()); 700 } 701 for (Instruction *I : DropPoisonFlags) 702 I->dropPoisonGeneratingAnnotations(); 703 704 Value *Result = Orig; 705 for (Value *V : NeedFreeze) { 706 BasicBlock::iterator FreezeInsertPt = *getFreezeInsertPt(V, DT); 707 FreezeInst *FI = new FreezeInst(V, V->getName() + ".gw.fr"); 708 FI->insertBefore(*FreezeInsertPt->getParent(), FreezeInsertPt); 709 ++FreezeAdded; 710 if (V == Orig) 711 Result = FI; 712 V->replaceUsesWithIf( 713 FI, [&](const Use & U)->bool { return U.getUser() != FI; }); 714 } 715 716 return Result; 717 } 718 719 std::optional<Value *> 720 GuardWideningImpl::mergeChecks(SmallVectorImpl<Value *> &ChecksToHoist, 721 SmallVectorImpl<Value *> &ChecksToWiden, 722 std::optional<BasicBlock::iterator> InsertPt) { 723 using namespace llvm::PatternMatch; 724 725 Value *Result = nullptr; 726 { 727 // L >u C0 && L >u C1 -> L >u max(C0, C1) 728 ConstantInt *RHS0, *RHS1; 729 Value *LHS; 730 CmpPredicate Pred0, Pred1; 731 // TODO: Support searching for pairs to merge from both whole lists of 732 // ChecksToHoist and ChecksToWiden. 733 if (ChecksToWiden.size() == 1 && ChecksToHoist.size() == 1 && 734 match(ChecksToWiden.front(), 735 m_ICmp(Pred0, m_Value(LHS), m_ConstantInt(RHS0))) && 736 match(ChecksToHoist.front(), 737 m_ICmp(Pred1, m_Specific(LHS), m_ConstantInt(RHS1)))) { 738 739 ConstantRange CR0 = 740 ConstantRange::makeExactICmpRegion(Pred0, RHS0->getValue()); 741 ConstantRange CR1 = 742 ConstantRange::makeExactICmpRegion(Pred1, RHS1->getValue()); 743 744 // Given what we're doing here and the semantics of guards, it would 745 // be correct to use a subset intersection, but that may be too 746 // aggressive in cases we care about. 747 if (std::optional<ConstantRange> Intersect = 748 CR0.exactIntersectWith(CR1)) { 749 APInt NewRHSAP; 750 CmpInst::Predicate Pred; 751 if (Intersect->getEquivalentICmp(Pred, NewRHSAP)) { 752 if (InsertPt) { 753 ConstantInt *NewRHS = 754 ConstantInt::get((*InsertPt)->getContext(), NewRHSAP); 755 assert(canBeHoistedTo(LHS, *InsertPt) && "must be"); 756 makeAvailableAt(LHS, *InsertPt); 757 Result = new ICmpInst(*InsertPt, Pred, LHS, NewRHS, "wide.chk"); 758 } 759 return Result; 760 } 761 } 762 } 763 } 764 765 { 766 SmallVector<GuardWideningImpl::RangeCheck, 4> Checks, CombinedChecks; 767 if (parseRangeChecks(ChecksToWiden, Checks) && 768 parseRangeChecks(ChecksToHoist, Checks) && 769 combineRangeChecks(Checks, CombinedChecks)) { 770 if (InsertPt) { 771 for (auto &RC : CombinedChecks) { 772 makeAvailableAt(RC.getCheckInst(), *InsertPt); 773 if (Result) 774 Result = BinaryOperator::CreateAnd(RC.getCheckInst(), Result, "", 775 *InsertPt); 776 else 777 Result = RC.getCheckInst(); 778 } 779 assert(Result && "Failed to find result value"); 780 Result->setName("wide.chk"); 781 Result = freezeAndPush(Result, *InsertPt); 782 } 783 return Result; 784 } 785 } 786 // We were not able to compute ChecksToHoist AND ChecksToWiden for the price 787 // of one. 788 return std::nullopt; 789 } 790 791 Value *GuardWideningImpl::hoistChecks(SmallVectorImpl<Value *> &ChecksToHoist, 792 Value *OldCondition, 793 BasicBlock::iterator InsertPt) { 794 assert(!ChecksToHoist.empty()); 795 IRBuilder<> Builder(InsertPt->getParent(), InsertPt); 796 makeAvailableAt(ChecksToHoist, InsertPt); 797 makeAvailableAt(OldCondition, InsertPt); 798 Value *Result = Builder.CreateAnd(ChecksToHoist); 799 Result = freezeAndPush(Result, InsertPt); 800 Result = Builder.CreateAnd(OldCondition, Result); 801 Result->setName("wide.chk"); 802 return Result; 803 } 804 805 bool GuardWideningImpl::parseRangeChecks( 806 Value *CheckCond, SmallVectorImpl<GuardWideningImpl::RangeCheck> &Checks) { 807 using namespace llvm::PatternMatch; 808 809 auto *IC = dyn_cast<ICmpInst>(CheckCond); 810 if (!IC || !IC->getOperand(0)->getType()->isIntegerTy() || 811 (IC->getPredicate() != ICmpInst::ICMP_ULT && 812 IC->getPredicate() != ICmpInst::ICMP_UGT)) 813 return false; 814 815 const Value *CmpLHS = IC->getOperand(0), *CmpRHS = IC->getOperand(1); 816 if (IC->getPredicate() == ICmpInst::ICMP_UGT) 817 std::swap(CmpLHS, CmpRHS); 818 819 auto &DL = IC->getDataLayout(); 820 821 GuardWideningImpl::RangeCheck Check( 822 CmpLHS, cast<ConstantInt>(ConstantInt::getNullValue(CmpRHS->getType())), 823 CmpRHS, IC); 824 825 if (!isKnownNonNegative(Check.getLength(), DL)) 826 return false; 827 828 // What we have in \c Check now is a correct interpretation of \p CheckCond. 829 // Try to see if we can move some constant offsets into the \c Offset field. 830 831 bool Changed; 832 auto &Ctx = CheckCond->getContext(); 833 834 do { 835 Value *OpLHS; 836 ConstantInt *OpRHS; 837 Changed = false; 838 839 #ifndef NDEBUG 840 auto *BaseInst = dyn_cast<Instruction>(Check.getBase()); 841 assert((!BaseInst || DT.isReachableFromEntry(BaseInst->getParent())) && 842 "Unreachable instruction?"); 843 #endif 844 845 if (match(Check.getBase(), m_Add(m_Value(OpLHS), m_ConstantInt(OpRHS)))) { 846 Check.setBase(OpLHS); 847 APInt NewOffset = Check.getOffsetValue() + OpRHS->getValue(); 848 Check.setOffset(ConstantInt::get(Ctx, NewOffset)); 849 Changed = true; 850 } else if (match(Check.getBase(), 851 m_Or(m_Value(OpLHS), m_ConstantInt(OpRHS)))) { 852 KnownBits Known = computeKnownBits(OpLHS, DL); 853 if ((OpRHS->getValue() & Known.Zero) == OpRHS->getValue()) { 854 Check.setBase(OpLHS); 855 APInt NewOffset = Check.getOffsetValue() + OpRHS->getValue(); 856 Check.setOffset(ConstantInt::get(Ctx, NewOffset)); 857 Changed = true; 858 } 859 } 860 } while (Changed); 861 862 Checks.push_back(Check); 863 return true; 864 } 865 866 bool GuardWideningImpl::combineRangeChecks( 867 SmallVectorImpl<GuardWideningImpl::RangeCheck> &Checks, 868 SmallVectorImpl<GuardWideningImpl::RangeCheck> &RangeChecksOut) const { 869 unsigned OldCount = Checks.size(); 870 while (!Checks.empty()) { 871 // Pick all of the range checks with a specific base and length, and try to 872 // merge them. 873 const Value *CurrentBase = Checks.front().getBase(); 874 const Value *CurrentLength = Checks.front().getLength(); 875 876 SmallVector<GuardWideningImpl::RangeCheck, 3> CurrentChecks; 877 878 auto IsCurrentCheck = [&](GuardWideningImpl::RangeCheck &RC) { 879 return RC.getBase() == CurrentBase && RC.getLength() == CurrentLength; 880 }; 881 882 copy_if(Checks, std::back_inserter(CurrentChecks), IsCurrentCheck); 883 erase_if(Checks, IsCurrentCheck); 884 885 assert(CurrentChecks.size() != 0 && "We know we have at least one!"); 886 887 if (CurrentChecks.size() < 3) { 888 llvm::append_range(RangeChecksOut, CurrentChecks); 889 continue; 890 } 891 892 // CurrentChecks.size() will typically be 3 here, but so far there has been 893 // no need to hard-code that fact. 894 895 llvm::sort(CurrentChecks, [&](const GuardWideningImpl::RangeCheck &LHS, 896 const GuardWideningImpl::RangeCheck &RHS) { 897 return LHS.getOffsetValue().slt(RHS.getOffsetValue()); 898 }); 899 900 // Note: std::sort should not invalidate the ChecksStart iterator. 901 902 const ConstantInt *MinOffset = CurrentChecks.front().getOffset(); 903 const ConstantInt *MaxOffset = CurrentChecks.back().getOffset(); 904 905 unsigned BitWidth = MaxOffset->getValue().getBitWidth(); 906 if ((MaxOffset->getValue() - MinOffset->getValue()) 907 .ugt(APInt::getSignedMinValue(BitWidth))) 908 return false; 909 910 APInt MaxDiff = MaxOffset->getValue() - MinOffset->getValue(); 911 const APInt &HighOffset = MaxOffset->getValue(); 912 auto OffsetOK = [&](const GuardWideningImpl::RangeCheck &RC) { 913 return (HighOffset - RC.getOffsetValue()).ult(MaxDiff); 914 }; 915 916 if (MaxDiff.isMinValue() || !all_of(drop_begin(CurrentChecks), OffsetOK)) 917 return false; 918 919 // We have a series of f+1 checks as: 920 // 921 // I+k_0 u< L ... Chk_0 922 // I+k_1 u< L ... Chk_1 923 // ... 924 // I+k_f u< L ... Chk_f 925 // 926 // with forall i in [0,f]: k_f-k_i u< k_f-k_0 ... Precond_0 927 // k_f-k_0 u< INT_MIN+k_f ... Precond_1 928 // k_f != k_0 ... Precond_2 929 // 930 // Claim: 931 // Chk_0 AND Chk_f implies all the other checks 932 // 933 // Informal proof sketch: 934 // 935 // We will show that the integer range [I+k_0,I+k_f] does not unsigned-wrap 936 // (i.e. going from I+k_0 to I+k_f does not cross the -1,0 boundary) and 937 // thus I+k_f is the greatest unsigned value in that range. 938 // 939 // This combined with Ckh_(f+1) shows that everything in that range is u< L. 940 // Via Precond_0 we know that all of the indices in Chk_0 through Chk_(f+1) 941 // lie in [I+k_0,I+k_f], this proving our claim. 942 // 943 // To see that [I+k_0,I+k_f] is not a wrapping range, note that there are 944 // two possibilities: I+k_0 u< I+k_f or I+k_0 >u I+k_f (they can't be equal 945 // since k_0 != k_f). In the former case, [I+k_0,I+k_f] is not a wrapping 946 // range by definition, and the latter case is impossible: 947 // 948 // 0-----I+k_f---I+k_0----L---INT_MAX,INT_MIN------------------(-1) 949 // xxxxxx xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx 950 // 951 // For Chk_0 to succeed, we'd have to have k_f-k_0 (the range highlighted 952 // with 'x' above) to be at least >u INT_MIN. 953 954 RangeChecksOut.emplace_back(CurrentChecks.front()); 955 RangeChecksOut.emplace_back(CurrentChecks.back()); 956 } 957 958 assert(RangeChecksOut.size() <= OldCount && "We pessimized!"); 959 return RangeChecksOut.size() != OldCount; 960 } 961 962 #ifndef NDEBUG 963 StringRef GuardWideningImpl::scoreTypeToString(WideningScore WS) { 964 switch (WS) { 965 case WS_IllegalOrNegative: 966 return "IllegalOrNegative"; 967 case WS_Neutral: 968 return "Neutral"; 969 case WS_Positive: 970 return "Positive"; 971 case WS_VeryPositive: 972 return "VeryPositive"; 973 } 974 975 llvm_unreachable("Fully covered switch above!"); 976 } 977 #endif 978 979 PreservedAnalyses GuardWideningPass::run(Function &F, 980 FunctionAnalysisManager &AM) { 981 // Avoid requesting analyses if there are no guards or widenable conditions. 982 auto *GuardDecl = Intrinsic::getDeclarationIfExists( 983 F.getParent(), Intrinsic::experimental_guard); 984 bool HasIntrinsicGuards = GuardDecl && !GuardDecl->use_empty(); 985 auto *WCDecl = Intrinsic::getDeclarationIfExists( 986 F.getParent(), Intrinsic::experimental_widenable_condition); 987 bool HasWidenableConditions = WCDecl && !WCDecl->use_empty(); 988 if (!HasIntrinsicGuards && !HasWidenableConditions) 989 return PreservedAnalyses::all(); 990 auto &DT = AM.getResult<DominatorTreeAnalysis>(F); 991 auto &LI = AM.getResult<LoopAnalysis>(F); 992 auto &PDT = AM.getResult<PostDominatorTreeAnalysis>(F); 993 auto &AC = AM.getResult<AssumptionAnalysis>(F); 994 auto *MSSAA = AM.getCachedResult<MemorySSAAnalysis>(F); 995 std::unique_ptr<MemorySSAUpdater> MSSAU; 996 if (MSSAA) 997 MSSAU = std::make_unique<MemorySSAUpdater>(&MSSAA->getMSSA()); 998 if (!GuardWideningImpl(DT, &PDT, LI, AC, MSSAU ? MSSAU.get() : nullptr, 999 DT.getRootNode(), [](BasicBlock *) { return true; }) 1000 .run()) 1001 return PreservedAnalyses::all(); 1002 1003 PreservedAnalyses PA; 1004 PA.preserveSet<CFGAnalyses>(); 1005 PA.preserve<MemorySSAAnalysis>(); 1006 return PA; 1007 } 1008 1009 PreservedAnalyses GuardWideningPass::run(Loop &L, LoopAnalysisManager &AM, 1010 LoopStandardAnalysisResults &AR, 1011 LPMUpdater &U) { 1012 BasicBlock *RootBB = L.getLoopPredecessor(); 1013 if (!RootBB) 1014 RootBB = L.getHeader(); 1015 auto BlockFilter = [&](BasicBlock *BB) { 1016 return BB == RootBB || L.contains(BB); 1017 }; 1018 std::unique_ptr<MemorySSAUpdater> MSSAU; 1019 if (AR.MSSA) 1020 MSSAU = std::make_unique<MemorySSAUpdater>(AR.MSSA); 1021 if (!GuardWideningImpl(AR.DT, nullptr, AR.LI, AR.AC, 1022 MSSAU ? MSSAU.get() : nullptr, AR.DT.getNode(RootBB), 1023 BlockFilter) 1024 .run()) 1025 return PreservedAnalyses::all(); 1026 1027 auto PA = getLoopPassPreservedAnalyses(); 1028 if (AR.MSSA) 1029 PA.preserve<MemorySSAAnalysis>(); 1030 return PA; 1031 } 1032