1 //===- CorrelatedValuePropagation.cpp - Propagate CFG-derived info --------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements the Correlated Value Propagation pass. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "llvm/Transforms/Scalar/CorrelatedValuePropagation.h" 14 #include "llvm/ADT/DepthFirstIterator.h" 15 #include "llvm/ADT/SmallVector.h" 16 #include "llvm/ADT/Statistic.h" 17 #include "llvm/Analysis/DomTreeUpdater.h" 18 #include "llvm/Analysis/GlobalsModRef.h" 19 #include "llvm/Analysis/InstructionSimplify.h" 20 #include "llvm/Analysis/LazyValueInfo.h" 21 #include "llvm/Analysis/ValueTracking.h" 22 #include "llvm/IR/Attributes.h" 23 #include "llvm/IR/BasicBlock.h" 24 #include "llvm/IR/CFG.h" 25 #include "llvm/IR/Constant.h" 26 #include "llvm/IR/ConstantRange.h" 27 #include "llvm/IR/Constants.h" 28 #include "llvm/IR/DerivedTypes.h" 29 #include "llvm/IR/Function.h" 30 #include "llvm/IR/IRBuilder.h" 31 #include "llvm/IR/InstrTypes.h" 32 #include "llvm/IR/Instruction.h" 33 #include "llvm/IR/Instructions.h" 34 #include "llvm/IR/IntrinsicInst.h" 35 #include "llvm/IR/Operator.h" 36 #include "llvm/IR/PatternMatch.h" 37 #include "llvm/IR/PassManager.h" 38 #include "llvm/IR/Type.h" 39 #include "llvm/IR/Value.h" 40 #include "llvm/Support/Casting.h" 41 #include "llvm/Support/CommandLine.h" 42 #include "llvm/Transforms/Utils/Local.h" 43 #include <cassert> 44 #include <optional> 45 #include <utility> 46 47 using namespace llvm; 48 49 #define DEBUG_TYPE "correlated-value-propagation" 50 51 STATISTIC(NumPhis, "Number of phis propagated"); 52 STATISTIC(NumPhiCommon, "Number of phis deleted via common incoming value"); 53 STATISTIC(NumSelects, "Number of selects propagated"); 54 STATISTIC(NumCmps, "Number of comparisons propagated"); 55 STATISTIC(NumReturns, "Number of return values propagated"); 56 STATISTIC(NumDeadCases, "Number of switch cases removed"); 57 STATISTIC(NumSDivSRemsNarrowed, 58 "Number of sdivs/srems whose width was decreased"); 59 STATISTIC(NumSDivs, "Number of sdiv converted to udiv"); 60 STATISTIC(NumUDivURemsNarrowed, 61 "Number of udivs/urems whose width was decreased"); 62 STATISTIC(NumAShrsConverted, "Number of ashr converted to lshr"); 63 STATISTIC(NumAShrsRemoved, "Number of ashr removed"); 64 STATISTIC(NumSRems, "Number of srem converted to urem"); 65 STATISTIC(NumSExt, "Number of sext converted to zext"); 66 STATISTIC(NumSIToFP, "Number of sitofp converted to uitofp"); 67 STATISTIC(NumSICmps, "Number of signed icmp preds simplified to unsigned"); 68 STATISTIC(NumAnd, "Number of ands removed"); 69 STATISTIC(NumNW, "Number of no-wrap deductions"); 70 STATISTIC(NumNSW, "Number of no-signed-wrap deductions"); 71 STATISTIC(NumNUW, "Number of no-unsigned-wrap deductions"); 72 STATISTIC(NumAddNW, "Number of no-wrap deductions for add"); 73 STATISTIC(NumAddNSW, "Number of no-signed-wrap deductions for add"); 74 STATISTIC(NumAddNUW, "Number of no-unsigned-wrap deductions for add"); 75 STATISTIC(NumSubNW, "Number of no-wrap deductions for sub"); 76 STATISTIC(NumSubNSW, "Number of no-signed-wrap deductions for sub"); 77 STATISTIC(NumSubNUW, "Number of no-unsigned-wrap deductions for sub"); 78 STATISTIC(NumMulNW, "Number of no-wrap deductions for mul"); 79 STATISTIC(NumMulNSW, "Number of no-signed-wrap deductions for mul"); 80 STATISTIC(NumMulNUW, "Number of no-unsigned-wrap deductions for mul"); 81 STATISTIC(NumShlNW, "Number of no-wrap deductions for shl"); 82 STATISTIC(NumShlNSW, "Number of no-signed-wrap deductions for shl"); 83 STATISTIC(NumShlNUW, "Number of no-unsigned-wrap deductions for shl"); 84 STATISTIC(NumAbs, "Number of llvm.abs intrinsics removed"); 85 STATISTIC(NumOverflows, "Number of overflow checks removed"); 86 STATISTIC(NumSaturating, 87 "Number of saturating arithmetics converted to normal arithmetics"); 88 STATISTIC(NumNonNull, "Number of function pointer arguments marked non-null"); 89 STATISTIC(NumCmpIntr, "Number of llvm.[us]cmp intrinsics removed"); 90 STATISTIC(NumMinMax, "Number of llvm.[us]{min,max} intrinsics removed"); 91 STATISTIC(NumSMinMax, 92 "Number of llvm.s{min,max} intrinsics simplified to unsigned"); 93 STATISTIC(NumUDivURemsNarrowedExpanded, 94 "Number of bound udiv's/urem's expanded"); 95 STATISTIC(NumNNeg, "Number of zext/uitofp non-negative deductions"); 96 97 static Constant *getConstantAt(Value *V, Instruction *At, LazyValueInfo *LVI) { 98 if (Constant *C = LVI->getConstant(V, At)) 99 return C; 100 101 // TODO: The following really should be sunk inside LVI's core algorithm, or 102 // at least the outer shims around such. 103 auto *C = dyn_cast<CmpInst>(V); 104 if (!C) 105 return nullptr; 106 107 Value *Op0 = C->getOperand(0); 108 Constant *Op1 = dyn_cast<Constant>(C->getOperand(1)); 109 if (!Op1) 110 return nullptr; 111 112 return LVI->getPredicateAt(C->getPredicate(), Op0, Op1, At, 113 /*UseBlockValue=*/false); 114 } 115 116 static bool processSelect(SelectInst *S, LazyValueInfo *LVI) { 117 if (S->getType()->isVectorTy() || isa<Constant>(S->getCondition())) 118 return false; 119 120 bool Changed = false; 121 for (Use &U : make_early_inc_range(S->uses())) { 122 auto *I = cast<Instruction>(U.getUser()); 123 Constant *C; 124 if (auto *PN = dyn_cast<PHINode>(I)) 125 C = LVI->getConstantOnEdge(S->getCondition(), PN->getIncomingBlock(U), 126 I->getParent(), I); 127 else 128 C = getConstantAt(S->getCondition(), I, LVI); 129 130 auto *CI = dyn_cast_or_null<ConstantInt>(C); 131 if (!CI) 132 continue; 133 134 U.set(CI->isOne() ? S->getTrueValue() : S->getFalseValue()); 135 Changed = true; 136 ++NumSelects; 137 } 138 139 if (Changed && S->use_empty()) 140 S->eraseFromParent(); 141 142 return Changed; 143 } 144 145 /// Try to simplify a phi with constant incoming values that match the edge 146 /// values of a non-constant value on all other edges: 147 /// bb0: 148 /// %isnull = icmp eq i8* %x, null 149 /// br i1 %isnull, label %bb2, label %bb1 150 /// bb1: 151 /// br label %bb2 152 /// bb2: 153 /// %r = phi i8* [ %x, %bb1 ], [ null, %bb0 ] 154 /// --> 155 /// %r = %x 156 static bool simplifyCommonValuePhi(PHINode *P, LazyValueInfo *LVI, 157 DominatorTree *DT) { 158 // Collect incoming constants and initialize possible common value. 159 SmallVector<std::pair<Constant *, unsigned>, 4> IncomingConstants; 160 Value *CommonValue = nullptr; 161 for (unsigned i = 0, e = P->getNumIncomingValues(); i != e; ++i) { 162 Value *Incoming = P->getIncomingValue(i); 163 if (auto *IncomingConstant = dyn_cast<Constant>(Incoming)) { 164 IncomingConstants.push_back(std::make_pair(IncomingConstant, i)); 165 } else if (!CommonValue) { 166 // The potential common value is initialized to the first non-constant. 167 CommonValue = Incoming; 168 } else if (Incoming != CommonValue) { 169 // There can be only one non-constant common value. 170 return false; 171 } 172 } 173 174 if (!CommonValue || IncomingConstants.empty()) 175 return false; 176 177 // The common value must be valid in all incoming blocks. 178 BasicBlock *ToBB = P->getParent(); 179 if (auto *CommonInst = dyn_cast<Instruction>(CommonValue)) 180 if (!DT->dominates(CommonInst, ToBB)) 181 return false; 182 183 // We have a phi with exactly 1 variable incoming value and 1 or more constant 184 // incoming values. See if all constant incoming values can be mapped back to 185 // the same incoming variable value. 186 for (auto &IncomingConstant : IncomingConstants) { 187 Constant *C = IncomingConstant.first; 188 BasicBlock *IncomingBB = P->getIncomingBlock(IncomingConstant.second); 189 if (C != LVI->getConstantOnEdge(CommonValue, IncomingBB, ToBB, P)) 190 return false; 191 } 192 193 // LVI only guarantees that the value matches a certain constant if the value 194 // is not poison. Make sure we don't replace a well-defined value with poison. 195 // This is usually satisfied due to a prior branch on the value. 196 if (!isGuaranteedNotToBePoison(CommonValue, nullptr, P, DT)) 197 return false; 198 199 // All constant incoming values map to the same variable along the incoming 200 // edges of the phi. The phi is unnecessary. 201 P->replaceAllUsesWith(CommonValue); 202 P->eraseFromParent(); 203 ++NumPhiCommon; 204 return true; 205 } 206 207 static Value *getValueOnEdge(LazyValueInfo *LVI, Value *Incoming, 208 BasicBlock *From, BasicBlock *To, 209 Instruction *CxtI) { 210 if (Constant *C = LVI->getConstantOnEdge(Incoming, From, To, CxtI)) 211 return C; 212 213 // Look if the incoming value is a select with a scalar condition for which 214 // LVI can tells us the value. In that case replace the incoming value with 215 // the appropriate value of the select. This often allows us to remove the 216 // select later. 217 auto *SI = dyn_cast<SelectInst>(Incoming); 218 if (!SI) 219 return nullptr; 220 221 // Once LVI learns to handle vector types, we could also add support 222 // for vector type constants that are not all zeroes or all ones. 223 Value *Condition = SI->getCondition(); 224 if (!Condition->getType()->isVectorTy()) { 225 if (Constant *C = LVI->getConstantOnEdge(Condition, From, To, CxtI)) { 226 if (C->isOneValue()) 227 return SI->getTrueValue(); 228 if (C->isZeroValue()) 229 return SI->getFalseValue(); 230 } 231 } 232 233 // Look if the select has a constant but LVI tells us that the incoming 234 // value can never be that constant. In that case replace the incoming 235 // value with the other value of the select. This often allows us to 236 // remove the select later. 237 238 // The "false" case 239 if (auto *C = dyn_cast<Constant>(SI->getFalseValue())) 240 if (auto *Res = dyn_cast_or_null<ConstantInt>( 241 LVI->getPredicateOnEdge(ICmpInst::ICMP_EQ, SI, C, From, To, CxtI)); 242 Res && Res->isZero()) 243 return SI->getTrueValue(); 244 245 // The "true" case, 246 // similar to the select "false" case, but try the select "true" value 247 if (auto *C = dyn_cast<Constant>(SI->getTrueValue())) 248 if (auto *Res = dyn_cast_or_null<ConstantInt>( 249 LVI->getPredicateOnEdge(ICmpInst::ICMP_EQ, SI, C, From, To, CxtI)); 250 Res && Res->isZero()) 251 return SI->getFalseValue(); 252 253 return nullptr; 254 } 255 256 static bool processPHI(PHINode *P, LazyValueInfo *LVI, DominatorTree *DT, 257 const SimplifyQuery &SQ) { 258 bool Changed = false; 259 260 BasicBlock *BB = P->getParent(); 261 for (unsigned i = 0, e = P->getNumIncomingValues(); i < e; ++i) { 262 Value *Incoming = P->getIncomingValue(i); 263 if (isa<Constant>(Incoming)) continue; 264 265 Value *V = getValueOnEdge(LVI, Incoming, P->getIncomingBlock(i), BB, P); 266 if (V) { 267 P->setIncomingValue(i, V); 268 Changed = true; 269 } 270 } 271 272 if (Value *V = simplifyInstruction(P, SQ)) { 273 P->replaceAllUsesWith(V); 274 P->eraseFromParent(); 275 Changed = true; 276 } 277 278 if (!Changed) 279 Changed = simplifyCommonValuePhi(P, LVI, DT); 280 281 if (Changed) 282 ++NumPhis; 283 284 return Changed; 285 } 286 287 static bool processICmp(ICmpInst *Cmp, LazyValueInfo *LVI) { 288 // Only for signed relational comparisons of integers. 289 if (!Cmp->getOperand(0)->getType()->isIntOrIntVectorTy()) 290 return false; 291 292 if (!Cmp->isSigned()) 293 return false; 294 295 ICmpInst::Predicate UnsignedPred = 296 ConstantRange::getEquivalentPredWithFlippedSignedness( 297 Cmp->getPredicate(), 298 LVI->getConstantRangeAtUse(Cmp->getOperandUse(0), 299 /*UndefAllowed*/ true), 300 LVI->getConstantRangeAtUse(Cmp->getOperandUse(1), 301 /*UndefAllowed*/ true)); 302 303 if (UnsignedPred == ICmpInst::Predicate::BAD_ICMP_PREDICATE) 304 return false; 305 306 ++NumSICmps; 307 Cmp->setPredicate(UnsignedPred); 308 309 return true; 310 } 311 312 /// See if LazyValueInfo's ability to exploit edge conditions or range 313 /// information is sufficient to prove this comparison. Even for local 314 /// conditions, this can sometimes prove conditions instcombine can't by 315 /// exploiting range information. 316 static bool constantFoldCmp(CmpInst *Cmp, LazyValueInfo *LVI) { 317 Value *Op0 = Cmp->getOperand(0); 318 Value *Op1 = Cmp->getOperand(1); 319 Constant *Res = LVI->getPredicateAt(Cmp->getPredicate(), Op0, Op1, Cmp, 320 /*UseBlockValue=*/true); 321 if (!Res) 322 return false; 323 324 ++NumCmps; 325 Cmp->replaceAllUsesWith(Res); 326 Cmp->eraseFromParent(); 327 return true; 328 } 329 330 static bool processCmp(CmpInst *Cmp, LazyValueInfo *LVI) { 331 if (constantFoldCmp(Cmp, LVI)) 332 return true; 333 334 if (auto *ICmp = dyn_cast<ICmpInst>(Cmp)) 335 if (processICmp(ICmp, LVI)) 336 return true; 337 338 return false; 339 } 340 341 /// Simplify a switch instruction by removing cases which can never fire. If the 342 /// uselessness of a case could be determined locally then constant propagation 343 /// would already have figured it out. Instead, walk the predecessors and 344 /// statically evaluate cases based on information available on that edge. Cases 345 /// that cannot fire no matter what the incoming edge can safely be removed. If 346 /// a case fires on every incoming edge then the entire switch can be removed 347 /// and replaced with a branch to the case destination. 348 static bool processSwitch(SwitchInst *I, LazyValueInfo *LVI, 349 DominatorTree *DT) { 350 DomTreeUpdater DTU(*DT, DomTreeUpdater::UpdateStrategy::Lazy); 351 Value *Cond = I->getCondition(); 352 BasicBlock *BB = I->getParent(); 353 354 // Analyse each switch case in turn. 355 bool Changed = false; 356 DenseMap<BasicBlock*, int> SuccessorsCount; 357 for (auto *Succ : successors(BB)) 358 SuccessorsCount[Succ]++; 359 360 { // Scope for SwitchInstProfUpdateWrapper. It must not live during 361 // ConstantFoldTerminator() as the underlying SwitchInst can be changed. 362 SwitchInstProfUpdateWrapper SI(*I); 363 unsigned ReachableCaseCount = 0; 364 365 for (auto CI = SI->case_begin(), CE = SI->case_end(); CI != CE;) { 366 ConstantInt *Case = CI->getCaseValue(); 367 auto *Res = dyn_cast_or_null<ConstantInt>( 368 LVI->getPredicateAt(CmpInst::ICMP_EQ, Cond, Case, I, 369 /* UseBlockValue */ true)); 370 371 if (Res && Res->isZero()) { 372 // This case never fires - remove it. 373 BasicBlock *Succ = CI->getCaseSuccessor(); 374 Succ->removePredecessor(BB); 375 CI = SI.removeCase(CI); 376 CE = SI->case_end(); 377 378 // The condition can be modified by removePredecessor's PHI simplification 379 // logic. 380 Cond = SI->getCondition(); 381 382 ++NumDeadCases; 383 Changed = true; 384 if (--SuccessorsCount[Succ] == 0) 385 DTU.applyUpdatesPermissive({{DominatorTree::Delete, BB, Succ}}); 386 continue; 387 } 388 if (Res && Res->isOne()) { 389 // This case always fires. Arrange for the switch to be turned into an 390 // unconditional branch by replacing the switch condition with the case 391 // value. 392 SI->setCondition(Case); 393 NumDeadCases += SI->getNumCases(); 394 Changed = true; 395 break; 396 } 397 398 // Increment the case iterator since we didn't delete it. 399 ++CI; 400 ++ReachableCaseCount; 401 } 402 403 BasicBlock *DefaultDest = SI->getDefaultDest(); 404 if (ReachableCaseCount > 1 && 405 !isa<UnreachableInst>(DefaultDest->getFirstNonPHIOrDbg())) { 406 ConstantRange CR = LVI->getConstantRangeAtUse(I->getOperandUse(0), 407 /*UndefAllowed*/ false); 408 // The default dest is unreachable if all cases are covered. 409 if (!CR.isSizeLargerThan(ReachableCaseCount)) { 410 BasicBlock *NewUnreachableBB = 411 BasicBlock::Create(BB->getContext(), "default.unreachable", 412 BB->getParent(), DefaultDest); 413 new UnreachableInst(BB->getContext(), NewUnreachableBB); 414 415 DefaultDest->removePredecessor(BB); 416 SI->setDefaultDest(NewUnreachableBB); 417 418 if (SuccessorsCount[DefaultDest] == 1) 419 DTU.applyUpdates({{DominatorTree::Delete, BB, DefaultDest}}); 420 DTU.applyUpdates({{DominatorTree::Insert, BB, NewUnreachableBB}}); 421 422 ++NumDeadCases; 423 Changed = true; 424 } 425 } 426 } 427 428 if (Changed) 429 // If the switch has been simplified to the point where it can be replaced 430 // by a branch then do so now. 431 ConstantFoldTerminator(BB, /*DeleteDeadConditions = */ false, 432 /*TLI = */ nullptr, &DTU); 433 return Changed; 434 } 435 436 // See if we can prove that the given binary op intrinsic will not overflow. 437 static bool willNotOverflow(BinaryOpIntrinsic *BO, LazyValueInfo *LVI) { 438 ConstantRange LRange = 439 LVI->getConstantRangeAtUse(BO->getOperandUse(0), /*UndefAllowed*/ false); 440 ConstantRange RRange = 441 LVI->getConstantRangeAtUse(BO->getOperandUse(1), /*UndefAllowed*/ false); 442 ConstantRange NWRegion = ConstantRange::makeGuaranteedNoWrapRegion( 443 BO->getBinaryOp(), RRange, BO->getNoWrapKind()); 444 return NWRegion.contains(LRange); 445 } 446 447 static void setDeducedOverflowingFlags(Value *V, Instruction::BinaryOps Opcode, 448 bool NewNSW, bool NewNUW) { 449 Statistic *OpcNW, *OpcNSW, *OpcNUW; 450 switch (Opcode) { 451 case Instruction::Add: 452 OpcNW = &NumAddNW; 453 OpcNSW = &NumAddNSW; 454 OpcNUW = &NumAddNUW; 455 break; 456 case Instruction::Sub: 457 OpcNW = &NumSubNW; 458 OpcNSW = &NumSubNSW; 459 OpcNUW = &NumSubNUW; 460 break; 461 case Instruction::Mul: 462 OpcNW = &NumMulNW; 463 OpcNSW = &NumMulNSW; 464 OpcNUW = &NumMulNUW; 465 break; 466 case Instruction::Shl: 467 OpcNW = &NumShlNW; 468 OpcNSW = &NumShlNSW; 469 OpcNUW = &NumShlNUW; 470 break; 471 default: 472 llvm_unreachable("Will not be called with other binops"); 473 } 474 475 auto *Inst = dyn_cast<Instruction>(V); 476 if (NewNSW) { 477 ++NumNW; 478 ++*OpcNW; 479 ++NumNSW; 480 ++*OpcNSW; 481 if (Inst) 482 Inst->setHasNoSignedWrap(); 483 } 484 if (NewNUW) { 485 ++NumNW; 486 ++*OpcNW; 487 ++NumNUW; 488 ++*OpcNUW; 489 if (Inst) 490 Inst->setHasNoUnsignedWrap(); 491 } 492 } 493 494 static bool processBinOp(BinaryOperator *BinOp, LazyValueInfo *LVI); 495 496 // See if @llvm.abs argument is alays positive/negative, and simplify. 497 // Notably, INT_MIN can belong to either range, regardless of the NSW, 498 // because it is negation-invariant. 499 static bool processAbsIntrinsic(IntrinsicInst *II, LazyValueInfo *LVI) { 500 Value *X = II->getArgOperand(0); 501 bool IsIntMinPoison = cast<ConstantInt>(II->getArgOperand(1))->isOne(); 502 APInt IntMin = APInt::getSignedMinValue(X->getType()->getScalarSizeInBits()); 503 ConstantRange Range = LVI->getConstantRangeAtUse( 504 II->getOperandUse(0), /*UndefAllowed*/ IsIntMinPoison); 505 506 // Is X in [0, IntMin]? NOTE: INT_MIN is fine! 507 if (Range.icmp(CmpInst::ICMP_ULE, IntMin)) { 508 ++NumAbs; 509 II->replaceAllUsesWith(X); 510 II->eraseFromParent(); 511 return true; 512 } 513 514 // Is X in [IntMin, 0]? NOTE: INT_MIN is fine! 515 if (Range.getSignedMax().isNonPositive()) { 516 IRBuilder<> B(II); 517 Value *NegX = B.CreateNeg(X, II->getName(), 518 /*HasNSW=*/IsIntMinPoison); 519 ++NumAbs; 520 II->replaceAllUsesWith(NegX); 521 II->eraseFromParent(); 522 523 // See if we can infer some no-wrap flags. 524 if (auto *BO = dyn_cast<BinaryOperator>(NegX)) 525 processBinOp(BO, LVI); 526 527 return true; 528 } 529 530 // Argument's range crosses zero. 531 // Can we at least tell that the argument is never INT_MIN? 532 if (!IsIntMinPoison && !Range.contains(IntMin)) { 533 ++NumNSW; 534 ++NumSubNSW; 535 II->setArgOperand(1, ConstantInt::getTrue(II->getContext())); 536 return true; 537 } 538 return false; 539 } 540 541 static bool processCmpIntrinsic(CmpIntrinsic *CI, LazyValueInfo *LVI) { 542 ConstantRange LHS_CR = 543 LVI->getConstantRangeAtUse(CI->getOperandUse(0), /*UndefAllowed*/ false); 544 ConstantRange RHS_CR = 545 LVI->getConstantRangeAtUse(CI->getOperandUse(1), /*UndefAllowed*/ false); 546 547 if (LHS_CR.icmp(CI->getGTPredicate(), RHS_CR)) { 548 ++NumCmpIntr; 549 CI->replaceAllUsesWith(ConstantInt::get(CI->getType(), 1)); 550 CI->eraseFromParent(); 551 return true; 552 } 553 if (LHS_CR.icmp(CI->getLTPredicate(), RHS_CR)) { 554 ++NumCmpIntr; 555 CI->replaceAllUsesWith(ConstantInt::getSigned(CI->getType(), -1)); 556 CI->eraseFromParent(); 557 return true; 558 } 559 if (LHS_CR.icmp(ICmpInst::ICMP_EQ, RHS_CR)) { 560 ++NumCmpIntr; 561 CI->replaceAllUsesWith(ConstantInt::get(CI->getType(), 0)); 562 CI->eraseFromParent(); 563 return true; 564 } 565 566 return false; 567 } 568 569 // See if this min/max intrinsic always picks it's one specific operand. 570 // If not, check whether we can canonicalize signed minmax into unsigned version 571 static bool processMinMaxIntrinsic(MinMaxIntrinsic *MM, LazyValueInfo *LVI) { 572 CmpInst::Predicate Pred = CmpInst::getNonStrictPredicate(MM->getPredicate()); 573 ConstantRange LHS_CR = LVI->getConstantRangeAtUse(MM->getOperandUse(0), 574 /*UndefAllowed*/ false); 575 ConstantRange RHS_CR = LVI->getConstantRangeAtUse(MM->getOperandUse(1), 576 /*UndefAllowed*/ false); 577 if (LHS_CR.icmp(Pred, RHS_CR)) { 578 ++NumMinMax; 579 MM->replaceAllUsesWith(MM->getLHS()); 580 MM->eraseFromParent(); 581 return true; 582 } 583 if (RHS_CR.icmp(Pred, LHS_CR)) { 584 ++NumMinMax; 585 MM->replaceAllUsesWith(MM->getRHS()); 586 MM->eraseFromParent(); 587 return true; 588 } 589 590 if (MM->isSigned() && 591 ConstantRange::areInsensitiveToSignednessOfICmpPredicate(LHS_CR, 592 RHS_CR)) { 593 ++NumSMinMax; 594 IRBuilder<> B(MM); 595 MM->replaceAllUsesWith(B.CreateBinaryIntrinsic( 596 MM->getIntrinsicID() == Intrinsic::smin ? Intrinsic::umin 597 : Intrinsic::umax, 598 MM->getLHS(), MM->getRHS())); 599 MM->eraseFromParent(); 600 return true; 601 } 602 603 return false; 604 } 605 606 // Rewrite this with.overflow intrinsic as non-overflowing. 607 static bool processOverflowIntrinsic(WithOverflowInst *WO, LazyValueInfo *LVI) { 608 IRBuilder<> B(WO); 609 Instruction::BinaryOps Opcode = WO->getBinaryOp(); 610 bool NSW = WO->isSigned(); 611 bool NUW = !WO->isSigned(); 612 613 Value *NewOp = 614 B.CreateBinOp(Opcode, WO->getLHS(), WO->getRHS(), WO->getName()); 615 setDeducedOverflowingFlags(NewOp, Opcode, NSW, NUW); 616 617 StructType *ST = cast<StructType>(WO->getType()); 618 Constant *Struct = ConstantStruct::get(ST, 619 { PoisonValue::get(ST->getElementType(0)), 620 ConstantInt::getFalse(ST->getElementType(1)) }); 621 Value *NewI = B.CreateInsertValue(Struct, NewOp, 0); 622 WO->replaceAllUsesWith(NewI); 623 WO->eraseFromParent(); 624 ++NumOverflows; 625 626 // See if we can infer the other no-wrap too. 627 if (auto *BO = dyn_cast<BinaryOperator>(NewOp)) 628 processBinOp(BO, LVI); 629 630 return true; 631 } 632 633 static bool processSaturatingInst(SaturatingInst *SI, LazyValueInfo *LVI) { 634 Instruction::BinaryOps Opcode = SI->getBinaryOp(); 635 bool NSW = SI->isSigned(); 636 bool NUW = !SI->isSigned(); 637 BinaryOperator *BinOp = BinaryOperator::Create( 638 Opcode, SI->getLHS(), SI->getRHS(), SI->getName(), SI->getIterator()); 639 BinOp->setDebugLoc(SI->getDebugLoc()); 640 setDeducedOverflowingFlags(BinOp, Opcode, NSW, NUW); 641 642 SI->replaceAllUsesWith(BinOp); 643 SI->eraseFromParent(); 644 ++NumSaturating; 645 646 // See if we can infer the other no-wrap too. 647 if (auto *BO = dyn_cast<BinaryOperator>(BinOp)) 648 processBinOp(BO, LVI); 649 650 return true; 651 } 652 653 /// Infer nonnull attributes for the arguments at the specified callsite. 654 static bool processCallSite(CallBase &CB, LazyValueInfo *LVI) { 655 656 if (CB.getIntrinsicID() == Intrinsic::abs) { 657 return processAbsIntrinsic(&cast<IntrinsicInst>(CB), LVI); 658 } 659 660 if (auto *CI = dyn_cast<CmpIntrinsic>(&CB)) { 661 return processCmpIntrinsic(CI, LVI); 662 } 663 664 if (auto *MM = dyn_cast<MinMaxIntrinsic>(&CB)) { 665 return processMinMaxIntrinsic(MM, LVI); 666 } 667 668 if (auto *WO = dyn_cast<WithOverflowInst>(&CB)) { 669 if (willNotOverflow(WO, LVI)) 670 return processOverflowIntrinsic(WO, LVI); 671 } 672 673 if (auto *SI = dyn_cast<SaturatingInst>(&CB)) { 674 if (willNotOverflow(SI, LVI)) 675 return processSaturatingInst(SI, LVI); 676 } 677 678 bool Changed = false; 679 680 // Deopt bundle operands are intended to capture state with minimal 681 // perturbance of the code otherwise. If we can find a constant value for 682 // any such operand and remove a use of the original value, that's 683 // desireable since it may allow further optimization of that value (e.g. via 684 // single use rules in instcombine). Since deopt uses tend to, 685 // idiomatically, appear along rare conditional paths, it's reasonable likely 686 // we may have a conditional fact with which LVI can fold. 687 if (auto DeoptBundle = CB.getOperandBundle(LLVMContext::OB_deopt)) { 688 for (const Use &ConstU : DeoptBundle->Inputs) { 689 Use &U = const_cast<Use&>(ConstU); 690 Value *V = U.get(); 691 if (V->getType()->isVectorTy()) continue; 692 if (isa<Constant>(V)) continue; 693 694 Constant *C = LVI->getConstant(V, &CB); 695 if (!C) continue; 696 U.set(C); 697 Changed = true; 698 } 699 } 700 701 SmallVector<unsigned, 4> ArgNos; 702 unsigned ArgNo = 0; 703 704 for (Value *V : CB.args()) { 705 PointerType *Type = dyn_cast<PointerType>(V->getType()); 706 // Try to mark pointer typed parameters as non-null. We skip the 707 // relatively expensive analysis for constants which are obviously either 708 // null or non-null to start with. 709 if (Type && !CB.paramHasAttr(ArgNo, Attribute::NonNull) && 710 !isa<Constant>(V)) 711 if (auto *Res = dyn_cast_or_null<ConstantInt>(LVI->getPredicateAt( 712 ICmpInst::ICMP_EQ, V, ConstantPointerNull::get(Type), &CB, 713 /*UseBlockValue=*/false)); 714 Res && Res->isZero()) 715 ArgNos.push_back(ArgNo); 716 ArgNo++; 717 } 718 719 assert(ArgNo == CB.arg_size() && "Call arguments not processed correctly."); 720 721 if (ArgNos.empty()) 722 return Changed; 723 724 NumNonNull += ArgNos.size(); 725 AttributeList AS = CB.getAttributes(); 726 LLVMContext &Ctx = CB.getContext(); 727 AS = AS.addParamAttribute(Ctx, ArgNos, 728 Attribute::get(Ctx, Attribute::NonNull)); 729 CB.setAttributes(AS); 730 731 return true; 732 } 733 734 enum class Domain { NonNegative, NonPositive, Unknown }; 735 736 static Domain getDomain(const ConstantRange &CR) { 737 if (CR.isAllNonNegative()) 738 return Domain::NonNegative; 739 if (CR.icmp(ICmpInst::ICMP_SLE, APInt::getZero(CR.getBitWidth()))) 740 return Domain::NonPositive; 741 return Domain::Unknown; 742 } 743 744 /// Try to shrink a sdiv/srem's width down to the smallest power of two that's 745 /// sufficient to contain its operands. 746 static bool narrowSDivOrSRem(BinaryOperator *Instr, const ConstantRange &LCR, 747 const ConstantRange &RCR) { 748 assert(Instr->getOpcode() == Instruction::SDiv || 749 Instr->getOpcode() == Instruction::SRem); 750 751 // Find the smallest power of two bitwidth that's sufficient to hold Instr's 752 // operands. 753 unsigned OrigWidth = Instr->getType()->getScalarSizeInBits(); 754 755 // What is the smallest bit width that can accommodate the entire value ranges 756 // of both of the operands? 757 unsigned MinSignedBits = 758 std::max(LCR.getMinSignedBits(), RCR.getMinSignedBits()); 759 760 // sdiv/srem is UB if divisor is -1 and divident is INT_MIN, so unless we can 761 // prove that such a combination is impossible, we need to bump the bitwidth. 762 if (RCR.contains(APInt::getAllOnes(OrigWidth)) && 763 LCR.contains(APInt::getSignedMinValue(MinSignedBits).sext(OrigWidth))) 764 ++MinSignedBits; 765 766 // Don't shrink below 8 bits wide. 767 unsigned NewWidth = std::max<unsigned>(PowerOf2Ceil(MinSignedBits), 8); 768 769 // NewWidth might be greater than OrigWidth if OrigWidth is not a power of 770 // two. 771 if (NewWidth >= OrigWidth) 772 return false; 773 774 ++NumSDivSRemsNarrowed; 775 IRBuilder<> B{Instr}; 776 auto *TruncTy = Instr->getType()->getWithNewBitWidth(NewWidth); 777 auto *LHS = B.CreateTruncOrBitCast(Instr->getOperand(0), TruncTy, 778 Instr->getName() + ".lhs.trunc"); 779 auto *RHS = B.CreateTruncOrBitCast(Instr->getOperand(1), TruncTy, 780 Instr->getName() + ".rhs.trunc"); 781 auto *BO = B.CreateBinOp(Instr->getOpcode(), LHS, RHS, Instr->getName()); 782 auto *Sext = B.CreateSExt(BO, Instr->getType(), Instr->getName() + ".sext"); 783 if (auto *BinOp = dyn_cast<BinaryOperator>(BO)) 784 if (BinOp->getOpcode() == Instruction::SDiv) 785 BinOp->setIsExact(Instr->isExact()); 786 787 Instr->replaceAllUsesWith(Sext); 788 Instr->eraseFromParent(); 789 return true; 790 } 791 792 static bool expandUDivOrURem(BinaryOperator *Instr, const ConstantRange &XCR, 793 const ConstantRange &YCR) { 794 Type *Ty = Instr->getType(); 795 assert(Instr->getOpcode() == Instruction::UDiv || 796 Instr->getOpcode() == Instruction::URem); 797 bool IsRem = Instr->getOpcode() == Instruction::URem; 798 799 Value *X = Instr->getOperand(0); 800 Value *Y = Instr->getOperand(1); 801 802 // X u/ Y -> 0 iff X u< Y 803 // X u% Y -> X iff X u< Y 804 if (XCR.icmp(ICmpInst::ICMP_ULT, YCR)) { 805 Instr->replaceAllUsesWith(IsRem ? X : Constant::getNullValue(Ty)); 806 Instr->eraseFromParent(); 807 ++NumUDivURemsNarrowedExpanded; 808 return true; 809 } 810 811 // Given 812 // R = X u% Y 813 // We can represent the modulo operation as a loop/self-recursion: 814 // urem_rec(X, Y): 815 // Z = X - Y 816 // if X u< Y 817 // ret X 818 // else 819 // ret urem_rec(Z, Y) 820 // which isn't better, but if we only need a single iteration 821 // to compute the answer, this becomes quite good: 822 // R = X < Y ? X : X - Y iff X u< 2*Y (w/ unsigned saturation) 823 // Now, we do not care about all full multiples of Y in X, they do not change 824 // the answer, thus we could rewrite the expression as: 825 // X* = X - (Y * |_ X / Y _|) 826 // R = X* % Y 827 // so we don't need the *first* iteration to return, we just need to 828 // know *which* iteration will always return, so we could also rewrite it as: 829 // X* = X - (Y * |_ X / Y _|) 830 // R = X* % Y iff X* u< 2*Y (w/ unsigned saturation) 831 // but that does not seem profitable here. 832 833 // Even if we don't know X's range, the divisor may be so large, X can't ever 834 // be 2x larger than that. I.e. if divisor is always negative. 835 if (!XCR.icmp(ICmpInst::ICMP_ULT, 836 YCR.umul_sat(APInt(YCR.getBitWidth(), 2))) && 837 !YCR.isAllNegative()) 838 return false; 839 840 IRBuilder<> B(Instr); 841 Value *ExpandedOp; 842 if (XCR.icmp(ICmpInst::ICMP_UGE, YCR)) { 843 // If X is between Y and 2*Y the result is known. 844 if (IsRem) 845 ExpandedOp = B.CreateNUWSub(X, Y); 846 else 847 ExpandedOp = ConstantInt::get(Instr->getType(), 1); 848 } else if (IsRem) { 849 // NOTE: this transformation introduces two uses of X, 850 // but it may be undef so we must freeze it first. 851 Value *FrozenX = X; 852 if (!isGuaranteedNotToBeUndef(X)) 853 FrozenX = B.CreateFreeze(X, X->getName() + ".frozen"); 854 Value *FrozenY = Y; 855 if (!isGuaranteedNotToBeUndef(Y)) 856 FrozenY = B.CreateFreeze(Y, Y->getName() + ".frozen"); 857 auto *AdjX = B.CreateNUWSub(FrozenX, FrozenY, Instr->getName() + ".urem"); 858 auto *Cmp = B.CreateICmp(ICmpInst::ICMP_ULT, FrozenX, FrozenY, 859 Instr->getName() + ".cmp"); 860 ExpandedOp = B.CreateSelect(Cmp, FrozenX, AdjX); 861 } else { 862 auto *Cmp = 863 B.CreateICmp(ICmpInst::ICMP_UGE, X, Y, Instr->getName() + ".cmp"); 864 ExpandedOp = B.CreateZExt(Cmp, Ty, Instr->getName() + ".udiv"); 865 } 866 ExpandedOp->takeName(Instr); 867 Instr->replaceAllUsesWith(ExpandedOp); 868 Instr->eraseFromParent(); 869 ++NumUDivURemsNarrowedExpanded; 870 return true; 871 } 872 873 /// Try to shrink a udiv/urem's width down to the smallest power of two that's 874 /// sufficient to contain its operands. 875 static bool narrowUDivOrURem(BinaryOperator *Instr, const ConstantRange &XCR, 876 const ConstantRange &YCR) { 877 assert(Instr->getOpcode() == Instruction::UDiv || 878 Instr->getOpcode() == Instruction::URem); 879 880 // Find the smallest power of two bitwidth that's sufficient to hold Instr's 881 // operands. 882 883 // What is the smallest bit width that can accommodate the entire value ranges 884 // of both of the operands? 885 unsigned MaxActiveBits = std::max(XCR.getActiveBits(), YCR.getActiveBits()); 886 // Don't shrink below 8 bits wide. 887 unsigned NewWidth = std::max<unsigned>(PowerOf2Ceil(MaxActiveBits), 8); 888 889 // NewWidth might be greater than OrigWidth if OrigWidth is not a power of 890 // two. 891 if (NewWidth >= Instr->getType()->getScalarSizeInBits()) 892 return false; 893 894 ++NumUDivURemsNarrowed; 895 IRBuilder<> B{Instr}; 896 auto *TruncTy = Instr->getType()->getWithNewBitWidth(NewWidth); 897 auto *LHS = B.CreateTruncOrBitCast(Instr->getOperand(0), TruncTy, 898 Instr->getName() + ".lhs.trunc"); 899 auto *RHS = B.CreateTruncOrBitCast(Instr->getOperand(1), TruncTy, 900 Instr->getName() + ".rhs.trunc"); 901 auto *BO = B.CreateBinOp(Instr->getOpcode(), LHS, RHS, Instr->getName()); 902 auto *Zext = B.CreateZExt(BO, Instr->getType(), Instr->getName() + ".zext"); 903 if (auto *BinOp = dyn_cast<BinaryOperator>(BO)) 904 if (BinOp->getOpcode() == Instruction::UDiv) 905 BinOp->setIsExact(Instr->isExact()); 906 907 Instr->replaceAllUsesWith(Zext); 908 Instr->eraseFromParent(); 909 return true; 910 } 911 912 static bool processUDivOrURem(BinaryOperator *Instr, LazyValueInfo *LVI) { 913 assert(Instr->getOpcode() == Instruction::UDiv || 914 Instr->getOpcode() == Instruction::URem); 915 ConstantRange XCR = LVI->getConstantRangeAtUse(Instr->getOperandUse(0), 916 /*UndefAllowed*/ false); 917 // Allow undef for RHS, as we can assume it is division by zero UB. 918 ConstantRange YCR = LVI->getConstantRangeAtUse(Instr->getOperandUse(1), 919 /*UndefAllowed*/ true); 920 if (expandUDivOrURem(Instr, XCR, YCR)) 921 return true; 922 923 return narrowUDivOrURem(Instr, XCR, YCR); 924 } 925 926 static bool processSRem(BinaryOperator *SDI, const ConstantRange &LCR, 927 const ConstantRange &RCR, LazyValueInfo *LVI) { 928 assert(SDI->getOpcode() == Instruction::SRem); 929 930 if (LCR.abs().icmp(CmpInst::ICMP_ULT, RCR.abs())) { 931 SDI->replaceAllUsesWith(SDI->getOperand(0)); 932 SDI->eraseFromParent(); 933 return true; 934 } 935 936 struct Operand { 937 Value *V; 938 Domain D; 939 }; 940 std::array<Operand, 2> Ops = {{{SDI->getOperand(0), getDomain(LCR)}, 941 {SDI->getOperand(1), getDomain(RCR)}}}; 942 if (Ops[0].D == Domain::Unknown || Ops[1].D == Domain::Unknown) 943 return false; 944 945 // We know domains of both of the operands! 946 ++NumSRems; 947 948 // We need operands to be non-negative, so negate each one that isn't. 949 for (Operand &Op : Ops) { 950 if (Op.D == Domain::NonNegative) 951 continue; 952 auto *BO = BinaryOperator::CreateNeg(Op.V, Op.V->getName() + ".nonneg", 953 SDI->getIterator()); 954 BO->setDebugLoc(SDI->getDebugLoc()); 955 Op.V = BO; 956 } 957 958 auto *URem = BinaryOperator::CreateURem(Ops[0].V, Ops[1].V, SDI->getName(), 959 SDI->getIterator()); 960 URem->setDebugLoc(SDI->getDebugLoc()); 961 962 auto *Res = URem; 963 964 // If the divident was non-positive, we need to negate the result. 965 if (Ops[0].D == Domain::NonPositive) { 966 Res = BinaryOperator::CreateNeg(Res, Res->getName() + ".neg", 967 SDI->getIterator()); 968 Res->setDebugLoc(SDI->getDebugLoc()); 969 } 970 971 SDI->replaceAllUsesWith(Res); 972 SDI->eraseFromParent(); 973 974 // Try to simplify our new urem. 975 processUDivOrURem(URem, LVI); 976 977 return true; 978 } 979 980 /// See if LazyValueInfo's ability to exploit edge conditions or range 981 /// information is sufficient to prove the signs of both operands of this SDiv. 982 /// If this is the case, replace the SDiv with a UDiv. Even for local 983 /// conditions, this can sometimes prove conditions instcombine can't by 984 /// exploiting range information. 985 static bool processSDiv(BinaryOperator *SDI, const ConstantRange &LCR, 986 const ConstantRange &RCR, LazyValueInfo *LVI) { 987 assert(SDI->getOpcode() == Instruction::SDiv); 988 989 // Check whether the division folds to a constant. 990 ConstantRange DivCR = LCR.sdiv(RCR); 991 if (const APInt *Elem = DivCR.getSingleElement()) { 992 SDI->replaceAllUsesWith(ConstantInt::get(SDI->getType(), *Elem)); 993 SDI->eraseFromParent(); 994 return true; 995 } 996 997 struct Operand { 998 Value *V; 999 Domain D; 1000 }; 1001 std::array<Operand, 2> Ops = {{{SDI->getOperand(0), getDomain(LCR)}, 1002 {SDI->getOperand(1), getDomain(RCR)}}}; 1003 if (Ops[0].D == Domain::Unknown || Ops[1].D == Domain::Unknown) 1004 return false; 1005 1006 // We know domains of both of the operands! 1007 ++NumSDivs; 1008 1009 // We need operands to be non-negative, so negate each one that isn't. 1010 for (Operand &Op : Ops) { 1011 if (Op.D == Domain::NonNegative) 1012 continue; 1013 auto *BO = BinaryOperator::CreateNeg(Op.V, Op.V->getName() + ".nonneg", 1014 SDI->getIterator()); 1015 BO->setDebugLoc(SDI->getDebugLoc()); 1016 Op.V = BO; 1017 } 1018 1019 auto *UDiv = BinaryOperator::CreateUDiv(Ops[0].V, Ops[1].V, SDI->getName(), 1020 SDI->getIterator()); 1021 UDiv->setDebugLoc(SDI->getDebugLoc()); 1022 UDiv->setIsExact(SDI->isExact()); 1023 1024 auto *Res = UDiv; 1025 1026 // If the operands had two different domains, we need to negate the result. 1027 if (Ops[0].D != Ops[1].D) { 1028 Res = BinaryOperator::CreateNeg(Res, Res->getName() + ".neg", 1029 SDI->getIterator()); 1030 Res->setDebugLoc(SDI->getDebugLoc()); 1031 } 1032 1033 SDI->replaceAllUsesWith(Res); 1034 SDI->eraseFromParent(); 1035 1036 // Try to simplify our new udiv. 1037 processUDivOrURem(UDiv, LVI); 1038 1039 return true; 1040 } 1041 1042 static bool processSDivOrSRem(BinaryOperator *Instr, LazyValueInfo *LVI) { 1043 assert(Instr->getOpcode() == Instruction::SDiv || 1044 Instr->getOpcode() == Instruction::SRem); 1045 ConstantRange LCR = 1046 LVI->getConstantRangeAtUse(Instr->getOperandUse(0), /*AllowUndef*/ false); 1047 // Allow undef for RHS, as we can assume it is division by zero UB. 1048 ConstantRange RCR = 1049 LVI->getConstantRangeAtUse(Instr->getOperandUse(1), /*AlloweUndef*/ true); 1050 if (Instr->getOpcode() == Instruction::SDiv) 1051 if (processSDiv(Instr, LCR, RCR, LVI)) 1052 return true; 1053 1054 if (Instr->getOpcode() == Instruction::SRem) { 1055 if (processSRem(Instr, LCR, RCR, LVI)) 1056 return true; 1057 } 1058 1059 return narrowSDivOrSRem(Instr, LCR, RCR); 1060 } 1061 1062 static bool processAShr(BinaryOperator *SDI, LazyValueInfo *LVI) { 1063 ConstantRange LRange = 1064 LVI->getConstantRangeAtUse(SDI->getOperandUse(0), /*UndefAllowed*/ false); 1065 unsigned OrigWidth = SDI->getType()->getScalarSizeInBits(); 1066 ConstantRange NegOneOrZero = 1067 ConstantRange(APInt(OrigWidth, (uint64_t)-1, true), APInt(OrigWidth, 1)); 1068 if (NegOneOrZero.contains(LRange)) { 1069 // ashr of -1 or 0 never changes the value, so drop the whole instruction 1070 ++NumAShrsRemoved; 1071 SDI->replaceAllUsesWith(SDI->getOperand(0)); 1072 SDI->eraseFromParent(); 1073 return true; 1074 } 1075 1076 if (!LRange.isAllNonNegative()) 1077 return false; 1078 1079 ++NumAShrsConverted; 1080 auto *BO = BinaryOperator::CreateLShr(SDI->getOperand(0), SDI->getOperand(1), 1081 "", SDI->getIterator()); 1082 BO->takeName(SDI); 1083 BO->setDebugLoc(SDI->getDebugLoc()); 1084 BO->setIsExact(SDI->isExact()); 1085 SDI->replaceAllUsesWith(BO); 1086 SDI->eraseFromParent(); 1087 1088 return true; 1089 } 1090 1091 static bool processSExt(SExtInst *SDI, LazyValueInfo *LVI) { 1092 const Use &Base = SDI->getOperandUse(0); 1093 if (!LVI->getConstantRangeAtUse(Base, /*UndefAllowed*/ false) 1094 .isAllNonNegative()) 1095 return false; 1096 1097 ++NumSExt; 1098 auto *ZExt = CastInst::CreateZExtOrBitCast(Base, SDI->getType(), "", 1099 SDI->getIterator()); 1100 ZExt->takeName(SDI); 1101 ZExt->setDebugLoc(SDI->getDebugLoc()); 1102 ZExt->setNonNeg(); 1103 SDI->replaceAllUsesWith(ZExt); 1104 SDI->eraseFromParent(); 1105 1106 return true; 1107 } 1108 1109 static bool processPossibleNonNeg(PossiblyNonNegInst *I, LazyValueInfo *LVI) { 1110 if (I->hasNonNeg()) 1111 return false; 1112 1113 const Use &Base = I->getOperandUse(0); 1114 if (!LVI->getConstantRangeAtUse(Base, /*UndefAllowed*/ false) 1115 .isAllNonNegative()) 1116 return false; 1117 1118 ++NumNNeg; 1119 I->setNonNeg(); 1120 1121 return true; 1122 } 1123 1124 static bool processZExt(ZExtInst *ZExt, LazyValueInfo *LVI) { 1125 return processPossibleNonNeg(cast<PossiblyNonNegInst>(ZExt), LVI); 1126 } 1127 1128 static bool processUIToFP(UIToFPInst *UIToFP, LazyValueInfo *LVI) { 1129 return processPossibleNonNeg(cast<PossiblyNonNegInst>(UIToFP), LVI); 1130 } 1131 1132 static bool processSIToFP(SIToFPInst *SIToFP, LazyValueInfo *LVI) { 1133 const Use &Base = SIToFP->getOperandUse(0); 1134 if (!LVI->getConstantRangeAtUse(Base, /*UndefAllowed*/ false) 1135 .isAllNonNegative()) 1136 return false; 1137 1138 ++NumSIToFP; 1139 auto *UIToFP = CastInst::Create(Instruction::UIToFP, Base, SIToFP->getType(), 1140 "", SIToFP->getIterator()); 1141 UIToFP->takeName(SIToFP); 1142 UIToFP->setDebugLoc(SIToFP->getDebugLoc()); 1143 UIToFP->setNonNeg(); 1144 SIToFP->replaceAllUsesWith(UIToFP); 1145 SIToFP->eraseFromParent(); 1146 1147 return true; 1148 } 1149 1150 static bool processBinOp(BinaryOperator *BinOp, LazyValueInfo *LVI) { 1151 using OBO = OverflowingBinaryOperator; 1152 1153 bool NSW = BinOp->hasNoSignedWrap(); 1154 bool NUW = BinOp->hasNoUnsignedWrap(); 1155 if (NSW && NUW) 1156 return false; 1157 1158 Instruction::BinaryOps Opcode = BinOp->getOpcode(); 1159 ConstantRange LRange = LVI->getConstantRangeAtUse(BinOp->getOperandUse(0), 1160 /*UndefAllowed=*/false); 1161 ConstantRange RRange = LVI->getConstantRangeAtUse(BinOp->getOperandUse(1), 1162 /*UndefAllowed=*/false); 1163 1164 bool Changed = false; 1165 bool NewNUW = false, NewNSW = false; 1166 if (!NUW) { 1167 ConstantRange NUWRange = ConstantRange::makeGuaranteedNoWrapRegion( 1168 Opcode, RRange, OBO::NoUnsignedWrap); 1169 NewNUW = NUWRange.contains(LRange); 1170 Changed |= NewNUW; 1171 } 1172 if (!NSW) { 1173 ConstantRange NSWRange = ConstantRange::makeGuaranteedNoWrapRegion( 1174 Opcode, RRange, OBO::NoSignedWrap); 1175 NewNSW = NSWRange.contains(LRange); 1176 Changed |= NewNSW; 1177 } 1178 1179 setDeducedOverflowingFlags(BinOp, Opcode, NewNSW, NewNUW); 1180 1181 return Changed; 1182 } 1183 1184 static bool processAnd(BinaryOperator *BinOp, LazyValueInfo *LVI) { 1185 using namespace llvm::PatternMatch; 1186 1187 // Pattern match (and lhs, C) where C includes a superset of bits which might 1188 // be set in lhs. This is a common truncation idiom created by instcombine. 1189 const Use &LHS = BinOp->getOperandUse(0); 1190 const APInt *RHS; 1191 if (!match(BinOp->getOperand(1), m_LowBitMask(RHS))) 1192 return false; 1193 1194 // We can only replace the AND with LHS based on range info if the range does 1195 // not include undef. 1196 ConstantRange LRange = 1197 LVI->getConstantRangeAtUse(LHS, /*UndefAllowed=*/false); 1198 if (!LRange.getUnsignedMax().ule(*RHS)) 1199 return false; 1200 1201 BinOp->replaceAllUsesWith(LHS); 1202 BinOp->eraseFromParent(); 1203 NumAnd++; 1204 return true; 1205 } 1206 1207 static bool runImpl(Function &F, LazyValueInfo *LVI, DominatorTree *DT, 1208 const SimplifyQuery &SQ) { 1209 bool FnChanged = false; 1210 // Visiting in a pre-order depth-first traversal causes us to simplify early 1211 // blocks before querying later blocks (which require us to analyze early 1212 // blocks). Eagerly simplifying shallow blocks means there is strictly less 1213 // work to do for deep blocks. This also means we don't visit unreachable 1214 // blocks. 1215 for (BasicBlock *BB : depth_first(&F.getEntryBlock())) { 1216 bool BBChanged = false; 1217 for (Instruction &II : llvm::make_early_inc_range(*BB)) { 1218 switch (II.getOpcode()) { 1219 case Instruction::Select: 1220 BBChanged |= processSelect(cast<SelectInst>(&II), LVI); 1221 break; 1222 case Instruction::PHI: 1223 BBChanged |= processPHI(cast<PHINode>(&II), LVI, DT, SQ); 1224 break; 1225 case Instruction::ICmp: 1226 case Instruction::FCmp: 1227 BBChanged |= processCmp(cast<CmpInst>(&II), LVI); 1228 break; 1229 case Instruction::Call: 1230 case Instruction::Invoke: 1231 BBChanged |= processCallSite(cast<CallBase>(II), LVI); 1232 break; 1233 case Instruction::SRem: 1234 case Instruction::SDiv: 1235 BBChanged |= processSDivOrSRem(cast<BinaryOperator>(&II), LVI); 1236 break; 1237 case Instruction::UDiv: 1238 case Instruction::URem: 1239 BBChanged |= processUDivOrURem(cast<BinaryOperator>(&II), LVI); 1240 break; 1241 case Instruction::AShr: 1242 BBChanged |= processAShr(cast<BinaryOperator>(&II), LVI); 1243 break; 1244 case Instruction::SExt: 1245 BBChanged |= processSExt(cast<SExtInst>(&II), LVI); 1246 break; 1247 case Instruction::ZExt: 1248 BBChanged |= processZExt(cast<ZExtInst>(&II), LVI); 1249 break; 1250 case Instruction::UIToFP: 1251 BBChanged |= processUIToFP(cast<UIToFPInst>(&II), LVI); 1252 break; 1253 case Instruction::SIToFP: 1254 BBChanged |= processSIToFP(cast<SIToFPInst>(&II), LVI); 1255 break; 1256 case Instruction::Add: 1257 case Instruction::Sub: 1258 case Instruction::Mul: 1259 case Instruction::Shl: 1260 BBChanged |= processBinOp(cast<BinaryOperator>(&II), LVI); 1261 break; 1262 case Instruction::And: 1263 BBChanged |= processAnd(cast<BinaryOperator>(&II), LVI); 1264 break; 1265 } 1266 } 1267 1268 Instruction *Term = BB->getTerminator(); 1269 switch (Term->getOpcode()) { 1270 case Instruction::Switch: 1271 BBChanged |= processSwitch(cast<SwitchInst>(Term), LVI, DT); 1272 break; 1273 case Instruction::Ret: { 1274 auto *RI = cast<ReturnInst>(Term); 1275 // Try to determine the return value if we can. This is mainly here to 1276 // simplify the writing of unit tests, but also helps to enable IPO by 1277 // constant folding the return values of callees. 1278 auto *RetVal = RI->getReturnValue(); 1279 if (!RetVal) break; // handle "ret void" 1280 if (isa<Constant>(RetVal)) break; // nothing to do 1281 if (auto *C = getConstantAt(RetVal, RI, LVI)) { 1282 ++NumReturns; 1283 RI->replaceUsesOfWith(RetVal, C); 1284 BBChanged = true; 1285 } 1286 } 1287 } 1288 1289 FnChanged |= BBChanged; 1290 } 1291 1292 return FnChanged; 1293 } 1294 1295 PreservedAnalyses 1296 CorrelatedValuePropagationPass::run(Function &F, FunctionAnalysisManager &AM) { 1297 LazyValueInfo *LVI = &AM.getResult<LazyValueAnalysis>(F); 1298 DominatorTree *DT = &AM.getResult<DominatorTreeAnalysis>(F); 1299 1300 bool Changed = runImpl(F, LVI, DT, getBestSimplifyQuery(AM, F)); 1301 1302 PreservedAnalyses PA; 1303 if (!Changed) { 1304 PA = PreservedAnalyses::all(); 1305 } else { 1306 #if defined(EXPENSIVE_CHECKS) 1307 assert(DT->verify(DominatorTree::VerificationLevel::Full)); 1308 #else 1309 assert(DT->verify(DominatorTree::VerificationLevel::Fast)); 1310 #endif // EXPENSIVE_CHECKS 1311 1312 PA.preserve<DominatorTreeAnalysis>(); 1313 PA.preserve<LazyValueAnalysis>(); 1314 } 1315 1316 // Keeping LVI alive is expensive, both because it uses a lot of memory, and 1317 // because invalidating values in LVI is expensive. While CVP does preserve 1318 // LVI, we know that passes after JumpThreading+CVP will not need the result 1319 // of this analysis, so we forcefully discard it early. 1320 PA.abandon<LazyValueAnalysis>(); 1321 return PA; 1322 } 1323