//===- CorrelatedValuePropagation.cpp - Propagate CFG-derived info --------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the Correlated Value Propagation pass.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Scalar/CorrelatedValuePropagation.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LazyValueInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Transforms/Utils/Local.h"
#include <cassert>
#include <optional>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "correlated-value-propagation"

STATISTIC(NumPhis, "Number of phis propagated");
STATISTIC(NumPhiCommon, "Number of phis deleted via common incoming value");
STATISTIC(NumSelects, "Number of selects propagated");
STATISTIC(NumCmps, "Number of comparisons propagated");
STATISTIC(NumReturns, "Number of return values propagated");
STATISTIC(NumDeadCases, "Number of switch cases removed");
STATISTIC(NumSDivSRemsNarrowed,
          "Number of sdivs/srems whose width was decreased");
STATISTIC(NumSDivs, "Number of sdiv converted to udiv");
STATISTIC(NumUDivURemsNarrowed,
          "Number of udivs/urems whose width was decreased");
STATISTIC(NumAShrsConverted, "Number of ashr converted to lshr");
STATISTIC(NumAShrsRemoved, "Number of ashr removed");
STATISTIC(NumSRems, "Number of srem converted to urem");
STATISTIC(NumSExt, "Number of sext converted to zext");
STATISTIC(NumSIToFP, "Number of sitofp converted to uitofp");
STATISTIC(NumSICmps, "Number of signed icmp preds simplified to unsigned");
STATISTIC(NumAnd, "Number of ands removed");
STATISTIC(NumNW, "Number of no-wrap deductions");
STATISTIC(NumNSW, "Number of no-signed-wrap deductions");
STATISTIC(NumNUW, "Number of no-unsigned-wrap deductions");
STATISTIC(NumAddNW, "Number of no-wrap deductions for add");
STATISTIC(NumAddNSW, "Number of no-signed-wrap deductions for add");
STATISTIC(NumAddNUW, "Number of no-unsigned-wrap deductions for add");
STATISTIC(NumSubNW, "Number of no-wrap deductions for sub");
STATISTIC(NumSubNSW, "Number of no-signed-wrap deductions for sub");
STATISTIC(NumSubNUW, "Number of no-unsigned-wrap deductions for sub");
STATISTIC(NumMulNW, "Number of no-wrap deductions for mul");
STATISTIC(NumMulNSW, "Number of no-signed-wrap deductions for mul");
STATISTIC(NumMulNUW, "Number of no-unsigned-wrap deductions for mul");
STATISTIC(NumShlNW, "Number of no-wrap deductions for shl");
STATISTIC(NumShlNSW, "Number of no-signed-wrap deductions for shl");
STATISTIC(NumShlNUW, "Number of no-unsigned-wrap deductions for shl");
STATISTIC(NumAbs, "Number of llvm.abs intrinsics removed");
STATISTIC(NumOverflows, "Number of overflow checks removed");
STATISTIC(NumSaturating,
          "Number of saturating arithmetics converted to normal arithmetics");
STATISTIC(NumNonNull, "Number of function pointer arguments marked non-null");
STATISTIC(NumCmpIntr, "Number of llvm.[us]cmp intrinsics removed");
STATISTIC(NumMinMax, "Number of llvm.[us]{min,max} intrinsics removed");
STATISTIC(NumSMinMax,
          "Number of llvm.s{min,max} intrinsics simplified to unsigned");
STATISTIC(NumUDivURemsNarrowedExpanded,
          "Number of bound udiv's/urem's expanded");
STATISTIC(NumNNeg, "Number of zext/uitofp non-negative deductions");

static Constant *getConstantAt(Value *V, Instruction *At, LazyValueInfo *LVI) {
  if (Constant *C = LVI->getConstant(V, At))
    return C;

  // TODO: The following really should be sunk inside LVI's core algorithm, or
  // at least the outer shims around such.
  auto *C = dyn_cast<CmpInst>(V);
  if (!C)
    return nullptr;

  Value *Op0 = C->getOperand(0);
  Constant *Op1 = dyn_cast<Constant>(C->getOperand(1));
  if (!Op1)
    return nullptr;

  return LVI->getPredicateAt(C->getPredicate(), Op0, Op1, At,
                             /*UseBlockValue=*/false);
}

static bool processSelect(SelectInst *S, LazyValueInfo *LVI) {
  if (S->getType()->isVectorTy() || isa<Constant>(S->getCondition()))
    return false;

  bool Changed = false;
  for (Use &U : make_early_inc_range(S->uses())) {
    auto *I = cast<Instruction>(U.getUser());
    Constant *C;
    if (auto *PN = dyn_cast<PHINode>(I))
      C = LVI->getConstantOnEdge(S->getCondition(), PN->getIncomingBlock(U),
                                 I->getParent(), I);
    else
      C = getConstantAt(S->getCondition(), I, LVI);

    auto *CI = dyn_cast_or_null<ConstantInt>(C);
    if (!CI)
      continue;

    U.set(CI->isOne() ? S->getTrueValue() : S->getFalseValue());
    Changed = true;
    ++NumSelects;
  }

  if (Changed && S->use_empty())
    S->eraseFromParent();

  return Changed;
}
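
// Illustrative example for processSelect above (sketch IR, not taken from
// this file): when LVI can prove the select's condition along the path to a
// use, that use is rewritten to the corresponding select arm:
//   %s = select i1 %cond, i32 %a, i32 %b
//   br i1 %cond, label %then, label %else
// then:                               ; %cond is known true in this block
//   %u = add i32 %s, 1     -->     %u = add i32 %a, 1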

/// Try to simplify a phi with constant incoming values that match the edge
/// values of a non-constant value on all other edges:
/// bb0:
///   %isnull = icmp eq i8* %x, null
///   br i1 %isnull, label %bb2, label %bb1
/// bb1:
///   br label %bb2
/// bb2:
///   %r = phi i8* [ %x, %bb1 ], [ null, %bb0 ]
/// -->
///   %r = %x
static bool simplifyCommonValuePhi(PHINode *P, LazyValueInfo *LVI,
                                   DominatorTree *DT) {
  // Collect incoming constants and initialize possible common value.
  SmallVector<std::pair<Constant *, unsigned>, 4> IncomingConstants;
  Value *CommonValue = nullptr;
  for (unsigned i = 0, e = P->getNumIncomingValues(); i != e; ++i) {
    Value *Incoming = P->getIncomingValue(i);
    if (auto *IncomingConstant = dyn_cast<Constant>(Incoming)) {
      IncomingConstants.push_back(std::make_pair(IncomingConstant, i));
    } else if (!CommonValue) {
      // The potential common value is initialized to the first non-constant.
      CommonValue = Incoming;
    } else if (Incoming != CommonValue) {
      // There can be only one non-constant common value.
      return false;
    }
  }

  if (!CommonValue || IncomingConstants.empty())
    return false;

  // The common value must be valid in all incoming blocks.
  BasicBlock *ToBB = P->getParent();
  if (auto *CommonInst = dyn_cast<Instruction>(CommonValue))
    if (!DT->dominates(CommonInst, ToBB))
      return false;

  // We have a phi with exactly 1 variable incoming value and 1 or more
  // constant incoming values. See if all constant incoming values can be
  // mapped back to the same incoming variable value.
  for (auto &IncomingConstant : IncomingConstants) {
    Constant *C = IncomingConstant.first;
    BasicBlock *IncomingBB = P->getIncomingBlock(IncomingConstant.second);
    if (C != LVI->getConstantOnEdge(CommonValue, IncomingBB, ToBB, P))
      return false;
  }

  // LVI only guarantees that the value matches a certain constant if the
  // value is not poison. Make sure we don't replace a well-defined value with
  // poison. This is usually satisfied due to a prior branch on the value.
  if (!isGuaranteedNotToBePoison(CommonValue, nullptr, P, DT))
    return false;

  // All constant incoming values map to the same variable along the incoming
  // edges of the phi. The phi is unnecessary.
  P->replaceAllUsesWith(CommonValue);
  P->eraseFromParent();
  ++NumPhiCommon;
  return true;
}

static Value *getValueOnEdge(LazyValueInfo *LVI, Value *Incoming,
                             BasicBlock *From, BasicBlock *To,
                             Instruction *CxtI) {
  if (Constant *C = LVI->getConstantOnEdge(Incoming, From, To, CxtI))
    return C;

  // Check whether the incoming value is a select with a scalar condition for
  // which LVI can tell us the value. In that case replace the incoming value
  // with the appropriate value of the select. This often allows us to remove
  // the select later.
  auto *SI = dyn_cast<SelectInst>(Incoming);
  if (!SI)
    return nullptr;

  // Once LVI learns to handle vector types, we could also add support
  // for vector type constants that are not all zeroes or all ones.
  Value *Condition = SI->getCondition();
  if (!Condition->getType()->isVectorTy()) {
    if (Constant *C = LVI->getConstantOnEdge(Condition, From, To, CxtI)) {
      if (C->isOneValue())
        return SI->getTrueValue();
      if (C->isZeroValue())
        return SI->getFalseValue();
    }
  }

  // Check whether one of the select's arms is a constant that LVI proves the
  // select can never equal; the select must then produce the other arm, so
  // replace the incoming value with that arm. This often allows us to remove
  // the select later.

  // The "false" case.
  if (auto *C = dyn_cast<Constant>(SI->getFalseValue()))
    if (auto *Res = dyn_cast_or_null<ConstantInt>(
            LVI->getPredicateOnEdge(ICmpInst::ICMP_EQ, SI, C, From, To, CxtI));
        Res && Res->isZero())
      return SI->getTrueValue();

  // The "true" case: similar to the "false" case, but try the select's
  // "true" value.
  if (auto *C = dyn_cast<Constant>(SI->getTrueValue()))
    if (auto *Res = dyn_cast_or_null<ConstantInt>(
            LVI->getPredicateOnEdge(ICmpInst::ICMP_EQ, SI, C, From, To, CxtI));
        Res && Res->isZero())
      return SI->getFalseValue();

  return nullptr;
}

static bool processPHI(PHINode *P, LazyValueInfo *LVI, DominatorTree *DT,
                       const SimplifyQuery &SQ) {
  bool Changed = false;

  BasicBlock *BB = P->getParent();
  for (unsigned i = 0, e = P->getNumIncomingValues(); i < e; ++i) {
    Value *Incoming = P->getIncomingValue(i);
    if (isa<Constant>(Incoming))
      continue;

    Value *V = getValueOnEdge(LVI, Incoming, P->getIncomingBlock(i), BB, P);
    if (V) {
      P->setIncomingValue(i, V);
      Changed = true;
    }
  }

  if (Value *V = simplifyInstruction(P, SQ)) {
    P->replaceAllUsesWith(V);
    P->eraseFromParent();
    Changed = true;
  }

  if (!Changed)
    Changed = simplifyCommonValuePhi(P, LVI, DT);

  if (Changed)
    ++NumPhis;

  return Changed;
}

static bool processICmp(ICmpInst *Cmp, LazyValueInfo *LVI) {
  // Only for relational comparisons of integers: signed predicates, or
  // unsigned predicates that do not carry the samesign flag yet.
  if (!Cmp->getOperand(0)->getType()->isIntOrIntVectorTy())
    return false;

  if (!Cmp->isSigned() && (!Cmp->isUnsigned() || Cmp->hasSameSign()))
    return false;

  bool Changed = false;

  ConstantRange CR1 = LVI->getConstantRangeAtUse(Cmp->getOperandUse(0),
                                                 /*UndefAllowed=*/false),
                CR2 = LVI->getConstantRangeAtUse(Cmp->getOperandUse(1),
                                                 /*UndefAllowed=*/false);

  if (Cmp->isSigned()) {
    ICmpInst::Predicate UnsignedPred =
        ConstantRange::getEquivalentPredWithFlippedSignedness(
            Cmp->getPredicate(), CR1, CR2);

    if (UnsignedPred == ICmpInst::Predicate::BAD_ICMP_PREDICATE)
      return false;

    ++NumSICmps;
    Cmp->setPredicate(UnsignedPred);
    Changed = true;
  }

  if (ConstantRange::areInsensitiveToSignednessOfICmpPredicate(CR1, CR2)) {
    Cmp->setSameSign();
    Changed = true;
  }

  return Changed;
}

/// See if LazyValueInfo's ability to exploit edge conditions or range
/// information is sufficient to prove this comparison. Even for local
/// conditions, this can sometimes prove conditions instcombine can't by
/// exploiting range information.
static bool constantFoldCmp(CmpInst *Cmp, LazyValueInfo *LVI) {
  Value *Op0 = Cmp->getOperand(0);
  Value *Op1 = Cmp->getOperand(1);
  Constant *Res = LVI->getPredicateAt(Cmp->getPredicate(), Op0, Op1, Cmp,
                                      /*UseBlockValue=*/true);
  if (!Res)
    return false;

  ++NumCmps;
  Cmp->replaceAllUsesWith(Res);
  Cmp->eraseFromParent();
  return true;
}

static bool processCmp(CmpInst *Cmp, LazyValueInfo *LVI) {
  if (constantFoldCmp(Cmp, LVI))
    return true;

  if (auto *ICmp = dyn_cast<ICmpInst>(Cmp))
    if (processICmp(ICmp, LVI))
      return true;

  return false;
}
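
// Illustrative example for constantFoldCmp/processCmp above (sketch IR, not
// taken from this file): a dominating branch gives LVI a range that proves
// the second compare outright:
//   %c1 = icmp ult i32 %x, 10
//   br i1 %c1, label %in, label %out
// in:                                 ; %x is in [0, 10) here
//   %c2 = icmp ult i32 %x, 20        ; folds to true and is erased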

/// Simplify a switch instruction by removing cases which can never fire. If
/// the uselessness of a case could be determined locally then constant
/// propagation would already have figured it out. Instead, walk the
/// predecessors and statically evaluate cases based on information available
/// on that edge. Cases that cannot fire no matter what the incoming edge can
/// safely be removed. If a case fires on every incoming edge then the entire
/// switch can be removed and replaced with a branch to the case destination.
static bool processSwitch(SwitchInst *I, LazyValueInfo *LVI,
                          DominatorTree *DT) {
  DomTreeUpdater DTU(*DT, DomTreeUpdater::UpdateStrategy::Lazy);
  Value *Cond = I->getCondition();
  BasicBlock *BB = I->getParent();

  // Analyse each switch case in turn.
  bool Changed = false;
  DenseMap<BasicBlock *, int> SuccessorsCount;
  for (auto *Succ : successors(BB))
    SuccessorsCount[Succ]++;

  { // Scope for SwitchInstProfUpdateWrapper. It must not live during
    // ConstantFoldTerminator() as the underlying SwitchInst can be changed.
    SwitchInstProfUpdateWrapper SI(*I);
    unsigned ReachableCaseCount = 0;

    for (auto CI = SI->case_begin(), CE = SI->case_end(); CI != CE;) {
      ConstantInt *Case = CI->getCaseValue();
      auto *Res = dyn_cast_or_null<ConstantInt>(
          LVI->getPredicateAt(CmpInst::ICMP_EQ, Cond, Case, I,
                              /* UseBlockValue */ true));

      if (Res && Res->isZero()) {
        // This case never fires - remove it.
        BasicBlock *Succ = CI->getCaseSuccessor();
        Succ->removePredecessor(BB);
        CI = SI.removeCase(CI);
        CE = SI->case_end();

        // The condition can be modified by removePredecessor's PHI
        // simplification logic.
        Cond = SI->getCondition();

        ++NumDeadCases;
        Changed = true;
        if (--SuccessorsCount[Succ] == 0)
          DTU.applyUpdatesPermissive({{DominatorTree::Delete, BB, Succ}});
        continue;
      }
      if (Res && Res->isOne()) {
        // This case always fires. Arrange for the switch to be turned into an
        // unconditional branch by replacing the switch condition with the case
        // value.
        SI->setCondition(Case);
        NumDeadCases += SI->getNumCases();
        Changed = true;
        break;
      }

      // Increment the case iterator since we didn't delete it.
      ++CI;
      ++ReachableCaseCount;
    }

    BasicBlock *DefaultDest = SI->getDefaultDest();
    if (ReachableCaseCount > 1 &&
        !isa<UnreachableInst>(DefaultDest->getFirstNonPHIOrDbg())) {
      ConstantRange CR = LVI->getConstantRangeAtUse(I->getOperandUse(0),
                                                    /*UndefAllowed*/ false);
      // The default dest is unreachable if all cases are covered.
      if (!CR.isSizeLargerThan(ReachableCaseCount)) {
        BasicBlock *NewUnreachableBB =
            BasicBlock::Create(BB->getContext(), "default.unreachable",
                               BB->getParent(), DefaultDest);
        new UnreachableInst(BB->getContext(), NewUnreachableBB);

        DefaultDest->removePredecessor(BB);
        SI->setDefaultDest(NewUnreachableBB);

        if (SuccessorsCount[DefaultDest] == 1)
          DTU.applyUpdates({{DominatorTree::Delete, BB, DefaultDest}});
        DTU.applyUpdates({{DominatorTree::Insert, BB, NewUnreachableBB}});

        ++NumDeadCases;
        Changed = true;
      }
    }
  }

  if (Changed)
    // If the switch has been simplified to the point where it can be replaced
    // by a branch then do so now.
    ConstantFoldTerminator(BB, /*DeleteDeadConditions=*/false,
                           /*TLI=*/nullptr, &DTU);
  return Changed;
}
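
// Illustrative example for processSwitch above (sketch IR, not taken from
// this file): if LVI proves %x is in [0, 4) on entry to the switch, the
// "case 4" arm can never fire and is removed:
//   switch i32 %x, label %default [
//     i32 0, label %a
//     i32 4, label %b                 ; dead case, deleted
//   ]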

// See if we can prove that the given binary op intrinsic will not overflow.
static bool willNotOverflow(BinaryOpIntrinsic *BO, LazyValueInfo *LVI) {
  ConstantRange LRange =
      LVI->getConstantRangeAtUse(BO->getOperandUse(0), /*UndefAllowed*/ false);
  ConstantRange RRange =
      LVI->getConstantRangeAtUse(BO->getOperandUse(1), /*UndefAllowed*/ false);
  ConstantRange NWRegion = ConstantRange::makeGuaranteedNoWrapRegion(
      BO->getBinaryOp(), RRange, BO->getNoWrapKind());
  return NWRegion.contains(LRange);
}
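
// Worked example for willNotOverflow above (the concrete ranges are
// assumptions for illustration): for an i8 @llvm.uadd.with.overflow where
// RRange = [0, 16), makeGuaranteedNoWrapRegion returns the LHS values in
// [0, 241), since 240 + 15 = 255 is the largest sum that still fits in i8.
// If LRange = [0, 100) is contained in that region, the add provably never
// wraps.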

static void setDeducedOverflowingFlags(Value *V, Instruction::BinaryOps Opcode,
                                       bool NewNSW, bool NewNUW) {
  Statistic *OpcNW, *OpcNSW, *OpcNUW;
  switch (Opcode) {
  case Instruction::Add:
    OpcNW = &NumAddNW;
    OpcNSW = &NumAddNSW;
    OpcNUW = &NumAddNUW;
    break;
  case Instruction::Sub:
    OpcNW = &NumSubNW;
    OpcNSW = &NumSubNSW;
    OpcNUW = &NumSubNUW;
    break;
  case Instruction::Mul:
    OpcNW = &NumMulNW;
    OpcNSW = &NumMulNSW;
    OpcNUW = &NumMulNUW;
    break;
  case Instruction::Shl:
    OpcNW = &NumShlNW;
    OpcNSW = &NumShlNSW;
    OpcNUW = &NumShlNUW;
    break;
  default:
    llvm_unreachable("Will not be called with other binops");
  }

  auto *Inst = dyn_cast<Instruction>(V);
  if (NewNSW) {
    ++NumNW;
    ++*OpcNW;
    ++NumNSW;
    ++*OpcNSW;
    if (Inst)
      Inst->setHasNoSignedWrap();
  }
  if (NewNUW) {
    ++NumNW;
    ++*OpcNW;
    ++NumNUW;
    ++*OpcNUW;
    if (Inst)
      Inst->setHasNoUnsignedWrap();
  }
}

static bool processBinOp(BinaryOperator *BinOp, LazyValueInfo *LVI);

// See if the @llvm.abs argument is always positive/negative, and simplify.
// Notably, INT_MIN can belong to either range, regardless of the NSW flag,
// because it is negation-invariant.
static bool processAbsIntrinsic(IntrinsicInst *II, LazyValueInfo *LVI) {
  Value *X = II->getArgOperand(0);
  bool IsIntMinPoison = cast<ConstantInt>(II->getArgOperand(1))->isOne();
  APInt IntMin = APInt::getSignedMinValue(X->getType()->getScalarSizeInBits());
  ConstantRange Range = LVI->getConstantRangeAtUse(
      II->getOperandUse(0), /*UndefAllowed*/ IsIntMinPoison);

  // Is X in [0, IntMin]? NOTE: INT_MIN is fine!
  if (Range.icmp(CmpInst::ICMP_ULE, IntMin)) {
    ++NumAbs;
    II->replaceAllUsesWith(X);
    II->eraseFromParent();
    return true;
  }

  // Is X in [IntMin, 0]? NOTE: INT_MIN is fine!
  if (Range.getSignedMax().isNonPositive()) {
    IRBuilder<> B(II);
    Value *NegX = B.CreateNeg(X, II->getName(),
                              /*HasNSW=*/IsIntMinPoison);
    ++NumAbs;
    II->replaceAllUsesWith(NegX);
    II->eraseFromParent();

    // See if we can infer some no-wrap flags.
    if (auto *BO = dyn_cast<BinaryOperator>(NegX))
      processBinOp(BO, LVI);

    return true;
  }

  // The argument's range crosses zero.
  // Can we at least tell that the argument is never INT_MIN?
  if (!IsIntMinPoison && !Range.contains(IntMin)) {
    ++NumNSW;
    ++NumSubNSW;
    II->setArgOperand(1, ConstantInt::getTrue(II->getContext()));
    return true;
  }
  return false;
}

static bool processCmpIntrinsic(CmpIntrinsic *CI, LazyValueInfo *LVI) {
  ConstantRange LHS_CR =
      LVI->getConstantRangeAtUse(CI->getOperandUse(0), /*UndefAllowed*/ false);
  ConstantRange RHS_CR =
      LVI->getConstantRangeAtUse(CI->getOperandUse(1), /*UndefAllowed*/ false);

  if (LHS_CR.icmp(CI->getGTPredicate(), RHS_CR)) {
    ++NumCmpIntr;
    CI->replaceAllUsesWith(ConstantInt::get(CI->getType(), 1));
    CI->eraseFromParent();
    return true;
  }
  if (LHS_CR.icmp(CI->getLTPredicate(), RHS_CR)) {
    ++NumCmpIntr;
    CI->replaceAllUsesWith(ConstantInt::getSigned(CI->getType(), -1));
    CI->eraseFromParent();
    return true;
  }
  if (LHS_CR.icmp(ICmpInst::ICMP_EQ, RHS_CR)) {
    ++NumCmpIntr;
    CI->replaceAllUsesWith(ConstantInt::get(CI->getType(), 0));
    CI->eraseFromParent();
    return true;
  }

  return false;
}

// See if this min/max intrinsic always picks one specific operand.
// If not, check whether we can canonicalize a signed minmax into its unsigned
// version.
static bool processMinMaxIntrinsic(MinMaxIntrinsic *MM, LazyValueInfo *LVI) {
  CmpInst::Predicate Pred = CmpInst::getNonStrictPredicate(MM->getPredicate());
  ConstantRange LHS_CR = LVI->getConstantRangeAtUse(MM->getOperandUse(0),
                                                    /*UndefAllowed*/ false);
  ConstantRange RHS_CR = LVI->getConstantRangeAtUse(MM->getOperandUse(1),
                                                    /*UndefAllowed*/ false);
  if (LHS_CR.icmp(Pred, RHS_CR)) {
    ++NumMinMax;
    MM->replaceAllUsesWith(MM->getLHS());
    MM->eraseFromParent();
    return true;
  }
  if (RHS_CR.icmp(Pred, LHS_CR)) {
    ++NumMinMax;
    MM->replaceAllUsesWith(MM->getRHS());
    MM->eraseFromParent();
    return true;
  }

  if (MM->isSigned() &&
      ConstantRange::areInsensitiveToSignednessOfICmpPredicate(LHS_CR,
                                                               RHS_CR)) {
    ++NumSMinMax;
    IRBuilder<> B(MM);
    MM->replaceAllUsesWith(B.CreateBinaryIntrinsic(
        MM->getIntrinsicID() == Intrinsic::smin ? Intrinsic::umin
                                                : Intrinsic::umax,
        MM->getLHS(), MM->getRHS()));
    MM->eraseFromParent();
    return true;
  }

  return false;
}
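
// Illustrative example for processMinMaxIntrinsic above (sketch IR, not
// taken from this file): with %x proven to be in [0, 8) and %y in [8, 16),
// %x u<= %y always holds, so the intrinsic always picks %x:
//   %m = call i32 @llvm.umin.i32(i32 %x, i32 %y)     -->     %x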

// Rewrite this with.overflow intrinsic as a plain, non-overflowing binary
// operation.
static bool processOverflowIntrinsic(WithOverflowInst *WO, LazyValueInfo *LVI) {
  IRBuilder<> B(WO);
  Instruction::BinaryOps Opcode = WO->getBinaryOp();
  bool NSW = WO->isSigned();
  bool NUW = !WO->isSigned();

  Value *NewOp =
      B.CreateBinOp(Opcode, WO->getLHS(), WO->getRHS(), WO->getName());
  setDeducedOverflowingFlags(NewOp, Opcode, NSW, NUW);

  StructType *ST = cast<StructType>(WO->getType());
  Constant *Struct = ConstantStruct::get(ST,
      { PoisonValue::get(ST->getElementType(0)),
        ConstantInt::getFalse(ST->getElementType(1)) });
  Value *NewI = B.CreateInsertValue(Struct, NewOp, 0);
  WO->replaceAllUsesWith(NewI);
  WO->eraseFromParent();
  ++NumOverflows;

  // See if we can infer the other no-wrap too.
  if (auto *BO = dyn_cast<BinaryOperator>(NewOp))
    processBinOp(BO, LVI);

  return true;
}

static bool processSaturatingInst(SaturatingInst *SI, LazyValueInfo *LVI) {
  Instruction::BinaryOps Opcode = SI->getBinaryOp();
  bool NSW = SI->isSigned();
  bool NUW = !SI->isSigned();
  BinaryOperator *BinOp = BinaryOperator::Create(
      Opcode, SI->getLHS(), SI->getRHS(), SI->getName(), SI->getIterator());
  BinOp->setDebugLoc(SI->getDebugLoc());
  setDeducedOverflowingFlags(BinOp, Opcode, NSW, NUW);

  SI->replaceAllUsesWith(BinOp);
  SI->eraseFromParent();
  ++NumSaturating;

  // See if we can infer the other no-wrap too.
  if (auto *BO = dyn_cast<BinaryOperator>(BinOp))
    processBinOp(BO, LVI);

  return true;
}
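
// Illustrative example for processSaturatingInst above (sketch IR, not taken
// from this file): with %x and %y both proven to be in [0, 100), the i8
// addition can never saturate, so it becomes a plain add with an nuw flag:
//   %r = call i8 @llvm.uadd.sat.i8(i8 %x, i8 %y)
//   -->
//   %r = add nuw i8 %x, %y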

/// Infer nonnull attributes for the arguments at the specified callsite.
static bool processCallSite(CallBase &CB, LazyValueInfo *LVI) {

  if (CB.getIntrinsicID() == Intrinsic::abs) {
    return processAbsIntrinsic(&cast<IntrinsicInst>(CB), LVI);
  }

  if (auto *CI = dyn_cast<CmpIntrinsic>(&CB)) {
    return processCmpIntrinsic(CI, LVI);
  }

  if (auto *MM = dyn_cast<MinMaxIntrinsic>(&CB)) {
    return processMinMaxIntrinsic(MM, LVI);
  }

  if (auto *WO = dyn_cast<WithOverflowInst>(&CB)) {
    if (willNotOverflow(WO, LVI))
      return processOverflowIntrinsic(WO, LVI);
  }

  if (auto *SI = dyn_cast<SaturatingInst>(&CB)) {
    if (willNotOverflow(SI, LVI))
      return processSaturatingInst(SI, LVI);
  }

  bool Changed = false;

  // Deopt bundle operands are intended to capture state with minimal
  // perturbation of the code otherwise. If we can find a constant value for
  // any such operand and remove a use of the original value, that's
  // desirable since it may allow further optimization of that value (e.g. via
  // single use rules in instcombine). Since deopt uses tend to,
  // idiomatically, appear along rare conditional paths, it's reasonably
  // likely that we have a conditional fact with which LVI can fold.
  if (auto DeoptBundle = CB.getOperandBundle(LLVMContext::OB_deopt)) {
    for (const Use &ConstU : DeoptBundle->Inputs) {
      Use &U = const_cast<Use &>(ConstU);
      Value *V = U.get();
      if (V->getType()->isVectorTy())
        continue;
      if (isa<Constant>(V))
        continue;

      Constant *C = LVI->getConstant(V, &CB);
      if (!C)
        continue;
      U.set(C);
      Changed = true;
    }
  }

  SmallVector<unsigned, 4> ArgNos;
  unsigned ArgNo = 0;

  for (Value *V : CB.args()) {
    PointerType *Type = dyn_cast<PointerType>(V->getType());
    // Try to mark pointer typed parameters as non-null. We skip the
    // relatively expensive analysis for constants which are obviously either
    // null or non-null to start with.
    if (Type && !CB.paramHasAttr(ArgNo, Attribute::NonNull) &&
        !isa<Constant>(V))
      if (auto *Res = dyn_cast_or_null<ConstantInt>(LVI->getPredicateAt(
              ICmpInst::ICMP_EQ, V, ConstantPointerNull::get(Type), &CB,
              /*UseBlockValue=*/false));
          Res && Res->isZero())
        ArgNos.push_back(ArgNo);
    ArgNo++;
  }

  assert(ArgNo == CB.arg_size() && "Call arguments not processed correctly.");

  if (ArgNos.empty())
    return Changed;

  NumNonNull += ArgNos.size();
  AttributeList AS = CB.getAttributes();
  LLVMContext &Ctx = CB.getContext();
  AS = AS.addParamAttribute(Ctx, ArgNos,
                            Attribute::get(Ctx, Attribute::NonNull));
  CB.setAttributes(AS);

  return true;
}

enum class Domain { NonNegative, NonPositive, Unknown };

static Domain getDomain(const ConstantRange &CR) {
  if (CR.isAllNonNegative())
    return Domain::NonNegative;
  if (CR.icmp(ICmpInst::ICMP_SLE, APInt::getZero(CR.getBitWidth())))
    return Domain::NonPositive;
  return Domain::Unknown;
}

/// Try to shrink a sdiv/srem's width down to the smallest power of two that's
/// sufficient to contain its operands.
static bool narrowSDivOrSRem(BinaryOperator *Instr, const ConstantRange &LCR,
                             const ConstantRange &RCR) {
  assert(Instr->getOpcode() == Instruction::SDiv ||
         Instr->getOpcode() == Instruction::SRem);

  // Find the smallest power of two bitwidth that's sufficient to hold Instr's
  // operands.
  unsigned OrigWidth = Instr->getType()->getScalarSizeInBits();

  // What is the smallest bit width that can accommodate the entire value
  // ranges of both of the operands?
  unsigned MinSignedBits =
      std::max(LCR.getMinSignedBits(), RCR.getMinSignedBits());

  // sdiv/srem is UB if the divisor is -1 and the dividend is INT_MIN, so
  // unless we can prove that such a combination is impossible, we need to
  // bump the bitwidth.
  if (RCR.contains(APInt::getAllOnes(OrigWidth)) &&
      LCR.contains(APInt::getSignedMinValue(MinSignedBits).sext(OrigWidth)))
    ++MinSignedBits;

  // Don't shrink below 8 bits wide.
  unsigned NewWidth = std::max<unsigned>(PowerOf2Ceil(MinSignedBits), 8);

  // NewWidth might be greater than OrigWidth if OrigWidth is not a power of
  // two.
  if (NewWidth >= OrigWidth)
    return false;

  ++NumSDivSRemsNarrowed;
  IRBuilder<> B{Instr};
  auto *TruncTy = Instr->getType()->getWithNewBitWidth(NewWidth);
  auto *LHS = B.CreateTruncOrBitCast(Instr->getOperand(0), TruncTy,
                                     Instr->getName() + ".lhs.trunc");
  auto *RHS = B.CreateTruncOrBitCast(Instr->getOperand(1), TruncTy,
                                     Instr->getName() + ".rhs.trunc");
  auto *BO = B.CreateBinOp(Instr->getOpcode(), LHS, RHS, Instr->getName());
  auto *Sext = B.CreateSExt(BO, Instr->getType(), Instr->getName() + ".sext");
  if (auto *BinOp = dyn_cast<BinaryOperator>(BO))
    if (BinOp->getOpcode() == Instruction::SDiv)
      BinOp->setIsExact(Instr->isExact());

  Instr->replaceAllUsesWith(Sext);
  Instr->eraseFromParent();
  return true;
}
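
// Illustrative example for narrowSDivOrSRem above (sketch IR, not taken from
// this file): if both i64 operands are proven to fit in 8 signed bits (and
// the INT_MIN / -1 combination is ruled out), the division runs in i8:
//   %d = sdiv i64 %a, %b
//   -->
//   %d.lhs.trunc = trunc i64 %a to i8
//   %d.rhs.trunc = trunc i64 %b to i8
//   %d1 = sdiv i8 %d.lhs.trunc, %d.rhs.trunc
//   %d.sext = sext i8 %d1 to i64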

static bool expandUDivOrURem(BinaryOperator *Instr, const ConstantRange &XCR,
                             const ConstantRange &YCR) {
  Type *Ty = Instr->getType();
  assert(Instr->getOpcode() == Instruction::UDiv ||
         Instr->getOpcode() == Instruction::URem);
  bool IsRem = Instr->getOpcode() == Instruction::URem;

  Value *X = Instr->getOperand(0);
  Value *Y = Instr->getOperand(1);

  // X u/ Y -> 0 iff X u< Y
  // X u% Y -> X iff X u< Y
  if (XCR.icmp(ICmpInst::ICMP_ULT, YCR)) {
    Instr->replaceAllUsesWith(IsRem ? X : Constant::getNullValue(Ty));
    Instr->eraseFromParent();
    ++NumUDivURemsNarrowedExpanded;
    return true;
  }

  // Given
  //   R = X u% Y
  // We can represent the modulo operation as a loop/self-recursion:
  //   urem_rec(X, Y):
  //     Z = X - Y
  //     if X u< Y
  //       ret X
  //     else
  //       ret urem_rec(Z, Y)
  // which isn't better, but if we only need a single iteration
  // to compute the answer, this becomes quite good:
  //   R = X < Y ? X : X - Y    iff X u< 2*Y (w/ unsigned saturation)
  // Now, we do not care about all full multiples of Y in X, they do not change
  // the answer, thus we could rewrite the expression as:
  //   X* = X - (Y * |_ X / Y _|)
  //   R  = X* % Y
  // so we don't need the *first* iteration to return, we just need to
  // know *which* iteration will always return, so we could also rewrite it as:
  //   X* = X - (Y * |_ X / Y _|)
  //   R  = X* % Y    iff X* u< 2*Y (w/ unsigned saturation)
  // but that does not seem profitable here.

  // Even if we don't know X's range, the divisor may be large enough that X
  // can never reach twice its value; in particular, if the divisor is always
  // negative, its unsigned value is at least 2^(bitwidth-1), so X u< 2*Y
  // holds trivially.
  if (!XCR.icmp(ICmpInst::ICMP_ULT, YCR.uadd_sat(YCR)) && !YCR.isAllNegative())
    return false;

  IRBuilder<> B(Instr);
  Value *ExpandedOp;
  if (XCR.icmp(ICmpInst::ICMP_UGE, YCR)) {
    // If X is between Y and 2*Y the result is known.
    if (IsRem)
      ExpandedOp = B.CreateNUWSub(X, Y);
    else
      ExpandedOp = ConstantInt::get(Instr->getType(), 1);
  } else if (IsRem) {
    // NOTE: this transformation introduces two uses of X,
    // but it may be undef so we must freeze it first.
    Value *FrozenX = X;
    if (!isGuaranteedNotToBeUndef(X))
      FrozenX = B.CreateFreeze(X, X->getName() + ".frozen");
    Value *FrozenY = Y;
    if (!isGuaranteedNotToBeUndef(Y))
      FrozenY = B.CreateFreeze(Y, Y->getName() + ".frozen");
    auto *AdjX = B.CreateNUWSub(FrozenX, FrozenY, Instr->getName() + ".urem");
    auto *Cmp = B.CreateICmp(ICmpInst::ICMP_ULT, FrozenX, FrozenY,
                             Instr->getName() + ".cmp");
    ExpandedOp = B.CreateSelect(Cmp, FrozenX, AdjX);
  } else {
    auto *Cmp =
        B.CreateICmp(ICmpInst::ICMP_UGE, X, Y, Instr->getName() + ".cmp");
    ExpandedOp = B.CreateZExt(Cmp, Ty, Instr->getName() + ".udiv");
  }
  ExpandedOp->takeName(Instr);
  Instr->replaceAllUsesWith(ExpandedOp);
  Instr->eraseFromParent();
  ++NumUDivURemsNarrowedExpanded;
  return true;
}

/// Try to shrink a udiv/urem's width down to the smallest power of two that's
/// sufficient to contain its operands.
static bool narrowUDivOrURem(BinaryOperator *Instr, const ConstantRange &XCR,
                             const ConstantRange &YCR) {
  assert(Instr->getOpcode() == Instruction::UDiv ||
         Instr->getOpcode() == Instruction::URem);

  // Find the smallest power of two bitwidth that's sufficient to hold Instr's
  // operands.

  // What is the smallest bit width that can accommodate the entire value
  // ranges of both of the operands?
  unsigned MaxActiveBits = std::max(XCR.getActiveBits(), YCR.getActiveBits());
  // Don't shrink below 8 bits wide.
  unsigned NewWidth = std::max<unsigned>(PowerOf2Ceil(MaxActiveBits), 8);

  // NewWidth might be greater than OrigWidth if OrigWidth is not a power of
  // two.
  if (NewWidth >= Instr->getType()->getScalarSizeInBits())
    return false;

  ++NumUDivURemsNarrowed;
  IRBuilder<> B{Instr};
  auto *TruncTy = Instr->getType()->getWithNewBitWidth(NewWidth);
  auto *LHS = B.CreateTruncOrBitCast(Instr->getOperand(0), TruncTy,
                                     Instr->getName() + ".lhs.trunc");
  auto *RHS = B.CreateTruncOrBitCast(Instr->getOperand(1), TruncTy,
                                     Instr->getName() + ".rhs.trunc");
  auto *BO = B.CreateBinOp(Instr->getOpcode(), LHS, RHS, Instr->getName());
  auto *Zext = B.CreateZExt(BO, Instr->getType(), Instr->getName() + ".zext");
  if (auto *BinOp = dyn_cast<BinaryOperator>(BO))
    if (BinOp->getOpcode() == Instruction::UDiv)
      BinOp->setIsExact(Instr->isExact());

  Instr->replaceAllUsesWith(Zext);
  Instr->eraseFromParent();
  return true;
}
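
// Illustrative example for expandUDivOrURem above (sketch IR, not taken from
// this file): with %x proven to be in [0, 20) and %y in [10, 16), we know
// %x u< 2 * %y, so the urem needs at most one subtraction (freezes omitted;
// they are emitted when an operand may be undef):
//   %r = urem i32 %x, %y
//   -->
//   %r.urem = sub nuw i32 %x, %y      ; selected only when %x u>= %y
//   %r.cmp  = icmp ult i32 %x, %y
//   %r      = select i1 %r.cmp, i32 %x, i32 %r.urem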

static bool processUDivOrURem(BinaryOperator *Instr, LazyValueInfo *LVI) {
  assert(Instr->getOpcode() == Instruction::UDiv ||
         Instr->getOpcode() == Instruction::URem);
  ConstantRange XCR = LVI->getConstantRangeAtUse(Instr->getOperandUse(0),
                                                 /*UndefAllowed*/ false);
  // Allow undef for the RHS: if it were undef, it could be zero, and division
  // by zero is UB anyway.
  ConstantRange YCR = LVI->getConstantRangeAtUse(Instr->getOperandUse(1),
                                                 /*UndefAllowed*/ true);
  if (expandUDivOrURem(Instr, XCR, YCR))
    return true;

  return narrowUDivOrURem(Instr, XCR, YCR);
}

static bool processSRem(BinaryOperator *SDI, const ConstantRange &LCR,
                        const ConstantRange &RCR, LazyValueInfo *LVI) {
  assert(SDI->getOpcode() == Instruction::SRem);

  if (LCR.abs().icmp(CmpInst::ICMP_ULT, RCR.abs())) {
    SDI->replaceAllUsesWith(SDI->getOperand(0));
    SDI->eraseFromParent();
    return true;
  }

  struct Operand {
    Value *V;
    Domain D;
  };
  std::array<Operand, 2> Ops = {{{SDI->getOperand(0), getDomain(LCR)},
                                 {SDI->getOperand(1), getDomain(RCR)}}};
  if (Ops[0].D == Domain::Unknown || Ops[1].D == Domain::Unknown)
    return false;

  // We know the domains of both of the operands!
  ++NumSRems;

  // We need the operands to be non-negative, so negate each one that isn't.
  for (Operand &Op : Ops) {
    if (Op.D == Domain::NonNegative)
      continue;
    auto *BO = BinaryOperator::CreateNeg(Op.V, Op.V->getName() + ".nonneg",
                                         SDI->getIterator());
    BO->setDebugLoc(SDI->getDebugLoc());
    Op.V = BO;
  }

  auto *URem = BinaryOperator::CreateURem(Ops[0].V, Ops[1].V, SDI->getName(),
                                          SDI->getIterator());
  URem->setDebugLoc(SDI->getDebugLoc());

  auto *Res = URem;

  // If the dividend was non-positive, we need to negate the result.
  if (Ops[0].D == Domain::NonPositive) {
    Res = BinaryOperator::CreateNeg(Res, Res->getName() + ".neg",
                                    SDI->getIterator());
    Res->setDebugLoc(SDI->getDebugLoc());
  }

  SDI->replaceAllUsesWith(Res);
  SDI->eraseFromParent();

  // Try to simplify our new urem.
  processUDivOrURem(URem, LVI);

  return true;
}
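
// Illustrative example for processSRem above (sketch IR, not taken from this
// file): with %x proven non-negative and %y proven non-positive, the srem
// becomes a urem on non-negative operands; the result needs no negation
// because the dividend is non-negative:
//   %r = srem i32 %x, %y
//   -->
//   %y.nonneg = sub i32 0, %y
//   %r = urem i32 %x, %y.nonneg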

/// See if LazyValueInfo's ability to exploit edge conditions or range
/// information is sufficient to prove the signs of both operands of this SDiv.
/// If this is the case, replace the SDiv with a UDiv. Even for local
/// conditions, this can sometimes prove conditions instcombine can't by
/// exploiting range information.
static bool processSDiv(BinaryOperator *SDI, const ConstantRange &LCR,
                        const ConstantRange &RCR, LazyValueInfo *LVI) {
  assert(SDI->getOpcode() == Instruction::SDiv);

  // Check whether the division folds to a constant.
  ConstantRange DivCR = LCR.sdiv(RCR);
  if (const APInt *Elem = DivCR.getSingleElement()) {
    SDI->replaceAllUsesWith(ConstantInt::get(SDI->getType(), *Elem));
    SDI->eraseFromParent();
    return true;
  }

  struct Operand {
    Value *V;
    Domain D;
  };
  std::array<Operand, 2> Ops = {{{SDI->getOperand(0), getDomain(LCR)},
                                 {SDI->getOperand(1), getDomain(RCR)}}};
  if (Ops[0].D == Domain::Unknown || Ops[1].D == Domain::Unknown)
    return false;

  // We know the domains of both of the operands!
  ++NumSDivs;

  // We need the operands to be non-negative, so negate each one that isn't.
  for (Operand &Op : Ops) {
    if (Op.D == Domain::NonNegative)
      continue;
    auto *BO = BinaryOperator::CreateNeg(Op.V, Op.V->getName() + ".nonneg",
                                         SDI->getIterator());
    BO->setDebugLoc(SDI->getDebugLoc());
    Op.V = BO;
  }

  auto *UDiv = BinaryOperator::CreateUDiv(Ops[0].V, Ops[1].V, SDI->getName(),
                                          SDI->getIterator());
  UDiv->setDebugLoc(SDI->getDebugLoc());
  UDiv->setIsExact(SDI->isExact());

  auto *Res = UDiv;

  // If the operands had two different domains, we need to negate the result.
  if (Ops[0].D != Ops[1].D) {
    Res = BinaryOperator::CreateNeg(Res, Res->getName() + ".neg",
                                    SDI->getIterator());
    Res->setDebugLoc(SDI->getDebugLoc());
  }

  SDI->replaceAllUsesWith(Res);
  SDI->eraseFromParent();

  // Try to simplify our new udiv.
  processUDivOrURem(UDiv, LVI);

  return true;
}

static bool processSDivOrSRem(BinaryOperator *Instr, LazyValueInfo *LVI) {
  assert(Instr->getOpcode() == Instruction::SDiv ||
         Instr->getOpcode() == Instruction::SRem);
  ConstantRange LCR = LVI->getConstantRangeAtUse(Instr->getOperandUse(0),
                                                 /*UndefAllowed*/ false);
  // Allow undef for the RHS: if it were undef, it could be zero, and division
  // by zero is UB anyway.
  ConstantRange RCR = LVI->getConstantRangeAtUse(Instr->getOperandUse(1),
                                                 /*UndefAllowed*/ true);
  if (Instr->getOpcode() == Instruction::SDiv)
    if (processSDiv(Instr, LCR, RCR, LVI))
      return true;

  if (Instr->getOpcode() == Instruction::SRem) {
    if (processSRem(Instr, LCR, RCR, LVI))
      return true;
  }

  return narrowSDivOrSRem(Instr, LCR, RCR);
}

static bool processAShr(BinaryOperator *SDI, LazyValueInfo *LVI) {
  ConstantRange LRange =
      LVI->getConstantRangeAtUse(SDI->getOperandUse(0), /*UndefAllowed*/ false);
  unsigned OrigWidth = SDI->getType()->getScalarSizeInBits();
  ConstantRange NegOneOrZero =
      ConstantRange(APInt(OrigWidth, (uint64_t)-1, true), APInt(OrigWidth, 1));
  if (NegOneOrZero.contains(LRange)) {
    // ashr of -1 or 0 never changes the value, so drop the whole instruction.
    ++NumAShrsRemoved;
    SDI->replaceAllUsesWith(SDI->getOperand(0));
    SDI->eraseFromParent();
    return true;
  }

  if (!LRange.isAllNonNegative())
    return false;

  ++NumAShrsConverted;
  auto *BO = BinaryOperator::CreateLShr(SDI->getOperand(0), SDI->getOperand(1),
                                        "", SDI->getIterator());
  BO->takeName(SDI);
  BO->setDebugLoc(SDI->getDebugLoc());
  BO->setIsExact(SDI->isExact());
  SDI->replaceAllUsesWith(BO);
  SDI->eraseFromParent();

  return true;
}

static bool processSExt(SExtInst *SDI, LazyValueInfo *LVI) {
  const Use &Base = SDI->getOperandUse(0);
  if (!LVI->getConstantRangeAtUse(Base, /*UndefAllowed*/ false)
           .isAllNonNegative())
    return false;

  ++NumSExt;
  auto *ZExt = CastInst::CreateZExtOrBitCast(Base, SDI->getType(), "",
                                             SDI->getIterator());
  ZExt->takeName(SDI);
  ZExt->setDebugLoc(SDI->getDebugLoc());
  ZExt->setNonNeg();
  SDI->replaceAllUsesWith(ZExt);
  SDI->eraseFromParent();

  return true;
}
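
// Illustrative example for processSExt above (sketch IR, not taken from this
// file): when the source is proven non-negative, sign- and zero-extension
// agree, and the nneg flag records the deduction:
//   %e = sext i8 %x to i32            ; %x known to be in [0, 128)
//   -->
//   %e = zext nneg i8 %x to i32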

static bool processPossibleNonNeg(PossiblyNonNegInst *I, LazyValueInfo *LVI) {
  if (I->hasNonNeg())
    return false;

  const Use &Base = I->getOperandUse(0);
  if (!LVI->getConstantRangeAtUse(Base, /*UndefAllowed*/ false)
           .isAllNonNegative())
    return false;

  ++NumNNeg;
  I->setNonNeg();

  return true;
}

static bool processZExt(ZExtInst *ZExt, LazyValueInfo *LVI) {
  return processPossibleNonNeg(cast<PossiblyNonNegInst>(ZExt), LVI);
}

static bool processUIToFP(UIToFPInst *UIToFP, LazyValueInfo *LVI) {
  return processPossibleNonNeg(cast<PossiblyNonNegInst>(UIToFP), LVI);
}

static bool processSIToFP(SIToFPInst *SIToFP, LazyValueInfo *LVI) {
  const Use &Base = SIToFP->getOperandUse(0);
  if (!LVI->getConstantRangeAtUse(Base, /*UndefAllowed*/ false)
           .isAllNonNegative())
    return false;

  ++NumSIToFP;
  auto *UIToFP = CastInst::Create(Instruction::UIToFP, Base, SIToFP->getType(),
                                  "", SIToFP->getIterator());
  UIToFP->takeName(SIToFP);
  UIToFP->setDebugLoc(SIToFP->getDebugLoc());
  UIToFP->setNonNeg();
  SIToFP->replaceAllUsesWith(UIToFP);
  SIToFP->eraseFromParent();

  return true;
}

static bool processBinOp(BinaryOperator *BinOp, LazyValueInfo *LVI) {
  using OBO = OverflowingBinaryOperator;

  bool NSW = BinOp->hasNoSignedWrap();
  bool NUW = BinOp->hasNoUnsignedWrap();
  if (NSW && NUW)
    return false;

  Instruction::BinaryOps Opcode = BinOp->getOpcode();
  ConstantRange LRange = LVI->getConstantRangeAtUse(BinOp->getOperandUse(0),
                                                    /*UndefAllowed=*/false);
  ConstantRange RRange = LVI->getConstantRangeAtUse(BinOp->getOperandUse(1),
                                                    /*UndefAllowed=*/false);

  bool Changed = false;
  bool NewNUW = false, NewNSW = false;
  if (!NUW) {
    ConstantRange NUWRange = ConstantRange::makeGuaranteedNoWrapRegion(
        Opcode, RRange, OBO::NoUnsignedWrap);
    NewNUW = NUWRange.contains(LRange);
    Changed |= NewNUW;
  }
  if (!NSW) {
    ConstantRange NSWRange = ConstantRange::makeGuaranteedNoWrapRegion(
        Opcode, RRange, OBO::NoSignedWrap);
    NewNSW = NSWRange.contains(LRange);
    Changed |= NewNSW;
  }

  setDeducedOverflowingFlags(BinOp, Opcode, NewNSW, NewNUW);

  return Changed;
}

static bool processAnd(BinaryOperator *BinOp, LazyValueInfo *LVI) {
  using namespace llvm::PatternMatch;

  // Pattern match (and lhs, C) where C includes a superset of bits which might
  // be set in lhs. This is a common truncation idiom created by instcombine.
  const Use &LHS = BinOp->getOperandUse(0);
  const APInt *RHS;
  if (!match(BinOp->getOperand(1), m_LowBitMask(RHS)))
    return false;

  // We can only replace the AND with LHS based on range info if the range does
  // not include undef.
  ConstantRange LRange =
      LVI->getConstantRangeAtUse(LHS, /*UndefAllowed=*/false);
  if (!LRange.getUnsignedMax().ule(*RHS))
    return false;

  BinOp->replaceAllUsesWith(LHS);
  BinOp->eraseFromParent();
  NumAnd++;
  return true;
}
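
// Illustrative example for processAnd above (sketch IR, not taken from this
// file): the low-bit mask is redundant once LVI proves every bit it clears
// is already zero:
//   %m = and i32 %x, 255              ; %x known to be in [0, 200)
//   -->                               ; uses of %m replaced by %x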

static bool runImpl(Function &F, LazyValueInfo *LVI, DominatorTree *DT,
                    const SimplifyQuery &SQ) {
  bool FnChanged = false;
  std::optional<ConstantRange> RetRange;
  if (F.hasExactDefinition() && F.getReturnType()->isIntOrIntVectorTy())
    RetRange =
        ConstantRange::getEmpty(F.getReturnType()->getScalarSizeInBits());

  // Visiting in a pre-order depth-first traversal causes us to simplify early
  // blocks before querying later blocks (which require us to analyze early
  // blocks). Eagerly simplifying shallow blocks means there is strictly less
  // work to do for deep blocks. This also means we don't visit unreachable
  // blocks.
  for (BasicBlock *BB : depth_first(&F.getEntryBlock())) {
    bool BBChanged = false;
    for (Instruction &II : llvm::make_early_inc_range(*BB)) {
      switch (II.getOpcode()) {
      case Instruction::Select:
        BBChanged |= processSelect(cast<SelectInst>(&II), LVI);
        break;
      case Instruction::PHI:
        BBChanged |= processPHI(cast<PHINode>(&II), LVI, DT, SQ);
        break;
      case Instruction::ICmp:
      case Instruction::FCmp:
        BBChanged |= processCmp(cast<CmpInst>(&II), LVI);
        break;
      case Instruction::Call:
      case Instruction::Invoke:
        BBChanged |= processCallSite(cast<CallBase>(II), LVI);
        break;
      case Instruction::SRem:
      case Instruction::SDiv:
        BBChanged |= processSDivOrSRem(cast<BinaryOperator>(&II), LVI);
        break;
      case Instruction::UDiv:
      case Instruction::URem:
        BBChanged |= processUDivOrURem(cast<BinaryOperator>(&II), LVI);
        break;
      case Instruction::AShr:
        BBChanged |= processAShr(cast<BinaryOperator>(&II), LVI);
        break;
      case Instruction::SExt:
        BBChanged |= processSExt(cast<SExtInst>(&II), LVI);
        break;
      case Instruction::ZExt:
        BBChanged |= processZExt(cast<ZExtInst>(&II), LVI);
        break;
      case Instruction::UIToFP:
        BBChanged |= processUIToFP(cast<UIToFPInst>(&II), LVI);
        break;
      case Instruction::SIToFP:
        BBChanged |= processSIToFP(cast<SIToFPInst>(&II), LVI);
        break;
      case Instruction::Add:
      case Instruction::Sub:
      case Instruction::Mul:
      case Instruction::Shl:
        BBChanged |= processBinOp(cast<BinaryOperator>(&II), LVI);
        break;
      case Instruction::And:
        BBChanged |= processAnd(cast<BinaryOperator>(&II), LVI);
        break;
      }
    }

    Instruction *Term = BB->getTerminator();
    switch (Term->getOpcode()) {
    case Instruction::Switch:
      BBChanged |= processSwitch(cast<SwitchInst>(Term), LVI, DT);
      break;
    case Instruction::Ret: {
      auto *RI = cast<ReturnInst>(Term);
      // Try to determine the return value if we can. This is mainly here to
      // simplify the writing of unit tests, but also helps to enable IPO by
      // constant folding the return values of callees.
      auto *RetVal = RI->getReturnValue();
      if (!RetVal)
        break; // handle "ret void"
      if (RetRange && !RetRange->isFullSet())
        RetRange =
            RetRange->unionWith(LVI->getConstantRange(RetVal, RI,
                                                      /*UndefAllowed=*/false));

      if (isa<Constant>(RetVal))
        break; // nothing to do
      if (auto *C = getConstantAt(RetVal, RI, LVI)) {
        ++NumReturns;
        RI->replaceUsesOfWith(RetVal, C);
        BBChanged = true;
      }
    }
    }

    FnChanged |= BBChanged;
  }

  // Infer range attribute on return value.
  if (RetRange && !RetRange->isFullSet()) {
    Attribute RangeAttr = F.getRetAttribute(Attribute::Range);
    if (RangeAttr.isValid())
      RetRange = RetRange->intersectWith(RangeAttr.getRange());
    // Don't add attribute for constant integer returns to reduce noise. These
    // are propagated across functions by IPSCCP.
    if (!RetRange->isEmptySet() && !RetRange->isSingleElement()) {
      F.addRangeRetAttr(*RetRange);
      FnChanged = true;
    }
  }
  return FnChanged;
}

PreservedAnalyses
CorrelatedValuePropagationPass::run(Function &F, FunctionAnalysisManager &AM) {
  LazyValueInfo *LVI = &AM.getResult<LazyValueAnalysis>(F);
  DominatorTree *DT = &AM.getResult<DominatorTreeAnalysis>(F);

  bool Changed = runImpl(F, LVI, DT, getBestSimplifyQuery(AM, F));

  PreservedAnalyses PA;
  if (!Changed) {
    PA = PreservedAnalyses::all();
  } else {
#if defined(EXPENSIVE_CHECKS)
    assert(DT->verify(DominatorTree::VerificationLevel::Full));
#else
    assert(DT->verify(DominatorTree::VerificationLevel::Fast));
#endif // EXPENSIVE_CHECKS

    PA.preserve<DominatorTreeAnalysis>();
    PA.preserve<LazyValueAnalysis>();
  }

  // Keeping LVI alive is expensive, both because it uses a lot of memory, and
  // because invalidating values in LVI is expensive. While CVP does preserve
  // LVI, we know that passes after JumpThreading+CVP will not need the result
  // of this analysis, so we forcefully discard it early.
  PA.abandon<LazyValueAnalysis>();
  return PA;
}