1 //===- ValueTracking.cpp - Walk computations to compute properties --------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file contains routines that help analyze properties that chains of 10 // computations have. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "llvm/Analysis/ValueTracking.h" 15 #include "llvm/ADT/APFloat.h" 16 #include "llvm/ADT/APInt.h" 17 #include "llvm/ADT/ArrayRef.h" 18 #include "llvm/ADT/STLExtras.h" 19 #include "llvm/ADT/ScopeExit.h" 20 #include "llvm/ADT/SmallPtrSet.h" 21 #include "llvm/ADT/SmallSet.h" 22 #include "llvm/ADT/SmallVector.h" 23 #include "llvm/ADT/StringRef.h" 24 #include "llvm/ADT/iterator_range.h" 25 #include "llvm/Analysis/AliasAnalysis.h" 26 #include "llvm/Analysis/AssumeBundleQueries.h" 27 #include "llvm/Analysis/AssumptionCache.h" 28 #include "llvm/Analysis/ConstantFolding.h" 29 #include "llvm/Analysis/DomConditionCache.h" 30 #include "llvm/Analysis/GuardUtils.h" 31 #include "llvm/Analysis/InstructionSimplify.h" 32 #include "llvm/Analysis/Loads.h" 33 #include "llvm/Analysis/LoopInfo.h" 34 #include "llvm/Analysis/OptimizationRemarkEmitter.h" 35 #include "llvm/Analysis/TargetLibraryInfo.h" 36 #include "llvm/Analysis/VectorUtils.h" 37 #include "llvm/Analysis/WithCache.h" 38 #include "llvm/IR/Argument.h" 39 #include "llvm/IR/Attributes.h" 40 #include "llvm/IR/BasicBlock.h" 41 #include "llvm/IR/Constant.h" 42 #include "llvm/IR/ConstantRange.h" 43 #include "llvm/IR/Constants.h" 44 #include "llvm/IR/DerivedTypes.h" 45 #include "llvm/IR/DiagnosticInfo.h" 46 #include "llvm/IR/Dominators.h" 47 #include "llvm/IR/EHPersonalities.h" 48 #include "llvm/IR/Function.h" 49 #include "llvm/IR/GetElementPtrTypeIterator.h" 50 #include "llvm/IR/GlobalAlias.h" 51 #include "llvm/IR/GlobalValue.h" 52 #include "llvm/IR/GlobalVariable.h" 53 #include "llvm/IR/InstrTypes.h" 54 #include "llvm/IR/Instruction.h" 55 #include "llvm/IR/Instructions.h" 56 #include "llvm/IR/IntrinsicInst.h" 57 #include "llvm/IR/Intrinsics.h" 58 #include "llvm/IR/IntrinsicsAArch64.h" 59 #include "llvm/IR/IntrinsicsAMDGPU.h" 60 #include "llvm/IR/IntrinsicsRISCV.h" 61 #include "llvm/IR/IntrinsicsX86.h" 62 #include "llvm/IR/LLVMContext.h" 63 #include "llvm/IR/Metadata.h" 64 #include "llvm/IR/Module.h" 65 #include "llvm/IR/Operator.h" 66 #include "llvm/IR/PatternMatch.h" 67 #include "llvm/IR/Type.h" 68 #include "llvm/IR/User.h" 69 #include "llvm/IR/Value.h" 70 #include "llvm/Support/Casting.h" 71 #include "llvm/Support/CommandLine.h" 72 #include "llvm/Support/Compiler.h" 73 #include "llvm/Support/ErrorHandling.h" 74 #include "llvm/Support/KnownBits.h" 75 #include "llvm/Support/MathExtras.h" 76 #include <algorithm> 77 #include <cassert> 78 #include <cstdint> 79 #include <optional> 80 #include <utility> 81 82 using namespace llvm; 83 using namespace llvm::PatternMatch; 84 85 // Controls the number of uses of the value searched for possible 86 // dominating comparisons. 87 static cl::opt<unsigned> DomConditionsMaxUses("dom-conditions-max-uses", 88 cl::Hidden, cl::init(20)); 89 90 91 /// Returns the bitwidth of the given scalar or pointer type. For vector types, 92 /// returns the element type's bitwidth. 
93 static unsigned getBitWidth(Type *Ty, const DataLayout &DL) { 94 if (unsigned BitWidth = Ty->getScalarSizeInBits()) 95 return BitWidth; 96 97 return DL.getPointerTypeSizeInBits(Ty); 98 } 99 100 // Given the provided Value and, potentially, a context instruction, return 101 // the preferred context instruction (if any). 102 static const Instruction *safeCxtI(const Value *V, const Instruction *CxtI) { 103 // If we've been provided with a context instruction, then use that (provided 104 // it has been inserted). 105 if (CxtI && CxtI->getParent()) 106 return CxtI; 107 108 // If the value is really an already-inserted instruction, then use that. 109 CxtI = dyn_cast<Instruction>(V); 110 if (CxtI && CxtI->getParent()) 111 return CxtI; 112 113 return nullptr; 114 } 115 116 static const Instruction *safeCxtI(const Value *V1, const Value *V2, const Instruction *CxtI) { 117 // If we've been provided with a context instruction, then use that (provided 118 // it has been inserted). 119 if (CxtI && CxtI->getParent()) 120 return CxtI; 121 122 // If the value is really an already-inserted instruction, then use that. 123 CxtI = dyn_cast<Instruction>(V1); 124 if (CxtI && CxtI->getParent()) 125 return CxtI; 126 127 CxtI = dyn_cast<Instruction>(V2); 128 if (CxtI && CxtI->getParent()) 129 return CxtI; 130 131 return nullptr; 132 } 133 134 static bool getShuffleDemandedElts(const ShuffleVectorInst *Shuf, 135 const APInt &DemandedElts, 136 APInt &DemandedLHS, APInt &DemandedRHS) { 137 if (isa<ScalableVectorType>(Shuf->getType())) { 138 assert(DemandedElts == APInt(1,1)); 139 DemandedLHS = DemandedRHS = DemandedElts; 140 return true; 141 } 142 143 int NumElts = 144 cast<FixedVectorType>(Shuf->getOperand(0)->getType())->getNumElements(); 145 return llvm::getShuffleDemandedElts(NumElts, Shuf->getShuffleMask(), 146 DemandedElts, DemandedLHS, DemandedRHS); 147 } 148 149 static void computeKnownBits(const Value *V, const APInt &DemandedElts, 150 KnownBits &Known, unsigned Depth, 151 const SimplifyQuery &Q); 152 153 void llvm::computeKnownBits(const Value *V, KnownBits &Known, unsigned Depth, 154 const SimplifyQuery &Q) { 155 // Since the number of lanes in a scalable vector is unknown at compile time, 156 // we track one bit which is implicitly broadcast to all lanes. This means 157 // that all lanes in a scalable vector are considered demanded. 158 auto *FVTy = dyn_cast<FixedVectorType>(V->getType()); 159 APInt DemandedElts = 160 FVTy ? 
APInt::getAllOnes(FVTy->getNumElements()) : APInt(1, 1); 161 ::computeKnownBits(V, DemandedElts, Known, Depth, Q); 162 } 163 164 void llvm::computeKnownBits(const Value *V, KnownBits &Known, 165 const DataLayout &DL, unsigned Depth, 166 AssumptionCache *AC, const Instruction *CxtI, 167 const DominatorTree *DT, bool UseInstrInfo) { 168 computeKnownBits( 169 V, Known, Depth, 170 SimplifyQuery(DL, DT, AC, safeCxtI(V, CxtI), UseInstrInfo)); 171 } 172 173 KnownBits llvm::computeKnownBits(const Value *V, const DataLayout &DL, 174 unsigned Depth, AssumptionCache *AC, 175 const Instruction *CxtI, 176 const DominatorTree *DT, bool UseInstrInfo) { 177 return computeKnownBits( 178 V, Depth, SimplifyQuery(DL, DT, AC, safeCxtI(V, CxtI), UseInstrInfo)); 179 } 180 181 KnownBits llvm::computeKnownBits(const Value *V, const APInt &DemandedElts, 182 const DataLayout &DL, unsigned Depth, 183 AssumptionCache *AC, const Instruction *CxtI, 184 const DominatorTree *DT, bool UseInstrInfo) { 185 return computeKnownBits( 186 V, DemandedElts, Depth, 187 SimplifyQuery(DL, DT, AC, safeCxtI(V, CxtI), UseInstrInfo)); 188 } 189 190 static bool haveNoCommonBitsSetSpecialCases(const Value *LHS, const Value *RHS, 191 const SimplifyQuery &SQ) { 192 // Look for an inverted mask: (X & ~M) op (Y & M). 193 { 194 Value *M; 195 if (match(LHS, m_c_And(m_Not(m_Value(M)), m_Value())) && 196 match(RHS, m_c_And(m_Specific(M), m_Value())) && 197 isGuaranteedNotToBeUndef(M, SQ.AC, SQ.CxtI, SQ.DT)) 198 return true; 199 } 200 201 // X op (Y & ~X) 202 if (match(RHS, m_c_And(m_Not(m_Specific(LHS)), m_Value())) && 203 isGuaranteedNotToBeUndef(LHS, SQ.AC, SQ.CxtI, SQ.DT)) 204 return true; 205 206 // X op ((X & Y) ^ Y) -- this is the canonical form of the previous pattern 207 // for constant Y. 208 Value *Y; 209 if (match(RHS, 210 m_c_Xor(m_c_And(m_Specific(LHS), m_Value(Y)), m_Deferred(Y))) && 211 isGuaranteedNotToBeUndef(LHS, SQ.AC, SQ.CxtI, SQ.DT) && 212 isGuaranteedNotToBeUndef(Y, SQ.AC, SQ.CxtI, SQ.DT)) 213 return true; 214 215 // Peek through extends to find a 'not' of the other side: 216 // (ext Y) op ext(~Y) 217 if (match(LHS, m_ZExtOrSExt(m_Value(Y))) && 218 match(RHS, m_ZExtOrSExt(m_Not(m_Specific(Y)))) && 219 isGuaranteedNotToBeUndef(Y, SQ.AC, SQ.CxtI, SQ.DT)) 220 return true; 221 222 // Look for: (A & B) op ~(A | B) 223 { 224 Value *A, *B; 225 if (match(LHS, m_And(m_Value(A), m_Value(B))) && 226 match(RHS, m_Not(m_c_Or(m_Specific(A), m_Specific(B)))) && 227 isGuaranteedNotToBeUndef(A, SQ.AC, SQ.CxtI, SQ.DT) && 228 isGuaranteedNotToBeUndef(B, SQ.AC, SQ.CxtI, SQ.DT)) 229 return true; 230 } 231 232 return false; 233 } 234 235 bool llvm::haveNoCommonBitsSet(const WithCache<const Value *> &LHSCache, 236 const WithCache<const Value *> &RHSCache, 237 const SimplifyQuery &SQ) { 238 const Value *LHS = LHSCache.getValue(); 239 const Value *RHS = RHSCache.getValue(); 240 241 assert(LHS->getType() == RHS->getType() && 242 "LHS and RHS should have the same type"); 243 assert(LHS->getType()->isIntOrIntVectorTy() && 244 "LHS and RHS should be integers"); 245 246 if (haveNoCommonBitsSetSpecialCases(LHS, RHS, SQ) || 247 haveNoCommonBitsSetSpecialCases(RHS, LHS, SQ)) 248 return true; 249 250 return KnownBits::haveNoCommonBitsSet(LHSCache.getKnownBits(SQ), 251 RHSCache.getKnownBits(SQ)); 252 } 253 254 bool llvm::isOnlyUsedInZeroEqualityComparison(const Instruction *I) { 255 return !I->user_empty() && all_of(I->users(), [](const User *U) { 256 ICmpInst::Predicate P; 257 return match(U, m_ICmp(P, m_Value(), m_Zero())) && ICmpInst::isEquality(P); 258 }); 
}

static bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero, unsigned Depth,
                                   const SimplifyQuery &Q);

bool llvm::isKnownToBeAPowerOfTwo(const Value *V, const DataLayout &DL,
                                  bool OrZero, unsigned Depth,
                                  AssumptionCache *AC, const Instruction *CxtI,
                                  const DominatorTree *DT, bool UseInstrInfo) {
  return ::isKnownToBeAPowerOfTwo(
      V, OrZero, Depth,
      SimplifyQuery(DL, DT, AC, safeCxtI(V, CxtI), UseInstrInfo));
}

static bool isKnownNonZero(const Value *V, const APInt &DemandedElts,
                           unsigned Depth, const SimplifyQuery &Q);

static bool isKnownNonZero(const Value *V, unsigned Depth,
                           const SimplifyQuery &Q);

bool llvm::isKnownNonZero(const Value *V, const DataLayout &DL, unsigned Depth,
                          AssumptionCache *AC, const Instruction *CxtI,
                          const DominatorTree *DT, bool UseInstrInfo) {
  return ::isKnownNonZero(
      V, Depth, SimplifyQuery(DL, DT, AC, safeCxtI(V, CxtI), UseInstrInfo));
}

bool llvm::isKnownNonNegative(const Value *V, const SimplifyQuery &SQ,
                              unsigned Depth) {
  return computeKnownBits(V, Depth, SQ).isNonNegative();
}

bool llvm::isKnownPositive(const Value *V, const SimplifyQuery &SQ,
                           unsigned Depth) {
  if (auto *CI = dyn_cast<ConstantInt>(V))
    return CI->getValue().isStrictlyPositive();

  // TODO: We're doing two recursive queries here. We should factor this such
  // that only a single query is needed.
  return isKnownNonNegative(V, SQ, Depth) && ::isKnownNonZero(V, Depth, SQ);
}

bool llvm::isKnownNegative(const Value *V, const SimplifyQuery &SQ,
                           unsigned Depth) {
  return computeKnownBits(V, Depth, SQ).isNegative();
}

static bool isKnownNonEqual(const Value *V1, const Value *V2, unsigned Depth,
                            const SimplifyQuery &Q);

bool llvm::isKnownNonEqual(const Value *V1, const Value *V2,
                           const DataLayout &DL, AssumptionCache *AC,
                           const Instruction *CxtI, const DominatorTree *DT,
                           bool UseInstrInfo) {
  return ::isKnownNonEqual(
      V1, V2, 0,
      SimplifyQuery(DL, DT, AC, safeCxtI(V2, V1, CxtI), UseInstrInfo));
}

bool llvm::MaskedValueIsZero(const Value *V, const APInt &Mask,
                             const SimplifyQuery &SQ, unsigned Depth) {
  KnownBits Known(Mask.getBitWidth());
  computeKnownBits(V, Known, Depth, SQ);
  return Mask.isSubsetOf(Known.Zero);
}

static unsigned ComputeNumSignBits(const Value *V, const APInt &DemandedElts,
                                   unsigned Depth, const SimplifyQuery &Q);

static unsigned ComputeNumSignBits(const Value *V, unsigned Depth,
                                   const SimplifyQuery &Q) {
  auto *FVTy = dyn_cast<FixedVectorType>(V->getType());
  APInt DemandedElts =
      FVTy ?
APInt::getAllOnes(FVTy->getNumElements()) : APInt(1, 1); 333 return ComputeNumSignBits(V, DemandedElts, Depth, Q); 334 } 335 336 unsigned llvm::ComputeNumSignBits(const Value *V, const DataLayout &DL, 337 unsigned Depth, AssumptionCache *AC, 338 const Instruction *CxtI, 339 const DominatorTree *DT, bool UseInstrInfo) { 340 return ::ComputeNumSignBits( 341 V, Depth, SimplifyQuery(DL, DT, AC, safeCxtI(V, CxtI), UseInstrInfo)); 342 } 343 344 unsigned llvm::ComputeMaxSignificantBits(const Value *V, const DataLayout &DL, 345 unsigned Depth, AssumptionCache *AC, 346 const Instruction *CxtI, 347 const DominatorTree *DT) { 348 unsigned SignBits = ComputeNumSignBits(V, DL, Depth, AC, CxtI, DT); 349 return V->getType()->getScalarSizeInBits() - SignBits + 1; 350 } 351 352 static void computeKnownBitsAddSub(bool Add, const Value *Op0, const Value *Op1, 353 bool NSW, const APInt &DemandedElts, 354 KnownBits &KnownOut, KnownBits &Known2, 355 unsigned Depth, const SimplifyQuery &Q) { 356 computeKnownBits(Op1, DemandedElts, KnownOut, Depth + 1, Q); 357 358 // If one operand is unknown and we have no nowrap information, 359 // the result will be unknown independently of the second operand. 360 if (KnownOut.isUnknown() && !NSW) 361 return; 362 363 computeKnownBits(Op0, DemandedElts, Known2, Depth + 1, Q); 364 KnownOut = KnownBits::computeForAddSub(Add, NSW, Known2, KnownOut); 365 } 366 367 static void computeKnownBitsMul(const Value *Op0, const Value *Op1, bool NSW, 368 const APInt &DemandedElts, KnownBits &Known, 369 KnownBits &Known2, unsigned Depth, 370 const SimplifyQuery &Q) { 371 computeKnownBits(Op1, DemandedElts, Known, Depth + 1, Q); 372 computeKnownBits(Op0, DemandedElts, Known2, Depth + 1, Q); 373 374 bool isKnownNegative = false; 375 bool isKnownNonNegative = false; 376 // If the multiplication is known not to overflow, compute the sign bit. 377 if (NSW) { 378 if (Op0 == Op1) { 379 // The product of a number with itself is non-negative. 380 isKnownNonNegative = true; 381 } else { 382 bool isKnownNonNegativeOp1 = Known.isNonNegative(); 383 bool isKnownNonNegativeOp0 = Known2.isNonNegative(); 384 bool isKnownNegativeOp1 = Known.isNegative(); 385 bool isKnownNegativeOp0 = Known2.isNegative(); 386 // The product of two numbers with the same sign is non-negative. 387 isKnownNonNegative = (isKnownNegativeOp1 && isKnownNegativeOp0) || 388 (isKnownNonNegativeOp1 && isKnownNonNegativeOp0); 389 // The product of a negative number and a non-negative number is either 390 // negative or zero. 391 if (!isKnownNonNegative) 392 isKnownNegative = 393 (isKnownNegativeOp1 && isKnownNonNegativeOp0 && 394 Known2.isNonZero()) || 395 (isKnownNegativeOp0 && isKnownNonNegativeOp1 && Known.isNonZero()); 396 } 397 } 398 399 bool SelfMultiply = Op0 == Op1; 400 if (SelfMultiply) 401 SelfMultiply &= 402 isGuaranteedNotToBeUndef(Op0, Q.AC, Q.CxtI, Q.DT, Depth + 1); 403 Known = KnownBits::mul(Known, Known2, SelfMultiply); 404 405 // Only make use of no-wrap flags if we failed to compute the sign bit 406 // directly. This matters if the multiplication always overflows, in 407 // which case we prefer to follow the result of the direct computation, 408 // though as the program is invoking undefined behaviour we can choose 409 // whatever we like here. 
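  // Guarding on the opposite sign not already being known avoids introducing
  // conflicting known bits when the multiplication is guaranteed to overflow
  // and the direct computation above already settled the sign.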
410 if (isKnownNonNegative && !Known.isNegative()) 411 Known.makeNonNegative(); 412 else if (isKnownNegative && !Known.isNonNegative()) 413 Known.makeNegative(); 414 } 415 416 void llvm::computeKnownBitsFromRangeMetadata(const MDNode &Ranges, 417 KnownBits &Known) { 418 unsigned BitWidth = Known.getBitWidth(); 419 unsigned NumRanges = Ranges.getNumOperands() / 2; 420 assert(NumRanges >= 1); 421 422 Known.Zero.setAllBits(); 423 Known.One.setAllBits(); 424 425 for (unsigned i = 0; i < NumRanges; ++i) { 426 ConstantInt *Lower = 427 mdconst::extract<ConstantInt>(Ranges.getOperand(2 * i + 0)); 428 ConstantInt *Upper = 429 mdconst::extract<ConstantInt>(Ranges.getOperand(2 * i + 1)); 430 ConstantRange Range(Lower->getValue(), Upper->getValue()); 431 432 // The first CommonPrefixBits of all values in Range are equal. 433 unsigned CommonPrefixBits = 434 (Range.getUnsignedMax() ^ Range.getUnsignedMin()).countl_zero(); 435 APInt Mask = APInt::getHighBitsSet(BitWidth, CommonPrefixBits); 436 APInt UnsignedMax = Range.getUnsignedMax().zextOrTrunc(BitWidth); 437 Known.One &= UnsignedMax & Mask; 438 Known.Zero &= ~UnsignedMax & Mask; 439 } 440 } 441 442 static bool isEphemeralValueOf(const Instruction *I, const Value *E) { 443 SmallVector<const Value *, 16> WorkSet(1, I); 444 SmallPtrSet<const Value *, 32> Visited; 445 SmallPtrSet<const Value *, 16> EphValues; 446 447 // The instruction defining an assumption's condition itself is always 448 // considered ephemeral to that assumption (even if it has other 449 // non-ephemeral users). See r246696's test case for an example. 450 if (is_contained(I->operands(), E)) 451 return true; 452 453 while (!WorkSet.empty()) { 454 const Value *V = WorkSet.pop_back_val(); 455 if (!Visited.insert(V).second) 456 continue; 457 458 // If all uses of this value are ephemeral, then so is this value. 459 if (llvm::all_of(V->users(), [&](const User *U) { 460 return EphValues.count(U); 461 })) { 462 if (V == E) 463 return true; 464 465 if (V == I || (isa<Instruction>(V) && 466 !cast<Instruction>(V)->mayHaveSideEffects() && 467 !cast<Instruction>(V)->isTerminator())) { 468 EphValues.insert(V); 469 if (const User *U = dyn_cast<User>(V)) 470 append_range(WorkSet, U->operands()); 471 } 472 } 473 } 474 475 return false; 476 } 477 478 // Is this an intrinsic that cannot be speculated but also cannot trap? 479 bool llvm::isAssumeLikeIntrinsic(const Instruction *I) { 480 if (const IntrinsicInst *CI = dyn_cast<IntrinsicInst>(I)) 481 return CI->isAssumeLikeIntrinsic(); 482 483 return false; 484 } 485 486 bool llvm::isValidAssumeForContext(const Instruction *Inv, 487 const Instruction *CxtI, 488 const DominatorTree *DT) { 489 // There are two restrictions on the use of an assume: 490 // 1. The assume must dominate the context (or the control flow must 491 // reach the assume whenever it reaches the context). 492 // 2. The context must not be in the assume's set of ephemeral values 493 // (otherwise we will use the assume to prove that the condition 494 // feeding the assume is trivially true, thus causing the removal of 495 // the assume). 496 497 if (Inv->getParent() == CxtI->getParent()) { 498 // If Inv and CtxI are in the same block, check if the assume (Inv) is first 499 // in the BB. 500 if (Inv->comesBefore(CxtI)) 501 return true; 502 503 // Don't let an assume affect itself - this would cause the problems 504 // `isEphemeralValueOf` is trying to prevent, and it would also make 505 // the loop below go out of bounds. 
506 if (Inv == CxtI) 507 return false; 508 509 // The context comes first, but they're both in the same block. 510 // Make sure there is nothing in between that might interrupt 511 // the control flow, not even CxtI itself. 512 // We limit the scan distance between the assume and its context instruction 513 // to avoid a compile-time explosion. This limit is chosen arbitrarily, so 514 // it can be adjusted if needed (could be turned into a cl::opt). 515 auto Range = make_range(CxtI->getIterator(), Inv->getIterator()); 516 if (!isGuaranteedToTransferExecutionToSuccessor(Range, 15)) 517 return false; 518 519 return !isEphemeralValueOf(Inv, CxtI); 520 } 521 522 // Inv and CxtI are in different blocks. 523 if (DT) { 524 if (DT->dominates(Inv, CxtI)) 525 return true; 526 } else if (Inv->getParent() == CxtI->getParent()->getSinglePredecessor()) { 527 // We don't have a DT, but this trivially dominates. 528 return true; 529 } 530 531 return false; 532 } 533 534 // TODO: cmpExcludesZero misses many cases where `RHS` is non-constant but 535 // we still have enough information about `RHS` to conclude non-zero. For 536 // example Pred=EQ, RHS=isKnownNonZero. cmpExcludesZero is called in loops 537 // so the extra compile time may not be worth it, but possibly a second API 538 // should be created for use outside of loops. 539 static bool cmpExcludesZero(CmpInst::Predicate Pred, const Value *RHS) { 540 // v u> y implies v != 0. 541 if (Pred == ICmpInst::ICMP_UGT) 542 return true; 543 544 // Special-case v != 0 to also handle v != null. 545 if (Pred == ICmpInst::ICMP_NE) 546 return match(RHS, m_Zero()); 547 548 // All other predicates - rely on generic ConstantRange handling. 549 const APInt *C; 550 auto Zero = APInt::getZero(RHS->getType()->getScalarSizeInBits()); 551 if (match(RHS, m_APInt(C))) { 552 ConstantRange TrueValues = ConstantRange::makeExactICmpRegion(Pred, *C); 553 return !TrueValues.contains(Zero); 554 } 555 556 auto *VC = dyn_cast<ConstantDataVector>(RHS); 557 if (VC == nullptr) 558 return false; 559 560 for (unsigned ElemIdx = 0, NElem = VC->getNumElements(); ElemIdx < NElem; 561 ++ElemIdx) { 562 ConstantRange TrueValues = ConstantRange::makeExactICmpRegion( 563 Pred, VC->getElementAsAPInt(ElemIdx)); 564 if (TrueValues.contains(Zero)) 565 return false; 566 } 567 return true; 568 } 569 570 static bool isKnownNonZeroFromAssume(const Value *V, const SimplifyQuery &Q) { 571 // Use of assumptions is context-sensitive. If we don't have a context, we 572 // cannot use them! 573 if (!Q.AC || !Q.CxtI) 574 return false; 575 576 for (AssumptionCache::ResultElem &Elem : Q.AC->assumptionsFor(V)) { 577 if (!Elem.Assume) 578 continue; 579 580 AssumeInst *I = cast<AssumeInst>(Elem.Assume); 581 assert(I->getFunction() == Q.CxtI->getFunction() && 582 "Got assumption for the wrong function!"); 583 584 if (Elem.Index != AssumptionCache::ExprResultIdx) { 585 if (!V->getType()->isPointerTy()) 586 continue; 587 if (RetainedKnowledge RK = getKnowledgeFromBundle( 588 *I, I->bundle_op_info_begin()[Elem.Index])) { 589 if (RK.WasOn == V && 590 (RK.AttrKind == Attribute::NonNull || 591 (RK.AttrKind == Attribute::Dereferenceable && 592 !NullPointerIsDefined(Q.CxtI->getFunction(), 593 V->getType()->getPointerAddressSpace()))) && 594 isValidAssumeForContext(I, Q.CxtI, Q.DT)) 595 return true; 596 } 597 continue; 598 } 599 600 // Warning: This loop can end up being somewhat performance sensitive. 601 // We're running this loop for once for each value queried resulting in a 602 // runtime of ~O(#assumes * #values). 
603 604 Value *RHS; 605 CmpInst::Predicate Pred; 606 auto m_V = m_CombineOr(m_Specific(V), m_PtrToInt(m_Specific(V))); 607 if (!match(I->getArgOperand(0), m_c_ICmp(Pred, m_V, m_Value(RHS)))) 608 return false; 609 610 if (cmpExcludesZero(Pred, RHS) && isValidAssumeForContext(I, Q.CxtI, Q.DT)) 611 return true; 612 } 613 614 return false; 615 } 616 617 static void computeKnownBitsFromCmp(const Value *V, CmpInst::Predicate Pred, 618 Value *LHS, Value *RHS, KnownBits &Known, 619 const SimplifyQuery &Q) { 620 if (RHS->getType()->isPointerTy()) { 621 // Handle comparison of pointer to null explicitly, as it will not be 622 // covered by the m_APInt() logic below. 623 if (LHS == V && match(RHS, m_Zero())) { 624 switch (Pred) { 625 case ICmpInst::ICMP_EQ: 626 Known.setAllZero(); 627 break; 628 case ICmpInst::ICMP_SGE: 629 case ICmpInst::ICMP_SGT: 630 Known.makeNonNegative(); 631 break; 632 case ICmpInst::ICMP_SLT: 633 Known.makeNegative(); 634 break; 635 default: 636 break; 637 } 638 } 639 return; 640 } 641 642 unsigned BitWidth = Known.getBitWidth(); 643 auto m_V = 644 m_CombineOr(m_Specific(V), m_PtrToIntSameSize(Q.DL, m_Specific(V))); 645 646 const APInt *Mask, *C; 647 uint64_t ShAmt; 648 switch (Pred) { 649 case ICmpInst::ICMP_EQ: 650 // assume(V = C) 651 if (match(LHS, m_V) && match(RHS, m_APInt(C))) { 652 Known = Known.unionWith(KnownBits::makeConstant(*C)); 653 // assume(V & Mask = C) 654 } else if (match(LHS, m_And(m_V, m_APInt(Mask))) && 655 match(RHS, m_APInt(C))) { 656 // For one bits in Mask, we can propagate bits from C to V. 657 Known.Zero |= ~*C & *Mask; 658 Known.One |= *C & *Mask; 659 // assume(V | Mask = C) 660 } else if (match(LHS, m_Or(m_V, m_APInt(Mask))) && match(RHS, m_APInt(C))) { 661 // For zero bits in Mask, we can propagate bits from C to V. 662 Known.Zero |= ~*C & ~*Mask; 663 Known.One |= *C & ~*Mask; 664 // assume(V ^ Mask = C) 665 } else if (match(LHS, m_Xor(m_V, m_APInt(Mask))) && 666 match(RHS, m_APInt(C))) { 667 // Equivalent to assume(V == Mask ^ C) 668 Known = Known.unionWith(KnownBits::makeConstant(*C ^ *Mask)); 669 // assume(V << ShAmt = C) 670 } else if (match(LHS, m_Shl(m_V, m_ConstantInt(ShAmt))) && 671 match(RHS, m_APInt(C)) && ShAmt < BitWidth) { 672 // For those bits in C that are known, we can propagate them to known 673 // bits in V shifted to the right by ShAmt. 674 KnownBits RHSKnown = KnownBits::makeConstant(*C); 675 RHSKnown.Zero.lshrInPlace(ShAmt); 676 RHSKnown.One.lshrInPlace(ShAmt); 677 Known = Known.unionWith(RHSKnown); 678 // assume(V >> ShAmt = C) 679 } else if (match(LHS, m_Shr(m_V, m_ConstantInt(ShAmt))) && 680 match(RHS, m_APInt(C)) && ShAmt < BitWidth) { 681 KnownBits RHSKnown = KnownBits::makeConstant(*C); 682 // For those bits in RHS that are known, we can propagate them to known 683 // bits in V shifted to the right by C. 
684 Known.Zero |= RHSKnown.Zero << ShAmt; 685 Known.One |= RHSKnown.One << ShAmt; 686 } 687 break; 688 case ICmpInst::ICMP_NE: { 689 // assume (V & B != 0) where B is a power of 2 690 const APInt *BPow2; 691 if (match(LHS, m_And(m_V, m_Power2(BPow2))) && match(RHS, m_Zero())) 692 Known.One |= *BPow2; 693 break; 694 } 695 default: 696 const APInt *Offset = nullptr; 697 if (match(LHS, m_CombineOr(m_V, m_Add(m_V, m_APInt(Offset)))) && 698 match(RHS, m_APInt(C))) { 699 ConstantRange LHSRange = ConstantRange::makeAllowedICmpRegion(Pred, *C); 700 if (Offset) 701 LHSRange = LHSRange.sub(*Offset); 702 Known = Known.unionWith(LHSRange.toKnownBits()); 703 } 704 break; 705 } 706 } 707 708 void llvm::computeKnownBitsFromContext(const Value *V, KnownBits &Known, 709 unsigned Depth, const SimplifyQuery &Q) { 710 if (!Q.CxtI) 711 return; 712 713 if (Q.DC && Q.DT) { 714 // Handle dominating conditions. 715 for (BranchInst *BI : Q.DC->conditionsFor(V)) { 716 auto *Cmp = dyn_cast<ICmpInst>(BI->getCondition()); 717 if (!Cmp) 718 continue; 719 720 BasicBlockEdge Edge0(BI->getParent(), BI->getSuccessor(0)); 721 if (Q.DT->dominates(Edge0, Q.CxtI->getParent())) 722 computeKnownBitsFromCmp(V, Cmp->getPredicate(), Cmp->getOperand(0), 723 Cmp->getOperand(1), Known, Q); 724 725 BasicBlockEdge Edge1(BI->getParent(), BI->getSuccessor(1)); 726 if (Q.DT->dominates(Edge1, Q.CxtI->getParent())) 727 computeKnownBitsFromCmp(V, Cmp->getInversePredicate(), 728 Cmp->getOperand(0), Cmp->getOperand(1), Known, 729 Q); 730 } 731 732 if (Known.hasConflict()) 733 Known.resetAll(); 734 } 735 736 if (!Q.AC) 737 return; 738 739 unsigned BitWidth = Known.getBitWidth(); 740 741 // Note that the patterns below need to be kept in sync with the code 742 // in AssumptionCache::updateAffectedValues. 743 744 for (AssumptionCache::ResultElem &Elem : Q.AC->assumptionsFor(V)) { 745 if (!Elem.Assume) 746 continue; 747 748 AssumeInst *I = cast<AssumeInst>(Elem.Assume); 749 assert(I->getParent()->getParent() == Q.CxtI->getParent()->getParent() && 750 "Got assumption for the wrong function!"); 751 752 if (Elem.Index != AssumptionCache::ExprResultIdx) { 753 if (!V->getType()->isPointerTy()) 754 continue; 755 if (RetainedKnowledge RK = getKnowledgeFromBundle( 756 *I, I->bundle_op_info_begin()[Elem.Index])) { 757 if (RK.WasOn == V && RK.AttrKind == Attribute::Alignment && 758 isPowerOf2_64(RK.ArgValue) && 759 isValidAssumeForContext(I, Q.CxtI, Q.DT)) 760 Known.Zero.setLowBits(Log2_64(RK.ArgValue)); 761 } 762 continue; 763 } 764 765 // Warning: This loop can end up being somewhat performance sensitive. 766 // We're running this loop for once for each value queried resulting in a 767 // runtime of ~O(#assumes * #values). 768 769 Value *Arg = I->getArgOperand(0); 770 771 if (Arg == V && isValidAssumeForContext(I, Q.CxtI, Q.DT)) { 772 assert(BitWidth == 1 && "assume operand is not i1?"); 773 (void)BitWidth; 774 Known.setAllOnes(); 775 return; 776 } 777 if (match(Arg, m_Not(m_Specific(V))) && 778 isValidAssumeForContext(I, Q.CxtI, Q.DT)) { 779 assert(BitWidth == 1 && "assume operand is not i1?"); 780 (void)BitWidth; 781 Known.setAllZero(); 782 return; 783 } 784 785 // The remaining tests are all recursive, so bail out if we hit the limit. 
    if (Depth == MaxAnalysisRecursionDepth)
      continue;

    ICmpInst *Cmp = dyn_cast<ICmpInst>(Arg);
    if (!Cmp)
      continue;

    if (!isValidAssumeForContext(I, Q.CxtI, Q.DT))
      continue;

    computeKnownBitsFromCmp(V, Cmp->getPredicate(), Cmp->getOperand(0),
                            Cmp->getOperand(1), Known, Q);
  }

  // Conflicting assumption: Undefined behavior will occur on this execution
  // path.
  if (Known.hasConflict())
    Known.resetAll();
}

/// Compute known bits from a shift operator, including those with a
/// non-constant shift amount. Known is the output of this function. Known2 is a
/// pre-allocated temporary with the same bit width as Known and on return
/// contains the known bits of the shift value source. KF is an
/// operator-specific function that, given the known bits and a shift amount,
/// computes the implied known bits of the shift operator's result for that
/// shift amount. The results from calling KF are conservatively combined for
/// all permitted shift amounts.
static void computeKnownBitsFromShiftOperator(
    const Operator *I, const APInt &DemandedElts, KnownBits &Known,
    KnownBits &Known2, unsigned Depth, const SimplifyQuery &Q,
    function_ref<KnownBits(const KnownBits &, const KnownBits &, bool)> KF) {
  computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q);
  computeKnownBits(I->getOperand(1), DemandedElts, Known, Depth + 1, Q);
  // To limit compile-time impact, only query isKnownNonZero() if we know at
  // least something about the shift amount.
  bool ShAmtNonZero =
      Known.isNonZero() ||
      (Known.getMaxValue().ult(Known.getBitWidth()) &&
       isKnownNonZero(I->getOperand(1), DemandedElts, Depth + 1, Q));
  Known = KF(Known2, Known, ShAmtNonZero);
}

static KnownBits
getKnownBitsFromAndXorOr(const Operator *I, const APInt &DemandedElts,
                         const KnownBits &KnownLHS, const KnownBits &KnownRHS,
                         unsigned Depth, const SimplifyQuery &Q) {
  unsigned BitWidth = KnownLHS.getBitWidth();
  KnownBits KnownOut(BitWidth);
  bool IsAnd = false;
  bool HasKnownOne = !KnownLHS.One.isZero() || !KnownRHS.One.isZero();
  Value *X = nullptr, *Y = nullptr;

  switch (I->getOpcode()) {
  case Instruction::And:
    KnownOut = KnownLHS & KnownRHS;
    IsAnd = true;
    // and(x, -x) is a common idiom that clears all but the lowest set bit. If
    // we have a single known bit in x, we can clear all bits above it.
    // TODO: instcombine often reassociates independent `and` which can hide
    // this pattern. Try to match and(x, and(-x, y)) / and(and(x, y), -x).
    if (HasKnownOne && match(I, m_c_And(m_Value(X), m_Neg(m_Deferred(X))))) {
      // -(-x) == x so using whichever (LHS/RHS) gets us a better result.
      if (KnownLHS.countMaxTrailingZeros() <= KnownRHS.countMaxTrailingZeros())
        KnownOut = KnownLHS.blsi();
      else
        KnownOut = KnownRHS.blsi();
    }
    break;
  case Instruction::Or:
    KnownOut = KnownLHS | KnownRHS;
    break;
  case Instruction::Xor:
    KnownOut = KnownLHS ^ KnownRHS;
    // xor(x, x-1) is a common idiom that clears all bits above the lowest set
    // bit. If we have a single known bit in x, we can clear all bits above it.
    // TODO: xor(x, x-1) is often rewritten as xor(x, x-C) where C != -1, but
    // for the purpose of demanded bits (xor(x, x-C) & Demanded) ==
    // (xor(x, x-1) & Demanded). Extend the xor pattern to use arbitrary C
    // when xor(x, x-C) is the same as xor(x, x-1).
    if (HasKnownOne &&
        match(I, m_c_Xor(m_Value(X), m_c_Add(m_Deferred(X), m_AllOnes())))) {
      const KnownBits &XBits = I->getOperand(0) == X ? KnownLHS : KnownRHS;
      KnownOut = XBits.blsmsk();
    }
    break;
  default:
    llvm_unreachable("Invalid Op used in 'analyzeKnownBitsFromAndXorOr'");
  }

  // and(x, add (x, -1)) is a common idiom that always clears the low bit;
  // xor/or(x, add (x, -1)) is an idiom that will always set the low bit.
  // Here we handle the more general case of adding any odd number by
  // matching the form and/xor/or(x, add(x, y)) where y is odd.
  // TODO: This could be generalized to clearing any bit set in y where the
  // following bit is known to be unset in y.
  if (!KnownOut.Zero[0] && !KnownOut.One[0] &&
      (match(I, m_c_BinOp(m_Value(X), m_c_Add(m_Deferred(X), m_Value(Y)))) ||
       match(I, m_c_BinOp(m_Value(X), m_Sub(m_Deferred(X), m_Value(Y)))) ||
       match(I, m_c_BinOp(m_Value(X), m_Sub(m_Value(Y), m_Deferred(X)))))) {
    KnownBits KnownY(BitWidth);
    computeKnownBits(Y, DemandedElts, KnownY, Depth + 1, Q);
    if (KnownY.countMinTrailingOnes() > 0) {
      if (IsAnd)
        KnownOut.Zero.setBit(0);
      else
        KnownOut.One.setBit(0);
    }
  }
  return KnownOut;
}

// Public so this can be used in `SimplifyDemandedUseBits`.
KnownBits llvm::analyzeKnownBitsFromAndXorOr(const Operator *I,
                                             const KnownBits &KnownLHS,
                                             const KnownBits &KnownRHS,
                                             unsigned Depth,
                                             const SimplifyQuery &SQ) {
  auto *FVTy = dyn_cast<FixedVectorType>(I->getType());
  APInt DemandedElts =
      FVTy ? APInt::getAllOnes(FVTy->getNumElements()) : APInt(1, 1);

  return getKnownBitsFromAndXorOr(I, DemandedElts, KnownLHS, KnownRHS, Depth,
                                  SQ);
}

ConstantRange llvm::getVScaleRange(const Function *F, unsigned BitWidth) {
  Attribute Attr = F->getFnAttribute(Attribute::VScaleRange);
  // Without vscale_range, we only know that vscale is non-zero.
  if (!Attr.isValid())
    return ConstantRange(APInt(BitWidth, 1), APInt::getZero(BitWidth));

  unsigned AttrMin = Attr.getVScaleRangeMin();
  // Minimum is larger than the result width, so the result is always poison.
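  // (For example, a vscale_range minimum of 256 queried at a bit width of 8
  // would need 9 bits, so no representable value remains and the range below
  // is empty.)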
922 if ((unsigned)llvm::bit_width(AttrMin) > BitWidth) 923 return ConstantRange::getEmpty(BitWidth); 924 925 APInt Min(BitWidth, AttrMin); 926 std::optional<unsigned> AttrMax = Attr.getVScaleRangeMax(); 927 if (!AttrMax || (unsigned)llvm::bit_width(*AttrMax) > BitWidth) 928 return ConstantRange(Min, APInt::getZero(BitWidth)); 929 930 return ConstantRange(Min, APInt(BitWidth, *AttrMax) + 1); 931 } 932 933 static void computeKnownBitsFromOperator(const Operator *I, 934 const APInt &DemandedElts, 935 KnownBits &Known, unsigned Depth, 936 const SimplifyQuery &Q) { 937 unsigned BitWidth = Known.getBitWidth(); 938 939 KnownBits Known2(BitWidth); 940 switch (I->getOpcode()) { 941 default: break; 942 case Instruction::Load: 943 if (MDNode *MD = 944 Q.IIQ.getMetadata(cast<LoadInst>(I), LLVMContext::MD_range)) 945 computeKnownBitsFromRangeMetadata(*MD, Known); 946 break; 947 case Instruction::And: 948 computeKnownBits(I->getOperand(1), DemandedElts, Known, Depth + 1, Q); 949 computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q); 950 951 Known = getKnownBitsFromAndXorOr(I, DemandedElts, Known2, Known, Depth, Q); 952 break; 953 case Instruction::Or: 954 computeKnownBits(I->getOperand(1), DemandedElts, Known, Depth + 1, Q); 955 computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q); 956 957 Known = getKnownBitsFromAndXorOr(I, DemandedElts, Known2, Known, Depth, Q); 958 break; 959 case Instruction::Xor: 960 computeKnownBits(I->getOperand(1), DemandedElts, Known, Depth + 1, Q); 961 computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q); 962 963 Known = getKnownBitsFromAndXorOr(I, DemandedElts, Known2, Known, Depth, Q); 964 break; 965 case Instruction::Mul: { 966 bool NSW = Q.IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(I)); 967 computeKnownBitsMul(I->getOperand(0), I->getOperand(1), NSW, DemandedElts, 968 Known, Known2, Depth, Q); 969 break; 970 } 971 case Instruction::UDiv: { 972 computeKnownBits(I->getOperand(0), Known, Depth + 1, Q); 973 computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q); 974 Known = 975 KnownBits::udiv(Known, Known2, Q.IIQ.isExact(cast<BinaryOperator>(I))); 976 break; 977 } 978 case Instruction::SDiv: { 979 computeKnownBits(I->getOperand(0), Known, Depth + 1, Q); 980 computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q); 981 Known = 982 KnownBits::sdiv(Known, Known2, Q.IIQ.isExact(cast<BinaryOperator>(I))); 983 break; 984 } 985 case Instruction::Select: { 986 computeKnownBits(I->getOperand(2), Known, Depth + 1, Q); 987 computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q); 988 989 // Only known if known in both the LHS and RHS. 990 Known = Known.intersectWith(Known2); 991 break; 992 } 993 case Instruction::FPTrunc: 994 case Instruction::FPExt: 995 case Instruction::FPToUI: 996 case Instruction::FPToSI: 997 case Instruction::SIToFP: 998 case Instruction::UIToFP: 999 break; // Can't work with floating point. 1000 case Instruction::PtrToInt: 1001 case Instruction::IntToPtr: 1002 // Fall through and handle them the same as zext/trunc. 1003 [[fallthrough]]; 1004 case Instruction::ZExt: 1005 case Instruction::Trunc: { 1006 Type *SrcTy = I->getOperand(0)->getType(); 1007 1008 unsigned SrcBitWidth; 1009 // Note that we handle pointer operands here because of inttoptr/ptrtoint 1010 // which fall through here. 1011 Type *ScalarTy = SrcTy->getScalarType(); 1012 SrcBitWidth = ScalarTy->isPointerTy() ? 
1013 Q.DL.getPointerTypeSizeInBits(ScalarTy) : 1014 Q.DL.getTypeSizeInBits(ScalarTy); 1015 1016 assert(SrcBitWidth && "SrcBitWidth can't be zero"); 1017 Known = Known.anyextOrTrunc(SrcBitWidth); 1018 computeKnownBits(I->getOperand(0), Known, Depth + 1, Q); 1019 if (auto *Inst = dyn_cast<PossiblyNonNegInst>(I); 1020 Inst && Inst->hasNonNeg() && !Known.isNegative()) 1021 Known.makeNonNegative(); 1022 Known = Known.zextOrTrunc(BitWidth); 1023 break; 1024 } 1025 case Instruction::BitCast: { 1026 Type *SrcTy = I->getOperand(0)->getType(); 1027 if (SrcTy->isIntOrPtrTy() && 1028 // TODO: For now, not handling conversions like: 1029 // (bitcast i64 %x to <2 x i32>) 1030 !I->getType()->isVectorTy()) { 1031 computeKnownBits(I->getOperand(0), Known, Depth + 1, Q); 1032 break; 1033 } 1034 1035 // Handle cast from vector integer type to scalar or vector integer. 1036 auto *SrcVecTy = dyn_cast<FixedVectorType>(SrcTy); 1037 if (!SrcVecTy || !SrcVecTy->getElementType()->isIntegerTy() || 1038 !I->getType()->isIntOrIntVectorTy() || 1039 isa<ScalableVectorType>(I->getType())) 1040 break; 1041 1042 // Look through a cast from narrow vector elements to wider type. 1043 // Examples: v4i32 -> v2i64, v3i8 -> v24 1044 unsigned SubBitWidth = SrcVecTy->getScalarSizeInBits(); 1045 if (BitWidth % SubBitWidth == 0) { 1046 // Known bits are automatically intersected across demanded elements of a 1047 // vector. So for example, if a bit is computed as known zero, it must be 1048 // zero across all demanded elements of the vector. 1049 // 1050 // For this bitcast, each demanded element of the output is sub-divided 1051 // across a set of smaller vector elements in the source vector. To get 1052 // the known bits for an entire element of the output, compute the known 1053 // bits for each sub-element sequentially. This is done by shifting the 1054 // one-set-bit demanded elements parameter across the sub-elements for 1055 // consecutive calls to computeKnownBits. We are using the demanded 1056 // elements parameter as a mask operator. 1057 // 1058 // The known bits of each sub-element are then inserted into place 1059 // (dependent on endian) to form the full result of known bits. 1060 unsigned NumElts = DemandedElts.getBitWidth(); 1061 unsigned SubScale = BitWidth / SubBitWidth; 1062 APInt SubDemandedElts = APInt::getZero(NumElts * SubScale); 1063 for (unsigned i = 0; i != NumElts; ++i) { 1064 if (DemandedElts[i]) 1065 SubDemandedElts.setBit(i * SubScale); 1066 } 1067 1068 KnownBits KnownSrc(SubBitWidth); 1069 for (unsigned i = 0; i != SubScale; ++i) { 1070 computeKnownBits(I->getOperand(0), SubDemandedElts.shl(i), KnownSrc, 1071 Depth + 1, Q); 1072 unsigned ShiftElt = Q.DL.isLittleEndian() ? i : SubScale - 1 - i; 1073 Known.insertBits(KnownSrc, ShiftElt * SubBitWidth); 1074 } 1075 } 1076 break; 1077 } 1078 case Instruction::SExt: { 1079 // Compute the bits in the result that are not present in the input. 1080 unsigned SrcBitWidth = I->getOperand(0)->getType()->getScalarSizeInBits(); 1081 1082 Known = Known.trunc(SrcBitWidth); 1083 computeKnownBits(I->getOperand(0), Known, Depth + 1, Q); 1084 // If the sign bit of the input is known set or clear, then we know the 1085 // top bits of the result. 
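    // For example, when sign-extending i8 to i32, a known-one sign bit makes
    // the 24 new high bits known one, and a known-zero sign bit makes them
    // known zero.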
    Known = Known.sext(BitWidth);
    break;
  }
  case Instruction::Shl: {
    bool NUW = Q.IIQ.hasNoUnsignedWrap(cast<OverflowingBinaryOperator>(I));
    bool NSW = Q.IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(I));
    auto KF = [NUW, NSW](const KnownBits &KnownVal, const KnownBits &KnownAmt,
                         bool ShAmtNonZero) {
      return KnownBits::shl(KnownVal, KnownAmt, NUW, NSW, ShAmtNonZero);
    };
    computeKnownBitsFromShiftOperator(I, DemandedElts, Known, Known2, Depth, Q,
                                      KF);
    // Trailing zeros of a left-shifted constant never decrease.
    const APInt *C;
    if (match(I->getOperand(0), m_APInt(C)))
      Known.Zero.setLowBits(C->countr_zero());
    break;
  }
  case Instruction::LShr: {
    auto KF = [](const KnownBits &KnownVal, const KnownBits &KnownAmt,
                 bool ShAmtNonZero) {
      return KnownBits::lshr(KnownVal, KnownAmt, ShAmtNonZero);
    };
    computeKnownBitsFromShiftOperator(I, DemandedElts, Known, Known2, Depth, Q,
                                      KF);
    // Leading zeros of a right-shifted constant never decrease.
    const APInt *C;
    if (match(I->getOperand(0), m_APInt(C)))
      Known.Zero.setHighBits(C->countl_zero());
    break;
  }
  case Instruction::AShr: {
    auto KF = [](const KnownBits &KnownVal, const KnownBits &KnownAmt,
                 bool ShAmtNonZero) {
      return KnownBits::ashr(KnownVal, KnownAmt, ShAmtNonZero);
    };
    computeKnownBitsFromShiftOperator(I, DemandedElts, Known, Known2, Depth, Q,
                                      KF);
    break;
  }
  case Instruction::Sub: {
    bool NSW = Q.IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(I));
    computeKnownBitsAddSub(false, I->getOperand(0), I->getOperand(1), NSW,
                           DemandedElts, Known, Known2, Depth, Q);
    break;
  }
  case Instruction::Add: {
    bool NSW = Q.IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(I));
    computeKnownBitsAddSub(true, I->getOperand(0), I->getOperand(1), NSW,
                           DemandedElts, Known, Known2, Depth, Q);
    break;
  }
  case Instruction::SRem:
    computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
    computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q);
    Known = KnownBits::srem(Known, Known2);
    break;

  case Instruction::URem:
    computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
    computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q);
    Known = KnownBits::urem(Known, Known2);
    break;
  case Instruction::Alloca:
    Known.Zero.setLowBits(Log2(cast<AllocaInst>(I)->getAlign()));
    break;
  case Instruction::GetElementPtr: {
    // Analyze all of the subscripts of this getelementptr instruction
    // to determine if we can prove known low zero bits.
    computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
    // Accumulate the constant indices in a separate variable
    // to minimize the number of calls to computeForAddSub.
    APInt AccConstIndices(BitWidth, 0, /*IsSigned*/ true);

    gep_type_iterator GTI = gep_type_begin(I);
    for (unsigned i = 1, e = I->getNumOperands(); i != e; ++i, ++GTI) {
      // TrailZ can only become smaller, short-circuit if we hit zero.
      if (Known.isUnknown())
        break;

      Value *Index = I->getOperand(i);

      // Handle case when index is zero.
      Constant *CIndex = dyn_cast<Constant>(Index);
      if (CIndex && CIndex->isZeroValue())
        continue;

      if (StructType *STy = GTI.getStructTypeOrNull()) {
        // Handle struct member offset arithmetic.
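        // Struct field indices in a GEP are always constants, so the byte
        // offset of the field is known exactly from the DataLayout's struct
        // layout.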
1175 1176 assert(CIndex && 1177 "Access to structure field must be known at compile time"); 1178 1179 if (CIndex->getType()->isVectorTy()) 1180 Index = CIndex->getSplatValue(); 1181 1182 unsigned Idx = cast<ConstantInt>(Index)->getZExtValue(); 1183 const StructLayout *SL = Q.DL.getStructLayout(STy); 1184 uint64_t Offset = SL->getElementOffset(Idx); 1185 AccConstIndices += Offset; 1186 continue; 1187 } 1188 1189 // Handle array index arithmetic. 1190 Type *IndexedTy = GTI.getIndexedType(); 1191 if (!IndexedTy->isSized()) { 1192 Known.resetAll(); 1193 break; 1194 } 1195 1196 unsigned IndexBitWidth = Index->getType()->getScalarSizeInBits(); 1197 KnownBits IndexBits(IndexBitWidth); 1198 computeKnownBits(Index, IndexBits, Depth + 1, Q); 1199 TypeSize IndexTypeSize = Q.DL.getTypeAllocSize(IndexedTy); 1200 uint64_t TypeSizeInBytes = IndexTypeSize.getKnownMinValue(); 1201 KnownBits ScalingFactor(IndexBitWidth); 1202 // Multiply by current sizeof type. 1203 // &A[i] == A + i * sizeof(*A[i]). 1204 if (IndexTypeSize.isScalable()) { 1205 // For scalable types the only thing we know about sizeof is 1206 // that this is a multiple of the minimum size. 1207 ScalingFactor.Zero.setLowBits(llvm::countr_zero(TypeSizeInBytes)); 1208 } else if (IndexBits.isConstant()) { 1209 APInt IndexConst = IndexBits.getConstant(); 1210 APInt ScalingFactor(IndexBitWidth, TypeSizeInBytes); 1211 IndexConst *= ScalingFactor; 1212 AccConstIndices += IndexConst.sextOrTrunc(BitWidth); 1213 continue; 1214 } else { 1215 ScalingFactor = 1216 KnownBits::makeConstant(APInt(IndexBitWidth, TypeSizeInBytes)); 1217 } 1218 IndexBits = KnownBits::mul(IndexBits, ScalingFactor); 1219 1220 // If the offsets have a different width from the pointer, according 1221 // to the language reference we need to sign-extend or truncate them 1222 // to the width of the pointer. 1223 IndexBits = IndexBits.sextOrTrunc(BitWidth); 1224 1225 // Note that inbounds does *not* guarantee nsw for the addition, as only 1226 // the offset is signed, while the base address is unsigned. 1227 Known = KnownBits::computeForAddSub( 1228 /*Add=*/true, /*NSW=*/false, Known, IndexBits); 1229 } 1230 if (!Known.isUnknown() && !AccConstIndices.isZero()) { 1231 KnownBits Index = KnownBits::makeConstant(AccConstIndices); 1232 Known = KnownBits::computeForAddSub( 1233 /*Add=*/true, /*NSW=*/false, Known, Index); 1234 } 1235 break; 1236 } 1237 case Instruction::PHI: { 1238 const PHINode *P = cast<PHINode>(I); 1239 BinaryOperator *BO = nullptr; 1240 Value *R = nullptr, *L = nullptr; 1241 if (matchSimpleRecurrence(P, BO, R, L)) { 1242 // Handle the case of a simple two-predecessor recurrence PHI. 1243 // There's a lot more that could theoretically be done here, but 1244 // this is sufficient to catch some interesting cases. 1245 unsigned Opcode = BO->getOpcode(); 1246 1247 // If this is a shift recurrence, we know the bits being shifted in. 1248 // We can combine that with information about the start value of the 1249 // recurrence to conclude facts about the result. 1250 if ((Opcode == Instruction::LShr || Opcode == Instruction::AShr || 1251 Opcode == Instruction::Shl) && 1252 BO->getOperand(0) == I) { 1253 1254 // We have matched a recurrence of the form: 1255 // %iv = [R, %entry], [%iv.next, %backedge] 1256 // %iv.next = shift_op %iv, L 1257 1258 // Recurse with the phi context to avoid concern about whether facts 1259 // inferred hold at original context instruction. TODO: It may be 1260 // correct to use the original context. 
        // If warranted, explore and add sufficient tests to cover.
        SimplifyQuery RecQ = Q;
        RecQ.CxtI = P;
        computeKnownBits(R, DemandedElts, Known2, Depth + 1, RecQ);
        switch (Opcode) {
        case Instruction::Shl:
          // A shl recurrence will only increase the trailing zeros.
          Known.Zero.setLowBits(Known2.countMinTrailingZeros());
          break;
        case Instruction::LShr:
          // A lshr recurrence will preserve the leading zeros of the
          // start value.
          Known.Zero.setHighBits(Known2.countMinLeadingZeros());
          break;
        case Instruction::AShr:
          // An ashr recurrence will extend the initial sign bit.
          Known.Zero.setHighBits(Known2.countMinLeadingZeros());
          Known.One.setHighBits(Known2.countMinLeadingOnes());
          break;
        };
      }

      // Check for operations that have the property that if
      // both their operands have low zero bits, the result
      // will have low zero bits.
      if (Opcode == Instruction::Add ||
          Opcode == Instruction::Sub ||
          Opcode == Instruction::And ||
          Opcode == Instruction::Or ||
          Opcode == Instruction::Mul) {
        // Change the context instruction to the "edge" that flows into the
        // phi. This is important because that is where the value is actually
        // "evaluated" even though it is used later somewhere else. (see also
        // D69571).
        SimplifyQuery RecQ = Q;

        unsigned OpNum = P->getOperand(0) == R ? 0 : 1;
        Instruction *RInst = P->getIncomingBlock(OpNum)->getTerminator();
        Instruction *LInst = P->getIncomingBlock(1 - OpNum)->getTerminator();

        // Ok, we have a PHI of the form L op= R. Check for low
        // zero bits.
        RecQ.CxtI = RInst;
        computeKnownBits(R, Known2, Depth + 1, RecQ);

        // We need to take the minimum number of known bits.
        KnownBits Known3(BitWidth);
        RecQ.CxtI = LInst;
        computeKnownBits(L, Known3, Depth + 1, RecQ);

        Known.Zero.setLowBits(std::min(Known2.countMinTrailingZeros(),
                                       Known3.countMinTrailingZeros()));

        auto *OverflowOp = dyn_cast<OverflowingBinaryOperator>(BO);
        if (OverflowOp && Q.IIQ.hasNoSignedWrap(OverflowOp)) {
          // If the initial value of the recurrence is nonnegative and we are
          // adding a nonnegative number with nsw, the result can only be
          // nonnegative or poison, regardless of the number of times we
          // execute the add in the phi recurrence. If the initial value is
          // negative and we are adding a negative number with nsw, the result
          // can only be negative or poison. Similar arguments apply to sub
          // and mul.
          //
          // (add non-negative, non-negative) --> non-negative
          // (add negative, negative) --> negative
          if (Opcode == Instruction::Add) {
            if (Known2.isNonNegative() && Known3.isNonNegative())
              Known.makeNonNegative();
            else if (Known2.isNegative() && Known3.isNegative())
              Known.makeNegative();
          }

          // (sub nsw non-negative, negative) --> non-negative
          // (sub nsw negative, non-negative) --> negative
          else if (Opcode == Instruction::Sub && BO->getOperand(0) == I) {
            if (Known2.isNonNegative() && Known3.isNegative())
              Known.makeNonNegative();
            else if (Known2.isNegative() && Known3.isNonNegative())
              Known.makeNegative();
          }

          // (mul nsw non-negative, non-negative) --> non-negative
          else if (Opcode == Instruction::Mul && Known2.isNonNegative() &&
                   Known3.isNonNegative())
            Known.makeNonNegative();
        }

        break;
      }
    }

    // Unreachable blocks may have zero-operand PHI nodes.
    if (P->getNumIncomingValues() == 0)
      break;

    // Otherwise take the unions of the known bit sets of the operands,
    // taking conservative care to avoid excessive recursion.
    if (Depth < MaxAnalysisRecursionDepth - 1 && Known.isUnknown()) {
      // Skip if every incoming value references the PHI itself.
      if (isa_and_nonnull<UndefValue>(P->hasConstantValue()))
        break;

      Known.Zero.setAllBits();
      Known.One.setAllBits();
      for (unsigned u = 0, e = P->getNumIncomingValues(); u < e; ++u) {
        Value *IncValue = P->getIncomingValue(u);
        // Skip direct self references.
        if (IncValue == P) continue;

        // Change the context instruction to the "edge" that flows into the
        // phi. This is important because that is where the value is actually
        // "evaluated" even though it is used later somewhere else. (see also
        // D69571).
        SimplifyQuery RecQ = Q;
        RecQ.CxtI = P->getIncomingBlock(u)->getTerminator();

        Known2 = KnownBits(BitWidth);

        // Recurse, but cap the recursion to one level, because we don't
        // want to waste time spinning around in loops.
        // TODO: See if we can base recursion limiter on number of incoming phi
        // edges so we don't overly clamp analysis.
        computeKnownBits(IncValue, Known2, MaxAnalysisRecursionDepth - 1, RecQ);

        // See if we can further use a conditional branch into the phi
        // to help us determine the range of the value.
        if (!Known2.isConstant()) {
          ICmpInst::Predicate Pred;
          const APInt *RHSC;
          BasicBlock *TrueSucc, *FalseSucc;
          // TODO: Use RHS Value and compute range from its known bits.
          if (match(RecQ.CxtI,
                    m_Br(m_c_ICmp(Pred, m_Specific(IncValue), m_APInt(RHSC)),
                         m_BasicBlock(TrueSucc), m_BasicBlock(FalseSucc)))) {
            // Check for cases of duplicate successors.
            if ((TrueSucc == P->getParent()) != (FalseSucc == P->getParent())) {
              // If we're using the false successor, invert the predicate.
              if (FalseSucc == P->getParent())
                Pred = CmpInst::getInversePredicate(Pred);
              // Get the knownbits implied by the incoming phi condition.
              auto CR = ConstantRange::makeExactICmpRegion(Pred, *RHSC);
              KnownBits KnownUnion = Known2.unionWith(CR.toKnownBits());
              // We can have conflicts here if we are analyzing dead code (it's
              // impossible for us to reach this BB based on the icmp).
              if (KnownUnion.hasConflict()) {
                // No reason to continue analyzing in a known dead region, so
                // just resetAll and break.
This will cause us to also exit the 1407 // outer loop. 1408 Known.resetAll(); 1409 break; 1410 } 1411 Known2 = KnownUnion; 1412 } 1413 } 1414 } 1415 1416 Known = Known.intersectWith(Known2); 1417 // If all bits have been ruled out, there's no need to check 1418 // more operands. 1419 if (Known.isUnknown()) 1420 break; 1421 } 1422 } 1423 break; 1424 } 1425 case Instruction::Call: 1426 case Instruction::Invoke: 1427 // If range metadata is attached to this call, set known bits from that, 1428 // and then intersect with known bits based on other properties of the 1429 // function. 1430 if (MDNode *MD = 1431 Q.IIQ.getMetadata(cast<Instruction>(I), LLVMContext::MD_range)) 1432 computeKnownBitsFromRangeMetadata(*MD, Known); 1433 if (const Value *RV = cast<CallBase>(I)->getReturnedArgOperand()) { 1434 if (RV->getType() == I->getType()) { 1435 computeKnownBits(RV, Known2, Depth + 1, Q); 1436 Known = Known.unionWith(Known2); 1437 } 1438 } 1439 if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { 1440 switch (II->getIntrinsicID()) { 1441 default: break; 1442 case Intrinsic::abs: { 1443 computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q); 1444 bool IntMinIsPoison = match(II->getArgOperand(1), m_One()); 1445 Known = Known2.abs(IntMinIsPoison); 1446 break; 1447 } 1448 case Intrinsic::bitreverse: 1449 computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q); 1450 Known.Zero |= Known2.Zero.reverseBits(); 1451 Known.One |= Known2.One.reverseBits(); 1452 break; 1453 case Intrinsic::bswap: 1454 computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q); 1455 Known.Zero |= Known2.Zero.byteSwap(); 1456 Known.One |= Known2.One.byteSwap(); 1457 break; 1458 case Intrinsic::ctlz: { 1459 computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q); 1460 // If we have a known 1, its position is our upper bound. 1461 unsigned PossibleLZ = Known2.countMaxLeadingZeros(); 1462 // If this call is poison for 0 input, the result will be less than 2^n. 1463 if (II->getArgOperand(1) == ConstantInt::getTrue(II->getContext())) 1464 PossibleLZ = std::min(PossibleLZ, BitWidth - 1); 1465 unsigned LowBits = llvm::bit_width(PossibleLZ); 1466 Known.Zero.setBitsFrom(LowBits); 1467 break; 1468 } 1469 case Intrinsic::cttz: { 1470 computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q); 1471 // If we have a known 1, its position is our upper bound. 1472 unsigned PossibleTZ = Known2.countMaxTrailingZeros(); 1473 // If this call is poison for 0 input, the result will be less than 2^n. 1474 if (II->getArgOperand(1) == ConstantInt::getTrue(II->getContext())) 1475 PossibleTZ = std::min(PossibleTZ, BitWidth - 1); 1476 unsigned LowBits = llvm::bit_width(PossibleTZ); 1477 Known.Zero.setBitsFrom(LowBits); 1478 break; 1479 } 1480 case Intrinsic::ctpop: { 1481 computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q); 1482 // We can bound the space the count needs. Also, bits known to be zero 1483 // can't contribute to the population. 1484 unsigned BitsPossiblySet = Known2.countMaxPopulation(); 1485 unsigned LowBits = llvm::bit_width(BitsPossiblySet); 1486 Known.Zero.setBitsFrom(LowBits); 1487 // TODO: we could bound KnownOne using the lower bound on the number 1488 // of bits which might be set provided by popcnt KnownOne2. 1489 break; 1490 } 1491 case Intrinsic::fshr: 1492 case Intrinsic::fshl: { 1493 const APInt *SA; 1494 if (!match(I->getOperand(2), m_APInt(SA))) 1495 break; 1496 1497 // Normalize to funnel shift left. 
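      // fshr(X, Y, Z) is equivalent to fshl(X, Y, BitWidth - Z) modulo the
      // bit width, so one shift amount covers both intrinsics below.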
1498 uint64_t ShiftAmt = SA->urem(BitWidth); 1499 if (II->getIntrinsicID() == Intrinsic::fshr) 1500 ShiftAmt = BitWidth - ShiftAmt; 1501 1502 KnownBits Known3(BitWidth); 1503 computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q); 1504 computeKnownBits(I->getOperand(1), Known3, Depth + 1, Q); 1505 1506 Known.Zero = 1507 Known2.Zero.shl(ShiftAmt) | Known3.Zero.lshr(BitWidth - ShiftAmt); 1508 Known.One = 1509 Known2.One.shl(ShiftAmt) | Known3.One.lshr(BitWidth - ShiftAmt); 1510 break; 1511 } 1512 case Intrinsic::uadd_sat: 1513 computeKnownBits(I->getOperand(0), Known, Depth + 1, Q); 1514 computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q); 1515 Known = KnownBits::uadd_sat(Known, Known2); 1516 break; 1517 case Intrinsic::usub_sat: 1518 computeKnownBits(I->getOperand(0), Known, Depth + 1, Q); 1519 computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q); 1520 Known = KnownBits::usub_sat(Known, Known2); 1521 break; 1522 case Intrinsic::sadd_sat: 1523 computeKnownBits(I->getOperand(0), Known, Depth + 1, Q); 1524 computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q); 1525 Known = KnownBits::sadd_sat(Known, Known2); 1526 break; 1527 case Intrinsic::ssub_sat: 1528 computeKnownBits(I->getOperand(0), Known, Depth + 1, Q); 1529 computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q); 1530 Known = KnownBits::ssub_sat(Known, Known2); 1531 break; 1532 case Intrinsic::umin: 1533 computeKnownBits(I->getOperand(0), Known, Depth + 1, Q); 1534 computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q); 1535 Known = KnownBits::umin(Known, Known2); 1536 break; 1537 case Intrinsic::umax: 1538 computeKnownBits(I->getOperand(0), Known, Depth + 1, Q); 1539 computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q); 1540 Known = KnownBits::umax(Known, Known2); 1541 break; 1542 case Intrinsic::smin: 1543 computeKnownBits(I->getOperand(0), Known, Depth + 1, Q); 1544 computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q); 1545 Known = KnownBits::smin(Known, Known2); 1546 break; 1547 case Intrinsic::smax: 1548 computeKnownBits(I->getOperand(0), Known, Depth + 1, Q); 1549 computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q); 1550 Known = KnownBits::smax(Known, Known2); 1551 break; 1552 case Intrinsic::ptrmask: { 1553 computeKnownBits(I->getOperand(0), Known, Depth + 1, Q); 1554 1555 const Value *Mask = I->getOperand(1); 1556 Known2 = KnownBits(Mask->getType()->getScalarSizeInBits()); 1557 computeKnownBits(Mask, Known2, Depth + 1, Q); 1558 // TODO: 1-extend would be more precise. 1559 Known &= Known2.anyextOrTrunc(BitWidth); 1560 break; 1561 } 1562 case Intrinsic::x86_sse42_crc32_64_64: 1563 Known.Zero.setBitsFrom(32); 1564 break; 1565 case Intrinsic::riscv_vsetvli: 1566 case Intrinsic::riscv_vsetvlimax: 1567 // Assume that VL output is <= 65536. 1568 // TODO: Take SEW and LMUL into account. 1569 if (BitWidth > 17) 1570 Known.Zero.setBitsFrom(17); 1571 break; 1572 case Intrinsic::vscale: { 1573 if (!II->getParent() || !II->getFunction()) 1574 break; 1575 1576 Known = getVScaleRange(II->getFunction(), BitWidth).toKnownBits(); 1577 break; 1578 } 1579 } 1580 } 1581 break; 1582 case Instruction::ShuffleVector: { 1583 auto *Shuf = dyn_cast<ShuffleVectorInst>(I); 1584 // FIXME: Do we need to handle ConstantExpr involving shufflevectors? 1585 if (!Shuf) { 1586 Known.resetAll(); 1587 return; 1588 } 1589 // For undef elements, we don't know anything about the common state of 1590 // the shuffle result. 
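// Illustrative example (hypothetical values): for
//   %r = shufflevector <4 x i32> %a, <4 x i32> %b,
//                      <4 x i32> <i32 0, i32 5, i32 2, i32 7>
// with DemandedElts = 0b1010 (lanes 1 and 3), both demanded lanes come from
// %b (mask entries 5 and 7), so DemandedLHS is 0, DemandedRHS is 0b1010, and
// the first operand does not need to be visited at all.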
1591 APInt DemandedLHS, DemandedRHS; 1592 if (!getShuffleDemandedElts(Shuf, DemandedElts, DemandedLHS, DemandedRHS)) { 1593 Known.resetAll(); 1594 return; 1595 } 1596 Known.One.setAllBits(); 1597 Known.Zero.setAllBits(); 1598 if (!!DemandedLHS) { 1599 const Value *LHS = Shuf->getOperand(0); 1600 computeKnownBits(LHS, DemandedLHS, Known, Depth + 1, Q); 1601 // If we don't know any bits, early out. 1602 if (Known.isUnknown()) 1603 break; 1604 } 1605 if (!!DemandedRHS) { 1606 const Value *RHS = Shuf->getOperand(1); 1607 computeKnownBits(RHS, DemandedRHS, Known2, Depth + 1, Q); 1608 Known = Known.intersectWith(Known2); 1609 } 1610 break; 1611 } 1612 case Instruction::InsertElement: { 1613 if (isa<ScalableVectorType>(I->getType())) { 1614 Known.resetAll(); 1615 return; 1616 } 1617 const Value *Vec = I->getOperand(0); 1618 const Value *Elt = I->getOperand(1); 1619 auto *CIdx = dyn_cast<ConstantInt>(I->getOperand(2)); 1620 // Early out if the index is non-constant or out-of-range. 1621 unsigned NumElts = DemandedElts.getBitWidth(); 1622 if (!CIdx || CIdx->getValue().uge(NumElts)) { 1623 Known.resetAll(); 1624 return; 1625 } 1626 Known.One.setAllBits(); 1627 Known.Zero.setAllBits(); 1628 unsigned EltIdx = CIdx->getZExtValue(); 1629 // Do we demand the inserted element? 1630 if (DemandedElts[EltIdx]) { 1631 computeKnownBits(Elt, Known, Depth + 1, Q); 1632 // If we don't know any bits, early out. 1633 if (Known.isUnknown()) 1634 break; 1635 } 1636 // We don't need the base vector element that has been inserted. 1637 APInt DemandedVecElts = DemandedElts; 1638 DemandedVecElts.clearBit(EltIdx); 1639 if (!!DemandedVecElts) { 1640 computeKnownBits(Vec, DemandedVecElts, Known2, Depth + 1, Q); 1641 Known = Known.intersectWith(Known2); 1642 } 1643 break; 1644 } 1645 case Instruction::ExtractElement: { 1646 // Look through extract element. If the index is non-constant or 1647 // out-of-range demand all elements, otherwise just the extracted element. 
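// Illustrative example (hypothetical values): for
//   %x = extractelement <4 x i32> %v, i64 2
// only lane 2 of %v is demanded (DemandedVecElts = 0b0100), while a variable
// index such as i64 %i forces all four lanes to be demanded.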
1648 const Value *Vec = I->getOperand(0); 1649 const Value *Idx = I->getOperand(1); 1650 auto *CIdx = dyn_cast<ConstantInt>(Idx); 1651 if (isa<ScalableVectorType>(Vec->getType())) { 1652 // FIXME: there's probably *something* we can do with scalable vectors 1653 Known.resetAll(); 1654 break; 1655 } 1656 unsigned NumElts = cast<FixedVectorType>(Vec->getType())->getNumElements(); 1657 APInt DemandedVecElts = APInt::getAllOnes(NumElts); 1658 if (CIdx && CIdx->getValue().ult(NumElts)) 1659 DemandedVecElts = APInt::getOneBitSet(NumElts, CIdx->getZExtValue()); 1660 computeKnownBits(Vec, DemandedVecElts, Known, Depth + 1, Q); 1661 break; 1662 } 1663 case Instruction::ExtractValue: 1664 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I->getOperand(0))) { 1665 const ExtractValueInst *EVI = cast<ExtractValueInst>(I); 1666 if (EVI->getNumIndices() != 1) break; 1667 if (EVI->getIndices()[0] == 0) { 1668 switch (II->getIntrinsicID()) { 1669 default: break; 1670 case Intrinsic::uadd_with_overflow: 1671 case Intrinsic::sadd_with_overflow: 1672 computeKnownBitsAddSub(true, II->getArgOperand(0), 1673 II->getArgOperand(1), false, DemandedElts, 1674 Known, Known2, Depth, Q); 1675 break; 1676 case Intrinsic::usub_with_overflow: 1677 case Intrinsic::ssub_with_overflow: 1678 computeKnownBitsAddSub(false, II->getArgOperand(0), 1679 II->getArgOperand(1), false, DemandedElts, 1680 Known, Known2, Depth, Q); 1681 break; 1682 case Intrinsic::umul_with_overflow: 1683 case Intrinsic::smul_with_overflow: 1684 computeKnownBitsMul(II->getArgOperand(0), II->getArgOperand(1), false, 1685 DemandedElts, Known, Known2, Depth, Q); 1686 break; 1687 } 1688 } 1689 } 1690 break; 1691 case Instruction::Freeze: 1692 if (isGuaranteedNotToBePoison(I->getOperand(0), Q.AC, Q.CxtI, Q.DT, 1693 Depth + 1)) 1694 computeKnownBits(I->getOperand(0), Known, Depth + 1, Q); 1695 break; 1696 } 1697 } 1698 1699 /// Determine which bits of V are known to be either zero or one and return 1700 /// them. 1701 KnownBits llvm::computeKnownBits(const Value *V, const APInt &DemandedElts, 1702 unsigned Depth, const SimplifyQuery &Q) { 1703 KnownBits Known(getBitWidth(V->getType(), Q.DL)); 1704 ::computeKnownBits(V, DemandedElts, Known, Depth, Q); 1705 return Known; 1706 } 1707 1708 /// Determine which bits of V are known to be either zero or one and return 1709 /// them. 1710 KnownBits llvm::computeKnownBits(const Value *V, unsigned Depth, 1711 const SimplifyQuery &Q) { 1712 KnownBits Known(getBitWidth(V->getType(), Q.DL)); 1713 computeKnownBits(V, Known, Depth, Q); 1714 return Known; 1715 } 1716 1717 /// Determine which bits of V are known to be either zero or one and return 1718 /// them in the Known bit set. 1719 /// 1720 /// NOTE: we cannot consider 'undef' to be "IsZero" here. The problem is that 1721 /// we cannot optimize based on the assumption that it is zero without changing 1722 /// it to be an explicit zero. If we don't change it to zero, other code could 1723 /// be optimized based on the contradictory assumption that it is non-zero. 1724 /// Because instcombine aggressively folds operations with undef args anyway, 1725 /// this won't lose us code quality. 1726 /// 1727 /// This function is defined on values with integer type, values with pointer 1728 /// type, and vectors of integers. In the case 1729 /// where V is a vector, the known zero and known one values are the 1730 /// same width as the vector element, and the bit is set only if it is true 1731 /// for all of the demanded elements in the vector specified by DemandedElts.
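///
/// Illustrative example (hypothetical values): for the constant vector
/// <i32 1, i32 7, i32 5, i32 7> with DemandedElts = 0b0101 (lanes 0 and 2),
/// only the values 1 (0b001) and 5 (0b101) are intersected, so bit 0 is known
/// one, bit 1 is known zero and bit 2 is unknown.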
1732 void computeKnownBits(const Value *V, const APInt &DemandedElts, 1733 KnownBits &Known, unsigned Depth, 1734 const SimplifyQuery &Q) { 1735 if (!DemandedElts) { 1736 // No demanded elts, better to assume we don't know anything. 1737 Known.resetAll(); 1738 return; 1739 } 1740 1741 assert(V && "No Value?"); 1742 assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth"); 1743 1744 #ifndef NDEBUG 1745 Type *Ty = V->getType(); 1746 unsigned BitWidth = Known.getBitWidth(); 1747 1748 assert((Ty->isIntOrIntVectorTy(BitWidth) || Ty->isPtrOrPtrVectorTy()) && 1749 "Not integer or pointer type!"); 1750 1751 if (auto *FVTy = dyn_cast<FixedVectorType>(Ty)) { 1752 assert( 1753 FVTy->getNumElements() == DemandedElts.getBitWidth() && 1754 "DemandedElt width should equal the fixed vector number of elements"); 1755 } else { 1756 assert(DemandedElts == APInt(1, 1) && 1757 "DemandedElt width should be 1 for scalars or scalable vectors"); 1758 } 1759 1760 Type *ScalarTy = Ty->getScalarType(); 1761 if (ScalarTy->isPointerTy()) { 1762 assert(BitWidth == Q.DL.getPointerTypeSizeInBits(ScalarTy) && 1763 "V and Known should have same BitWidth"); 1764 } else { 1765 assert(BitWidth == Q.DL.getTypeSizeInBits(ScalarTy) && 1766 "V and Known should have same BitWidth"); 1767 } 1768 #endif 1769 1770 const APInt *C; 1771 if (match(V, m_APInt(C))) { 1772 // We know all of the bits for a scalar constant or a splat vector constant! 1773 Known = KnownBits::makeConstant(*C); 1774 return; 1775 } 1776 // Null and aggregate-zero are all-zeros. 1777 if (isa<ConstantPointerNull>(V) || isa<ConstantAggregateZero>(V)) { 1778 Known.setAllZero(); 1779 return; 1780 } 1781 // Handle a constant vector by taking the intersection of the known bits of 1782 // each element. 1783 if (const ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(V)) { 1784 assert(!isa<ScalableVectorType>(V->getType())); 1785 // We know that CDV must be a vector of integers. Take the intersection of 1786 // each element. 1787 Known.Zero.setAllBits(); Known.One.setAllBits(); 1788 for (unsigned i = 0, e = CDV->getNumElements(); i != e; ++i) { 1789 if (!DemandedElts[i]) 1790 continue; 1791 APInt Elt = CDV->getElementAsAPInt(i); 1792 Known.Zero &= ~Elt; 1793 Known.One &= Elt; 1794 } 1795 if (Known.hasConflict()) 1796 Known.resetAll(); 1797 return; 1798 } 1799 1800 if (const auto *CV = dyn_cast<ConstantVector>(V)) { 1801 assert(!isa<ScalableVectorType>(V->getType())); 1802 // We know that CV must be a vector of integers. Take the intersection of 1803 // each element. 1804 Known.Zero.setAllBits(); Known.One.setAllBits(); 1805 for (unsigned i = 0, e = CV->getNumOperands(); i != e; ++i) { 1806 if (!DemandedElts[i]) 1807 continue; 1808 Constant *Element = CV->getAggregateElement(i); 1809 if (isa<PoisonValue>(Element)) 1810 continue; 1811 auto *ElementCI = dyn_cast_or_null<ConstantInt>(Element); 1812 if (!ElementCI) { 1813 Known.resetAll(); 1814 return; 1815 } 1816 const APInt &Elt = ElementCI->getValue(); 1817 Known.Zero &= ~Elt; 1818 Known.One &= Elt; 1819 } 1820 if (Known.hasConflict()) 1821 Known.resetAll(); 1822 return; 1823 } 1824 1825 // Start out not knowing anything. 1826 Known.resetAll(); 1827 1828 // We can't imply anything about undefs. 1829 if (isa<UndefValue>(V)) 1830 return; 1831 1832 // There's no point in looking through other users of ConstantData for 1833 // assumptions. Confirm that we've handled them all. 
1834 assert(!isa<ConstantData>(V) && "Unhandled constant data!"); 1835 1836 // All recursive calls that increase depth must come after this. 1837 if (Depth == MaxAnalysisRecursionDepth) 1838 return; 1839 1840 // A weak GlobalAlias is totally unknown. A non-weak GlobalAlias has 1841 // the bits of its aliasee. 1842 if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) { 1843 if (!GA->isInterposable()) 1844 computeKnownBits(GA->getAliasee(), Known, Depth + 1, Q); 1845 return; 1846 } 1847 1848 if (const Operator *I = dyn_cast<Operator>(V)) 1849 computeKnownBitsFromOperator(I, DemandedElts, Known, Depth, Q); 1850 else if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) { 1851 if (std::optional<ConstantRange> CR = GV->getAbsoluteSymbolRange()) 1852 Known = CR->toKnownBits(); 1853 } 1854 1855 // Aligned pointers have trailing zeros - refine Known.Zero set 1856 if (isa<PointerType>(V->getType())) { 1857 Align Alignment = V->getPointerAlignment(Q.DL); 1858 Known.Zero.setLowBits(Log2(Alignment)); 1859 } 1860 1861 // computeKnownBitsFromContext strictly refines Known. 1862 // Therefore, we run them after computeKnownBitsFromOperator. 1863 1864 // Check whether we can determine known bits from context such as assumes. 1865 computeKnownBitsFromContext(V, Known, Depth, Q); 1866 1867 assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?"); 1868 } 1869 1870 /// Try to detect a recurrence that the value of the induction variable is 1871 /// always a power of two (or zero). 1872 static bool isPowerOfTwoRecurrence(const PHINode *PN, bool OrZero, 1873 unsigned Depth, SimplifyQuery &Q) { 1874 BinaryOperator *BO = nullptr; 1875 Value *Start = nullptr, *Step = nullptr; 1876 if (!matchSimpleRecurrence(PN, BO, Start, Step)) 1877 return false; 1878 1879 // Initial value must be a power of two. 1880 for (const Use &U : PN->operands()) { 1881 if (U.get() == Start) { 1882 // Initial value comes from a different BB, need to adjust context 1883 // instruction for analysis. 1884 Q.CxtI = PN->getIncomingBlock(U)->getTerminator(); 1885 if (!isKnownToBeAPowerOfTwo(Start, OrZero, Depth, Q)) 1886 return false; 1887 } 1888 } 1889 1890 // Except for Mul, the induction variable must be on the left side of the 1891 // increment expression, otherwise its value can be arbitrary. 1892 if (BO->getOpcode() != Instruction::Mul && BO->getOperand(1) != Step) 1893 return false; 1894 1895 Q.CxtI = BO->getParent()->getTerminator(); 1896 switch (BO->getOpcode()) { 1897 case Instruction::Mul: 1898 // Power of two is closed under multiplication. 1899 return (OrZero || Q.IIQ.hasNoUnsignedWrap(BO) || 1900 Q.IIQ.hasNoSignedWrap(BO)) && 1901 isKnownToBeAPowerOfTwo(Step, OrZero, Depth, Q); 1902 case Instruction::SDiv: 1903 // Start value must not be signmask for signed division, so simply being a 1904 // power of two is not sufficient, and it has to be a constant. 1905 if (!match(Start, m_Power2()) || match(Start, m_SignMask())) 1906 return false; 1907 [[fallthrough]]; 1908 case Instruction::UDiv: 1909 // Divisor must be a power of two. 1910 // If OrZero is false, cannot guarantee induction variable is non-zero after 1911 // division, same for Shr, unless it is exact division. 
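// Illustrative example (hypothetical values): for
//   %p = phi i32 [ 16, %entry ], [ %next, %loop ]  with  %next = udiv i32 %p, 4
// the values are 16, 4, 1, 0, 0, ... so without either OrZero or an exact
// division we could not rule out the recurrence reaching zero.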
1912 return (OrZero || Q.IIQ.isExact(BO)) && 1913 isKnownToBeAPowerOfTwo(Step, false, Depth, Q); 1914 case Instruction::Shl: 1915 return OrZero || Q.IIQ.hasNoUnsignedWrap(BO) || Q.IIQ.hasNoSignedWrap(BO); 1916 case Instruction::AShr: 1917 if (!match(Start, m_Power2()) || match(Start, m_SignMask())) 1918 return false; 1919 [[fallthrough]]; 1920 case Instruction::LShr: 1921 return OrZero || Q.IIQ.isExact(BO); 1922 default: 1923 return false; 1924 } 1925 } 1926 1927 /// Return true if the given value is known to have exactly one 1928 /// bit set when defined. For vectors return true if every element is known to 1929 /// be a power of two when defined. Supports values with integer or pointer 1930 /// types and vectors of integers. 1931 bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero, unsigned Depth, 1932 const SimplifyQuery &Q) { 1933 assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth"); 1934 1935 if (isa<Constant>(V)) 1936 return OrZero ? match(V, m_Power2OrZero()) : match(V, m_Power2()); 1937 1938 // i1 is by definition a power of 2 or zero. 1939 if (OrZero && V->getType()->getScalarSizeInBits() == 1) 1940 return true; 1941 1942 auto *I = dyn_cast<Instruction>(V); 1943 if (!I) 1944 return false; 1945 1946 if (Q.CxtI && match(V, m_VScale())) { 1947 const Function *F = Q.CxtI->getFunction(); 1948 // The vscale_range indicates vscale is a power-of-two. 1949 return F->hasFnAttribute(Attribute::VScaleRange); 1950 } 1951 1952 // 1 << X is clearly a power of two if the one is not shifted off the end. If 1953 // it is shifted off the end then the result is undefined. 1954 if (match(I, m_Shl(m_One(), m_Value()))) 1955 return true; 1956 1957 // (signmask) >>l X is clearly a power of two if the one is not shifted off 1958 // the bottom. If it is shifted off the bottom then the result is undefined. 1959 if (match(I, m_LShr(m_SignMask(), m_Value()))) 1960 return true; 1961 1962 // The remaining tests are all recursive, so bail out if we hit the limit. 1963 if (Depth++ == MaxAnalysisRecursionDepth) 1964 return false; 1965 1966 switch (I->getOpcode()) { 1967 case Instruction::ZExt: 1968 return isKnownToBeAPowerOfTwo(I->getOperand(0), OrZero, Depth, Q); 1969 case Instruction::Trunc: 1970 return OrZero && isKnownToBeAPowerOfTwo(I->getOperand(0), OrZero, Depth, Q); 1971 case Instruction::Shl: 1972 if (OrZero || Q.IIQ.hasNoUnsignedWrap(I) || Q.IIQ.hasNoSignedWrap(I)) 1973 return isKnownToBeAPowerOfTwo(I->getOperand(0), OrZero, Depth, Q); 1974 return false; 1975 case Instruction::LShr: 1976 if (OrZero || Q.IIQ.isExact(cast<BinaryOperator>(I))) 1977 return isKnownToBeAPowerOfTwo(I->getOperand(0), OrZero, Depth, Q); 1978 return false; 1979 case Instruction::UDiv: 1980 if (Q.IIQ.isExact(cast<BinaryOperator>(I))) 1981 return isKnownToBeAPowerOfTwo(I->getOperand(0), OrZero, Depth, Q); 1982 return false; 1983 case Instruction::Mul: 1984 return isKnownToBeAPowerOfTwo(I->getOperand(1), OrZero, Depth, Q) && 1985 isKnownToBeAPowerOfTwo(I->getOperand(0), OrZero, Depth, Q) && 1986 (OrZero || isKnownNonZero(I, Depth, Q)); 1987 case Instruction::And: 1988 // A power of two and'd with anything is a power of two or zero. 1989 if (OrZero && 1990 (isKnownToBeAPowerOfTwo(I->getOperand(1), /*OrZero*/ true, Depth, Q) || 1991 isKnownToBeAPowerOfTwo(I->getOperand(0), /*OrZero*/ true, Depth, Q))) 1992 return true; 1993 // X & (-X) is always a power of two or zero. 
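// Illustrative example (hypothetical value): x = 0b01101000 gives
// -x = 0b10011000 and x & -x = 0b00001000, i.e. the lowest set bit of x
// (or zero when x == 0).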
1994 if (match(I->getOperand(0), m_Neg(m_Specific(I->getOperand(1)))) || 1995 match(I->getOperand(1), m_Neg(m_Specific(I->getOperand(0))))) 1996 return OrZero || isKnownNonZero(I->getOperand(0), Depth, Q); 1997 return false; 1998 case Instruction::Add: { 1999 // Adding a power-of-two or zero to the same power-of-two or zero yields 2000 // either the original power-of-two, a larger power-of-two or zero. 2001 const OverflowingBinaryOperator *VOBO = cast<OverflowingBinaryOperator>(V); 2002 if (OrZero || Q.IIQ.hasNoUnsignedWrap(VOBO) || 2003 Q.IIQ.hasNoSignedWrap(VOBO)) { 2004 if (match(I->getOperand(0), 2005 m_c_And(m_Specific(I->getOperand(1)), m_Value())) && 2006 isKnownToBeAPowerOfTwo(I->getOperand(1), OrZero, Depth, Q)) 2007 return true; 2008 if (match(I->getOperand(1), 2009 m_c_And(m_Specific(I->getOperand(0)), m_Value())) && 2010 isKnownToBeAPowerOfTwo(I->getOperand(0), OrZero, Depth, Q)) 2011 return true; 2012 2013 unsigned BitWidth = V->getType()->getScalarSizeInBits(); 2014 KnownBits LHSBits(BitWidth); 2015 computeKnownBits(I->getOperand(0), LHSBits, Depth, Q); 2016 2017 KnownBits RHSBits(BitWidth); 2018 computeKnownBits(I->getOperand(1), RHSBits, Depth, Q); 2019 // If i8 V is a power of two or zero: 2020 // ZeroBits: 1 1 1 0 1 1 1 1 2021 // ~ZeroBits: 0 0 0 1 0 0 0 0 2022 if ((~(LHSBits.Zero & RHSBits.Zero)).isPowerOf2()) 2023 // If OrZero isn't set, we cannot give back a zero result. 2024 // Make sure either the LHS or RHS has a bit set. 2025 if (OrZero || RHSBits.One.getBoolValue() || LHSBits.One.getBoolValue()) 2026 return true; 2027 } 2028 return false; 2029 } 2030 case Instruction::Select: 2031 return isKnownToBeAPowerOfTwo(I->getOperand(1), OrZero, Depth, Q) && 2032 isKnownToBeAPowerOfTwo(I->getOperand(2), OrZero, Depth, Q); 2033 case Instruction::PHI: { 2034 // A PHI node is power of two if all incoming values are power of two, or if 2035 // it is an induction variable where in each step its value is a power of 2036 // two. 2037 auto *PN = cast<PHINode>(I); 2038 SimplifyQuery RecQ = Q; 2039 2040 // Check if it is an induction variable and always power of two. 2041 if (isPowerOfTwoRecurrence(PN, OrZero, Depth, RecQ)) 2042 return true; 2043 2044 // Recursively check all incoming values. Limit recursion to 2 levels, so 2045 // that search complexity is limited to number of operands^2. 2046 unsigned NewDepth = std::max(Depth, MaxAnalysisRecursionDepth - 1); 2047 return llvm::all_of(PN->operands(), [&](const Use &U) { 2048 // Value is power of 2 if it is coming from PHI node itself by induction. 2049 if (U.get() == PN) 2050 return true; 2051 2052 // Change the context instruction to the incoming block where it is 2053 // evaluated. 2054 RecQ.CxtI = PN->getIncomingBlock(U)->getTerminator(); 2055 return isKnownToBeAPowerOfTwo(U.get(), OrZero, NewDepth, RecQ); 2056 }); 2057 } 2058 case Instruction::Invoke: 2059 case Instruction::Call: { 2060 if (auto *II = dyn_cast<IntrinsicInst>(I)) { 2061 switch (II->getIntrinsicID()) { 2062 case Intrinsic::umax: 2063 case Intrinsic::smax: 2064 case Intrinsic::umin: 2065 case Intrinsic::smin: 2066 return isKnownToBeAPowerOfTwo(II->getArgOperand(1), OrZero, Depth, Q) && 2067 isKnownToBeAPowerOfTwo(II->getArgOperand(0), OrZero, Depth, Q); 2068 // bswap/bitreverse just move around bits, but don't change any 1s/0s 2069 // thus don't change pow2/non-pow2 status.
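// Illustrative example (hypothetical values): bitreverse(i8 0b00010000) is
// 0b00001000 and bswap(i32 0x00010000) is 0x00000100; a single set bit stays
// a single set bit, so the power-of-two property is preserved.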
2070 case Intrinsic::bitreverse: 2071 case Intrinsic::bswap: 2072 return isKnownToBeAPowerOfTwo(II->getArgOperand(0), OrZero, Depth, Q); 2073 case Intrinsic::fshr: 2074 case Intrinsic::fshl: 2075 // If Op0 == Op1, this is a rotate. is_pow2(rotate(x, y)) == is_pow2(x) 2076 if (II->getArgOperand(0) == II->getArgOperand(1)) 2077 return isKnownToBeAPowerOfTwo(II->getArgOperand(0), OrZero, Depth, Q); 2078 break; 2079 default: 2080 break; 2081 } 2082 } 2083 return false; 2084 } 2085 default: 2086 return false; 2087 } 2088 } 2089 2090 /// Test whether a GEP's result is known to be non-null. 2091 /// 2092 /// Uses properties inherent in a GEP to try to determine whether it is known 2093 /// to be non-null. 2094 /// 2095 /// Currently this routine does not support vector GEPs. 2096 static bool isGEPKnownNonNull(const GEPOperator *GEP, unsigned Depth, 2097 const SimplifyQuery &Q) { 2098 const Function *F = nullptr; 2099 if (const Instruction *I = dyn_cast<Instruction>(GEP)) 2100 F = I->getFunction(); 2101 2102 if (!GEP->isInBounds() || 2103 NullPointerIsDefined(F, GEP->getPointerAddressSpace())) 2104 return false; 2105 2106 // FIXME: Support vector-GEPs. 2107 assert(GEP->getType()->isPointerTy() && "We only support plain pointer GEP"); 2108 2109 // If the base pointer is non-null, we cannot walk to a null address with an 2110 // inbounds GEP in address space zero. 2111 if (isKnownNonZero(GEP->getPointerOperand(), Depth, Q)) 2112 return true; 2113 2114 // Walk the GEP operands and see if any operand introduces a non-zero offset. 2115 // If so, then the GEP cannot produce a null pointer, as doing so would 2116 // inherently violate the inbounds contract within address space zero. 2117 for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP); 2118 GTI != GTE; ++GTI) { 2119 // Struct types are easy -- they must always be indexed by a constant. 2120 if (StructType *STy = GTI.getStructTypeOrNull()) { 2121 ConstantInt *OpC = cast<ConstantInt>(GTI.getOperand()); 2122 unsigned ElementIdx = OpC->getZExtValue(); 2123 const StructLayout *SL = Q.DL.getStructLayout(STy); 2124 uint64_t ElementOffset = SL->getElementOffset(ElementIdx); 2125 if (ElementOffset > 0) 2126 return true; 2127 continue; 2128 } 2129 2130 // If we have a zero-sized type, the index doesn't matter. Keep looping. 2131 if (Q.DL.getTypeAllocSize(GTI.getIndexedType()).isZero()) 2132 continue; 2133 2134 // Fast path the constant operand case both for efficiency and so we don't 2135 // increment Depth when just zipping down an all-constant GEP. 2136 if (ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand())) { 2137 if (!OpC->isZero()) 2138 return true; 2139 continue; 2140 } 2141 2142 // We post-increment Depth here because while isKnownNonZero increments it 2143 // as well, when we pop back up that increment won't persist. We don't want 2144 // to recurse 10k times just because we have 10k GEP operands. We don't 2145 // bail completely out because we want to handle constant GEPs regardless 2146 // of depth. 
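// Illustrative example (hypothetical values): for
//   %p = getelementptr inbounds i32, ptr %base, i64 %i
// in address space 0, a known non-zero %i adds a non-zero offset, so %p
// cannot be null without violating the inbounds contract discussed above.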
2147 if (Depth++ >= MaxAnalysisRecursionDepth) 2148 continue; 2149 2150 if (isKnownNonZero(GTI.getOperand(), Depth, Q)) 2151 return true; 2152 } 2153 2154 return false; 2155 } 2156 2157 static bool isKnownNonNullFromDominatingCondition(const Value *V, 2158 const Instruction *CtxI, 2159 const DominatorTree *DT) { 2160 assert(!isa<Constant>(V) && "Called for constant?"); 2161 2162 if (!CtxI || !DT) 2163 return false; 2164 2165 unsigned NumUsesExplored = 0; 2166 for (const auto *U : V->users()) { 2167 // Avoid massive lists 2168 if (NumUsesExplored >= DomConditionsMaxUses) 2169 break; 2170 NumUsesExplored++; 2171 2172 // If the value is used as an argument to a call or invoke, then argument 2173 // attributes may provide an answer about null-ness. 2174 if (const auto *CB = dyn_cast<CallBase>(U)) 2175 if (auto *CalledFunc = CB->getCalledFunction()) 2176 for (const Argument &Arg : CalledFunc->args()) 2177 if (CB->getArgOperand(Arg.getArgNo()) == V && 2178 Arg.hasNonNullAttr(/* AllowUndefOrPoison */ false) && 2179 DT->dominates(CB, CtxI)) 2180 return true; 2181 2182 // If the value is used as a load/store, then the pointer must be non null. 2183 if (V == getLoadStorePointerOperand(U)) { 2184 const Instruction *I = cast<Instruction>(U); 2185 if (!NullPointerIsDefined(I->getFunction(), 2186 V->getType()->getPointerAddressSpace()) && 2187 DT->dominates(I, CtxI)) 2188 return true; 2189 } 2190 2191 if ((match(U, m_IDiv(m_Value(), m_Specific(V))) || 2192 match(U, m_IRem(m_Value(), m_Specific(V)))) && 2193 isValidAssumeForContext(cast<Instruction>(U), CtxI, DT)) 2194 return true; 2195 2196 // Consider only compare instructions uniquely controlling a branch 2197 Value *RHS; 2198 CmpInst::Predicate Pred; 2199 if (!match(U, m_c_ICmp(Pred, m_Specific(V), m_Value(RHS)))) 2200 continue; 2201 2202 bool NonNullIfTrue; 2203 if (cmpExcludesZero(Pred, RHS)) 2204 NonNullIfTrue = true; 2205 else if (cmpExcludesZero(CmpInst::getInversePredicate(Pred), RHS)) 2206 NonNullIfTrue = false; 2207 else 2208 continue; 2209 2210 SmallVector<const User *, 4> WorkList; 2211 SmallPtrSet<const User *, 4> Visited; 2212 for (const auto *CmpU : U->users()) { 2213 assert(WorkList.empty() && "Should be!"); 2214 if (Visited.insert(CmpU).second) 2215 WorkList.push_back(CmpU); 2216 2217 while (!WorkList.empty()) { 2218 auto *Curr = WorkList.pop_back_val(); 2219 2220 // If a user is an AND, add all its users to the work list. We only 2221 // propagate "pred != null" condition through AND because it is only 2222 // correct to assume that all conditions of AND are met in true branch. 2223 // TODO: Support similar logic of OR and EQ predicate? 2224 if (NonNullIfTrue) 2225 if (match(Curr, m_LogicalAnd(m_Value(), m_Value()))) { 2226 for (const auto *CurrU : Curr->users()) 2227 if (Visited.insert(CurrU).second) 2228 WorkList.push_back(CurrU); 2229 continue; 2230 } 2231 2232 if (const BranchInst *BI = dyn_cast<BranchInst>(Curr)) { 2233 assert(BI->isConditional() && "uses a comparison!"); 2234 2235 BasicBlock *NonNullSuccessor = 2236 BI->getSuccessor(NonNullIfTrue ? 0 : 1); 2237 BasicBlockEdge Edge(BI->getParent(), NonNullSuccessor); 2238 if (Edge.isSingleEdge() && DT->dominates(Edge, CtxI->getParent())) 2239 return true; 2240 } else if (NonNullIfTrue && isGuard(Curr) && 2241 DT->dominates(cast<Instruction>(Curr), CtxI)) { 2242 return true; 2243 } 2244 } 2245 } 2246 } 2247 2248 return false; 2249 } 2250 2251 /// Does the 'Range' metadata (which must be a valid MD_range operand list) 2252 /// ensure that the value it's attached to is never Value? 
'RangeType' is 2253 /// the type of the value described by the range. 2254 static bool rangeMetadataExcludesValue(const MDNode* Ranges, const APInt& Value) { 2255 const unsigned NumRanges = Ranges->getNumOperands() / 2; 2256 assert(NumRanges >= 1); 2257 for (unsigned i = 0; i < NumRanges; ++i) { 2258 ConstantInt *Lower = 2259 mdconst::extract<ConstantInt>(Ranges->getOperand(2 * i + 0)); 2260 ConstantInt *Upper = 2261 mdconst::extract<ConstantInt>(Ranges->getOperand(2 * i + 1)); 2262 ConstantRange Range(Lower->getValue(), Upper->getValue()); 2263 if (Range.contains(Value)) 2264 return false; 2265 } 2266 return true; 2267 } 2268 2269 /// Try to detect a recurrence that monotonically increases/decreases from a 2270 /// non-zero starting value. These are common as induction variables. 2271 static bool isNonZeroRecurrence(const PHINode *PN) { 2272 BinaryOperator *BO = nullptr; 2273 Value *Start = nullptr, *Step = nullptr; 2274 const APInt *StartC, *StepC; 2275 if (!matchSimpleRecurrence(PN, BO, Start, Step) || 2276 !match(Start, m_APInt(StartC)) || StartC->isZero()) 2277 return false; 2278 2279 switch (BO->getOpcode()) { 2280 case Instruction::Add: 2281 // Starting from non-zero and stepping away from zero can never wrap back 2282 // to zero. 2283 return BO->hasNoUnsignedWrap() || 2284 (BO->hasNoSignedWrap() && match(Step, m_APInt(StepC)) && 2285 StartC->isNegative() == StepC->isNegative()); 2286 case Instruction::Mul: 2287 return (BO->hasNoUnsignedWrap() || BO->hasNoSignedWrap()) && 2288 match(Step, m_APInt(StepC)) && !StepC->isZero(); 2289 case Instruction::Shl: 2290 return BO->hasNoUnsignedWrap() || BO->hasNoSignedWrap(); 2291 case Instruction::AShr: 2292 case Instruction::LShr: 2293 return BO->isExact(); 2294 default: 2295 return false; 2296 } 2297 } 2298 2299 static bool isNonZeroAdd(const APInt &DemandedElts, unsigned Depth, 2300 const SimplifyQuery &Q, unsigned BitWidth, Value *X, 2301 Value *Y, bool NSW) { 2302 KnownBits XKnown = computeKnownBits(X, DemandedElts, Depth, Q); 2303 KnownBits YKnown = computeKnownBits(Y, DemandedElts, Depth, Q); 2304 2305 // If X and Y are both non-negative (as signed values) then their sum is not 2306 // zero unless both X and Y are zero. 2307 if (XKnown.isNonNegative() && YKnown.isNonNegative()) 2308 if (isKnownNonZero(Y, DemandedElts, Depth, Q) || 2309 isKnownNonZero(X, DemandedElts, Depth, Q)) 2310 return true; 2311 2312 // If X and Y are both negative (as signed values) then their sum is not 2313 // zero unless both X and Y equal INT_MIN. 2314 if (XKnown.isNegative() && YKnown.isNegative()) { 2315 APInt Mask = APInt::getSignedMaxValue(BitWidth); 2316 // The sign bit of X is set. If some other bit is set then X is not equal 2317 // to INT_MIN. 2318 if (XKnown.One.intersects(Mask)) 2319 return true; 2320 // The sign bit of Y is set. If some other bit is set then Y is not equal 2321 // to INT_MIN. 2322 if (YKnown.One.intersects(Mask)) 2323 return true; 2324 } 2325 2326 // The sum of a non-negative number and a power of two is not zero.
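// Informal rationale for the claim above: if 0 <= X < 2^(BitWidth-1), the only
// addend that could wrap X back to zero is 2^BitWidth - X, which is either 0
// (when X == 0) or lies strictly between 2^(BitWidth-1) and 2^BitWidth, and no
// single power of two falls in either case.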
2327 if (XKnown.isNonNegative() && 2328 isKnownToBeAPowerOfTwo(Y, /*OrZero*/ false, Depth, Q)) 2329 return true; 2330 if (YKnown.isNonNegative() && 2331 isKnownToBeAPowerOfTwo(X, /*OrZero*/ false, Depth, Q)) 2332 return true; 2333 2334 return KnownBits::computeForAddSub(/*Add*/ true, NSW, XKnown, YKnown) 2335 .isNonZero(); 2336 } 2337 2338 static bool isNonZeroSub(const APInt &DemandedElts, unsigned Depth, 2339 const SimplifyQuery &Q, unsigned BitWidth, Value *X, 2340 Value *Y) { 2341 // TODO: Move this case into isKnownNonEqual(). 2342 if (auto *C = dyn_cast<Constant>(X)) 2343 if (C->isNullValue() && isKnownNonZero(Y, DemandedElts, Depth, Q)) 2344 return true; 2345 2346 return ::isKnownNonEqual(X, Y, Depth, Q); 2347 } 2348 2349 static bool isNonZeroShift(const Operator *I, const APInt &DemandedElts, 2350 unsigned Depth, const SimplifyQuery &Q, 2351 const KnownBits &KnownVal) { 2352 auto ShiftOp = [&](const APInt &Lhs, const APInt &Rhs) { 2353 switch (I->getOpcode()) { 2354 case Instruction::Shl: 2355 return Lhs.shl(Rhs); 2356 case Instruction::LShr: 2357 return Lhs.lshr(Rhs); 2358 case Instruction::AShr: 2359 return Lhs.ashr(Rhs); 2360 default: 2361 llvm_unreachable("Unknown Shift Opcode"); 2362 } 2363 }; 2364 2365 auto InvShiftOp = [&](const APInt &Lhs, const APInt &Rhs) { 2366 switch (I->getOpcode()) { 2367 case Instruction::Shl: 2368 return Lhs.lshr(Rhs); 2369 case Instruction::LShr: 2370 case Instruction::AShr: 2371 return Lhs.shl(Rhs); 2372 default: 2373 llvm_unreachable("Unknown Shift Opcode"); 2374 } 2375 }; 2376 2377 if (KnownVal.isUnknown()) 2378 return false; 2379 2380 KnownBits KnownCnt = 2381 computeKnownBits(I->getOperand(1), DemandedElts, Depth, Q); 2382 APInt MaxShift = KnownCnt.getMaxValue(); 2383 unsigned NumBits = KnownVal.getBitWidth(); 2384 if (MaxShift.uge(NumBits)) 2385 return false; 2386 2387 if (!ShiftOp(KnownVal.One, MaxShift).isZero()) 2388 return true; 2389 2390 // If all of the bits shifted out are known to be zero, and Val is known 2391 // non-zero then at least one non-zero bit must remain. 2392 if (InvShiftOp(KnownVal.Zero, NumBits - MaxShift) 2393 .eq(InvShiftOp(APInt::getAllOnes(NumBits), NumBits - MaxShift)) && 2394 isKnownNonZero(I->getOperand(0), DemandedElts, Depth, Q)) 2395 return true; 2396 2397 return false; 2398 } 2399 2400 static bool isKnownNonZeroFromOperator(const Operator *I, 2401 const APInt &DemandedElts, 2402 unsigned Depth, const SimplifyQuery &Q) { 2403 unsigned BitWidth = getBitWidth(I->getType()->getScalarType(), Q.DL); 2404 switch (I->getOpcode()) { 2405 case Instruction::Alloca: 2406 // Alloca never returns null, malloc might. 2407 return I->getType()->getPointerAddressSpace() == 0; 2408 case Instruction::GetElementPtr: 2409 if (I->getType()->isPointerTy()) 2410 return isGEPKnownNonNull(cast<GEPOperator>(I), Depth, Q); 2411 break; 2412 case Instruction::BitCast: { 2413 // We need to be a bit careful here. We can only peek through the bitcast 2414 // if the scalar size of elements in the operand is smaller than and a 2415 // multiple of the size they are casting to. Take three cases: 2416 // 2417 // 1) Unsafe: 2418 // bitcast <2 x i16> %NonZero to <4 x i8> 2419 // 2420 // %NonZero can have 2 non-zero i16 elements, but isKnownNonZero on a 2421 // <4 x i8> requires that all 4 i8 elements be non-zero which isn't 2422 // guaranteed (imagine just sign bit set in the 2 i16 elements).
2423 // 2424 // 2) Unsafe: 2425 // bitcast <4 x i3> %NonZero to <3 x i4> 2426 // 2427 // Even though the scalar size of the src (`i3`) is smaller than the 2428 // scalar size of the dst `i4`, because `i3` is not a multiple of `i4` 2429 // its possible for the `3 x i4` elements to be zero because there are 2430 // some elements in the destination that don't contain any full src 2431 // element. 2432 // 2433 // 3) Safe: 2434 // bitcast <4 x i8> %NonZero to <2 x i16> 2435 // 2436 // This is always safe as non-zero in the 4 i8 elements implies 2437 // non-zero in the combination of any two adjacent ones. Since i8 is a 2438 // multiple of i16, each i16 is guranteed to have 2 full i8 elements. 2439 // This all implies the 2 i16 elements are non-zero. 2440 Type *FromTy = I->getOperand(0)->getType(); 2441 if ((FromTy->isIntOrIntVectorTy() || FromTy->isPtrOrPtrVectorTy()) && 2442 (BitWidth % getBitWidth(FromTy->getScalarType(), Q.DL)) == 0) 2443 return isKnownNonZero(I->getOperand(0), Depth, Q); 2444 } break; 2445 case Instruction::IntToPtr: 2446 // Note that we have to take special care to avoid looking through 2447 // truncating casts, e.g., int2ptr/ptr2int with appropriate sizes, as well 2448 // as casts that can alter the value, e.g., AddrSpaceCasts. 2449 if (!isa<ScalableVectorType>(I->getType()) && 2450 Q.DL.getTypeSizeInBits(I->getOperand(0)->getType()).getFixedValue() <= 2451 Q.DL.getTypeSizeInBits(I->getType()).getFixedValue()) 2452 return isKnownNonZero(I->getOperand(0), Depth, Q); 2453 break; 2454 case Instruction::PtrToInt: 2455 // Similar to int2ptr above, we can look through ptr2int here if the cast 2456 // is a no-op or an extend and not a truncate. 2457 if (!isa<ScalableVectorType>(I->getType()) && 2458 Q.DL.getTypeSizeInBits(I->getOperand(0)->getType()).getFixedValue() <= 2459 Q.DL.getTypeSizeInBits(I->getType()).getFixedValue()) 2460 return isKnownNonZero(I->getOperand(0), Depth, Q); 2461 break; 2462 case Instruction::Sub: 2463 return isNonZeroSub(DemandedElts, Depth, Q, BitWidth, I->getOperand(0), 2464 I->getOperand(1)); 2465 case Instruction::Or: 2466 // X | Y != 0 if X != 0 or Y != 0. 2467 return isKnownNonZero(I->getOperand(1), DemandedElts, Depth, Q) || 2468 isKnownNonZero(I->getOperand(0), DemandedElts, Depth, Q); 2469 case Instruction::SExt: 2470 case Instruction::ZExt: 2471 // ext X != 0 if X != 0. 2472 return isKnownNonZero(I->getOperand(0), Depth, Q); 2473 2474 case Instruction::Shl: { 2475 // shl nsw/nuw can't remove any non-zero bits. 2476 const OverflowingBinaryOperator *BO = cast<OverflowingBinaryOperator>(I); 2477 if (Q.IIQ.hasNoUnsignedWrap(BO) || Q.IIQ.hasNoSignedWrap(BO)) 2478 return isKnownNonZero(I->getOperand(0), Depth, Q); 2479 2480 // shl X, Y != 0 if X is odd. Note that the value of the shift is undefined 2481 // if the lowest bit is shifted off the end. 2482 KnownBits Known(BitWidth); 2483 computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth, Q); 2484 if (Known.One[0]) 2485 return true; 2486 2487 return isNonZeroShift(I, DemandedElts, Depth, Q, Known); 2488 } 2489 case Instruction::LShr: 2490 case Instruction::AShr: { 2491 // shr exact can only shift out zero bits. 2492 const PossiblyExactOperator *BO = cast<PossiblyExactOperator>(I); 2493 if (BO->isExact()) 2494 return isKnownNonZero(I->getOperand(0), Depth, Q); 2495 2496 // shr X, Y != 0 if X is negative. Note that the value of the shift is not 2497 // defined if the sign bit is shifted off the end. 
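// Illustrative example (hypothetical values): for i8 %x with the sign bit
// known set, lshr %x, %y (with %y < 8, otherwise the shift is poison) leaves
// a set bit at position 7 - %y, and ashr %x, %y keeps the sign bit itself, so
// either way the result is non-zero.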
2498 KnownBits Known = 2499 computeKnownBits(I->getOperand(0), DemandedElts, Depth, Q); 2500 if (Known.isNegative()) 2501 return true; 2502 2503 return isNonZeroShift(I, DemandedElts, Depth, Q, Known); 2504 } 2505 case Instruction::UDiv: 2506 case Instruction::SDiv: { 2507 // X / Y 2508 // div exact can only produce a zero if the dividend is zero. 2509 if (cast<PossiblyExactOperator>(I)->isExact()) 2510 return isKnownNonZero(I->getOperand(0), DemandedElts, Depth, Q); 2511 2512 std::optional<bool> XUgeY; 2513 KnownBits XKnown = 2514 computeKnownBits(I->getOperand(0), DemandedElts, Depth, Q); 2515 // If X is fully unknown we won't be able to figure anything out so don't 2516 // bother computing knownbits for Y. 2517 if (XKnown.isUnknown()) 2518 return false; 2519 2520 KnownBits YKnown = 2521 computeKnownBits(I->getOperand(1), DemandedElts, Depth, Q); 2522 if (I->getOpcode() == Instruction::SDiv) { 2523 // For signed division need to compare abs value of the operands. 2524 XKnown = XKnown.abs(/*IntMinIsPoison*/ false); 2525 YKnown = YKnown.abs(/*IntMinIsPoison*/ false); 2526 } 2527 // If X u>= Y then div is non zero (0/0 is UB). 2528 XUgeY = KnownBits::uge(XKnown, YKnown); 2529 // If X is totally unknown or X u< Y we won't be able to prove non-zero 2530 // with compute known bits so just return early. 2531 return XUgeY && *XUgeY; 2532 } 2533 case Instruction::Add: { 2534 // X + Y. 2535 2536 // If Add has nuw wrap flag, then if either X or Y is non-zero the result is 2537 // non-zero. 2538 auto *BO = cast<OverflowingBinaryOperator>(I); 2539 if (Q.IIQ.hasNoUnsignedWrap(BO)) 2540 return isKnownNonZero(I->getOperand(1), DemandedElts, Depth, Q) || 2541 isKnownNonZero(I->getOperand(0), DemandedElts, Depth, Q); 2542 2543 return isNonZeroAdd(DemandedElts, Depth, Q, BitWidth, I->getOperand(0), 2544 I->getOperand(1), Q.IIQ.hasNoSignedWrap(BO)); 2545 } 2546 case Instruction::Mul: { 2547 // If X and Y are non-zero then so is X * Y as long as the multiplication 2548 // does not overflow. 2549 const OverflowingBinaryOperator *BO = cast<OverflowingBinaryOperator>(I); 2550 if (Q.IIQ.hasNoSignedWrap(BO) || Q.IIQ.hasNoUnsignedWrap(BO)) 2551 return isKnownNonZero(I->getOperand(0), DemandedElts, Depth, Q) && 2552 isKnownNonZero(I->getOperand(1), DemandedElts, Depth, Q); 2553 2554 // If either X or Y is odd, then if the other is non-zero the result can't 2555 // be zero. 2556 KnownBits XKnown = 2557 computeKnownBits(I->getOperand(0), DemandedElts, Depth, Q); 2558 if (XKnown.One[0]) 2559 return isKnownNonZero(I->getOperand(1), DemandedElts, Depth, Q); 2560 2561 KnownBits YKnown = 2562 computeKnownBits(I->getOperand(1), DemandedElts, Depth, Q); 2563 if (YKnown.One[0]) 2564 return XKnown.isNonZero() || 2565 isKnownNonZero(I->getOperand(0), DemandedElts, Depth, Q); 2566 2567 // If there exists any subset of X (sX) and subset of Y (sY) s.t sX * sY is 2568 // non-zero, then X * Y is non-zero. We can find sX and sY by just taking 2569 // the lowest known One of X and Y. If they are non-zero, the result 2570 // must be non-zero. We can check if LSB(X) * LSB(Y) != 0 by doing 2571 // X.countMaxTrailingZeros() + Y.countMaxTrailingZeros() < BitWidth. 2572 return (XKnown.countMaxTrailingZeros() + YKnown.countMaxTrailingZeros()) < 2573 BitWidth; 2574 } 2575 case Instruction::Select: { 2576 // (C ? X : Y) != 0 if X != 0 and Y != 0. 2577 2578 // First check if the arm is non-zero using `isKnownNonZero`. If that fails, 2579 // then see if the select condition implies the arm is non-zero. For example 2580 // (X != 0 ?
X : Y), we know the true arm is non-zero as the `X` "return" is 2581 // dominated by `X != 0`. 2582 auto SelectArmIsNonZero = [&](bool IsTrueArm) { 2583 Value *Op; 2584 Op = IsTrueArm ? I->getOperand(1) : I->getOperand(2); 2585 // Op is trivially non-zero. 2586 if (isKnownNonZero(Op, DemandedElts, Depth, Q)) 2587 return true; 2588 2589 // The condition of the select dominates the true/false arm. Check if the 2590 // condition implies that a given arm is non-zero. 2591 Value *X; 2592 CmpInst::Predicate Pred; 2593 if (!match(I->getOperand(0), m_c_ICmp(Pred, m_Specific(Op), m_Value(X)))) 2594 return false; 2595 2596 if (!IsTrueArm) 2597 Pred = ICmpInst::getInversePredicate(Pred); 2598 2599 return cmpExcludesZero(Pred, X); 2600 }; 2601 2602 if (SelectArmIsNonZero(/* IsTrueArm */ true) && 2603 SelectArmIsNonZero(/* IsTrueArm */ false)) 2604 return true; 2605 break; 2606 } 2607 case Instruction::PHI: { 2608 auto *PN = cast<PHINode>(I); 2609 if (Q.IIQ.UseInstrInfo && isNonZeroRecurrence(PN)) 2610 return true; 2611 2612 // Check if all incoming values are non-zero using recursion. 2613 SimplifyQuery RecQ = Q; 2614 unsigned NewDepth = std::max(Depth, MaxAnalysisRecursionDepth - 1); 2615 return llvm::all_of(PN->operands(), [&](const Use &U) { 2616 if (U.get() == PN) 2617 return true; 2618 RecQ.CxtI = PN->getIncomingBlock(U)->getTerminator(); 2619 // Check if the branch on the phi excludes zero. 2620 ICmpInst::Predicate Pred; 2621 Value *X; 2622 BasicBlock *TrueSucc, *FalseSucc; 2623 if (match(RecQ.CxtI, 2624 m_Br(m_c_ICmp(Pred, m_Specific(U.get()), m_Value(X)), 2625 m_BasicBlock(TrueSucc), m_BasicBlock(FalseSucc)))) { 2626 // Check for cases of duplicate successors. 2627 if ((TrueSucc == PN->getParent()) != (FalseSucc == PN->getParent())) { 2628 // If we're using the false successor, invert the predicate. 2629 if (FalseSucc == PN->getParent()) 2630 Pred = CmpInst::getInversePredicate(Pred); 2631 if (cmpExcludesZero(Pred, X)) 2632 return true; 2633 } 2634 } 2635 // Finally recurse on the edge and check it directly. 2636 return isKnownNonZero(U.get(), DemandedElts, NewDepth, RecQ); 2637 }); 2638 } 2639 case Instruction::ExtractElement: 2640 if (const auto *EEI = dyn_cast<ExtractElementInst>(I)) { 2641 const Value *Vec = EEI->getVectorOperand(); 2642 const Value *Idx = EEI->getIndexOperand(); 2643 auto *CIdx = dyn_cast<ConstantInt>(Idx); 2644 if (auto *VecTy = dyn_cast<FixedVectorType>(Vec->getType())) { 2645 unsigned NumElts = VecTy->getNumElements(); 2646 APInt DemandedVecElts = APInt::getAllOnes(NumElts); 2647 if (CIdx && CIdx->getValue().ult(NumElts)) 2648 DemandedVecElts = APInt::getOneBitSet(NumElts, CIdx->getZExtValue()); 2649 return isKnownNonZero(Vec, DemandedVecElts, Depth, Q); 2650 } 2651 } 2652 break; 2653 case Instruction::Freeze: 2654 return isKnownNonZero(I->getOperand(0), Depth, Q) && 2655 isGuaranteedNotToBePoison(I->getOperand(0), Q.AC, Q.CxtI, Q.DT, 2656 Depth); 2657 case Instruction::Load: { 2658 auto *LI = cast<LoadInst>(I); 2659 // A Load tagged with nonnull or dereferenceable with null pointer undefined 2660 // is never null. 2661 if (auto *PtrT = dyn_cast<PointerType>(I->getType())) 2662 if (Q.IIQ.getMetadata(LI, LLVMContext::MD_nonnull) || 2663 (Q.IIQ.getMetadata(LI, LLVMContext::MD_dereferenceable) && 2664 !NullPointerIsDefined(LI->getFunction(), PtrT->getAddressSpace()))) 2665 return true; 2666 2667 // No need to fall through to computeKnownBits as range metadata is already 2668 // handled in isKnownNonZero. 
2669 return false; 2670 } 2671 case Instruction::Call: 2672 case Instruction::Invoke: 2673 if (I->getType()->isPointerTy()) { 2674 const auto *Call = cast<CallBase>(I); 2675 if (Call->isReturnNonNull()) 2676 return true; 2677 if (const auto *RP = getArgumentAliasingToReturnedPointer(Call, true)) 2678 return isKnownNonZero(RP, Depth, Q); 2679 } else if (const Value *RV = cast<CallBase>(I)->getReturnedArgOperand()) { 2680 if (RV->getType() == I->getType() && isKnownNonZero(RV, Depth, Q)) 2681 return true; 2682 } 2683 2684 if (auto *II = dyn_cast<IntrinsicInst>(I)) { 2685 switch (II->getIntrinsicID()) { 2686 case Intrinsic::sshl_sat: 2687 case Intrinsic::ushl_sat: 2688 case Intrinsic::abs: 2689 case Intrinsic::bitreverse: 2690 case Intrinsic::bswap: 2691 case Intrinsic::ctpop: 2692 return isKnownNonZero(II->getArgOperand(0), DemandedElts, Depth, Q); 2693 case Intrinsic::ssub_sat: 2694 return isNonZeroSub(DemandedElts, Depth, Q, BitWidth, 2695 II->getArgOperand(0), II->getArgOperand(1)); 2696 case Intrinsic::sadd_sat: 2697 return isNonZeroAdd(DemandedElts, Depth, Q, BitWidth, 2698 II->getArgOperand(0), II->getArgOperand(1), 2699 /*NSW*/ true); 2700 case Intrinsic::umax: 2701 case Intrinsic::uadd_sat: 2702 return isKnownNonZero(II->getArgOperand(1), DemandedElts, Depth, Q) || 2703 isKnownNonZero(II->getArgOperand(0), DemandedElts, Depth, Q); 2704 case Intrinsic::smin: 2705 case Intrinsic::smax: { 2706 auto KnownOpImpliesNonZero = [&](const KnownBits &K) { 2707 return II->getIntrinsicID() == Intrinsic::smin 2708 ? K.isNegative() 2709 : K.isStrictlyPositive(); 2710 }; 2711 KnownBits XKnown = 2712 computeKnownBits(II->getArgOperand(0), DemandedElts, Depth, Q); 2713 if (KnownOpImpliesNonZero(XKnown)) 2714 return true; 2715 KnownBits YKnown = 2716 computeKnownBits(II->getArgOperand(1), DemandedElts, Depth, Q); 2717 if (KnownOpImpliesNonZero(YKnown)) 2718 return true; 2719 2720 if (XKnown.isNonZero() && YKnown.isNonZero()) 2721 return true; 2722 } 2723 [[fallthrough]]; 2724 case Intrinsic::umin: 2725 return isKnownNonZero(II->getArgOperand(0), DemandedElts, Depth, Q) && 2726 isKnownNonZero(II->getArgOperand(1), DemandedElts, Depth, Q); 2727 case Intrinsic::cttz: 2728 return computeKnownBits(II->getArgOperand(0), DemandedElts, Depth, Q) 2729 .Zero[0]; 2730 case Intrinsic::ctlz: 2731 return computeKnownBits(II->getArgOperand(0), DemandedElts, Depth, Q) 2732 .isNonNegative(); 2733 case Intrinsic::fshr: 2734 case Intrinsic::fshl: 2735 // If Op0 == Op1, this is a rotate. rotate(x, y) != 0 iff x != 0. 2736 if (II->getArgOperand(0) == II->getArgOperand(1)) 2737 return isKnownNonZero(II->getArgOperand(0), DemandedElts, Depth, Q); 2738 break; 2739 case Intrinsic::vscale: 2740 return true; 2741 default: 2742 break; 2743 } 2744 break; 2745 } 2746 2747 return false; 2748 } 2749 2750 KnownBits Known(BitWidth); 2751 computeKnownBits(I, DemandedElts, Known, Depth, Q); 2752 return Known.One != 0; 2753 } 2754 2755 /// Return true if the given value is known to be non-zero when defined. For 2756 /// vectors, return true if every demanded element is known to be non-zero when 2757 /// defined. For pointers, if the context instruction and dominator tree are 2758 /// specified, perform context-sensitive analysis and return true if the 2759 /// pointer couldn't possibly be null at the specified instruction. 2760 /// Supports values with integer or pointer type and vectors of integers. 
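///
/// Illustrative examples (hypothetical values): %v = or i32 %a, 1 is known
/// non-zero because bit 0 is known one; a nonnull pointer argument in address
/// space 0 is known non-zero; and %i = phi i32 [ 1, %entry ], [ %i.next, %loop ]
/// with %i.next = add nuw i32 %i, 1 is recognized as a non-zero recurrence.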
2761 bool isKnownNonZero(const Value *V, const APInt &DemandedElts, unsigned Depth, 2762 const SimplifyQuery &Q) { 2763 2764 #ifndef NDEBUG 2765 Type *Ty = V->getType(); 2766 assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth"); 2767 2768 if (auto *FVTy = dyn_cast<FixedVectorType>(Ty)) { 2769 assert( 2770 FVTy->getNumElements() == DemandedElts.getBitWidth() && 2771 "DemandedElt width should equal the fixed vector number of elements"); 2772 } else { 2773 assert(DemandedElts == APInt(1, 1) && 2774 "DemandedElt width should be 1 for scalars"); 2775 } 2776 #endif 2777 2778 if (auto *C = dyn_cast<Constant>(V)) { 2779 if (C->isNullValue()) 2780 return false; 2781 if (isa<ConstantInt>(C)) 2782 // Must be non-zero due to null test above. 2783 return true; 2784 2785 // For constant vectors, check that all elements are undefined or known 2786 // non-zero to determine that the whole vector is known non-zero. 2787 if (auto *VecTy = dyn_cast<FixedVectorType>(C->getType())) { 2788 for (unsigned i = 0, e = VecTy->getNumElements(); i != e; ++i) { 2789 if (!DemandedElts[i]) 2790 continue; 2791 Constant *Elt = C->getAggregateElement(i); 2792 if (!Elt || Elt->isNullValue()) 2793 return false; 2794 if (!isa<UndefValue>(Elt) && !isa<ConstantInt>(Elt)) 2795 return false; 2796 } 2797 return true; 2798 } 2799 2800 // A global variable in address space 0 is non null unless extern weak 2801 // or an absolute symbol reference. Other address spaces may have null as a 2802 // valid address for a global, so we can't assume anything. 2803 if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) { 2804 if (!GV->isAbsoluteSymbolRef() && !GV->hasExternalWeakLinkage() && 2805 GV->getType()->getAddressSpace() == 0) 2806 return true; 2807 } 2808 2809 // For constant expressions, fall through to the Operator code below. 2810 if (!isa<ConstantExpr>(V)) 2811 return false; 2812 } 2813 2814 if (auto *I = dyn_cast<Instruction>(V)) { 2815 if (MDNode *Ranges = Q.IIQ.getMetadata(I, LLVMContext::MD_range)) { 2816 // If the possible ranges don't contain zero, then the value is 2817 // definitely non-zero. 2818 if (auto *Ty = dyn_cast<IntegerType>(V->getType())) { 2819 const APInt ZeroValue(Ty->getBitWidth(), 0); 2820 if (rangeMetadataExcludesValue(Ranges, ZeroValue)) 2821 return true; 2822 } 2823 } 2824 } 2825 2826 if (!isa<Constant>(V) && isKnownNonZeroFromAssume(V, Q)) 2827 return true; 2828 2829 // Some of the tests below are recursive, so bail out if we hit the limit. 2830 if (Depth++ >= MaxAnalysisRecursionDepth) 2831 return false; 2832 2833 // Check for pointer simplifications. 2834 2835 if (PointerType *PtrTy = dyn_cast<PointerType>(V->getType())) { 2836 // A byval, inalloca may not be null in a non-default addres space. A 2837 // nonnull argument is assumed never 0. 2838 if (const Argument *A = dyn_cast<Argument>(V)) { 2839 if (((A->hasPassPointeeByValueCopyAttr() && 2840 !NullPointerIsDefined(A->getParent(), PtrTy->getAddressSpace())) || 2841 A->hasNonNullAttr())) 2842 return true; 2843 } 2844 } 2845 2846 if (const auto *I = dyn_cast<Operator>(V)) 2847 if (isKnownNonZeroFromOperator(I, DemandedElts, Depth, Q)) 2848 return true; 2849 2850 if (!isa<Constant>(V) && 2851 isKnownNonNullFromDominatingCondition(V, Q.CxtI, Q.DT)) 2852 return true; 2853 2854 return false; 2855 } 2856 2857 bool isKnownNonZero(const Value *V, unsigned Depth, const SimplifyQuery &Q) { 2858 auto *FVTy = dyn_cast<FixedVectorType>(V->getType()); 2859 APInt DemandedElts = 2860 FVTy ? 
APInt::getAllOnes(FVTy->getNumElements()) : APInt(1, 1); 2861 return isKnownNonZero(V, DemandedElts, Depth, Q); 2862 } 2863 2864 /// If the pair of operators are the same invertible function, return 2865 /// the operands of the function corresponding to each input. Otherwise, 2866 /// return std::nullopt. An invertible function is one that is 1-to-1 and maps 2867 /// every input value to exactly one output value. This is equivalent to 2868 /// saying that Op1 and Op2 are equal exactly when the specified pair of 2869 /// operands are equal (except that Op1 and Op2 may be poison more often). 2870 static std::optional<std::pair<Value*, Value*>> 2871 getInvertibleOperands(const Operator *Op1, 2872 const Operator *Op2) { 2873 if (Op1->getOpcode() != Op2->getOpcode()) 2874 return std::nullopt; 2875 2876 auto getOperands = [&](unsigned OpNum) -> auto { 2877 return std::make_pair(Op1->getOperand(OpNum), Op2->getOperand(OpNum)); 2878 }; 2879 2880 switch (Op1->getOpcode()) { 2881 default: 2882 break; 2883 case Instruction::Add: 2884 case Instruction::Sub: 2885 if (Op1->getOperand(0) == Op2->getOperand(0)) 2886 return getOperands(1); 2887 if (Op1->getOperand(1) == Op2->getOperand(1)) 2888 return getOperands(0); 2889 break; 2890 case Instruction::Mul: { 2891 // invertible if A * B == (A * B) mod 2^N where A and B are integers 2892 // and N is the bitwidth. The nsw case is non-obvious, but proven by 2893 // alive2: https://alive2.llvm.org/ce/z/Z6D5qK 2894 auto *OBO1 = cast<OverflowingBinaryOperator>(Op1); 2895 auto *OBO2 = cast<OverflowingBinaryOperator>(Op2); 2896 if ((!OBO1->hasNoUnsignedWrap() || !OBO2->hasNoUnsignedWrap()) && 2897 (!OBO1->hasNoSignedWrap() || !OBO2->hasNoSignedWrap())) 2898 break; 2899 2900 // Assume operand order has been canonicalized 2901 if (Op1->getOperand(1) == Op2->getOperand(1) && 2902 isa<ConstantInt>(Op1->getOperand(1)) && 2903 !cast<ConstantInt>(Op1->getOperand(1))->isZero()) 2904 return getOperands(0); 2905 break; 2906 } 2907 case Instruction::Shl: { 2908 // Same as multiplies, with the difference that we don't need to check 2909 // for a non-zero multiply. Shifts always multiply by non-zero. 2910 auto *OBO1 = cast<OverflowingBinaryOperator>(Op1); 2911 auto *OBO2 = cast<OverflowingBinaryOperator>(Op2); 2912 if ((!OBO1->hasNoUnsignedWrap() || !OBO2->hasNoUnsignedWrap()) && 2913 (!OBO1->hasNoSignedWrap() || !OBO2->hasNoSignedWrap())) 2914 break; 2915 2916 if (Op1->getOperand(1) == Op2->getOperand(1)) 2917 return getOperands(0); 2918 break; 2919 } 2920 case Instruction::AShr: 2921 case Instruction::LShr: { 2922 auto *PEO1 = cast<PossiblyExactOperator>(Op1); 2923 auto *PEO2 = cast<PossiblyExactOperator>(Op2); 2924 if (!PEO1->isExact() || !PEO2->isExact()) 2925 break; 2926 2927 if (Op1->getOperand(1) == Op2->getOperand(1)) 2928 return getOperands(0); 2929 break; 2930 } 2931 case Instruction::SExt: 2932 case Instruction::ZExt: 2933 if (Op1->getOperand(0)->getType() == Op2->getOperand(0)->getType()) 2934 return getOperands(0); 2935 break; 2936 case Instruction::PHI: { 2937 const PHINode *PN1 = cast<PHINode>(Op1); 2938 const PHINode *PN2 = cast<PHINode>(Op2); 2939 2940 // If PN1 and PN2 are both recurrences, can we prove the entire recurrences 2941 // are a single invertible function of the start values?
Note that repeated 2942 // application of an invertible function is also invertible. 2943 BinaryOperator *BO1 = nullptr; 2944 Value *Start1 = nullptr, *Step1 = nullptr; 2945 BinaryOperator *BO2 = nullptr; 2946 Value *Start2 = nullptr, *Step2 = nullptr; 2947 if (PN1->getParent() != PN2->getParent() || 2948 !matchSimpleRecurrence(PN1, BO1, Start1, Step1) || 2949 !matchSimpleRecurrence(PN2, BO2, Start2, Step2)) 2950 break; 2951 2952 auto Values = getInvertibleOperands(cast<Operator>(BO1), 2953 cast<Operator>(BO2)); 2954 if (!Values) 2955 break; 2956 2957 // We have to be careful of mutually defined recurrences here. Ex: 2958 // * X_i = X_(i-1) OP Y_(i-1), and Y_i = X_(i-1) OP V 2959 // * X_i = Y_i = X_(i-1) OP Y_(i-1) 2960 // The invertibility of these is complicated, and not worth reasoning 2961 // about (yet?). 2962 if (Values->first != PN1 || Values->second != PN2) 2963 break; 2964 2965 return std::make_pair(Start1, Start2); 2966 } 2967 } 2968 return std::nullopt; 2969 } 2970 2971 /// Return true if V2 == V1 + X, where X is known non-zero. 2972 static bool isAddOfNonZero(const Value *V1, const Value *V2, unsigned Depth, 2973 const SimplifyQuery &Q) { 2974 const BinaryOperator *BO = dyn_cast<BinaryOperator>(V1); 2975 if (!BO || BO->getOpcode() != Instruction::Add) 2976 return false; 2977 Value *Op = nullptr; 2978 if (V2 == BO->getOperand(0)) 2979 Op = BO->getOperand(1); 2980 else if (V2 == BO->getOperand(1)) 2981 Op = BO->getOperand(0); 2982 else 2983 return false; 2984 return isKnownNonZero(Op, Depth + 1, Q); 2985 } 2986 2987 /// Return true if V2 == V1 * C, where V1 is known non-zero, C is not 0/1 and 2988 /// the multiplication is nuw or nsw. 2989 static bool isNonEqualMul(const Value *V1, const Value *V2, unsigned Depth, 2990 const SimplifyQuery &Q) { 2991 if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(V2)) { 2992 const APInt *C; 2993 return match(OBO, m_Mul(m_Specific(V1), m_APInt(C))) && 2994 (OBO->hasNoUnsignedWrap() || OBO->hasNoSignedWrap()) && 2995 !C->isZero() && !C->isOne() && isKnownNonZero(V1, Depth + 1, Q); 2996 } 2997 return false; 2998 } 2999 3000 /// Return true if V2 == V1 << C, where V1 is known non-zero, C is not 0 and 3001 /// the shift is nuw or nsw. 3002 static bool isNonEqualShl(const Value *V1, const Value *V2, unsigned Depth, 3003 const SimplifyQuery &Q) { 3004 if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(V2)) { 3005 const APInt *C; 3006 return match(OBO, m_Shl(m_Specific(V1), m_APInt(C))) && 3007 (OBO->hasNoUnsignedWrap() || OBO->hasNoSignedWrap()) && 3008 !C->isZero() && isKnownNonZero(V1, Depth + 1, Q); 3009 } 3010 return false; 3011 } 3012 3013 static bool isNonEqualPHIs(const PHINode *PN1, const PHINode *PN2, 3014 unsigned Depth, const SimplifyQuery &Q) { 3015 // Check that the two PHIs are in the same block. 3016 if (PN1->getParent() != PN2->getParent()) 3017 return false; 3018 3019 SmallPtrSet<const BasicBlock *, 8> VisitedBBs; 3020 bool UsedFullRecursion = false; 3021 for (const BasicBlock *IncomBB : PN1->blocks()) { 3022 if (!VisitedBBs.insert(IncomBB).second) 3023 continue; // Don't reprocess blocks that we have dealt with already. 3024 const Value *IV1 = PN1->getIncomingValueForBlock(IncomBB); 3025 const Value *IV2 = PN2->getIncomingValueForBlock(IncomBB); 3026 const APInt *C1, *C2; 3027 if (match(IV1, m_APInt(C1)) && match(IV2, m_APInt(C2)) && *C1 != *C2) 3028 continue; 3029 3030 // Only one pair of phi operands is allowed for full recursion.
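// Illustrative example (hypothetical values): for two phis in the same block,
//   %p1 = phi i32 [ 1, %a ], [ %x, %b ]
//   %p2 = phi i32 [ 2, %a ], [ %y, %b ]
// the %a pair is settled by the cheap constant comparison above (1 != 2), so
// only the %b pair (%x vs. %y) may consume the single full recursive
// isKnownNonEqual query allowed below.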
    if (UsedFullRecursion)
      return false;

    SimplifyQuery RecQ = Q;
    RecQ.CxtI = IncomBB->getTerminator();
    if (!isKnownNonEqual(IV1, IV2, Depth + 1, RecQ))
      return false;
    UsedFullRecursion = true;
  }
  return true;
}

static bool isNonEqualSelect(const Value *V1, const Value *V2, unsigned Depth,
                             const SimplifyQuery &Q) {
  const SelectInst *SI1 = dyn_cast<SelectInst>(V1);
  if (!SI1)
    return false;

  if (const SelectInst *SI2 = dyn_cast<SelectInst>(V2)) {
    const Value *Cond1 = SI1->getCondition();
    const Value *Cond2 = SI2->getCondition();
    if (Cond1 == Cond2)
      return isKnownNonEqual(SI1->getTrueValue(), SI2->getTrueValue(),
                             Depth + 1, Q) &&
             isKnownNonEqual(SI1->getFalseValue(), SI2->getFalseValue(),
                             Depth + 1, Q);
  }
  return isKnownNonEqual(SI1->getTrueValue(), V2, Depth + 1, Q) &&
         isKnownNonEqual(SI1->getFalseValue(), V2, Depth + 1, Q);
}

// Check whether A is a GEP that strides a pointer around a loop: A must be a
// single-index GEP with a constant index whose pointer operand is a PHI with
// exactly two incoming values, one of which is A itself (the recursive step)
// and the other the start pointer. If B has the same base as the start
// pointer and the loop only ever moves the pointer away from B (start offset
// >= B's offset with a positive step, or start offset <= B's offset with a
// negative step), then A can never equal B.
static bool isNonEqualPointersWithRecursiveGEP(const Value *A, const Value *B,
                                               const SimplifyQuery &Q) {
  if (!A->getType()->isPointerTy() || !B->getType()->isPointerTy())
    return false;

  auto *GEPA = dyn_cast<GEPOperator>(A);
  if (!GEPA || GEPA->getNumIndices() != 1 || !isa<Constant>(GEPA->idx_begin()))
    return false;

  // Handle 2 incoming PHI values with one being a recursive GEP.
  auto *PN = dyn_cast<PHINode>(GEPA->getPointerOperand());
  if (!PN || PN->getNumIncomingValues() != 2)
    return false;

  // Search for the recursive GEP as an incoming operand, and record that as
  // Step.
  Value *Start = nullptr;
  Value *Step = const_cast<Value *>(A);
  if (PN->getIncomingValue(0) == Step)
    Start = PN->getIncomingValue(1);
  else if (PN->getIncomingValue(1) == Step)
    Start = PN->getIncomingValue(0);
  else
    return false;

  // The other incoming value's base must match B's base, and the pointers are
  // non-equal if either
  //   StartOffset >= OffsetB && StepOffset > 0, or
  //   StartOffset <= OffsetB && StepOffset < 0.
  // We use stripAndAccumulateInBoundsConstantOffsets to restrict the
  // optimisation to inbounds GEPs only.
  unsigned IndexWidth = Q.DL.getIndexTypeSizeInBits(Start->getType());
  APInt StartOffset(IndexWidth, 0);
  Start = Start->stripAndAccumulateInBoundsConstantOffsets(Q.DL, StartOffset);
  APInt StepOffset(IndexWidth, 0);
  Step = Step->stripAndAccumulateInBoundsConstantOffsets(Q.DL, StepOffset);

  // Check if the base pointer of Step matches the PHI.
  if (Step != PN)
    return false;
  APInt OffsetB(IndexWidth, 0);
  B = B->stripAndAccumulateInBoundsConstantOffsets(Q.DL, OffsetB);
  return Start == B &&
         ((StartOffset.sge(OffsetB) && StepOffset.isStrictlyPositive()) ||
          (StartOffset.sle(OffsetB) && StepOffset.isNegative()));
}

/// Return true if it is known that V1 != V2.
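/// For example (illustrative), %a is known non-equal to "add i32 %a, 1"
/// because the two differ by a known-non-zero amount, while nothing is
/// concluded for values of different types or once the recursion depth limit
/// is reached.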
3115 static bool isKnownNonEqual(const Value *V1, const Value *V2, unsigned Depth, 3116 const SimplifyQuery &Q) { 3117 if (V1 == V2) 3118 return false; 3119 if (V1->getType() != V2->getType()) 3120 // We can't look through casts yet. 3121 return false; 3122 3123 if (Depth >= MaxAnalysisRecursionDepth) 3124 return false; 3125 3126 // See if we can recurse through (exactly one of) our operands. This 3127 // requires our operation be 1-to-1 and map every input value to exactly 3128 // one output value. Such an operation is invertible. 3129 auto *O1 = dyn_cast<Operator>(V1); 3130 auto *O2 = dyn_cast<Operator>(V2); 3131 if (O1 && O2 && O1->getOpcode() == O2->getOpcode()) { 3132 if (auto Values = getInvertibleOperands(O1, O2)) 3133 return isKnownNonEqual(Values->first, Values->second, Depth + 1, Q); 3134 3135 if (const PHINode *PN1 = dyn_cast<PHINode>(V1)) { 3136 const PHINode *PN2 = cast<PHINode>(V2); 3137 // FIXME: This is missing a generalization to handle the case where one is 3138 // a PHI and another one isn't. 3139 if (isNonEqualPHIs(PN1, PN2, Depth, Q)) 3140 return true; 3141 }; 3142 } 3143 3144 if (isAddOfNonZero(V1, V2, Depth, Q) || isAddOfNonZero(V2, V1, Depth, Q)) 3145 return true; 3146 3147 if (isNonEqualMul(V1, V2, Depth, Q) || isNonEqualMul(V2, V1, Depth, Q)) 3148 return true; 3149 3150 if (isNonEqualShl(V1, V2, Depth, Q) || isNonEqualShl(V2, V1, Depth, Q)) 3151 return true; 3152 3153 if (V1->getType()->isIntOrIntVectorTy()) { 3154 // Are any known bits in V1 contradictory to known bits in V2? If V1 3155 // has a known zero where V2 has a known one, they must not be equal. 3156 KnownBits Known1 = computeKnownBits(V1, Depth, Q); 3157 if (!Known1.isUnknown()) { 3158 KnownBits Known2 = computeKnownBits(V2, Depth, Q); 3159 if (Known1.Zero.intersects(Known2.One) || 3160 Known2.Zero.intersects(Known1.One)) 3161 return true; 3162 } 3163 } 3164 3165 if (isNonEqualSelect(V1, V2, Depth, Q) || isNonEqualSelect(V2, V1, Depth, Q)) 3166 return true; 3167 3168 if (isNonEqualPointersWithRecursiveGEP(V1, V2, Q) || 3169 isNonEqualPointersWithRecursiveGEP(V2, V1, Q)) 3170 return true; 3171 3172 Value *A, *B; 3173 // PtrToInts are NonEqual if their Ptrs are NonEqual. 3174 // Check PtrToInt type matches the pointer size. 3175 if (match(V1, m_PtrToIntSameSize(Q.DL, m_Value(A))) && 3176 match(V2, m_PtrToIntSameSize(Q.DL, m_Value(B)))) 3177 return isKnownNonEqual(A, B, Depth + 1, Q); 3178 3179 return false; 3180 } 3181 3182 // Match a signed min+max clamp pattern like smax(smin(In, CHigh), CLow). 3183 // Returns the input and lower/upper bounds. 
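// For instance (illustrative), clamping %x to [-128, 127] may appear as:
//   %c1 = icmp slt i32 %x, 127
//   %lo = select i1 %c1, i32 %x, i32 127      ; smin(%x, 127)
//   %c2 = icmp sgt i32 %lo, -128
//   %r  = select i1 %c2, i32 %lo, i32 -128    ; smax(%lo, -128)
// in which case In = %x, CLow = -128 and CHigh = 127.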
3184 static bool isSignedMinMaxClamp(const Value *Select, const Value *&In, 3185 const APInt *&CLow, const APInt *&CHigh) { 3186 assert(isa<Operator>(Select) && 3187 cast<Operator>(Select)->getOpcode() == Instruction::Select && 3188 "Input should be a Select!"); 3189 3190 const Value *LHS = nullptr, *RHS = nullptr; 3191 SelectPatternFlavor SPF = matchSelectPattern(Select, LHS, RHS).Flavor; 3192 if (SPF != SPF_SMAX && SPF != SPF_SMIN) 3193 return false; 3194 3195 if (!match(RHS, m_APInt(CLow))) 3196 return false; 3197 3198 const Value *LHS2 = nullptr, *RHS2 = nullptr; 3199 SelectPatternFlavor SPF2 = matchSelectPattern(LHS, LHS2, RHS2).Flavor; 3200 if (getInverseMinMaxFlavor(SPF) != SPF2) 3201 return false; 3202 3203 if (!match(RHS2, m_APInt(CHigh))) 3204 return false; 3205 3206 if (SPF == SPF_SMIN) 3207 std::swap(CLow, CHigh); 3208 3209 In = LHS2; 3210 return CLow->sle(*CHigh); 3211 } 3212 3213 static bool isSignedMinMaxIntrinsicClamp(const IntrinsicInst *II, 3214 const APInt *&CLow, 3215 const APInt *&CHigh) { 3216 assert((II->getIntrinsicID() == Intrinsic::smin || 3217 II->getIntrinsicID() == Intrinsic::smax) && "Must be smin/smax"); 3218 3219 Intrinsic::ID InverseID = getInverseMinMaxIntrinsic(II->getIntrinsicID()); 3220 auto *InnerII = dyn_cast<IntrinsicInst>(II->getArgOperand(0)); 3221 if (!InnerII || InnerII->getIntrinsicID() != InverseID || 3222 !match(II->getArgOperand(1), m_APInt(CLow)) || 3223 !match(InnerII->getArgOperand(1), m_APInt(CHigh))) 3224 return false; 3225 3226 if (II->getIntrinsicID() == Intrinsic::smin) 3227 std::swap(CLow, CHigh); 3228 return CLow->sle(*CHigh); 3229 } 3230 3231 /// For vector constants, loop over the elements and find the constant with the 3232 /// minimum number of sign bits. Return 0 if the value is not a vector constant 3233 /// or if any element was not analyzed; otherwise, return the count for the 3234 /// element with the minimum number of sign bits. 3235 static unsigned computeNumSignBitsVectorConstant(const Value *V, 3236 const APInt &DemandedElts, 3237 unsigned TyBits) { 3238 const auto *CV = dyn_cast<Constant>(V); 3239 if (!CV || !isa<FixedVectorType>(CV->getType())) 3240 return 0; 3241 3242 unsigned MinSignBits = TyBits; 3243 unsigned NumElts = cast<FixedVectorType>(CV->getType())->getNumElements(); 3244 for (unsigned i = 0; i != NumElts; ++i) { 3245 if (!DemandedElts[i]) 3246 continue; 3247 // If we find a non-ConstantInt, bail out. 3248 auto *Elt = dyn_cast_or_null<ConstantInt>(CV->getAggregateElement(i)); 3249 if (!Elt) 3250 return 0; 3251 3252 MinSignBits = std::min(MinSignBits, Elt->getValue().getNumSignBits()); 3253 } 3254 3255 return MinSignBits; 3256 } 3257 3258 static unsigned ComputeNumSignBitsImpl(const Value *V, 3259 const APInt &DemandedElts, 3260 unsigned Depth, const SimplifyQuery &Q); 3261 3262 static unsigned ComputeNumSignBits(const Value *V, const APInt &DemandedElts, 3263 unsigned Depth, const SimplifyQuery &Q) { 3264 unsigned Result = ComputeNumSignBitsImpl(V, DemandedElts, Depth, Q); 3265 assert(Result > 0 && "At least one sign bit needs to be present!"); 3266 return Result; 3267 } 3268 3269 /// Return the number of times the sign bit of the register is replicated into 3270 /// the other bits. We know that at least 1 bit is always equal to the sign bit 3271 /// (itself), but other cases can give us information. For example, immediately 3272 /// after an "ashr X, 2", we know that the top 3 bits are all equal to each 3273 /// other, so we return 3. 
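/// Similarly, a "sext i8 %x to i32" is known to have at least 25 sign bits.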
For vectors, return the number of sign bits for the 3274 /// vector element with the minimum number of known sign bits of the demanded 3275 /// elements in the vector specified by DemandedElts. 3276 static unsigned ComputeNumSignBitsImpl(const Value *V, 3277 const APInt &DemandedElts, 3278 unsigned Depth, const SimplifyQuery &Q) { 3279 Type *Ty = V->getType(); 3280 #ifndef NDEBUG 3281 assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth"); 3282 3283 if (auto *FVTy = dyn_cast<FixedVectorType>(Ty)) { 3284 assert( 3285 FVTy->getNumElements() == DemandedElts.getBitWidth() && 3286 "DemandedElt width should equal the fixed vector number of elements"); 3287 } else { 3288 assert(DemandedElts == APInt(1, 1) && 3289 "DemandedElt width should be 1 for scalars"); 3290 } 3291 #endif 3292 3293 // We return the minimum number of sign bits that are guaranteed to be present 3294 // in V, so for undef we have to conservatively return 1. We don't have the 3295 // same behavior for poison though -- that's a FIXME today. 3296 3297 Type *ScalarTy = Ty->getScalarType(); 3298 unsigned TyBits = ScalarTy->isPointerTy() ? 3299 Q.DL.getPointerTypeSizeInBits(ScalarTy) : 3300 Q.DL.getTypeSizeInBits(ScalarTy); 3301 3302 unsigned Tmp, Tmp2; 3303 unsigned FirstAnswer = 1; 3304 3305 // Note that ConstantInt is handled by the general computeKnownBits case 3306 // below. 3307 3308 if (Depth == MaxAnalysisRecursionDepth) 3309 return 1; 3310 3311 if (auto *U = dyn_cast<Operator>(V)) { 3312 switch (Operator::getOpcode(V)) { 3313 default: break; 3314 case Instruction::SExt: 3315 Tmp = TyBits - U->getOperand(0)->getType()->getScalarSizeInBits(); 3316 return ComputeNumSignBits(U->getOperand(0), Depth + 1, Q) + Tmp; 3317 3318 case Instruction::SDiv: { 3319 const APInt *Denominator; 3320 // sdiv X, C -> adds log(C) sign bits. 3321 if (match(U->getOperand(1), m_APInt(Denominator))) { 3322 3323 // Ignore non-positive denominator. 3324 if (!Denominator->isStrictlyPositive()) 3325 break; 3326 3327 // Calculate the incoming numerator bits. 3328 unsigned NumBits = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); 3329 3330 // Add floor(log(C)) bits to the numerator bits. 3331 return std::min(TyBits, NumBits + Denominator->logBase2()); 3332 } 3333 break; 3334 } 3335 3336 case Instruction::SRem: { 3337 Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); 3338 3339 const APInt *Denominator; 3340 // srem X, C -> we know that the result is within [-C+1,C) when C is a 3341 // positive constant. This let us put a lower bound on the number of sign 3342 // bits. 3343 if (match(U->getOperand(1), m_APInt(Denominator))) { 3344 3345 // Ignore non-positive denominator. 3346 if (Denominator->isStrictlyPositive()) { 3347 // Calculate the leading sign bit constraints by examining the 3348 // denominator. Given that the denominator is positive, there are two 3349 // cases: 3350 // 3351 // 1. The numerator is positive. The result range is [0,C) and 3352 // [0,C) u< (1 << ceilLogBase2(C)). 3353 // 3354 // 2. The numerator is negative. Then the result range is (-C,0] and 3355 // integers in (-C,0] are either 0 or >u (-1 << ceilLogBase2(C)). 3356 // 3357 // Thus a lower bound on the number of sign bits is `TyBits - 3358 // ceilLogBase2(C)`. 3359 3360 unsigned ResBits = TyBits - Denominator->ceilLogBase2(); 3361 Tmp = std::max(Tmp, ResBits); 3362 } 3363 } 3364 return Tmp; 3365 } 3366 3367 case Instruction::AShr: { 3368 Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); 3369 // ashr X, C -> adds C sign bits. Vectors too. 
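      // E.g. if %x has 2 known sign bits, "ashr i32 %x, 3" has at least 5
      // (clamped to the bit width below).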
3370 const APInt *ShAmt; 3371 if (match(U->getOperand(1), m_APInt(ShAmt))) { 3372 if (ShAmt->uge(TyBits)) 3373 break; // Bad shift. 3374 unsigned ShAmtLimited = ShAmt->getZExtValue(); 3375 Tmp += ShAmtLimited; 3376 if (Tmp > TyBits) Tmp = TyBits; 3377 } 3378 return Tmp; 3379 } 3380 case Instruction::Shl: { 3381 const APInt *ShAmt; 3382 if (match(U->getOperand(1), m_APInt(ShAmt))) { 3383 // shl destroys sign bits. 3384 Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); 3385 if (ShAmt->uge(TyBits) || // Bad shift. 3386 ShAmt->uge(Tmp)) break; // Shifted all sign bits out. 3387 Tmp2 = ShAmt->getZExtValue(); 3388 return Tmp - Tmp2; 3389 } 3390 break; 3391 } 3392 case Instruction::And: 3393 case Instruction::Or: 3394 case Instruction::Xor: // NOT is handled here. 3395 // Logical binary ops preserve the number of sign bits at the worst. 3396 Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); 3397 if (Tmp != 1) { 3398 Tmp2 = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q); 3399 FirstAnswer = std::min(Tmp, Tmp2); 3400 // We computed what we know about the sign bits as our first 3401 // answer. Now proceed to the generic code that uses 3402 // computeKnownBits, and pick whichever answer is better. 3403 } 3404 break; 3405 3406 case Instruction::Select: { 3407 // If we have a clamp pattern, we know that the number of sign bits will 3408 // be the minimum of the clamp min/max range. 3409 const Value *X; 3410 const APInt *CLow, *CHigh; 3411 if (isSignedMinMaxClamp(U, X, CLow, CHigh)) 3412 return std::min(CLow->getNumSignBits(), CHigh->getNumSignBits()); 3413 3414 Tmp = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q); 3415 if (Tmp == 1) break; 3416 Tmp2 = ComputeNumSignBits(U->getOperand(2), Depth + 1, Q); 3417 return std::min(Tmp, Tmp2); 3418 } 3419 3420 case Instruction::Add: 3421 // Add can have at most one carry bit. Thus we know that the output 3422 // is, at worst, one more bit than the inputs. 3423 Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); 3424 if (Tmp == 1) break; 3425 3426 // Special case decrementing a value (ADD X, -1): 3427 if (const auto *CRHS = dyn_cast<Constant>(U->getOperand(1))) 3428 if (CRHS->isAllOnesValue()) { 3429 KnownBits Known(TyBits); 3430 computeKnownBits(U->getOperand(0), Known, Depth + 1, Q); 3431 3432 // If the input is known to be 0 or 1, the output is 0/-1, which is 3433 // all sign bits set. 3434 if ((Known.Zero | 1).isAllOnes()) 3435 return TyBits; 3436 3437 // If we are subtracting one from a positive number, there is no carry 3438 // out of the result. 3439 if (Known.isNonNegative()) 3440 return Tmp; 3441 } 3442 3443 Tmp2 = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q); 3444 if (Tmp2 == 1) break; 3445 return std::min(Tmp, Tmp2) - 1; 3446 3447 case Instruction::Sub: 3448 Tmp2 = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q); 3449 if (Tmp2 == 1) break; 3450 3451 // Handle NEG. 3452 if (const auto *CLHS = dyn_cast<Constant>(U->getOperand(0))) 3453 if (CLHS->isNullValue()) { 3454 KnownBits Known(TyBits); 3455 computeKnownBits(U->getOperand(1), Known, Depth + 1, Q); 3456 // If the input is known to be 0 or 1, the output is 0/-1, which is 3457 // all sign bits set. 3458 if ((Known.Zero | 1).isAllOnes()) 3459 return TyBits; 3460 3461 // If the input is known to be positive (the sign bit is known clear), 3462 // the output of the NEG has the same number of sign bits as the 3463 // input. 3464 if (Known.isNonNegative()) 3465 return Tmp2; 3466 3467 // Otherwise, we treat this like a SUB. 
3468 } 3469 3470 // Sub can have at most one carry bit. Thus we know that the output 3471 // is, at worst, one more bit than the inputs. 3472 Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); 3473 if (Tmp == 1) break; 3474 return std::min(Tmp, Tmp2) - 1; 3475 3476 case Instruction::Mul: { 3477 // The output of the Mul can be at most twice the valid bits in the 3478 // inputs. 3479 unsigned SignBitsOp0 = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); 3480 if (SignBitsOp0 == 1) break; 3481 unsigned SignBitsOp1 = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q); 3482 if (SignBitsOp1 == 1) break; 3483 unsigned OutValidBits = 3484 (TyBits - SignBitsOp0 + 1) + (TyBits - SignBitsOp1 + 1); 3485 return OutValidBits > TyBits ? 1 : TyBits - OutValidBits + 1; 3486 } 3487 3488 case Instruction::PHI: { 3489 const PHINode *PN = cast<PHINode>(U); 3490 unsigned NumIncomingValues = PN->getNumIncomingValues(); 3491 // Don't analyze large in-degree PHIs. 3492 if (NumIncomingValues > 4) break; 3493 // Unreachable blocks may have zero-operand PHI nodes. 3494 if (NumIncomingValues == 0) break; 3495 3496 // Take the minimum of all incoming values. This can't infinitely loop 3497 // because of our depth threshold. 3498 SimplifyQuery RecQ = Q; 3499 Tmp = TyBits; 3500 for (unsigned i = 0, e = NumIncomingValues; i != e; ++i) { 3501 if (Tmp == 1) return Tmp; 3502 RecQ.CxtI = PN->getIncomingBlock(i)->getTerminator(); 3503 Tmp = std::min( 3504 Tmp, ComputeNumSignBits(PN->getIncomingValue(i), Depth + 1, RecQ)); 3505 } 3506 return Tmp; 3507 } 3508 3509 case Instruction::Trunc: { 3510 // If the input contained enough sign bits that some remain after the 3511 // truncation, then we can make use of that. Otherwise we don't know 3512 // anything. 3513 Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); 3514 unsigned OperandTyBits = U->getOperand(0)->getType()->getScalarSizeInBits(); 3515 if (Tmp > (OperandTyBits - TyBits)) 3516 return Tmp - (OperandTyBits - TyBits); 3517 3518 return 1; 3519 } 3520 3521 case Instruction::ExtractElement: 3522 // Look through extract element. At the moment we keep this simple and 3523 // skip tracking the specific element. But at least we might find 3524 // information valid for all elements of the vector (for example if vector 3525 // is sign extended, shifted, etc). 3526 return ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); 3527 3528 case Instruction::ShuffleVector: { 3529 // Collect the minimum number of sign bits that are shared by every vector 3530 // element referenced by the shuffle. 3531 auto *Shuf = dyn_cast<ShuffleVectorInst>(U); 3532 if (!Shuf) { 3533 // FIXME: Add support for shufflevector constant expressions. 3534 return 1; 3535 } 3536 APInt DemandedLHS, DemandedRHS; 3537 // For undef elements, we don't know anything about the common state of 3538 // the shuffle result. 3539 if (!getShuffleDemandedElts(Shuf, DemandedElts, DemandedLHS, DemandedRHS)) 3540 return 1; 3541 Tmp = std::numeric_limits<unsigned>::max(); 3542 if (!!DemandedLHS) { 3543 const Value *LHS = Shuf->getOperand(0); 3544 Tmp = ComputeNumSignBits(LHS, DemandedLHS, Depth + 1, Q); 3545 } 3546 // If we don't know anything, early out and try computeKnownBits 3547 // fall-back. 3548 if (Tmp == 1) 3549 break; 3550 if (!!DemandedRHS) { 3551 const Value *RHS = Shuf->getOperand(1); 3552 Tmp2 = ComputeNumSignBits(RHS, DemandedRHS, Depth + 1, Q); 3553 Tmp = std::min(Tmp, Tmp2); 3554 } 3555 // If we don't know anything, early out and try computeKnownBits 3556 // fall-back. 
3557 if (Tmp == 1) 3558 break; 3559 assert(Tmp <= TyBits && "Failed to determine minimum sign bits"); 3560 return Tmp; 3561 } 3562 case Instruction::Call: { 3563 if (const auto *II = dyn_cast<IntrinsicInst>(U)) { 3564 switch (II->getIntrinsicID()) { 3565 default: break; 3566 case Intrinsic::abs: 3567 Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); 3568 if (Tmp == 1) break; 3569 3570 // Absolute value reduces number of sign bits by at most 1. 3571 return Tmp - 1; 3572 case Intrinsic::smin: 3573 case Intrinsic::smax: { 3574 const APInt *CLow, *CHigh; 3575 if (isSignedMinMaxIntrinsicClamp(II, CLow, CHigh)) 3576 return std::min(CLow->getNumSignBits(), CHigh->getNumSignBits()); 3577 } 3578 } 3579 } 3580 } 3581 } 3582 } 3583 3584 // Finally, if we can prove that the top bits of the result are 0's or 1's, 3585 // use this information. 3586 3587 // If we can examine all elements of a vector constant successfully, we're 3588 // done (we can't do any better than that). If not, keep trying. 3589 if (unsigned VecSignBits = 3590 computeNumSignBitsVectorConstant(V, DemandedElts, TyBits)) 3591 return VecSignBits; 3592 3593 KnownBits Known(TyBits); 3594 computeKnownBits(V, DemandedElts, Known, Depth, Q); 3595 3596 // If we know that the sign bit is either zero or one, determine the number of 3597 // identical bits in the top of the input value. 3598 return std::max(FirstAnswer, Known.countMinSignBits()); 3599 } 3600 3601 Intrinsic::ID llvm::getIntrinsicForCallSite(const CallBase &CB, 3602 const TargetLibraryInfo *TLI) { 3603 const Function *F = CB.getCalledFunction(); 3604 if (!F) 3605 return Intrinsic::not_intrinsic; 3606 3607 if (F->isIntrinsic()) 3608 return F->getIntrinsicID(); 3609 3610 // We are going to infer semantics of a library function based on mapping it 3611 // to an LLVM intrinsic. Check that the library function is available from 3612 // this callbase and in this environment. 
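  // For example, a call to "sinf" that only reads memory can be mapped to
  // Intrinsic::sin below, while a function with local linkage is never
  // assumed to be the standard library routine.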
3613 LibFunc Func; 3614 if (F->hasLocalLinkage() || !TLI || !TLI->getLibFunc(CB, Func) || 3615 !CB.onlyReadsMemory()) 3616 return Intrinsic::not_intrinsic; 3617 3618 switch (Func) { 3619 default: 3620 break; 3621 case LibFunc_sin: 3622 case LibFunc_sinf: 3623 case LibFunc_sinl: 3624 return Intrinsic::sin; 3625 case LibFunc_cos: 3626 case LibFunc_cosf: 3627 case LibFunc_cosl: 3628 return Intrinsic::cos; 3629 case LibFunc_exp: 3630 case LibFunc_expf: 3631 case LibFunc_expl: 3632 return Intrinsic::exp; 3633 case LibFunc_exp2: 3634 case LibFunc_exp2f: 3635 case LibFunc_exp2l: 3636 return Intrinsic::exp2; 3637 case LibFunc_log: 3638 case LibFunc_logf: 3639 case LibFunc_logl: 3640 return Intrinsic::log; 3641 case LibFunc_log10: 3642 case LibFunc_log10f: 3643 case LibFunc_log10l: 3644 return Intrinsic::log10; 3645 case LibFunc_log2: 3646 case LibFunc_log2f: 3647 case LibFunc_log2l: 3648 return Intrinsic::log2; 3649 case LibFunc_fabs: 3650 case LibFunc_fabsf: 3651 case LibFunc_fabsl: 3652 return Intrinsic::fabs; 3653 case LibFunc_fmin: 3654 case LibFunc_fminf: 3655 case LibFunc_fminl: 3656 return Intrinsic::minnum; 3657 case LibFunc_fmax: 3658 case LibFunc_fmaxf: 3659 case LibFunc_fmaxl: 3660 return Intrinsic::maxnum; 3661 case LibFunc_copysign: 3662 case LibFunc_copysignf: 3663 case LibFunc_copysignl: 3664 return Intrinsic::copysign; 3665 case LibFunc_floor: 3666 case LibFunc_floorf: 3667 case LibFunc_floorl: 3668 return Intrinsic::floor; 3669 case LibFunc_ceil: 3670 case LibFunc_ceilf: 3671 case LibFunc_ceill: 3672 return Intrinsic::ceil; 3673 case LibFunc_trunc: 3674 case LibFunc_truncf: 3675 case LibFunc_truncl: 3676 return Intrinsic::trunc; 3677 case LibFunc_rint: 3678 case LibFunc_rintf: 3679 case LibFunc_rintl: 3680 return Intrinsic::rint; 3681 case LibFunc_nearbyint: 3682 case LibFunc_nearbyintf: 3683 case LibFunc_nearbyintl: 3684 return Intrinsic::nearbyint; 3685 case LibFunc_round: 3686 case LibFunc_roundf: 3687 case LibFunc_roundl: 3688 return Intrinsic::round; 3689 case LibFunc_roundeven: 3690 case LibFunc_roundevenf: 3691 case LibFunc_roundevenl: 3692 return Intrinsic::roundeven; 3693 case LibFunc_pow: 3694 case LibFunc_powf: 3695 case LibFunc_powl: 3696 return Intrinsic::pow; 3697 case LibFunc_sqrt: 3698 case LibFunc_sqrtf: 3699 case LibFunc_sqrtl: 3700 return Intrinsic::sqrt; 3701 } 3702 3703 return Intrinsic::not_intrinsic; 3704 } 3705 3706 /// Deprecated, use computeKnownFPClass instead. 3707 /// 3708 /// If \p SignBitOnly is true, test for a known 0 sign bit rather than a 3709 /// standard ordered compare. e.g. make -0.0 olt 0.0 be true because of the sign 3710 /// bit despite comparing equal. 3711 static bool cannotBeOrderedLessThanZeroImpl(const Value *V, 3712 const DataLayout &DL, 3713 const TargetLibraryInfo *TLI, 3714 bool SignBitOnly, unsigned Depth) { 3715 // TODO: This function does not do the right thing when SignBitOnly is true 3716 // and we're lowering to a hypothetical IEEE 754-compliant-but-evil platform 3717 // which flips the sign bits of NaNs. See 3718 // https://llvm.org/bugs/show_bug.cgi?id=31702. 3719 3720 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(V)) { 3721 return !CFP->getValueAPF().isNegative() || 3722 (!SignBitOnly && CFP->getValueAPF().isZero()); 3723 } 3724 3725 // Handle vector of constants. 
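  // E.g. <2 x float> <float 0.0, float 2.5> passes the check below; an
  // element that is not a ConstantFP, or that is negative (with -0.0
  // tolerated only when !SignBitOnly), makes us return false.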
3726 if (auto *CV = dyn_cast<Constant>(V)) { 3727 if (auto *CVFVTy = dyn_cast<FixedVectorType>(CV->getType())) { 3728 unsigned NumElts = CVFVTy->getNumElements(); 3729 for (unsigned i = 0; i != NumElts; ++i) { 3730 auto *CFP = dyn_cast_or_null<ConstantFP>(CV->getAggregateElement(i)); 3731 if (!CFP) 3732 return false; 3733 if (CFP->getValueAPF().isNegative() && 3734 (SignBitOnly || !CFP->getValueAPF().isZero())) 3735 return false; 3736 } 3737 3738 // All non-negative ConstantFPs. 3739 return true; 3740 } 3741 } 3742 3743 if (Depth == MaxAnalysisRecursionDepth) 3744 return false; 3745 3746 const Operator *I = dyn_cast<Operator>(V); 3747 if (!I) 3748 return false; 3749 3750 switch (I->getOpcode()) { 3751 default: 3752 break; 3753 // Unsigned integers are always nonnegative. 3754 case Instruction::UIToFP: 3755 return true; 3756 case Instruction::FDiv: 3757 // X / X is always exactly 1.0 or a NaN. 3758 if (I->getOperand(0) == I->getOperand(1) && 3759 (!SignBitOnly || cast<FPMathOperator>(I)->hasNoNaNs())) 3760 return true; 3761 3762 // Set SignBitOnly for RHS, because X / -0.0 is -Inf (or NaN). 3763 return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), DL, TLI, 3764 SignBitOnly, Depth + 1) && 3765 cannotBeOrderedLessThanZeroImpl(I->getOperand(1), DL, TLI, 3766 /*SignBitOnly*/ true, Depth + 1); 3767 case Instruction::FMul: 3768 // X * X is always non-negative or a NaN. 3769 if (I->getOperand(0) == I->getOperand(1) && 3770 (!SignBitOnly || cast<FPMathOperator>(I)->hasNoNaNs())) 3771 return true; 3772 3773 [[fallthrough]]; 3774 case Instruction::FAdd: 3775 case Instruction::FRem: 3776 return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), DL, TLI, 3777 SignBitOnly, Depth + 1) && 3778 cannotBeOrderedLessThanZeroImpl(I->getOperand(1), DL, TLI, 3779 SignBitOnly, Depth + 1); 3780 case Instruction::Select: 3781 return cannotBeOrderedLessThanZeroImpl(I->getOperand(1), DL, TLI, 3782 SignBitOnly, Depth + 1) && 3783 cannotBeOrderedLessThanZeroImpl(I->getOperand(2), DL, TLI, 3784 SignBitOnly, Depth + 1); 3785 case Instruction::FPExt: 3786 case Instruction::FPTrunc: 3787 // Widening/narrowing never change sign. 3788 return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), DL, TLI, 3789 SignBitOnly, Depth + 1); 3790 case Instruction::ExtractElement: 3791 // Look through extract element. At the moment we keep this simple and skip 3792 // tracking the specific element. But at least we might find information 3793 // valid for all elements of the vector. 3794 return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), DL, TLI, 3795 SignBitOnly, Depth + 1); 3796 case Instruction::Call: 3797 const auto *CI = cast<CallInst>(I); 3798 Intrinsic::ID IID = getIntrinsicForCallSite(*CI, TLI); 3799 switch (IID) { 3800 default: 3801 break; 3802 case Intrinsic::canonicalize: 3803 case Intrinsic::arithmetic_fence: 3804 case Intrinsic::floor: 3805 case Intrinsic::ceil: 3806 case Intrinsic::trunc: 3807 case Intrinsic::rint: 3808 case Intrinsic::nearbyint: 3809 case Intrinsic::round: 3810 case Intrinsic::roundeven: 3811 case Intrinsic::fptrunc_round: 3812 return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), DL, TLI, 3813 SignBitOnly, Depth + 1); 3814 case Intrinsic::maxnum: { 3815 Value *V0 = I->getOperand(0), *V1 = I->getOperand(1); 3816 auto isPositiveNum = [&](Value *V) { 3817 if (SignBitOnly) { 3818 // With SignBitOnly, this is tricky because the result of 3819 // maxnum(+0.0, -0.0) is unspecified. Just check if the operand is 3820 // a constant strictly greater than 0.0. 
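        // (E.g. 1.0 qualifies; +0.0 does not.)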
3821 const APFloat *C; 3822 return match(V, m_APFloat(C)) && 3823 *C > APFloat::getZero(C->getSemantics()); 3824 } 3825 3826 // -0.0 compares equal to 0.0, so if this operand is at least -0.0, 3827 // maxnum can't be ordered-less-than-zero. 3828 return isKnownNeverNaN(V, DL, TLI) && 3829 cannotBeOrderedLessThanZeroImpl(V, DL, TLI, false, Depth + 1); 3830 }; 3831 3832 // TODO: This could be improved. We could also check that neither operand 3833 // has its sign bit set (and at least 1 is not-NAN?). 3834 return isPositiveNum(V0) || isPositiveNum(V1); 3835 } 3836 3837 case Intrinsic::maximum: 3838 return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), DL, TLI, 3839 SignBitOnly, Depth + 1) || 3840 cannotBeOrderedLessThanZeroImpl(I->getOperand(1), DL, TLI, 3841 SignBitOnly, Depth + 1); 3842 case Intrinsic::minnum: 3843 case Intrinsic::minimum: 3844 return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), DL, TLI, 3845 SignBitOnly, Depth + 1) && 3846 cannotBeOrderedLessThanZeroImpl(I->getOperand(1), DL, TLI, 3847 SignBitOnly, Depth + 1); 3848 case Intrinsic::exp: 3849 case Intrinsic::exp2: 3850 case Intrinsic::fabs: 3851 return true; 3852 case Intrinsic::copysign: 3853 // Only the sign operand matters. 3854 return cannotBeOrderedLessThanZeroImpl(I->getOperand(1), DL, TLI, true, 3855 Depth + 1); 3856 case Intrinsic::sqrt: 3857 // sqrt(x) is always >= -0 or NaN. Moreover, sqrt(x) == -0 iff x == -0. 3858 if (!SignBitOnly) 3859 return true; 3860 return CI->hasNoNaNs() && 3861 (CI->hasNoSignedZeros() || 3862 cannotBeNegativeZero(CI->getOperand(0), DL, TLI)); 3863 3864 case Intrinsic::powi: 3865 if (ConstantInt *Exponent = dyn_cast<ConstantInt>(I->getOperand(1))) { 3866 // powi(x,n) is non-negative if n is even. 3867 if (Exponent->getBitWidth() <= 64 && Exponent->getSExtValue() % 2u == 0) 3868 return true; 3869 } 3870 // TODO: This is not correct. Given that exp is an integer, here are the 3871 // ways that pow can return a negative value: 3872 // 3873 // pow(x, exp) --> negative if exp is odd and x is negative. 3874 // pow(-0, exp) --> -inf if exp is negative odd. 3875 // pow(-0, exp) --> -0 if exp is positive odd. 3876 // pow(-inf, exp) --> -0 if exp is negative odd. 3877 // pow(-inf, exp) --> -inf if exp is positive odd. 3878 // 3879 // Therefore, if !SignBitOnly, we can return true if x >= +0 or x is NaN, 3880 // but we must return false if x == -0. Unfortunately we do not currently 3881 // have a way of expressing this constraint. See details in 3882 // https://llvm.org/bugs/show_bug.cgi?id=31702. 3883 return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), DL, TLI, 3884 SignBitOnly, Depth + 1); 3885 3886 case Intrinsic::fma: 3887 case Intrinsic::fmuladd: 3888 // x*x+y is non-negative if y is non-negative. 3889 return I->getOperand(0) == I->getOperand(1) && 3890 (!SignBitOnly || cast<FPMathOperator>(I)->hasNoNaNs()) && 3891 cannotBeOrderedLessThanZeroImpl(I->getOperand(2), DL, TLI, 3892 SignBitOnly, Depth + 1); 3893 } 3894 break; 3895 } 3896 return false; 3897 } 3898 3899 bool llvm::SignBitMustBeZero(const Value *V, const DataLayout &DL, 3900 const TargetLibraryInfo *TLI) { 3901 // FIXME: Use computeKnownFPClass and pass all arguments 3902 return cannotBeOrderedLessThanZeroImpl(V, DL, TLI, true, 0); 3903 } 3904 3905 /// Return true if it's possible to assume IEEE treatment of input denormals in 3906 /// \p F for \p Val. 
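/// In practice this queries the function-level denormal mode (e.g. the
/// "denormal-fp-math" attribute) for the value's scalar FP semantics.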
3907 static bool inputDenormalIsIEEE(const Function &F, const Type *Ty) { 3908 Ty = Ty->getScalarType(); 3909 return F.getDenormalMode(Ty->getFltSemantics()).Input == DenormalMode::IEEE; 3910 } 3911 3912 static bool inputDenormalIsIEEEOrPosZero(const Function &F, const Type *Ty) { 3913 Ty = Ty->getScalarType(); 3914 DenormalMode Mode = F.getDenormalMode(Ty->getFltSemantics()); 3915 return Mode.Input == DenormalMode::IEEE || 3916 Mode.Input == DenormalMode::PositiveZero; 3917 } 3918 3919 static bool outputDenormalIsIEEEOrPosZero(const Function &F, const Type *Ty) { 3920 Ty = Ty->getScalarType(); 3921 DenormalMode Mode = F.getDenormalMode(Ty->getFltSemantics()); 3922 return Mode.Output == DenormalMode::IEEE || 3923 Mode.Output == DenormalMode::PositiveZero; 3924 } 3925 3926 bool KnownFPClass::isKnownNeverLogicalZero(const Function &F, Type *Ty) const { 3927 return isKnownNeverZero() && 3928 (isKnownNeverSubnormal() || inputDenormalIsIEEE(F, Ty)); 3929 } 3930 3931 bool KnownFPClass::isKnownNeverLogicalNegZero(const Function &F, 3932 Type *Ty) const { 3933 return isKnownNeverNegZero() && 3934 (isKnownNeverNegSubnormal() || inputDenormalIsIEEEOrPosZero(F, Ty)); 3935 } 3936 3937 bool KnownFPClass::isKnownNeverLogicalPosZero(const Function &F, 3938 Type *Ty) const { 3939 if (!isKnownNeverPosZero()) 3940 return false; 3941 3942 // If we know there are no denormals, nothing can be flushed to zero. 3943 if (isKnownNeverSubnormal()) 3944 return true; 3945 3946 DenormalMode Mode = F.getDenormalMode(Ty->getScalarType()->getFltSemantics()); 3947 switch (Mode.Input) { 3948 case DenormalMode::IEEE: 3949 return true; 3950 case DenormalMode::PreserveSign: 3951 // Negative subnormal won't flush to +0 3952 return isKnownNeverPosSubnormal(); 3953 case DenormalMode::PositiveZero: 3954 default: 3955 // Both positive and negative subnormal could flush to +0 3956 return false; 3957 } 3958 3959 llvm_unreachable("covered switch over denormal mode"); 3960 } 3961 3962 void KnownFPClass::propagateDenormal(const KnownFPClass &Src, const Function &F, 3963 Type *Ty) { 3964 KnownFPClasses = Src.KnownFPClasses; 3965 // If we aren't assuming the source can't be a zero, we don't have to check if 3966 // a denormal input could be flushed. 3967 if (!Src.isKnownNeverPosZero() && !Src.isKnownNeverNegZero()) 3968 return; 3969 3970 // If we know the input can't be a denormal, it can't be flushed to 0. 3971 if (Src.isKnownNeverSubnormal()) 3972 return; 3973 3974 DenormalMode Mode = F.getDenormalMode(Ty->getScalarType()->getFltSemantics()); 3975 3976 if (!Src.isKnownNeverPosSubnormal() && Mode != DenormalMode::getIEEE()) 3977 KnownFPClasses |= fcPosZero; 3978 3979 if (!Src.isKnownNeverNegSubnormal() && Mode != DenormalMode::getIEEE()) { 3980 if (Mode != DenormalMode::getPositiveZero()) 3981 KnownFPClasses |= fcNegZero; 3982 3983 if (Mode.Input == DenormalMode::PositiveZero || 3984 Mode.Output == DenormalMode::PositiveZero || 3985 Mode.Input == DenormalMode::Dynamic || 3986 Mode.Output == DenormalMode::Dynamic) 3987 KnownFPClasses |= fcPosZero; 3988 } 3989 } 3990 3991 void KnownFPClass::propagateCanonicalizingSrc(const KnownFPClass &Src, 3992 const Function &F, Type *Ty) { 3993 propagateDenormal(Src, F, Ty); 3994 propagateNaN(Src, /*PreserveSign=*/true); 3995 } 3996 3997 /// Returns a pair of values, which if passed to llvm.is.fpclass, returns the 3998 /// same result as an fcmp with the given operands. 
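/// For example (illustrative), an OEQ compare of %x against +infinity maps to
/// {%x, fcPosInf}, and "fcmp uno %x, 0.0" maps to {%x, fcNan}; a result of
/// {nullptr, fcAllFlags} means no equivalent class test was found.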
3999 std::pair<Value *, FPClassTest> llvm::fcmpToClassTest(FCmpInst::Predicate Pred, 4000 const Function &F, 4001 Value *LHS, Value *RHS, 4002 bool LookThroughSrc) { 4003 const APFloat *ConstRHS; 4004 if (!match(RHS, m_APFloatAllowUndef(ConstRHS))) 4005 return {nullptr, fcAllFlags}; 4006 4007 return fcmpToClassTest(Pred, F, LHS, ConstRHS, LookThroughSrc); 4008 } 4009 4010 std::pair<Value *, FPClassTest> 4011 llvm::fcmpToClassTest(FCmpInst::Predicate Pred, const Function &F, Value *LHS, 4012 const APFloat *ConstRHS, bool LookThroughSrc) { 4013 // fcmp ord x, zero|normal|subnormal|inf -> ~fcNan 4014 if (Pred == FCmpInst::FCMP_ORD && !ConstRHS->isNaN()) 4015 return {LHS, ~fcNan}; 4016 4017 // fcmp uno x, zero|normal|subnormal|inf -> fcNan 4018 if (Pred == FCmpInst::FCMP_UNO && !ConstRHS->isNaN()) 4019 return {LHS, fcNan}; 4020 4021 if (ConstRHS->isZero()) { 4022 // Compares with fcNone are only exactly equal to fcZero if input denormals 4023 // are not flushed. 4024 // TODO: Handle DAZ by expanding masks to cover subnormal cases. 4025 if (Pred != FCmpInst::FCMP_ORD && Pred != FCmpInst::FCMP_UNO && 4026 !inputDenormalIsIEEE(F, LHS->getType())) 4027 return {nullptr, fcAllFlags}; 4028 4029 switch (Pred) { 4030 case FCmpInst::FCMP_OEQ: // Match x == 0.0 4031 return {LHS, fcZero}; 4032 case FCmpInst::FCMP_UEQ: // Match isnan(x) || (x == 0.0) 4033 return {LHS, fcZero | fcNan}; 4034 case FCmpInst::FCMP_UNE: // Match (x != 0.0) 4035 return {LHS, ~fcZero}; 4036 case FCmpInst::FCMP_ONE: // Match !isnan(x) && x != 0.0 4037 return {LHS, ~fcNan & ~fcZero}; 4038 case FCmpInst::FCMP_ORD: 4039 // Canonical form of ord/uno is with a zero. We could also handle 4040 // non-canonical other non-NaN constants or LHS == RHS. 4041 return {LHS, ~fcNan}; 4042 case FCmpInst::FCMP_UNO: 4043 return {LHS, fcNan}; 4044 case FCmpInst::FCMP_OGT: // x > 0 4045 return {LHS, fcPosSubnormal | fcPosNormal | fcPosInf}; 4046 case FCmpInst::FCMP_UGT: // isnan(x) || x > 0 4047 return {LHS, fcPosSubnormal | fcPosNormal | fcPosInf | fcNan}; 4048 case FCmpInst::FCMP_OGE: // x >= 0 4049 return {LHS, fcPositive | fcNegZero}; 4050 case FCmpInst::FCMP_UGE: // isnan(x) || x >= 0 4051 return {LHS, fcPositive | fcNegZero | fcNan}; 4052 case FCmpInst::FCMP_OLT: // x < 0 4053 return {LHS, fcNegSubnormal | fcNegNormal | fcNegInf}; 4054 case FCmpInst::FCMP_ULT: // isnan(x) || x < 0 4055 return {LHS, fcNegSubnormal | fcNegNormal | fcNegInf | fcNan}; 4056 case FCmpInst::FCMP_OLE: // x <= 0 4057 return {LHS, fcNegative | fcPosZero}; 4058 case FCmpInst::FCMP_ULE: // isnan(x) || x <= 0 4059 return {LHS, fcNegative | fcPosZero | fcNan}; 4060 default: 4061 break; 4062 } 4063 4064 return {nullptr, fcAllFlags}; 4065 } 4066 4067 Value *Src = LHS; 4068 const bool IsFabs = LookThroughSrc && match(LHS, m_FAbs(m_Value(Src))); 4069 4070 // Compute the test mask that would return true for the ordered comparisons. 
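  // The unordered predicates are handled by inverting this mask at the end.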
4071 FPClassTest Mask; 4072 4073 if (ConstRHS->isInfinity()) { 4074 switch (Pred) { 4075 case FCmpInst::FCMP_OEQ: 4076 case FCmpInst::FCMP_UNE: { 4077 // Match __builtin_isinf patterns 4078 // 4079 // fcmp oeq x, +inf -> is_fpclass x, fcPosInf 4080 // fcmp oeq fabs(x), +inf -> is_fpclass x, fcInf 4081 // fcmp oeq x, -inf -> is_fpclass x, fcNegInf 4082 // fcmp oeq fabs(x), -inf -> is_fpclass x, 0 -> false 4083 // 4084 // fcmp une x, +inf -> is_fpclass x, ~fcPosInf 4085 // fcmp une fabs(x), +inf -> is_fpclass x, ~fcInf 4086 // fcmp une x, -inf -> is_fpclass x, ~fcNegInf 4087 // fcmp une fabs(x), -inf -> is_fpclass x, fcAllFlags -> true 4088 4089 if (ConstRHS->isNegative()) { 4090 Mask = fcNegInf; 4091 if (IsFabs) 4092 Mask = fcNone; 4093 } else { 4094 Mask = fcPosInf; 4095 if (IsFabs) 4096 Mask |= fcNegInf; 4097 } 4098 4099 break; 4100 } 4101 case FCmpInst::FCMP_ONE: 4102 case FCmpInst::FCMP_UEQ: { 4103 // Match __builtin_isinf patterns 4104 // fcmp one x, -inf -> is_fpclass x, fcNegInf 4105 // fcmp one fabs(x), -inf -> is_fpclass x, ~fcNegInf & ~fcNan 4106 // fcmp one x, +inf -> is_fpclass x, ~fcNegInf & ~fcNan 4107 // fcmp one fabs(x), +inf -> is_fpclass x, ~fcInf & fcNan 4108 // 4109 // fcmp ueq x, +inf -> is_fpclass x, fcPosInf|fcNan 4110 // fcmp ueq (fabs x), +inf -> is_fpclass x, fcInf|fcNan 4111 // fcmp ueq x, -inf -> is_fpclass x, fcNegInf|fcNan 4112 // fcmp ueq fabs(x), -inf -> is_fpclass x, fcNan 4113 if (ConstRHS->isNegative()) { 4114 Mask = ~fcNegInf & ~fcNan; 4115 if (IsFabs) 4116 Mask = ~fcNan; 4117 } else { 4118 Mask = ~fcPosInf & ~fcNan; 4119 if (IsFabs) 4120 Mask &= ~fcNegInf; 4121 } 4122 4123 break; 4124 } 4125 case FCmpInst::FCMP_OLT: 4126 case FCmpInst::FCMP_UGE: { 4127 if (ConstRHS->isNegative()) { 4128 // No value is ordered and less than negative infinity. 4129 // All values are unordered with or at least negative infinity. 4130 // fcmp olt x, -inf -> false 4131 // fcmp uge x, -inf -> true 4132 Mask = fcNone; 4133 break; 4134 } 4135 4136 // fcmp olt fabs(x), +inf -> fcFinite 4137 // fcmp uge fabs(x), +inf -> ~fcFinite 4138 // fcmp olt x, +inf -> fcFinite|fcNegInf 4139 // fcmp uge x, +inf -> ~(fcFinite|fcNegInf) 4140 Mask = fcFinite; 4141 if (!IsFabs) 4142 Mask |= fcNegInf; 4143 break; 4144 } 4145 case FCmpInst::FCMP_OGE: 4146 case FCmpInst::FCMP_ULT: { 4147 if (ConstRHS->isNegative()) { 4148 // fcmp oge x, -inf -> ~fcNan 4149 // fcmp oge fabs(x), -inf -> ~fcNan 4150 // fcmp ult x, -inf -> fcNan 4151 // fcmp ult fabs(x), -inf -> fcNan 4152 Mask = ~fcNan; 4153 break; 4154 } 4155 4156 // fcmp oge fabs(x), +inf -> fcInf 4157 // fcmp oge x, +inf -> fcPosInf 4158 // fcmp ult fabs(x), +inf -> ~fcInf 4159 // fcmp ult x, +inf -> ~fcPosInf 4160 Mask = fcPosInf; 4161 if (IsFabs) 4162 Mask |= fcNegInf; 4163 break; 4164 } 4165 case FCmpInst::FCMP_OGT: 4166 case FCmpInst::FCMP_ULE: { 4167 if (ConstRHS->isNegative()) { 4168 // fcmp ogt x, -inf -> fcmp one x, -inf 4169 // fcmp ogt fabs(x), -inf -> fcmp ord x, x 4170 // fcmp ule x, -inf -> fcmp ueq x, -inf 4171 // fcmp ule fabs(x), -inf -> fcmp uno x, x 4172 Mask = IsFabs ? ~fcNan : ~(fcNegInf | fcNan); 4173 break; 4174 } 4175 4176 // No value is ordered and greater than infinity. 4177 Mask = fcNone; 4178 break; 4179 } 4180 default: 4181 return {nullptr, fcAllFlags}; 4182 } 4183 } else if (ConstRHS->isSmallestNormalized() && !ConstRHS->isNegative()) { 4184 // Match pattern that's used in __builtin_isnormal. 
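    // __builtin_isnormal(x) is typically lowered using a compare of fabs(x)
    // against the smallest normalized value (0x1p-126 for float), which is
    // the form matched here.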
4185 switch (Pred) { 4186 case FCmpInst::FCMP_OLT: 4187 case FCmpInst::FCMP_UGE: { 4188 // fcmp olt x, smallest_normal -> fcNegInf|fcNegNormal|fcSubnormal|fcZero 4189 // fcmp olt fabs(x), smallest_normal -> fcSubnormal|fcZero 4190 // fcmp uge x, smallest_normal -> fcNan|fcPosNormal|fcPosInf 4191 // fcmp uge fabs(x), smallest_normal -> ~(fcSubnormal|fcZero) 4192 Mask = fcZero | fcSubnormal; 4193 if (!IsFabs) 4194 Mask |= fcNegNormal | fcNegInf; 4195 4196 break; 4197 } 4198 case FCmpInst::FCMP_OGE: 4199 case FCmpInst::FCMP_ULT: { 4200 // fcmp oge x, smallest_normal -> fcPosNormal | fcPosInf 4201 // fcmp oge fabs(x), smallest_normal -> fcInf | fcNormal 4202 // fcmp ult x, smallest_normal -> ~(fcPosNormal | fcPosInf) 4203 // fcmp ult fabs(x), smallest_normal -> ~(fcInf | fcNormal) 4204 Mask = fcPosInf | fcPosNormal; 4205 if (IsFabs) 4206 Mask |= fcNegInf | fcNegNormal; 4207 break; 4208 } 4209 default: 4210 return {nullptr, fcAllFlags}; 4211 } 4212 } else if (ConstRHS->isNaN()) { 4213 // fcmp o__ x, nan -> false 4214 // fcmp u__ x, nan -> true 4215 Mask = fcNone; 4216 } else 4217 return {nullptr, fcAllFlags}; 4218 4219 // Invert the comparison for the unordered cases. 4220 if (FCmpInst::isUnordered(Pred)) 4221 Mask = ~Mask; 4222 4223 return {Src, Mask}; 4224 } 4225 4226 static FPClassTest computeKnownFPClassFromAssumes(const Value *V, 4227 const SimplifyQuery &Q) { 4228 FPClassTest KnownFromAssume = fcAllFlags; 4229 4230 // Try to restrict the floating-point classes based on information from 4231 // assumptions. 4232 for (auto &AssumeVH : Q.AC->assumptionsFor(V)) { 4233 if (!AssumeVH) 4234 continue; 4235 CallInst *I = cast<CallInst>(AssumeVH); 4236 const Function *F = I->getFunction(); 4237 4238 assert(F == Q.CxtI->getParent()->getParent() && 4239 "Got assumption for the wrong function!"); 4240 assert(I->getCalledFunction()->getIntrinsicID() == Intrinsic::assume && 4241 "must be an assume intrinsic"); 4242 4243 if (!isValidAssumeForContext(I, Q.CxtI, Q.DT)) 4244 continue; 4245 4246 CmpInst::Predicate Pred; 4247 Value *LHS, *RHS; 4248 uint64_t ClassVal = 0; 4249 if (match(I->getArgOperand(0), m_FCmp(Pred, m_Value(LHS), m_Value(RHS)))) { 4250 auto [TestedValue, TestedMask] = 4251 fcmpToClassTest(Pred, *F, LHS, RHS, true); 4252 // First see if we can fold in fabs/fneg into the test. 4253 if (TestedValue == V) 4254 KnownFromAssume &= TestedMask; 4255 else { 4256 // Try again without the lookthrough if we found a different source 4257 // value. 4258 auto [TestedValue, TestedMask] = 4259 fcmpToClassTest(Pred, *F, LHS, RHS, false); 4260 if (TestedValue == V) 4261 KnownFromAssume &= TestedMask; 4262 } 4263 } else if (match(I->getArgOperand(0), 4264 m_Intrinsic<Intrinsic::is_fpclass>( 4265 m_Value(LHS), m_ConstantInt(ClassVal)))) { 4266 KnownFromAssume &= static_cast<FPClassTest>(ClassVal); 4267 } 4268 } 4269 4270 return KnownFromAssume; 4271 } 4272 4273 void computeKnownFPClass(const Value *V, const APInt &DemandedElts, 4274 FPClassTest InterestedClasses, KnownFPClass &Known, 4275 unsigned Depth, const SimplifyQuery &Q); 4276 4277 static void computeKnownFPClass(const Value *V, KnownFPClass &Known, 4278 FPClassTest InterestedClasses, unsigned Depth, 4279 const SimplifyQuery &Q) { 4280 auto *FVTy = dyn_cast<FixedVectorType>(V->getType()); 4281 APInt DemandedElts = 4282 FVTy ? 
APInt::getAllOnes(FVTy->getNumElements()) : APInt(1, 1); 4283 computeKnownFPClass(V, DemandedElts, InterestedClasses, Known, Depth, Q); 4284 } 4285 4286 static void computeKnownFPClassForFPTrunc(const Operator *Op, 4287 const APInt &DemandedElts, 4288 FPClassTest InterestedClasses, 4289 KnownFPClass &Known, unsigned Depth, 4290 const SimplifyQuery &Q) { 4291 if ((InterestedClasses & 4292 (KnownFPClass::OrderedLessThanZeroMask | fcNan)) == fcNone) 4293 return; 4294 4295 KnownFPClass KnownSrc; 4296 computeKnownFPClass(Op->getOperand(0), DemandedElts, InterestedClasses, 4297 KnownSrc, Depth + 1, Q); 4298 4299 // Sign should be preserved 4300 // TODO: Handle cannot be ordered greater than zero 4301 if (KnownSrc.cannotBeOrderedLessThanZero()) 4302 Known.knownNot(KnownFPClass::OrderedLessThanZeroMask); 4303 4304 Known.propagateNaN(KnownSrc, true); 4305 4306 // Infinity needs a range check. 4307 } 4308 4309 // TODO: Merge implementation of cannotBeOrderedLessThanZero into here. 4310 void computeKnownFPClass(const Value *V, const APInt &DemandedElts, 4311 FPClassTest InterestedClasses, KnownFPClass &Known, 4312 unsigned Depth, const SimplifyQuery &Q) { 4313 assert(Known.isUnknown() && "should not be called with known information"); 4314 4315 if (!DemandedElts) { 4316 // No demanded elts, better to assume we don't know anything. 4317 Known.resetAll(); 4318 return; 4319 } 4320 4321 assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth"); 4322 4323 if (auto *CFP = dyn_cast_or_null<ConstantFP>(V)) { 4324 Known.KnownFPClasses = CFP->getValueAPF().classify(); 4325 Known.SignBit = CFP->isNegative(); 4326 return; 4327 } 4328 4329 // Try to handle fixed width vector constants 4330 auto *VFVTy = dyn_cast<FixedVectorType>(V->getType()); 4331 const Constant *CV = dyn_cast<Constant>(V); 4332 if (VFVTy && CV) { 4333 Known.KnownFPClasses = fcNone; 4334 4335 // For vectors, verify that each element is not NaN. 4336 unsigned NumElts = VFVTy->getNumElements(); 4337 for (unsigned i = 0; i != NumElts; ++i) { 4338 Constant *Elt = CV->getAggregateElement(i); 4339 if (!Elt) { 4340 Known = KnownFPClass(); 4341 return; 4342 } 4343 if (isa<UndefValue>(Elt)) 4344 continue; 4345 auto *CElt = dyn_cast<ConstantFP>(Elt); 4346 if (!CElt) { 4347 Known = KnownFPClass(); 4348 return; 4349 } 4350 4351 KnownFPClass KnownElt{CElt->getValueAPF().classify(), CElt->isNegative()}; 4352 Known |= KnownElt; 4353 } 4354 4355 return; 4356 } 4357 4358 FPClassTest KnownNotFromFlags = fcNone; 4359 if (const auto *CB = dyn_cast<CallBase>(V)) 4360 KnownNotFromFlags |= CB->getRetNoFPClass(); 4361 else if (const auto *Arg = dyn_cast<Argument>(V)) 4362 KnownNotFromFlags |= Arg->getNoFPClass(); 4363 4364 const Operator *Op = dyn_cast<Operator>(V); 4365 if (const FPMathOperator *FPOp = dyn_cast_or_null<FPMathOperator>(Op)) { 4366 if (FPOp->hasNoNaNs()) 4367 KnownNotFromFlags |= fcNan; 4368 if (FPOp->hasNoInfs()) 4369 KnownNotFromFlags |= fcInf; 4370 } 4371 4372 if (Q.AC) { 4373 FPClassTest AssumedClasses = computeKnownFPClassFromAssumes(V, Q); 4374 KnownNotFromFlags |= ~AssumedClasses; 4375 } 4376 4377 // We no longer need to find out about these bits from inputs if we can 4378 // assume this from flags/attributes. 4379 InterestedClasses &= ~KnownNotFromFlags; 4380 4381 auto ClearClassesFromFlags = make_scope_exit([=, &Known] { 4382 Known.knownNot(KnownNotFromFlags); 4383 }); 4384 4385 if (!Op) 4386 return; 4387 4388 // All recursive calls that increase depth must come after this. 
4389 if (Depth == MaxAnalysisRecursionDepth) 4390 return; 4391 4392 const unsigned Opc = Op->getOpcode(); 4393 switch (Opc) { 4394 case Instruction::FNeg: { 4395 computeKnownFPClass(Op->getOperand(0), DemandedElts, InterestedClasses, 4396 Known, Depth + 1, Q); 4397 Known.fneg(); 4398 break; 4399 } 4400 case Instruction::Select: { 4401 Value *Cond = Op->getOperand(0); 4402 Value *LHS = Op->getOperand(1); 4403 Value *RHS = Op->getOperand(2); 4404 4405 FPClassTest FilterLHS = fcAllFlags; 4406 FPClassTest FilterRHS = fcAllFlags; 4407 4408 Value *TestedValue = nullptr; 4409 FPClassTest TestedMask = fcNone; 4410 uint64_t ClassVal = 0; 4411 const Function *F = cast<Instruction>(Op)->getFunction(); 4412 CmpInst::Predicate Pred; 4413 Value *CmpLHS, *CmpRHS; 4414 if (F && match(Cond, m_FCmp(Pred, m_Value(CmpLHS), m_Value(CmpRHS)))) { 4415 // If the select filters out a value based on the class, it no longer 4416 // participates in the class of the result 4417 4418 // TODO: In some degenerate cases we can infer something if we try again 4419 // without looking through sign operations. 4420 bool LookThroughFAbsFNeg = CmpLHS != LHS && CmpLHS != RHS; 4421 std::tie(TestedValue, TestedMask) = 4422 fcmpToClassTest(Pred, *F, CmpLHS, CmpRHS, LookThroughFAbsFNeg); 4423 } else if (match(Cond, 4424 m_Intrinsic<Intrinsic::is_fpclass>( 4425 m_Value(TestedValue), m_ConstantInt(ClassVal)))) { 4426 TestedMask = static_cast<FPClassTest>(ClassVal); 4427 } 4428 4429 if (TestedValue == LHS) { 4430 // match !isnan(x) ? x : y 4431 FilterLHS = TestedMask; 4432 } else if (TestedValue == RHS) { 4433 // match !isnan(x) ? y : x 4434 FilterRHS = ~TestedMask; 4435 } 4436 4437 KnownFPClass Known2; 4438 computeKnownFPClass(LHS, DemandedElts, InterestedClasses & FilterLHS, Known, 4439 Depth + 1, Q); 4440 Known.KnownFPClasses &= FilterLHS; 4441 4442 computeKnownFPClass(RHS, DemandedElts, InterestedClasses & FilterRHS, 4443 Known2, Depth + 1, Q); 4444 Known2.KnownFPClasses &= FilterRHS; 4445 4446 Known |= Known2; 4447 break; 4448 } 4449 case Instruction::Call: { 4450 const CallInst *II = cast<CallInst>(Op); 4451 const Intrinsic::ID IID = II->getIntrinsicID(); 4452 switch (IID) { 4453 case Intrinsic::fabs: { 4454 if ((InterestedClasses & (fcNan | fcPositive)) != fcNone) { 4455 // If we only care about the sign bit we don't need to inspect the 4456 // operand. 4457 computeKnownFPClass(II->getArgOperand(0), DemandedElts, 4458 InterestedClasses, Known, Depth + 1, Q); 4459 } 4460 4461 Known.fabs(); 4462 break; 4463 } 4464 case Intrinsic::copysign: { 4465 KnownFPClass KnownSign; 4466 4467 computeKnownFPClass(II->getArgOperand(0), DemandedElts, InterestedClasses, 4468 Known, Depth + 1, Q); 4469 computeKnownFPClass(II->getArgOperand(1), DemandedElts, InterestedClasses, 4470 KnownSign, Depth + 1, Q); 4471 Known.copysign(KnownSign); 4472 break; 4473 } 4474 case Intrinsic::fma: 4475 case Intrinsic::fmuladd: { 4476 if ((InterestedClasses & fcNegative) == fcNone) 4477 break; 4478 4479 if (II->getArgOperand(0) != II->getArgOperand(1)) 4480 break; 4481 4482 // The multiply cannot be -0 and therefore the add can't be -0 4483 Known.knownNot(fcNegZero); 4484 4485 // x * x + y is non-negative if y is non-negative. 
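      // E.g. for llvm.fmuladd(%x, %x, 1.0) the addend is known non-negative,
      // so the result is known not to be in fcNegative (it may still be NaN).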
4486 KnownFPClass KnownAddend; 4487 computeKnownFPClass(II->getArgOperand(2), DemandedElts, InterestedClasses, 4488 KnownAddend, Depth + 1, Q); 4489 4490 // TODO: Known sign bit with no nans 4491 if (KnownAddend.cannotBeOrderedLessThanZero()) 4492 Known.knownNot(fcNegative); 4493 break; 4494 } 4495 case Intrinsic::sqrt: 4496 case Intrinsic::experimental_constrained_sqrt: { 4497 KnownFPClass KnownSrc; 4498 FPClassTest InterestedSrcs = InterestedClasses; 4499 if (InterestedClasses & fcNan) 4500 InterestedSrcs |= KnownFPClass::OrderedLessThanZeroMask; 4501 4502 computeKnownFPClass(II->getArgOperand(0), DemandedElts, InterestedSrcs, 4503 KnownSrc, Depth + 1, Q); 4504 4505 if (KnownSrc.isKnownNeverPosInfinity()) 4506 Known.knownNot(fcPosInf); 4507 if (KnownSrc.isKnownNever(fcSNan)) 4508 Known.knownNot(fcSNan); 4509 4510 // Any negative value besides -0 returns a nan. 4511 if (KnownSrc.isKnownNeverNaN() && KnownSrc.cannotBeOrderedLessThanZero()) 4512 Known.knownNot(fcNan); 4513 4514 // The only negative value that can be returned is -0 for -0 inputs. 4515 Known.knownNot(fcNegInf | fcNegSubnormal | fcNegNormal); 4516 4517 // If the input denormal mode could be PreserveSign, a negative 4518 // subnormal input could produce a negative zero output. 4519 const Function *F = II->getFunction(); 4520 if (Q.IIQ.hasNoSignedZeros(II) || 4521 (F && KnownSrc.isKnownNeverLogicalNegZero(*F, II->getType()))) { 4522 Known.knownNot(fcNegZero); 4523 if (KnownSrc.isKnownNeverNaN()) 4524 Known.SignBit = false; 4525 } 4526 4527 break; 4528 } 4529 case Intrinsic::sin: 4530 case Intrinsic::cos: { 4531 // Return NaN on infinite inputs. 4532 KnownFPClass KnownSrc; 4533 computeKnownFPClass(II->getArgOperand(0), DemandedElts, InterestedClasses, 4534 KnownSrc, Depth + 1, Q); 4535 Known.knownNot(fcInf); 4536 if (KnownSrc.isKnownNeverNaN() && KnownSrc.isKnownNeverInfinity()) 4537 Known.knownNot(fcNan); 4538 break; 4539 } 4540 case Intrinsic::maxnum: 4541 case Intrinsic::minnum: 4542 case Intrinsic::minimum: 4543 case Intrinsic::maximum: { 4544 KnownFPClass KnownLHS, KnownRHS; 4545 computeKnownFPClass(II->getArgOperand(0), DemandedElts, InterestedClasses, 4546 KnownLHS, Depth + 1, Q); 4547 computeKnownFPClass(II->getArgOperand(1), DemandedElts, InterestedClasses, 4548 KnownRHS, Depth + 1, Q); 4549 4550 bool NeverNaN = KnownLHS.isKnownNeverNaN() || KnownRHS.isKnownNeverNaN(); 4551 Known = KnownLHS | KnownRHS; 4552 4553 // If either operand is not NaN, the result is not NaN. 4554 if (NeverNaN && (IID == Intrinsic::minnum || IID == Intrinsic::maxnum)) 4555 Known.knownNot(fcNan); 4556 4557 if (IID == Intrinsic::maxnum) { 4558 // If at least one operand is known to be positive, the result must be 4559 // positive. 4560 if ((KnownLHS.cannotBeOrderedLessThanZero() && 4561 KnownLHS.isKnownNeverNaN()) || 4562 (KnownRHS.cannotBeOrderedLessThanZero() && 4563 KnownRHS.isKnownNeverNaN())) 4564 Known.knownNot(KnownFPClass::OrderedLessThanZeroMask); 4565 } else if (IID == Intrinsic::maximum) { 4566 // If at least one operand is known to be positive, the result must be 4567 // positive. 4568 if (KnownLHS.cannotBeOrderedLessThanZero() || 4569 KnownRHS.cannotBeOrderedLessThanZero()) 4570 Known.knownNot(KnownFPClass::OrderedLessThanZeroMask); 4571 } else if (IID == Intrinsic::minnum) { 4572 // If at least one operand is known to be negative, the result must be 4573 // negative. 
4574 if ((KnownLHS.cannotBeOrderedGreaterThanZero() && 4575 KnownLHS.isKnownNeverNaN()) || 4576 (KnownRHS.cannotBeOrderedGreaterThanZero() && 4577 KnownRHS.isKnownNeverNaN())) 4578 Known.knownNot(KnownFPClass::OrderedGreaterThanZeroMask); 4579 } else { 4580 // If at least one operand is known to be negative, the result must be 4581 // negative. 4582 if (KnownLHS.cannotBeOrderedGreaterThanZero() || 4583 KnownRHS.cannotBeOrderedGreaterThanZero()) 4584 Known.knownNot(KnownFPClass::OrderedGreaterThanZeroMask); 4585 } 4586 4587 // Fixup zero handling if denormals could be returned as a zero. 4588 // 4589 // As there's no spec for denormal flushing, be conservative with the 4590 // treatment of denormals that could be flushed to zero. For older 4591 // subtargets on AMDGPU the min/max instructions would not flush the 4592 // output and return the original value. 4593 // 4594 // TODO: This could be refined based on the sign 4595 if ((Known.KnownFPClasses & fcZero) != fcNone && 4596 !Known.isKnownNeverSubnormal()) { 4597 const Function *Parent = II->getFunction(); 4598 if (!Parent) 4599 break; 4600 4601 DenormalMode Mode = Parent->getDenormalMode( 4602 II->getType()->getScalarType()->getFltSemantics()); 4603 if (Mode != DenormalMode::getIEEE()) 4604 Known.KnownFPClasses |= fcZero; 4605 } 4606 4607 break; 4608 } 4609 case Intrinsic::canonicalize: { 4610 KnownFPClass KnownSrc; 4611 computeKnownFPClass(II->getArgOperand(0), DemandedElts, InterestedClasses, 4612 KnownSrc, Depth + 1, Q); 4613 4614 // This is essentially a stronger form of 4615 // propagateCanonicalizingSrc. Other "canonicalizing" operations don't 4616 // actually have an IR canonicalization guarantee. 4617 4618 // Canonicalize may flush denormals to zero, so we have to consider the 4619 // denormal mode to preserve known-not-0 knowledge. 4620 Known.KnownFPClasses = KnownSrc.KnownFPClasses | fcZero | fcQNan; 4621 4622 // Stronger version of propagateNaN 4623 // Canonicalize is guaranteed to quiet signaling nans. 4624 if (KnownSrc.isKnownNeverNaN()) 4625 Known.knownNot(fcNan); 4626 else 4627 Known.knownNot(fcSNan); 4628 4629 const Function *F = II->getFunction(); 4630 if (!F) 4631 break; 4632 4633 // If the parent function flushes denormals, the canonical output cannot 4634 // be a denormal. 
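    // E.g. with "denormal-fp-math"="preserve-sign,preserve-sign" a subnormal
    // input is flushed, so the canonicalized result is never fcSubnormal.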
4635 const fltSemantics &FPType = 4636 II->getType()->getScalarType()->getFltSemantics(); 4637 DenormalMode DenormMode = F->getDenormalMode(FPType); 4638 if (DenormMode == DenormalMode::getIEEE()) { 4639 if (KnownSrc.isKnownNever(fcPosZero)) 4640 Known.knownNot(fcPosZero); 4641 if (KnownSrc.isKnownNever(fcNegZero)) 4642 Known.knownNot(fcNegZero); 4643 break; 4644 } 4645 4646 if (DenormMode.inputsAreZero() || DenormMode.outputsAreZero()) 4647 Known.knownNot(fcSubnormal); 4648 4649 if (DenormMode.Input == DenormalMode::PositiveZero || 4650 (DenormMode.Output == DenormalMode::PositiveZero && 4651 DenormMode.Input == DenormalMode::IEEE)) 4652 Known.knownNot(fcNegZero); 4653 4654 break; 4655 } 4656 case Intrinsic::trunc: 4657 case Intrinsic::floor: 4658 case Intrinsic::ceil: 4659 case Intrinsic::rint: 4660 case Intrinsic::nearbyint: 4661 case Intrinsic::round: 4662 case Intrinsic::roundeven: { 4663 KnownFPClass KnownSrc; 4664 FPClassTest InterestedSrcs = InterestedClasses; 4665 if (InterestedSrcs & fcPosFinite) 4666 InterestedSrcs |= fcPosFinite; 4667 if (InterestedSrcs & fcNegFinite) 4668 InterestedSrcs |= fcNegFinite; 4669 computeKnownFPClass(II->getArgOperand(0), DemandedElts, InterestedSrcs, 4670 KnownSrc, Depth + 1, Q); 4671 4672 // Integer results cannot be subnormal. 4673 Known.knownNot(fcSubnormal); 4674 4675 Known.propagateNaN(KnownSrc, true); 4676 4677 // Pass through infinities, except PPC_FP128 is a special case for 4678 // intrinsics other than trunc. 4679 if (IID == Intrinsic::trunc || !V->getType()->isMultiUnitFPType()) { 4680 if (KnownSrc.isKnownNeverPosInfinity()) 4681 Known.knownNot(fcPosInf); 4682 if (KnownSrc.isKnownNeverNegInfinity()) 4683 Known.knownNot(fcNegInf); 4684 } 4685 4686 // Negative round ups to 0 produce -0 4687 if (KnownSrc.isKnownNever(fcPosFinite)) 4688 Known.knownNot(fcPosFinite); 4689 if (KnownSrc.isKnownNever(fcNegFinite)) 4690 Known.knownNot(fcNegFinite); 4691 4692 break; 4693 } 4694 case Intrinsic::exp: 4695 case Intrinsic::exp2: 4696 case Intrinsic::exp10: { 4697 Known.knownNot(fcNegative); 4698 if ((InterestedClasses & fcNan) == fcNone) 4699 break; 4700 4701 KnownFPClass KnownSrc; 4702 computeKnownFPClass(II->getArgOperand(0), DemandedElts, InterestedClasses, 4703 KnownSrc, Depth + 1, Q); 4704 if (KnownSrc.isKnownNeverNaN()) { 4705 Known.knownNot(fcNan); 4706 Known.SignBit = false; 4707 } 4708 4709 break; 4710 } 4711 case Intrinsic::fptrunc_round: { 4712 computeKnownFPClassForFPTrunc(Op, DemandedElts, InterestedClasses, Known, 4713 Depth, Q); 4714 break; 4715 } 4716 case Intrinsic::log: 4717 case Intrinsic::log10: 4718 case Intrinsic::log2: 4719 case Intrinsic::experimental_constrained_log: 4720 case Intrinsic::experimental_constrained_log10: 4721 case Intrinsic::experimental_constrained_log2: { 4722 // log(+inf) -> +inf 4723 // log([+-]0.0) -> -inf 4724 // log(-inf) -> nan 4725 // log(-x) -> nan 4726 if ((InterestedClasses & (fcNan | fcInf)) == fcNone) 4727 break; 4728 4729 FPClassTest InterestedSrcs = InterestedClasses; 4730 if ((InterestedClasses & fcNegInf) != fcNone) 4731 InterestedSrcs |= fcZero | fcSubnormal; 4732 if ((InterestedClasses & fcNan) != fcNone) 4733 InterestedSrcs |= fcNan | (fcNegative & ~fcNan); 4734 4735 KnownFPClass KnownSrc; 4736 computeKnownFPClass(II->getArgOperand(0), DemandedElts, InterestedSrcs, 4737 KnownSrc, Depth + 1, Q); 4738 4739 if (KnownSrc.isKnownNeverPosInfinity()) 4740 Known.knownNot(fcPosInf); 4741 4742 if (KnownSrc.isKnownNeverNaN() && KnownSrc.cannotBeOrderedLessThanZero()) 4743 Known.knownNot(fcNan); 4744 4745 
const Function *F = II->getFunction(); 4746 if (F && KnownSrc.isKnownNeverLogicalZero(*F, II->getType())) 4747 Known.knownNot(fcNegInf); 4748 4749 break; 4750 } 4751 case Intrinsic::powi: { 4752 if ((InterestedClasses & fcNegative) == fcNone) 4753 break; 4754 4755 const Value *Exp = II->getArgOperand(1); 4756 Type *ExpTy = Exp->getType(); 4757 unsigned BitWidth = ExpTy->getScalarType()->getIntegerBitWidth(); 4758 KnownBits ExponentKnownBits(BitWidth); 4759 computeKnownBits(Exp, isa<VectorType>(ExpTy) ? DemandedElts : APInt(1, 1), 4760 ExponentKnownBits, Depth + 1, Q); 4761 4762 if (ExponentKnownBits.Zero[0]) { // Is even 4763 Known.knownNot(fcNegative); 4764 break; 4765 } 4766 4767 // Given that exp is an integer, here are the 4768 // ways that pow can return a negative value: 4769 // 4770 // pow(-x, exp) --> negative if exp is odd and x is negative. 4771 // pow(-0, exp) --> -inf if exp is negative odd. 4772 // pow(-0, exp) --> -0 if exp is positive odd. 4773 // pow(-inf, exp) --> -0 if exp is negative odd. 4774 // pow(-inf, exp) --> -inf if exp is positive odd. 4775 KnownFPClass KnownSrc; 4776 computeKnownFPClass(II->getArgOperand(0), DemandedElts, fcNegative, 4777 KnownSrc, Depth + 1, Q); 4778 if (KnownSrc.isKnownNever(fcNegative)) 4779 Known.knownNot(fcNegative); 4780 break; 4781 } 4782 case Intrinsic::ldexp: { 4783 KnownFPClass KnownSrc; 4784 computeKnownFPClass(II->getArgOperand(0), DemandedElts, InterestedClasses, 4785 KnownSrc, Depth + 1, Q); 4786 Known.propagateNaN(KnownSrc, /*PropagateSign=*/true); 4787 4788 // Sign is preserved, but underflows may produce zeroes. 4789 if (KnownSrc.isKnownNever(fcNegative)) 4790 Known.knownNot(fcNegative); 4791 else if (KnownSrc.cannotBeOrderedLessThanZero()) 4792 Known.knownNot(KnownFPClass::OrderedLessThanZeroMask); 4793 4794 if (KnownSrc.isKnownNever(fcPositive)) 4795 Known.knownNot(fcPositive); 4796 else if (KnownSrc.cannotBeOrderedGreaterThanZero()) 4797 Known.knownNot(KnownFPClass::OrderedGreaterThanZeroMask); 4798 4799 // Can refine inf/zero handling based on the exponent operand. 4800 const FPClassTest ExpInfoMask = fcZero | fcSubnormal | fcInf; 4801 if ((InterestedClasses & ExpInfoMask) == fcNone) 4802 break; 4803 if ((KnownSrc.KnownFPClasses & ExpInfoMask) == fcNone) 4804 break; 4805 4806 const fltSemantics &Flt = 4807 II->getType()->getScalarType()->getFltSemantics(); 4808 unsigned Precision = APFloat::semanticsPrecision(Flt); 4809 const Value *ExpArg = II->getArgOperand(1); 4810 ConstantRange ExpRange = computeConstantRange( 4811 ExpArg, true, Q.IIQ.UseInstrInfo, Q.AC, Q.CxtI, Q.DT, Depth + 1); 4812 4813 const int MantissaBits = Precision - 1; 4814 if (ExpRange.getSignedMin().sge(static_cast<int64_t>(MantissaBits))) 4815 Known.knownNot(fcSubnormal); 4816 4817 const Function *F = II->getFunction(); 4818 const APInt *ConstVal = ExpRange.getSingleElement(); 4819 if (ConstVal && ConstVal->isZero()) { 4820 // ldexp(x, 0) -> x, so propagate everything. 
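// (ldexp is still treated as a canonicalizing operation here: depending on
// F's denormal mode a denormal x may be flushed to zero, which is why the
// source classes go through propagateCanonicalizingSrc instead of being
// copied verbatim.)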
4821 Known.propagateCanonicalizingSrc(KnownSrc, *F, II->getType()); 4822 } else if (ExpRange.isAllNegative()) { 4823 // If we know the power is <= 0, can't introduce inf 4824 if (KnownSrc.isKnownNeverPosInfinity()) 4825 Known.knownNot(fcPosInf); 4826 if (KnownSrc.isKnownNeverNegInfinity()) 4827 Known.knownNot(fcNegInf); 4828 } else if (ExpRange.isAllNonNegative()) { 4829 // If we know the power is >= 0, can't introduce subnormal or zero 4830 if (KnownSrc.isKnownNeverPosSubnormal()) 4831 Known.knownNot(fcPosSubnormal); 4832 if (KnownSrc.isKnownNeverNegSubnormal()) 4833 Known.knownNot(fcNegSubnormal); 4834 if (F && KnownSrc.isKnownNeverLogicalPosZero(*F, II->getType())) 4835 Known.knownNot(fcPosZero); 4836 if (F && KnownSrc.isKnownNeverLogicalNegZero(*F, II->getType())) 4837 Known.knownNot(fcNegZero); 4838 } 4839 4840 break; 4841 } 4842 case Intrinsic::arithmetic_fence: { 4843 computeKnownFPClass(II->getArgOperand(0), DemandedElts, InterestedClasses, 4844 Known, Depth + 1, Q); 4845 break; 4846 } 4847 case Intrinsic::experimental_constrained_sitofp: 4848 case Intrinsic::experimental_constrained_uitofp: 4849 // Cannot produce nan 4850 Known.knownNot(fcNan); 4851 4852 // sitofp and uitofp turn into +0.0 for zero. 4853 Known.knownNot(fcNegZero); 4854 4855 // Integers cannot be subnormal 4856 Known.knownNot(fcSubnormal); 4857 4858 if (IID == Intrinsic::experimental_constrained_uitofp) 4859 Known.signBitMustBeZero(); 4860 4861 // TODO: Copy inf handling from instructions 4862 break; 4863 default: 4864 break; 4865 } 4866 4867 break; 4868 } 4869 case Instruction::FAdd: 4870 case Instruction::FSub: { 4871 KnownFPClass KnownLHS, KnownRHS; 4872 bool WantNegative = 4873 Op->getOpcode() == Instruction::FAdd && 4874 (InterestedClasses & KnownFPClass::OrderedLessThanZeroMask) != fcNone; 4875 bool WantNaN = (InterestedClasses & fcNan) != fcNone; 4876 bool WantNegZero = (InterestedClasses & fcNegZero) != fcNone; 4877 4878 if (!WantNaN && !WantNegative && !WantNegZero) 4879 break; 4880 4881 FPClassTest InterestedSrcs = InterestedClasses; 4882 if (WantNegative) 4883 InterestedSrcs |= KnownFPClass::OrderedLessThanZeroMask; 4884 if (InterestedClasses & fcNan) 4885 InterestedSrcs |= fcInf; 4886 computeKnownFPClass(Op->getOperand(1), DemandedElts, InterestedSrcs, 4887 KnownRHS, Depth + 1, Q); 4888 4889 if ((WantNaN && KnownRHS.isKnownNeverNaN()) || 4890 (WantNegative && KnownRHS.cannotBeOrderedLessThanZero()) || 4891 WantNegZero || Opc == Instruction::FSub) { 4892 4893 // RHS is canonically cheaper to compute. Skip inspecting the LHS if 4894 // there's no point. 4895 computeKnownFPClass(Op->getOperand(0), DemandedElts, InterestedSrcs, 4896 KnownLHS, Depth + 1, Q); 4897 // Adding positive and negative infinity produces NaN. 4898 // TODO: Check sign of infinities. 4899 if (KnownLHS.isKnownNeverNaN() && KnownRHS.isKnownNeverNaN() && 4900 (KnownLHS.isKnownNeverInfinity() || KnownRHS.isKnownNeverInfinity())) 4901 Known.knownNot(fcNan); 4902 4903 // FIXME: Context function should always be passed in separately 4904 const Function *F = cast<Instruction>(Op)->getFunction(); 4905 4906 if (Op->getOpcode() == Instruction::FAdd) { 4907 if (KnownLHS.cannotBeOrderedLessThanZero() && 4908 KnownRHS.cannotBeOrderedLessThanZero()) 4909 Known.knownNot(KnownFPClass::OrderedLessThanZeroMask); 4910 if (!F) 4911 break; 4912 4913 // (fadd x, 0.0) is guaranteed to return +0.0, not -0.0. 
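// More generally, under the default rounding mode an fadd only produces
// -0.0 when both addends are -0.0, so it suffices that one operand is known
// never to be a "logical" -0.0, provided the result cannot be flushed to
// -0.0 either (checked below).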
4914 if ((KnownLHS.isKnownNeverLogicalNegZero(*F, Op->getType()) || 4915 KnownRHS.isKnownNeverLogicalNegZero(*F, Op->getType())) && 4916 // Make sure output negative denormal can't flush to -0 4917 outputDenormalIsIEEEOrPosZero(*F, Op->getType())) 4918 Known.knownNot(fcNegZero); 4919 } else { 4920 if (!F) 4921 break; 4922 4923 // Only fsub -0, +0 can return -0 4924 if ((KnownLHS.isKnownNeverLogicalNegZero(*F, Op->getType()) || 4925 KnownRHS.isKnownNeverLogicalPosZero(*F, Op->getType())) && 4926 // Make sure output negative denormal can't flush to -0 4927 outputDenormalIsIEEEOrPosZero(*F, Op->getType())) 4928 Known.knownNot(fcNegZero); 4929 } 4930 } 4931 4932 break; 4933 } 4934 case Instruction::FMul: { 4935 // X * X is always non-negative or a NaN. 4936 if (Op->getOperand(0) == Op->getOperand(1)) 4937 Known.knownNot(fcNegative); 4938 4939 if ((InterestedClasses & fcNan) != fcNan) 4940 break; 4941 4942 // fcSubnormal is only needed in case of DAZ. 4943 const FPClassTest NeedForNan = fcNan | fcInf | fcZero | fcSubnormal; 4944 4945 KnownFPClass KnownLHS, KnownRHS; 4946 computeKnownFPClass(Op->getOperand(1), DemandedElts, NeedForNan, KnownRHS, 4947 Depth + 1, Q); 4948 if (!KnownRHS.isKnownNeverNaN()) 4949 break; 4950 4951 computeKnownFPClass(Op->getOperand(0), DemandedElts, NeedForNan, KnownLHS, 4952 Depth + 1, Q); 4953 if (!KnownLHS.isKnownNeverNaN()) 4954 break; 4955 4956 // If 0 * +/-inf produces NaN. 4957 if (KnownLHS.isKnownNeverInfinity() && KnownRHS.isKnownNeverInfinity()) { 4958 Known.knownNot(fcNan); 4959 break; 4960 } 4961 4962 const Function *F = cast<Instruction>(Op)->getFunction(); 4963 if (!F) 4964 break; 4965 4966 if ((KnownRHS.isKnownNeverInfinity() || 4967 KnownLHS.isKnownNeverLogicalZero(*F, Op->getType())) && 4968 (KnownLHS.isKnownNeverInfinity() || 4969 KnownRHS.isKnownNeverLogicalZero(*F, Op->getType()))) 4970 Known.knownNot(fcNan); 4971 4972 break; 4973 } 4974 case Instruction::FDiv: 4975 case Instruction::FRem: { 4976 if (Op->getOperand(0) == Op->getOperand(1)) { 4977 // TODO: Could filter out snan if we inspect the operand 4978 if (Op->getOpcode() == Instruction::FDiv) { 4979 // X / X is always exactly 1.0 or a NaN. 4980 Known.KnownFPClasses = fcNan | fcPosNormal; 4981 } else { 4982 // X % X is always exactly [+-]0.0 or a NaN. 4983 Known.KnownFPClasses = fcNan | fcZero; 4984 } 4985 4986 break; 4987 } 4988 4989 const bool WantNan = (InterestedClasses & fcNan) != fcNone; 4990 const bool WantNegative = (InterestedClasses & fcNegative) != fcNone; 4991 const bool WantPositive = 4992 Opc == Instruction::FRem && (InterestedClasses & fcPositive) != fcNone; 4993 if (!WantNan && !WantNegative && !WantPositive) 4994 break; 4995 4996 KnownFPClass KnownLHS, KnownRHS; 4997 4998 computeKnownFPClass(Op->getOperand(1), DemandedElts, 4999 fcNan | fcInf | fcZero | fcNegative, KnownRHS, 5000 Depth + 1, Q); 5001 5002 bool KnowSomethingUseful = 5003 KnownRHS.isKnownNeverNaN() || KnownRHS.isKnownNever(fcNegative); 5004 5005 if (KnowSomethingUseful || WantPositive) { 5006 const FPClassTest InterestedLHS = 5007 WantPositive ? fcAllFlags 5008 : fcNan | fcInf | fcZero | fcSubnormal | fcNegative; 5009 5010 computeKnownFPClass(Op->getOperand(0), DemandedElts, 5011 InterestedClasses & InterestedLHS, KnownLHS, 5012 Depth + 1, Q); 5013 } 5014 5015 const Function *F = cast<Instruction>(Op)->getFunction(); 5016 5017 if (Op->getOpcode() == Instruction::FDiv) { 5018 // Only 0/0, Inf/Inf produce NaN. 
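// That is, assuming neither operand is already NaN, the quotient can only
// be NaN when both operands may be infinities or both may be (logical)
// zeroes; ruling out one side of each pair is enough.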
5019 if (KnownLHS.isKnownNeverNaN() && KnownRHS.isKnownNeverNaN() && 5020 (KnownLHS.isKnownNeverInfinity() || 5021 KnownRHS.isKnownNeverInfinity()) && 5022 ((F && KnownLHS.isKnownNeverLogicalZero(*F, Op->getType())) || 5023 (F && KnownRHS.isKnownNeverLogicalZero(*F, Op->getType())))) { 5024 Known.knownNot(fcNan); 5025 } 5026 5027 // X / -0.0 is -Inf (or NaN). 5028 // +X / +X is +X 5029 if (KnownLHS.isKnownNever(fcNegative) && KnownRHS.isKnownNever(fcNegative)) 5030 Known.knownNot(fcNegative); 5031 } else { 5032 // Inf REM x and x REM 0 produce NaN. 5033 if (KnownLHS.isKnownNeverNaN() && KnownRHS.isKnownNeverNaN() && 5034 KnownLHS.isKnownNeverInfinity() && F && 5035 KnownRHS.isKnownNeverLogicalZero(*F, Op->getType())) { 5036 Known.knownNot(fcNan); 5037 } 5038 5039 // The sign for frem is the same as the first operand. 5040 if (KnownLHS.cannotBeOrderedLessThanZero()) 5041 Known.knownNot(KnownFPClass::OrderedLessThanZeroMask); 5042 if (KnownLHS.cannotBeOrderedGreaterThanZero()) 5043 Known.knownNot(KnownFPClass::OrderedGreaterThanZeroMask); 5044 5045 // See if we can be more aggressive about the sign of 0. 5046 if (KnownLHS.isKnownNever(fcNegative)) 5047 Known.knownNot(fcNegative); 5048 if (KnownLHS.isKnownNever(fcPositive)) 5049 Known.knownNot(fcPositive); 5050 } 5051 5052 break; 5053 } 5054 case Instruction::FPExt: { 5055 // Infinity, nan and zero propagate from source. 5056 computeKnownFPClass(Op->getOperand(0), DemandedElts, InterestedClasses, 5057 Known, Depth + 1, Q); 5058 5059 const fltSemantics &DstTy = 5060 Op->getType()->getScalarType()->getFltSemantics(); 5061 const fltSemantics &SrcTy = 5062 Op->getOperand(0)->getType()->getScalarType()->getFltSemantics(); 5063 5064 // All subnormal inputs should be in the normal range in the result type. 5065 if (APFloat::isRepresentableAsNormalIn(SrcTy, DstTy)) 5066 Known.knownNot(fcSubnormal); 5067 5068 // Sign bit of a nan isn't guaranteed. 5069 if (!Known.isKnownNeverNaN()) 5070 Known.SignBit = std::nullopt; 5071 break; 5072 } 5073 case Instruction::FPTrunc: { 5074 computeKnownFPClassForFPTrunc(Op, DemandedElts, InterestedClasses, Known, 5075 Depth, Q); 5076 break; 5077 } 5078 case Instruction::SIToFP: 5079 case Instruction::UIToFP: { 5080 // Cannot produce nan 5081 Known.knownNot(fcNan); 5082 5083 // Integers cannot be subnormal 5084 Known.knownNot(fcSubnormal); 5085 5086 // sitofp and uitofp turn into +0.0 for zero. 5087 Known.knownNot(fcNegZero); 5088 if (Op->getOpcode() == Instruction::UIToFP) 5089 Known.signBitMustBeZero(); 5090 5091 if (InterestedClasses & fcInf) { 5092 // Get width of largest magnitude integer (remove a bit if signed). 5093 // This still works for a signed minimum value because the largest FP 5094 // value is scaled by some fraction close to 2.0 (1.0 + 0.xxxx). 5095 int IntSize = Op->getOperand(0)->getType()->getScalarSizeInBits(); 5096 if (Op->getOpcode() == Instruction::SIToFP) 5097 --IntSize; 5098 5099 // If the exponent of the largest finite FP value can hold the largest 5100 // integer, the result of the cast must be finite. 5101 Type *FPTy = Op->getType()->getScalarType(); 5102 if (ilogb(APFloat::getLargest(FPTy->getFltSemantics())) >= IntSize) 5103 Known.knownNot(fcInf); 5104 } 5105 5106 break; 5107 } 5108 case Instruction::ExtractElement: { 5109 // Look through extract element. If the index is non-constant or 5110 // out-of-range demand all elements, otherwise just the extracted element. 
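// E.g. for
//   %x = extractelement <4 x float> %v, i32 1
// only lane 1 of %v needs to be classified, while
//   %y = extractelement <4 x float> %v, i32 %i
// has to assume any lane may be selected.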
5111 const Value *Vec = Op->getOperand(0); 5112 const Value *Idx = Op->getOperand(1); 5113 auto *CIdx = dyn_cast<ConstantInt>(Idx); 5114 5115 if (auto *VecTy = dyn_cast<FixedVectorType>(Vec->getType())) { 5116 unsigned NumElts = VecTy->getNumElements(); 5117 APInt DemandedVecElts = APInt::getAllOnes(NumElts); 5118 if (CIdx && CIdx->getValue().ult(NumElts)) 5119 DemandedVecElts = APInt::getOneBitSet(NumElts, CIdx->getZExtValue()); 5120 return computeKnownFPClass(Vec, DemandedVecElts, InterestedClasses, Known, 5121 Depth + 1, Q); 5122 } 5123 5124 break; 5125 } 5126 case Instruction::InsertElement: { 5127 if (isa<ScalableVectorType>(Op->getType())) 5128 return; 5129 5130 const Value *Vec = Op->getOperand(0); 5131 const Value *Elt = Op->getOperand(1); 5132 auto *CIdx = dyn_cast<ConstantInt>(Op->getOperand(2)); 5133 // Early out if the index is non-constant or out-of-range. 5134 unsigned NumElts = DemandedElts.getBitWidth(); 5135 if (!CIdx || CIdx->getValue().uge(NumElts)) 5136 return; 5137 5138 unsigned EltIdx = CIdx->getZExtValue(); 5139 // Do we demand the inserted element? 5140 if (DemandedElts[EltIdx]) { 5141 computeKnownFPClass(Elt, Known, InterestedClasses, Depth + 1, Q); 5142 // If we don't know any bits, early out. 5143 if (Known.isUnknown()) 5144 break; 5145 } else { 5146 Known.KnownFPClasses = fcNone; 5147 } 5148 5149 // We don't need the base vector element that has been inserted. 5150 APInt DemandedVecElts = DemandedElts; 5151 DemandedVecElts.clearBit(EltIdx); 5152 if (!!DemandedVecElts) { 5153 KnownFPClass Known2; 5154 computeKnownFPClass(Vec, DemandedVecElts, InterestedClasses, Known2, 5155 Depth + 1, Q); 5156 Known |= Known2; 5157 } 5158 5159 break; 5160 } 5161 case Instruction::ShuffleVector: { 5162 // For undef elements, we don't know anything about the common state of 5163 // the shuffle result. 5164 APInt DemandedLHS, DemandedRHS; 5165 auto *Shuf = dyn_cast<ShuffleVectorInst>(Op); 5166 if (!Shuf || !getShuffleDemandedElts(Shuf, DemandedElts, DemandedLHS, DemandedRHS)) 5167 return; 5168 5169 if (!!DemandedLHS) { 5170 const Value *LHS = Shuf->getOperand(0); 5171 computeKnownFPClass(LHS, DemandedLHS, InterestedClasses, Known, 5172 Depth + 1, Q); 5173 5174 // If we don't know any bits, early out. 
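// (If nothing is known about the demanded LHS lanes, OR-ing in the RHS
// lanes below cannot improve the result, so skip querying the RHS.)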
5175 if (Known.isUnknown()) 5176 break; 5177 } else { 5178 Known.KnownFPClasses = fcNone; 5179 } 5180 5181 if (!!DemandedRHS) { 5182 KnownFPClass Known2; 5183 const Value *RHS = Shuf->getOperand(1); 5184 computeKnownFPClass(RHS, DemandedRHS, InterestedClasses, Known2, 5185 Depth + 1, Q); 5186 Known |= Known2; 5187 } 5188 5189 break; 5190 } 5191 case Instruction::ExtractValue: { 5192 const ExtractValueInst *Extract = cast<ExtractValueInst>(Op); 5193 ArrayRef<unsigned> Indices = Extract->getIndices(); 5194 const Value *Src = Extract->getAggregateOperand(); 5195 if (isa<StructType>(Src->getType()) && Indices.size() == 1 && 5196 Indices[0] == 0) { 5197 if (const auto *II = dyn_cast<IntrinsicInst>(Src)) { 5198 switch (II->getIntrinsicID()) { 5199 case Intrinsic::frexp: { 5200 Known.knownNot(fcSubnormal); 5201 5202 KnownFPClass KnownSrc; 5203 computeKnownFPClass(II->getArgOperand(0), DemandedElts, 5204 InterestedClasses, KnownSrc, Depth + 1, Q); 5205 5206 const Function *F = cast<Instruction>(Op)->getFunction(); 5207 5208 if (KnownSrc.isKnownNever(fcNegative)) 5209 Known.knownNot(fcNegative); 5210 else { 5211 if (F && KnownSrc.isKnownNeverLogicalNegZero(*F, Op->getType())) 5212 Known.knownNot(fcNegZero); 5213 if (KnownSrc.isKnownNever(fcNegInf)) 5214 Known.knownNot(fcNegInf); 5215 } 5216 5217 if (KnownSrc.isKnownNever(fcPositive)) 5218 Known.knownNot(fcPositive); 5219 else { 5220 if (F && KnownSrc.isKnownNeverLogicalPosZero(*F, Op->getType())) 5221 Known.knownNot(fcPosZero); 5222 if (KnownSrc.isKnownNever(fcPosInf)) 5223 Known.knownNot(fcPosInf); 5224 } 5225 5226 Known.propagateNaN(KnownSrc); 5227 return; 5228 } 5229 default: 5230 break; 5231 } 5232 } 5233 } 5234 5235 computeKnownFPClass(Src, DemandedElts, InterestedClasses, Known, Depth + 1, 5236 Q); 5237 break; 5238 } 5239 case Instruction::PHI: { 5240 const PHINode *P = cast<PHINode>(Op); 5241 // Unreachable blocks may have zero-operand PHI nodes. 5242 if (P->getNumIncomingValues() == 0) 5243 break; 5244 5245 // Otherwise take the unions of the known bit sets of the operands, 5246 // taking conservative care to avoid excessive recursion. 5247 const unsigned PhiRecursionLimit = MaxAnalysisRecursionDepth - 2; 5248 5249 if (Depth < PhiRecursionLimit) { 5250 // Skip if every incoming value references to ourself. 5251 if (isa_and_nonnull<UndefValue>(P->hasConstantValue())) 5252 break; 5253 5254 bool First = true; 5255 5256 for (Value *IncValue : P->incoming_values()) { 5257 // Skip direct self references. 5258 if (IncValue == P) 5259 continue; 5260 5261 KnownFPClass KnownSrc; 5262 // Recurse, but cap the recursion to two levels, because we don't want 5263 // to waste time spinning around in loops. We need at least depth 2 to 5264 // detect known sign bits. 
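// Note that PhiRecursionLimit is passed as the depth here rather than
// Depth + 1, so each incoming value is analyzed with only the remaining
// couple of recursion levels, regardless of how deep the phi itself sits.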
5265 computeKnownFPClass(IncValue, DemandedElts, InterestedClasses, KnownSrc, 5266 PhiRecursionLimit, Q); 5267 5268 if (First) { 5269 Known = KnownSrc; 5270 First = false; 5271 } else { 5272 Known |= KnownSrc; 5273 } 5274 5275 if (Known.KnownFPClasses == fcAllFlags) 5276 break; 5277 } 5278 } 5279 5280 break; 5281 } 5282 default: 5283 break; 5284 } 5285 } 5286 5287 KnownFPClass llvm::computeKnownFPClass( 5288 const Value *V, const APInt &DemandedElts, const DataLayout &DL, 5289 FPClassTest InterestedClasses, unsigned Depth, const TargetLibraryInfo *TLI, 5290 AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT, 5291 bool UseInstrInfo) { 5292 KnownFPClass KnownClasses; 5293 ::computeKnownFPClass( 5294 V, DemandedElts, InterestedClasses, KnownClasses, Depth, 5295 SimplifyQuery(DL, TLI, DT, AC, safeCxtI(V, CxtI), UseInstrInfo)); 5296 return KnownClasses; 5297 } 5298 5299 KnownFPClass llvm::computeKnownFPClass( 5300 const Value *V, const DataLayout &DL, FPClassTest InterestedClasses, 5301 unsigned Depth, const TargetLibraryInfo *TLI, AssumptionCache *AC, 5302 const Instruction *CxtI, const DominatorTree *DT, bool UseInstrInfo) { 5303 KnownFPClass Known; 5304 ::computeKnownFPClass( 5305 V, Known, InterestedClasses, Depth, 5306 SimplifyQuery(DL, TLI, DT, AC, safeCxtI(V, CxtI), UseInstrInfo)); 5307 return Known; 5308 } 5309 5310 Value *llvm::isBytewiseValue(Value *V, const DataLayout &DL) { 5311 5312 // All byte-wide stores are splatable, even of arbitrary variables. 5313 if (V->getType()->isIntegerTy(8)) 5314 return V; 5315 5316 LLVMContext &Ctx = V->getContext(); 5317 5318 // Undef don't care. 5319 auto *UndefInt8 = UndefValue::get(Type::getInt8Ty(Ctx)); 5320 if (isa<UndefValue>(V)) 5321 return UndefInt8; 5322 5323 // Return Undef for zero-sized type. 5324 if (DL.getTypeStoreSize(V->getType()).isZero()) 5325 return UndefInt8; 5326 5327 Constant *C = dyn_cast<Constant>(V); 5328 if (!C) { 5329 // Conceptually, we could handle things like: 5330 // %a = zext i8 %X to i16 5331 // %b = shl i16 %a, 8 5332 // %c = or i16 %a, %b 5333 // but until there is an example that actually needs this, it doesn't seem 5334 // worth worrying about. 5335 return nullptr; 5336 } 5337 5338 // Handle 'null' ConstantArrayZero etc. 5339 if (C->isNullValue()) 5340 return Constant::getNullValue(Type::getInt8Ty(Ctx)); 5341 5342 // Constant floating-point values can be handled as integer values if the 5343 // corresponding integer value is "byteable". An important case is 0.0. 5344 if (ConstantFP *CFP = dyn_cast<ConstantFP>(C)) { 5345 Type *Ty = nullptr; 5346 if (CFP->getType()->isHalfTy()) 5347 Ty = Type::getInt16Ty(Ctx); 5348 else if (CFP->getType()->isFloatTy()) 5349 Ty = Type::getInt32Ty(Ctx); 5350 else if (CFP->getType()->isDoubleTy()) 5351 Ty = Type::getInt64Ty(Ctx); 5352 // Don't handle long double formats, which have strange constraints. 5353 return Ty ? isBytewiseValue(ConstantExpr::getBitCast(CFP, Ty), DL) 5354 : nullptr; 5355 } 5356 5357 // We can handle constant integers that are multiple of 8 bits. 
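// E.g. i32 0xAAAAAAAA is a splat of the byte 0xAA and is returned as
// i8 0xAA, while i32 0x01020304 is not byte-splattable and gives nullptr.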
5358 if (ConstantInt *CI = dyn_cast<ConstantInt>(C)) { 5359 if (CI->getBitWidth() % 8 == 0) { 5360 assert(CI->getBitWidth() > 8 && "8 bits should be handled above!"); 5361 if (!CI->getValue().isSplat(8)) 5362 return nullptr; 5363 return ConstantInt::get(Ctx, CI->getValue().trunc(8)); 5364 } 5365 } 5366 5367 if (auto *CE = dyn_cast<ConstantExpr>(C)) { 5368 if (CE->getOpcode() == Instruction::IntToPtr) { 5369 if (auto *PtrTy = dyn_cast<PointerType>(CE->getType())) { 5370 unsigned BitWidth = DL.getPointerSizeInBits(PtrTy->getAddressSpace()); 5371 if (Constant *Op = ConstantFoldIntegerCast( 5372 CE->getOperand(0), Type::getIntNTy(Ctx, BitWidth), false, DL)) 5373 return isBytewiseValue(Op, DL); 5374 } 5375 } 5376 } 5377 5378 auto Merge = [&](Value *LHS, Value *RHS) -> Value * { 5379 if (LHS == RHS) 5380 return LHS; 5381 if (!LHS || !RHS) 5382 return nullptr; 5383 if (LHS == UndefInt8) 5384 return RHS; 5385 if (RHS == UndefInt8) 5386 return LHS; 5387 return nullptr; 5388 }; 5389 5390 if (ConstantDataSequential *CA = dyn_cast<ConstantDataSequential>(C)) { 5391 Value *Val = UndefInt8; 5392 for (unsigned I = 0, E = CA->getNumElements(); I != E; ++I) 5393 if (!(Val = Merge(Val, isBytewiseValue(CA->getElementAsConstant(I), DL)))) 5394 return nullptr; 5395 return Val; 5396 } 5397 5398 if (isa<ConstantAggregate>(C)) { 5399 Value *Val = UndefInt8; 5400 for (unsigned I = 0, E = C->getNumOperands(); I != E; ++I) 5401 if (!(Val = Merge(Val, isBytewiseValue(C->getOperand(I), DL)))) 5402 return nullptr; 5403 return Val; 5404 } 5405 5406 // Don't try to handle the handful of other constants. 5407 return nullptr; 5408 } 5409 5410 // This is the recursive version of BuildSubAggregate. It takes a few different 5411 // arguments. Idxs is the index within the nested struct From that we are 5412 // looking at now (which is of type IndexedType). IdxSkip is the number of 5413 // indices from Idxs that should be left out when inserting into the resulting 5414 // struct. To is the result struct built so far, new insertvalue instructions 5415 // build on that. 5416 static Value *BuildSubAggregate(Value *From, Value* To, Type *IndexedType, 5417 SmallVectorImpl<unsigned> &Idxs, 5418 unsigned IdxSkip, 5419 Instruction *InsertBefore) { 5420 StructType *STy = dyn_cast<StructType>(IndexedType); 5421 if (STy) { 5422 // Save the original To argument so we can modify it 5423 Value *OrigTo = To; 5424 // General case, the type indexed by Idxs is a struct 5425 for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { 5426 // Process each struct element recursively 5427 Idxs.push_back(i); 5428 Value *PrevTo = To; 5429 To = BuildSubAggregate(From, To, STy->getElementType(i), Idxs, IdxSkip, 5430 InsertBefore); 5431 Idxs.pop_back(); 5432 if (!To) { 5433 // Couldn't find any inserted value for this index? Cleanup 5434 while (PrevTo != OrigTo) { 5435 InsertValueInst* Del = cast<InsertValueInst>(PrevTo); 5436 PrevTo = Del->getAggregateOperand(); 5437 Del->eraseFromParent(); 5438 } 5439 // Stop processing elements 5440 break; 5441 } 5442 } 5443 // If we successfully found a value for each of our subaggregates 5444 if (To) 5445 return To; 5446 } 5447 // Base case, the type indexed by SourceIdxs is not a struct, or not all of 5448 // the struct's elements had a value that was inserted directly. In the latter 5449 // case, perhaps we can't determine each of the subelements individually, but 5450 // we might be able to find the complete struct somewhere. 
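// (If FindInsertedValue below fails, this call returns null and the
// recursive caller above erases whatever insertvalue chain it had already
// built for this element.)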
5451 5452 // Find the value that is at that particular spot 5453 Value *V = FindInsertedValue(From, Idxs); 5454 5455 if (!V) 5456 return nullptr; 5457 5458 // Insert the value in the new (sub) aggregate 5459 return InsertValueInst::Create(To, V, ArrayRef(Idxs).slice(IdxSkip), "tmp", 5460 InsertBefore); 5461 } 5462 5463 // This helper takes a nested struct and extracts a part of it (which is again a 5464 // struct) into a new value. For example, given the struct: 5465 // { a, { b, { c, d }, e } } 5466 // and the indices "1, 1" this returns 5467 // { c, d }. 5468 // 5469 // It does this by inserting an insertvalue for each element in the resulting 5470 // struct, as opposed to just inserting a single struct. This will only work if 5471 // each of the elements of the substruct are known (ie, inserted into From by an 5472 // insertvalue instruction somewhere). 5473 // 5474 // All inserted insertvalue instructions are inserted before InsertBefore 5475 static Value *BuildSubAggregate(Value *From, ArrayRef<unsigned> idx_range, 5476 Instruction *InsertBefore) { 5477 assert(InsertBefore && "Must have someplace to insert!"); 5478 Type *IndexedType = ExtractValueInst::getIndexedType(From->getType(), 5479 idx_range); 5480 Value *To = PoisonValue::get(IndexedType); 5481 SmallVector<unsigned, 10> Idxs(idx_range.begin(), idx_range.end()); 5482 unsigned IdxSkip = Idxs.size(); 5483 5484 return BuildSubAggregate(From, To, IndexedType, Idxs, IdxSkip, InsertBefore); 5485 } 5486 5487 /// Given an aggregate and a sequence of indices, see if the scalar value 5488 /// indexed is already around as a register, for example if it was inserted 5489 /// directly into the aggregate. 5490 /// 5491 /// If InsertBefore is not null, this function will duplicate (modified) 5492 /// insertvalues when a part of a nested struct is extracted. 5493 Value *llvm::FindInsertedValue(Value *V, ArrayRef<unsigned> idx_range, 5494 Instruction *InsertBefore) { 5495 // Nothing to index? Just return V then (this is useful at the end of our 5496 // recursion). 5497 if (idx_range.empty()) 5498 return V; 5499 // We have indices, so V should have an indexable type. 5500 assert((V->getType()->isStructTy() || V->getType()->isArrayTy()) && 5501 "Not looking at a struct or array?"); 5502 assert(ExtractValueInst::getIndexedType(V->getType(), idx_range) && 5503 "Invalid indices for type?"); 5504 5505 if (Constant *C = dyn_cast<Constant>(V)) { 5506 C = C->getAggregateElement(idx_range[0]); 5507 if (!C) return nullptr; 5508 return FindInsertedValue(C, idx_range.slice(1), InsertBefore); 5509 } 5510 5511 if (InsertValueInst *I = dyn_cast<InsertValueInst>(V)) { 5512 // Loop the indices for the insertvalue instruction in parallel with the 5513 // requested indices 5514 const unsigned *req_idx = idx_range.begin(); 5515 for (const unsigned *i = I->idx_begin(), *e = I->idx_end(); 5516 i != e; ++i, ++req_idx) { 5517 if (req_idx == idx_range.end()) { 5518 // We can't handle this without inserting insertvalues 5519 if (!InsertBefore) 5520 return nullptr; 5521 5522 // The requested index identifies a part of a nested aggregate. Handle 5523 // this specially. 
For example,
5524 // %A = insertvalue { i32, {i32, i32 } } undef, i32 10, 1, 0
5525 // %B = insertvalue { i32, {i32, i32 } } %A, i32 11, 1, 1
5526 // %C = extractvalue {i32, { i32, i32 } } %B, 1
5527 // This can be changed into
5528 // %A = insertvalue {i32, i32 } undef, i32 10, 0
5529 // %C = insertvalue {i32, i32 } %A, i32 11, 1
5530 // which allows the unused 0,0 element from the nested struct to be
5531 // removed.
5532 return BuildSubAggregate(V, ArrayRef(idx_range.begin(), req_idx),
5533 InsertBefore);
5534 }
5535
5536 // This insertvalue inserts something other than what we are looking for.
5537 // See if the (aggregate) value inserted into has the value we are
5538 // looking for, then.
5539 if (*req_idx != *i)
5540 return FindInsertedValue(I->getAggregateOperand(), idx_range,
5541 InsertBefore);
5542 }
5543 // If we end up here, the indices of the insertvalue match with those
5544 // requested (though possibly only partially). Now we recursively look at
5545 // the inserted value, passing any remaining indices.
5546 return FindInsertedValue(I->getInsertedValueOperand(),
5547 ArrayRef(req_idx, idx_range.end()), InsertBefore);
5548 }
5549
5550 if (ExtractValueInst *I = dyn_cast<ExtractValueInst>(V)) {
5551 // If we're extracting a value from an aggregate that was extracted from
5552 // something else, we can extract from that something else directly instead.
5553 // However, we will need to chain I's indices with the requested indices.
5554
5555 // Calculate the number of indices required
5556 unsigned size = I->getNumIndices() + idx_range.size();
5557 // Allocate some space to put the new indices in
5558 SmallVector<unsigned, 5> Idxs;
5559 Idxs.reserve(size);
5560 // Add indices from the extract value instruction
5561 Idxs.append(I->idx_begin(), I->idx_end());
5562
5563 // Add requested indices
5564 Idxs.append(idx_range.begin(), idx_range.end());
5565
5566 assert(Idxs.size() == size
5567 && "Number of indices added not correct?");
5568
5569 return FindInsertedValue(I->getAggregateOperand(), Idxs, InsertBefore);
5570 }
5571 // Otherwise, we don't know (e.g., extracting from a function return value
5572 // or a load instruction).
5573 return nullptr;
5574 }
5575
5576 bool llvm::isGEPBasedOnPointerToString(const GEPOperator *GEP,
5577 unsigned CharSize) {
5578 // Make sure the GEP has exactly three arguments.
5579 if (GEP->getNumOperands() != 3)
5580 return false;
5581
5582 // Make sure the index-ee is a pointer to an array of integers that are
5583 // \p CharSize bits wide.
5584 ArrayType *AT = dyn_cast<ArrayType>(GEP->getSourceElementType());
5585 if (!AT || !AT->getElementType()->isIntegerTy(CharSize))
5586 return false;
5587
5588 // Check to make sure that the first operand of the GEP is an integer and
5589 // has value 0 so that we are sure we're indexing into the initializer.
5590 const ConstantInt *FirstIdx = dyn_cast<ConstantInt>(GEP->getOperand(1));
5591 if (!FirstIdx || !FirstIdx->isZero())
5592 return false;
5593
5594 return true;
5595 }
5596
5597 // If V refers to an initialized global constant, set Slice either to
5598 // its initializer if the size of its elements equals ElementSize, or,
5599 // for ElementSize == 8, to its representation as an array of unsigned
5600 // char. Return true on success.
5601 // Offset is in the unit "number of ElementSize-sized elements".
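// For example, given the (illustrative) global
//   @s = private constant [5 x i8] c"abcd\00"
// a query with ElementSize == 8 and Offset == 1 sets Slice.Array to the
// initializer of @s, Slice.Offset to 1, and Slice.Length to 4.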
5602 bool llvm::getConstantDataArrayInfo(const Value *V, 5603 ConstantDataArraySlice &Slice, 5604 unsigned ElementSize, uint64_t Offset) { 5605 assert(V && "V should not be null."); 5606 assert((ElementSize % 8) == 0 && 5607 "ElementSize expected to be a multiple of the size of a byte."); 5608 unsigned ElementSizeInBytes = ElementSize / 8; 5609 5610 // Drill down into the pointer expression V, ignoring any intervening 5611 // casts, and determine the identity of the object it references along 5612 // with the cumulative byte offset into it. 5613 const GlobalVariable *GV = 5614 dyn_cast<GlobalVariable>(getUnderlyingObject(V)); 5615 if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer()) 5616 // Fail if V is not based on constant global object. 5617 return false; 5618 5619 const DataLayout &DL = GV->getParent()->getDataLayout(); 5620 APInt Off(DL.getIndexTypeSizeInBits(V->getType()), 0); 5621 5622 if (GV != V->stripAndAccumulateConstantOffsets(DL, Off, 5623 /*AllowNonInbounds*/ true)) 5624 // Fail if a constant offset could not be determined. 5625 return false; 5626 5627 uint64_t StartIdx = Off.getLimitedValue(); 5628 if (StartIdx == UINT64_MAX) 5629 // Fail if the constant offset is excessive. 5630 return false; 5631 5632 // Off/StartIdx is in the unit of bytes. So we need to convert to number of 5633 // elements. Simply bail out if that isn't possible. 5634 if ((StartIdx % ElementSizeInBytes) != 0) 5635 return false; 5636 5637 Offset += StartIdx / ElementSizeInBytes; 5638 ConstantDataArray *Array = nullptr; 5639 ArrayType *ArrayTy = nullptr; 5640 5641 if (GV->getInitializer()->isNullValue()) { 5642 Type *GVTy = GV->getValueType(); 5643 uint64_t SizeInBytes = DL.getTypeStoreSize(GVTy).getFixedValue(); 5644 uint64_t Length = SizeInBytes / ElementSizeInBytes; 5645 5646 Slice.Array = nullptr; 5647 Slice.Offset = 0; 5648 // Return an empty Slice for undersized constants to let callers 5649 // transform even undefined library calls into simpler, well-defined 5650 // expressions. This is preferable to making the calls although it 5651 // prevents sanitizers from detecting such calls. 5652 Slice.Length = Length < Offset ? 0 : Length - Offset; 5653 return true; 5654 } 5655 5656 auto *Init = const_cast<Constant *>(GV->getInitializer()); 5657 if (auto *ArrayInit = dyn_cast<ConstantDataArray>(Init)) { 5658 Type *InitElTy = ArrayInit->getElementType(); 5659 if (InitElTy->isIntegerTy(ElementSize)) { 5660 // If Init is an initializer for an array of the expected type 5661 // and size, use it as is. 5662 Array = ArrayInit; 5663 ArrayTy = ArrayInit->getType(); 5664 } 5665 } 5666 5667 if (!Array) { 5668 if (ElementSize != 8) 5669 // TODO: Handle conversions to larger integral types. 5670 return false; 5671 5672 // Otherwise extract the portion of the initializer starting 5673 // at Offset as an array of bytes, and reset Offset. 5674 Init = ReadByteArrayFromGlobal(GV, Offset); 5675 if (!Init) 5676 return false; 5677 5678 Offset = 0; 5679 Array = dyn_cast<ConstantDataArray>(Init); 5680 ArrayTy = dyn_cast<ArrayType>(Init->getType()); 5681 } 5682 5683 uint64_t NumElts = ArrayTy->getArrayNumElements(); 5684 if (Offset > NumElts) 5685 return false; 5686 5687 Slice.Array = Array; 5688 Slice.Offset = Offset; 5689 Slice.Length = NumElts - Offset; 5690 return true; 5691 } 5692 5693 /// Extract bytes from the initializer of the constant array V, which need 5694 /// not be a nul-terminated string. On success, store the bytes in Str and 5695 /// return true. 
When TrimAtNul is set, Str will contain only the bytes up 5696 /// to but not including the first nul. Return false on failure. 5697 bool llvm::getConstantStringInfo(const Value *V, StringRef &Str, 5698 bool TrimAtNul) { 5699 ConstantDataArraySlice Slice; 5700 if (!getConstantDataArrayInfo(V, Slice, 8)) 5701 return false; 5702 5703 if (Slice.Array == nullptr) { 5704 if (TrimAtNul) { 5705 // Return a nul-terminated string even for an empty Slice. This is 5706 // safe because all existing SimplifyLibcalls callers require string 5707 // arguments and the behavior of the functions they fold is undefined 5708 // otherwise. Folding the calls this way is preferable to making 5709 // the undefined library calls, even though it prevents sanitizers 5710 // from reporting such calls. 5711 Str = StringRef(); 5712 return true; 5713 } 5714 if (Slice.Length == 1) { 5715 Str = StringRef("", 1); 5716 return true; 5717 } 5718 // We cannot instantiate a StringRef as we do not have an appropriate string 5719 // of 0s at hand. 5720 return false; 5721 } 5722 5723 // Start out with the entire array in the StringRef. 5724 Str = Slice.Array->getAsString(); 5725 // Skip over 'offset' bytes. 5726 Str = Str.substr(Slice.Offset); 5727 5728 if (TrimAtNul) { 5729 // Trim off the \0 and anything after it. If the array is not nul 5730 // terminated, we just return the whole end of string. The client may know 5731 // some other way that the string is length-bound. 5732 Str = Str.substr(0, Str.find('\0')); 5733 } 5734 return true; 5735 } 5736 5737 // These next two are very similar to the above, but also look through PHI 5738 // nodes. 5739 // TODO: See if we can integrate these two together. 5740 5741 /// If we can compute the length of the string pointed to by 5742 /// the specified pointer, return 'len+1'. If we can't, return 0. 5743 static uint64_t GetStringLengthH(const Value *V, 5744 SmallPtrSetImpl<const PHINode*> &PHIs, 5745 unsigned CharSize) { 5746 // Look through noop bitcast instructions. 5747 V = V->stripPointerCasts(); 5748 5749 // If this is a PHI node, there are two cases: either we have already seen it 5750 // or we haven't. 5751 if (const PHINode *PN = dyn_cast<PHINode>(V)) { 5752 if (!PHIs.insert(PN).second) 5753 return ~0ULL; // already in the set. 5754 5755 // If it was new, see if all the input strings are the same length. 5756 uint64_t LenSoFar = ~0ULL; 5757 for (Value *IncValue : PN->incoming_values()) { 5758 uint64_t Len = GetStringLengthH(IncValue, PHIs, CharSize); 5759 if (Len == 0) return 0; // Unknown length -> unknown. 5760 5761 if (Len == ~0ULL) continue; 5762 5763 if (Len != LenSoFar && LenSoFar != ~0ULL) 5764 return 0; // Disagree -> unknown. 5765 LenSoFar = Len; 5766 } 5767 5768 // Success, all agree. 5769 return LenSoFar; 5770 } 5771 5772 // strlen(select(c,x,y)) -> strlen(x) ^ strlen(y) 5773 if (const SelectInst *SI = dyn_cast<SelectInst>(V)) { 5774 uint64_t Len1 = GetStringLengthH(SI->getTrueValue(), PHIs, CharSize); 5775 if (Len1 == 0) return 0; 5776 uint64_t Len2 = GetStringLengthH(SI->getFalseValue(), PHIs, CharSize); 5777 if (Len2 == 0) return 0; 5778 if (Len1 == ~0ULL) return Len2; 5779 if (Len2 == ~0ULL) return Len1; 5780 if (Len1 != Len2) return 0; 5781 return Len1; 5782 } 5783 5784 // Otherwise, see if we can read the string. 5785 ConstantDataArraySlice Slice; 5786 if (!getConstantDataArrayInfo(V, Slice, CharSize)) 5787 return 0; 5788 5789 if (Slice.Array == nullptr) 5790 // Zeroinitializer (including an empty one). 5791 return 1; 5792 5793 // Search for the first nul character. 
Return a conservative result even 5794 // when there is no nul. This is safe since otherwise the string function 5795 // being folded such as strlen is undefined, and can be preferable to 5796 // making the undefined library call. 5797 unsigned NullIndex = 0; 5798 for (unsigned E = Slice.Length; NullIndex < E; ++NullIndex) { 5799 if (Slice.Array->getElementAsInteger(Slice.Offset + NullIndex) == 0) 5800 break; 5801 } 5802 5803 return NullIndex + 1; 5804 } 5805 5806 /// If we can compute the length of the string pointed to by 5807 /// the specified pointer, return 'len+1'. If we can't, return 0. 5808 uint64_t llvm::GetStringLength(const Value *V, unsigned CharSize) { 5809 if (!V->getType()->isPointerTy()) 5810 return 0; 5811 5812 SmallPtrSet<const PHINode*, 32> PHIs; 5813 uint64_t Len = GetStringLengthH(V, PHIs, CharSize); 5814 // If Len is ~0ULL, we had an infinite phi cycle: this is dead code, so return 5815 // an empty string as a length. 5816 return Len == ~0ULL ? 1 : Len; 5817 } 5818 5819 const Value * 5820 llvm::getArgumentAliasingToReturnedPointer(const CallBase *Call, 5821 bool MustPreserveNullness) { 5822 assert(Call && 5823 "getArgumentAliasingToReturnedPointer only works on nonnull calls"); 5824 if (const Value *RV = Call->getReturnedArgOperand()) 5825 return RV; 5826 // This can be used only as a aliasing property. 5827 if (isIntrinsicReturningPointerAliasingArgumentWithoutCapturing( 5828 Call, MustPreserveNullness)) 5829 return Call->getArgOperand(0); 5830 return nullptr; 5831 } 5832 5833 bool llvm::isIntrinsicReturningPointerAliasingArgumentWithoutCapturing( 5834 const CallBase *Call, bool MustPreserveNullness) { 5835 switch (Call->getIntrinsicID()) { 5836 case Intrinsic::launder_invariant_group: 5837 case Intrinsic::strip_invariant_group: 5838 case Intrinsic::aarch64_irg: 5839 case Intrinsic::aarch64_tagp: 5840 // The amdgcn_make_buffer_rsrc function does not alter the address of the 5841 // input pointer (and thus preserve null-ness for the purposes of escape 5842 // analysis, which is where the MustPreserveNullness flag comes in to play). 5843 // However, it will not necessarily map ptr addrspace(N) null to ptr 5844 // addrspace(8) null, aka the "null descriptor", which has "all loads return 5845 // 0, all stores are dropped" semantics. Given the context of this intrinsic 5846 // list, no one should be relying on such a strict interpretation of 5847 // MustPreserveNullness (and, at time of writing, they are not), but we 5848 // document this fact out of an abundance of caution. 5849 case Intrinsic::amdgcn_make_buffer_rsrc: 5850 return true; 5851 case Intrinsic::ptrmask: 5852 return !MustPreserveNullness; 5853 default: 5854 return false; 5855 } 5856 } 5857 5858 /// \p PN defines a loop-variant pointer to an object. Check if the 5859 /// previous iteration of the loop was referring to the same object as \p PN. 5860 static bool isSameUnderlyingObjectInLoop(const PHINode *PN, 5861 const LoopInfo *LI) { 5862 // Find the loop-defined value. 5863 Loop *L = LI->getLoopFor(PN->getParent()); 5864 if (PN->getNumIncomingValues() != 2) 5865 return true; 5866 5867 // Find the value from previous iteration. 
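// For a two-operand header phi this is the incoming value defined inside
// the loop; the other incoming value is the initial value from outside it.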
5868 auto *PrevValue = dyn_cast<Instruction>(PN->getIncomingValue(0)); 5869 if (!PrevValue || LI->getLoopFor(PrevValue->getParent()) != L) 5870 PrevValue = dyn_cast<Instruction>(PN->getIncomingValue(1)); 5871 if (!PrevValue || LI->getLoopFor(PrevValue->getParent()) != L) 5872 return true; 5873 5874 // If a new pointer is loaded in the loop, the pointer references a different 5875 // object in every iteration. E.g.: 5876 // for (i) 5877 // int *p = a[i]; 5878 // ... 5879 if (auto *Load = dyn_cast<LoadInst>(PrevValue)) 5880 if (!L->isLoopInvariant(Load->getPointerOperand())) 5881 return false; 5882 return true; 5883 } 5884 5885 const Value *llvm::getUnderlyingObject(const Value *V, unsigned MaxLookup) { 5886 if (!V->getType()->isPointerTy()) 5887 return V; 5888 for (unsigned Count = 0; MaxLookup == 0 || Count < MaxLookup; ++Count) { 5889 if (auto *GEP = dyn_cast<GEPOperator>(V)) { 5890 V = GEP->getPointerOperand(); 5891 } else if (Operator::getOpcode(V) == Instruction::BitCast || 5892 Operator::getOpcode(V) == Instruction::AddrSpaceCast) { 5893 V = cast<Operator>(V)->getOperand(0); 5894 if (!V->getType()->isPointerTy()) 5895 return V; 5896 } else if (auto *GA = dyn_cast<GlobalAlias>(V)) { 5897 if (GA->isInterposable()) 5898 return V; 5899 V = GA->getAliasee(); 5900 } else { 5901 if (auto *PHI = dyn_cast<PHINode>(V)) { 5902 // Look through single-arg phi nodes created by LCSSA. 5903 if (PHI->getNumIncomingValues() == 1) { 5904 V = PHI->getIncomingValue(0); 5905 continue; 5906 } 5907 } else if (auto *Call = dyn_cast<CallBase>(V)) { 5908 // CaptureTracking can know about special capturing properties of some 5909 // intrinsics like launder.invariant.group, that can't be expressed with 5910 // the attributes, but have properties like returning aliasing pointer. 5911 // Because some analysis may assume that nocaptured pointer is not 5912 // returned from some special intrinsic (because function would have to 5913 // be marked with returns attribute), it is crucial to use this function 5914 // because it should be in sync with CaptureTracking. Not using it may 5915 // cause weird miscompilations where 2 aliasing pointers are assumed to 5916 // noalias. 5917 if (auto *RP = getArgumentAliasingToReturnedPointer(Call, false)) { 5918 V = RP; 5919 continue; 5920 } 5921 } 5922 5923 return V; 5924 } 5925 assert(V->getType()->isPointerTy() && "Unexpected operand type!"); 5926 } 5927 return V; 5928 } 5929 5930 void llvm::getUnderlyingObjects(const Value *V, 5931 SmallVectorImpl<const Value *> &Objects, 5932 LoopInfo *LI, unsigned MaxLookup) { 5933 SmallPtrSet<const Value *, 4> Visited; 5934 SmallVector<const Value *, 4> Worklist; 5935 Worklist.push_back(V); 5936 do { 5937 const Value *P = Worklist.pop_back_val(); 5938 P = getUnderlyingObject(P, MaxLookup); 5939 5940 if (!Visited.insert(P).second) 5941 continue; 5942 5943 if (auto *SI = dyn_cast<SelectInst>(P)) { 5944 Worklist.push_back(SI->getTrueValue()); 5945 Worklist.push_back(SI->getFalseValue()); 5946 continue; 5947 } 5948 5949 if (auto *PN = dyn_cast<PHINode>(P)) { 5950 // If this PHI changes the underlying object in every iteration of the 5951 // loop, don't look through it. Consider: 5952 // int **A; 5953 // for (i) { 5954 // Prev = Curr; // Prev = PHI (Prev_0, Curr) 5955 // Curr = A[i]; 5956 // *Prev, *Curr; 5957 // 5958 // Prev is tracking Curr one iteration behind so they refer to different 5959 // underlying objects. 
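// isSameUnderlyingObjectInLoop errs on the side of returning true, so we
// only refuse to look through a header phi when a new pointer is visibly
// loaded on every iteration.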
5960 if (!LI || !LI->isLoopHeader(PN->getParent()) || 5961 isSameUnderlyingObjectInLoop(PN, LI)) 5962 append_range(Worklist, PN->incoming_values()); 5963 continue; 5964 } 5965 5966 Objects.push_back(P); 5967 } while (!Worklist.empty()); 5968 } 5969 5970 /// This is the function that does the work of looking through basic 5971 /// ptrtoint+arithmetic+inttoptr sequences. 5972 static const Value *getUnderlyingObjectFromInt(const Value *V) { 5973 do { 5974 if (const Operator *U = dyn_cast<Operator>(V)) { 5975 // If we find a ptrtoint, we can transfer control back to the 5976 // regular getUnderlyingObjectFromInt. 5977 if (U->getOpcode() == Instruction::PtrToInt) 5978 return U->getOperand(0); 5979 // If we find an add of a constant, a multiplied value, or a phi, it's 5980 // likely that the other operand will lead us to the base 5981 // object. We don't have to worry about the case where the 5982 // object address is somehow being computed by the multiply, 5983 // because our callers only care when the result is an 5984 // identifiable object. 5985 if (U->getOpcode() != Instruction::Add || 5986 (!isa<ConstantInt>(U->getOperand(1)) && 5987 Operator::getOpcode(U->getOperand(1)) != Instruction::Mul && 5988 !isa<PHINode>(U->getOperand(1)))) 5989 return V; 5990 V = U->getOperand(0); 5991 } else { 5992 return V; 5993 } 5994 assert(V->getType()->isIntegerTy() && "Unexpected operand type!"); 5995 } while (true); 5996 } 5997 5998 /// This is a wrapper around getUnderlyingObjects and adds support for basic 5999 /// ptrtoint+arithmetic+inttoptr sequences. 6000 /// It returns false if unidentified object is found in getUnderlyingObjects. 6001 bool llvm::getUnderlyingObjectsForCodeGen(const Value *V, 6002 SmallVectorImpl<Value *> &Objects) { 6003 SmallPtrSet<const Value *, 16> Visited; 6004 SmallVector<const Value *, 4> Working(1, V); 6005 do { 6006 V = Working.pop_back_val(); 6007 6008 SmallVector<const Value *, 4> Objs; 6009 getUnderlyingObjects(V, Objs); 6010 6011 for (const Value *V : Objs) { 6012 if (!Visited.insert(V).second) 6013 continue; 6014 if (Operator::getOpcode(V) == Instruction::IntToPtr) { 6015 const Value *O = 6016 getUnderlyingObjectFromInt(cast<User>(V)->getOperand(0)); 6017 if (O->getType()->isPointerTy()) { 6018 Working.push_back(O); 6019 continue; 6020 } 6021 } 6022 // If getUnderlyingObjects fails to find an identifiable object, 6023 // getUnderlyingObjectsForCodeGen also fails for safety. 
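// (Identified objects are things like allocas, global variables, byval or
// noalias arguments, and noalias calls; anything else could alias
// arbitrary memory.)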
6024 if (!isIdentifiedObject(V)) { 6025 Objects.clear(); 6026 return false; 6027 } 6028 Objects.push_back(const_cast<Value *>(V)); 6029 } 6030 } while (!Working.empty()); 6031 return true; 6032 } 6033 6034 AllocaInst *llvm::findAllocaForValue(Value *V, bool OffsetZero) { 6035 AllocaInst *Result = nullptr; 6036 SmallPtrSet<Value *, 4> Visited; 6037 SmallVector<Value *, 4> Worklist; 6038 6039 auto AddWork = [&](Value *V) { 6040 if (Visited.insert(V).second) 6041 Worklist.push_back(V); 6042 }; 6043 6044 AddWork(V); 6045 do { 6046 V = Worklist.pop_back_val(); 6047 assert(Visited.count(V)); 6048 6049 if (AllocaInst *AI = dyn_cast<AllocaInst>(V)) { 6050 if (Result && Result != AI) 6051 return nullptr; 6052 Result = AI; 6053 } else if (CastInst *CI = dyn_cast<CastInst>(V)) { 6054 AddWork(CI->getOperand(0)); 6055 } else if (PHINode *PN = dyn_cast<PHINode>(V)) { 6056 for (Value *IncValue : PN->incoming_values()) 6057 AddWork(IncValue); 6058 } else if (auto *SI = dyn_cast<SelectInst>(V)) { 6059 AddWork(SI->getTrueValue()); 6060 AddWork(SI->getFalseValue()); 6061 } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(V)) { 6062 if (OffsetZero && !GEP->hasAllZeroIndices()) 6063 return nullptr; 6064 AddWork(GEP->getPointerOperand()); 6065 } else if (CallBase *CB = dyn_cast<CallBase>(V)) { 6066 Value *Returned = CB->getReturnedArgOperand(); 6067 if (Returned) 6068 AddWork(Returned); 6069 else 6070 return nullptr; 6071 } else { 6072 return nullptr; 6073 } 6074 } while (!Worklist.empty()); 6075 6076 return Result; 6077 } 6078 6079 static bool onlyUsedByLifetimeMarkersOrDroppableInstsHelper( 6080 const Value *V, bool AllowLifetime, bool AllowDroppable) { 6081 for (const User *U : V->users()) { 6082 const IntrinsicInst *II = dyn_cast<IntrinsicInst>(U); 6083 if (!II) 6084 return false; 6085 6086 if (AllowLifetime && II->isLifetimeStartOrEnd()) 6087 continue; 6088 6089 if (AllowDroppable && II->isDroppable()) 6090 continue; 6091 6092 return false; 6093 } 6094 return true; 6095 } 6096 6097 bool llvm::onlyUsedByLifetimeMarkers(const Value *V) { 6098 return onlyUsedByLifetimeMarkersOrDroppableInstsHelper( 6099 V, /* AllowLifetime */ true, /* AllowDroppable */ false); 6100 } 6101 bool llvm::onlyUsedByLifetimeMarkersOrDroppableInsts(const Value *V) { 6102 return onlyUsedByLifetimeMarkersOrDroppableInstsHelper( 6103 V, /* AllowLifetime */ true, /* AllowDroppable */ true); 6104 } 6105 6106 bool llvm::mustSuppressSpeculation(const LoadInst &LI) { 6107 if (!LI.isUnordered()) 6108 return true; 6109 const Function &F = *LI.getFunction(); 6110 // Speculative load may create a race that did not exist in the source. 6111 return F.hasFnAttribute(Attribute::SanitizeThread) || 6112 // Speculative load may load data from dirty regions. 6113 F.hasFnAttribute(Attribute::SanitizeAddress) || 6114 F.hasFnAttribute(Attribute::SanitizeHWAddress); 6115 } 6116 6117 bool llvm::isSafeToSpeculativelyExecute(const Instruction *Inst, 6118 const Instruction *CtxI, 6119 AssumptionCache *AC, 6120 const DominatorTree *DT, 6121 const TargetLibraryInfo *TLI) { 6122 return isSafeToSpeculativelyExecuteWithOpcode(Inst->getOpcode(), Inst, CtxI, 6123 AC, DT, TLI); 6124 } 6125 6126 bool llvm::isSafeToSpeculativelyExecuteWithOpcode( 6127 unsigned Opcode, const Instruction *Inst, const Instruction *CtxI, 6128 AssumptionCache *AC, const DominatorTree *DT, 6129 const TargetLibraryInfo *TLI) { 6130 #ifndef NDEBUG 6131 if (Inst->getOpcode() != Opcode) { 6132 // Check that the operands are actually compatible with the Opcode override. 
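// (The override lets callers ask whether Inst's operands could safely be
// executed with a different opcode than Inst's own; the assertions below
// only sanity-check that the leading operand types make sense for it.)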
6133 auto hasEqualReturnAndLeadingOperandTypes = 6134 [](const Instruction *Inst, unsigned NumLeadingOperands) { 6135 if (Inst->getNumOperands() < NumLeadingOperands) 6136 return false; 6137 const Type *ExpectedType = Inst->getType(); 6138 for (unsigned ItOp = 0; ItOp < NumLeadingOperands; ++ItOp) 6139 if (Inst->getOperand(ItOp)->getType() != ExpectedType) 6140 return false; 6141 return true; 6142 }; 6143 assert(!Instruction::isBinaryOp(Opcode) || 6144 hasEqualReturnAndLeadingOperandTypes(Inst, 2)); 6145 assert(!Instruction::isUnaryOp(Opcode) || 6146 hasEqualReturnAndLeadingOperandTypes(Inst, 1)); 6147 } 6148 #endif 6149 6150 switch (Opcode) { 6151 default: 6152 return true; 6153 case Instruction::UDiv: 6154 case Instruction::URem: { 6155 // x / y is undefined if y == 0. 6156 const APInt *V; 6157 if (match(Inst->getOperand(1), m_APInt(V))) 6158 return *V != 0; 6159 return false; 6160 } 6161 case Instruction::SDiv: 6162 case Instruction::SRem: { 6163 // x / y is undefined if y == 0 or x == INT_MIN and y == -1 6164 const APInt *Numerator, *Denominator; 6165 if (!match(Inst->getOperand(1), m_APInt(Denominator))) 6166 return false; 6167 // We cannot hoist this division if the denominator is 0. 6168 if (*Denominator == 0) 6169 return false; 6170 // It's safe to hoist if the denominator is not 0 or -1. 6171 if (!Denominator->isAllOnes()) 6172 return true; 6173 // At this point we know that the denominator is -1. It is safe to hoist as 6174 // long we know that the numerator is not INT_MIN. 6175 if (match(Inst->getOperand(0), m_APInt(Numerator))) 6176 return !Numerator->isMinSignedValue(); 6177 // The numerator *might* be MinSignedValue. 6178 return false; 6179 } 6180 case Instruction::Load: { 6181 const LoadInst *LI = dyn_cast<LoadInst>(Inst); 6182 if (!LI) 6183 return false; 6184 if (mustSuppressSpeculation(*LI)) 6185 return false; 6186 const DataLayout &DL = LI->getModule()->getDataLayout(); 6187 return isDereferenceableAndAlignedPointer(LI->getPointerOperand(), 6188 LI->getType(), LI->getAlign(), DL, 6189 CtxI, AC, DT, TLI); 6190 } 6191 case Instruction::Call: { 6192 auto *CI = dyn_cast<const CallInst>(Inst); 6193 if (!CI) 6194 return false; 6195 const Function *Callee = CI->getCalledFunction(); 6196 6197 // The called function could have undefined behavior or side-effects, even 6198 // if marked readnone nounwind. 6199 return Callee && Callee->isSpeculatable(); 6200 } 6201 case Instruction::VAArg: 6202 case Instruction::Alloca: 6203 case Instruction::Invoke: 6204 case Instruction::CallBr: 6205 case Instruction::PHI: 6206 case Instruction::Store: 6207 case Instruction::Ret: 6208 case Instruction::Br: 6209 case Instruction::IndirectBr: 6210 case Instruction::Switch: 6211 case Instruction::Unreachable: 6212 case Instruction::Fence: 6213 case Instruction::AtomicRMW: 6214 case Instruction::AtomicCmpXchg: 6215 case Instruction::LandingPad: 6216 case Instruction::Resume: 6217 case Instruction::CatchSwitch: 6218 case Instruction::CatchPad: 6219 case Instruction::CatchRet: 6220 case Instruction::CleanupPad: 6221 case Instruction::CleanupRet: 6222 return false; // Misc instructions which have effects 6223 } 6224 } 6225 6226 bool llvm::mayHaveNonDefUseDependency(const Instruction &I) { 6227 if (I.mayReadOrWriteMemory()) 6228 // Memory dependency possible 6229 return true; 6230 if (!isSafeToSpeculativelyExecute(&I)) 6231 // Can't move above a maythrow call or infinite loop. Or if an 6232 // inalloca alloca, above a stacksave call. 
6233 return true; 6234 if (!isGuaranteedToTransferExecutionToSuccessor(&I)) 6235 // 1) Can't reorder two inf-loop calls, even if readonly 6236 // 2) Also can't reorder an inf-loop call below a instruction which isn't 6237 // safe to speculative execute. (Inverse of above) 6238 return true; 6239 return false; 6240 } 6241 6242 /// Convert ConstantRange OverflowResult into ValueTracking OverflowResult. 6243 static OverflowResult mapOverflowResult(ConstantRange::OverflowResult OR) { 6244 switch (OR) { 6245 case ConstantRange::OverflowResult::MayOverflow: 6246 return OverflowResult::MayOverflow; 6247 case ConstantRange::OverflowResult::AlwaysOverflowsLow: 6248 return OverflowResult::AlwaysOverflowsLow; 6249 case ConstantRange::OverflowResult::AlwaysOverflowsHigh: 6250 return OverflowResult::AlwaysOverflowsHigh; 6251 case ConstantRange::OverflowResult::NeverOverflows: 6252 return OverflowResult::NeverOverflows; 6253 } 6254 llvm_unreachable("Unknown OverflowResult"); 6255 } 6256 6257 /// Combine constant ranges from computeConstantRange() and computeKnownBits(). 6258 ConstantRange 6259 llvm::computeConstantRangeIncludingKnownBits(const WithCache<const Value *> &V, 6260 bool ForSigned, 6261 const SimplifyQuery &SQ) { 6262 ConstantRange CR1 = 6263 ConstantRange::fromKnownBits(V.getKnownBits(SQ), ForSigned); 6264 ConstantRange CR2 = computeConstantRange(V, ForSigned, SQ.IIQ.UseInstrInfo); 6265 ConstantRange::PreferredRangeType RangeType = 6266 ForSigned ? ConstantRange::Signed : ConstantRange::Unsigned; 6267 return CR1.intersectWith(CR2, RangeType); 6268 } 6269 6270 OverflowResult llvm::computeOverflowForUnsignedMul(const Value *LHS, 6271 const Value *RHS, 6272 const SimplifyQuery &SQ) { 6273 KnownBits LHSKnown = computeKnownBits(LHS, /*Depth=*/0, SQ); 6274 KnownBits RHSKnown = computeKnownBits(RHS, /*Depth=*/0, SQ); 6275 ConstantRange LHSRange = ConstantRange::fromKnownBits(LHSKnown, false); 6276 ConstantRange RHSRange = ConstantRange::fromKnownBits(RHSKnown, false); 6277 return mapOverflowResult(LHSRange.unsignedMulMayOverflow(RHSRange)); 6278 } 6279 6280 OverflowResult llvm::computeOverflowForSignedMul(const Value *LHS, 6281 const Value *RHS, 6282 const SimplifyQuery &SQ) { 6283 // Multiplying n * m significant bits yields a result of n + m significant 6284 // bits. If the total number of significant bits does not exceed the 6285 // result bit width (minus 1), there is no overflow. 6286 // This means if we have enough leading sign bits in the operands 6287 // we can guarantee that the result does not overflow. 6288 // Ref: "Hacker's Delight" by Henry Warren 6289 unsigned BitWidth = LHS->getType()->getScalarSizeInBits(); 6290 6291 // Note that underestimating the number of sign bits gives a more 6292 // conservative answer. 6293 unsigned SignBits = 6294 ::ComputeNumSignBits(LHS, 0, SQ) + ::ComputeNumSignBits(RHS, 0, SQ); 6295 6296 // First handle the easy case: if we have enough sign bits there's 6297 // definitely no overflow. 6298 if (SignBits > BitWidth + 1) 6299 return OverflowResult::NeverOverflows; 6300 6301 // There are two ambiguous cases where there can be no overflow: 6302 // SignBits == BitWidth + 1 and 6303 // SignBits == BitWidth 6304 // The second case is difficult to check, therefore we only handle the 6305 // first case. 6306 if (SignBits == BitWidth + 1) { 6307 // It overflows only when both arguments are negative and the true 6308 // product is exactly the minimum negative number. 6309 // E.g. 
mul i16 with 17 sign bits: 0xff00 * 0xff80 = 0x8000 6310 // For simplicity we just check if at least one side is not negative. 6311 KnownBits LHSKnown = computeKnownBits(LHS, /*Depth=*/0, SQ); 6312 KnownBits RHSKnown = computeKnownBits(RHS, /*Depth=*/0, SQ); 6313 if (LHSKnown.isNonNegative() || RHSKnown.isNonNegative()) 6314 return OverflowResult::NeverOverflows; 6315 } 6316 return OverflowResult::MayOverflow; 6317 } 6318 6319 OverflowResult 6320 llvm::computeOverflowForUnsignedAdd(const WithCache<const Value *> &LHS, 6321 const WithCache<const Value *> &RHS, 6322 const SimplifyQuery &SQ) { 6323 ConstantRange LHSRange = 6324 computeConstantRangeIncludingKnownBits(LHS, /*ForSigned=*/false, SQ); 6325 ConstantRange RHSRange = 6326 computeConstantRangeIncludingKnownBits(RHS, /*ForSigned=*/false, SQ); 6327 return mapOverflowResult(LHSRange.unsignedAddMayOverflow(RHSRange)); 6328 } 6329 6330 static OverflowResult 6331 computeOverflowForSignedAdd(const WithCache<const Value *> &LHS, 6332 const WithCache<const Value *> &RHS, 6333 const AddOperator *Add, const SimplifyQuery &SQ) { 6334 if (Add && Add->hasNoSignedWrap()) { 6335 return OverflowResult::NeverOverflows; 6336 } 6337 6338 // If LHS and RHS each have at least two sign bits, the addition will look 6339 // like 6340 // 6341 // XX..... + 6342 // YY..... 6343 // 6344 // If the carry into the most significant position is 0, X and Y can't both 6345 // be 1 and therefore the carry out of the addition is also 0. 6346 // 6347 // If the carry into the most significant position is 1, X and Y can't both 6348 // be 0 and therefore the carry out of the addition is also 1. 6349 // 6350 // Since the carry into the most significant position is always equal to 6351 // the carry out of the addition, there is no signed overflow. 6352 if (::ComputeNumSignBits(LHS, 0, SQ) > 1 && 6353 ::ComputeNumSignBits(RHS, 0, SQ) > 1) 6354 return OverflowResult::NeverOverflows; 6355 6356 ConstantRange LHSRange = 6357 computeConstantRangeIncludingKnownBits(LHS, /*ForSigned=*/true, SQ); 6358 ConstantRange RHSRange = 6359 computeConstantRangeIncludingKnownBits(RHS, /*ForSigned=*/true, SQ); 6360 OverflowResult OR = 6361 mapOverflowResult(LHSRange.signedAddMayOverflow(RHSRange)); 6362 if (OR != OverflowResult::MayOverflow) 6363 return OR; 6364 6365 // The remaining code needs Add to be available. Return early if it is not. 6366 if (!Add) 6367 return OverflowResult::MayOverflow; 6368 6369 // If the sign of Add is the same as at least one of the operands, this add 6370 // CANNOT overflow. If this can be determined from the known bits of the 6371 // operands the above signedAddMayOverflow() check will have already done so. 6372 // The only other way to improve on the known bits is from an assumption, so 6373 // call computeKnownBitsFromContext() directly.
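// For example (using i8 for concreteness): if the LHS is known non-negative,
// the only overflow still possible is a positive one, e.g. 100 + 50, and that
// wraps to a negative result (-106); so additionally knowing (say, from an
// assume) that the sum is non-negative rules overflow out entirely.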
6374 bool LHSOrRHSKnownNonNegative = 6375 (LHSRange.isAllNonNegative() || RHSRange.isAllNonNegative()); 6376 bool LHSOrRHSKnownNegative = 6377 (LHSRange.isAllNegative() || RHSRange.isAllNegative()); 6378 if (LHSOrRHSKnownNonNegative || LHSOrRHSKnownNegative) { 6379 KnownBits AddKnown(LHSRange.getBitWidth()); 6380 computeKnownBitsFromContext(Add, AddKnown, /*Depth=*/0, SQ); 6381 if ((AddKnown.isNonNegative() && LHSOrRHSKnownNonNegative) || 6382 (AddKnown.isNegative() && LHSOrRHSKnownNegative)) 6383 return OverflowResult::NeverOverflows; 6384 } 6385 6386 return OverflowResult::MayOverflow; 6387 } 6388 6389 OverflowResult llvm::computeOverflowForUnsignedSub(const Value *LHS, 6390 const Value *RHS, 6391 const SimplifyQuery &SQ) { 6392 // X - (X % ?) 6393 // The remainder of a value can't have greater magnitude than itself, 6394 // so the subtraction can't overflow. 6395 6396 // X - (X -nuw ?) 6397 // In the minimal case, this would simplify to "?", so there's no subtract 6398 // at all. But if this analysis is used to peek through casts, for example, 6399 // then determining no-overflow may allow other transforms. 6400 6401 // TODO: There are other patterns like this. 6402 // See simplifyICmpWithBinOpOnLHS() for candidates. 6403 if (match(RHS, m_URem(m_Specific(LHS), m_Value())) || 6404 match(RHS, m_NUWSub(m_Specific(LHS), m_Value()))) 6405 if (isGuaranteedNotToBeUndef(LHS, SQ.AC, SQ.CxtI, SQ.DT)) 6406 return OverflowResult::NeverOverflows; 6407 6408 // Checking for conditions implied by dominating conditions may be expensive. 6409 // Limit it to usub_with_overflow calls for now. 6410 if (match(SQ.CxtI, 6411 m_Intrinsic<Intrinsic::usub_with_overflow>(m_Value(), m_Value()))) 6412 if (auto C = isImpliedByDomCondition(CmpInst::ICMP_UGE, LHS, RHS, SQ.CxtI, 6413 SQ.DL)) { 6414 if (*C) 6415 return OverflowResult::NeverOverflows; 6416 return OverflowResult::AlwaysOverflowsLow; 6417 } 6418 ConstantRange LHSRange = 6419 computeConstantRangeIncludingKnownBits(LHS, /*ForSigned=*/false, SQ); 6420 ConstantRange RHSRange = 6421 computeConstantRangeIncludingKnownBits(RHS, /*ForSigned=*/false, SQ); 6422 return mapOverflowResult(LHSRange.unsignedSubMayOverflow(RHSRange)); 6423 } 6424 6425 OverflowResult llvm::computeOverflowForSignedSub(const Value *LHS, 6426 const Value *RHS, 6427 const SimplifyQuery &SQ) { 6428 // X - (X % ?) 6429 // The remainder of a value can't have greater magnitude than itself, 6430 // so the subtraction can't overflow. 6431 6432 // X - (X -nsw ?) 6433 // In the minimal case, this would simplify to "?", so there's no subtract 6434 // at all. But if this analysis is used to peek through casts, for example, 6435 // then determining no-overflow may allow other transforms. 6436 if (match(RHS, m_SRem(m_Specific(LHS), m_Value())) || 6437 match(RHS, m_NSWSub(m_Specific(LHS), m_Value()))) 6438 if (isGuaranteedNotToBeUndef(LHS, SQ.AC, SQ.CxtI, SQ.DT)) 6439 return OverflowResult::NeverOverflows; 6440 6441 // If LHS and RHS each have at least two sign bits, the subtraction 6442 // cannot overflow. 
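// Sketch of why, using i8: an operand with at least two sign bits lies in
// [-64, 63], so the difference of two such operands lies in [-127, 127],
// which always fits in the signed i8 range [-128, 127].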
6443 if (::ComputeNumSignBits(LHS, 0, SQ) > 1 && 6444 ::ComputeNumSignBits(RHS, 0, SQ) > 1) 6445 return OverflowResult::NeverOverflows; 6446 6447 ConstantRange LHSRange = 6448 computeConstantRangeIncludingKnownBits(LHS, /*ForSigned=*/true, SQ); 6449 ConstantRange RHSRange = 6450 computeConstantRangeIncludingKnownBits(RHS, /*ForSigned=*/true, SQ); 6451 return mapOverflowResult(LHSRange.signedSubMayOverflow(RHSRange)); 6452 } 6453 6454 bool llvm::isOverflowIntrinsicNoWrap(const WithOverflowInst *WO, 6455 const DominatorTree &DT) { 6456 SmallVector<const BranchInst *, 2> GuardingBranches; 6457 SmallVector<const ExtractValueInst *, 2> Results; 6458 6459 for (const User *U : WO->users()) { 6460 if (const auto *EVI = dyn_cast<ExtractValueInst>(U)) { 6461 assert(EVI->getNumIndices() == 1 && "Obvious from CI's type"); 6462 6463 if (EVI->getIndices()[0] == 0) 6464 Results.push_back(EVI); 6465 else { 6466 assert(EVI->getIndices()[0] == 1 && "Obvious from CI's type"); 6467 6468 for (const auto *U : EVI->users()) 6469 if (const auto *B = dyn_cast<BranchInst>(U)) { 6470 assert(B->isConditional() && "How else is it using an i1?"); 6471 GuardingBranches.push_back(B); 6472 } 6473 } 6474 } else { 6475 // We are using the aggregate directly in a way we don't want to analyze 6476 // here (storing it to a global, say). 6477 return false; 6478 } 6479 } 6480 6481 auto AllUsesGuardedByBranch = [&](const BranchInst *BI) { 6482 BasicBlockEdge NoWrapEdge(BI->getParent(), BI->getSuccessor(1)); 6483 if (!NoWrapEdge.isSingleEdge()) 6484 return false; 6485 6486 // Check if all users of the add are provably no-wrap. 6487 for (const auto *Result : Results) { 6488 // If the extractvalue itself is not executed on overflow, then we don't 6489 // need to check each use separately, since domination is transitive. 6490 if (DT.dominates(NoWrapEdge, Result->getParent())) 6491 continue; 6492 6493 for (const auto &RU : Result->uses()) 6494 if (!DT.dominates(NoWrapEdge, RU)) 6495 return false; 6496 } 6497 6498 return true; 6499 }; 6500 6501 return llvm::any_of(GuardingBranches, AllUsesGuardedByBranch); 6502 } 6503 6504 /// Shifts return poison if the shift amount is greater than or equal to the bitwidth. 6505 static bool shiftAmountKnownInRange(const Value *ShiftAmount) { 6506 auto *C = dyn_cast<Constant>(ShiftAmount); 6507 if (!C) 6508 return false; 6509 6510 // Shifts return poison if the shift amount is greater than or equal to the bitwidth.
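// For example, `shl i8 %x, 8` already yields poison, so for i8 only the
// amounts 0..7 are treated as in range below.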
6511 SmallVector<const Constant *, 4> ShiftAmounts; 6512 if (auto *FVTy = dyn_cast<FixedVectorType>(C->getType())) { 6513 unsigned NumElts = FVTy->getNumElements(); 6514 for (unsigned i = 0; i < NumElts; ++i) 6515 ShiftAmounts.push_back(C->getAggregateElement(i)); 6516 } else if (isa<ScalableVectorType>(C->getType())) 6517 return false; // Can't tell, just return false to be safe 6518 else 6519 ShiftAmounts.push_back(C); 6520 6521 bool Safe = llvm::all_of(ShiftAmounts, [](const Constant *C) { 6522 auto *CI = dyn_cast_or_null<ConstantInt>(C); 6523 return CI && CI->getValue().ult(C->getType()->getIntegerBitWidth()); 6524 }); 6525 6526 return Safe; 6527 } 6528 6529 enum class UndefPoisonKind { 6530 PoisonOnly = (1 << 0), 6531 UndefOnly = (1 << 1), 6532 UndefOrPoison = PoisonOnly | UndefOnly, 6533 }; 6534 6535 static bool includesPoison(UndefPoisonKind Kind) { 6536 return (unsigned(Kind) & unsigned(UndefPoisonKind::PoisonOnly)) != 0; 6537 } 6538 6539 static bool includesUndef(UndefPoisonKind Kind) { 6540 return (unsigned(Kind) & unsigned(UndefPoisonKind::UndefOnly)) != 0; 6541 } 6542 6543 static bool canCreateUndefOrPoison(const Operator *Op, UndefPoisonKind Kind, 6544 bool ConsiderFlagsAndMetadata) { 6545 6546 if (ConsiderFlagsAndMetadata && includesPoison(Kind) && 6547 Op->hasPoisonGeneratingFlagsOrMetadata()) 6548 return true; 6549 6550 unsigned Opcode = Op->getOpcode(); 6551 6552 // Check whether opcode is a poison/undef-generating operation 6553 switch (Opcode) { 6554 case Instruction::Shl: 6555 case Instruction::AShr: 6556 case Instruction::LShr: 6557 return includesPoison(Kind) && !shiftAmountKnownInRange(Op->getOperand(1)); 6558 case Instruction::FPToSI: 6559 case Instruction::FPToUI: 6560 // fptosi/ui yields poison if the resulting value does not fit in the 6561 // destination type. 6562 return true; 6563 case Instruction::Call: 6564 if (auto *II = dyn_cast<IntrinsicInst>(Op)) { 6565 switch (II->getIntrinsicID()) { 6566 // TODO: Add more intrinsics. 
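// Note that ctlz/cttz can produce poison only when their is_zero_poison
// argument is true (and abs only when its int_min_poison argument is true),
// which is what the second-operand check below is testing.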
6567 case Intrinsic::ctlz: 6568 case Intrinsic::cttz: 6569 case Intrinsic::abs: 6570 if (cast<ConstantInt>(II->getArgOperand(1))->isNullValue()) 6571 return false; 6572 break; 6573 case Intrinsic::ctpop: 6574 case Intrinsic::bswap: 6575 case Intrinsic::bitreverse: 6576 case Intrinsic::fshl: 6577 case Intrinsic::fshr: 6578 case Intrinsic::smax: 6579 case Intrinsic::smin: 6580 case Intrinsic::umax: 6581 case Intrinsic::umin: 6582 case Intrinsic::ptrmask: 6583 case Intrinsic::fptoui_sat: 6584 case Intrinsic::fptosi_sat: 6585 case Intrinsic::sadd_with_overflow: 6586 case Intrinsic::ssub_with_overflow: 6587 case Intrinsic::smul_with_overflow: 6588 case Intrinsic::uadd_with_overflow: 6589 case Intrinsic::usub_with_overflow: 6590 case Intrinsic::umul_with_overflow: 6591 case Intrinsic::sadd_sat: 6592 case Intrinsic::uadd_sat: 6593 case Intrinsic::ssub_sat: 6594 case Intrinsic::usub_sat: 6595 return false; 6596 case Intrinsic::sshl_sat: 6597 case Intrinsic::ushl_sat: 6598 return includesPoison(Kind) && 6599 !shiftAmountKnownInRange(II->getArgOperand(1)); 6600 case Intrinsic::fma: 6601 case Intrinsic::fmuladd: 6602 case Intrinsic::sqrt: 6603 case Intrinsic::powi: 6604 case Intrinsic::sin: 6605 case Intrinsic::cos: 6606 case Intrinsic::pow: 6607 case Intrinsic::log: 6608 case Intrinsic::log10: 6609 case Intrinsic::log2: 6610 case Intrinsic::exp: 6611 case Intrinsic::exp2: 6612 case Intrinsic::exp10: 6613 case Intrinsic::fabs: 6614 case Intrinsic::copysign: 6615 case Intrinsic::floor: 6616 case Intrinsic::ceil: 6617 case Intrinsic::trunc: 6618 case Intrinsic::rint: 6619 case Intrinsic::nearbyint: 6620 case Intrinsic::round: 6621 case Intrinsic::roundeven: 6622 case Intrinsic::fptrunc_round: 6623 case Intrinsic::canonicalize: 6624 case Intrinsic::arithmetic_fence: 6625 case Intrinsic::minnum: 6626 case Intrinsic::maxnum: 6627 case Intrinsic::minimum: 6628 case Intrinsic::maximum: 6629 case Intrinsic::is_fpclass: 6630 case Intrinsic::ldexp: 6631 case Intrinsic::frexp: 6632 return false; 6633 case Intrinsic::lround: 6634 case Intrinsic::llround: 6635 case Intrinsic::lrint: 6636 case Intrinsic::llrint: 6637 // If the value doesn't fit an unspecified value is returned (but this 6638 // is not poison). 6639 return false; 6640 } 6641 } 6642 [[fallthrough]]; 6643 case Instruction::CallBr: 6644 case Instruction::Invoke: { 6645 const auto *CB = cast<CallBase>(Op); 6646 return !CB->hasRetAttr(Attribute::NoUndef); 6647 } 6648 case Instruction::InsertElement: 6649 case Instruction::ExtractElement: { 6650 // If index exceeds the length of the vector, it returns poison 6651 auto *VTy = cast<VectorType>(Op->getOperand(0)->getType()); 6652 unsigned IdxOp = Op->getOpcode() == Instruction::InsertElement ? 2 : 1; 6653 auto *Idx = dyn_cast<ConstantInt>(Op->getOperand(IdxOp)); 6654 if (includesPoison(Kind)) 6655 return !Idx || 6656 Idx->getValue().uge(VTy->getElementCount().getKnownMinValue()); 6657 return false; 6658 } 6659 case Instruction::ShuffleVector: { 6660 ArrayRef<int> Mask = isa<ConstantExpr>(Op) 6661 ? 
cast<ConstantExpr>(Op)->getShuffleMask() 6662 : cast<ShuffleVectorInst>(Op)->getShuffleMask(); 6663 return includesPoison(Kind) && is_contained(Mask, PoisonMaskElem); 6664 } 6665 case Instruction::FNeg: 6666 case Instruction::PHI: 6667 case Instruction::Select: 6668 case Instruction::URem: 6669 case Instruction::SRem: 6670 case Instruction::ExtractValue: 6671 case Instruction::InsertValue: 6672 case Instruction::Freeze: 6673 case Instruction::ICmp: 6674 case Instruction::FCmp: 6675 case Instruction::FAdd: 6676 case Instruction::FSub: 6677 case Instruction::FMul: 6678 case Instruction::FDiv: 6679 case Instruction::FRem: 6680 return false; 6681 case Instruction::GetElementPtr: 6682 // inbounds is handled above 6683 // TODO: what about inrange on constexpr? 6684 return false; 6685 default: { 6686 const auto *CE = dyn_cast<ConstantExpr>(Op); 6687 if (isa<CastInst>(Op) || (CE && CE->isCast())) 6688 return false; 6689 else if (Instruction::isBinaryOp(Opcode)) 6690 return false; 6691 // Be conservative and return true. 6692 return true; 6693 } 6694 } 6695 } 6696 6697 bool llvm::canCreateUndefOrPoison(const Operator *Op, 6698 bool ConsiderFlagsAndMetadata) { 6699 return ::canCreateUndefOrPoison(Op, UndefPoisonKind::UndefOrPoison, 6700 ConsiderFlagsAndMetadata); 6701 } 6702 6703 bool llvm::canCreatePoison(const Operator *Op, bool ConsiderFlagsAndMetadata) { 6704 return ::canCreateUndefOrPoison(Op, UndefPoisonKind::PoisonOnly, 6705 ConsiderFlagsAndMetadata); 6706 } 6707 6708 static bool directlyImpliesPoison(const Value *ValAssumedPoison, const Value *V, 6709 unsigned Depth) { 6710 if (ValAssumedPoison == V) 6711 return true; 6712 6713 const unsigned MaxDepth = 2; 6714 if (Depth >= MaxDepth) 6715 return false; 6716 6717 if (const auto *I = dyn_cast<Instruction>(V)) { 6718 if (any_of(I->operands(), [=](const Use &Op) { 6719 return propagatesPoison(Op) && 6720 directlyImpliesPoison(ValAssumedPoison, Op, Depth + 1); 6721 })) 6722 return true; 6723 6724 // V = extractvalue V0, idx 6725 // V2 = extractvalue V0, idx2 6726 // V0's elements are all poison or not. 
(e.g., add_with_overflow) 6727 const WithOverflowInst *II; 6728 if (match(I, m_ExtractValue(m_WithOverflowInst(II))) && 6729 (match(ValAssumedPoison, m_ExtractValue(m_Specific(II))) || 6730 llvm::is_contained(II->args(), ValAssumedPoison))) 6731 return true; 6732 } 6733 return false; 6734 } 6735 6736 static bool impliesPoison(const Value *ValAssumedPoison, const Value *V, 6737 unsigned Depth) { 6738 if (isGuaranteedNotToBePoison(ValAssumedPoison)) 6739 return true; 6740 6741 if (directlyImpliesPoison(ValAssumedPoison, V, /* Depth */ 0)) 6742 return true; 6743 6744 const unsigned MaxDepth = 2; 6745 if (Depth >= MaxDepth) 6746 return false; 6747 6748 const auto *I = dyn_cast<Instruction>(ValAssumedPoison); 6749 if (I && !canCreatePoison(cast<Operator>(I))) { 6750 return all_of(I->operands(), [=](const Value *Op) { 6751 return impliesPoison(Op, V, Depth + 1); 6752 }); 6753 } 6754 return false; 6755 } 6756 6757 bool llvm::impliesPoison(const Value *ValAssumedPoison, const Value *V) { 6758 return ::impliesPoison(ValAssumedPoison, V, /* Depth */ 0); 6759 } 6760 6761 static bool programUndefinedIfUndefOrPoison(const Value *V, bool PoisonOnly); 6762 6763 static bool isGuaranteedNotToBeUndefOrPoison( 6764 const Value *V, AssumptionCache *AC, const Instruction *CtxI, 6765 const DominatorTree *DT, unsigned Depth, UndefPoisonKind Kind) { 6766 if (Depth >= MaxAnalysisRecursionDepth) 6767 return false; 6768 6769 if (isa<MetadataAsValue>(V)) 6770 return false; 6771 6772 if (const auto *A = dyn_cast<Argument>(V)) { 6773 if (A->hasAttribute(Attribute::NoUndef) || 6774 A->hasAttribute(Attribute::Dereferenceable) || 6775 A->hasAttribute(Attribute::DereferenceableOrNull)) 6776 return true; 6777 } 6778 6779 if (auto *C = dyn_cast<Constant>(V)) { 6780 if (isa<PoisonValue>(C)) 6781 return !includesPoison(Kind); 6782 6783 if (isa<UndefValue>(C)) 6784 return !includesUndef(Kind); 6785 6786 if (isa<ConstantInt>(C) || isa<GlobalVariable>(C) || isa<ConstantFP>(V) || 6787 isa<ConstantPointerNull>(C) || isa<Function>(C)) 6788 return true; 6789 6790 if (C->getType()->isVectorTy() && !isa<ConstantExpr>(C)) 6791 return (!includesUndef(Kind) ? !C->containsPoisonElement() 6792 : !C->containsUndefOrPoisonElement()) && 6793 !C->containsConstantExpression(); 6794 } 6795 6796 // Strip cast operations from a pointer value. 6797 // Note that stripPointerCastsSameRepresentation can strip off getelementptr 6798 // inbounds with zero offset. To guarantee that the result isn't poison, the 6799 // stripped pointer is checked below: it has to point into an allocated 6800 // object or be null, which ensures that an `inbounds` getelementptr with a 6801 // zero offset cannot produce poison. 6802 // It can also strip off addrspacecasts that do not change the bit 6803 // representation; we treat such an addrspacecast as a no-op. 6804 auto *StrippedV = V->stripPointerCastsSameRepresentation(); 6805 if (isa<AllocaInst>(StrippedV) || isa<GlobalVariable>(StrippedV) || 6806 isa<Function>(StrippedV) || isa<ConstantPointerNull>(StrippedV)) 6807 return true; 6808 6809 auto OpCheck = [&](const Value *V) { 6810 return isGuaranteedNotToBeUndefOrPoison(V, AC, CtxI, DT, Depth + 1, Kind); 6811 }; 6812 6813 if (auto *Opr = dyn_cast<Operator>(V)) { 6814 // If the value is a freeze instruction, then it can never 6815 // be undef or poison.
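// For example, `%f = freeze i32 %x` is a well-defined value even when %x
// is undef or poison.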
6816 if (isa<FreezeInst>(V)) 6817 return true; 6818 6819 if (const auto *CB = dyn_cast<CallBase>(V)) { 6820 if (CB->hasRetAttr(Attribute::NoUndef) || 6821 CB->hasRetAttr(Attribute::Dereferenceable) || 6822 CB->hasRetAttr(Attribute::DereferenceableOrNull)) 6823 return true; 6824 } 6825 6826 if (const auto *PN = dyn_cast<PHINode>(V)) { 6827 unsigned Num = PN->getNumIncomingValues(); 6828 bool IsWellDefined = true; 6829 for (unsigned i = 0; i < Num; ++i) { 6830 auto *TI = PN->getIncomingBlock(i)->getTerminator(); 6831 if (!isGuaranteedNotToBeUndefOrPoison(PN->getIncomingValue(i), AC, TI, 6832 DT, Depth + 1, Kind)) { 6833 IsWellDefined = false; 6834 break; 6835 } 6836 } 6837 if (IsWellDefined) 6838 return true; 6839 } else if (!::canCreateUndefOrPoison(Opr, Kind, 6840 /*ConsiderFlagsAndMetadata*/ true) && 6841 all_of(Opr->operands(), OpCheck)) 6842 return true; 6843 } 6844 6845 if (auto *I = dyn_cast<LoadInst>(V)) 6846 if (I->hasMetadata(LLVMContext::MD_noundef) || 6847 I->hasMetadata(LLVMContext::MD_dereferenceable) || 6848 I->hasMetadata(LLVMContext::MD_dereferenceable_or_null)) 6849 return true; 6850 6851 if (programUndefinedIfUndefOrPoison(V, !includesUndef(Kind))) 6852 return true; 6853 6854 // CxtI may be null or a cloned instruction. 6855 if (!CtxI || !CtxI->getParent() || !DT) 6856 return false; 6857 6858 auto *DNode = DT->getNode(CtxI->getParent()); 6859 if (!DNode) 6860 // Unreachable block 6861 return false; 6862 6863 // If V is used as a branch condition before reaching CtxI, V cannot be 6864 // undef or poison. 6865 // br V, BB1, BB2 6866 // BB1: 6867 // CtxI ; V cannot be undef or poison here 6868 auto *Dominator = DNode->getIDom(); 6869 while (Dominator) { 6870 auto *TI = Dominator->getBlock()->getTerminator(); 6871 6872 Value *Cond = nullptr; 6873 if (auto BI = dyn_cast_or_null<BranchInst>(TI)) { 6874 if (BI->isConditional()) 6875 Cond = BI->getCondition(); 6876 } else if (auto SI = dyn_cast_or_null<SwitchInst>(TI)) { 6877 Cond = SI->getCondition(); 6878 } 6879 6880 if (Cond) { 6881 if (Cond == V) 6882 return true; 6883 else if (!includesUndef(Kind) && isa<Operator>(Cond)) { 6884 // For poison, we can analyze further 6885 auto *Opr = cast<Operator>(Cond); 6886 if (any_of(Opr->operands(), 6887 [V](const Use &U) { return V == U && propagatesPoison(U); })) 6888 return true; 6889 } 6890 } 6891 6892 Dominator = Dominator->getIDom(); 6893 } 6894 6895 if (getKnowledgeValidInContext(V, {Attribute::NoUndef}, CtxI, DT, AC)) 6896 return true; 6897 6898 return false; 6899 } 6900 6901 bool llvm::isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC, 6902 const Instruction *CtxI, 6903 const DominatorTree *DT, 6904 unsigned Depth) { 6905 return ::isGuaranteedNotToBeUndefOrPoison(V, AC, CtxI, DT, Depth, 6906 UndefPoisonKind::UndefOrPoison); 6907 } 6908 6909 bool llvm::isGuaranteedNotToBePoison(const Value *V, AssumptionCache *AC, 6910 const Instruction *CtxI, 6911 const DominatorTree *DT, unsigned Depth) { 6912 return ::isGuaranteedNotToBeUndefOrPoison(V, AC, CtxI, DT, Depth, 6913 UndefPoisonKind::PoisonOnly); 6914 } 6915 6916 bool llvm::isGuaranteedNotToBeUndef(const Value *V, AssumptionCache *AC, 6917 const Instruction *CtxI, 6918 const DominatorTree *DT, unsigned Depth) { 6919 return ::isGuaranteedNotToBeUndefOrPoison(V, AC, CtxI, DT, Depth, 6920 UndefPoisonKind::UndefOnly); 6921 } 6922 6923 /// Return true if undefined behavior would provably be executed on the path to 6924 /// OnPathTo if Root produced a posion result. 
Note that this doesn't say 6925 /// anything about whether OnPathTo is actually executed or whether Root is 6926 /// actually poison. This can be used to assess whether a new use of Root can 6927 /// be added at a location which is control equivalent with OnPathTo (such as 6928 /// immediately before it) without introducing UB which didn't previously 6929 /// exist. Note that a false result conveys no information. 6930 bool llvm::mustExecuteUBIfPoisonOnPathTo(Instruction *Root, 6931 Instruction *OnPathTo, 6932 DominatorTree *DT) { 6933 // Basic approach is to assume Root is poison, propagate poison forward 6934 // through all users we can easily track, and then check whether any of those 6935 // users are provable UB and must execute before out exiting block might 6936 // exit. 6937 6938 // The set of all recursive users we've visited (which are assumed to all be 6939 // poison because of said visit) 6940 SmallSet<const Value *, 16> KnownPoison; 6941 SmallVector<const Instruction*, 16> Worklist; 6942 Worklist.push_back(Root); 6943 while (!Worklist.empty()) { 6944 const Instruction *I = Worklist.pop_back_val(); 6945 6946 // If we know this must trigger UB on a path leading our target. 6947 if (mustTriggerUB(I, KnownPoison) && DT->dominates(I, OnPathTo)) 6948 return true; 6949 6950 // If we can't analyze propagation through this instruction, just skip it 6951 // and transitive users. Safe as false is a conservative result. 6952 if (I != Root && !any_of(I->operands(), [&KnownPoison](const Use &U) { 6953 return KnownPoison.contains(U) && propagatesPoison(U); 6954 })) 6955 continue; 6956 6957 if (KnownPoison.insert(I).second) 6958 for (const User *User : I->users()) 6959 Worklist.push_back(cast<Instruction>(User)); 6960 } 6961 6962 // Might be non-UB, or might have a path we couldn't prove must execute on 6963 // way to exiting bb. 6964 return false; 6965 } 6966 6967 OverflowResult llvm::computeOverflowForSignedAdd(const AddOperator *Add, 6968 const SimplifyQuery &SQ) { 6969 return ::computeOverflowForSignedAdd(Add->getOperand(0), Add->getOperand(1), 6970 Add, SQ); 6971 } 6972 6973 OverflowResult 6974 llvm::computeOverflowForSignedAdd(const WithCache<const Value *> &LHS, 6975 const WithCache<const Value *> &RHS, 6976 const SimplifyQuery &SQ) { 6977 return ::computeOverflowForSignedAdd(LHS, RHS, nullptr, SQ); 6978 } 6979 6980 bool llvm::isGuaranteedToTransferExecutionToSuccessor(const Instruction *I) { 6981 // Note: An atomic operation isn't guaranteed to return in a reasonable amount 6982 // of time because it's possible for another thread to interfere with it for an 6983 // arbitrary length of time, but programs aren't allowed to rely on that. 6984 6985 // If there is no successor, then execution can't transfer to it. 6986 if (isa<ReturnInst>(I)) 6987 return false; 6988 if (isa<UnreachableInst>(I)) 6989 return false; 6990 6991 // Note: Do not add new checks here; instead, change Instruction::mayThrow or 6992 // Instruction::willReturn. 6993 // 6994 // FIXME: Move this check into Instruction::willReturn. 6995 if (isa<CatchPadInst>(I)) { 6996 switch (classifyEHPersonality(I->getFunction()->getPersonalityFn())) { 6997 default: 6998 // A catchpad may invoke exception object constructors and such, which 6999 // in some languages can be arbitrary code, so be conservative by default. 7000 return false; 7001 case EHPersonality::CoreCLR: 7002 // For CoreCLR, it just involves a type test. 
7003 return true; 7004 } 7005 } 7006 7007 // An instruction that returns without throwing must transfer control flow 7008 // to a successor. 7009 return !I->mayThrow() && I->willReturn(); 7010 } 7011 7012 bool llvm::isGuaranteedToTransferExecutionToSuccessor(const BasicBlock *BB) { 7013 // TODO: This is slightly conservative for invoke instruction since exiting 7014 // via an exception *is* normal control for them. 7015 for (const Instruction &I : *BB) 7016 if (!isGuaranteedToTransferExecutionToSuccessor(&I)) 7017 return false; 7018 return true; 7019 } 7020 7021 bool llvm::isGuaranteedToTransferExecutionToSuccessor( 7022 BasicBlock::const_iterator Begin, BasicBlock::const_iterator End, 7023 unsigned ScanLimit) { 7024 return isGuaranteedToTransferExecutionToSuccessor(make_range(Begin, End), 7025 ScanLimit); 7026 } 7027 7028 bool llvm::isGuaranteedToTransferExecutionToSuccessor( 7029 iterator_range<BasicBlock::const_iterator> Range, unsigned ScanLimit) { 7030 assert(ScanLimit && "scan limit must be non-zero"); 7031 for (const Instruction &I : Range) { 7032 if (isa<DbgInfoIntrinsic>(I)) 7033 continue; 7034 if (--ScanLimit == 0) 7035 return false; 7036 if (!isGuaranteedToTransferExecutionToSuccessor(&I)) 7037 return false; 7038 } 7039 return true; 7040 } 7041 7042 bool llvm::isGuaranteedToExecuteForEveryIteration(const Instruction *I, 7043 const Loop *L) { 7044 // The loop header is guaranteed to be executed for every iteration. 7045 // 7046 // FIXME: Relax this constraint to cover all basic blocks that are 7047 // guaranteed to be executed at every iteration. 7048 if (I->getParent() != L->getHeader()) return false; 7049 7050 for (const Instruction &LI : *L->getHeader()) { 7051 if (&LI == I) return true; 7052 if (!isGuaranteedToTransferExecutionToSuccessor(&LI)) return false; 7053 } 7054 llvm_unreachable("Instruction not contained in its own parent basic block."); 7055 } 7056 7057 bool llvm::propagatesPoison(const Use &PoisonOp) { 7058 const Operator *I = cast<Operator>(PoisonOp.getUser()); 7059 switch (I->getOpcode()) { 7060 case Instruction::Freeze: 7061 case Instruction::PHI: 7062 case Instruction::Invoke: 7063 return false; 7064 case Instruction::Select: 7065 return PoisonOp.getOperandNo() == 0; 7066 case Instruction::Call: 7067 if (auto *II = dyn_cast<IntrinsicInst>(I)) { 7068 switch (II->getIntrinsicID()) { 7069 // TODO: Add more intrinsics. 7070 case Intrinsic::sadd_with_overflow: 7071 case Intrinsic::ssub_with_overflow: 7072 case Intrinsic::smul_with_overflow: 7073 case Intrinsic::uadd_with_overflow: 7074 case Intrinsic::usub_with_overflow: 7075 case Intrinsic::umul_with_overflow: 7076 // If an input is a vector containing a poison element, the 7077 // two output vectors (calculated results, overflow bits)' 7078 // corresponding lanes are poison. 7079 return true; 7080 case Intrinsic::ctpop: 7081 return true; 7082 } 7083 } 7084 return false; 7085 case Instruction::ICmp: 7086 case Instruction::FCmp: 7087 case Instruction::GetElementPtr: 7088 return true; 7089 default: 7090 if (isa<BinaryOperator>(I) || isa<UnaryOperator>(I) || isa<CastInst>(I)) 7091 return true; 7092 7093 // Be conservative and return false. 
7094 return false; 7095 } 7096 } 7097 7098 void llvm::getGuaranteedWellDefinedOps( 7099 const Instruction *I, SmallVectorImpl<const Value *> &Operands) { 7100 switch (I->getOpcode()) { 7101 case Instruction::Store: 7102 Operands.push_back(cast<StoreInst>(I)->getPointerOperand()); 7103 break; 7104 7105 case Instruction::Load: 7106 Operands.push_back(cast<LoadInst>(I)->getPointerOperand()); 7107 break; 7108 7109 // Since dereferenceable attribute imply noundef, atomic operations 7110 // also implicitly have noundef pointers too 7111 case Instruction::AtomicCmpXchg: 7112 Operands.push_back(cast<AtomicCmpXchgInst>(I)->getPointerOperand()); 7113 break; 7114 7115 case Instruction::AtomicRMW: 7116 Operands.push_back(cast<AtomicRMWInst>(I)->getPointerOperand()); 7117 break; 7118 7119 case Instruction::Call: 7120 case Instruction::Invoke: { 7121 const CallBase *CB = cast<CallBase>(I); 7122 if (CB->isIndirectCall()) 7123 Operands.push_back(CB->getCalledOperand()); 7124 for (unsigned i = 0; i < CB->arg_size(); ++i) { 7125 if (CB->paramHasAttr(i, Attribute::NoUndef) || 7126 CB->paramHasAttr(i, Attribute::Dereferenceable) || 7127 CB->paramHasAttr(i, Attribute::DereferenceableOrNull)) 7128 Operands.push_back(CB->getArgOperand(i)); 7129 } 7130 break; 7131 } 7132 case Instruction::Ret: 7133 if (I->getFunction()->hasRetAttribute(Attribute::NoUndef)) 7134 Operands.push_back(I->getOperand(0)); 7135 break; 7136 case Instruction::Switch: 7137 Operands.push_back(cast<SwitchInst>(I)->getCondition()); 7138 break; 7139 case Instruction::Br: { 7140 auto *BR = cast<BranchInst>(I); 7141 if (BR->isConditional()) 7142 Operands.push_back(BR->getCondition()); 7143 break; 7144 } 7145 default: 7146 break; 7147 } 7148 } 7149 7150 void llvm::getGuaranteedNonPoisonOps(const Instruction *I, 7151 SmallVectorImpl<const Value *> &Operands) { 7152 getGuaranteedWellDefinedOps(I, Operands); 7153 switch (I->getOpcode()) { 7154 // Divisors of these operations are allowed to be partially undef. 7155 case Instruction::UDiv: 7156 case Instruction::SDiv: 7157 case Instruction::URem: 7158 case Instruction::SRem: 7159 Operands.push_back(I->getOperand(1)); 7160 break; 7161 default: 7162 break; 7163 } 7164 } 7165 7166 bool llvm::mustTriggerUB(const Instruction *I, 7167 const SmallPtrSetImpl<const Value *> &KnownPoison) { 7168 SmallVector<const Value *, 4> NonPoisonOps; 7169 getGuaranteedNonPoisonOps(I, NonPoisonOps); 7170 7171 for (const auto *V : NonPoisonOps) 7172 if (KnownPoison.count(V)) 7173 return true; 7174 7175 return false; 7176 } 7177 7178 static bool programUndefinedIfUndefOrPoison(const Value *V, 7179 bool PoisonOnly) { 7180 // We currently only look for uses of values within the same basic 7181 // block, as that makes it easier to guarantee that the uses will be 7182 // executed given that Inst is executed. 7183 // 7184 // FIXME: Expand this to consider uses beyond the same basic block. To do 7185 // this, look out for the distinction between post-dominance and strong 7186 // post-dominance. 7187 const BasicBlock *BB = nullptr; 7188 BasicBlock::const_iterator Begin; 7189 if (const auto *Inst = dyn_cast<Instruction>(V)) { 7190 BB = Inst->getParent(); 7191 Begin = Inst->getIterator(); 7192 Begin++; 7193 } else if (const auto *Arg = dyn_cast<Argument>(V)) { 7194 if (Arg->getParent()->isDeclaration()) 7195 return false; 7196 BB = &Arg->getParent()->getEntryBlock(); 7197 Begin = BB->begin(); 7198 } else { 7199 return false; 7200 } 7201 7202 // Limit number of instructions we look at, to avoid scanning through large 7203 // blocks. 
The current limit is chosen arbitrarily. 7204 unsigned ScanLimit = 32; 7205 BasicBlock::const_iterator End = BB->end(); 7206 7207 if (!PoisonOnly) { 7208 // Since undef does not propagate eagerly, be conservative & just check 7209 // whether a value is directly passed to an instruction that must take 7210 // well-defined operands. 7211 7212 for (const auto &I : make_range(Begin, End)) { 7213 if (isa<DbgInfoIntrinsic>(I)) 7214 continue; 7215 if (--ScanLimit == 0) 7216 break; 7217 7218 SmallVector<const Value *, 4> WellDefinedOps; 7219 getGuaranteedWellDefinedOps(&I, WellDefinedOps); 7220 if (is_contained(WellDefinedOps, V)) 7221 return true; 7222 7223 if (!isGuaranteedToTransferExecutionToSuccessor(&I)) 7224 break; 7225 } 7226 return false; 7227 } 7228 7229 // Set of instructions that we have proved will yield poison if Inst 7230 // does. 7231 SmallSet<const Value *, 16> YieldsPoison; 7232 SmallSet<const BasicBlock *, 4> Visited; 7233 7234 YieldsPoison.insert(V); 7235 Visited.insert(BB); 7236 7237 while (true) { 7238 for (const auto &I : make_range(Begin, End)) { 7239 if (isa<DbgInfoIntrinsic>(I)) 7240 continue; 7241 if (--ScanLimit == 0) 7242 return false; 7243 if (mustTriggerUB(&I, YieldsPoison)) 7244 return true; 7245 if (!isGuaranteedToTransferExecutionToSuccessor(&I)) 7246 return false; 7247 7248 // If an operand is poison and propagates it, mark I as yielding poison. 7249 for (const Use &Op : I.operands()) { 7250 if (YieldsPoison.count(Op) && propagatesPoison(Op)) { 7251 YieldsPoison.insert(&I); 7252 break; 7253 } 7254 } 7255 7256 // Special handling for select, which returns poison if its operand 0 is 7257 // poison (handled in the loop above) *or* if both its true/false operands 7258 // are poison (handled here). 7259 if (I.getOpcode() == Instruction::Select && 7260 YieldsPoison.count(I.getOperand(1)) && 7261 YieldsPoison.count(I.getOperand(2))) { 7262 YieldsPoison.insert(&I); 7263 } 7264 } 7265 7266 BB = BB->getSingleSuccessor(); 7267 if (!BB || !Visited.insert(BB).second) 7268 break; 7269 7270 Begin = BB->getFirstNonPHI()->getIterator(); 7271 End = BB->end(); 7272 } 7273 return false; 7274 } 7275 7276 bool llvm::programUndefinedIfUndefOrPoison(const Instruction *Inst) { 7277 return ::programUndefinedIfUndefOrPoison(Inst, false); 7278 } 7279 7280 bool llvm::programUndefinedIfPoison(const Instruction *Inst) { 7281 return ::programUndefinedIfUndefOrPoison(Inst, true); 7282 } 7283 7284 static bool isKnownNonNaN(const Value *V, FastMathFlags FMF) { 7285 if (FMF.noNaNs()) 7286 return true; 7287 7288 if (auto *C = dyn_cast<ConstantFP>(V)) 7289 return !C->isNaN(); 7290 7291 if (auto *C = dyn_cast<ConstantDataVector>(V)) { 7292 if (!C->getElementType()->isFloatingPointTy()) 7293 return false; 7294 for (unsigned I = 0, E = C->getNumElements(); I < E; ++I) { 7295 if (C->getElementAsAPFloat(I).isNaN()) 7296 return false; 7297 } 7298 return true; 7299 } 7300 7301 if (isa<ConstantAggregateZero>(V)) 7302 return true; 7303 7304 return false; 7305 } 7306 7307 static bool isKnownNonZero(const Value *V) { 7308 if (auto *C = dyn_cast<ConstantFP>(V)) 7309 return !C->isZero(); 7310 7311 if (auto *C = dyn_cast<ConstantDataVector>(V)) { 7312 if (!C->getElementType()->isFloatingPointTy()) 7313 return false; 7314 for (unsigned I = 0, E = C->getNumElements(); I < E; ++I) { 7315 if (C->getElementAsAPFloat(I).isZero()) 7316 return false; 7317 } 7318 return true; 7319 } 7320 7321 return false; 7322 } 7323 7324 /// Match clamp pattern for float types without care about NaNs or signed zeros. 
7325 /// Given non-min/max outer cmp/select from the clamp pattern this 7326 /// function recognizes if it can be substitued by a "canonical" min/max 7327 /// pattern. 7328 static SelectPatternResult matchFastFloatClamp(CmpInst::Predicate Pred, 7329 Value *CmpLHS, Value *CmpRHS, 7330 Value *TrueVal, Value *FalseVal, 7331 Value *&LHS, Value *&RHS) { 7332 // Try to match 7333 // X < C1 ? C1 : Min(X, C2) --> Max(C1, Min(X, C2)) 7334 // X > C1 ? C1 : Max(X, C2) --> Min(C1, Max(X, C2)) 7335 // and return description of the outer Max/Min. 7336 7337 // First, check if select has inverse order: 7338 if (CmpRHS == FalseVal) { 7339 std::swap(TrueVal, FalseVal); 7340 Pred = CmpInst::getInversePredicate(Pred); 7341 } 7342 7343 // Assume success now. If there's no match, callers should not use these anyway. 7344 LHS = TrueVal; 7345 RHS = FalseVal; 7346 7347 const APFloat *FC1; 7348 if (CmpRHS != TrueVal || !match(CmpRHS, m_APFloat(FC1)) || !FC1->isFinite()) 7349 return {SPF_UNKNOWN, SPNB_NA, false}; 7350 7351 const APFloat *FC2; 7352 switch (Pred) { 7353 case CmpInst::FCMP_OLT: 7354 case CmpInst::FCMP_OLE: 7355 case CmpInst::FCMP_ULT: 7356 case CmpInst::FCMP_ULE: 7357 if (match(FalseVal, 7358 m_CombineOr(m_OrdFMin(m_Specific(CmpLHS), m_APFloat(FC2)), 7359 m_UnordFMin(m_Specific(CmpLHS), m_APFloat(FC2)))) && 7360 *FC1 < *FC2) 7361 return {SPF_FMAXNUM, SPNB_RETURNS_ANY, false}; 7362 break; 7363 case CmpInst::FCMP_OGT: 7364 case CmpInst::FCMP_OGE: 7365 case CmpInst::FCMP_UGT: 7366 case CmpInst::FCMP_UGE: 7367 if (match(FalseVal, 7368 m_CombineOr(m_OrdFMax(m_Specific(CmpLHS), m_APFloat(FC2)), 7369 m_UnordFMax(m_Specific(CmpLHS), m_APFloat(FC2)))) && 7370 *FC1 > *FC2) 7371 return {SPF_FMINNUM, SPNB_RETURNS_ANY, false}; 7372 break; 7373 default: 7374 break; 7375 } 7376 7377 return {SPF_UNKNOWN, SPNB_NA, false}; 7378 } 7379 7380 /// Recognize variations of: 7381 /// CLAMP(v,l,h) ==> ((v) < (l) ? (l) : ((v) > (h) ? (h) : (v))) 7382 static SelectPatternResult matchClamp(CmpInst::Predicate Pred, 7383 Value *CmpLHS, Value *CmpRHS, 7384 Value *TrueVal, Value *FalseVal) { 7385 // Swap the select operands and predicate to match the patterns below. 7386 if (CmpRHS != TrueVal) { 7387 Pred = ICmpInst::getSwappedPredicate(Pred); 7388 std::swap(TrueVal, FalseVal); 7389 } 7390 const APInt *C1; 7391 if (CmpRHS == TrueVal && match(CmpRHS, m_APInt(C1))) { 7392 const APInt *C2; 7393 // (X <s C1) ? C1 : SMIN(X, C2) ==> SMAX(SMIN(X, C2), C1) 7394 if (match(FalseVal, m_SMin(m_Specific(CmpLHS), m_APInt(C2))) && 7395 C1->slt(*C2) && Pred == CmpInst::ICMP_SLT) 7396 return {SPF_SMAX, SPNB_NA, false}; 7397 7398 // (X >s C1) ? C1 : SMAX(X, C2) ==> SMIN(SMAX(X, C2), C1) 7399 if (match(FalseVal, m_SMax(m_Specific(CmpLHS), m_APInt(C2))) && 7400 C1->sgt(*C2) && Pred == CmpInst::ICMP_SGT) 7401 return {SPF_SMIN, SPNB_NA, false}; 7402 7403 // (X <u C1) ? C1 : UMIN(X, C2) ==> UMAX(UMIN(X, C2), C1) 7404 if (match(FalseVal, m_UMin(m_Specific(CmpLHS), m_APInt(C2))) && 7405 C1->ult(*C2) && Pred == CmpInst::ICMP_ULT) 7406 return {SPF_UMAX, SPNB_NA, false}; 7407 7408 // (X >u C1) ? C1 : UMAX(X, C2) ==> UMIN(UMAX(X, C2), C1) 7409 if (match(FalseVal, m_UMax(m_Specific(CmpLHS), m_APInt(C2))) && 7410 C1->ugt(*C2) && Pred == CmpInst::ICMP_UGT) 7411 return {SPF_UMIN, SPNB_NA, false}; 7412 } 7413 return {SPF_UNKNOWN, SPNB_NA, false}; 7414 } 7415 7416 /// Recognize variations of: 7417 /// a < c ? 
min(a,b) : min(b,c) ==> min(min(a,b),min(b,c)) 7418 static SelectPatternResult matchMinMaxOfMinMax(CmpInst::Predicate Pred, 7419 Value *CmpLHS, Value *CmpRHS, 7420 Value *TVal, Value *FVal, 7421 unsigned Depth) { 7422 // TODO: Allow FP min/max with nnan/nsz. 7423 assert(CmpInst::isIntPredicate(Pred) && "Expected integer comparison"); 7424 7425 Value *A = nullptr, *B = nullptr; 7426 SelectPatternResult L = matchSelectPattern(TVal, A, B, nullptr, Depth + 1); 7427 if (!SelectPatternResult::isMinOrMax(L.Flavor)) 7428 return {SPF_UNKNOWN, SPNB_NA, false}; 7429 7430 Value *C = nullptr, *D = nullptr; 7431 SelectPatternResult R = matchSelectPattern(FVal, C, D, nullptr, Depth + 1); 7432 if (L.Flavor != R.Flavor) 7433 return {SPF_UNKNOWN, SPNB_NA, false}; 7434 7435 // We have something like: x Pred y ? min(a, b) : min(c, d). 7436 // Try to match the compare to the min/max operations of the select operands. 7437 // First, make sure we have the right compare predicate. 7438 switch (L.Flavor) { 7439 case SPF_SMIN: 7440 if (Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE) { 7441 Pred = ICmpInst::getSwappedPredicate(Pred); 7442 std::swap(CmpLHS, CmpRHS); 7443 } 7444 if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE) 7445 break; 7446 return {SPF_UNKNOWN, SPNB_NA, false}; 7447 case SPF_SMAX: 7448 if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE) { 7449 Pred = ICmpInst::getSwappedPredicate(Pred); 7450 std::swap(CmpLHS, CmpRHS); 7451 } 7452 if (Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE) 7453 break; 7454 return {SPF_UNKNOWN, SPNB_NA, false}; 7455 case SPF_UMIN: 7456 if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE) { 7457 Pred = ICmpInst::getSwappedPredicate(Pred); 7458 std::swap(CmpLHS, CmpRHS); 7459 } 7460 if (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_ULE) 7461 break; 7462 return {SPF_UNKNOWN, SPNB_NA, false}; 7463 case SPF_UMAX: 7464 if (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_ULE) { 7465 Pred = ICmpInst::getSwappedPredicate(Pred); 7466 std::swap(CmpLHS, CmpRHS); 7467 } 7468 if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE) 7469 break; 7470 return {SPF_UNKNOWN, SPNB_NA, false}; 7471 default: 7472 return {SPF_UNKNOWN, SPNB_NA, false}; 7473 } 7474 7475 // If there is a common operand in the already matched min/max and the other 7476 // min/max operands match the compare operands (either directly or inverted), 7477 // then this is min/max of the same flavor. 7478 7479 // a pred c ? m(a, b) : m(c, b) --> m(m(a, b), m(c, b)) 7480 // ~c pred ~a ? m(a, b) : m(c, b) --> m(m(a, b), m(c, b)) 7481 if (D == B) { 7482 if ((CmpLHS == A && CmpRHS == C) || (match(C, m_Not(m_Specific(CmpLHS))) && 7483 match(A, m_Not(m_Specific(CmpRHS))))) 7484 return {L.Flavor, SPNB_NA, false}; 7485 } 7486 // a pred d ? m(a, b) : m(b, d) --> m(m(a, b), m(b, d)) 7487 // ~d pred ~a ? m(a, b) : m(b, d) --> m(m(a, b), m(b, d)) 7488 if (C == B) { 7489 if ((CmpLHS == A && CmpRHS == D) || (match(D, m_Not(m_Specific(CmpLHS))) && 7490 match(A, m_Not(m_Specific(CmpRHS))))) 7491 return {L.Flavor, SPNB_NA, false}; 7492 } 7493 // b pred c ? m(a, b) : m(c, a) --> m(m(a, b), m(c, a)) 7494 // ~c pred ~b ? m(a, b) : m(c, a) --> m(m(a, b), m(c, a)) 7495 if (D == A) { 7496 if ((CmpLHS == B && CmpRHS == C) || (match(C, m_Not(m_Specific(CmpLHS))) && 7497 match(B, m_Not(m_Specific(CmpRHS))))) 7498 return {L.Flavor, SPNB_NA, false}; 7499 } 7500 // b pred d ? m(a, b) : m(a, d) --> m(m(a, b), m(a, d)) 7501 // ~d pred ~b ? 
m(a, b) : m(a, d) --> m(m(a, b), m(a, d)) 7502 if (C == A) { 7503 if ((CmpLHS == B && CmpRHS == D) || (match(D, m_Not(m_Specific(CmpLHS))) && 7504 match(B, m_Not(m_Specific(CmpRHS))))) 7505 return {L.Flavor, SPNB_NA, false}; 7506 } 7507 7508 return {SPF_UNKNOWN, SPNB_NA, false}; 7509 } 7510 7511 /// If the input value is the result of a 'not' op, constant integer, or vector 7512 /// splat of a constant integer, return the bitwise-not source value. 7513 /// TODO: This could be extended to handle non-splat vector integer constants. 7514 static Value *getNotValue(Value *V) { 7515 Value *NotV; 7516 if (match(V, m_Not(m_Value(NotV)))) 7517 return NotV; 7518 7519 const APInt *C; 7520 if (match(V, m_APInt(C))) 7521 return ConstantInt::get(V->getType(), ~(*C)); 7522 7523 return nullptr; 7524 } 7525 7526 /// Match non-obvious integer minimum and maximum sequences. 7527 static SelectPatternResult matchMinMax(CmpInst::Predicate Pred, 7528 Value *CmpLHS, Value *CmpRHS, 7529 Value *TrueVal, Value *FalseVal, 7530 Value *&LHS, Value *&RHS, 7531 unsigned Depth) { 7532 // Assume success. If there's no match, callers should not use these anyway. 7533 LHS = TrueVal; 7534 RHS = FalseVal; 7535 7536 SelectPatternResult SPR = matchClamp(Pred, CmpLHS, CmpRHS, TrueVal, FalseVal); 7537 if (SPR.Flavor != SelectPatternFlavor::SPF_UNKNOWN) 7538 return SPR; 7539 7540 SPR = matchMinMaxOfMinMax(Pred, CmpLHS, CmpRHS, TrueVal, FalseVal, Depth); 7541 if (SPR.Flavor != SelectPatternFlavor::SPF_UNKNOWN) 7542 return SPR; 7543 7544 // Look through 'not' ops to find disguised min/max. 7545 // (X > Y) ? ~X : ~Y ==> (~X < ~Y) ? ~X : ~Y ==> MIN(~X, ~Y) 7546 // (X < Y) ? ~X : ~Y ==> (~X > ~Y) ? ~X : ~Y ==> MAX(~X, ~Y) 7547 if (CmpLHS == getNotValue(TrueVal) && CmpRHS == getNotValue(FalseVal)) { 7548 switch (Pred) { 7549 case CmpInst::ICMP_SGT: return {SPF_SMIN, SPNB_NA, false}; 7550 case CmpInst::ICMP_SLT: return {SPF_SMAX, SPNB_NA, false}; 7551 case CmpInst::ICMP_UGT: return {SPF_UMIN, SPNB_NA, false}; 7552 case CmpInst::ICMP_ULT: return {SPF_UMAX, SPNB_NA, false}; 7553 default: break; 7554 } 7555 } 7556 7557 // (X > Y) ? ~Y : ~X ==> (~X < ~Y) ? ~Y : ~X ==> MAX(~Y, ~X) 7558 // (X < Y) ? ~Y : ~X ==> (~X > ~Y) ? ~Y : ~X ==> MIN(~Y, ~X) 7559 if (CmpLHS == getNotValue(FalseVal) && CmpRHS == getNotValue(TrueVal)) { 7560 switch (Pred) { 7561 case CmpInst::ICMP_SGT: return {SPF_SMAX, SPNB_NA, false}; 7562 case CmpInst::ICMP_SLT: return {SPF_SMIN, SPNB_NA, false}; 7563 case CmpInst::ICMP_UGT: return {SPF_UMAX, SPNB_NA, false}; 7564 case CmpInst::ICMP_ULT: return {SPF_UMIN, SPNB_NA, false}; 7565 default: break; 7566 } 7567 } 7568 7569 if (Pred != CmpInst::ICMP_SGT && Pred != CmpInst::ICMP_SLT) 7570 return {SPF_UNKNOWN, SPNB_NA, false}; 7571 7572 const APInt *C1; 7573 if (!match(CmpRHS, m_APInt(C1))) 7574 return {SPF_UNKNOWN, SPNB_NA, false}; 7575 7576 // An unsigned min/max can be written with a signed compare. 7577 const APInt *C2; 7578 if ((CmpLHS == TrueVal && match(FalseVal, m_APInt(C2))) || 7579 (CmpLHS == FalseVal && match(TrueVal, m_APInt(C2)))) { 7580 // Is the sign bit set? 7581 // (X <s 0) ? X : MAXVAL ==> (X >u MAXVAL) ? X : MAXVAL ==> UMAX 7582 // (X <s 0) ? MAXVAL : X ==> (X >u MAXVAL) ? MAXVAL : X ==> UMIN 7583 if (Pred == CmpInst::ICMP_SLT && C1->isZero() && C2->isMaxSignedValue()) 7584 return {CmpLHS == TrueVal ? SPF_UMAX : SPF_UMIN, SPNB_NA, false}; 7585 7586 // Is the sign bit clear? 7587 // (X >s -1) ? MINVAL : X ==> (X <u MINVAL) ? MINVAL : X ==> UMAX 7588 // (X >s -1) ? X : MINVAL ==> (X <u MINVAL) ? 
X : MINVAL ==> UMIN 7589 if (Pred == CmpInst::ICMP_SGT && C1->isAllOnes() && C2->isMinSignedValue()) 7590 return {CmpLHS == FalseVal ? SPF_UMAX : SPF_UMIN, SPNB_NA, false}; 7591 } 7592 7593 return {SPF_UNKNOWN, SPNB_NA, false}; 7594 } 7595 7596 bool llvm::isKnownNegation(const Value *X, const Value *Y, bool NeedNSW) { 7597 assert(X && Y && "Invalid operand"); 7598 7599 // X = sub (0, Y) || X = sub nsw (0, Y) 7600 if ((!NeedNSW && match(X, m_Sub(m_ZeroInt(), m_Specific(Y)))) || 7601 (NeedNSW && match(X, m_NSWSub(m_ZeroInt(), m_Specific(Y))))) 7602 return true; 7603 7604 // Y = sub (0, X) || Y = sub nsw (0, X) 7605 if ((!NeedNSW && match(Y, m_Sub(m_ZeroInt(), m_Specific(X)))) || 7606 (NeedNSW && match(Y, m_NSWSub(m_ZeroInt(), m_Specific(X))))) 7607 return true; 7608 7609 // X = sub (A, B), Y = sub (B, A) || X = sub nsw (A, B), Y = sub nsw (B, A) 7610 Value *A, *B; 7611 return (!NeedNSW && (match(X, m_Sub(m_Value(A), m_Value(B))) && 7612 match(Y, m_Sub(m_Specific(B), m_Specific(A))))) || 7613 (NeedNSW && (match(X, m_NSWSub(m_Value(A), m_Value(B))) && 7614 match(Y, m_NSWSub(m_Specific(B), m_Specific(A))))); 7615 } 7616 7617 static SelectPatternResult matchSelectPattern(CmpInst::Predicate Pred, 7618 FastMathFlags FMF, 7619 Value *CmpLHS, Value *CmpRHS, 7620 Value *TrueVal, Value *FalseVal, 7621 Value *&LHS, Value *&RHS, 7622 unsigned Depth) { 7623 bool HasMismatchedZeros = false; 7624 if (CmpInst::isFPPredicate(Pred)) { 7625 // IEEE-754 ignores the sign of 0.0 in comparisons. So if the select has one 7626 // 0.0 operand, set the compare's 0.0 operands to that same value for the 7627 // purpose of identifying min/max. Disregard vector constants with undefined 7628 // elements because those can not be back-propagated for analysis. 7629 Value *OutputZeroVal = nullptr; 7630 if (match(TrueVal, m_AnyZeroFP()) && !match(FalseVal, m_AnyZeroFP()) && 7631 !cast<Constant>(TrueVal)->containsUndefOrPoisonElement()) 7632 OutputZeroVal = TrueVal; 7633 else if (match(FalseVal, m_AnyZeroFP()) && !match(TrueVal, m_AnyZeroFP()) && 7634 !cast<Constant>(FalseVal)->containsUndefOrPoisonElement()) 7635 OutputZeroVal = FalseVal; 7636 7637 if (OutputZeroVal) { 7638 if (match(CmpLHS, m_AnyZeroFP()) && CmpLHS != OutputZeroVal) { 7639 HasMismatchedZeros = true; 7640 CmpLHS = OutputZeroVal; 7641 } 7642 if (match(CmpRHS, m_AnyZeroFP()) && CmpRHS != OutputZeroVal) { 7643 HasMismatchedZeros = true; 7644 CmpRHS = OutputZeroVal; 7645 } 7646 } 7647 } 7648 7649 LHS = CmpLHS; 7650 RHS = CmpRHS; 7651 7652 // Signed zero may return inconsistent results between implementations. 7653 // (0.0 <= -0.0) ? 0.0 : -0.0 // Returns 0.0 7654 // minNum(0.0, -0.0) // May return -0.0 or 0.0 (IEEE 754-2008 5.3.1) 7655 // Therefore, we behave conservatively and only proceed if at least one of the 7656 // operands is known to not be zero or if we don't care about signed zero. 7657 switch (Pred) { 7658 default: break; 7659 case CmpInst::FCMP_OGT: case CmpInst::FCMP_OLT: 7660 case CmpInst::FCMP_UGT: case CmpInst::FCMP_ULT: 7661 if (!HasMismatchedZeros) 7662 break; 7663 [[fallthrough]]; 7664 case CmpInst::FCMP_OGE: case CmpInst::FCMP_OLE: 7665 case CmpInst::FCMP_UGE: case CmpInst::FCMP_ULE: 7666 if (!FMF.noSignedZeros() && !isKnownNonZero(CmpLHS) && 7667 !isKnownNonZero(CmpRHS)) 7668 return {SPF_UNKNOWN, SPNB_NA, false}; 7669 } 7670 7671 SelectPatternNaNBehavior NaNBehavior = SPNB_NA; 7672 bool Ordered = false; 7673 7674 // When given one NaN and one non-NaN input: 7675 // - maxnum/minnum (C99 fmaxf()/fminf()) return the non-NaN input. 
7676 // - A simple C99 (a < b ? a : b) construction will return 'b' (as the 7677 // ordered comparison fails), which could be NaN or non-NaN. 7678 // so here we discover exactly what NaN behavior is required/accepted. 7679 if (CmpInst::isFPPredicate(Pred)) { 7680 bool LHSSafe = isKnownNonNaN(CmpLHS, FMF); 7681 bool RHSSafe = isKnownNonNaN(CmpRHS, FMF); 7682 7683 if (LHSSafe && RHSSafe) { 7684 // Both operands are known non-NaN. 7685 NaNBehavior = SPNB_RETURNS_ANY; 7686 } else if (CmpInst::isOrdered(Pred)) { 7687 // An ordered comparison will return false when given a NaN, so it 7688 // returns the RHS. 7689 Ordered = true; 7690 if (LHSSafe) 7691 // LHS is non-NaN, so if RHS is NaN then NaN will be returned. 7692 NaNBehavior = SPNB_RETURNS_NAN; 7693 else if (RHSSafe) 7694 NaNBehavior = SPNB_RETURNS_OTHER; 7695 else 7696 // Completely unsafe. 7697 return {SPF_UNKNOWN, SPNB_NA, false}; 7698 } else { 7699 Ordered = false; 7700 // An unordered comparison will return true when given a NaN, so it 7701 // returns the LHS. 7702 if (LHSSafe) 7703 // LHS is non-NaN, so if RHS is NaN then non-NaN will be returned. 7704 NaNBehavior = SPNB_RETURNS_OTHER; 7705 else if (RHSSafe) 7706 NaNBehavior = SPNB_RETURNS_NAN; 7707 else 7708 // Completely unsafe. 7709 return {SPF_UNKNOWN, SPNB_NA, false}; 7710 } 7711 } 7712 7713 if (TrueVal == CmpRHS && FalseVal == CmpLHS) { 7714 std::swap(CmpLHS, CmpRHS); 7715 Pred = CmpInst::getSwappedPredicate(Pred); 7716 if (NaNBehavior == SPNB_RETURNS_NAN) 7717 NaNBehavior = SPNB_RETURNS_OTHER; 7718 else if (NaNBehavior == SPNB_RETURNS_OTHER) 7719 NaNBehavior = SPNB_RETURNS_NAN; 7720 Ordered = !Ordered; 7721 } 7722 7723 // ([if]cmp X, Y) ? X : Y 7724 if (TrueVal == CmpLHS && FalseVal == CmpRHS) { 7725 switch (Pred) { 7726 default: return {SPF_UNKNOWN, SPNB_NA, false}; // Equality. 7727 case ICmpInst::ICMP_UGT: 7728 case ICmpInst::ICMP_UGE: return {SPF_UMAX, SPNB_NA, false}; 7729 case ICmpInst::ICMP_SGT: 7730 case ICmpInst::ICMP_SGE: return {SPF_SMAX, SPNB_NA, false}; 7731 case ICmpInst::ICMP_ULT: 7732 case ICmpInst::ICMP_ULE: return {SPF_UMIN, SPNB_NA, false}; 7733 case ICmpInst::ICMP_SLT: 7734 case ICmpInst::ICMP_SLE: return {SPF_SMIN, SPNB_NA, false}; 7735 case FCmpInst::FCMP_UGT: 7736 case FCmpInst::FCMP_UGE: 7737 case FCmpInst::FCMP_OGT: 7738 case FCmpInst::FCMP_OGE: return {SPF_FMAXNUM, NaNBehavior, Ordered}; 7739 case FCmpInst::FCMP_ULT: 7740 case FCmpInst::FCMP_ULE: 7741 case FCmpInst::FCMP_OLT: 7742 case FCmpInst::FCMP_OLE: return {SPF_FMINNUM, NaNBehavior, Ordered}; 7743 } 7744 } 7745 7746 if (isKnownNegation(TrueVal, FalseVal)) { 7747 // Sign-extending LHS does not change its sign, so TrueVal/FalseVal can 7748 // match against either LHS or sext(LHS). 7749 auto MaybeSExtCmpLHS = 7750 m_CombineOr(m_Specific(CmpLHS), m_SExt(m_Specific(CmpLHS))); 7751 auto ZeroOrAllOnes = m_CombineOr(m_ZeroInt(), m_AllOnes()); 7752 auto ZeroOrOne = m_CombineOr(m_ZeroInt(), m_One()); 7753 if (match(TrueVal, MaybeSExtCmpLHS)) { 7754 // Set the return values. If the compare uses the negated value (-X >s 0), 7755 // swap the return values because the negated value is always 'RHS'. 7756 LHS = TrueVal; 7757 RHS = FalseVal; 7758 if (match(CmpLHS, m_Neg(m_Specific(FalseVal)))) 7759 std::swap(LHS, RHS); 7760 7761 // (X >s 0) ? X : -X or (X >s -1) ? X : -X --> ABS(X) 7762 // (-X >s 0) ? -X : X or (-X >s -1) ? -X : X --> ABS(X) 7763 if (Pred == ICmpInst::ICMP_SGT && match(CmpRHS, ZeroOrAllOnes)) 7764 return {SPF_ABS, SPNB_NA, false}; 7765 7766 // (X >=s 0) ? X : -X or (X >=s 1) ? 
X : -X --> ABS(X) 7767 if (Pred == ICmpInst::ICMP_SGE && match(CmpRHS, ZeroOrOne)) 7768 return {SPF_ABS, SPNB_NA, false}; 7769 7770 // (X <s 0) ? X : -X or (X <s 1) ? X : -X --> NABS(X) 7771 // (-X <s 0) ? -X : X or (-X <s 1) ? -X : X --> NABS(X) 7772 if (Pred == ICmpInst::ICMP_SLT && match(CmpRHS, ZeroOrOne)) 7773 return {SPF_NABS, SPNB_NA, false}; 7774 } 7775 else if (match(FalseVal, MaybeSExtCmpLHS)) { 7776 // Set the return values. If the compare uses the negated value (-X >s 0), 7777 // swap the return values because the negated value is always 'RHS'. 7778 LHS = FalseVal; 7779 RHS = TrueVal; 7780 if (match(CmpLHS, m_Neg(m_Specific(TrueVal)))) 7781 std::swap(LHS, RHS); 7782 7783 // (X >s 0) ? -X : X or (X >s -1) ? -X : X --> NABS(X) 7784 // (-X >s 0) ? X : -X or (-X >s -1) ? X : -X --> NABS(X) 7785 if (Pred == ICmpInst::ICMP_SGT && match(CmpRHS, ZeroOrAllOnes)) 7786 return {SPF_NABS, SPNB_NA, false}; 7787 7788 // (X <s 0) ? -X : X or (X <s 1) ? -X : X --> ABS(X) 7789 // (-X <s 0) ? X : -X or (-X <s 1) ? X : -X --> ABS(X) 7790 if (Pred == ICmpInst::ICMP_SLT && match(CmpRHS, ZeroOrOne)) 7791 return {SPF_ABS, SPNB_NA, false}; 7792 } 7793 } 7794 7795 if (CmpInst::isIntPredicate(Pred)) 7796 return matchMinMax(Pred, CmpLHS, CmpRHS, TrueVal, FalseVal, LHS, RHS, Depth); 7797 7798 // According to (IEEE 754-2008 5.3.1), minNum(0.0, -0.0) and similar 7799 // may return either -0.0 or 0.0, so fcmp/select pair has stricter 7800 // semantics than minNum. Be conservative in such case. 7801 if (NaNBehavior != SPNB_RETURNS_ANY || 7802 (!FMF.noSignedZeros() && !isKnownNonZero(CmpLHS) && 7803 !isKnownNonZero(CmpRHS))) 7804 return {SPF_UNKNOWN, SPNB_NA, false}; 7805 7806 return matchFastFloatClamp(Pred, CmpLHS, CmpRHS, TrueVal, FalseVal, LHS, RHS); 7807 } 7808 7809 /// Helps to match a select pattern in case of a type mismatch. 7810 /// 7811 /// The function processes the case when type of true and false values of a 7812 /// select instruction differs from type of the cmp instruction operands because 7813 /// of a cast instruction. The function checks if it is legal to move the cast 7814 /// operation after "select". If yes, it returns the new second value of 7815 /// "select" (with the assumption that cast is moved): 7816 /// 1. As operand of cast instruction when both values of "select" are same cast 7817 /// instructions. 7818 /// 2. As restored constant (by applying reverse cast operation) when the first 7819 /// value of the "select" is a cast operation and the second value is a 7820 /// constant. 7821 /// NOTE: We return only the new second value because the first value could be 7822 /// accessed as operand of cast instruction. 7823 static Value *lookThroughCast(CmpInst *CmpI, Value *V1, Value *V2, 7824 Instruction::CastOps *CastOp) { 7825 auto *Cast1 = dyn_cast<CastInst>(V1); 7826 if (!Cast1) 7827 return nullptr; 7828 7829 *CastOp = Cast1->getOpcode(); 7830 Type *SrcTy = Cast1->getSrcTy(); 7831 if (auto *Cast2 = dyn_cast<CastInst>(V2)) { 7832 // If V1 and V2 are both the same cast from the same type, look through V1. 
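    // For instance (a hypothetical sketch), with
    //
    //   %a = zext i8 %x to i32
    //   %b = zext i8 %y to i32
    //   %sel = select i1 %cond, i32 %a, i32 %b
    //
    // the zext can be performed after the narrow select, so %y is returned
    // here and the caller pairs it with %x, Cast1's operand.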
    if (*CastOp == Cast2->getOpcode() && SrcTy == Cast2->getSrcTy())
      return Cast2->getOperand(0);
    return nullptr;
  }

  auto *C = dyn_cast<Constant>(V2);
  if (!C)
    return nullptr;

  const DataLayout &DL = CmpI->getModule()->getDataLayout();
  Constant *CastedTo = nullptr;
  switch (*CastOp) {
  case Instruction::ZExt:
    if (CmpI->isUnsigned())
      CastedTo = ConstantExpr::getTrunc(C, SrcTy);
    break;
  case Instruction::SExt:
    if (CmpI->isSigned())
      CastedTo = ConstantExpr::getTrunc(C, SrcTy, true);
    break;
  case Instruction::Trunc:
    Constant *CmpConst;
    if (match(CmpI->getOperand(1), m_Constant(CmpConst)) &&
        CmpConst->getType() == SrcTy) {
      // Here we have the following case:
      //
      //   %cond = cmp iN %x, CmpConst
      //   %tr = trunc iN %x to iK
      //   %narrowsel = select i1 %cond, iK %tr, iK C
      //
      // We can always move the trunc after the select operation:
      //
      //   %cond = cmp iN %x, CmpConst
      //   %widesel = select i1 %cond, iN %x, iN CmpConst
      //   %tr = trunc iN %widesel to iK
      //
      // Note that C can be extended in any way because we don't care about the
      // upper bits after truncation. This can't be an abs pattern, because
      // that would look like:
      //
      //   select i1 %cond, x, -x.
      //
      // So only a min/max pattern can be matched. Such a match requires the
      // widened C to equal CmpConst, so we set the widened C to CmpConst; the
      // condition trunc(CmpConst) == C is checked below.
      CastedTo = CmpConst;
    } else {
      unsigned ExtOp = CmpI->isSigned() ? Instruction::SExt : Instruction::ZExt;
      CastedTo = ConstantFoldCastOperand(ExtOp, C, SrcTy, DL);
    }
    break;
  case Instruction::FPTrunc:
    CastedTo = ConstantFoldCastOperand(Instruction::FPExt, C, SrcTy, DL);
    break;
  case Instruction::FPExt:
    CastedTo = ConstantFoldCastOperand(Instruction::FPTrunc, C, SrcTy, DL);
    break;
  case Instruction::FPToUI:
    CastedTo = ConstantFoldCastOperand(Instruction::UIToFP, C, SrcTy, DL);
    break;
  case Instruction::FPToSI:
    CastedTo = ConstantFoldCastOperand(Instruction::SIToFP, C, SrcTy, DL);
    break;
  case Instruction::UIToFP:
    CastedTo = ConstantFoldCastOperand(Instruction::FPToUI, C, SrcTy, DL);
    break;
  case Instruction::SIToFP:
    CastedTo = ConstantFoldCastOperand(Instruction::FPToSI, C, SrcTy, DL);
    break;
  default:
    break;
  }

  if (!CastedTo)
    return nullptr;

  // Make sure the cast doesn't lose any information.
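  // For example (a hypothetical zext i8 -> i32 with C = 300): CastedTo would
  // be trunc(300) = 44, and casting back gives zext(44) = 44 != 300, so the
  // transform is rejected; with C = 44 the round trip is lossless.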
7910 Constant *CastedBack = 7911 ConstantFoldCastOperand(*CastOp, CastedTo, C->getType(), DL); 7912 if (CastedBack && CastedBack != C) 7913 return nullptr; 7914 7915 return CastedTo; 7916 } 7917 7918 SelectPatternResult llvm::matchSelectPattern(Value *V, Value *&LHS, Value *&RHS, 7919 Instruction::CastOps *CastOp, 7920 unsigned Depth) { 7921 if (Depth >= MaxAnalysisRecursionDepth) 7922 return {SPF_UNKNOWN, SPNB_NA, false}; 7923 7924 SelectInst *SI = dyn_cast<SelectInst>(V); 7925 if (!SI) return {SPF_UNKNOWN, SPNB_NA, false}; 7926 7927 CmpInst *CmpI = dyn_cast<CmpInst>(SI->getCondition()); 7928 if (!CmpI) return {SPF_UNKNOWN, SPNB_NA, false}; 7929 7930 Value *TrueVal = SI->getTrueValue(); 7931 Value *FalseVal = SI->getFalseValue(); 7932 7933 return llvm::matchDecomposedSelectPattern(CmpI, TrueVal, FalseVal, LHS, RHS, 7934 CastOp, Depth); 7935 } 7936 7937 SelectPatternResult llvm::matchDecomposedSelectPattern( 7938 CmpInst *CmpI, Value *TrueVal, Value *FalseVal, Value *&LHS, Value *&RHS, 7939 Instruction::CastOps *CastOp, unsigned Depth) { 7940 CmpInst::Predicate Pred = CmpI->getPredicate(); 7941 Value *CmpLHS = CmpI->getOperand(0); 7942 Value *CmpRHS = CmpI->getOperand(1); 7943 FastMathFlags FMF; 7944 if (isa<FPMathOperator>(CmpI)) 7945 FMF = CmpI->getFastMathFlags(); 7946 7947 // Bail out early. 7948 if (CmpI->isEquality()) 7949 return {SPF_UNKNOWN, SPNB_NA, false}; 7950 7951 // Deal with type mismatches. 7952 if (CastOp && CmpLHS->getType() != TrueVal->getType()) { 7953 if (Value *C = lookThroughCast(CmpI, TrueVal, FalseVal, CastOp)) { 7954 // If this is a potential fmin/fmax with a cast to integer, then ignore 7955 // -0.0 because there is no corresponding integer value. 7956 if (*CastOp == Instruction::FPToSI || *CastOp == Instruction::FPToUI) 7957 FMF.setNoSignedZeros(); 7958 return ::matchSelectPattern(Pred, FMF, CmpLHS, CmpRHS, 7959 cast<CastInst>(TrueVal)->getOperand(0), C, 7960 LHS, RHS, Depth); 7961 } 7962 if (Value *C = lookThroughCast(CmpI, FalseVal, TrueVal, CastOp)) { 7963 // If this is a potential fmin/fmax with a cast to integer, then ignore 7964 // -0.0 because there is no corresponding integer value. 7965 if (*CastOp == Instruction::FPToSI || *CastOp == Instruction::FPToUI) 7966 FMF.setNoSignedZeros(); 7967 return ::matchSelectPattern(Pred, FMF, CmpLHS, CmpRHS, 7968 C, cast<CastInst>(FalseVal)->getOperand(0), 7969 LHS, RHS, Depth); 7970 } 7971 } 7972 return ::matchSelectPattern(Pred, FMF, CmpLHS, CmpRHS, TrueVal, FalseVal, 7973 LHS, RHS, Depth); 7974 } 7975 7976 CmpInst::Predicate llvm::getMinMaxPred(SelectPatternFlavor SPF, bool Ordered) { 7977 if (SPF == SPF_SMIN) return ICmpInst::ICMP_SLT; 7978 if (SPF == SPF_UMIN) return ICmpInst::ICMP_ULT; 7979 if (SPF == SPF_SMAX) return ICmpInst::ICMP_SGT; 7980 if (SPF == SPF_UMAX) return ICmpInst::ICMP_UGT; 7981 if (SPF == SPF_FMINNUM) 7982 return Ordered ? FCmpInst::FCMP_OLT : FCmpInst::FCMP_ULT; 7983 if (SPF == SPF_FMAXNUM) 7984 return Ordered ? 
                     FCmpInst::FCMP_OGT : FCmpInst::FCMP_UGT;
  llvm_unreachable("unhandled!");
}

SelectPatternFlavor llvm::getInverseMinMaxFlavor(SelectPatternFlavor SPF) {
  if (SPF == SPF_SMIN) return SPF_SMAX;
  if (SPF == SPF_UMIN) return SPF_UMAX;
  if (SPF == SPF_SMAX) return SPF_SMIN;
  if (SPF == SPF_UMAX) return SPF_UMIN;
  llvm_unreachable("unhandled!");
}

Intrinsic::ID llvm::getInverseMinMaxIntrinsic(Intrinsic::ID MinMaxID) {
  switch (MinMaxID) {
  case Intrinsic::smax: return Intrinsic::smin;
  case Intrinsic::smin: return Intrinsic::smax;
  case Intrinsic::umax: return Intrinsic::umin;
  case Intrinsic::umin: return Intrinsic::umax;
  // Note that the next four intrinsics may produce the same result for the
  // original and the inverted case even if X != Y, because NaN is handled
  // specially.
  case Intrinsic::maximum: return Intrinsic::minimum;
  case Intrinsic::minimum: return Intrinsic::maximum;
  case Intrinsic::maxnum: return Intrinsic::minnum;
  case Intrinsic::minnum: return Intrinsic::maxnum;
  default: llvm_unreachable("Unexpected intrinsic");
  }
}

APInt llvm::getMinMaxLimit(SelectPatternFlavor SPF, unsigned BitWidth) {
  switch (SPF) {
  case SPF_SMAX: return APInt::getSignedMaxValue(BitWidth);
  case SPF_SMIN: return APInt::getSignedMinValue(BitWidth);
  case SPF_UMAX: return APInt::getMaxValue(BitWidth);
  case SPF_UMIN: return APInt::getMinValue(BitWidth);
  default: llvm_unreachable("Unexpected flavor");
  }
}

std::pair<Intrinsic::ID, bool>
llvm::canConvertToMinOrMaxIntrinsic(ArrayRef<Value *> VL) {
  // Check if VL contains select instructions that can be folded into a min/max
  // vector intrinsic and return the intrinsic if it is possible.
  // TODO: Support floating point min/max.
  bool AllCmpSingleUse = true;
  SelectPatternResult SelectPattern;
  SelectPattern.Flavor = SPF_UNKNOWN;
  if (all_of(VL, [&SelectPattern, &AllCmpSingleUse](Value *I) {
        Value *LHS, *RHS;
        auto CurrentPattern = matchSelectPattern(I, LHS, RHS);
        if (!SelectPatternResult::isMinOrMax(CurrentPattern.Flavor) ||
            CurrentPattern.Flavor == SPF_FMINNUM ||
            CurrentPattern.Flavor == SPF_FMAXNUM ||
            !I->getType()->isIntOrIntVectorTy())
          return false;
        if (SelectPattern.Flavor != SPF_UNKNOWN &&
            SelectPattern.Flavor != CurrentPattern.Flavor)
          return false;
        SelectPattern = CurrentPattern;
        AllCmpSingleUse &=
            match(I, m_Select(m_OneUse(m_Value()), m_Value(), m_Value()));
        return true;
      })) {
    switch (SelectPattern.Flavor) {
    case SPF_SMIN:
      return {Intrinsic::smin, AllCmpSingleUse};
    case SPF_UMIN:
      return {Intrinsic::umin, AllCmpSingleUse};
    case SPF_SMAX:
      return {Intrinsic::smax, AllCmpSingleUse};
    case SPF_UMAX:
      return {Intrinsic::umax, AllCmpSingleUse};
    default:
      llvm_unreachable("unexpected select pattern flavor");
    }
  }
  return {Intrinsic::not_intrinsic, false};
}

bool llvm::matchSimpleRecurrence(const PHINode *P, BinaryOperator *&BO,
                                 Value *&Start, Value *&Step) {
  // Handle the case of a simple two-predecessor recurrence PHI.
  // There's a lot more that could theoretically be done here, but
  // this is sufficient to catch some interesting cases.
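  // A typical match (names are hypothetical) is the canonical loop counter:
  //
  //   %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
  //   %iv.next = add nuw nsw i32 %iv, 1
  //
  // which gives BO = %iv.next, Start = 0 and Step = 1.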
8067 if (P->getNumIncomingValues() != 2) 8068 return false; 8069 8070 for (unsigned i = 0; i != 2; ++i) { 8071 Value *L = P->getIncomingValue(i); 8072 Value *R = P->getIncomingValue(!i); 8073 auto *LU = dyn_cast<BinaryOperator>(L); 8074 if (!LU) 8075 continue; 8076 unsigned Opcode = LU->getOpcode(); 8077 8078 switch (Opcode) { 8079 default: 8080 continue; 8081 // TODO: Expand list -- xor, div, gep, uaddo, etc.. 8082 case Instruction::LShr: 8083 case Instruction::AShr: 8084 case Instruction::Shl: 8085 case Instruction::Add: 8086 case Instruction::Sub: 8087 case Instruction::And: 8088 case Instruction::Or: 8089 case Instruction::Mul: 8090 case Instruction::FMul: { 8091 Value *LL = LU->getOperand(0); 8092 Value *LR = LU->getOperand(1); 8093 // Find a recurrence. 8094 if (LL == P) 8095 L = LR; 8096 else if (LR == P) 8097 L = LL; 8098 else 8099 continue; // Check for recurrence with L and R flipped. 8100 8101 break; // Match! 8102 } 8103 }; 8104 8105 // We have matched a recurrence of the form: 8106 // %iv = [R, %entry], [%iv.next, %backedge] 8107 // %iv.next = binop %iv, L 8108 // OR 8109 // %iv = [R, %entry], [%iv.next, %backedge] 8110 // %iv.next = binop L, %iv 8111 BO = LU; 8112 Start = R; 8113 Step = L; 8114 return true; 8115 } 8116 return false; 8117 } 8118 8119 bool llvm::matchSimpleRecurrence(const BinaryOperator *I, PHINode *&P, 8120 Value *&Start, Value *&Step) { 8121 BinaryOperator *BO = nullptr; 8122 P = dyn_cast<PHINode>(I->getOperand(0)); 8123 if (!P) 8124 P = dyn_cast<PHINode>(I->getOperand(1)); 8125 return P && matchSimpleRecurrence(P, BO, Start, Step) && BO == I; 8126 } 8127 8128 /// Return true if "icmp Pred LHS RHS" is always true. 8129 static bool isTruePredicate(CmpInst::Predicate Pred, const Value *LHS, 8130 const Value *RHS, const DataLayout &DL, 8131 unsigned Depth) { 8132 if (ICmpInst::isTrueWhenEqual(Pred) && LHS == RHS) 8133 return true; 8134 8135 switch (Pred) { 8136 default: 8137 return false; 8138 8139 case CmpInst::ICMP_SLE: { 8140 const APInt *C; 8141 8142 // LHS s<= LHS +_{nsw} C if C >= 0 8143 if (match(RHS, m_NSWAdd(m_Specific(LHS), m_APInt(C)))) 8144 return !C->isNegative(); 8145 return false; 8146 } 8147 8148 case CmpInst::ICMP_ULE: { 8149 // LHS u<= LHS +_{nuw} V for any V 8150 if (match(RHS, m_c_Add(m_Specific(LHS), m_Value())) && 8151 cast<OverflowingBinaryOperator>(RHS)->hasNoUnsignedWrap()) 8152 return true; 8153 8154 // RHS >> V u<= RHS for any V 8155 if (match(LHS, m_LShr(m_Specific(RHS), m_Value()))) 8156 return true; 8157 8158 // Match A to (X +_{nuw} CA) and B to (X +_{nuw} CB) 8159 auto MatchNUWAddsToSameValue = [&](const Value *A, const Value *B, 8160 const Value *&X, 8161 const APInt *&CA, const APInt *&CB) { 8162 if (match(A, m_NUWAdd(m_Value(X), m_APInt(CA))) && 8163 match(B, m_NUWAdd(m_Specific(X), m_APInt(CB)))) 8164 return true; 8165 8166 // If X & C == 0 then (X | C) == X +_{nuw} C 8167 if (match(A, m_Or(m_Value(X), m_APInt(CA))) && 8168 match(B, m_Or(m_Specific(X), m_APInt(CB)))) { 8169 KnownBits Known(CA->getBitWidth()); 8170 computeKnownBits(X, Known, DL, Depth + 1, /*AC*/ nullptr, 8171 /*CxtI*/ nullptr, /*DT*/ nullptr); 8172 if (CA->isSubsetOf(Known.Zero) && CB->isSubsetOf(Known.Zero)) 8173 return true; 8174 } 8175 8176 return false; 8177 }; 8178 8179 const Value *X; 8180 const APInt *CLHS, *CRHS; 8181 if (MatchNUWAddsToSameValue(LHS, RHS, X, CLHS, CRHS)) 8182 return CLHS->ule(*CRHS); 8183 8184 return false; 8185 } 8186 } 8187 } 8188 8189 /// Return true if "icmp Pred BLHS BRHS" is true whenever "icmp Pred 8190 /// ALHS ARHS" is true. 
Otherwise, return std::nullopt. 8191 static std::optional<bool> 8192 isImpliedCondOperands(CmpInst::Predicate Pred, const Value *ALHS, 8193 const Value *ARHS, const Value *BLHS, const Value *BRHS, 8194 const DataLayout &DL, unsigned Depth) { 8195 switch (Pred) { 8196 default: 8197 return std::nullopt; 8198 8199 case CmpInst::ICMP_SLT: 8200 case CmpInst::ICMP_SLE: 8201 if (isTruePredicate(CmpInst::ICMP_SLE, BLHS, ALHS, DL, Depth) && 8202 isTruePredicate(CmpInst::ICMP_SLE, ARHS, BRHS, DL, Depth)) 8203 return true; 8204 return std::nullopt; 8205 8206 case CmpInst::ICMP_SGT: 8207 case CmpInst::ICMP_SGE: 8208 if (isTruePredicate(CmpInst::ICMP_SLE, ALHS, BLHS, DL, Depth) && 8209 isTruePredicate(CmpInst::ICMP_SLE, BRHS, ARHS, DL, Depth)) 8210 return true; 8211 return std::nullopt; 8212 8213 case CmpInst::ICMP_ULT: 8214 case CmpInst::ICMP_ULE: 8215 if (isTruePredicate(CmpInst::ICMP_ULE, BLHS, ALHS, DL, Depth) && 8216 isTruePredicate(CmpInst::ICMP_ULE, ARHS, BRHS, DL, Depth)) 8217 return true; 8218 return std::nullopt; 8219 8220 case CmpInst::ICMP_UGT: 8221 case CmpInst::ICMP_UGE: 8222 if (isTruePredicate(CmpInst::ICMP_ULE, ALHS, BLHS, DL, Depth) && 8223 isTruePredicate(CmpInst::ICMP_ULE, BRHS, ARHS, DL, Depth)) 8224 return true; 8225 return std::nullopt; 8226 } 8227 } 8228 8229 /// Return true if the operands of two compares (expanded as "L0 pred L1" and 8230 /// "R0 pred R1") match. IsSwappedOps is true when the operands match, but are 8231 /// swapped. 8232 static bool areMatchingOperands(const Value *L0, const Value *L1, const Value *R0, 8233 const Value *R1, bool &AreSwappedOps) { 8234 bool AreMatchingOps = (L0 == R0 && L1 == R1); 8235 AreSwappedOps = (L0 == R1 && L1 == R0); 8236 return AreMatchingOps || AreSwappedOps; 8237 } 8238 8239 /// Return true if "icmp1 LPred X, Y" implies "icmp2 RPred X, Y" is true. 8240 /// Return false if "icmp1 LPred X, Y" implies "icmp2 RPred X, Y" is false. 8241 /// Otherwise, return std::nullopt if we can't infer anything. 8242 static std::optional<bool> 8243 isImpliedCondMatchingOperands(CmpInst::Predicate LPred, 8244 CmpInst::Predicate RPred, bool AreSwappedOps) { 8245 // Canonicalize the predicate as if the operands were not commuted. 8246 if (AreSwappedOps) 8247 RPred = ICmpInst::getSwappedPredicate(RPred); 8248 8249 if (CmpInst::isImpliedTrueByMatchingCmp(LPred, RPred)) 8250 return true; 8251 if (CmpInst::isImpliedFalseByMatchingCmp(LPred, RPred)) 8252 return false; 8253 8254 return std::nullopt; 8255 } 8256 8257 /// Return true if "icmp LPred X, LC" implies "icmp RPred X, RC" is true. 8258 /// Return false if "icmp LPred X, LC" implies "icmp RPred X, RC" is false. 8259 /// Otherwise, return std::nullopt if we can't infer anything. 8260 static std::optional<bool> isImpliedCondCommonOperandWithConstants( 8261 CmpInst::Predicate LPred, const APInt &LC, CmpInst::Predicate RPred, 8262 const APInt &RC) { 8263 ConstantRange DomCR = ConstantRange::makeExactICmpRegion(LPred, LC); 8264 ConstantRange CR = ConstantRange::makeExactICmpRegion(RPred, RC); 8265 ConstantRange Intersection = DomCR.intersectWith(CR); 8266 ConstantRange Difference = DomCR.difference(CR); 8267 if (Intersection.isEmptySet()) 8268 return false; 8269 if (Difference.isEmptySet()) 8270 return true; 8271 return std::nullopt; 8272 } 8273 8274 /// Return true if LHS implies RHS (expanded to its components as "R0 RPred R1") 8275 /// is true. Return false if LHS implies RHS is false. Otherwise, return 8276 /// std::nullopt if we can't infer anything. 
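/// For example (hypothetical operands), "x u< 5" being true implies
/// "x u< 10" is true, while "x u> 20" being true implies "x u< 10" is false.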
8277 static std::optional<bool> isImpliedCondICmps(const ICmpInst *LHS, 8278 CmpInst::Predicate RPred, 8279 const Value *R0, const Value *R1, 8280 const DataLayout &DL, 8281 bool LHSIsTrue, unsigned Depth) { 8282 Value *L0 = LHS->getOperand(0); 8283 Value *L1 = LHS->getOperand(1); 8284 8285 // The rest of the logic assumes the LHS condition is true. If that's not the 8286 // case, invert the predicate to make it so. 8287 CmpInst::Predicate LPred = 8288 LHSIsTrue ? LHS->getPredicate() : LHS->getInversePredicate(); 8289 8290 // Can we infer anything when the 0-operands match and the 1-operands are 8291 // constants (not necessarily matching)? 8292 const APInt *LC, *RC; 8293 if (L0 == R0 && match(L1, m_APInt(LC)) && match(R1, m_APInt(RC))) 8294 return isImpliedCondCommonOperandWithConstants(LPred, *LC, RPred, *RC); 8295 8296 // Can we infer anything when the two compares have matching operands? 8297 bool AreSwappedOps; 8298 if (areMatchingOperands(L0, L1, R0, R1, AreSwappedOps)) 8299 return isImpliedCondMatchingOperands(LPred, RPred, AreSwappedOps); 8300 8301 // L0 = R0 = L1 + R1, L0 >=u L1 implies R0 >=u R1, L0 <u L1 implies R0 <u R1 8302 if (ICmpInst::isUnsigned(LPred) && ICmpInst::isUnsigned(RPred)) { 8303 if (L0 == R1) { 8304 std::swap(R0, R1); 8305 RPred = ICmpInst::getSwappedPredicate(RPred); 8306 } 8307 if (L1 == R0) { 8308 std::swap(L0, L1); 8309 LPred = ICmpInst::getSwappedPredicate(LPred); 8310 } 8311 if (L1 == R1) { 8312 std::swap(L0, L1); 8313 LPred = ICmpInst::getSwappedPredicate(LPred); 8314 std::swap(R0, R1); 8315 RPred = ICmpInst::getSwappedPredicate(RPred); 8316 } 8317 if (L0 == R0 && 8318 (LPred == ICmpInst::ICMP_ULT || LPred == ICmpInst::ICMP_UGE) && 8319 (RPred == ICmpInst::ICMP_ULT || RPred == ICmpInst::ICMP_UGE) && 8320 match(L0, m_c_Add(m_Specific(L1), m_Specific(R1)))) 8321 return LPred == RPred; 8322 } 8323 8324 if (LPred == RPred) 8325 return isImpliedCondOperands(LPred, L0, L1, R0, R1, DL, Depth); 8326 8327 return std::nullopt; 8328 } 8329 8330 /// Return true if LHS implies RHS is true. Return false if LHS implies RHS is 8331 /// false. Otherwise, return std::nullopt if we can't infer anything. We 8332 /// expect the RHS to be an icmp and the LHS to be an 'and', 'or', or a 'select' 8333 /// instruction. 8334 static std::optional<bool> 8335 isImpliedCondAndOr(const Instruction *LHS, CmpInst::Predicate RHSPred, 8336 const Value *RHSOp0, const Value *RHSOp1, 8337 const DataLayout &DL, bool LHSIsTrue, unsigned Depth) { 8338 // The LHS must be an 'or', 'and', or a 'select' instruction. 8339 assert((LHS->getOpcode() == Instruction::And || 8340 LHS->getOpcode() == Instruction::Or || 8341 LHS->getOpcode() == Instruction::Select) && 8342 "Expected LHS to be 'and', 'or', or 'select'."); 8343 8344 assert(Depth <= MaxAnalysisRecursionDepth && "Hit recursion limit"); 8345 8346 // If the result of an 'or' is false, then we know both legs of the 'or' are 8347 // false. Similarly, if the result of an 'and' is true, then we know both 8348 // legs of the 'and' are true. 8349 const Value *ALHS, *ARHS; 8350 if ((!LHSIsTrue && match(LHS, m_LogicalOr(m_Value(ALHS), m_Value(ARHS)))) || 8351 (LHSIsTrue && match(LHS, m_LogicalAnd(m_Value(ALHS), m_Value(ARHS))))) { 8352 // FIXME: Make this non-recursion. 
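    // Each leg is tried independently below: under the assumption above, each
    // leg of the and/or has the same truth value as LHS itself, so a definite
    // implication from either leg is enough.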
8353 if (std::optional<bool> Implication = isImpliedCondition( 8354 ALHS, RHSPred, RHSOp0, RHSOp1, DL, LHSIsTrue, Depth + 1)) 8355 return Implication; 8356 if (std::optional<bool> Implication = isImpliedCondition( 8357 ARHS, RHSPred, RHSOp0, RHSOp1, DL, LHSIsTrue, Depth + 1)) 8358 return Implication; 8359 return std::nullopt; 8360 } 8361 return std::nullopt; 8362 } 8363 8364 std::optional<bool> 8365 llvm::isImpliedCondition(const Value *LHS, CmpInst::Predicate RHSPred, 8366 const Value *RHSOp0, const Value *RHSOp1, 8367 const DataLayout &DL, bool LHSIsTrue, unsigned Depth) { 8368 // Bail out when we hit the limit. 8369 if (Depth == MaxAnalysisRecursionDepth) 8370 return std::nullopt; 8371 8372 // A mismatch occurs when we compare a scalar cmp to a vector cmp, for 8373 // example. 8374 if (RHSOp0->getType()->isVectorTy() != LHS->getType()->isVectorTy()) 8375 return std::nullopt; 8376 8377 assert(LHS->getType()->isIntOrIntVectorTy(1) && 8378 "Expected integer type only!"); 8379 8380 // Both LHS and RHS are icmps. 8381 const ICmpInst *LHSCmp = dyn_cast<ICmpInst>(LHS); 8382 if (LHSCmp) 8383 return isImpliedCondICmps(LHSCmp, RHSPred, RHSOp0, RHSOp1, DL, LHSIsTrue, 8384 Depth); 8385 8386 /// The LHS should be an 'or', 'and', or a 'select' instruction. We expect 8387 /// the RHS to be an icmp. 8388 /// FIXME: Add support for and/or/select on the RHS. 8389 if (const Instruction *LHSI = dyn_cast<Instruction>(LHS)) { 8390 if ((LHSI->getOpcode() == Instruction::And || 8391 LHSI->getOpcode() == Instruction::Or || 8392 LHSI->getOpcode() == Instruction::Select)) 8393 return isImpliedCondAndOr(LHSI, RHSPred, RHSOp0, RHSOp1, DL, LHSIsTrue, 8394 Depth); 8395 } 8396 return std::nullopt; 8397 } 8398 8399 std::optional<bool> llvm::isImpliedCondition(const Value *LHS, const Value *RHS, 8400 const DataLayout &DL, 8401 bool LHSIsTrue, unsigned Depth) { 8402 // LHS ==> RHS by definition 8403 if (LHS == RHS) 8404 return LHSIsTrue; 8405 8406 if (const ICmpInst *RHSCmp = dyn_cast<ICmpInst>(RHS)) 8407 return isImpliedCondition(LHS, RHSCmp->getPredicate(), 8408 RHSCmp->getOperand(0), RHSCmp->getOperand(1), DL, 8409 LHSIsTrue, Depth); 8410 8411 if (Depth == MaxAnalysisRecursionDepth) 8412 return std::nullopt; 8413 8414 // LHS ==> (RHS1 || RHS2) if LHS ==> RHS1 or LHS ==> RHS2 8415 // LHS ==> !(RHS1 && RHS2) if LHS ==> !RHS1 or LHS ==> !RHS2 8416 const Value *RHS1, *RHS2; 8417 if (match(RHS, m_LogicalOr(m_Value(RHS1), m_Value(RHS2)))) { 8418 if (std::optional<bool> Imp = 8419 isImpliedCondition(LHS, RHS1, DL, LHSIsTrue, Depth + 1)) 8420 if (*Imp == true) 8421 return true; 8422 if (std::optional<bool> Imp = 8423 isImpliedCondition(LHS, RHS2, DL, LHSIsTrue, Depth + 1)) 8424 if (*Imp == true) 8425 return true; 8426 } 8427 if (match(RHS, m_LogicalAnd(m_Value(RHS1), m_Value(RHS2)))) { 8428 if (std::optional<bool> Imp = 8429 isImpliedCondition(LHS, RHS1, DL, LHSIsTrue, Depth + 1)) 8430 if (*Imp == false) 8431 return false; 8432 if (std::optional<bool> Imp = 8433 isImpliedCondition(LHS, RHS2, DL, LHSIsTrue, Depth + 1)) 8434 if (*Imp == false) 8435 return false; 8436 } 8437 8438 return std::nullopt; 8439 } 8440 8441 // Returns a pair (Condition, ConditionIsTrue), where Condition is a branch 8442 // condition dominating ContextI or nullptr, if no condition is found. 8443 static std::pair<Value *, bool> 8444 getDomPredecessorCondition(const Instruction *ContextI) { 8445 if (!ContextI || !ContextI->getParent()) 8446 return {nullptr, false}; 8447 8448 // TODO: This is a poor/cheap way to determine dominance. 
Should we use a 8449 // dominator tree (eg, from a SimplifyQuery) instead? 8450 const BasicBlock *ContextBB = ContextI->getParent(); 8451 const BasicBlock *PredBB = ContextBB->getSinglePredecessor(); 8452 if (!PredBB) 8453 return {nullptr, false}; 8454 8455 // We need a conditional branch in the predecessor. 8456 Value *PredCond; 8457 BasicBlock *TrueBB, *FalseBB; 8458 if (!match(PredBB->getTerminator(), m_Br(m_Value(PredCond), TrueBB, FalseBB))) 8459 return {nullptr, false}; 8460 8461 // The branch should get simplified. Don't bother simplifying this condition. 8462 if (TrueBB == FalseBB) 8463 return {nullptr, false}; 8464 8465 assert((TrueBB == ContextBB || FalseBB == ContextBB) && 8466 "Predecessor block does not point to successor?"); 8467 8468 // Is this condition implied by the predecessor condition? 8469 return {PredCond, TrueBB == ContextBB}; 8470 } 8471 8472 std::optional<bool> llvm::isImpliedByDomCondition(const Value *Cond, 8473 const Instruction *ContextI, 8474 const DataLayout &DL) { 8475 assert(Cond->getType()->isIntOrIntVectorTy(1) && "Condition must be bool"); 8476 auto PredCond = getDomPredecessorCondition(ContextI); 8477 if (PredCond.first) 8478 return isImpliedCondition(PredCond.first, Cond, DL, PredCond.second); 8479 return std::nullopt; 8480 } 8481 8482 std::optional<bool> llvm::isImpliedByDomCondition(CmpInst::Predicate Pred, 8483 const Value *LHS, 8484 const Value *RHS, 8485 const Instruction *ContextI, 8486 const DataLayout &DL) { 8487 auto PredCond = getDomPredecessorCondition(ContextI); 8488 if (PredCond.first) 8489 return isImpliedCondition(PredCond.first, Pred, LHS, RHS, DL, 8490 PredCond.second); 8491 return std::nullopt; 8492 } 8493 8494 static void setLimitsForBinOp(const BinaryOperator &BO, APInt &Lower, 8495 APInt &Upper, const InstrInfoQuery &IIQ, 8496 bool PreferSignedRange) { 8497 unsigned Width = Lower.getBitWidth(); 8498 const APInt *C; 8499 switch (BO.getOpcode()) { 8500 case Instruction::Add: 8501 if (match(BO.getOperand(1), m_APInt(C)) && !C->isZero()) { 8502 bool HasNSW = IIQ.hasNoSignedWrap(&BO); 8503 bool HasNUW = IIQ.hasNoUnsignedWrap(&BO); 8504 8505 // If the caller expects a signed compare, then try to use a signed range. 8506 // Otherwise if both no-wraps are set, use the unsigned range because it 8507 // is never larger than the signed range. Example: 8508 // "add nuw nsw i8 X, -2" is unsigned [254,255] vs. signed [-128, 125]. 8509 if (PreferSignedRange && HasNSW && HasNUW) 8510 HasNUW = false; 8511 8512 if (HasNUW) { 8513 // 'add nuw x, C' produces [C, UINT_MAX]. 8514 Lower = *C; 8515 } else if (HasNSW) { 8516 if (C->isNegative()) { 8517 // 'add nsw x, -C' produces [SINT_MIN, SINT_MAX - C]. 8518 Lower = APInt::getSignedMinValue(Width); 8519 Upper = APInt::getSignedMaxValue(Width) + *C + 1; 8520 } else { 8521 // 'add nsw x, +C' produces [SINT_MIN + C, SINT_MAX]. 8522 Lower = APInt::getSignedMinValue(Width) + *C; 8523 Upper = APInt::getSignedMaxValue(Width) + 1; 8524 } 8525 } 8526 } 8527 break; 8528 8529 case Instruction::And: 8530 if (match(BO.getOperand(1), m_APInt(C))) 8531 // 'and x, C' produces [0, C]. 8532 Upper = *C + 1; 8533 // X & -X is a power of two or zero. So we can cap the value at max power of 8534 // two. 8535 if (match(BO.getOperand(0), m_Neg(m_Specific(BO.getOperand(1)))) || 8536 match(BO.getOperand(1), m_Neg(m_Specific(BO.getOperand(0))))) 8537 Upper = APInt::getSignedMinValue(Width) + 1; 8538 break; 8539 8540 case Instruction::Or: 8541 if (match(BO.getOperand(1), m_APInt(C))) 8542 // 'or x, C' produces [C, UINT_MAX]. 
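      // e.g. (hypothetical) "or i8 %x, 16" is always >= 16; only the lower
      // bound improves here, while Upper keeps its default.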
      Lower = *C;
    break;

  case Instruction::AShr:
    if (match(BO.getOperand(1), m_APInt(C)) && C->ult(Width)) {
      // 'ashr x, C' produces [INT_MIN >> C, INT_MAX >> C].
      Lower = APInt::getSignedMinValue(Width).ashr(*C);
      Upper = APInt::getSignedMaxValue(Width).ashr(*C) + 1;
    } else if (match(BO.getOperand(0), m_APInt(C))) {
      unsigned ShiftAmount = Width - 1;
      if (!C->isZero() && IIQ.isExact(&BO))
        ShiftAmount = C->countr_zero();
      if (C->isNegative()) {
        // 'ashr C, x' produces [C, C >> (Width-1)]
        Lower = *C;
        Upper = C->ashr(ShiftAmount) + 1;
      } else {
        // 'ashr C, x' produces [C >> (Width-1), C]
        Lower = C->ashr(ShiftAmount);
        Upper = *C + 1;
      }
    }
    break;

  case Instruction::LShr:
    if (match(BO.getOperand(1), m_APInt(C)) && C->ult(Width)) {
      // 'lshr x, C' produces [0, UINT_MAX >> C].
      Upper = APInt::getAllOnes(Width).lshr(*C) + 1;
    } else if (match(BO.getOperand(0), m_APInt(C))) {
      // 'lshr C, x' produces [C >> (Width-1), C].
      unsigned ShiftAmount = Width - 1;
      if (!C->isZero() && IIQ.isExact(&BO))
        ShiftAmount = C->countr_zero();
      Lower = C->lshr(ShiftAmount);
      Upper = *C + 1;
    }
    break;

  case Instruction::Shl:
    if (match(BO.getOperand(0), m_APInt(C))) {
      if (IIQ.hasNoUnsignedWrap(&BO)) {
        // 'shl nuw C, x' produces [C, C << CLZ(C)]
        Lower = *C;
        Upper = Lower.shl(Lower.countl_zero()) + 1;
      } else if (BO.hasNoSignedWrap()) { // TODO: What if both nuw+nsw?
        if (C->isNegative()) {
          // 'shl nsw C, x' produces [C << CLO(C)-1, C]
          unsigned ShiftAmount = C->countl_one() - 1;
          Lower = C->shl(ShiftAmount);
          Upper = *C + 1;
        } else {
          // 'shl nsw C, x' produces [C, C << CLZ(C)-1]
          unsigned ShiftAmount = C->countl_zero() - 1;
          Lower = *C;
          Upper = C->shl(ShiftAmount) + 1;
        }
      } else {
        // If the low bit is set, the value can never be zero.
        if ((*C)[0])
          Lower = APInt::getOneBitSet(Width, 0);
        // If we are shifting a constant, the largest the result can be is when
        // the longest sequence of consecutive ones lands in the high bits
        // (breaking ties in favor of the higher sequence). For now we take a
        // liberal upper bound by just popcounting the constant.
        // TODO: There may be a bitwise trick for the longest/highest
        // consecutive sequence of ones (the naive method is an O(Width) loop).
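        // e.g. (hypothetical i8) C = 0x53 has popcount 4, so any "shl C, x"
        // result has at most 4 set bits with its top bit at or below bit 7,
        // and hence can never exceed 0xF0.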
8609 Upper = APInt::getHighBitsSet(Width, C->popcount()) + 1; 8610 } 8611 } else if (match(BO.getOperand(1), m_APInt(C)) && C->ult(Width)) { 8612 Upper = APInt::getBitsSetFrom(Width, C->getZExtValue()) + 1; 8613 } 8614 break; 8615 8616 case Instruction::SDiv: 8617 if (match(BO.getOperand(1), m_APInt(C))) { 8618 APInt IntMin = APInt::getSignedMinValue(Width); 8619 APInt IntMax = APInt::getSignedMaxValue(Width); 8620 if (C->isAllOnes()) { 8621 // 'sdiv x, -1' produces [INT_MIN + 1, INT_MAX] 8622 // where C != -1 and C != 0 and C != 1 8623 Lower = IntMin + 1; 8624 Upper = IntMax + 1; 8625 } else if (C->countl_zero() < Width - 1) { 8626 // 'sdiv x, C' produces [INT_MIN / C, INT_MAX / C] 8627 // where C != -1 and C != 0 and C != 1 8628 Lower = IntMin.sdiv(*C); 8629 Upper = IntMax.sdiv(*C); 8630 if (Lower.sgt(Upper)) 8631 std::swap(Lower, Upper); 8632 Upper = Upper + 1; 8633 assert(Upper != Lower && "Upper part of range has wrapped!"); 8634 } 8635 } else if (match(BO.getOperand(0), m_APInt(C))) { 8636 if (C->isMinSignedValue()) { 8637 // 'sdiv INT_MIN, x' produces [INT_MIN, INT_MIN / -2]. 8638 Lower = *C; 8639 Upper = Lower.lshr(1) + 1; 8640 } else { 8641 // 'sdiv C, x' produces [-|C|, |C|]. 8642 Upper = C->abs() + 1; 8643 Lower = (-Upper) + 1; 8644 } 8645 } 8646 break; 8647 8648 case Instruction::UDiv: 8649 if (match(BO.getOperand(1), m_APInt(C)) && !C->isZero()) { 8650 // 'udiv x, C' produces [0, UINT_MAX / C]. 8651 Upper = APInt::getMaxValue(Width).udiv(*C) + 1; 8652 } else if (match(BO.getOperand(0), m_APInt(C))) { 8653 // 'udiv C, x' produces [0, C]. 8654 Upper = *C + 1; 8655 } 8656 break; 8657 8658 case Instruction::SRem: 8659 if (match(BO.getOperand(1), m_APInt(C))) { 8660 // 'srem x, C' produces (-|C|, |C|). 8661 Upper = C->abs(); 8662 Lower = (-Upper) + 1; 8663 } 8664 break; 8665 8666 case Instruction::URem: 8667 if (match(BO.getOperand(1), m_APInt(C))) 8668 // 'urem x, C' produces [0, C). 8669 Upper = *C; 8670 break; 8671 8672 default: 8673 break; 8674 } 8675 } 8676 8677 static ConstantRange getRangeForIntrinsic(const IntrinsicInst &II) { 8678 unsigned Width = II.getType()->getScalarSizeInBits(); 8679 const APInt *C; 8680 switch (II.getIntrinsicID()) { 8681 case Intrinsic::ctpop: 8682 case Intrinsic::ctlz: 8683 case Intrinsic::cttz: 8684 // Maximum of set/clear bits is the bit width. 8685 return ConstantRange::getNonEmpty(APInt::getZero(Width), 8686 APInt(Width, Width + 1)); 8687 case Intrinsic::uadd_sat: 8688 // uadd.sat(x, C) produces [C, UINT_MAX]. 8689 if (match(II.getOperand(0), m_APInt(C)) || 8690 match(II.getOperand(1), m_APInt(C))) 8691 return ConstantRange::getNonEmpty(*C, APInt::getZero(Width)); 8692 break; 8693 case Intrinsic::sadd_sat: 8694 if (match(II.getOperand(0), m_APInt(C)) || 8695 match(II.getOperand(1), m_APInt(C))) { 8696 if (C->isNegative()) 8697 // sadd.sat(x, -C) produces [SINT_MIN, SINT_MAX + (-C)]. 8698 return ConstantRange::getNonEmpty(APInt::getSignedMinValue(Width), 8699 APInt::getSignedMaxValue(Width) + *C + 8700 1); 8701 8702 // sadd.sat(x, +C) produces [SINT_MIN + C, SINT_MAX]. 8703 return ConstantRange::getNonEmpty(APInt::getSignedMinValue(Width) + *C, 8704 APInt::getSignedMaxValue(Width) + 1); 8705 } 8706 break; 8707 case Intrinsic::usub_sat: 8708 // usub.sat(C, x) produces [0, C]. 8709 if (match(II.getOperand(0), m_APInt(C))) 8710 return ConstantRange::getNonEmpty(APInt::getZero(Width), *C + 1); 8711 8712 // usub.sat(x, C) produces [0, UINT_MAX - C]. 
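    // e.g. (hypothetical) usub.sat(i8 %x, 200) is at most 255 - 200 = 55.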
8713 if (match(II.getOperand(1), m_APInt(C))) 8714 return ConstantRange::getNonEmpty(APInt::getZero(Width), 8715 APInt::getMaxValue(Width) - *C + 1); 8716 break; 8717 case Intrinsic::ssub_sat: 8718 if (match(II.getOperand(0), m_APInt(C))) { 8719 if (C->isNegative()) 8720 // ssub.sat(-C, x) produces [SINT_MIN, -SINT_MIN + (-C)]. 8721 return ConstantRange::getNonEmpty(APInt::getSignedMinValue(Width), 8722 *C - APInt::getSignedMinValue(Width) + 8723 1); 8724 8725 // ssub.sat(+C, x) produces [-SINT_MAX + C, SINT_MAX]. 8726 return ConstantRange::getNonEmpty(*C - APInt::getSignedMaxValue(Width), 8727 APInt::getSignedMaxValue(Width) + 1); 8728 } else if (match(II.getOperand(1), m_APInt(C))) { 8729 if (C->isNegative()) 8730 // ssub.sat(x, -C) produces [SINT_MIN - (-C), SINT_MAX]: 8731 return ConstantRange::getNonEmpty(APInt::getSignedMinValue(Width) - *C, 8732 APInt::getSignedMaxValue(Width) + 1); 8733 8734 // ssub.sat(x, +C) produces [SINT_MIN, SINT_MAX - C]. 8735 return ConstantRange::getNonEmpty(APInt::getSignedMinValue(Width), 8736 APInt::getSignedMaxValue(Width) - *C + 8737 1); 8738 } 8739 break; 8740 case Intrinsic::umin: 8741 case Intrinsic::umax: 8742 case Intrinsic::smin: 8743 case Intrinsic::smax: 8744 if (!match(II.getOperand(0), m_APInt(C)) && 8745 !match(II.getOperand(1), m_APInt(C))) 8746 break; 8747 8748 switch (II.getIntrinsicID()) { 8749 case Intrinsic::umin: 8750 return ConstantRange::getNonEmpty(APInt::getZero(Width), *C + 1); 8751 case Intrinsic::umax: 8752 return ConstantRange::getNonEmpty(*C, APInt::getZero(Width)); 8753 case Intrinsic::smin: 8754 return ConstantRange::getNonEmpty(APInt::getSignedMinValue(Width), 8755 *C + 1); 8756 case Intrinsic::smax: 8757 return ConstantRange::getNonEmpty(*C, 8758 APInt::getSignedMaxValue(Width) + 1); 8759 default: 8760 llvm_unreachable("Must be min/max intrinsic"); 8761 } 8762 break; 8763 case Intrinsic::abs: 8764 // If abs of SIGNED_MIN is poison, then the result is [0..SIGNED_MAX], 8765 // otherwise it is [0..SIGNED_MIN], as -SIGNED_MIN == SIGNED_MIN. 8766 if (match(II.getOperand(1), m_One())) 8767 return ConstantRange::getNonEmpty(APInt::getZero(Width), 8768 APInt::getSignedMaxValue(Width) + 1); 8769 8770 return ConstantRange::getNonEmpty(APInt::getZero(Width), 8771 APInt::getSignedMinValue(Width) + 1); 8772 case Intrinsic::vscale: 8773 if (!II.getParent() || !II.getFunction()) 8774 break; 8775 return getVScaleRange(II.getFunction(), Width); 8776 default: 8777 break; 8778 } 8779 8780 return ConstantRange::getFull(Width); 8781 } 8782 8783 static ConstantRange getRangeForSelectPattern(const SelectInst &SI, 8784 const InstrInfoQuery &IIQ) { 8785 unsigned BitWidth = SI.getType()->getScalarSizeInBits(); 8786 const Value *LHS = nullptr, *RHS = nullptr; 8787 SelectPatternResult R = matchSelectPattern(&SI, LHS, RHS); 8788 if (R.Flavor == SPF_UNKNOWN) 8789 return ConstantRange::getFull(BitWidth); 8790 8791 if (R.Flavor == SelectPatternFlavor::SPF_ABS) { 8792 // If the negation part of the abs (in RHS) has the NSW flag, 8793 // then the result of abs(X) is [0..SIGNED_MAX], 8794 // otherwise it is [0..SIGNED_MIN], as -SIGNED_MIN == SIGNED_MIN. 
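    // e.g. (hypothetical i8) without nsw, abs(-128) wraps back to -128 (0x80),
    // so the upper bound must also admit SIGNED_MIN.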
8795 if (match(RHS, m_Neg(m_Specific(LHS))) && 8796 IIQ.hasNoSignedWrap(cast<Instruction>(RHS))) 8797 return ConstantRange::getNonEmpty(APInt::getZero(BitWidth), 8798 APInt::getSignedMaxValue(BitWidth) + 1); 8799 8800 return ConstantRange::getNonEmpty(APInt::getZero(BitWidth), 8801 APInt::getSignedMinValue(BitWidth) + 1); 8802 } 8803 8804 if (R.Flavor == SelectPatternFlavor::SPF_NABS) { 8805 // The result of -abs(X) is <= 0. 8806 return ConstantRange::getNonEmpty(APInt::getSignedMinValue(BitWidth), 8807 APInt(BitWidth, 1)); 8808 } 8809 8810 const APInt *C; 8811 if (!match(LHS, m_APInt(C)) && !match(RHS, m_APInt(C))) 8812 return ConstantRange::getFull(BitWidth); 8813 8814 switch (R.Flavor) { 8815 case SPF_UMIN: 8816 return ConstantRange::getNonEmpty(APInt::getZero(BitWidth), *C + 1); 8817 case SPF_UMAX: 8818 return ConstantRange::getNonEmpty(*C, APInt::getZero(BitWidth)); 8819 case SPF_SMIN: 8820 return ConstantRange::getNonEmpty(APInt::getSignedMinValue(BitWidth), 8821 *C + 1); 8822 case SPF_SMAX: 8823 return ConstantRange::getNonEmpty(*C, 8824 APInt::getSignedMaxValue(BitWidth) + 1); 8825 default: 8826 return ConstantRange::getFull(BitWidth); 8827 } 8828 } 8829 8830 static void setLimitForFPToI(const Instruction *I, APInt &Lower, APInt &Upper) { 8831 // The maximum representable value of a half is 65504. For floats the maximum 8832 // value is 3.4e38 which requires roughly 129 bits. 8833 unsigned BitWidth = I->getType()->getScalarSizeInBits(); 8834 if (!I->getOperand(0)->getType()->getScalarType()->isHalfTy()) 8835 return; 8836 if (isa<FPToSIInst>(I) && BitWidth >= 17) { 8837 Lower = APInt(BitWidth, -65504); 8838 Upper = APInt(BitWidth, 65505); 8839 } 8840 8841 if (isa<FPToUIInst>(I) && BitWidth >= 16) { 8842 // For a fptoui the lower limit is left as 0. 8843 Upper = APInt(BitWidth, 65505); 8844 } 8845 } 8846 8847 ConstantRange llvm::computeConstantRange(const Value *V, bool ForSigned, 8848 bool UseInstrInfo, AssumptionCache *AC, 8849 const Instruction *CtxI, 8850 const DominatorTree *DT, 8851 unsigned Depth) { 8852 assert(V->getType()->isIntOrIntVectorTy() && "Expected integer instruction"); 8853 8854 if (Depth == MaxAnalysisRecursionDepth) 8855 return ConstantRange::getFull(V->getType()->getScalarSizeInBits()); 8856 8857 const APInt *C; 8858 if (match(V, m_APInt(C))) 8859 return ConstantRange(*C); 8860 unsigned BitWidth = V->getType()->getScalarSizeInBits(); 8861 8862 if (auto *VC = dyn_cast<ConstantDataVector>(V)) { 8863 ConstantRange CR = ConstantRange::getEmpty(BitWidth); 8864 for (unsigned ElemIdx = 0, NElem = VC->getNumElements(); ElemIdx < NElem; 8865 ++ElemIdx) 8866 CR = CR.unionWith(VC->getElementAsAPInt(ElemIdx)); 8867 return CR; 8868 } 8869 8870 InstrInfoQuery IIQ(UseInstrInfo); 8871 ConstantRange CR = ConstantRange::getFull(BitWidth); 8872 if (auto *BO = dyn_cast<BinaryOperator>(V)) { 8873 APInt Lower = APInt(BitWidth, 0); 8874 APInt Upper = APInt(BitWidth, 0); 8875 // TODO: Return ConstantRange. 
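    // Lower == Upper == 0 is the "no information" encoding here:
    // ConstantRange::getNonEmpty maps equal bounds to the full range.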
8876 setLimitsForBinOp(*BO, Lower, Upper, IIQ, ForSigned); 8877 CR = ConstantRange::getNonEmpty(Lower, Upper); 8878 } else if (auto *II = dyn_cast<IntrinsicInst>(V)) 8879 CR = getRangeForIntrinsic(*II); 8880 else if (auto *SI = dyn_cast<SelectInst>(V)) { 8881 ConstantRange CRTrue = computeConstantRange( 8882 SI->getTrueValue(), ForSigned, UseInstrInfo, AC, CtxI, DT, Depth + 1); 8883 ConstantRange CRFalse = computeConstantRange( 8884 SI->getFalseValue(), ForSigned, UseInstrInfo, AC, CtxI, DT, Depth + 1); 8885 CR = CRTrue.unionWith(CRFalse); 8886 CR = CR.intersectWith(getRangeForSelectPattern(*SI, IIQ)); 8887 } else if (isa<FPToUIInst>(V) || isa<FPToSIInst>(V)) { 8888 APInt Lower = APInt(BitWidth, 0); 8889 APInt Upper = APInt(BitWidth, 0); 8890 // TODO: Return ConstantRange. 8891 setLimitForFPToI(cast<Instruction>(V), Lower, Upper); 8892 CR = ConstantRange::getNonEmpty(Lower, Upper); 8893 } 8894 8895 if (auto *I = dyn_cast<Instruction>(V)) 8896 if (auto *Range = IIQ.getMetadata(I, LLVMContext::MD_range)) 8897 CR = CR.intersectWith(getConstantRangeFromMetadata(*Range)); 8898 8899 if (CtxI && AC) { 8900 // Try to restrict the range based on information from assumptions. 8901 for (auto &AssumeVH : AC->assumptionsFor(V)) { 8902 if (!AssumeVH) 8903 continue; 8904 CallInst *I = cast<CallInst>(AssumeVH); 8905 assert(I->getParent()->getParent() == CtxI->getParent()->getParent() && 8906 "Got assumption for the wrong function!"); 8907 assert(I->getCalledFunction()->getIntrinsicID() == Intrinsic::assume && 8908 "must be an assume intrinsic"); 8909 8910 if (!isValidAssumeForContext(I, CtxI, DT)) 8911 continue; 8912 Value *Arg = I->getArgOperand(0); 8913 ICmpInst *Cmp = dyn_cast<ICmpInst>(Arg); 8914 // Currently we just use information from comparisons. 8915 if (!Cmp || Cmp->getOperand(0) != V) 8916 continue; 8917 // TODO: Set "ForSigned" parameter via Cmp->isSigned()? 8918 ConstantRange RHS = 8919 computeConstantRange(Cmp->getOperand(1), /* ForSigned */ false, 8920 UseInstrInfo, AC, I, DT, Depth + 1); 8921 CR = CR.intersectWith( 8922 ConstantRange::makeAllowedICmpRegion(Cmp->getPredicate(), RHS)); 8923 } 8924 } 8925 8926 return CR; 8927 } 8928
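// A minimal usage sketch for the assumption-based clamping above (all names
// are hypothetical): given
//
//   %c = icmp ult i32 %x, 100
//   call void @llvm.assume(i1 %c)
//
// a query computeConstantRange(%x, /*ForSigned=*/false, /*UseInstrInfo=*/true,
// &AC, CtxI, &DT) intersects the result with [0, 100), the allowed region for
// "ult 100", provided isValidAssumeForContext accepts the assume at CtxI.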