//===- InstCombineMulDivRem.cpp -------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the visit functions for mul, fmul, sdiv, udiv, fdiv,
// srem, urem, frem.
//
//===----------------------------------------------------------------------===//

#include "InstCombineInternal.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"
#include <cassert>

#define DEBUG_TYPE "instcombine"
#include "llvm/Transforms/Utils/InstructionWorklist.h"

using namespace llvm;
using namespace PatternMatch;

/// The specific integer value is used in a context where it is known to be
/// non-zero. If this allows us to simplify the computation, do so and return
/// the new operand, otherwise return null.
static Value *simplifyValueKnownNonZero(Value *V, InstCombinerImpl &IC,
                                        Instruction &CxtI) {
  // If V has multiple uses, then we would have to do more analysis to
  // determine if this is safe. For example, the use could be in dynamically
  // unreached code.
  if (!V->hasOneUse()) return nullptr;

  bool MadeChange = false;

  // ((1 << A) >>u B) --> (1 << (A-B))
  // Because V cannot be zero, we know that B is less than A.
  Value *A = nullptr, *B = nullptr, *One = nullptr;
  if (match(V, m_LShr(m_OneUse(m_Shl(m_Value(One), m_Value(A))), m_Value(B))) &&
      match(One, m_One())) {
    A = IC.Builder.CreateSub(A, B);
    return IC.Builder.CreateShl(One, A);
  }

  // (PowerOfTwo >>u B) --> isExact since shifting out the result would make it
  // inexact. Similarly for <<.
  BinaryOperator *I = dyn_cast<BinaryOperator>(V);
  if (I && I->isLogicalShift() &&
      IC.isKnownToBeAPowerOfTwo(I->getOperand(0), false, 0, &CxtI)) {
    // We know that this is an exact/nuw shift and that the input is a
    // non-zero context as well.
    if (Value *V2 = simplifyValueKnownNonZero(I->getOperand(0), IC, CxtI)) {
      IC.replaceOperand(*I, 0, V2);
      MadeChange = true;
    }

    if (I->getOpcode() == Instruction::LShr && !I->isExact()) {
      I->setIsExact();
      MadeChange = true;
    }

    if (I->getOpcode() == Instruction::Shl && !I->hasNoUnsignedWrap()) {
      I->setHasNoUnsignedWrap();
      MadeChange = true;
    }
  }

  // TODO: Lots more we could do here:
  //    If V is a phi node, we can call this on each of its operands.
  //    "select cond, X, 0" can simplify to "X".

  return MadeChange ? V : nullptr;
}

// TODO: This is a specific form of a much more general pattern.
//       We could detect a select with any binop identity constant, or we
//       could use SimplifyBinOp to see if either arm of the select reduces.
//       But that needs to be done carefully and/or while removing potential
//       reverse canonicalizations as in InstCombiner::foldSelectIntoOp().
static Value *foldMulSelectToNegate(BinaryOperator &I,
                                    InstCombiner::BuilderTy &Builder) {
  Value *Cond, *OtherOp;

  // mul (select Cond, 1, -1), OtherOp --> select Cond, OtherOp, -OtherOp
  // mul OtherOp, (select Cond, 1, -1) --> select Cond, OtherOp, -OtherOp
  if (match(&I, m_c_Mul(m_OneUse(m_Select(m_Value(Cond), m_One(), m_AllOnes())),
                        m_Value(OtherOp)))) {
    bool HasAnyNoWrap = I.hasNoSignedWrap() || I.hasNoUnsignedWrap();
    Value *Neg = Builder.CreateNeg(OtherOp, "", HasAnyNoWrap);
    return Builder.CreateSelect(Cond, OtherOp, Neg);
  }
  // mul (select Cond, -1, 1), OtherOp --> select Cond, -OtherOp, OtherOp
  // mul OtherOp, (select Cond, -1, 1) --> select Cond, -OtherOp, OtherOp
  if (match(&I, m_c_Mul(m_OneUse(m_Select(m_Value(Cond), m_AllOnes(), m_One())),
                        m_Value(OtherOp)))) {
    bool HasAnyNoWrap = I.hasNoSignedWrap() || I.hasNoUnsignedWrap();
    Value *Neg = Builder.CreateNeg(OtherOp, "", HasAnyNoWrap);
    return Builder.CreateSelect(Cond, Neg, OtherOp);
  }

  // fmul (select Cond, 1.0, -1.0), OtherOp --> select Cond, OtherOp, -OtherOp
  // fmul OtherOp, (select Cond, 1.0, -1.0) --> select Cond, OtherOp, -OtherOp
  if (match(&I, m_c_FMul(m_OneUse(m_Select(m_Value(Cond), m_SpecificFP(1.0),
                                           m_SpecificFP(-1.0))),
                         m_Value(OtherOp))))
    return Builder.CreateSelectFMF(Cond, OtherOp,
                                   Builder.CreateFNegFMF(OtherOp, &I), &I);

  // fmul (select Cond, -1.0, 1.0), OtherOp --> select Cond, -OtherOp, OtherOp
  // fmul OtherOp, (select Cond, -1.0, 1.0) --> select Cond, -OtherOp, OtherOp
  if (match(&I, m_c_FMul(m_OneUse(m_Select(m_Value(Cond), m_SpecificFP(-1.0),
                                           m_SpecificFP(1.0))),
                         m_Value(OtherOp))))
    return Builder.CreateSelectFMF(Cond, Builder.CreateFNegFMF(OtherOp, &I),
                                   OtherOp, &I);

  return nullptr;
}

/// Reduce integer multiplication patterns that contain a (+/-1 << Z) factor.
/// Callers are expected to call this twice to handle commuted patterns.
static Value *foldMulShl1(BinaryOperator &Mul, bool CommuteOperands,
                          InstCombiner::BuilderTy &Builder) {
  Value *X = Mul.getOperand(0), *Y = Mul.getOperand(1);
  if (CommuteOperands)
    std::swap(X, Y);

  const bool HasNSW = Mul.hasNoSignedWrap();
  const bool HasNUW = Mul.hasNoUnsignedWrap();

  // X * (1 << Z) --> X << Z
  Value *Z;
  if (match(Y, m_Shl(m_One(), m_Value(Z)))) {
    bool PropagateNSW = HasNSW && cast<ShlOperator>(Y)->hasNoSignedWrap();
    return Builder.CreateShl(X, Z, Mul.getName(), HasNUW, PropagateNSW);
  }

  // Similar to above, but an increment of the shifted value becomes an add:
  // X * ((1 << Z) + 1) --> (X * (1 << Z)) + X --> (X << Z) + X
  // This increases uses of X, so it may require a freeze, but that is still
  // expected to be an improvement because it removes the multiply.
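  // A sketch of the rewrite in IR (names are illustrative only):
  //   %s = shl i32 1, %z
  //   %y = add i32 %s, 1
  //   %m = mul i32 %x, %y
  // becomes
  //   %x.fr = freeze i32 %x      ; only if %x may be undef/poison
  //   %shl  = shl i32 %x.fr, %z
  //   %m    = add i32 %shl, %x.fr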
  BinaryOperator *Shift;
  if (match(Y, m_OneUse(m_Add(m_BinOp(Shift), m_One()))) &&
      match(Shift, m_OneUse(m_Shl(m_One(), m_Value(Z))))) {
    bool PropagateNSW = HasNSW && Shift->hasNoSignedWrap();
    Value *FrX = X;
    if (!isGuaranteedNotToBeUndef(X))
      FrX = Builder.CreateFreeze(X, X->getName() + ".fr");
    Value *Shl = Builder.CreateShl(FrX, Z, "mulshl", HasNUW, PropagateNSW);
    return Builder.CreateAdd(Shl, FrX, Mul.getName(), HasNUW, PropagateNSW);
  }

  // Similar to above, but a decrement of the shifted value is disguised as
  // 'not' and becomes a sub:
  // X * (~(-1 << Z)) --> X * ((1 << Z) - 1) --> (X << Z) - X
  // This increases uses of X, so it may require a freeze, but that is still
  // expected to be an improvement because it removes the multiply.
  if (match(Y, m_OneUse(m_Not(m_OneUse(m_Shl(m_AllOnes(), m_Value(Z))))))) {
    Value *FrX = X;
    if (!isGuaranteedNotToBeUndef(X))
      FrX = Builder.CreateFreeze(X, X->getName() + ".fr");
    Value *Shl = Builder.CreateShl(FrX, Z, "mulshl");
    return Builder.CreateSub(Shl, FrX, Mul.getName());
  }

  return nullptr;
}

Instruction *InstCombinerImpl::visitMul(BinaryOperator &I) {
  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
  if (Value *V =
          simplifyMulInst(Op0, Op1, I.hasNoSignedWrap(), I.hasNoUnsignedWrap(),
                          SQ.getWithInstruction(&I)))
    return replaceInstUsesWith(I, V);

  if (SimplifyAssociativeOrCommutative(I))
    return &I;

  if (Instruction *X = foldVectorBinop(I))
    return X;

  if (Instruction *Phi = foldBinopWithPhiOperands(I))
    return Phi;

  if (Value *V = foldUsingDistributiveLaws(I))
    return replaceInstUsesWith(I, V);

  Type *Ty = I.getType();
  const unsigned BitWidth = Ty->getScalarSizeInBits();
  const bool HasNSW = I.hasNoSignedWrap();
  const bool HasNUW = I.hasNoUnsignedWrap();

  // X * -1 --> 0 - X
  if (match(Op1, m_AllOnes())) {
    return HasNSW ? BinaryOperator::CreateNSWNeg(Op0)
                  : BinaryOperator::CreateNeg(Op0);
  }

  // Also allow combining multiply instructions on vectors.
  {
    Value *NewOp;
    Constant *C1, *C2;
    const APInt *IVal;
    if (match(&I, m_Mul(m_Shl(m_Value(NewOp), m_ImmConstant(C2)),
                        m_ImmConstant(C1))) &&
        match(C1, m_APInt(IVal))) {
      // ((X << C2)*C1) == (X * (C1 << C2))
      Constant *Shl =
          ConstantFoldBinaryOpOperands(Instruction::Shl, C1, C2, DL);
      assert(Shl && "Constant folding of immediate constants failed");
      BinaryOperator *Mul = cast<BinaryOperator>(I.getOperand(0));
      BinaryOperator *BO = BinaryOperator::CreateMul(NewOp, Shl);
      if (HasNUW && Mul->hasNoUnsignedWrap())
        BO->setHasNoUnsignedWrap();
      if (HasNSW && Mul->hasNoSignedWrap() && Shl->isNotMinSignedValue())
        BO->setHasNoSignedWrap();
      return BO;
    }

    if (match(&I, m_Mul(m_Value(NewOp), m_Constant(C1)))) {
      // Replace X*(2^C) with X << C, where C is either a scalar or a vector.
      if (Constant *NewCst = ConstantExpr::getExactLogBase2(C1)) {
        BinaryOperator *Shl = BinaryOperator::CreateShl(NewOp, NewCst);

        if (HasNUW)
          Shl->setHasNoUnsignedWrap();
        if (HasNSW) {
          const APInt *V;
          if (match(NewCst, m_APInt(V)) && *V != V->getBitWidth() - 1)
            Shl->setHasNoSignedWrap();
        }

        return Shl;
      }
    }
  }

  if (Op0->hasOneUse() && match(Op1, m_NegatedPower2())) {
    // Interpret  X * (-1<<C)  as  (-X) * (1<<C)  and try to sink the negation.
    // The "* (1<<C)" thus becomes a potential shifting opportunity.
    if (Value *NegOp0 =
            Negator::Negate(/*IsNegation*/ true, HasNSW, Op0, *this)) {
      auto *Op1C = cast<Constant>(Op1);
      return replaceInstUsesWith(
          I, Builder.CreateMul(NegOp0, ConstantExpr::getNeg(Op1C), "",
                               /* HasNUW */ false,
                               HasNSW && Op1C->isNotMinSignedValue()));
    }

    // Try to convert multiply of extended operand to narrow negate and shift
    // for better analysis.
    // This is valid if the shift amount (trailing zeros in the multiplier
    // constant) clears more high bits than the bitwidth difference between
    // source and destination types:
    // ({z/s}ext X) * (-1<<C) --> (zext (-X)) << C
    const APInt *NegPow2C;
    Value *X;
    if (match(Op0, m_ZExtOrSExt(m_Value(X))) &&
        match(Op1, m_APIntAllowPoison(NegPow2C))) {
      unsigned SrcWidth = X->getType()->getScalarSizeInBits();
      unsigned ShiftAmt = NegPow2C->countr_zero();
      if (ShiftAmt >= BitWidth - SrcWidth) {
        Value *N = Builder.CreateNeg(X, X->getName() + ".neg");
        Value *Z = Builder.CreateZExt(N, Ty, N->getName() + ".z");
        return BinaryOperator::CreateShl(Z, ConstantInt::get(Ty, ShiftAmt));
      }
    }
  }

  if (Instruction *FoldedMul = foldBinOpIntoSelectOrPhi(I))
    return FoldedMul;

  if (Value *FoldedMul = foldMulSelectToNegate(I, Builder))
    return replaceInstUsesWith(I, FoldedMul);

  // Simplify mul instructions with a constant RHS.
  Constant *MulC;
  if (match(Op1, m_ImmConstant(MulC))) {
    // Canonicalize (X+C1)*MulC -> X*MulC+C1*MulC.
    // Canonicalize (X|C1)*MulC -> X*MulC+C1*MulC.
    Value *X;
    Constant *C1;
    if (match(Op0, m_OneUse(m_AddLike(m_Value(X), m_ImmConstant(C1))))) {
      // C1*MulC simplifies to a tidier constant.
      Value *NewC = Builder.CreateMul(C1, MulC);
      auto *BOp0 = cast<BinaryOperator>(Op0);
      bool Op0NUW =
          (BOp0->getOpcode() == Instruction::Or || BOp0->hasNoUnsignedWrap());
      Value *NewMul = Builder.CreateMul(X, MulC);
      auto *BO = BinaryOperator::CreateAdd(NewMul, NewC);
      if (HasNUW && Op0NUW) {
        // Even if NewMul folds to a constant (so there is no NewMulBO), we
        // can still set BO to nuw.
        if (auto *NewMulBO = dyn_cast<BinaryOperator>(NewMul))
          NewMulBO->setHasNoUnsignedWrap();
        BO->setHasNoUnsignedWrap();
      }
      return BO;
    }
  }

  // abs(X) * abs(X) -> X * X
  Value *X;
  if (Op0 == Op1 && match(Op0, m_Intrinsic<Intrinsic::abs>(m_Value(X))))
    return BinaryOperator::CreateMul(X, X);

  {
    Value *Y;
    // abs(X) * abs(Y) -> abs(X * Y)
    if (I.hasNoSignedWrap() &&
        match(Op0,
              m_OneUse(m_Intrinsic<Intrinsic::abs>(m_Value(X), m_One()))) &&
        match(Op1, m_OneUse(m_Intrinsic<Intrinsic::abs>(m_Value(Y), m_One()))))
      return replaceInstUsesWith(
          I, Builder.CreateBinaryIntrinsic(Intrinsic::abs,
                                           Builder.CreateNSWMul(X, Y),
                                           Builder.getTrue()));
  }

  // -X * C --> X * -C
  Value *Y;
  Constant *Op1C;
  if (match(Op0, m_Neg(m_Value(X))) && match(Op1, m_Constant(Op1C)))
    return BinaryOperator::CreateMul(X, ConstantExpr::getNeg(Op1C));

  // -X * -Y --> X * Y
  if (match(Op0, m_Neg(m_Value(X))) && match(Op1, m_Neg(m_Value(Y)))) {
    auto *NewMul = BinaryOperator::CreateMul(X, Y);
    if (HasNSW && cast<OverflowingBinaryOperator>(Op0)->hasNoSignedWrap() &&
        cast<OverflowingBinaryOperator>(Op1)->hasNoSignedWrap())
      NewMul->setHasNoSignedWrap();
    return NewMul;
  }

  // -X * Y --> -(X * Y)
  // X * -Y --> -(X * Y)
  if (match(&I, m_c_Mul(m_OneUse(m_Neg(m_Value(X))), m_Value(Y))))
    return BinaryOperator::CreateNeg(Builder.CreateMul(X, Y));

  // (-X * Y) * -X --> (X * Y) * X
  // (-X << Y) * -X --> (X << Y) * X
  if (match(Op1, m_Neg(m_Value(X)))) {
    if (Value *NegOp0 = Negator::Negate(false, /*IsNSW*/ false, Op0, *this))
      return BinaryOperator::CreateMul(NegOp0, X);
  }

  if (Op0->hasOneUse()) {
    // (mul (div exact X, C0), C1)
    //    -> (div exact X, C0 / C1)
    // iff C0 % C1 == 0 and X / (C0 / C1) doesn't create UB.
    const APInt *C1;
    auto UDivCheck = [&C1](const APInt &C) { return C.urem(*C1).isZero(); };
    auto SDivCheck = [&C1](const APInt &C) {
      APInt Quot, Rem;
      APInt::sdivrem(C, *C1, Quot, Rem);
      return Rem.isZero() && !Quot.isAllOnes();
    };
    if (match(Op1, m_APInt(C1)) &&
        (match(Op0, m_Exact(m_UDiv(m_Value(X), m_CheckedInt(UDivCheck)))) ||
         match(Op0, m_Exact(m_SDiv(m_Value(X), m_CheckedInt(SDivCheck)))))) {
      auto BOpc = cast<BinaryOperator>(Op0)->getOpcode();
      return BinaryOperator::CreateExact(
          BOpc, X,
          Builder.CreateBinOp(BOpc, cast<BinaryOperator>(Op0)->getOperand(1),
                              Op1));
    }
  }

  // (X / Y) * Y = X - (X % Y)
  // (X / Y) * -Y = (X % Y) - X
  {
    Value *Y = Op1;
    BinaryOperator *Div = dyn_cast<BinaryOperator>(Op0);
    if (!Div || (Div->getOpcode() != Instruction::UDiv &&
                 Div->getOpcode() != Instruction::SDiv)) {
      Y = Op0;
      Div = dyn_cast<BinaryOperator>(Op1);
    }
    Value *Neg = dyn_castNegVal(Y);
    if (Div && Div->hasOneUse() &&
        (Div->getOperand(1) == Y || Div->getOperand(1) == Neg) &&
        (Div->getOpcode() == Instruction::UDiv ||
         Div->getOpcode() == Instruction::SDiv)) {
      Value *X = Div->getOperand(0), *DivOp1 = Div->getOperand(1);

      // If the division is exact, X % Y is zero, so we end up with X or -X.
      if (Div->isExact()) {
        if (DivOp1 == Y)
          return replaceInstUsesWith(I, X);
        return BinaryOperator::CreateNeg(X);
      }

      auto RemOpc = Div->getOpcode() == Instruction::UDiv ? Instruction::URem
                                                          : Instruction::SRem;
      // X must be frozen because we are increasing its number of uses.
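      // For example (illustrative IR, udiv case; the udiv has one use):
      //   %d = udiv i32 %x, %y
      //   %m = mul i32 %d, %y
      // becomes
      //   %x.fr = freeze i32 %x    ; only if %x may be undef/poison
      //   %r = urem i32 %x.fr, %y
      //   %m = sub i32 %x.fr, %r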
      Value *XFreeze = X;
      if (!isGuaranteedNotToBeUndef(X))
        XFreeze = Builder.CreateFreeze(X, X->getName() + ".fr");
      Value *Rem = Builder.CreateBinOp(RemOpc, XFreeze, DivOp1);
      if (DivOp1 == Y)
        return BinaryOperator::CreateSub(XFreeze, Rem);
      return BinaryOperator::CreateSub(Rem, XFreeze);
    }
  }

  // Fold the following two scenarios:
  //   1) i1 mul -> i1 and.
  //   2) X * Y --> X & Y, iff X, Y can be only {0,1}.
  // Note: We could use known bits to generalize this and related patterns with
  // shifts/truncs
  if (Ty->isIntOrIntVectorTy(1) ||
      (match(Op0, m_And(m_Value(), m_One())) &&
       match(Op1, m_And(m_Value(), m_One()))))
    return BinaryOperator::CreateAnd(Op0, Op1);

  if (Value *R = foldMulShl1(I, /* CommuteOperands */ false, Builder))
    return replaceInstUsesWith(I, R);
  if (Value *R = foldMulShl1(I, /* CommuteOperands */ true, Builder))
    return replaceInstUsesWith(I, R);

  // (zext bool X) * (zext bool Y) --> zext (and X, Y)
  // (sext bool X) * (sext bool Y) --> zext (and X, Y)
  // Note: -1 * -1 == 1 * 1 == 1 (if the extends match, the result is the same)
  if (((match(Op0, m_ZExt(m_Value(X))) && match(Op1, m_ZExt(m_Value(Y)))) ||
       (match(Op0, m_SExt(m_Value(X))) && match(Op1, m_SExt(m_Value(Y))))) &&
      X->getType()->isIntOrIntVectorTy(1) && X->getType() == Y->getType() &&
      (Op0->hasOneUse() || Op1->hasOneUse() || X == Y)) {
    Value *And = Builder.CreateAnd(X, Y, "mulbool");
    return CastInst::Create(Instruction::ZExt, And, Ty);
  }
  // (sext bool X) * (zext bool Y) --> sext (and X, Y)
  // (zext bool X) * (sext bool Y) --> sext (and X, Y)
  // Note: -1 * 1 == 1 * -1 == -1
  if (((match(Op0, m_SExt(m_Value(X))) && match(Op1, m_ZExt(m_Value(Y)))) ||
       (match(Op0, m_ZExt(m_Value(X))) && match(Op1, m_SExt(m_Value(Y))))) &&
      X->getType()->isIntOrIntVectorTy(1) && X->getType() == Y->getType() &&
      (Op0->hasOneUse() || Op1->hasOneUse())) {
    Value *And = Builder.CreateAnd(X, Y, "mulbool");
    return CastInst::Create(Instruction::SExt, And, Ty);
  }

  // (zext bool X) * Y --> X ? Y : 0
  // Y * (zext bool X) --> X ? Y : 0
  if (match(Op0, m_ZExt(m_Value(X))) && X->getType()->isIntOrIntVectorTy(1))
    return SelectInst::Create(X, Op1, ConstantInt::getNullValue(Ty));
  if (match(Op1, m_ZExt(m_Value(X))) && X->getType()->isIntOrIntVectorTy(1))
    return SelectInst::Create(X, Op0, ConstantInt::getNullValue(Ty));

  // mul (sext X), Y -> select X, -Y, 0
  // mul Y, (sext X) -> select X, -Y, 0
  if (match(&I, m_c_Mul(m_OneUse(m_SExt(m_Value(X))), m_Value(Y))) &&
      X->getType()->isIntOrIntVectorTy(1))
    return SelectInst::Create(X, Builder.CreateNeg(Y, "", I.hasNoSignedWrap()),
                              ConstantInt::getNullValue(Op0->getType()));

  Constant *ImmC;
  if (match(Op1, m_ImmConstant(ImmC))) {
    // (sext bool X) * C --> X ? -C : 0
    if (match(Op0, m_SExt(m_Value(X))) && X->getType()->isIntOrIntVectorTy(1)) {
      Constant *NegC = ConstantExpr::getNeg(ImmC);
      return SelectInst::Create(X, NegC, ConstantInt::getNullValue(Ty));
    }

    // (ashr i32 X, 31) * C --> (X < 0) ? -C : 0
    const APInt *C;
    if (match(Op0, m_OneUse(m_AShr(m_Value(X), m_APInt(C)))) &&
        *C == C->getBitWidth() - 1) {
      Constant *NegC = ConstantExpr::getNeg(ImmC);
      Value *IsNeg = Builder.CreateIsNeg(X, "isneg");
      return SelectInst::Create(IsNeg, NegC, ConstantInt::getNullValue(Ty));
    }
  }

  // (lshr X, 31) * Y --> (X < 0) ? Y : 0
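  // For example (illustrative, i32):
  //   %s = lshr i32 %x, 31
  //   %m = mul i32 %s, %y
  // becomes
  //   %isneg = icmp slt i32 %x, 0
  //   %m     = select i1 %isneg, i32 %y, i32 0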
  // TODO: We are not checking one-use because the elimination of the multiply
  //       is better for analysis?
  const APInt *C;
  if (match(&I, m_c_BinOp(m_LShr(m_Value(X), m_APInt(C)), m_Value(Y))) &&
      *C == C->getBitWidth() - 1) {
    Value *IsNeg = Builder.CreateIsNeg(X, "isneg");
    return SelectInst::Create(IsNeg, Y, ConstantInt::getNullValue(Ty));
  }

  // (and X, 1) * Y --> (trunc X) ? Y : 0
  if (match(&I, m_c_BinOp(m_OneUse(m_And(m_Value(X), m_One())), m_Value(Y)))) {
    Value *Tr = Builder.CreateTrunc(X, CmpInst::makeCmpResultType(Ty));
    return SelectInst::Create(Tr, Y, ConstantInt::getNullValue(Ty));
  }

  // ((ashr X, 31) | 1) * X --> abs(X)
  // X * ((ashr X, 31) | 1) --> abs(X)
  if (match(&I, m_c_BinOp(m_Or(m_AShr(m_Value(X),
                                      m_SpecificIntAllowPoison(BitWidth - 1)),
                               m_One()),
                          m_Deferred(X)))) {
    Value *Abs = Builder.CreateBinaryIntrinsic(
        Intrinsic::abs, X, ConstantInt::getBool(I.getContext(), HasNSW));
    Abs->takeName(&I);
    return replaceInstUsesWith(I, Abs);
  }

  if (Instruction *Ext = narrowMathIfNoOverflow(I))
    return Ext;

  if (Instruction *Res = foldBinOpOfSelectAndCastOfSelectCondition(I))
    return Res;

  // (mul Op0 Op1):
  //    if Log2(Op0) folds away ->
  //        (shl Op1, Log2(Op0))
  //    if Log2(Op1) folds away ->
  //        (shl Op0, Log2(Op1))
  if (Value *Res = tryGetLog2(Op0, /*AssumeNonZero=*/false)) {
    BinaryOperator *Shl = BinaryOperator::CreateShl(Op1, Res);
    // We can only propagate the nuw flag.
    Shl->setHasNoUnsignedWrap(HasNUW);
    return Shl;
  }
  if (Value *Res = tryGetLog2(Op1, /*AssumeNonZero=*/false)) {
    BinaryOperator *Shl = BinaryOperator::CreateShl(Op0, Res);
    // We can only propagate the nuw flag.
    Shl->setHasNoUnsignedWrap(HasNUW);
    return Shl;
  }

  bool Changed = false;
  if (!HasNSW && willNotOverflowSignedMul(Op0, Op1, I)) {
    Changed = true;
    I.setHasNoSignedWrap(true);
  }

  if (!HasNUW && willNotOverflowUnsignedMul(Op0, Op1, I, I.hasNoSignedWrap())) {
    Changed = true;
    I.setHasNoUnsignedWrap(true);
  }

  return Changed ? &I : nullptr;
}

Instruction *InstCombinerImpl::foldFPSignBitOps(BinaryOperator &I) {
  BinaryOperator::BinaryOps Opcode = I.getOpcode();
  assert((Opcode == Instruction::FMul || Opcode == Instruction::FDiv) &&
         "Expected fmul or fdiv");

  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
  Value *X, *Y;

  // -X * -Y --> X * Y
  // -X / -Y --> X / Y
  if (match(Op0, m_FNeg(m_Value(X))) && match(Op1, m_FNeg(m_Value(Y))))
    return BinaryOperator::CreateWithCopiedFlags(Opcode, X, Y, &I);

  // fabs(X) * fabs(X) -> X * X
  // fabs(X) / fabs(X) -> X / X
  if (Op0 == Op1 && match(Op0, m_FAbs(m_Value(X))))
    return BinaryOperator::CreateWithCopiedFlags(Opcode, X, X, &I);

  // fabs(X) * fabs(Y) --> fabs(X * Y)
  // fabs(X) / fabs(Y) --> fabs(X / Y)
  if (match(Op0, m_FAbs(m_Value(X))) && match(Op1, m_FAbs(m_Value(Y))) &&
      (Op0->hasOneUse() || Op1->hasOneUse())) {
    Value *XY = Builder.CreateBinOpFMF(Opcode, X, Y, &I);
    Value *Fabs =
        Builder.CreateUnaryIntrinsic(Intrinsic::fabs, XY, &I, I.getName());
    return replaceInstUsesWith(I, Fabs);
  }

  return nullptr;
}

Instruction *InstCombinerImpl::foldPowiReassoc(BinaryOperator &I) {
  auto createPowiExpr = [](BinaryOperator &I, InstCombinerImpl &IC, Value *X,
                           Value *Y, Value *Z) {
    InstCombiner::BuilderTy &Builder = IC.Builder;
    Value *YZ = Builder.CreateAdd(Y, Z);
    Instruction *NewPow = Builder.CreateIntrinsic(
        Intrinsic::powi, {X->getType(), YZ->getType()}, {X, YZ}, &I);

    return NewPow;
  };

  Value *X, *Y, *Z;
  unsigned Opcode = I.getOpcode();
  assert((Opcode == Instruction::FMul || Opcode == Instruction::FDiv) &&
         "Unexpected opcode");

  // powi(X, Y) * X --> powi(X, Y+1)
  // X * powi(X, Y) --> powi(X, Y+1)
  if (match(&I, m_c_FMul(m_OneUse(m_AllowReassoc(m_Intrinsic<Intrinsic::powi>(
                             m_Value(X), m_Value(Y)))),
                         m_Deferred(X)))) {
    Constant *One = ConstantInt::get(Y->getType(), 1);
    if (willNotOverflowSignedAdd(Y, One, I)) {
      Instruction *NewPow = createPowiExpr(I, *this, X, Y, One);
      return replaceInstUsesWith(I, NewPow);
    }
  }

  // powi(x, y) * powi(x, z) -> powi(x, y + z)
  Value *Op0 = I.getOperand(0);
  Value *Op1 = I.getOperand(1);
  if (Opcode == Instruction::FMul && I.isOnlyUserOfAnyOperand() &&
      match(Op0, m_AllowReassoc(
                     m_Intrinsic<Intrinsic::powi>(m_Value(X), m_Value(Y)))) &&
      match(Op1, m_AllowReassoc(m_Intrinsic<Intrinsic::powi>(m_Specific(X),
                                                             m_Value(Z)))) &&
      Y->getType() == Z->getType()) {
    Instruction *NewPow = createPowiExpr(I, *this, X, Y, Z);
    return replaceInstUsesWith(I, NewPow);
  }

  if (Opcode == Instruction::FDiv && I.hasAllowReassoc() && I.hasNoNaNs()) {
    // powi(X, Y) / X --> powi(X, Y-1)
    // This is legal when (Y - 1) can't wrap around, in which case reassoc and
    // nnan are required.
    // TODO: Multi-use may also be better off creating powi(X, Y-1).
    if (match(Op0, m_OneUse(m_AllowReassoc(m_Intrinsic<Intrinsic::powi>(
                       m_Specific(Op1), m_Value(Y))))) &&
        willNotOverflowSignedSub(Y, ConstantInt::get(Y->getType(), 1), I)) {
      Constant *NegOne = ConstantInt::getAllOnesValue(Y->getType());
      Instruction *NewPow = createPowiExpr(I, *this, Op1, Y, NegOne);
      return replaceInstUsesWith(I, NewPow);
    }

    // powi(X, Y) / (X * Z) --> powi(X, Y-1) / Z
    // This is legal when (Y - 1) can't wrap around, in which case reassoc and
    // nnan are required.
    // TODO: Multi-use may also be better off creating powi(X, Y-1).
    if (match(Op0, m_OneUse(m_AllowReassoc(m_Intrinsic<Intrinsic::powi>(
                       m_Value(X), m_Value(Y))))) &&
        match(Op1, m_AllowReassoc(m_c_FMul(m_Specific(X), m_Value(Z)))) &&
        willNotOverflowSignedSub(Y, ConstantInt::get(Y->getType(), 1), I)) {
      Constant *NegOne = ConstantInt::getAllOnesValue(Y->getType());
      auto *NewPow = createPowiExpr(I, *this, X, Y, NegOne);
      return BinaryOperator::CreateFDivFMF(NewPow, Z, &I);
    }
  }

  return nullptr;
}

// If we have the following pattern,
//   X = 1.0/sqrt(a)
//   R1 = X * X
//   R2 = a/sqrt(a)
// then this method collects all the instructions that match R1 and R2.
static bool getFSqrtDivOptPattern(Instruction *Div,
                                  SmallPtrSetImpl<Instruction *> &R1,
                                  SmallPtrSetImpl<Instruction *> &R2) {
  Value *A;
  if (match(Div, m_FDiv(m_FPOne(), m_Sqrt(m_Value(A)))) ||
      match(Div, m_FDiv(m_SpecificFP(-1.0), m_Sqrt(m_Value(A))))) {
    for (User *U : Div->users()) {
      Instruction *I = cast<Instruction>(U);
      if (match(I, m_FMul(m_Specific(Div), m_Specific(Div))))
        R1.insert(I);
    }

    CallInst *CI = cast<CallInst>(Div->getOperand(1));
    for (User *U : CI->users()) {
      Instruction *I = cast<Instruction>(U);
      if (match(I, m_FDiv(m_Specific(A), m_Sqrt(m_Specific(A)))))
        R2.insert(I);
    }
  }
  return !R1.empty() && !R2.empty();
}

// Check legality for transforming
//   x = 1.0/sqrt(a)
//   r1 = x * x;
//   r2 = a/sqrt(a);
//
// TO
//
//   r1 = 1/a
//   r2 = sqrt(a)
//   x = r1 * r2
// This transform works only when 'a' is known positive.
static bool isFSqrtDivToFMulLegal(Instruction *X,
                                  SmallPtrSetImpl<Instruction *> &R1,
                                  SmallPtrSetImpl<Instruction *> &R2) {
  // Check if the required pattern for the transformation exists.
  if (!getFSqrtDivOptPattern(X, R1, R2))
    return false;

  BasicBlock *BBx = X->getParent();
  BasicBlock *BBr1 = (*R1.begin())->getParent();
  BasicBlock *BBr2 = (*R2.begin())->getParent();

  CallInst *FSqrt = cast<CallInst>(X->getOperand(1));
  if (!FSqrt->hasAllowReassoc() || !FSqrt->hasNoNaNs() ||
      !FSqrt->hasNoSignedZeros() || !FSqrt->hasNoInfs())
    return false;

  // We change x = 1/sqrt(a) to x = sqrt(a) * 1/a. This change isn't allowed
  // by the arcp (allow-reciprocal) flag, as arcp is strictly meant to
  // transform ops of type a/b to a * 1/b. So this can be considered an
  // algebraic rewrite, and the reassoc flag has been used (rather, abused)
  // in the past for algebraic rewrites.
  if (!X->hasAllowReassoc() || !X->hasAllowReciprocal() || !X->hasNoInfs())
    return false;

  // Check the constraints on X, R1 and R2 combined.
  // The fdiv instruction and one of the multiplications must reside in the
  // same block. If not, the optimized code may execute more ops than before
  // and this may hamper the performance.
  if (BBx != BBr1 && BBx != BBr2)
    return false;

  // Check the constraints on instructions in R1.
  if (any_of(R1, [BBr1](Instruction *I) {
        // When you have multiple instructions residing in R1 and R2
        // respectively, it's difficult to generate combinations of (R1, R2)
        // and then check if we have the required pattern. So, for now, just
        // be conservative.
        return (I->getParent() != BBr1 || !I->hasAllowReassoc());
      }))
    return false;

  // Check the constraints on instructions in R2.
  return all_of(R2, [BBr2](Instruction *I) {
    // When you have multiple instructions residing in R1 and R2 respectively,
    // it's difficult to generate combinations of (R1, R2) and then check if
    // we have the required pattern. So, for now, just be conservative.
    return (I->getParent() == BBr2 && I->hasAllowReassoc());
  });
}

Instruction *InstCombinerImpl::foldFMulReassoc(BinaryOperator &I) {
  Value *Op0 = I.getOperand(0);
  Value *Op1 = I.getOperand(1);
  Value *X, *Y;
  Constant *C;
  BinaryOperator *Op0BinOp;

  // Reassociate constant RHS with another constant to form constant
  // expression.
  if (match(Op1, m_Constant(C)) && C->isFiniteNonZeroFP() &&
      match(Op0, m_AllowReassoc(m_BinOp(Op0BinOp)))) {
    // Everything in this scope folds I with Op0, intersecting their FMF.
    FastMathFlags FMF = I.getFastMathFlags() & Op0BinOp->getFastMathFlags();
    Constant *C1;
    if (match(Op0, m_OneUse(m_FDiv(m_Constant(C1), m_Value(X))))) {
      // (C1 / X) * C --> (C * C1) / X
      Constant *CC1 =
          ConstantFoldBinaryOpOperands(Instruction::FMul, C, C1, DL);
      if (CC1 && CC1->isNormalFP())
        return BinaryOperator::CreateFDivFMF(CC1, X, FMF);
    }
    if (match(Op0, m_FDiv(m_Value(X), m_Constant(C1)))) {
      // FIXME: This seems like it should also be checking for arcp
      // (X / C1) * C --> X * (C / C1)
      Constant *CDivC1 =
          ConstantFoldBinaryOpOperands(Instruction::FDiv, C, C1, DL);
      if (CDivC1 && CDivC1->isNormalFP())
        return BinaryOperator::CreateFMulFMF(X, CDivC1, FMF);

      // If the constant was a denormal, try reassociating differently.
      // (X / C1) * C --> X / (C1 / C)
      Constant *C1DivC =
          ConstantFoldBinaryOpOperands(Instruction::FDiv, C1, C, DL);
      if (C1DivC && Op0->hasOneUse() && C1DivC->isNormalFP())
        return BinaryOperator::CreateFDivFMF(X, C1DivC, FMF);
    }

    // We do not need to match 'fadd C, X' and 'fsub X, C' because they are
    // canonicalized to 'fadd X, C'. Distributing the multiply may allow
    // further folds and (X * C) + C2 is 'fma'.
    if (match(Op0, m_OneUse(m_FAdd(m_Value(X), m_Constant(C1))))) {
      // (X + C1) * C --> (X * C) + (C * C1)
      if (Constant *CC1 =
              ConstantFoldBinaryOpOperands(Instruction::FMul, C, C1, DL)) {
        Value *XC = Builder.CreateFMulFMF(X, C, FMF);
        return BinaryOperator::CreateFAddFMF(XC, CC1, FMF);
      }
    }
    if (match(Op0, m_OneUse(m_FSub(m_Constant(C1), m_Value(X))))) {
      // (C1 - X) * C --> (C * C1) - (X * C)
      if (Constant *CC1 =
              ConstantFoldBinaryOpOperands(Instruction::FMul, C, C1, DL)) {
        Value *XC = Builder.CreateFMulFMF(X, C, FMF);
        return BinaryOperator::CreateFSubFMF(CC1, XC, FMF);
      }
    }
  }

  Value *Z;
  if (match(&I,
            m_c_FMul(m_AllowReassoc(m_OneUse(m_FDiv(m_Value(X), m_Value(Y)))),
                     m_Value(Z)))) {
    BinaryOperator *DivOp = cast<BinaryOperator>(((Z == Op0) ? Op1 : Op0));
    FastMathFlags FMF = I.getFastMathFlags() & DivOp->getFastMathFlags();
    if (FMF.allowReassoc()) {
      // Sink division: (X / Y) * Z --> (X * Z) / Y
      auto *NewFMul = Builder.CreateFMulFMF(X, Z, FMF);
      return BinaryOperator::CreateFDivFMF(NewFMul, Y, FMF);
    }
  }

  // sqrt(X) * sqrt(Y) -> sqrt(X * Y)
  // nnan disallows the possibility of returning a number if both operands are
  // negative (in that case, we should return NaN).
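  // For example (illustrative; both sqrt calls have one use):
  //   %sx = call nnan float @llvm.sqrt.f32(float %x)
  //   %sy = call nnan float @llvm.sqrt.f32(float %y)
  //   %m  = fmul reassoc nnan float %sx, %sy
  // becomes
  //   %xy = fmul reassoc nnan float %x, %y
  //   %m  = call reassoc nnan float @llvm.sqrt.f32(float %xy)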
  if (I.hasNoNaNs() && match(Op0, m_OneUse(m_Sqrt(m_Value(X)))) &&
      match(Op1, m_OneUse(m_Sqrt(m_Value(Y))))) {
    Value *XY = Builder.CreateFMulFMF(X, Y, &I);
    Value *Sqrt = Builder.CreateUnaryIntrinsic(Intrinsic::sqrt, XY, &I);
    return replaceInstUsesWith(I, Sqrt);
  }

  // The following transforms are done irrespective of the number of uses
  // for the expression "1.0/sqrt(X)".
  //  1) 1.0/sqrt(X) * X -> X/sqrt(X)
  //  2) X * 1.0/sqrt(X) -> X/sqrt(X)
  // We always expect the backend to reduce X/sqrt(X) to sqrt(X), if it
  // has the necessary (reassoc) fast-math-flags.
  if (I.hasNoSignedZeros() &&
      match(Op0, (m_FDiv(m_SpecificFP(1.0), m_Value(Y)))) &&
      match(Y, m_Sqrt(m_Value(X))) && Op1 == X)
    return BinaryOperator::CreateFDivFMF(X, Y, &I);
  if (I.hasNoSignedZeros() &&
      match(Op1, (m_FDiv(m_SpecificFP(1.0), m_Value(Y)))) &&
      match(Y, m_Sqrt(m_Value(X))) && Op0 == X)
    return BinaryOperator::CreateFDivFMF(X, Y, &I);

  // Like the similar transform in instsimplify, this requires 'nsz' because
  // sqrt(-0.0) = -0.0, and -0.0 * -0.0 does not simplify to -0.0.
  if (I.hasNoNaNs() && I.hasNoSignedZeros() && Op0 == Op1 && Op0->hasNUses(2)) {
    // Peek through fdiv to find squaring of square root:
    // (X / sqrt(Y)) * (X / sqrt(Y)) --> (X * X) / Y
    if (match(Op0, m_FDiv(m_Value(X), m_Sqrt(m_Value(Y))))) {
      Value *XX = Builder.CreateFMulFMF(X, X, &I);
      return BinaryOperator::CreateFDivFMF(XX, Y, &I);
    }
    // (sqrt(Y) / X) * (sqrt(Y) / X) --> Y / (X * X)
    if (match(Op0, m_FDiv(m_Sqrt(m_Value(Y)), m_Value(X)))) {
      Value *XX = Builder.CreateFMulFMF(X, X, &I);
      return BinaryOperator::CreateFDivFMF(Y, XX, &I);
    }
  }

  // pow(X, Y) * X --> pow(X, Y+1)
  // X * pow(X, Y) --> pow(X, Y+1)
  if (match(&I, m_c_FMul(m_OneUse(m_Intrinsic<Intrinsic::pow>(m_Value(X),
                                                              m_Value(Y))),
                         m_Deferred(X)))) {
    Value *Y1 = Builder.CreateFAddFMF(Y, ConstantFP::get(I.getType(), 1.0), &I);
    Value *Pow = Builder.CreateBinaryIntrinsic(Intrinsic::pow, X, Y1, &I);
    return replaceInstUsesWith(I, Pow);
  }

  if (Instruction *FoldedPowi = foldPowiReassoc(I))
    return FoldedPowi;

  if (I.isOnlyUserOfAnyOperand()) {
    // pow(X, Y) * pow(X, Z) -> pow(X, Y + Z)
    if (match(Op0, m_Intrinsic<Intrinsic::pow>(m_Value(X), m_Value(Y))) &&
        match(Op1, m_Intrinsic<Intrinsic::pow>(m_Specific(X), m_Value(Z)))) {
      auto *YZ = Builder.CreateFAddFMF(Y, Z, &I);
      auto *NewPow = Builder.CreateBinaryIntrinsic(Intrinsic::pow, X, YZ, &I);
      return replaceInstUsesWith(I, NewPow);
    }
    // pow(X, Y) * pow(Z, Y) -> pow(X * Z, Y)
    if (match(Op0, m_Intrinsic<Intrinsic::pow>(m_Value(X), m_Value(Y))) &&
        match(Op1, m_Intrinsic<Intrinsic::pow>(m_Value(Z), m_Specific(Y)))) {
      auto *XZ = Builder.CreateFMulFMF(X, Z, &I);
      auto *NewPow = Builder.CreateBinaryIntrinsic(Intrinsic::pow, XZ, Y, &I);
      return replaceInstUsesWith(I, NewPow);
    }

    // exp(X) * exp(Y) -> exp(X + Y)
    if (match(Op0, m_Intrinsic<Intrinsic::exp>(m_Value(X))) &&
        match(Op1, m_Intrinsic<Intrinsic::exp>(m_Value(Y)))) {
      Value *XY = Builder.CreateFAddFMF(X, Y, &I);
      Value *Exp = Builder.CreateUnaryIntrinsic(Intrinsic::exp, XY, &I);
      return replaceInstUsesWith(I, Exp);
    }

    // exp2(X) * exp2(Y) -> exp2(X + Y)
    if (match(Op0, m_Intrinsic<Intrinsic::exp2>(m_Value(X))) &&
        match(Op1, m_Intrinsic<Intrinsic::exp2>(m_Value(Y)))) {
      Value *XY = Builder.CreateFAddFMF(X, Y, &I);
      Value *Exp2 = Builder.CreateUnaryIntrinsic(Intrinsic::exp2, XY, &I);
      return replaceInstUsesWith(I, Exp2);
    }
  }

  // (X*Y) * X => (X*X) * Y where Y != X
  // The purpose is two-fold:
  //   1) to form a power expression (of X).
  //   2) potentially shorten the critical path: After transformation, the
  //      latency of the instruction Y is amortized by the expression of X*X,
  //      and therefore Y is in a "less critical" position compared to what it
  //      was before the transformation.
  if (match(Op0, m_OneUse(m_c_FMul(m_Specific(Op1), m_Value(Y)))) && Op1 != Y) {
    Value *XX = Builder.CreateFMulFMF(Op1, Op1, &I);
    return BinaryOperator::CreateFMulFMF(XX, Y, &I);
  }
  if (match(Op1, m_OneUse(m_c_FMul(m_Specific(Op0), m_Value(Y)))) && Op0 != Y) {
    Value *XX = Builder.CreateFMulFMF(Op0, Op0, &I);
    return BinaryOperator::CreateFMulFMF(XX, Y, &I);
  }

  return nullptr;
}

Instruction *InstCombinerImpl::visitFMul(BinaryOperator &I) {
  if (Value *V = simplifyFMulInst(I.getOperand(0), I.getOperand(1),
                                  I.getFastMathFlags(),
                                  SQ.getWithInstruction(&I)))
    return replaceInstUsesWith(I, V);

  if (SimplifyAssociativeOrCommutative(I))
    return &I;

  if (Instruction *X = foldVectorBinop(I))
    return X;

  if (Instruction *Phi = foldBinopWithPhiOperands(I))
    return Phi;

  if (Instruction *FoldedMul = foldBinOpIntoSelectOrPhi(I))
    return FoldedMul;

  if (Value *FoldedMul = foldMulSelectToNegate(I, Builder))
    return replaceInstUsesWith(I, FoldedMul);

  if (Instruction *R = foldFPSignBitOps(I))
    return R;

  if (Instruction *R = foldFBinOpOfIntCasts(I))
    return R;

  // X * -1.0 --> -X
  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
  if (match(Op1, m_SpecificFP(-1.0)))
    return UnaryOperator::CreateFNegFMF(Op0, &I);

  // With no-nans/no-infs:
  // X * 0.0 --> copysign(0.0, X)
  // X * -0.0 --> copysign(0.0, -X)
  const APFloat *FPC;
  if (match(Op1, m_APFloatAllowPoison(FPC)) && FPC->isZero() &&
      ((I.hasNoInfs() &&
        isKnownNeverNaN(Op0, /*Depth=*/0, SQ.getWithInstruction(&I))) ||
       isKnownNeverNaN(&I, /*Depth=*/0, SQ.getWithInstruction(&I)))) {
    if (FPC->isNegative())
      Op0 = Builder.CreateFNegFMF(Op0, &I);
    CallInst *CopySign = Builder.CreateIntrinsic(Intrinsic::copysign,
                                                 {I.getType()}, {Op1, Op0}, &I);
    return replaceInstUsesWith(I, CopySign);
  }

  // -X * C --> X * -C
  Value *X, *Y;
  Constant *C;
  if (match(Op0, m_FNeg(m_Value(X))) && match(Op1, m_Constant(C)))
    if (Constant *NegC = ConstantFoldUnaryOpOperand(Instruction::FNeg, C, DL))
      return BinaryOperator::CreateFMulFMF(X, NegC, &I);

  if (I.hasNoNaNs() && I.hasNoSignedZeros()) {
    // (uitofp bool X) * Y --> X ? Y : 0
    // Y * (uitofp bool X) --> X ? Y : 0
    // Note: INF * 0 is NaN.
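    // For example (illustrative; nnan+nsz established above, and the select
    // inherits I's fast-math flags):
    //   %f = uitofp i1 %b to float
    //   %m = fmul nnan nsz float %f, %y
    // becomes
    //   %m = select i1 %b, float %y, float 0.0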
    if (match(Op0, m_UIToFP(m_Value(X))) &&
        X->getType()->isIntOrIntVectorTy(1)) {
      auto *SI = SelectInst::Create(X, Op1, ConstantFP::get(I.getType(), 0.0));
      SI->copyFastMathFlags(I.getFastMathFlags());
      return SI;
    }
    if (match(Op1, m_UIToFP(m_Value(X))) &&
        X->getType()->isIntOrIntVectorTy(1)) {
      auto *SI = SelectInst::Create(X, Op0, ConstantFP::get(I.getType(), 0.0));
      SI->copyFastMathFlags(I.getFastMathFlags());
      return SI;
    }
  }

  // (select A, B, C) * (select A, D, E) --> select A, (B*D), (C*E)
  if (Value *V = SimplifySelectsFeedingBinaryOp(I, Op0, Op1))
    return replaceInstUsesWith(I, V);

  if (I.hasAllowReassoc())
    if (Instruction *FoldedMul = foldFMulReassoc(I))
      return FoldedMul;

  // log2(X * 0.5) * Y = log2(X) * Y - Y
  if (I.isFast()) {
    IntrinsicInst *Log2 = nullptr;
    if (match(Op0, m_OneUse(m_Intrinsic<Intrinsic::log2>(
                       m_OneUse(m_FMul(m_Value(X), m_SpecificFP(0.5))))))) {
      Log2 = cast<IntrinsicInst>(Op0);
      Y = Op1;
    }
    if (match(Op1, m_OneUse(m_Intrinsic<Intrinsic::log2>(
                       m_OneUse(m_FMul(m_Value(X), m_SpecificFP(0.5))))))) {
      Log2 = cast<IntrinsicInst>(Op1);
      Y = Op0;
    }
    if (Log2) {
      Value *Log2 = Builder.CreateUnaryIntrinsic(Intrinsic::log2, X, &I);
      Value *LogXTimesY = Builder.CreateFMulFMF(Log2, Y, &I);
      return BinaryOperator::CreateFSubFMF(LogXTimesY, Y, &I);
    }
  }

  // Simplify FMUL recurrences starting with 0.0 to 0.0 if nnan and nsz are
  // set. Given a phi node whose entry value is 0 and that is used in an fmul
  // operation, we can safely replace the fmul with 0 and eliminate the loop
  // operation.
  PHINode *PN = nullptr;
  Value *Start = nullptr, *Step = nullptr;
  if (matchSimpleRecurrence(&I, PN, Start, Step) && I.hasNoNaNs() &&
      I.hasNoSignedZeros() && match(Start, m_Zero()))
    return replaceInstUsesWith(I, Start);

  // minimum(X, Y) * maximum(X, Y) => X * Y.
  if (match(&I,
            m_c_FMul(m_Intrinsic<Intrinsic::maximum>(m_Value(X), m_Value(Y)),
                     m_c_Intrinsic<Intrinsic::minimum>(m_Deferred(X),
                                                       m_Deferred(Y))))) {
    BinaryOperator *Result = BinaryOperator::CreateFMulFMF(X, Y, &I);
    // We cannot preserve ninf if the nnan flag is not set.
    // If X is NaN and Y is Inf, then the original program computed NaN * NaN,
    // while the optimized version computes NaN * Inf, which is poison under
    // the ninf flag.
    if (!Result->hasNoNaNs())
      Result->setHasNoInfs(false);
    return Result;
  }

  return nullptr;
}

/// Fold a divide or remainder with a select instruction divisor when one of
/// the select operands is zero. In that case, we can use the other select
/// operand because div/rem by zero is undefined.
bool InstCombinerImpl::simplifyDivRemOfSelectWithZeroOp(BinaryOperator &I) {
  SelectInst *SI = dyn_cast<SelectInst>(I.getOperand(1));
  if (!SI)
    return false;

  int NonNullOperand;
  if (match(SI->getTrueValue(), m_Zero()))
    // div/rem X, (Cond ? 0 : Y) -> div/rem X, Y
    NonNullOperand = 2;
  else if (match(SI->getFalseValue(), m_Zero()))
    // div/rem X, (Cond ? Y : 0) -> div/rem X, Y
    NonNullOperand = 1;
  else
    return false;

  // Change the div/rem to use 'Y' instead of the select.
  replaceOperand(I, 1, SI->getOperand(NonNullOperand));

  // Okay, we know we replace the operand of the div/rem with 'Y' with no
  // problem. However, the select, or the condition of the select, may have
  // multiple uses. Based on our knowledge that the operand must be non-zero,
  // propagate the known value for the select into other uses of it, and
  // propagate a known value of the condition into its other users.

  // If the select and condition only have a single use, don't bother with
  // this; early exit.
  Value *SelectCond = SI->getCondition();
  if (SI->use_empty() && SelectCond->hasOneUse())
    return true;

  // Scan the current block backward, looking for other uses of SI.
  BasicBlock::iterator BBI = I.getIterator(), BBFront = I.getParent()->begin();
  Type *CondTy = SelectCond->getType();
  while (BBI != BBFront) {
    --BBI;
    // If we found an instruction that we can't assume will return, then
    // information from below it cannot be propagated above it.
    if (!isGuaranteedToTransferExecutionToSuccessor(&*BBI))
      break;

    // Replace uses of the select or its condition with the known values.
    for (Use &Op : BBI->operands()) {
      if (Op == SI) {
        replaceUse(Op, SI->getOperand(NonNullOperand));
        Worklist.push(&*BBI);
      } else if (Op == SelectCond) {
        replaceUse(Op, NonNullOperand == 1 ? ConstantInt::getTrue(CondTy)
                                           : ConstantInt::getFalse(CondTy));
        Worklist.push(&*BBI);
      }
    }

    // If we passed the instruction, quit looking for it.
    if (&*BBI == SI)
      SI = nullptr;
    if (&*BBI == SelectCond)
      SelectCond = nullptr;

    // If we ran out of things to eliminate, break out of the loop.
    if (!SelectCond && !SI)
      break;
  }
  return true;
}

/// True if the multiply cannot be expressed in an int this size.
static bool multiplyOverflows(const APInt &C1, const APInt &C2, APInt &Product,
                              bool IsSigned) {
  bool Overflow;
  Product = IsSigned ? C1.smul_ov(C2, Overflow) : C1.umul_ov(C2, Overflow);
  return Overflow;
}

/// True if C1 is a multiple of C2. Quotient contains C1/C2.
static bool isMultiple(const APInt &C1, const APInt &C2, APInt &Quotient,
                       bool IsSigned) {
  assert(C1.getBitWidth() == C2.getBitWidth() && "Constant widths not equal");

  // Bail if we will divide by zero.
  if (C2.isZero())
    return false;

  // Bail if we would divide INT_MIN by -1.
  if (IsSigned && C1.isMinSignedValue() && C2.isAllOnes())
    return false;

  APInt Remainder(C1.getBitWidth(), /*val=*/0ULL, IsSigned);
  if (IsSigned)
    APInt::sdivrem(C1, C2, Quotient, Remainder);
  else
    APInt::udivrem(C1, C2, Quotient, Remainder);

  return Remainder.isMinValue();
}

static Value *foldIDivShl(BinaryOperator &I, InstCombiner::BuilderTy &Builder) {
  assert((I.getOpcode() == Instruction::SDiv ||
          I.getOpcode() == Instruction::UDiv) &&
         "Expected integer divide");

  bool IsSigned = I.getOpcode() == Instruction::SDiv;
  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
  Type *Ty = I.getType();

  Value *X, *Y, *Z;

  // With appropriate no-wrap constraints, remove a common factor in the
  // dividend and divisor that is disguised as a left-shifted value.
  if (match(Op1, m_Shl(m_Value(X), m_Value(Z))) &&
      match(Op0, m_c_Mul(m_Specific(X), m_Value(Y)))) {
    // Both operands must have the matching no-wrap for this kind of division.
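    // For example (illustrative, unsigned case; both ops are nuw):
    //   %m = mul nuw i32 %x, %y
    //   %s = shl nuw i32 %x, %z
    //   %d = udiv i32 %m, %s
    // becomes
    //   %d = lshr i32 %y, %z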
    auto *Mul = cast<OverflowingBinaryOperator>(Op0);
    auto *Shl = cast<OverflowingBinaryOperator>(Op1);
    bool HasNUW = Mul->hasNoUnsignedWrap() && Shl->hasNoUnsignedWrap();
    bool HasNSW = Mul->hasNoSignedWrap() && Shl->hasNoSignedWrap();

    // (X * Y) u/ (X << Z) --> Y u>> Z
    if (!IsSigned && HasNUW)
      return Builder.CreateLShr(Y, Z, "", I.isExact());

    // (X * Y) s/ (X << Z) --> Y s/ (1 << Z)
    if (IsSigned && HasNSW && (Op0->hasOneUse() || Op1->hasOneUse())) {
      Value *Shl = Builder.CreateShl(ConstantInt::get(Ty, 1), Z);
      return Builder.CreateSDiv(Y, Shl, "", I.isExact());
    }
  }

  // With appropriate no-wrap constraints, remove a common factor in the
  // dividend and divisor that is disguised as a left-shift amount.
  if (match(Op0, m_Shl(m_Value(X), m_Value(Z))) &&
      match(Op1, m_Shl(m_Value(Y), m_Specific(Z)))) {
    auto *Shl0 = cast<OverflowingBinaryOperator>(Op0);
    auto *Shl1 = cast<OverflowingBinaryOperator>(Op1);

    // For unsigned div, we need 'nuw' on both shifts or
    // 'nsw' on both shifts + 'nuw' on the dividend.
    // (X << Z) / (Y << Z) --> X / Y
    if (!IsSigned &&
        ((Shl0->hasNoUnsignedWrap() && Shl1->hasNoUnsignedWrap()) ||
         (Shl0->hasNoUnsignedWrap() && Shl0->hasNoSignedWrap() &&
          Shl1->hasNoSignedWrap())))
      return Builder.CreateUDiv(X, Y, "", I.isExact());

    // For signed div, we need 'nsw' on both shifts + 'nuw' on the divisor.
    // (X << Z) / (Y << Z) --> X / Y
    if (IsSigned && Shl0->hasNoSignedWrap() && Shl1->hasNoSignedWrap() &&
        Shl1->hasNoUnsignedWrap())
      return Builder.CreateSDiv(X, Y, "", I.isExact());
  }

  // If X << Y and X << Z do not overflow, then:
  // (X << Y) / (X << Z) -> (1 << Y) / (1 << Z) -> 1 << Y >> Z
  if (match(Op0, m_Shl(m_Value(X), m_Value(Y))) &&
      match(Op1, m_Shl(m_Specific(X), m_Value(Z)))) {
    auto *Shl0 = cast<OverflowingBinaryOperator>(Op0);
    auto *Shl1 = cast<OverflowingBinaryOperator>(Op1);

    if (IsSigned ? (Shl0->hasNoSignedWrap() && Shl1->hasNoSignedWrap())
                 : (Shl0->hasNoUnsignedWrap() && Shl1->hasNoUnsignedWrap())) {
      Constant *One = ConstantInt::get(X->getType(), 1);
      // Only preserve the nsw flag if dividend has nsw
      // or divisor has nsw and operator is sdiv.
      Value *Dividend = Builder.CreateShl(
          One, Y, "shl.dividend",
          /*HasNUW*/ true,
          /*HasNSW*/
          IsSigned ? (Shl0->hasNoUnsignedWrap() || Shl1->hasNoUnsignedWrap())
                   : Shl0->hasNoSignedWrap());
      return Builder.CreateLShr(Dividend, Z, "", I.isExact());
    }
  }

  return nullptr;
}

/// Common integer divide/remainder transforms
Instruction *InstCombinerImpl::commonIDivRemTransforms(BinaryOperator &I) {
  assert(I.isIntDivRem() && "Unexpected instruction");
  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);

  // If any element of a constant divisor fixed width vector is zero or undef,
  // the behavior is undefined and we can fold the whole op to poison.
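  // For example (illustrative):
  //   udiv <2 x i8> %x, <i8 4, i8 0>  -->  poison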
  auto *Op1C = dyn_cast<Constant>(Op1);
  Type *Ty = I.getType();
  auto *VTy = dyn_cast<FixedVectorType>(Ty);
  if (Op1C && VTy) {
    unsigned NumElts = VTy->getNumElements();
    for (unsigned i = 0; i != NumElts; ++i) {
      Constant *Elt = Op1C->getAggregateElement(i);
      if (Elt && (Elt->isNullValue() || isa<UndefValue>(Elt)))
        return replaceInstUsesWith(I, PoisonValue::get(Ty));
    }
  }

  if (Instruction *Phi = foldBinopWithPhiOperands(I))
    return Phi;

  // The RHS is known non-zero.
  if (Value *V = simplifyValueKnownNonZero(I.getOperand(1), *this, I))
    return replaceOperand(I, 1, V);

  // Handle cases involving: div/rem X, (select Cond, Y, Z)
  if (simplifyDivRemOfSelectWithZeroOp(I))
    return &I;

  // If the divisor is a select-of-constants, try to constant fold all div ops:
  // C div/rem (select Cond, TrueC, FalseC) --> select Cond, (C div/rem TrueC),
  //                                                         (C div/rem FalseC)
  // TODO: Adapt simplifyDivRemOfSelectWithZeroOp to allow this and other folds.
  if (match(Op0, m_ImmConstant()) &&
      match(Op1, m_Select(m_Value(), m_ImmConstant(), m_ImmConstant()))) {
    if (Instruction *R = FoldOpIntoSelect(I, cast<SelectInst>(Op1),
                                          /*FoldWithMultiUse*/ true))
      return R;
  }

  return nullptr;
}

/// This function implements the transforms common to both integer division
/// instructions (udiv and sdiv). It is called by the visitors to those integer
/// division instructions.
Instruction *InstCombinerImpl::commonIDivTransforms(BinaryOperator &I) {
  if (Instruction *Res = commonIDivRemTransforms(I))
    return Res;

  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
  bool IsSigned = I.getOpcode() == Instruction::SDiv;
  Type *Ty = I.getType();

  const APInt *C2;
  if (match(Op1, m_APInt(C2))) {
    Value *X;
    const APInt *C1;

    // (X / C1) / C2 -> X / (C1*C2)
    if ((IsSigned && match(Op0, m_SDiv(m_Value(X), m_APInt(C1)))) ||
        (!IsSigned && match(Op0, m_UDiv(m_Value(X), m_APInt(C1))))) {
      APInt Product(C1->getBitWidth(), /*val=*/0ULL, IsSigned);
      if (!multiplyOverflows(*C1, *C2, Product, IsSigned))
        return BinaryOperator::Create(I.getOpcode(), X,
                                      ConstantInt::get(Ty, Product));
    }

    APInt Quotient(C2->getBitWidth(), /*val=*/0ULL, IsSigned);
    if ((IsSigned && match(Op0, m_NSWMul(m_Value(X), m_APInt(C1)))) ||
        (!IsSigned && match(Op0, m_NUWMul(m_Value(X), m_APInt(C1))))) {

      // (X * C1) / C2 -> X / (C2 / C1) if C2 is a multiple of C1.
      if (isMultiple(*C2, *C1, Quotient, IsSigned)) {
        auto *NewDiv = BinaryOperator::Create(I.getOpcode(), X,
                                              ConstantInt::get(Ty, Quotient));
        NewDiv->setIsExact(I.isExact());
        return NewDiv;
      }

      // (X * C1) / C2 -> X * (C1 / C2) if C1 is a multiple of C2.
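      // For example (illustrative, unsigned case):
      //   %m = mul nuw i8 %x, 12
      //   %d = udiv i8 %m, 4
      // becomes
      //   %d = mul nuw i8 %x, 3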
      if (isMultiple(*C1, *C2, Quotient, IsSigned)) {
        auto *Mul = BinaryOperator::Create(Instruction::Mul, X,
                                           ConstantInt::get(Ty, Quotient));
        auto *OBO = cast<OverflowingBinaryOperator>(Op0);
        Mul->setHasNoUnsignedWrap(!IsSigned && OBO->hasNoUnsignedWrap());
        Mul->setHasNoSignedWrap(OBO->hasNoSignedWrap());
        return Mul;
      }
    }

    if ((IsSigned && match(Op0, m_NSWShl(m_Value(X), m_APInt(C1))) &&
         C1->ult(C1->getBitWidth() - 1)) ||
        (!IsSigned && match(Op0, m_NUWShl(m_Value(X), m_APInt(C1))) &&
         C1->ult(C1->getBitWidth()))) {
      APInt C1Shifted = APInt::getOneBitSet(
          C1->getBitWidth(), static_cast<unsigned>(C1->getZExtValue()));

      // (X << C1) / C2 -> X / (C2 >> C1) if C2 is a multiple of 1 << C1.
      if (isMultiple(*C2, C1Shifted, Quotient, IsSigned)) {
        auto *BO = BinaryOperator::Create(I.getOpcode(), X,
                                          ConstantInt::get(Ty, Quotient));
        BO->setIsExact(I.isExact());
        return BO;
      }

      // (X << C1) / C2 -> X * ((1 << C1) / C2) if 1 << C1 is a multiple of C2.
      if (isMultiple(C1Shifted, *C2, Quotient, IsSigned)) {
        auto *Mul = BinaryOperator::Create(Instruction::Mul, X,
                                           ConstantInt::get(Ty, Quotient));
        auto *OBO = cast<OverflowingBinaryOperator>(Op0);
        Mul->setHasNoUnsignedWrap(!IsSigned && OBO->hasNoUnsignedWrap());
        Mul->setHasNoSignedWrap(OBO->hasNoSignedWrap());
        return Mul;
      }
    }

    // Distribute div over add to eliminate a matching div/mul pair:
    // ((X * C2) + C1) / C2 --> X + C1/C2
    // We need a multiple of the divisor for a signed add constant, but
    // unsigned is fine with any constant pair.
    if (IsSigned &&
        match(Op0, m_NSWAddLike(m_NSWMul(m_Value(X), m_SpecificInt(*C2)),
                                m_APInt(C1))) &&
        isMultiple(*C1, *C2, Quotient, IsSigned)) {
      return BinaryOperator::CreateNSWAdd(X, ConstantInt::get(Ty, Quotient));
    }
    if (!IsSigned &&
        match(Op0, m_NUWAddLike(m_NUWMul(m_Value(X), m_SpecificInt(*C2)),
                                m_APInt(C1)))) {
      return BinaryOperator::CreateNUWAdd(X,
                                          ConstantInt::get(Ty, C1->udiv(*C2)));
    }

    if (!C2->isZero()) // avoid X udiv 0
      if (Instruction *FoldedDiv = foldBinOpIntoSelectOrPhi(I))
        return FoldedDiv;
  }

  if (match(Op0, m_One())) {
    assert(!Ty->isIntOrIntVectorTy(1) && "i1 divide not removed?");
    if (IsSigned) {
      // 1 / 0 --> undef; 1 / 1 --> 1; 1 / -1 --> -1; 1 / anything else --> 0
      // (Op1 + 1) u< 3 ? Op1 : 0
      // Op1 must be frozen because we are increasing its number of uses.
      Value *F1 = Op1;
      if (!isGuaranteedNotToBeUndef(Op1))
        F1 = Builder.CreateFreeze(Op1, Op1->getName() + ".fr");
      Value *Inc = Builder.CreateAdd(F1, Op0);
      Value *Cmp = Builder.CreateICmpULT(Inc, ConstantInt::get(Ty, 3));
      return SelectInst::Create(Cmp, F1, ConstantInt::get(Ty, 0));
    } else {
      // If Op1 is 0 then it's undefined behaviour. If Op1 is 1 then the
      // result is one, otherwise it's zero.
      return new ZExtInst(Builder.CreateICmpEQ(Op1, Op0), Ty);
    }
  }

  // See if we can fold away this div instruction.
  if (SimplifyDemandedInstructionBits(I))
    return &I;

  // (X - (X rem Y)) / Y -> X / Y; usually originates as ((X / Y) * Y) / Y
  Value *X, *Z;
  if (match(Op0, m_Sub(m_Value(X), m_Value(Z)))) // (X - Z) / Y; Y = Op1
    if ((IsSigned && match(Z, m_SRem(m_Specific(X), m_Specific(Op1)))) ||
        (!IsSigned && match(Z, m_URem(m_Specific(X), m_Specific(Op1)))))
      return BinaryOperator::Create(I.getOpcode(), X, Op1);

  // (X << Y) / X -> 1 << Y
  Value *Y;
  if (IsSigned && match(Op0, m_NSWShl(m_Specific(Op1), m_Value(Y))))
    return BinaryOperator::CreateNSWShl(ConstantInt::get(Ty, 1), Y);
  if (!IsSigned && match(Op0, m_NUWShl(m_Specific(Op1), m_Value(Y))))
    return BinaryOperator::CreateNUWShl(ConstantInt::get(Ty, 1), Y);

  // X / (X * Y) -> 1 / Y if the multiplication does not overflow.
  if (match(Op1, m_c_Mul(m_Specific(Op0), m_Value(Y)))) {
    bool HasNSW = cast<OverflowingBinaryOperator>(Op1)->hasNoSignedWrap();
    bool HasNUW = cast<OverflowingBinaryOperator>(Op1)->hasNoUnsignedWrap();
    if ((IsSigned && HasNSW) || (!IsSigned && HasNUW)) {
      replaceOperand(I, 0, ConstantInt::get(Ty, 1));
      replaceOperand(I, 1, Y);
      return &I;
    }
  }

  // (X << Z) / (X * Y) -> (1 << Z) / Y
  // TODO: Handle sdiv.
  if (!IsSigned && Op1->hasOneUse() &&
      match(Op0, m_NUWShl(m_Value(X), m_Value(Z))) &&
      match(Op1, m_c_Mul(m_Specific(X), m_Value(Y))))
    if (cast<OverflowingBinaryOperator>(Op1)->hasNoUnsignedWrap()) {
      Instruction *NewDiv = BinaryOperator::CreateUDiv(
          Builder.CreateShl(ConstantInt::get(Ty, 1), Z, "", /*NUW*/ true), Y);
      NewDiv->setIsExact(I.isExact());
      return NewDiv;
    }

  if (Value *R = foldIDivShl(I, Builder))
    return replaceInstUsesWith(I, R);

  // With the appropriate no-wrap constraint, remove a multiply by the divisor
  // after peeking through another divide:
  // ((Op1 * X) / Y) / Op1 --> X / Y
  if (match(Op0, m_BinOp(I.getOpcode(), m_c_Mul(m_Specific(Op1), m_Value(X)),
                         m_Value(Y)))) {
    auto *InnerDiv = cast<PossiblyExactOperator>(Op0);
    auto *Mul = cast<OverflowingBinaryOperator>(InnerDiv->getOperand(0));
    Instruction *NewDiv = nullptr;
    if (!IsSigned && Mul->hasNoUnsignedWrap())
      NewDiv = BinaryOperator::CreateUDiv(X, Y);
    else if (IsSigned && Mul->hasNoSignedWrap())
      NewDiv = BinaryOperator::CreateSDiv(X, Y);

    // Exact propagates only if both of the original divides are exact.
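    // For example (illustrative, unsigned case; the inner mul is nuw):
    //   %m = mul nuw i32 %a, %x
    //   %d = udiv i32 %m, %y
    //   %r = udiv i32 %d, %a
    // becomes
    //   %r = udiv i32 %x, %y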
    // Exact propagates only if both of the original divides are exact.
    if (NewDiv) {
      NewDiv->setIsExact(I.isExact() && InnerDiv->isExact());
      return NewDiv;
    }
  }

  // (X * Y) / (X * Z) --> Y / Z (and commuted variants)
  if (match(Op0, m_Mul(m_Value(X), m_Value(Y)))) {
    auto OB0HasNSW = cast<OverflowingBinaryOperator>(Op0)->hasNoSignedWrap();
    auto OB0HasNUW = cast<OverflowingBinaryOperator>(Op0)->hasNoUnsignedWrap();

    auto CreateDivOrNull = [&](Value *A, Value *B) -> Instruction * {
      auto OB1HasNSW = cast<OverflowingBinaryOperator>(Op1)->hasNoSignedWrap();
      auto OB1HasNUW =
          cast<OverflowingBinaryOperator>(Op1)->hasNoUnsignedWrap();
      const APInt *C1, *C2;
      if (IsSigned && OB0HasNSW) {
        if (OB1HasNSW && match(B, m_APInt(C1)) && !C1->isAllOnes())
          return BinaryOperator::CreateSDiv(A, B);
      }
      if (!IsSigned && OB0HasNUW) {
        if (OB1HasNUW)
          return BinaryOperator::CreateUDiv(A, B);
        if (match(A, m_APInt(C1)) && match(B, m_APInt(C2)) && C2->ule(*C1))
          return BinaryOperator::CreateUDiv(A, B);
      }
      return nullptr;
    };

    if (match(Op1, m_c_Mul(m_Specific(X), m_Value(Z)))) {
      if (auto *Val = CreateDivOrNull(Y, Z))
        return Val;
    }
    if (match(Op1, m_c_Mul(m_Specific(Y), m_Value(Z)))) {
      if (auto *Val = CreateDivOrNull(X, Z))
        return Val;
    }
  }
  return nullptr;
}

Value *InstCombinerImpl::takeLog2(Value *Op, unsigned Depth, bool AssumeNonZero,
                                  bool DoFold) {
  auto IfFold = [DoFold](function_ref<Value *()> Fn) {
    if (!DoFold)
      return reinterpret_cast<Value *>(-1);
    return Fn();
  };

  // FIXME: assert that Op1 isn't/doesn't contain undef.

  // log2(2^C) -> C
  if (match(Op, m_Power2()))
    return IfFold([&]() {
      Constant *C = ConstantExpr::getExactLogBase2(cast<Constant>(Op));
      if (!C)
        llvm_unreachable("Failed to constant fold udiv -> logbase2");
      return C;
    });

  // The remaining tests are all recursive, so bail out if we hit the limit.
  if (Depth++ == MaxAnalysisRecursionDepth)
    return nullptr;

  // log2(zext X) -> zext log2(X)
  // FIXME: Require one use?
  Value *X, *Y;
  if (match(Op, m_ZExt(m_Value(X))))
    if (Value *LogX = takeLog2(X, Depth, AssumeNonZero, DoFold))
      return IfFold([&]() { return Builder.CreateZExt(LogX, Op->getType()); });

  // log2(trunc X) -> trunc log2(X)
  // FIXME: Require one use?
  if (match(Op, m_Trunc(m_Value(X)))) {
    auto *TI = cast<TruncInst>(Op);
    if (AssumeNonZero || TI->hasNoUnsignedWrap())
      if (Value *LogX = takeLog2(X, Depth, AssumeNonZero, DoFold))
        return IfFold([&]() {
          return Builder.CreateTrunc(LogX, Op->getType(), "",
                                     /*IsNUW=*/TI->hasNoUnsignedWrap());
        });
  }

  // log2(X << Y) -> log2(X) + Y
  // FIXME: Require one use unless X is 1?
  if (match(Op, m_Shl(m_Value(X), m_Value(Y)))) {
    auto *BO = cast<OverflowingBinaryOperator>(Op);
    // nuw will be set if the `shl` is trivially non-zero.
    if (AssumeNonZero || BO->hasNoUnsignedWrap() || BO->hasNoSignedWrap())
      if (Value *LogX = takeLog2(X, Depth, AssumeNonZero, DoFold))
        return IfFold([&]() { return Builder.CreateAdd(LogX, Y); });
  }

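  // Illustrative recursion through the cases above (operands assumed):
  // log2(zext i8 (shl nuw i8 1, %n) to i32) = zext(log2(1) + %n) = zext %n,
  // so a udiv by that zext can become an lshr by zext %n.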
  // log2(X >>u Y) -> log2(X) - Y
  // FIXME: Require one use?
  if (match(Op, m_LShr(m_Value(X), m_Value(Y)))) {
    auto *PEO = cast<PossiblyExactOperator>(Op);
    if (AssumeNonZero || PEO->isExact())
      if (Value *LogX = takeLog2(X, Depth, AssumeNonZero, DoFold))
        return IfFold([&]() { return Builder.CreateSub(LogX, Y); });
  }

  // log2(X & Y) -> either log2(X) or log2(Y)
  // This requires `AssumeNonZero` as `X & Y` may be zero when X != Y.
  if (AssumeNonZero && match(Op, m_And(m_Value(X), m_Value(Y)))) {
    if (Value *LogX = takeLog2(X, Depth, AssumeNonZero, DoFold))
      return IfFold([&]() { return LogX; });
    if (Value *LogY = takeLog2(Y, Depth, AssumeNonZero, DoFold))
      return IfFold([&]() { return LogY; });
  }

  // log2(Cond ? X : Y) -> Cond ? log2(X) : log2(Y)
  // FIXME: Require one use?
  if (SelectInst *SI = dyn_cast<SelectInst>(Op))
    if (Value *LogX = takeLog2(SI->getOperand(1), Depth, AssumeNonZero, DoFold))
      if (Value *LogY =
              takeLog2(SI->getOperand(2), Depth, AssumeNonZero, DoFold))
        return IfFold([&]() {
          return Builder.CreateSelect(SI->getOperand(0), LogX, LogY);
        });

  // log2(umin(X, Y)) -> umin(log2(X), log2(Y))
  // log2(umax(X, Y)) -> umax(log2(X), log2(Y))
  auto *MinMax = dyn_cast<MinMaxIntrinsic>(Op);
  if (MinMax && MinMax->hasOneUse() && !MinMax->isSigned()) {
    // Pass AssumeNonZero=false here; otherwise we can hit a case where
    // log2(umax(X, Y)) != umax(log2(X), log2(Y)) because of overflow.
    if (Value *LogX = takeLog2(MinMax->getLHS(), Depth,
                               /*AssumeNonZero*/ false, DoFold))
      if (Value *LogY = takeLog2(MinMax->getRHS(), Depth,
                                 /*AssumeNonZero*/ false, DoFold))
        return IfFold([&]() {
          return Builder.CreateBinaryIntrinsic(MinMax->getIntrinsicID(), LogX,
                                               LogY);
        });
  }

  return nullptr;
}

/// If we have zero-extended operands of an unsigned div or rem, we may be able
/// to narrow the operation (sink the zext below the math).
static Instruction *narrowUDivURem(BinaryOperator &I, InstCombinerImpl &IC) {
  Instruction::BinaryOps Opcode = I.getOpcode();
  Value *N = I.getOperand(0);
  Value *D = I.getOperand(1);
  Type *Ty = I.getType();
  Value *X, *Y;
  if (match(N, m_ZExt(m_Value(X))) && match(D, m_ZExt(m_Value(Y))) &&
      X->getType() == Y->getType() && (N->hasOneUse() || D->hasOneUse())) {
    // udiv (zext X), (zext Y) --> zext (udiv X, Y)
    // urem (zext X), (zext Y) --> zext (urem X, Y)
    Value *NarrowOp = IC.Builder.CreateBinOp(Opcode, X, Y);
    return new ZExtInst(NarrowOp, Ty);
  }

  Constant *C;
  if (isa<Instruction>(N) && match(N, m_OneUse(m_ZExt(m_Value(X)))) &&
      match(D, m_Constant(C))) {
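    // e.g. (operands assumed) udiv (zext i8 %x to i32), 200
    //      --> zext (udiv i8 %x, 200) to i32
    // because 200 round-trips losslessly through the unsigned i8 truncation.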
    // If the constant is the same in the smaller type, use the narrow version.
    Constant *TruncC = IC.getLosslessUnsignedTrunc(C, X->getType());
    if (!TruncC)
      return nullptr;

    // udiv (zext X), C --> zext (udiv X, C')
    // urem (zext X), C --> zext (urem X, C')
    return new ZExtInst(IC.Builder.CreateBinOp(Opcode, X, TruncC), Ty);
  }
  if (isa<Instruction>(D) && match(D, m_OneUse(m_ZExt(m_Value(X)))) &&
      match(N, m_Constant(C))) {
    // If the constant is the same in the smaller type, use the narrow version.
    Constant *TruncC = IC.getLosslessUnsignedTrunc(C, X->getType());
    if (!TruncC)
      return nullptr;

    // udiv C, (zext X) --> zext (udiv C', X)
    // urem C, (zext X) --> zext (urem C', X)
    return new ZExtInst(IC.Builder.CreateBinOp(Opcode, TruncC, X), Ty);
  }

  return nullptr;
}

Instruction *InstCombinerImpl::visitUDiv(BinaryOperator &I) {
  if (Value *V = simplifyUDivInst(I.getOperand(0), I.getOperand(1), I.isExact(),
                                  SQ.getWithInstruction(&I)))
    return replaceInstUsesWith(I, V);

  if (Instruction *X = foldVectorBinop(I))
    return X;

  // Handle the integer div common cases
  if (Instruction *Common = commonIDivTransforms(I))
    return Common;

  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
  Value *X;
  const APInt *C1, *C2;
  if (match(Op0, m_LShr(m_Value(X), m_APInt(C1))) && match(Op1, m_APInt(C2))) {
    // (X lshr C1) udiv C2 --> X udiv (C2 << C1)
    bool Overflow;
    APInt C2ShlC1 = C2->ushl_ov(*C1, Overflow);
    if (!Overflow) {
      bool IsExact = I.isExact() && match(Op0, m_Exact(m_Value()));
      BinaryOperator *BO = BinaryOperator::CreateUDiv(
          X, ConstantInt::get(X->getType(), C2ShlC1));
      if (IsExact)
        BO->setIsExact();
      return BO;
    }
  }

  // Op0 / C where C is large (negative) --> zext (Op0 >= C)
  // TODO: Could use isKnownNegative() to handle non-constant values.
  Type *Ty = I.getType();
  if (match(Op1, m_Negative())) {
    Value *Cmp = Builder.CreateICmpUGE(Op0, Op1);
    return CastInst::CreateZExtOrBitCast(Cmp, Ty);
  }
  // Op0 / (sext i1 X) --> zext (Op0 == -1) (if X is 0, the div is undefined)
  if (match(Op1, m_SExt(m_Value(X))) && X->getType()->isIntOrIntVectorTy(1)) {
    Value *Cmp = Builder.CreateICmpEQ(Op0, ConstantInt::getAllOnesValue(Ty));
    return CastInst::CreateZExtOrBitCast(Cmp, Ty);
  }

  if (Instruction *NarrowDiv = narrowUDivURem(I, *this))
    return NarrowDiv;

  Value *A, *B;

  // Look through a right-shift to find the common factor:
  // ((Op1 *nuw A) >> B) / Op1 --> A >> B
  if (match(Op0, m_LShr(m_NUWMul(m_Specific(Op1), m_Value(A)), m_Value(B))) ||
      match(Op0, m_LShr(m_NUWMul(m_Value(A), m_Specific(Op1)), m_Value(B)))) {
    Instruction *Lshr = BinaryOperator::CreateLShr(A, B);
    if (I.isExact() && cast<PossiblyExactOperator>(Op0)->isExact())
      Lshr->setIsExact();
    return Lshr;
  }

  auto GetShiftableDenom = [&](Value *Denom) -> Value * {
    // Op0 udiv Op1 -> Op0 lshr log2(Op1), if log2() folds away.
    if (Value *Log2 = tryGetLog2(Op1, /*AssumeNonZero=*/true))
      return Log2;

    // Op0 udiv Op1 -> Op0 lshr cttz(Op1), if Op1 is a power of 2.
    if (isKnownToBeAPowerOfTwo(Denom, /*OrZero=*/true, /*Depth=*/0, &I))
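      // Worked instance of the cttz fold (constants assumed):
      // 40 udiv 8 == 40 lshr cttz(8) == 40 lshr 3 == 5.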
      // This will increase the instruction count, but that's okay since
      // bitwise operations are substantially faster than division.
      return Builder.CreateBinaryIntrinsic(Intrinsic::cttz, Denom,
                                           Builder.getTrue());

    return nullptr;
  };

  if (auto *Res = GetShiftableDenom(Op1))
    return replaceInstUsesWith(
        I, Builder.CreateLShr(Op0, Res, I.getName(), I.isExact()));

  return nullptr;
}

Instruction *InstCombinerImpl::visitSDiv(BinaryOperator &I) {
  if (Value *V = simplifySDivInst(I.getOperand(0), I.getOperand(1), I.isExact(),
                                  SQ.getWithInstruction(&I)))
    return replaceInstUsesWith(I, V);

  if (Instruction *X = foldVectorBinop(I))
    return X;

  // Handle the integer div common cases
  if (Instruction *Common = commonIDivTransforms(I))
    return Common;

  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
  Type *Ty = I.getType();
  Value *X;
  // sdiv Op0, -1 --> -Op0
  // sdiv Op0, (sext i1 X) --> -Op0 (because if X is 0, the op is undefined)
  if (match(Op1, m_AllOnes()) ||
      (match(Op1, m_SExt(m_Value(X))) && X->getType()->isIntOrIntVectorTy(1)))
    return BinaryOperator::CreateNSWNeg(Op0);

  // X / INT_MIN --> X == INT_MIN
  if (match(Op1, m_SignMask()))
    return new ZExtInst(Builder.CreateICmpEQ(Op0, Op1), Ty);

  if (I.isExact()) {
    // sdiv exact X, 1<<C --> ashr exact X, C iff 1<<C is non-negative
    if (match(Op1, m_Power2()) && match(Op1, m_NonNegative())) {
      Constant *C = ConstantExpr::getExactLogBase2(cast<Constant>(Op1));
      return BinaryOperator::CreateExactAShr(Op0, C);
    }

    // sdiv exact X, (1<<ShAmt) --> ashr exact X, ShAmt (if shl is non-negative)
    Value *ShAmt;
    if (match(Op1, m_NSWShl(m_One(), m_Value(ShAmt))))
      return BinaryOperator::CreateExactAShr(Op0, ShAmt);

    // sdiv exact X, -1<<C --> -(ashr exact X, C)
    if (match(Op1, m_NegatedPower2())) {
      Constant *NegPow2C = ConstantExpr::getNeg(cast<Constant>(Op1));
      Constant *C = ConstantExpr::getExactLogBase2(NegPow2C);
      Value *Ashr = Builder.CreateAShr(Op0, C, I.getName() + ".neg", true);
      return BinaryOperator::CreateNSWNeg(Ashr);
    }
  }

  const APInt *Op1C;
  if (match(Op1, m_APInt(Op1C))) {
    // If the dividend is sign-extended and the constant divisor is small
    // enough to fit in the source type, shrink the division to the narrower
    // type:
    // (sext X) sdiv C --> sext (X sdiv C)
    Value *Op0Src;
    if (match(Op0, m_OneUse(m_SExt(m_Value(Op0Src)))) &&
        Op0Src->getType()->getScalarSizeInBits() >=
            Op1C->getSignificantBits()) {

      // In the general case, we need to make sure that the dividend is not the
      // minimum signed value because dividing that by -1 is UB. But here, we
      // know that the -1 divisor case is already handled above.

      Constant *NarrowDivisor =
          ConstantExpr::getTrunc(cast<Constant>(Op1), Op0Src->getType());
      Value *NarrowOp = Builder.CreateSDiv(Op0Src, NarrowDivisor);
      return new SExtInst(NarrowOp, Ty);
    }
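    // e.g. (operands assumed) sdiv (sext i8 %x to i32), 20
    //      --> sext (sdiv i8 %x, 20) to i32
    // 20 needs only 6 significant bits, so it is exactly representable in i8.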
    // -X / C --> X / -C (if the negation doesn't overflow).
    // TODO: This could be enhanced to handle arbitrary vector constants by
    // checking if all elements are not the min-signed-val.
    if (!Op1C->isMinSignedValue() && match(Op0, m_NSWNeg(m_Value(X)))) {
      Constant *NegC = ConstantInt::get(Ty, -(*Op1C));
      Instruction *BO = BinaryOperator::CreateSDiv(X, NegC);
      BO->setIsExact(I.isExact());
      return BO;
    }
  }

  // -X / Y --> -(X / Y)
  Value *Y;
  if (match(&I, m_SDiv(m_OneUse(m_NSWNeg(m_Value(X))), m_Value(Y))))
    return BinaryOperator::CreateNSWNeg(
        Builder.CreateSDiv(X, Y, I.getName(), I.isExact()));

  // abs(X) / X --> X > -1 ? 1 : -1
  // X / abs(X) --> X > -1 ? 1 : -1
  if (match(&I, m_c_BinOp(
                    m_OneUse(m_Intrinsic<Intrinsic::abs>(m_Value(X), m_One())),
                    m_Deferred(X)))) {
    Value *Cond = Builder.CreateIsNotNeg(X);
    return SelectInst::Create(Cond, ConstantInt::get(Ty, 1),
                              ConstantInt::getAllOnesValue(Ty));
  }

  KnownBits KnownDividend = computeKnownBits(Op0, 0, &I);
  if (!I.isExact() &&
      (match(Op1, m_Power2(Op1C)) || match(Op1, m_NegatedPower2(Op1C))) &&
      KnownDividend.countMinTrailingZeros() >= Op1C->countr_zero()) {
    I.setIsExact();
    return &I;
  }

  if (KnownDividend.isNonNegative()) {
    // If both operands are unsigned, turn this into a udiv.
    if (isKnownNonNegative(Op1, SQ.getWithInstruction(&I))) {
      auto *BO = BinaryOperator::CreateUDiv(Op0, Op1, I.getName());
      BO->setIsExact(I.isExact());
      return BO;
    }

    if (match(Op1, m_NegatedPower2())) {
      // X sdiv (-(1 << C)) -> -(X sdiv (1 << C)) ->
      // -> -(X udiv (1 << C)) -> -(X u>> C)
      Constant *CNegLog2 = ConstantExpr::getExactLogBase2(
          ConstantExpr::getNeg(cast<Constant>(Op1)));
      Value *Shr = Builder.CreateLShr(Op0, CNegLog2, I.getName(), I.isExact());
      return BinaryOperator::CreateNeg(Shr);
    }

    if (isKnownToBeAPowerOfTwo(Op1, /*OrZero*/ true, 0, &I)) {
      // X sdiv (1 << Y) -> X udiv (1 << Y) ( -> X u>> Y)
      // Safe because the only negative value (1 << Y) can take on is
      // INT_MIN, and X sdiv INT_MIN == X udiv INT_MIN == 0 if X doesn't have
      // the sign bit set.
      auto *BO = BinaryOperator::CreateUDiv(Op0, Op1, I.getName());
      BO->setIsExact(I.isExact());
      return BO;
    }
  }

  // -X / X --> X == INT_MIN ? 1 : -1
  if (isKnownNegation(Op0, Op1)) {
    APInt MinVal = APInt::getSignedMinValue(Ty->getScalarSizeInBits());
    Value *Cond = Builder.CreateICmpEQ(Op0, ConstantInt::get(Ty, MinVal));
    return SelectInst::Create(Cond, ConstantInt::get(Ty, 1),
                              ConstantInt::getAllOnesValue(Ty));
  }
  return nullptr;
}
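// Illustrative check of the preceding -X / X fold (i8 values assumed):
// X = 5: -5 sdiv 5 = -1, the "else" arm of the select.
// X = -128: -(-128) wraps back to -128, and -128 sdiv -128 = 1, the INT_MIN
// arm.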
/// Remove negation and try to convert division into multiplication.
Instruction *InstCombinerImpl::foldFDivConstantDivisor(BinaryOperator &I) {
  Constant *C;
  if (!match(I.getOperand(1), m_Constant(C)))
    return nullptr;

  // -X / C --> X / -C
  Value *X;
  const DataLayout &DL = I.getDataLayout();
  if (match(I.getOperand(0), m_FNeg(m_Value(X))))
    if (Constant *NegC = ConstantFoldUnaryOpOperand(Instruction::FNeg, C, DL))
      return BinaryOperator::CreateFDivFMF(X, NegC, &I);

  // nnan X / +0.0 -> copysign(inf, X)
  // nnan nsz X / -0.0 -> copysign(inf, X)
  if (I.hasNoNaNs() &&
      (match(I.getOperand(1), m_PosZeroFP()) ||
       (I.hasNoSignedZeros() && match(I.getOperand(1), m_AnyZeroFP())))) {
    IRBuilder<> B(&I);
    CallInst *CopySign = B.CreateIntrinsic(
        Intrinsic::copysign, {C->getType()},
        {ConstantFP::getInfinity(I.getType()), I.getOperand(0)}, &I);
    CopySign->takeName(&I);
    return replaceInstUsesWith(I, CopySign);
  }

  // If the constant divisor has an exact inverse, this is always safe. If not,
  // then we can still create a reciprocal if fast-math-flags allow it and the
  // constant is a regular number (not zero, infinite, or denormal).
  if (!(C->hasExactInverseFP() || (I.hasAllowReciprocal() && C->isNormalFP())))
    return nullptr;

  // Disallow denormal constants because we don't know what would happen
  // on all targets.
  // TODO: Use Intrinsic::canonicalize or let function attributes tell us that
  // denorms are flushed?
  auto *RecipC = ConstantFoldBinaryOpOperands(
      Instruction::FDiv, ConstantFP::get(I.getType(), 1.0), C, DL);
  if (!RecipC || !RecipC->isNormalFP())
    return nullptr;

  // X / C --> X * (1 / C)
  return BinaryOperator::CreateFMulFMF(I.getOperand(0), RecipC, &I);
}

/// Remove negation and try to reassociate constant math.
static Instruction *foldFDivConstantDividend(BinaryOperator &I) {
  Constant *C;
  if (!match(I.getOperand(0), m_Constant(C)))
    return nullptr;

  // C / -X --> -C / X
  Value *X;
  const DataLayout &DL = I.getDataLayout();
  if (match(I.getOperand(1), m_FNeg(m_Value(X))))
    if (Constant *NegC = ConstantFoldUnaryOpOperand(Instruction::FNeg, C, DL))
      return BinaryOperator::CreateFDivFMF(NegC, X, &I);

  if (!I.hasAllowReassoc() || !I.hasAllowReciprocal())
    return nullptr;

  // Try to reassociate C / X expressions where X includes another constant.
  Constant *C2, *NewC = nullptr;
  if (match(I.getOperand(1), m_FMul(m_Value(X), m_Constant(C2)))) {
    // C / (X * C2) --> (C / C2) / X
    NewC = ConstantFoldBinaryOpOperands(Instruction::FDiv, C, C2, DL);
  } else if (match(I.getOperand(1), m_FDiv(m_Value(X), m_Constant(C2)))) {
    // C / (X / C2) --> (C * C2) / X
    NewC = ConstantFoldBinaryOpOperands(Instruction::FMul, C, C2, DL);
  }
  // Disallow denormal constants because we don't know what would happen
  // on all targets.
  // TODO: Use Intrinsic::canonicalize or let function attributes tell us that
  // denorms are flushed?
  if (!NewC || !NewC->isNormalFP())
    return nullptr;

  return BinaryOperator::CreateFDivFMF(NewC, X, &I);
}
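// Worked instance of the X / C --> X * (1 / C) fold above (values assumed):
// fdiv float %x, 4.0 --> fmul float %x, 0.25, legal without any fast-math
// flags because 4.0 has an exact inverse; a divisor of 3.0 would instead
// require 'arcp' since 1/3 is not exactly representable.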
/// Negate the exponent of pow/exp to fold division-by-pow() into multiply.
static Instruction *foldFDivPowDivisor(BinaryOperator &I,
                                       InstCombiner::BuilderTy &Builder) {
  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
  auto *II = dyn_cast<IntrinsicInst>(Op1);
  if (!II || !II->hasOneUse() || !I.hasAllowReassoc() ||
      !I.hasAllowReciprocal())
    return nullptr;

  // Z / pow(X, Y) --> Z * pow(X, -Y)
  // Z / exp{2}(Y) --> Z * exp{2}(-Y)
  // In the general case, this creates an extra instruction, but fmul allows
  // for better canonicalization and optimization than fdiv.
  Intrinsic::ID IID = II->getIntrinsicID();
  SmallVector<Value *> Args;
  switch (IID) {
  case Intrinsic::pow:
    Args.push_back(II->getArgOperand(0));
    Args.push_back(Builder.CreateFNegFMF(II->getArgOperand(1), &I));
    break;
  case Intrinsic::powi: {
    // Require 'ninf' assuming that makes powi(X, -INT_MIN) acceptable.
    // That is, X ** (huge negative number) is 0.0, ~1.0, or INF and so
    // dividing by that is INF, ~1.0, or 0.0. Code that uses powi allows
    // non-standard results, so this corner case should be acceptable if the
    // code rules out INF values.
    if (!I.hasNoInfs())
      return nullptr;
    Args.push_back(II->getArgOperand(0));
    Args.push_back(Builder.CreateNeg(II->getArgOperand(1)));
    Type *Tys[] = {I.getType(), II->getArgOperand(1)->getType()};
    Value *Pow = Builder.CreateIntrinsic(IID, Tys, Args, &I);
    return BinaryOperator::CreateFMulFMF(Op0, Pow, &I);
  }
  case Intrinsic::exp:
  case Intrinsic::exp2:
    Args.push_back(Builder.CreateFNegFMF(II->getArgOperand(0), &I));
    break;
  default:
    return nullptr;
  }
  Value *Pow = Builder.CreateIntrinsic(IID, I.getType(), Args, &I);
  return BinaryOperator::CreateFMulFMF(Op0, Pow, &I);
}
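// Illustrative shape of the exp case above (flags and operands assumed):
// given a one-use %p = call reassoc float @llvm.exp.f32(float %y) and
// %r = fdiv reassoc arcp float %z, %p, this emits %n = fneg %y,
// %p2 = call @llvm.exp.f32(float %n), and %r = fmul float %z, %p2.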
/// Convert the div into a mul when the divisor is a sqrt call whose operand
/// is itself an fdiv instruction.
static Instruction *foldFDivSqrtDivisor(BinaryOperator &I,
                                        InstCombiner::BuilderTy &Builder) {
  // X / sqrt(Y / Z) --> X * sqrt(Z / Y)
  if (!I.hasAllowReassoc() || !I.hasAllowReciprocal())
    return nullptr;
  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
  auto *II = dyn_cast<IntrinsicInst>(Op1);
  if (!II || II->getIntrinsicID() != Intrinsic::sqrt || !II->hasOneUse() ||
      !II->hasAllowReassoc() || !II->hasAllowReciprocal())
    return nullptr;

  Value *Y, *Z;
  auto *DivOp = dyn_cast<Instruction>(II->getOperand(0));
  if (!DivOp)
    return nullptr;
  if (!match(DivOp, m_FDiv(m_Value(Y), m_Value(Z))))
    return nullptr;
  if (!DivOp->hasAllowReassoc() || !I.hasAllowReciprocal() ||
      !DivOp->hasOneUse())
    return nullptr;
  Value *SwapDiv = Builder.CreateFDivFMF(Z, Y, DivOp);
  Value *NewSqrt =
      Builder.CreateUnaryIntrinsic(II->getIntrinsicID(), SwapDiv, II);
  return BinaryOperator::CreateFMulFMF(Op0, NewSqrt, &I);
}

// Change
// X = 1/sqrt(a)
// R1 = X * X
// R2 = a * X
//
// TO
//
// FDiv = 1/a
// FSqrt = sqrt(a)
// FMul = FDiv * FSqrt
// Replace uses of R1 with FDiv
// Replace uses of R2 with FSqrt
// Replace uses of X with FMul
static Instruction *
convertFSqrtDivIntoFMul(CallInst *CI, Instruction *X,
                        const SmallPtrSetImpl<Instruction *> &R1,
                        const SmallPtrSetImpl<Instruction *> &R2,
                        InstCombiner::BuilderTy &B, InstCombinerImpl *IC) {

  B.SetInsertPoint(X);

  // Build a single fdiv that will replace all of the instructions in R1,
  // giving it the most generic fpmath metadata and the fast-math flags
  // common to all of them.
  Value *SqrtOp = CI->getArgOperand(0);
  auto *FDiv = cast<Instruction>(
      B.CreateFDiv(ConstantFP::get(X->getType(), 1.0), SqrtOp));
  auto *R1FPMathMDNode = (*R1.begin())->getMetadata(LLVMContext::MD_fpmath);
  FastMathFlags R1FMF = (*R1.begin())->getFastMathFlags(); // Common FMF
  for (Instruction *I : R1) {
    R1FPMathMDNode = MDNode::getMostGenericFPMath(
        R1FPMathMDNode, I->getMetadata(LLVMContext::MD_fpmath));
    R1FMF &= I->getFastMathFlags();
    IC->replaceInstUsesWith(*I, FDiv);
    IC->eraseInstFromFunction(*I);
  }
  FDiv->setMetadata(LLVMContext::MD_fpmath, R1FPMathMDNode);
  FDiv->copyFastMathFlags(R1FMF);
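  // For example (flags assumed): if R1 holds one 'fmul fast' and one
  // 'fmul reassoc', the &= above keeps only 'reassoc', i.e. the flags that
  // every member of the set carries.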
  // Build a single sqrt call that will replace all of the instructions in R2,
  // giving it the most generic fpmath metadata and the fast-math flags
  // common to all of them.
  auto *FSqrt = cast<CallInst>(CI->clone());
  FSqrt->insertBefore(CI->getIterator());
  auto *R2FPMathMDNode = (*R2.begin())->getMetadata(LLVMContext::MD_fpmath);
  FastMathFlags R2FMF = (*R2.begin())->getFastMathFlags(); // Common FMF
  for (Instruction *I : R2) {
    R2FPMathMDNode = MDNode::getMostGenericFPMath(
        R2FPMathMDNode, I->getMetadata(LLVMContext::MD_fpmath));
    R2FMF &= I->getFastMathFlags();
    IC->replaceInstUsesWith(*I, FSqrt);
    IC->eraseInstFromFunction(*I);
  }
  FSqrt->setMetadata(LLVMContext::MD_fpmath, R2FPMathMDNode);
  FSqrt->copyFastMathFlags(R2FMF);

  Instruction *FMul;
  // If X = -1/sqrt(a) initially, then FMul = -(FDiv * FSqrt).
  if (match(X, m_FDiv(m_SpecificFP(-1.0), m_Specific(CI)))) {
    Value *Mul = B.CreateFMul(FDiv, FSqrt);
    FMul = cast<Instruction>(B.CreateFNeg(Mul));
  } else
    FMul = cast<Instruction>(B.CreateFMul(FDiv, FSqrt));
  FMul->copyMetadata(*X);
  FMul->copyFastMathFlags(FastMathFlags::intersectRewrite(R1FMF, R2FMF) |
                          FastMathFlags::unionValue(R1FMF, R2FMF));
  return IC->replaceInstUsesWith(*X, FMul);
}

Instruction *InstCombinerImpl::visitFDiv(BinaryOperator &I) {
  Module *M = I.getModule();

  if (Value *V = simplifyFDivInst(I.getOperand(0), I.getOperand(1),
                                  I.getFastMathFlags(),
                                  SQ.getWithInstruction(&I)))
    return replaceInstUsesWith(I, V);

  if (Instruction *X = foldVectorBinop(I))
    return X;

  if (Instruction *Phi = foldBinopWithPhiOperands(I))
    return Phi;

  if (Instruction *R = foldFDivConstantDivisor(I))
    return R;

  if (Instruction *R = foldFDivConstantDividend(I))
    return R;

  if (Instruction *R = foldFPSignBitOps(I))
    return R;

  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);

  // Convert
  // x = 1.0/sqrt(a)
  // r1 = x * x;
  // r2 = a/sqrt(a);
  //
  // TO
  //
  // r1 = 1/a
  // r2 = sqrt(a)
  // x = r1 * r2
  SmallPtrSet<Instruction *, 2> R1, R2;
  if (isFSqrtDivToFMulLegal(&I, R1, R2)) {
    CallInst *CI = cast<CallInst>(I.getOperand(1));
    if (Instruction *D = convertFSqrtDivIntoFMul(CI, &I, R1, R2, Builder, this))
      return D;
  }

  if (isa<Constant>(Op0))
    if (SelectInst *SI = dyn_cast<SelectInst>(Op1))
      if (Instruction *R = FoldOpIntoSelect(I, SI))
        return R;

  if (isa<Constant>(Op1))
    if (SelectInst *SI = dyn_cast<SelectInst>(Op0))
      if (Instruction *R = FoldOpIntoSelect(I, SI))
        return R;

  if (I.hasAllowReassoc() && I.hasAllowReciprocal()) {
    Value *X, *Y;
    if (match(Op0, m_OneUse(m_FDiv(m_Value(X), m_Value(Y)))) &&
        (!isa<Constant>(Y) || !isa<Constant>(Op1))) {
      // (X / Y) / Z => X / (Y * Z)
      Value *YZ = Builder.CreateFMulFMF(Y, Op1, &I);
      return BinaryOperator::CreateFDivFMF(X, YZ, &I);
    }
    if (match(Op1, m_OneUse(m_FDiv(m_Value(X), m_Value(Y)))) &&
        (!isa<Constant>(Y) || !isa<Constant>(Op0))) {
      // Z / (X / Y) => (Y * Z) / X
      Value *YZ = Builder.CreateFMulFMF(Y, Op0, &I);
      return BinaryOperator::CreateFDivFMF(YZ, X, &I);
    }
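    // e.g. (flags assumed) with 'reassoc arcp': (%x / %y) / %z
    //      --> %x / (%y * %z), trading two divides for a divide and a mul.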
    // Z / (1.0 / Y) => (Y * Z)
    //
    // This is a special case of Z / (X / Y) => (Y * Z) / X, with X = 1.0. The
    // m_OneUse check is avoided because even if 1.0/Y has multiple uses, the
    // instruction count remains the same and a division is still replaced by
    // a multiplication.
    if (match(Op1, m_FDiv(m_SpecificFP(1.0), m_Value(Y))))
      return BinaryOperator::CreateFMulFMF(Y, Op0, &I);
  }

  if (I.hasAllowReassoc() && Op0->hasOneUse() && Op1->hasOneUse()) {
    // sin(X) / cos(X) -> tan(X)
    // cos(X) / sin(X) -> 1/tan(X) (cotangent)
    Value *X;
    bool IsTan = match(Op0, m_Intrinsic<Intrinsic::sin>(m_Value(X))) &&
                 match(Op1, m_Intrinsic<Intrinsic::cos>(m_Specific(X)));
    bool IsCot =
        !IsTan && match(Op0, m_Intrinsic<Intrinsic::cos>(m_Value(X))) &&
        match(Op1, m_Intrinsic<Intrinsic::sin>(m_Specific(X)));

    if ((IsTan || IsCot) && hasFloatFn(M, &TLI, I.getType(), LibFunc_tan,
                                       LibFunc_tanf, LibFunc_tanl)) {
      IRBuilder<> B(&I);
      IRBuilder<>::FastMathFlagGuard FMFGuard(B);
      B.setFastMathFlags(I.getFastMathFlags());
      AttributeList Attrs =
          cast<CallBase>(Op0)->getCalledFunction()->getAttributes();
      Value *Res = emitUnaryFloatFnCall(X, &TLI, LibFunc_tan, LibFunc_tanf,
                                        LibFunc_tanl, B, Attrs);
      if (IsCot)
        Res = B.CreateFDiv(ConstantFP::get(I.getType(), 1.0), Res);
      return replaceInstUsesWith(I, Res);
    }
  }

  // X / (X * Y) --> 1.0 / Y
  // Reassociating to (X / X -> 1.0) is legal when NaNs are not allowed.
  // We can ignore the possibility that X is infinity because INF/INF is NaN.
  Value *X, *Y;
  if (I.hasNoNaNs() && I.hasAllowReassoc() &&
      match(Op1, m_c_FMul(m_Specific(Op0), m_Value(Y)))) {
    replaceOperand(I, 0, ConstantFP::get(I.getType(), 1.0));
    replaceOperand(I, 1, Y);
    return &I;
  }

  // X / fabs(X) -> copysign(1.0, X)
  // fabs(X) / X -> copysign(1.0, X)
  if (I.hasNoNaNs() && I.hasNoInfs() &&
      (match(&I, m_FDiv(m_Value(X), m_FAbs(m_Deferred(X)))) ||
       match(&I, m_FDiv(m_FAbs(m_Value(X)), m_Deferred(X))))) {
    Value *V = Builder.CreateBinaryIntrinsic(
        Intrinsic::copysign, ConstantFP::get(I.getType(), 1.0), X, &I);
    return replaceInstUsesWith(I, V);
  }

  if (Instruction *Mul = foldFDivPowDivisor(I, Builder))
    return Mul;

  if (Instruction *Mul = foldFDivSqrtDivisor(I, Builder))
    return Mul;

  // pow(X, Y) / X --> pow(X, Y-1)
  if (I.hasAllowReassoc() &&
      match(Op0, m_OneUse(m_Intrinsic<Intrinsic::pow>(m_Specific(Op1),
                                                      m_Value(Y))))) {
    Value *Y1 =
        Builder.CreateFAddFMF(Y, ConstantFP::get(I.getType(), -1.0), &I);
    Value *Pow = Builder.CreateBinaryIntrinsic(Intrinsic::pow, Op1, Y1, &I);
    return replaceInstUsesWith(I, Pow);
  }

  if (Instruction *FoldedPowi = foldPowiReassoc(I))
    return FoldedPowi;

  return nullptr;
}

// A variety of transforms for:
// (urem/srem (mul X, Y), (mul X, Z))
// (urem/srem (shl X, Y), (shl X, Z))
// (urem/srem (shl Y, X), (shl Z, X))
// NB: The shift cases are really just extensions of the mul case. We treat
// shift as Val * (1 << Amt).
static Instruction *simplifyIRemMulShl(BinaryOperator &I,
                                       InstCombinerImpl &IC) {
  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1), *X = nullptr;
  APInt Y, Z;
  bool ShiftByX = false;
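  // e.g. (flags assumed) urem (mul nuw %x, 12), (mul %x, 4) --> 0, since
  // 12 urem 4 == 0 and the nuw on the dividend's mul makes the fold safe;
  // see the RemYZ cases below.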
  // If V is not nullptr, it will be matched using m_Specific.
  auto MatchShiftOrMulXC = [](Value *Op, Value *&V, APInt &C,
                              bool &PreserveNSW) -> bool {
    const APInt *Tmp = nullptr;
    if ((!V && match(Op, m_Mul(m_Value(V), m_APInt(Tmp)))) ||
        (V && match(Op, m_Mul(m_Specific(V), m_APInt(Tmp)))))
      C = *Tmp;
    else if ((!V && match(Op, m_Shl(m_Value(V), m_APInt(Tmp)))) ||
             (V && match(Op, m_Shl(m_Specific(V), m_APInt(Tmp))))) {
      C = APInt(Tmp->getBitWidth(), 1) << *Tmp;
      // We cannot preserve NSW when shifting by BW - 1.
      PreserveNSW = Tmp->ult(Tmp->getBitWidth() - 1);
    }
    if (Tmp != nullptr)
      return true;

    // Reset `V` so we don't start with a specific value on the next match
    // attempt.
    V = nullptr;
    return false;
  };

  auto MatchShiftCX = [](Value *Op, APInt &C, Value *&V) -> bool {
    const APInt *Tmp = nullptr;
    if ((!V && match(Op, m_Shl(m_APInt(Tmp), m_Value(V)))) ||
        (V && match(Op, m_Shl(m_APInt(Tmp), m_Specific(V))))) {
      C = *Tmp;
      return true;
    }

    // Reset `V` so we don't start with a specific value on the next match
    // attempt.
    V = nullptr;
    return false;
  };

  bool Op0PreserveNSW = true, Op1PreserveNSW = true;
  if (MatchShiftOrMulXC(Op0, X, Y, Op0PreserveNSW) &&
      MatchShiftOrMulXC(Op1, X, Z, Op1PreserveNSW)) {
    // pass
  } else if (MatchShiftCX(Op0, Y, X) && MatchShiftCX(Op1, Z, X)) {
    ShiftByX = true;
  } else {
    return nullptr;
  }

  bool IsSRem = I.getOpcode() == Instruction::SRem;

  OverflowingBinaryOperator *BO0 = cast<OverflowingBinaryOperator>(Op0);
  // TODO: We may be able to deduce more about nsw/nuw of BO0/BO1 based on
  // Y >= Z or Z >= Y.
  bool BO0HasNSW = Op0PreserveNSW && BO0->hasNoSignedWrap();
  bool BO0HasNUW = BO0->hasNoUnsignedWrap();
  bool BO0NoWrap = IsSRem ? BO0HasNSW : BO0HasNUW;

  APInt RemYZ = IsSRem ? Y.srem(Z) : Y.urem(Z);
  // (rem (mul nuw/nsw X, Y), (mul X, Z))
  // if (rem Y, Z) == 0
  // -> 0
  if (RemYZ.isZero() && BO0NoWrap)
    return IC.replaceInstUsesWith(I, ConstantInt::getNullValue(I.getType()));

  // Helper function to emit either (RemSimplificationC << X) or
  // (RemSimplificationC * X) depending on whether we matched Op0/Op1 as
  // (shl V, X) or (mul V, X) respectively.
  auto CreateMulOrShift =
      [&](const APInt &RemSimplificationC) -> BinaryOperator * {
    Value *RemSimplification =
        ConstantInt::get(I.getType(), RemSimplificationC);
    return ShiftByX ? BinaryOperator::CreateShl(RemSimplification, X)
                    : BinaryOperator::CreateMul(X, RemSimplification);
  };

  OverflowingBinaryOperator *BO1 = cast<OverflowingBinaryOperator>(Op1);
  bool BO1HasNSW = Op1PreserveNSW && BO1->hasNoSignedWrap();
  bool BO1HasNUW = BO1->hasNoUnsignedWrap();
  bool BO1NoWrap = IsSRem ? BO1HasNSW : BO1HasNUW;
  // (rem (mul X, Y), (mul nuw/nsw X, Z))
  // if (rem Y, Z) == Y
  // -> (mul nuw/nsw X, Y)
  if (RemYZ == Y && BO1NoWrap) {
    BinaryOperator *BO = CreateMulOrShift(Y);
    // Copy any overflow flags from Op0.
    BO->setHasNoSignedWrap(IsSRem || BO0HasNSW);
    BO->setHasNoUnsignedWrap(!IsSRem || BO0HasNUW);
    return BO;
  }

  // (rem (mul nuw/nsw X, Y), (mul {nsw} X, Z))
  // if Y >= Z
  // -> (mul {nuw} nsw X, (rem Y, Z))
  if (Y.uge(Z) && (IsSRem ? (BO0HasNSW && BO1HasNSW) : BO0HasNUW)) {
    BinaryOperator *BO = CreateMulOrShift(RemYZ);
    BO->setHasNoSignedWrap();
    BO->setHasNoUnsignedWrap(BO0HasNUW);
    return BO;
  }

  return nullptr;
}

/// This function implements the transforms common to both integer remainder
/// instructions (urem and srem). It is called by the visitors to those integer
/// remainder instructions.
Instruction *InstCombinerImpl::commonIRemTransforms(BinaryOperator &I) {
  if (Instruction *Res = commonIDivRemTransforms(I))
    return Res;

  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);

  if (isa<Constant>(Op1)) {
    if (Instruction *Op0I = dyn_cast<Instruction>(Op0)) {
      if (SelectInst *SI = dyn_cast<SelectInst>(Op0I)) {
        if (Instruction *R = FoldOpIntoSelect(I, SI))
          return R;
      } else if (auto *PN = dyn_cast<PHINode>(Op0I)) {
        const APInt *Op1Int;
        if (match(Op1, m_APInt(Op1Int)) && !Op1Int->isMinValue() &&
            (I.getOpcode() == Instruction::URem ||
             !Op1Int->isMinSignedValue())) {
          // foldOpIntoPhi will speculate instructions to the end of the PHI's
          // predecessor blocks, so do this only if we know the srem or urem
          // will not fault.
          if (Instruction *NV = foldOpIntoPhi(I, PN))
            return NV;
        }
      }

      // See if we can fold away this rem instruction.
      if (SimplifyDemandedInstructionBits(I))
        return &I;
    }
  }

  if (Instruction *R = simplifyIRemMulShl(I, *this))
    return R;

  return nullptr;
}

Instruction *InstCombinerImpl::visitURem(BinaryOperator &I) {
  if (Value *V = simplifyURemInst(I.getOperand(0), I.getOperand(1),
                                  SQ.getWithInstruction(&I)))
    return replaceInstUsesWith(I, V);

  if (Instruction *X = foldVectorBinop(I))
    return X;

  if (Instruction *common = commonIRemTransforms(I))
    return common;

  if (Instruction *NarrowRem = narrowUDivURem(I, *this))
    return NarrowRem;

  // X urem Y --> X & (Y - 1), where Y is a power of 2.
  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
  Type *Ty = I.getType();
  if (isKnownToBeAPowerOfTwo(Op1, /*OrZero*/ true, 0, &I)) {
    // This may increase the instruction count because we don't require Y to
    // be a constant.
    Constant *N1 = Constant::getAllOnesValue(Ty);
    Value *Add = Builder.CreateAdd(Op1, N1);
    return BinaryOperator::CreateAnd(Op0, Add);
  }

  // 1 urem X -> zext(X != 1)
  if (match(Op0, m_One())) {
    Value *Cmp = Builder.CreateICmpNE(Op1, ConstantInt::get(Ty, 1));
    return CastInst::CreateZExtOrBitCast(Cmp, Ty);
  }

  // Op0 urem C -> Op0 < C ? Op0 : Op0 - C, where C >= signbit.
  // Op0 must be frozen because we are increasing its number of uses.
  if (match(Op1, m_Negative())) {
    Value *F0 = Op0;
    if (!isGuaranteedNotToBeUndef(Op0))
      F0 = Builder.CreateFreeze(Op0, Op0->getName() + ".fr");
    Value *Cmp = Builder.CreateICmpULT(F0, Op1);
    Value *Sub = Builder.CreateSub(F0, Op1);
    return SelectInst::Create(Cmp, F0, Sub);
  }

  // If the divisor is a sext of a boolean, then the divisor must be max
  // unsigned value (-1). Therefore, the remainder is Op0 unless Op0 is also
  // max unsigned value. In that case, the remainder is 0:
  // urem Op0, (sext i1 X) --> (Op0 == -1) ? 0 : Op0
  Value *X;
  if (match(Op1, m_SExt(m_Value(X))) && X->getType()->isIntOrIntVectorTy(1)) {
    Value *FrozenOp0 = Op0;
    if (!isGuaranteedNotToBeUndef(Op0))
      FrozenOp0 = Builder.CreateFreeze(Op0, Op0->getName() + ".frozen");
    Value *Cmp =
        Builder.CreateICmpEQ(FrozenOp0, ConstantInt::getAllOnesValue(Ty));
    return SelectInst::Create(Cmp, ConstantInt::getNullValue(Ty), FrozenOp0);
  }

  // For "(X + 1) % Op1": if (X u< Op1), then
  // (X + 1) % Op1 --> (X + 1) == Op1 ? 0 : X + 1.
  if (match(Op0, m_Add(m_Value(X), m_One()))) {
    Value *Val =
        simplifyICmpInst(ICmpInst::ICMP_ULT, X, Op1, SQ.getWithInstruction(&I));
    if (Val && match(Val, m_One())) {
      Value *FrozenOp0 = Op0;
      if (!isGuaranteedNotToBeUndef(Op0))
        FrozenOp0 = Builder.CreateFreeze(Op0, Op0->getName() + ".frozen");
      Value *Cmp = Builder.CreateICmpEQ(FrozenOp0, Op1);
      return SelectInst::Create(Cmp, ConstantInt::getNullValue(Ty), FrozenOp0);
    }
  }

  return nullptr;
}

Instruction *InstCombinerImpl::visitSRem(BinaryOperator &I) {
  if (Value *V = simplifySRemInst(I.getOperand(0), I.getOperand(1),
                                  SQ.getWithInstruction(&I)))
    return replaceInstUsesWith(I, V);

  if (Instruction *X = foldVectorBinop(I))
    return X;

  // Handle the integer rem common cases
  if (Instruction *Common = commonIRemTransforms(I))
    return Common;

  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
  {
    const APInt *Y;
    // X % -Y -> X % Y
    if (match(Op1, m_Negative(Y)) && !Y->isMinSignedValue())
      return replaceOperand(I, 1, ConstantInt::get(I.getType(), -*Y));
  }

  // -X srem Y --> -(X srem Y)
  Value *X, *Y;
  if (match(&I, m_SRem(m_OneUse(m_NSWNeg(m_Value(X))), m_Value(Y))))
    return BinaryOperator::CreateNSWNeg(Builder.CreateSRem(X, Y));

  // If the sign bits of both operands are zero (i.e. we can prove they are
  // unsigned inputs), turn this into a urem.
  APInt Mask(APInt::getSignMask(I.getType()->getScalarSizeInBits()));
  if (MaskedValueIsZero(Op1, Mask, 0, &I) &&
      MaskedValueIsZero(Op0, Mask, 0, &I)) {
    // X srem Y -> X urem Y, iff X and Y don't have sign bit set
    return BinaryOperator::CreateURem(Op0, Op1, I.getName());
  }

  // If it's a constant vector, flip any negative values positive.
  if (isa<ConstantVector>(Op1) || isa<ConstantDataVector>(Op1)) {
    Constant *C = cast<Constant>(Op1);
    unsigned VWidth = cast<FixedVectorType>(C->getType())->getNumElements();

    bool hasNegative = false;
    bool hasMissing = false;
    for (unsigned i = 0; i != VWidth; ++i) {
      Constant *Elt = C->getAggregateElement(i);
      if (!Elt) {
        hasMissing = true;
        break;
      }

      if (ConstantInt *RHS = dyn_cast<ConstantInt>(Elt))
        if (RHS->isNegative())
          hasNegative = true;
    }

    if (hasNegative && !hasMissing) {
      SmallVector<Constant *, 16> Elts(VWidth);
      for (unsigned i = 0; i != VWidth; ++i) {
        Elts[i] = C->getAggregateElement(i); // Handle undef, etc.
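        // Note (explanatory): negating INT_MIN wraps back to INT_MIN, so the
        // NewRHSV != C guard below prevents an infinite combine loop on it.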
        if (ConstantInt *RHS = dyn_cast<ConstantInt>(Elts[i])) {
          if (RHS->isNegative())
            Elts[i] = cast<ConstantInt>(ConstantExpr::getNeg(RHS));
        }
      }

      Constant *NewRHSV = ConstantVector::get(Elts);
      if (NewRHSV != C) // Don't loop on -MININT
        return replaceOperand(I, 1, NewRHSV);
    }
  }

  return nullptr;
}

Instruction *InstCombinerImpl::visitFRem(BinaryOperator &I) {
  if (Value *V = simplifyFRemInst(I.getOperand(0), I.getOperand(1),
                                  I.getFastMathFlags(),
                                  SQ.getWithInstruction(&I)))
    return replaceInstUsesWith(I, V);

  if (Instruction *X = foldVectorBinop(I))
    return X;

  if (Instruction *Phi = foldBinopWithPhiOperands(I))
    return Phi;

  return nullptr;
}