1 //===- InstCombineAndOrXor.cpp --------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements the visitAnd, visitOr, and visitXor functions. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "InstCombineInternal.h" 14 #include "llvm/Analysis/CmpInstAnalysis.h" 15 #include "llvm/Analysis/InstructionSimplify.h" 16 #include "llvm/IR/ConstantRange.h" 17 #include "llvm/IR/Intrinsics.h" 18 #include "llvm/IR/PatternMatch.h" 19 #include "llvm/Transforms/InstCombine/InstCombiner.h" 20 #include "llvm/Transforms/Utils/Local.h" 21 22 using namespace llvm; 23 using namespace PatternMatch; 24 25 #define DEBUG_TYPE "instcombine" 26 27 /// This is the complement of getICmpCode, which turns an opcode and two 28 /// operands into either a constant true or false, or a brand new ICmp 29 /// instruction. The sign is passed in to determine which kind of predicate to 30 /// use in the new icmp instruction. 31 static Value *getNewICmpValue(unsigned Code, bool Sign, Value *LHS, Value *RHS, 32 InstCombiner::BuilderTy &Builder) { 33 ICmpInst::Predicate NewPred; 34 if (Constant *TorF = getPredForICmpCode(Code, Sign, LHS->getType(), NewPred)) 35 return TorF; 36 return Builder.CreateICmp(NewPred, LHS, RHS); 37 } 38 39 /// This is the complement of getFCmpCode, which turns an opcode and two 40 /// operands into either a FCmp instruction, or a true/false constant. 41 static Value *getFCmpValue(unsigned Code, Value *LHS, Value *RHS, 42 InstCombiner::BuilderTy &Builder, FMFSource FMF) { 43 FCmpInst::Predicate NewPred; 44 if (Constant *TorF = getPredForFCmpCode(Code, LHS->getType(), NewPred)) 45 return TorF; 46 return Builder.CreateFCmpFMF(NewPred, LHS, RHS, FMF); 47 } 48 49 /// Emit a computation of: (V >= Lo && V < Hi) if Inside is true, otherwise 50 /// (V < Lo || V >= Hi). This method expects that Lo < Hi. IsSigned indicates 51 /// whether to treat V, Lo, and Hi as signed or not. 52 Value *InstCombinerImpl::insertRangeTest(Value *V, const APInt &Lo, 53 const APInt &Hi, bool isSigned, 54 bool Inside) { 55 assert((isSigned ? Lo.slt(Hi) : Lo.ult(Hi)) && 56 "Lo is not < Hi in range emission code!"); 57 58 Type *Ty = V->getType(); 59 60 // V >= Min && V < Hi --> V < Hi 61 // V < Min || V >= Hi --> V >= Hi 62 ICmpInst::Predicate Pred = Inside ? ICmpInst::ICMP_ULT : ICmpInst::ICMP_UGE; 63 if (isSigned ? Lo.isMinSignedValue() : Lo.isMinValue()) { 64 Pred = isSigned ? ICmpInst::getSignedPredicate(Pred) : Pred; 65 return Builder.CreateICmp(Pred, V, ConstantInt::get(Ty, Hi)); 66 } 67 68 // V >= Lo && V < Hi --> V - Lo u< Hi - Lo 69 // V < Lo || V >= Hi --> V - Lo u>= Hi - Lo 70 Value *VMinusLo = 71 Builder.CreateSub(V, ConstantInt::get(Ty, Lo), V->getName() + ".off"); 72 Constant *HiMinusLo = ConstantInt::get(Ty, Hi - Lo); 73 return Builder.CreateICmp(Pred, VMinusLo, HiMinusLo); 74 } 75 76 /// Classify (icmp eq (A & B), C) and (icmp ne (A & B), C) as matching patterns 77 /// that can be simplified. 78 /// One of A and B is considered the mask. The other is the value. This is 79 /// described as the "AMask" or "BMask" part of the enum. If the enum contains 80 /// only "Mask", then both A and B can be considered masks. 
If A is the mask, 81 /// then it was proven that (A & C) == C. This is trivial if C == A or C == 0. 82 /// If both A and C are constants, this proof is also easy. 83 /// For the following explanations, we assume that A is the mask. 84 /// 85 /// "AllOnes" declares that the comparison is true only if (A & B) == A or all 86 /// bits of A are set in B. 87 /// Example: (icmp eq (A & 3), 3) -> AMask_AllOnes 88 /// 89 /// "AllZeros" declares that the comparison is true only if (A & B) == 0 or all 90 /// bits of A are cleared in B. 91 /// Example: (icmp eq (A & 3), 0) -> Mask_AllZeroes 92 /// 93 /// "Mixed" declares that (A & B) == C and C might or might not contain any 94 /// number of one bits and zero bits. 95 /// Example: (icmp eq (A & 3), 1) -> AMask_Mixed 96 /// 97 /// "Not" means that in above descriptions "==" should be replaced by "!=". 98 /// Example: (icmp ne (A & 3), 3) -> AMask_NotAllOnes 99 /// 100 /// If the mask A contains a single bit, then the following is equivalent: 101 /// (icmp eq (A & B), A) equals (icmp ne (A & B), 0) 102 /// (icmp ne (A & B), A) equals (icmp eq (A & B), 0) 103 enum MaskedICmpType { 104 AMask_AllOnes = 1, 105 AMask_NotAllOnes = 2, 106 BMask_AllOnes = 4, 107 BMask_NotAllOnes = 8, 108 Mask_AllZeros = 16, 109 Mask_NotAllZeros = 32, 110 AMask_Mixed = 64, 111 AMask_NotMixed = 128, 112 BMask_Mixed = 256, 113 BMask_NotMixed = 512 114 }; 115 116 /// Return the set of patterns (from MaskedICmpType) that (icmp SCC (A & B), C) 117 /// satisfies. 118 static unsigned getMaskedICmpType(Value *A, Value *B, Value *C, 119 ICmpInst::Predicate Pred) { 120 const APInt *ConstA = nullptr, *ConstB = nullptr, *ConstC = nullptr; 121 match(A, m_APInt(ConstA)); 122 match(B, m_APInt(ConstB)); 123 match(C, m_APInt(ConstC)); 124 bool IsEq = (Pred == ICmpInst::ICMP_EQ); 125 bool IsAPow2 = ConstA && ConstA->isPowerOf2(); 126 bool IsBPow2 = ConstB && ConstB->isPowerOf2(); 127 unsigned MaskVal = 0; 128 if (ConstC && ConstC->isZero()) { 129 // if C is zero, then both A and B qualify as mask 130 MaskVal |= (IsEq ? (Mask_AllZeros | AMask_Mixed | BMask_Mixed) 131 : (Mask_NotAllZeros | AMask_NotMixed | BMask_NotMixed)); 132 if (IsAPow2) 133 MaskVal |= (IsEq ? (AMask_NotAllOnes | AMask_NotMixed) 134 : (AMask_AllOnes | AMask_Mixed)); 135 if (IsBPow2) 136 MaskVal |= (IsEq ? (BMask_NotAllOnes | BMask_NotMixed) 137 : (BMask_AllOnes | BMask_Mixed)); 138 return MaskVal; 139 } 140 141 if (A == C) { 142 MaskVal |= (IsEq ? (AMask_AllOnes | AMask_Mixed) 143 : (AMask_NotAllOnes | AMask_NotMixed)); 144 if (IsAPow2) 145 MaskVal |= (IsEq ? (Mask_NotAllZeros | AMask_NotMixed) 146 : (Mask_AllZeros | AMask_Mixed)); 147 } else if (ConstA && ConstC && ConstC->isSubsetOf(*ConstA)) { 148 MaskVal |= (IsEq ? AMask_Mixed : AMask_NotMixed); 149 } 150 151 if (B == C) { 152 MaskVal |= (IsEq ? (BMask_AllOnes | BMask_Mixed) 153 : (BMask_NotAllOnes | BMask_NotMixed)); 154 if (IsBPow2) 155 MaskVal |= (IsEq ? (Mask_NotAllZeros | BMask_NotMixed) 156 : (Mask_AllZeros | BMask_Mixed)); 157 } else if (ConstB && ConstC && ConstC->isSubsetOf(*ConstB)) { 158 MaskVal |= (IsEq ? BMask_Mixed : BMask_NotMixed); 159 } 160 161 return MaskVal; 162 } 163 164 /// Convert an analysis of a masked ICmp into its equivalent if all boolean 165 /// operations had the opposite sense. Since each "NotXXX" flag (recording !=) 166 /// is adjacent to the corresponding normal flag (recording ==), this just 167 /// involves swapping those bits over. 
168 static unsigned conjugateICmpMask(unsigned Mask) { 169 unsigned NewMask; 170 NewMask = (Mask & (AMask_AllOnes | BMask_AllOnes | Mask_AllZeros | 171 AMask_Mixed | BMask_Mixed)) 172 << 1; 173 174 NewMask |= (Mask & (AMask_NotAllOnes | BMask_NotAllOnes | Mask_NotAllZeros | 175 AMask_NotMixed | BMask_NotMixed)) 176 >> 1; 177 178 return NewMask; 179 } 180 181 // Adapts the external decomposeBitTestICmp for local use. 182 static bool decomposeBitTestICmp(Value *Cond, CmpInst::Predicate &Pred, 183 Value *&X, Value *&Y, Value *&Z) { 184 auto Res = llvm::decomposeBitTest(Cond, /*LookThroughTrunc=*/true, 185 /*AllowNonZeroC=*/true); 186 if (!Res) 187 return false; 188 189 Pred = Res->Pred; 190 X = Res->X; 191 Y = ConstantInt::get(X->getType(), Res->Mask); 192 Z = ConstantInt::get(X->getType(), Res->C); 193 return true; 194 } 195 196 /// Handle (icmp(A & B) ==/!= C) &/| (icmp(A & D) ==/!= E). 197 /// Return the pattern classes (from MaskedICmpType) for the left hand side and 198 /// the right hand side as a pair. 199 /// LHS and RHS are the left hand side and the right hand side ICmps and PredL 200 /// and PredR are their predicates, respectively. 201 static std::optional<std::pair<unsigned, unsigned>> 202 getMaskedTypeForICmpPair(Value *&A, Value *&B, Value *&C, Value *&D, Value *&E, 203 Value *LHS, Value *RHS, ICmpInst::Predicate &PredL, 204 ICmpInst::Predicate &PredR) { 205 206 // Here comes the tricky part: 207 // LHS might be of the form L11 & L12 == X, X == L21 & L22, 208 // and L11 & L12 == L21 & L22. The same goes for RHS. 209 // Now we must find those components L** and R**, that are equal, so 210 // that we can extract the parameters A, B, C, D, and E for the canonical 211 // above. 212 213 // Check whether the icmp can be decomposed into a bit test. 214 Value *L1, *L11, *L12, *L2, *L21, *L22; 215 if (decomposeBitTestICmp(LHS, PredL, L11, L12, L2)) { 216 L21 = L22 = L1 = nullptr; 217 } else { 218 auto *LHSCMP = dyn_cast<ICmpInst>(LHS); 219 if (!LHSCMP) 220 return std::nullopt; 221 222 // Don't allow pointers. Splat vectors are fine. 223 if (!LHSCMP->getOperand(0)->getType()->isIntOrIntVectorTy()) 224 return std::nullopt; 225 226 PredL = LHSCMP->getPredicate(); 227 L1 = LHSCMP->getOperand(0); 228 L2 = LHSCMP->getOperand(1); 229 // Look for ANDs in the LHS icmp. 230 if (!match(L1, m_And(m_Value(L11), m_Value(L12)))) { 231 // Any icmp can be viewed as being trivially masked; if it allows us to 232 // remove one, it's worth it. 233 L11 = L1; 234 L12 = Constant::getAllOnesValue(L1->getType()); 235 } 236 237 if (!match(L2, m_And(m_Value(L21), m_Value(L22)))) { 238 L21 = L2; 239 L22 = Constant::getAllOnesValue(L2->getType()); 240 } 241 } 242 243 // Bail if LHS was a icmp that can't be decomposed into an equality. 244 if (!ICmpInst::isEquality(PredL)) 245 return std::nullopt; 246 247 Value *R11, *R12, *R2; 248 if (decomposeBitTestICmp(RHS, PredR, R11, R12, R2)) { 249 if (R11 == L11 || R11 == L12 || R11 == L21 || R11 == L22) { 250 A = R11; 251 D = R12; 252 } else if (R12 == L11 || R12 == L12 || R12 == L21 || R12 == L22) { 253 A = R12; 254 D = R11; 255 } else { 256 return std::nullopt; 257 } 258 E = R2; 259 } else { 260 auto *RHSCMP = dyn_cast<ICmpInst>(RHS); 261 if (!RHSCMP) 262 return std::nullopt; 263 // Don't allow pointers. Splat vectors are fine. 
264 if (!RHSCMP->getOperand(0)->getType()->isIntOrIntVectorTy()) 265 return std::nullopt; 266 267 PredR = RHSCMP->getPredicate(); 268 269 Value *R1 = RHSCMP->getOperand(0); 270 R2 = RHSCMP->getOperand(1); 271 bool Ok = false; 272 if (!match(R1, m_And(m_Value(R11), m_Value(R12)))) { 273 // As before, model no mask as a trivial mask if it'll let us do an 274 // optimization. 275 R11 = R1; 276 R12 = Constant::getAllOnesValue(R1->getType()); 277 } 278 279 if (R11 == L11 || R11 == L12 || R11 == L21 || R11 == L22) { 280 A = R11; 281 D = R12; 282 E = R2; 283 Ok = true; 284 } else if (R12 == L11 || R12 == L12 || R12 == L21 || R12 == L22) { 285 A = R12; 286 D = R11; 287 E = R2; 288 Ok = true; 289 } 290 291 // Avoid matching against the -1 value we created for unmasked operand. 292 if (Ok && match(A, m_AllOnes())) 293 Ok = false; 294 295 // Look for ANDs on the right side of the RHS icmp. 296 if (!Ok) { 297 if (!match(R2, m_And(m_Value(R11), m_Value(R12)))) { 298 R11 = R2; 299 R12 = Constant::getAllOnesValue(R2->getType()); 300 } 301 302 if (R11 == L11 || R11 == L12 || R11 == L21 || R11 == L22) { 303 A = R11; 304 D = R12; 305 E = R1; 306 } else if (R12 == L11 || R12 == L12 || R12 == L21 || R12 == L22) { 307 A = R12; 308 D = R11; 309 E = R1; 310 } else { 311 return std::nullopt; 312 } 313 } 314 } 315 316 // Bail if RHS was a icmp that can't be decomposed into an equality. 317 if (!ICmpInst::isEquality(PredR)) 318 return std::nullopt; 319 320 if (L11 == A) { 321 B = L12; 322 C = L2; 323 } else if (L12 == A) { 324 B = L11; 325 C = L2; 326 } else if (L21 == A) { 327 B = L22; 328 C = L1; 329 } else if (L22 == A) { 330 B = L21; 331 C = L1; 332 } 333 334 unsigned LeftType = getMaskedICmpType(A, B, C, PredL); 335 unsigned RightType = getMaskedICmpType(A, D, E, PredR); 336 return std::optional<std::pair<unsigned, unsigned>>( 337 std::make_pair(LeftType, RightType)); 338 } 339 340 /// Try to fold (icmp(A & B) ==/!= C) &/| (icmp(A & D) ==/!= E) into a single 341 /// (icmp(A & X) ==/!= Y), where the left-hand side is of type Mask_NotAllZeros 342 /// and the right hand side is of type BMask_Mixed. For example, 343 /// (icmp (A & 12) != 0) & (icmp (A & 15) == 8) -> (icmp (A & 15) == 8). 344 /// Also used for logical and/or, must be poison safe. 345 static Value *foldLogOpOfMaskedICmps_NotAllZeros_BMask_Mixed( 346 Value *LHS, Value *RHS, bool IsAnd, Value *A, Value *B, Value *D, Value *E, 347 ICmpInst::Predicate PredL, ICmpInst::Predicate PredR, 348 InstCombiner::BuilderTy &Builder) { 349 // We are given the canonical form: 350 // (icmp ne (A & B), 0) & (icmp eq (A & D), E). 351 // where D & E == E. 352 // 353 // If IsAnd is false, we get it in negated form: 354 // (icmp eq (A & B), 0) | (icmp ne (A & D), E) -> 355 // !((icmp ne (A & B), 0) & (icmp eq (A & D), E)). 356 // 357 // We currently handle the case of B, C, D, E are constant. 358 // 359 const APInt *BCst, *DCst, *OrigECst; 360 if (!match(B, m_APInt(BCst)) || !match(D, m_APInt(DCst)) || 361 !match(E, m_APInt(OrigECst))) 362 return nullptr; 363 364 ICmpInst::Predicate NewCC = IsAnd ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE; 365 366 // Update E to the canonical form when D is a power of two and RHS is 367 // canonicalized as, 368 // (icmp ne (A & D), 0) -> (icmp eq (A & D), D) or 369 // (icmp ne (A & D), D) -> (icmp eq (A & D), 0). 370 APInt ECst = *OrigECst; 371 if (PredR != NewCC) 372 ECst ^= *DCst; 373 374 // If B or D is zero, skip because if LHS or RHS can be trivially folded by 375 // other folding rules and this pattern won't apply any more. 
376 if (*BCst == 0 || *DCst == 0) 377 return nullptr; 378 379 // If B and D don't intersect, ie. (B & D) == 0, try to fold isNaN idiom: 380 // (icmp ne (A & FractionBits), 0) & (icmp eq (A & ExpBits), ExpBits) 381 // -> isNaN(A) 382 // Otherwise, we cannot deduce anything from it. 383 if (!BCst->intersects(*DCst)) { 384 Value *Src; 385 if (*DCst == ECst && match(A, m_ElementWiseBitCast(m_Value(Src))) && 386 !Builder.GetInsertBlock()->getParent()->hasFnAttribute( 387 Attribute::StrictFP)) { 388 Type *Ty = Src->getType()->getScalarType(); 389 if (!Ty->isIEEELikeFPTy()) 390 return nullptr; 391 392 APInt ExpBits = APFloat::getInf(Ty->getFltSemantics()).bitcastToAPInt(); 393 if (ECst != ExpBits) 394 return nullptr; 395 APInt FractionBits = ~ExpBits; 396 FractionBits.clearSignBit(); 397 if (*BCst != FractionBits) 398 return nullptr; 399 400 return Builder.CreateFCmp(IsAnd ? FCmpInst::FCMP_UNO : FCmpInst::FCMP_ORD, 401 Src, ConstantFP::getZero(Src->getType())); 402 } 403 return nullptr; 404 } 405 406 // If the following two conditions are met: 407 // 408 // 1. mask B covers only a single bit that's not covered by mask D, that is, 409 // (B & (B ^ D)) is a power of 2 (in other words, B minus the intersection of 410 // B and D has only one bit set) and, 411 // 412 // 2. RHS (and E) indicates that the rest of B's bits are zero (in other 413 // words, the intersection of B and D is zero), that is, ((B & D) & E) == 0 414 // 415 // then that single bit in B must be one and thus the whole expression can be 416 // folded to 417 // (A & (B | D)) == (B & (B ^ D)) | E. 418 // 419 // For example, 420 // (icmp ne (A & 12), 0) & (icmp eq (A & 7), 1) -> (icmp eq (A & 15), 9) 421 // (icmp ne (A & 15), 0) & (icmp eq (A & 7), 0) -> (icmp eq (A & 15), 8) 422 if ((((*BCst & *DCst) & ECst) == 0) && 423 (*BCst & (*BCst ^ *DCst)).isPowerOf2()) { 424 APInt BorD = *BCst | *DCst; 425 APInt BandBxorDorE = (*BCst & (*BCst ^ *DCst)) | ECst; 426 Value *NewMask = ConstantInt::get(A->getType(), BorD); 427 Value *NewMaskedValue = ConstantInt::get(A->getType(), BandBxorDorE); 428 Value *NewAnd = Builder.CreateAnd(A, NewMask); 429 return Builder.CreateICmp(NewCC, NewAnd, NewMaskedValue); 430 } 431 432 auto IsSubSetOrEqual = [](const APInt *C1, const APInt *C2) { 433 return (*C1 & *C2) == *C1; 434 }; 435 auto IsSuperSetOrEqual = [](const APInt *C1, const APInt *C2) { 436 return (*C1 & *C2) == *C2; 437 }; 438 439 // In the following, we consider only the cases where B is a superset of D, B 440 // is a subset of D, or B == D because otherwise there's at least one bit 441 // covered by B but not D, in which case we can't deduce much from it, so 442 // no folding (aside from the single must-be-one bit case right above.) 443 // For example, 444 // (icmp ne (A & 14), 0) & (icmp eq (A & 3), 1) -> no folding. 445 if (!IsSubSetOrEqual(BCst, DCst) && !IsSuperSetOrEqual(BCst, DCst)) 446 return nullptr; 447 448 // At this point, either B is a superset of D, B is a subset of D or B == D. 449 450 // If E is zero, if B is a subset of (or equal to) D, LHS and RHS contradict 451 // and the whole expression becomes false (or true if negated), otherwise, no 452 // folding. 453 // For example, 454 // (icmp ne (A & 3), 0) & (icmp eq (A & 7), 0) -> false. 455 // (icmp ne (A & 15), 0) & (icmp eq (A & 3), 0) -> no folding. 456 if (ECst.isZero()) { 457 if (IsSubSetOrEqual(BCst, DCst)) 458 return ConstantInt::get(LHS->getType(), !IsAnd); 459 return nullptr; 460 } 461 462 // At this point, B, D, E aren't zero and (B & D) == B, (B & D) == D or B == 463 // D. 
If B is a superset of (or equal to) D, since E is not zero, LHS is 464 // subsumed by RHS (RHS implies LHS.) So the whole expression becomes 465 // RHS. For example, 466 // (icmp ne (A & 255), 0) & (icmp eq (A & 15), 8) -> (icmp eq (A & 15), 8). 467 // (icmp ne (A & 15), 0) & (icmp eq (A & 15), 8) -> (icmp eq (A & 15), 8). 468 if (IsSuperSetOrEqual(BCst, DCst)) { 469 // We can't guarantee that samesign hold after this fold. 470 if (auto *ICmp = dyn_cast<ICmpInst>(RHS)) 471 ICmp->setSameSign(false); 472 return RHS; 473 } 474 // Otherwise, B is a subset of D. If B and E have a common bit set, 475 // ie. (B & E) != 0, then LHS is subsumed by RHS. For example. 476 // (icmp ne (A & 12), 0) & (icmp eq (A & 15), 8) -> (icmp eq (A & 15), 8). 477 assert(IsSubSetOrEqual(BCst, DCst) && "Precondition due to above code"); 478 if ((*BCst & ECst) != 0) { 479 // We can't guarantee that samesign hold after this fold. 480 if (auto *ICmp = dyn_cast<ICmpInst>(RHS)) 481 ICmp->setSameSign(false); 482 return RHS; 483 } 484 // Otherwise, LHS and RHS contradict and the whole expression becomes false 485 // (or true if negated.) For example, 486 // (icmp ne (A & 7), 0) & (icmp eq (A & 15), 8) -> false. 487 // (icmp ne (A & 6), 0) & (icmp eq (A & 15), 8) -> false. 488 return ConstantInt::get(LHS->getType(), !IsAnd); 489 } 490 491 /// Try to fold (icmp(A & B) ==/!= 0) &/| (icmp(A & D) ==/!= E) into a single 492 /// (icmp(A & X) ==/!= Y), where the left-hand side and the right hand side 493 /// aren't of the common mask pattern type. 494 /// Also used for logical and/or, must be poison safe. 495 static Value *foldLogOpOfMaskedICmpsAsymmetric( 496 Value *LHS, Value *RHS, bool IsAnd, Value *A, Value *B, Value *C, Value *D, 497 Value *E, ICmpInst::Predicate PredL, ICmpInst::Predicate PredR, 498 unsigned LHSMask, unsigned RHSMask, InstCombiner::BuilderTy &Builder) { 499 assert(ICmpInst::isEquality(PredL) && ICmpInst::isEquality(PredR) && 500 "Expected equality predicates for masked type of icmps."); 501 // Handle Mask_NotAllZeros-BMask_Mixed cases. 502 // (icmp ne/eq (A & B), C) &/| (icmp eq/ne (A & D), E), or 503 // (icmp eq/ne (A & B), C) &/| (icmp ne/eq (A & D), E) 504 // which gets swapped to 505 // (icmp ne/eq (A & D), E) &/| (icmp eq/ne (A & B), C). 506 if (!IsAnd) { 507 LHSMask = conjugateICmpMask(LHSMask); 508 RHSMask = conjugateICmpMask(RHSMask); 509 } 510 if ((LHSMask & Mask_NotAllZeros) && (RHSMask & BMask_Mixed)) { 511 if (Value *V = foldLogOpOfMaskedICmps_NotAllZeros_BMask_Mixed( 512 LHS, RHS, IsAnd, A, B, D, E, PredL, PredR, Builder)) { 513 return V; 514 } 515 } else if ((LHSMask & BMask_Mixed) && (RHSMask & Mask_NotAllZeros)) { 516 if (Value *V = foldLogOpOfMaskedICmps_NotAllZeros_BMask_Mixed( 517 RHS, LHS, IsAnd, A, D, B, C, PredR, PredL, Builder)) { 518 return V; 519 } 520 } 521 return nullptr; 522 } 523 524 /// Try to fold (icmp(A & B) ==/!= C) &/| (icmp(A & D) ==/!= E) 525 /// into a single (icmp(A & X) ==/!= Y). 
526 static Value *foldLogOpOfMaskedICmps(Value *LHS, Value *RHS, bool IsAnd, 527 bool IsLogical, 528 InstCombiner::BuilderTy &Builder, 529 const SimplifyQuery &Q) { 530 Value *A = nullptr, *B = nullptr, *C = nullptr, *D = nullptr, *E = nullptr; 531 ICmpInst::Predicate PredL, PredR; 532 std::optional<std::pair<unsigned, unsigned>> MaskPair = 533 getMaskedTypeForICmpPair(A, B, C, D, E, LHS, RHS, PredL, PredR); 534 if (!MaskPair) 535 return nullptr; 536 assert(ICmpInst::isEquality(PredL) && ICmpInst::isEquality(PredR) && 537 "Expected equality predicates for masked type of icmps."); 538 unsigned LHSMask = MaskPair->first; 539 unsigned RHSMask = MaskPair->second; 540 unsigned Mask = LHSMask & RHSMask; 541 if (Mask == 0) { 542 // Even if the two sides don't share a common pattern, check if folding can 543 // still happen. 544 if (Value *V = foldLogOpOfMaskedICmpsAsymmetric( 545 LHS, RHS, IsAnd, A, B, C, D, E, PredL, PredR, LHSMask, RHSMask, 546 Builder)) 547 return V; 548 return nullptr; 549 } 550 551 // In full generality: 552 // (icmp (A & B) Op C) | (icmp (A & D) Op E) 553 // == ![ (icmp (A & B) !Op C) & (icmp (A & D) !Op E) ] 554 // 555 // If the latter can be converted into (icmp (A & X) Op Y) then the former is 556 // equivalent to (icmp (A & X) !Op Y). 557 // 558 // Therefore, we can pretend for the rest of this function that we're dealing 559 // with the conjunction, provided we flip the sense of any comparisons (both 560 // input and output). 561 562 // In most cases we're going to produce an EQ for the "&&" case. 563 ICmpInst::Predicate NewCC = IsAnd ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE; 564 if (!IsAnd) { 565 // Convert the masking analysis into its equivalent with negated 566 // comparisons. 567 Mask = conjugateICmpMask(Mask); 568 } 569 570 if (Mask & Mask_AllZeros) { 571 // (icmp eq (A & B), 0) & (icmp eq (A & D), 0) 572 // -> (icmp eq (A & (B|D)), 0) 573 if (IsLogical && !isGuaranteedNotToBeUndefOrPoison(D)) 574 return nullptr; // TODO: Use freeze? 575 Value *NewOr = Builder.CreateOr(B, D); 576 Value *NewAnd = Builder.CreateAnd(A, NewOr); 577 // We can't use C as zero because we might actually handle 578 // (icmp ne (A & B), B) & (icmp ne (A & D), D) 579 // with B and D, having a single bit set. 580 Value *Zero = Constant::getNullValue(A->getType()); 581 return Builder.CreateICmp(NewCC, NewAnd, Zero); 582 } 583 if (Mask & BMask_AllOnes) { 584 // (icmp eq (A & B), B) & (icmp eq (A & D), D) 585 // -> (icmp eq (A & (B|D)), (B|D)) 586 if (IsLogical && !isGuaranteedNotToBeUndefOrPoison(D)) 587 return nullptr; // TODO: Use freeze? 588 Value *NewOr = Builder.CreateOr(B, D); 589 Value *NewAnd = Builder.CreateAnd(A, NewOr); 590 return Builder.CreateICmp(NewCC, NewAnd, NewOr); 591 } 592 if (Mask & AMask_AllOnes) { 593 // (icmp eq (A & B), A) & (icmp eq (A & D), A) 594 // -> (icmp eq (A & (B&D)), A) 595 if (IsLogical && !isGuaranteedNotToBeUndefOrPoison(D)) 596 return nullptr; // TODO: Use freeze? 597 Value *NewAnd1 = Builder.CreateAnd(B, D); 598 Value *NewAnd2 = Builder.CreateAnd(A, NewAnd1); 599 return Builder.CreateICmp(NewCC, NewAnd2, A); 600 } 601 602 const APInt *ConstB, *ConstD; 603 if (match(B, m_APInt(ConstB)) && match(D, m_APInt(ConstD))) { 604 if (Mask & (Mask_NotAllZeros | BMask_NotAllOnes)) { 605 // (icmp ne (A & B), 0) & (icmp ne (A & D), 0) and 606 // (icmp ne (A & B), B) & (icmp ne (A & D), D) 607 // -> (icmp ne (A & B), 0) or (icmp ne (A & D), 0) 608 // Only valid if one of the masks is a superset of the other (check "B&D" 609 // is the same as either B or D). 
610 APInt NewMask = *ConstB & *ConstD; 611 if (NewMask == *ConstB) 612 return LHS; 613 if (NewMask == *ConstD) 614 return RHS; 615 } 616 617 if (Mask & AMask_NotAllOnes) { 618 // (icmp ne (A & B), B) & (icmp ne (A & D), D) 619 // -> (icmp ne (A & B), A) or (icmp ne (A & D), A) 620 // Only valid if one of the masks is a superset of the other (check "B|D" 621 // is the same as either B or D). 622 APInt NewMask = *ConstB | *ConstD; 623 if (NewMask == *ConstB) 624 return LHS; 625 if (NewMask == *ConstD) 626 return RHS; 627 } 628 629 if (Mask & (BMask_Mixed | BMask_NotMixed)) { 630 // Mixed: 631 // (icmp eq (A & B), C) & (icmp eq (A & D), E) 632 // We already know that B & C == C && D & E == E. 633 // If we can prove that (B & D) & (C ^ E) == 0, that is, the bits of 634 // C and E, which are shared by both the mask B and the mask D, don't 635 // contradict, then we can transform to 636 // -> (icmp eq (A & (B|D)), (C|E)) 637 // Currently, we only handle the case of B, C, D, and E being constant. 638 // We can't simply use C and E because we might actually handle 639 // (icmp ne (A & B), B) & (icmp eq (A & D), D) 640 // with B and D, having a single bit set. 641 642 // NotMixed: 643 // (icmp ne (A & B), C) & (icmp ne (A & D), E) 644 // -> (icmp ne (A & (B & D)), (C & E)) 645 // Check the intersection (B & D) for inequality. 646 // Assume that (B & D) == B || (B & D) == D, i.e B/D is a subset of D/B 647 // and (B & D) & (C ^ E) == 0, bits of C and E, which are shared by both 648 // the B and the D, don't contradict. Note that we can assume (~B & C) == 649 // 0 && (~D & E) == 0, previous operation should delete these icmps if it 650 // hadn't been met. 651 652 const APInt *OldConstC, *OldConstE; 653 if (!match(C, m_APInt(OldConstC)) || !match(E, m_APInt(OldConstE))) 654 return nullptr; 655 656 auto FoldBMixed = [&](ICmpInst::Predicate CC, bool IsNot) -> Value * { 657 CC = IsNot ? CmpInst::getInversePredicate(CC) : CC; 658 const APInt ConstC = PredL != CC ? *ConstB ^ *OldConstC : *OldConstC; 659 const APInt ConstE = PredR != CC ? *ConstD ^ *OldConstE : *OldConstE; 660 661 if (((*ConstB & *ConstD) & (ConstC ^ ConstE)).getBoolValue()) 662 return IsNot ? nullptr : ConstantInt::get(LHS->getType(), !IsAnd); 663 664 if (IsNot && !ConstB->isSubsetOf(*ConstD) && 665 !ConstD->isSubsetOf(*ConstB)) 666 return nullptr; 667 668 APInt BD, CE; 669 if (IsNot) { 670 BD = *ConstB & *ConstD; 671 CE = ConstC & ConstE; 672 } else { 673 BD = *ConstB | *ConstD; 674 CE = ConstC | ConstE; 675 } 676 Value *NewAnd = Builder.CreateAnd(A, BD); 677 Value *CEVal = ConstantInt::get(A->getType(), CE); 678 return Builder.CreateICmp(CC, CEVal, NewAnd); 679 }; 680 681 if (Mask & BMask_Mixed) 682 return FoldBMixed(NewCC, false); 683 if (Mask & BMask_NotMixed) // can be else also 684 return FoldBMixed(NewCC, true); 685 } 686 } 687 688 // (icmp eq (A & B), 0) | (icmp eq (A & D), 0) 689 // -> (icmp ne (A & (B|D)), (B|D)) 690 // (icmp ne (A & B), 0) & (icmp ne (A & D), 0) 691 // -> (icmp eq (A & (B|D)), (B|D)) 692 // iff B and D is known to be a power of two 693 if (Mask & Mask_NotAllZeros && 694 isKnownToBeAPowerOfTwo(B, /*OrZero=*/false, /*Depth=*/0, Q) && 695 isKnownToBeAPowerOfTwo(D, /*OrZero=*/false, /*Depth=*/0, Q)) { 696 // If this is a logical and/or, then we must prevent propagation of a 697 // poison value from the RHS by inserting freeze. 
698 if (IsLogical) 699 D = Builder.CreateFreeze(D); 700 Value *Mask = Builder.CreateOr(B, D); 701 Value *Masked = Builder.CreateAnd(A, Mask); 702 return Builder.CreateICmp(NewCC, Masked, Mask); 703 } 704 return nullptr; 705 } 706 707 /// Try to fold a signed range checked with lower bound 0 to an unsigned icmp. 708 /// Example: (icmp sge x, 0) & (icmp slt x, n) --> icmp ult x, n 709 /// If \p Inverted is true then the check is for the inverted range, e.g. 710 /// (icmp slt x, 0) | (icmp sgt x, n) --> icmp ugt x, n 711 Value *InstCombinerImpl::simplifyRangeCheck(ICmpInst *Cmp0, ICmpInst *Cmp1, 712 bool Inverted) { 713 // Check the lower range comparison, e.g. x >= 0 714 // InstCombine already ensured that if there is a constant it's on the RHS. 715 ConstantInt *RangeStart = dyn_cast<ConstantInt>(Cmp0->getOperand(1)); 716 if (!RangeStart) 717 return nullptr; 718 719 ICmpInst::Predicate Pred0 = (Inverted ? Cmp0->getInversePredicate() : 720 Cmp0->getPredicate()); 721 722 // Accept x > -1 or x >= 0 (after potentially inverting the predicate). 723 if (!((Pred0 == ICmpInst::ICMP_SGT && RangeStart->isMinusOne()) || 724 (Pred0 == ICmpInst::ICMP_SGE && RangeStart->isZero()))) 725 return nullptr; 726 727 ICmpInst::Predicate Pred1 = (Inverted ? Cmp1->getInversePredicate() : 728 Cmp1->getPredicate()); 729 730 Value *Input = Cmp0->getOperand(0); 731 Value *Cmp1Op0 = Cmp1->getOperand(0); 732 Value *Cmp1Op1 = Cmp1->getOperand(1); 733 Value *RangeEnd; 734 if (match(Cmp1Op0, m_SExtOrSelf(m_Specific(Input)))) { 735 // For the upper range compare we have: icmp x, n 736 Input = Cmp1Op0; 737 RangeEnd = Cmp1Op1; 738 } else if (match(Cmp1Op1, m_SExtOrSelf(m_Specific(Input)))) { 739 // For the upper range compare we have: icmp n, x 740 Input = Cmp1Op1; 741 RangeEnd = Cmp1Op0; 742 Pred1 = ICmpInst::getSwappedPredicate(Pred1); 743 } else { 744 return nullptr; 745 } 746 747 // Check the upper range comparison, e.g. x < n 748 ICmpInst::Predicate NewPred; 749 switch (Pred1) { 750 case ICmpInst::ICMP_SLT: NewPred = ICmpInst::ICMP_ULT; break; 751 case ICmpInst::ICMP_SLE: NewPred = ICmpInst::ICMP_ULE; break; 752 default: return nullptr; 753 } 754 755 // This simplification is only valid if the upper range is not negative. 756 KnownBits Known = computeKnownBits(RangeEnd, /*Depth=*/0, Cmp1); 757 if (!Known.isNonNegative()) 758 return nullptr; 759 760 if (Inverted) 761 NewPred = ICmpInst::getInversePredicate(NewPred); 762 763 return Builder.CreateICmp(NewPred, Input, RangeEnd); 764 } 765 766 // (or (icmp eq X, 0), (icmp eq X, Pow2OrZero)) 767 // -> (icmp eq (and X, Pow2OrZero), X) 768 // (and (icmp ne X, 0), (icmp ne X, Pow2OrZero)) 769 // -> (icmp ne (and X, Pow2OrZero), X) 770 static Value * 771 foldAndOrOfICmpsWithPow2AndWithZero(InstCombiner::BuilderTy &Builder, 772 ICmpInst *LHS, ICmpInst *RHS, bool IsAnd, 773 const SimplifyQuery &Q) { 774 CmpPredicate Pred = IsAnd ? CmpInst::ICMP_NE : CmpInst::ICMP_EQ; 775 // Make sure we have right compares for our op. 776 if (LHS->getPredicate() != Pred || RHS->getPredicate() != Pred) 777 return nullptr; 778 779 // Make it so we can match LHS against the (icmp eq/ne X, 0) just for 780 // simplicity. 781 if (match(RHS->getOperand(1), m_Zero())) 782 std::swap(LHS, RHS); 783 784 Value *Pow2, *Op; 785 // Match the desired pattern: 786 // LHS: (icmp eq/ne X, 0) 787 // RHS: (icmp eq/ne X, Pow2OrZero) 788 // Skip if Pow2OrZero is 1. Either way it gets folded to (icmp ugt X, 1) but 789 // this form ends up slightly less canonical. 
790 // We could potentially be more sophisticated than requiring LHS/RHS 791 // be one-use. We don't create additional instructions if only one 792 // of them is one-use. So cases where one is one-use and the other 793 // is two-use might be profitable. 794 if (!match(LHS, m_OneUse(m_ICmp(Pred, m_Value(Op), m_Zero()))) || 795 !match(RHS, m_OneUse(m_c_ICmp(Pred, m_Specific(Op), m_Value(Pow2)))) || 796 match(Pow2, m_One()) || 797 !isKnownToBeAPowerOfTwo(Pow2, Q.DL, /*OrZero=*/true, /*Depth=*/0, Q.AC, 798 Q.CxtI, Q.DT)) 799 return nullptr; 800 801 Value *And = Builder.CreateAnd(Op, Pow2); 802 return Builder.CreateICmp(Pred, And, Op); 803 } 804 805 /// General pattern: 806 /// X & Y 807 /// 808 /// Where Y is checking that all the high bits (covered by a mask 4294967168) 809 /// are uniform, i.e. %arg & 4294967168 can be either 4294967168 or 0 810 /// Pattern can be one of: 811 /// %t = add i32 %arg, 128 812 /// %r = icmp ult i32 %t, 256 813 /// Or 814 /// %t0 = shl i32 %arg, 24 815 /// %t1 = ashr i32 %t0, 24 816 /// %r = icmp eq i32 %t1, %arg 817 /// Or 818 /// %t0 = trunc i32 %arg to i8 819 /// %t1 = sext i8 %t0 to i32 820 /// %r = icmp eq i32 %t1, %arg 821 /// This pattern is a signed truncation check. 822 /// 823 /// And X is checking that some bit in that same mask is zero. 824 /// I.e. can be one of: 825 /// %r = icmp sgt i32 %arg, -1 826 /// Or 827 /// %t = and i32 %arg, 2147483648 828 /// %r = icmp eq i32 %t, 0 829 /// 830 /// Since we are checking that all the bits in that mask are the same, 831 /// and a particular bit is zero, what we are really checking is that all the 832 /// masked bits are zero. 833 /// So this should be transformed to: 834 /// %r = icmp ult i32 %arg, 128 835 static Value *foldSignedTruncationCheck(ICmpInst *ICmp0, ICmpInst *ICmp1, 836 Instruction &CxtI, 837 InstCombiner::BuilderTy &Builder) { 838 assert(CxtI.getOpcode() == Instruction::And); 839 840 // Match icmp ult (add %arg, C01), C1 (C1 == C01 << 1; powers of two) 841 auto tryToMatchSignedTruncationCheck = [](ICmpInst *ICmp, Value *&X, 842 APInt &SignBitMask) -> bool { 843 const APInt *I01, *I1; // powers of two; I1 == I01 << 1 844 if (!(match(ICmp, m_SpecificICmp(ICmpInst::ICMP_ULT, 845 m_Add(m_Value(X), m_Power2(I01)), 846 m_Power2(I1))) && 847 I1->ugt(*I01) && I01->shl(1) == *I1)) 848 return false; 849 // Which bit is the new sign bit as per the 'signed truncation' pattern? 850 SignBitMask = *I01; 851 return true; 852 }; 853 854 // One icmp needs to be 'signed truncation check'. 855 // We need to match this first, else we will mismatch commutative cases. 856 Value *X1; 857 APInt HighestBit; 858 ICmpInst *OtherICmp; 859 if (tryToMatchSignedTruncationCheck(ICmp1, X1, HighestBit)) 860 OtherICmp = ICmp0; 861 else if (tryToMatchSignedTruncationCheck(ICmp0, X1, HighestBit)) 862 OtherICmp = ICmp1; 863 else 864 return nullptr; 865 866 assert(HighestBit.isPowerOf2() && "expected to be power of two (non-zero)"); 867 868 // Try to match/decompose into: icmp eq (X & Mask), 0 869 auto tryToDecompose = [](ICmpInst *ICmp, Value *&X, 870 APInt &UnsetBitsMask) -> bool { 871 CmpPredicate Pred = ICmp->getPredicate(); 872 // Can it be decomposed into icmp eq (X & Mask), 0 ? 873 auto Res = 874 llvm::decomposeBitTestICmp(ICmp->getOperand(0), ICmp->getOperand(1), 875 Pred, /*LookThroughTrunc=*/false); 876 if (Res && Res->Pred == ICmpInst::ICMP_EQ) { 877 X = Res->X; 878 UnsetBitsMask = Res->Mask; 879 return true; 880 } 881 882 // Is it icmp eq (X & Mask), 0 already? 
883 const APInt *Mask; 884 if (match(ICmp, m_ICmp(Pred, m_And(m_Value(X), m_APInt(Mask)), m_Zero())) && 885 Pred == ICmpInst::ICMP_EQ) { 886 UnsetBitsMask = *Mask; 887 return true; 888 } 889 return false; 890 }; 891 892 // And the other icmp needs to be decomposable into a bit test. 893 Value *X0; 894 APInt UnsetBitsMask; 895 if (!tryToDecompose(OtherICmp, X0, UnsetBitsMask)) 896 return nullptr; 897 898 assert(!UnsetBitsMask.isZero() && "empty mask makes no sense."); 899 900 // Are they working on the same value? 901 Value *X; 902 if (X1 == X0) { 903 // Ok as is. 904 X = X1; 905 } else if (match(X0, m_Trunc(m_Specific(X1)))) { 906 UnsetBitsMask = UnsetBitsMask.zext(X1->getType()->getScalarSizeInBits()); 907 X = X1; 908 } else 909 return nullptr; 910 911 // So which bits should be uniform as per the 'signed truncation check'? 912 // (all the bits starting with (i.e. including) HighestBit) 913 APInt SignBitsMask = ~(HighestBit - 1U); 914 915 // UnsetBitsMask must have some common bits with SignBitsMask, 916 if (!UnsetBitsMask.intersects(SignBitsMask)) 917 return nullptr; 918 919 // Does UnsetBitsMask contain any bits outside of SignBitsMask? 920 if (!UnsetBitsMask.isSubsetOf(SignBitsMask)) { 921 APInt OtherHighestBit = (~UnsetBitsMask) + 1U; 922 if (!OtherHighestBit.isPowerOf2()) 923 return nullptr; 924 HighestBit = APIntOps::umin(HighestBit, OtherHighestBit); 925 } 926 // Else, if it does not, then all is ok as-is. 927 928 // %r = icmp ult %X, SignBit 929 return Builder.CreateICmpULT(X, ConstantInt::get(X->getType(), HighestBit), 930 CxtI.getName() + ".simplified"); 931 } 932 933 /// Fold (icmp eq ctpop(X) 1) | (icmp eq X 0) into (icmp ult ctpop(X) 2) and 934 /// fold (icmp ne ctpop(X) 1) & (icmp ne X 0) into (icmp ugt ctpop(X) 1). 935 /// Also used for logical and/or, must be poison safe if range attributes are 936 /// dropped. 937 static Value *foldIsPowerOf2OrZero(ICmpInst *Cmp0, ICmpInst *Cmp1, bool IsAnd, 938 InstCombiner::BuilderTy &Builder, 939 InstCombinerImpl &IC) { 940 CmpPredicate Pred0, Pred1; 941 Value *X; 942 if (!match(Cmp0, m_ICmp(Pred0, m_Intrinsic<Intrinsic::ctpop>(m_Value(X)), 943 m_SpecificInt(1))) || 944 !match(Cmp1, m_ICmp(Pred1, m_Specific(X), m_ZeroInt()))) 945 return nullptr; 946 947 auto *CtPop = cast<Instruction>(Cmp0->getOperand(0)); 948 if (IsAnd && Pred0 == ICmpInst::ICMP_NE && Pred1 == ICmpInst::ICMP_NE) { 949 // Drop range attributes and re-infer them in the next iteration. 950 CtPop->dropPoisonGeneratingAnnotations(); 951 IC.addToWorklist(CtPop); 952 return Builder.CreateICmpUGT(CtPop, ConstantInt::get(CtPop->getType(), 1)); 953 } 954 if (!IsAnd && Pred0 == ICmpInst::ICMP_EQ && Pred1 == ICmpInst::ICMP_EQ) { 955 // Drop range attributes and re-infer them in the next iteration. 956 CtPop->dropPoisonGeneratingAnnotations(); 957 IC.addToWorklist(CtPop); 958 return Builder.CreateICmpULT(CtPop, ConstantInt::get(CtPop->getType(), 2)); 959 } 960 961 return nullptr; 962 } 963 964 /// Reduce a pair of compares that check if a value has exactly 1 bit set. 965 /// Also used for logical and/or, must be poison safe if range attributes are 966 /// dropped. 967 static Value *foldIsPowerOf2(ICmpInst *Cmp0, ICmpInst *Cmp1, bool JoinedByAnd, 968 InstCombiner::BuilderTy &Builder, 969 InstCombinerImpl &IC) { 970 // Handle 'and' / 'or' commutation: make the equality check the first operand. 
971 if (JoinedByAnd && Cmp1->getPredicate() == ICmpInst::ICMP_NE) 972 std::swap(Cmp0, Cmp1); 973 else if (!JoinedByAnd && Cmp1->getPredicate() == ICmpInst::ICMP_EQ) 974 std::swap(Cmp0, Cmp1); 975 976 // (X != 0) && (ctpop(X) u< 2) --> ctpop(X) == 1 977 Value *X; 978 if (JoinedByAnd && 979 match(Cmp0, m_SpecificICmp(ICmpInst::ICMP_NE, m_Value(X), m_ZeroInt())) && 980 match(Cmp1, m_SpecificICmp(ICmpInst::ICMP_ULT, 981 m_Intrinsic<Intrinsic::ctpop>(m_Specific(X)), 982 m_SpecificInt(2)))) { 983 auto *CtPop = cast<Instruction>(Cmp1->getOperand(0)); 984 // Drop range attributes and re-infer them in the next iteration. 985 CtPop->dropPoisonGeneratingAnnotations(); 986 IC.addToWorklist(CtPop); 987 return Builder.CreateICmpEQ(CtPop, ConstantInt::get(CtPop->getType(), 1)); 988 } 989 // (X == 0) || (ctpop(X) u> 1) --> ctpop(X) != 1 990 if (!JoinedByAnd && 991 match(Cmp0, m_SpecificICmp(ICmpInst::ICMP_EQ, m_Value(X), m_ZeroInt())) && 992 match(Cmp1, m_SpecificICmp(ICmpInst::ICMP_UGT, 993 m_Intrinsic<Intrinsic::ctpop>(m_Specific(X)), 994 m_SpecificInt(1)))) { 995 auto *CtPop = cast<Instruction>(Cmp1->getOperand(0)); 996 // Drop range attributes and re-infer them in the next iteration. 997 CtPop->dropPoisonGeneratingAnnotations(); 998 IC.addToWorklist(CtPop); 999 return Builder.CreateICmpNE(CtPop, ConstantInt::get(CtPop->getType(), 1)); 1000 } 1001 return nullptr; 1002 } 1003 1004 /// Try to fold (icmp(A & B) == 0) & (icmp(A & D) != E) into (icmp A u< D) iff 1005 /// B is a contiguous set of ones starting from the most significant bit 1006 /// (negative power of 2), D and E are equal, and D is a contiguous set of ones 1007 /// starting at the most significant zero bit in B. Parameter B supports masking 1008 /// using undef/poison in either scalar or vector values. 1009 static Value *foldNegativePower2AndShiftedMask( 1010 Value *A, Value *B, Value *D, Value *E, ICmpInst::Predicate PredL, 1011 ICmpInst::Predicate PredR, InstCombiner::BuilderTy &Builder) { 1012 assert(ICmpInst::isEquality(PredL) && ICmpInst::isEquality(PredR) && 1013 "Expected equality predicates for masked type of icmps."); 1014 if (PredL != ICmpInst::ICMP_EQ || PredR != ICmpInst::ICMP_NE) 1015 return nullptr; 1016 1017 if (!match(B, m_NegatedPower2()) || !match(D, m_ShiftedMask()) || 1018 !match(E, m_ShiftedMask())) 1019 return nullptr; 1020 1021 // Test scalar arguments for conversion. B has been validated earlier to be a 1022 // negative power of two and thus is guaranteed to have one or more contiguous 1023 // ones starting from the MSB followed by zero or more contiguous zeros. D has 1024 // been validated earlier to be a shifted set of one or more contiguous ones. 1025 // In order to match, B leading ones and D leading zeros should be equal. The 1026 // predicate that B be a negative power of 2 prevents the condition of there 1027 // ever being zero leading ones. Thus 0 == 0 cannot occur. The predicate that 1028 // D always be a shifted mask prevents the condition of D equaling 0. This 1029 // prevents matching the condition where B contains the maximum number of 1030 // leading one bits (-1) and D contains the maximum number of leading zero 1031 // bits (0). 
1032 auto isReducible = [](const Value *B, const Value *D, const Value *E) { 1033 const APInt *BCst, *DCst, *ECst; 1034 return match(B, m_APIntAllowPoison(BCst)) && match(D, m_APInt(DCst)) && 1035 match(E, m_APInt(ECst)) && *DCst == *ECst && 1036 (isa<PoisonValue>(B) || 1037 (BCst->countLeadingOnes() == DCst->countLeadingZeros())); 1038 }; 1039 1040 // Test vector type arguments for conversion. 1041 if (const auto *BVTy = dyn_cast<VectorType>(B->getType())) { 1042 const auto *BFVTy = dyn_cast<FixedVectorType>(BVTy); 1043 const auto *BConst = dyn_cast<Constant>(B); 1044 const auto *DConst = dyn_cast<Constant>(D); 1045 const auto *EConst = dyn_cast<Constant>(E); 1046 1047 if (!BFVTy || !BConst || !DConst || !EConst) 1048 return nullptr; 1049 1050 for (unsigned I = 0; I != BFVTy->getNumElements(); ++I) { 1051 const auto *BElt = BConst->getAggregateElement(I); 1052 const auto *DElt = DConst->getAggregateElement(I); 1053 const auto *EElt = EConst->getAggregateElement(I); 1054 1055 if (!BElt || !DElt || !EElt) 1056 return nullptr; 1057 if (!isReducible(BElt, DElt, EElt)) 1058 return nullptr; 1059 } 1060 } else { 1061 // Test scalar type arguments for conversion. 1062 if (!isReducible(B, D, E)) 1063 return nullptr; 1064 } 1065 return Builder.CreateICmp(ICmpInst::ICMP_ULT, A, D); 1066 } 1067 1068 /// Try to fold ((icmp X u< P) & (icmp(X & M) != M)) or ((icmp X s> -1) & 1069 /// (icmp(X & M) != M)) into (icmp X u< M). Where P is a power of 2, M < P, and 1070 /// M is a contiguous shifted mask starting at the right most significant zero 1071 /// bit in P. SGT is supported as when P is the largest representable power of 1072 /// 2, an earlier optimization converts the expression into (icmp X s> -1). 1073 /// Parameter P supports masking using undef/poison in either scalar or vector 1074 /// values. 1075 static Value *foldPowerOf2AndShiftedMask(ICmpInst *Cmp0, ICmpInst *Cmp1, 1076 bool JoinedByAnd, 1077 InstCombiner::BuilderTy &Builder) { 1078 if (!JoinedByAnd) 1079 return nullptr; 1080 Value *A = nullptr, *B = nullptr, *C = nullptr, *D = nullptr, *E = nullptr; 1081 ICmpInst::Predicate CmpPred0, CmpPred1; 1082 // Assuming P is a 2^n, getMaskedTypeForICmpPair will normalize (icmp X u< 1083 // 2^n) into (icmp (X & ~(2^n-1)) == 0) and (icmp X s> -1) into (icmp (X & 1084 // SignMask) == 0). 1085 std::optional<std::pair<unsigned, unsigned>> MaskPair = 1086 getMaskedTypeForICmpPair(A, B, C, D, E, Cmp0, Cmp1, CmpPred0, CmpPred1); 1087 if (!MaskPair) 1088 return nullptr; 1089 1090 const auto compareBMask = BMask_NotMixed | BMask_NotAllOnes; 1091 unsigned CmpMask0 = MaskPair->first; 1092 unsigned CmpMask1 = MaskPair->second; 1093 if ((CmpMask0 & Mask_AllZeros) && (CmpMask1 == compareBMask)) { 1094 if (Value *V = foldNegativePower2AndShiftedMask(A, B, D, E, CmpPred0, 1095 CmpPred1, Builder)) 1096 return V; 1097 } else if ((CmpMask0 == compareBMask) && (CmpMask1 & Mask_AllZeros)) { 1098 if (Value *V = foldNegativePower2AndShiftedMask(A, D, B, C, CmpPred1, 1099 CmpPred0, Builder)) 1100 return V; 1101 } 1102 return nullptr; 1103 } 1104 1105 /// Commuted variants are assumed to be handled by calling this function again 1106 /// with the parameters swapped. 
1107 static Value *foldUnsignedUnderflowCheck(ICmpInst *ZeroICmp, 1108 ICmpInst *UnsignedICmp, bool IsAnd, 1109 const SimplifyQuery &Q, 1110 InstCombiner::BuilderTy &Builder) { 1111 Value *ZeroCmpOp; 1112 CmpPredicate EqPred; 1113 if (!match(ZeroICmp, m_ICmp(EqPred, m_Value(ZeroCmpOp), m_Zero())) || 1114 !ICmpInst::isEquality(EqPred)) 1115 return nullptr; 1116 1117 CmpPredicate UnsignedPred; 1118 1119 Value *A, *B; 1120 if (match(UnsignedICmp, 1121 m_c_ICmp(UnsignedPred, m_Specific(ZeroCmpOp), m_Value(A))) && 1122 match(ZeroCmpOp, m_c_Add(m_Specific(A), m_Value(B))) && 1123 (ZeroICmp->hasOneUse() || UnsignedICmp->hasOneUse())) { 1124 auto GetKnownNonZeroAndOther = [&](Value *&NonZero, Value *&Other) { 1125 if (!isKnownNonZero(NonZero, Q)) 1126 std::swap(NonZero, Other); 1127 return isKnownNonZero(NonZero, Q); 1128 }; 1129 1130 // Given ZeroCmpOp = (A + B) 1131 // ZeroCmpOp < A && ZeroCmpOp != 0 --> (0-X) < Y iff 1132 // ZeroCmpOp >= A || ZeroCmpOp == 0 --> (0-X) >= Y iff 1133 // with X being the value (A/B) that is known to be non-zero, 1134 // and Y being remaining value. 1135 if (UnsignedPred == ICmpInst::ICMP_ULT && EqPred == ICmpInst::ICMP_NE && 1136 IsAnd && GetKnownNonZeroAndOther(B, A)) 1137 return Builder.CreateICmpULT(Builder.CreateNeg(B), A); 1138 if (UnsignedPred == ICmpInst::ICMP_UGE && EqPred == ICmpInst::ICMP_EQ && 1139 !IsAnd && GetKnownNonZeroAndOther(B, A)) 1140 return Builder.CreateICmpUGE(Builder.CreateNeg(B), A); 1141 } 1142 1143 return nullptr; 1144 } 1145 1146 struct IntPart { 1147 Value *From; 1148 unsigned StartBit; 1149 unsigned NumBits; 1150 }; 1151 1152 /// Match an extraction of bits from an integer. 1153 static std::optional<IntPart> matchIntPart(Value *V) { 1154 Value *X; 1155 if (!match(V, m_OneUse(m_Trunc(m_Value(X))))) 1156 return std::nullopt; 1157 1158 unsigned NumOriginalBits = X->getType()->getScalarSizeInBits(); 1159 unsigned NumExtractedBits = V->getType()->getScalarSizeInBits(); 1160 Value *Y; 1161 const APInt *Shift; 1162 // For a trunc(lshr Y, Shift) pattern, make sure we're only extracting bits 1163 // from Y, not any shifted-in zeroes. 1164 if (match(X, m_OneUse(m_LShr(m_Value(Y), m_APInt(Shift)))) && 1165 Shift->ule(NumOriginalBits - NumExtractedBits)) 1166 return {{Y, (unsigned)Shift->getZExtValue(), NumExtractedBits}}; 1167 return {{X, 0, NumExtractedBits}}; 1168 } 1169 1170 /// Materialize an extraction of bits from an integer in IR. 1171 static Value *extractIntPart(const IntPart &P, IRBuilderBase &Builder) { 1172 Value *V = P.From; 1173 if (P.StartBit) 1174 V = Builder.CreateLShr(V, P.StartBit); 1175 Type *TruncTy = V->getType()->getWithNewBitWidth(P.NumBits); 1176 if (TruncTy != V->getType()) 1177 V = Builder.CreateTrunc(V, TruncTy); 1178 return V; 1179 } 1180 1181 /// (icmp eq X0, Y0) & (icmp eq X1, Y1) -> icmp eq X01, Y01 1182 /// (icmp ne X0, Y0) | (icmp ne X1, Y1) -> icmp ne X01, Y01 1183 /// where X0, X1 and Y0, Y1 are adjacent parts extracted from an integer. 1184 Value *InstCombinerImpl::foldEqOfParts(Value *Cmp0, Value *Cmp1, bool IsAnd) { 1185 if (!Cmp0->hasOneUse() || !Cmp1->hasOneUse()) 1186 return nullptr; 1187 1188 CmpInst::Predicate Pred = IsAnd ? 
CmpInst::ICMP_EQ : CmpInst::ICMP_NE; 1189 auto GetMatchPart = [&](Value *CmpV, 1190 unsigned OpNo) -> std::optional<IntPart> { 1191 assert(CmpV->getType()->isIntOrIntVectorTy(1) && "Must be bool"); 1192 1193 Value *X, *Y; 1194 // icmp ne (and x, 1), (and y, 1) <=> trunc (xor x, y) to i1 1195 // icmp eq (and x, 1), (and y, 1) <=> not (trunc (xor x, y) to i1) 1196 if (Pred == CmpInst::ICMP_NE 1197 ? match(CmpV, m_Trunc(m_Xor(m_Value(X), m_Value(Y)))) 1198 : match(CmpV, m_Not(m_Trunc(m_Xor(m_Value(X), m_Value(Y)))))) 1199 return {{OpNo == 0 ? X : Y, 0, 1}}; 1200 1201 auto *Cmp = dyn_cast<ICmpInst>(CmpV); 1202 if (!Cmp) 1203 return std::nullopt; 1204 1205 if (Pred == Cmp->getPredicate()) 1206 return matchIntPart(Cmp->getOperand(OpNo)); 1207 1208 const APInt *C; 1209 // (icmp eq (lshr x, C), (lshr y, C)) gets optimized to: 1210 // (icmp ult (xor x, y), 1 << C) so also look for that. 1211 if (Pred == CmpInst::ICMP_EQ && Cmp->getPredicate() == CmpInst::ICMP_ULT) { 1212 if (!match(Cmp->getOperand(1), m_Power2(C)) || 1213 !match(Cmp->getOperand(0), m_Xor(m_Value(), m_Value()))) 1214 return std::nullopt; 1215 } 1216 1217 // (icmp ne (lshr x, C), (lshr y, C)) gets optimized to: 1218 // (icmp ugt (xor x, y), (1 << C) - 1) so also look for that. 1219 else if (Pred == CmpInst::ICMP_NE && 1220 Cmp->getPredicate() == CmpInst::ICMP_UGT) { 1221 if (!match(Cmp->getOperand(1), m_LowBitMask(C)) || 1222 !match(Cmp->getOperand(0), m_Xor(m_Value(), m_Value()))) 1223 return std::nullopt; 1224 } else { 1225 return std::nullopt; 1226 } 1227 1228 unsigned From = Pred == CmpInst::ICMP_NE ? C->popcount() : C->countr_zero(); 1229 Instruction *I = cast<Instruction>(Cmp->getOperand(0)); 1230 return {{I->getOperand(OpNo), From, C->getBitWidth() - From}}; 1231 }; 1232 1233 std::optional<IntPart> L0 = GetMatchPart(Cmp0, 0); 1234 std::optional<IntPart> R0 = GetMatchPart(Cmp0, 1); 1235 std::optional<IntPart> L1 = GetMatchPart(Cmp1, 0); 1236 std::optional<IntPart> R1 = GetMatchPart(Cmp1, 1); 1237 if (!L0 || !R0 || !L1 || !R1) 1238 return nullptr; 1239 1240 // Make sure the LHS/RHS compare a part of the same value, possibly after 1241 // an operand swap. 1242 if (L0->From != L1->From || R0->From != R1->From) { 1243 if (L0->From != R1->From || R0->From != L1->From) 1244 return nullptr; 1245 std::swap(L1, R1); 1246 } 1247 1248 // Make sure the extracted parts are adjacent, canonicalizing to L0/R0 being 1249 // the low part and L1/R1 being the high part. 1250 if (L0->StartBit + L0->NumBits != L1->StartBit || 1251 R0->StartBit + R0->NumBits != R1->StartBit) { 1252 if (L1->StartBit + L1->NumBits != L0->StartBit || 1253 R1->StartBit + R1->NumBits != R0->StartBit) 1254 return nullptr; 1255 std::swap(L0, L1); 1256 std::swap(R0, R1); 1257 } 1258 1259 // We can simplify to a comparison of these larger parts of the integers. 1260 IntPart L = {L0->From, L0->StartBit, L0->NumBits + L1->NumBits}; 1261 IntPart R = {R0->From, R0->StartBit, R0->NumBits + R1->NumBits}; 1262 Value *LValue = extractIntPart(L, Builder); 1263 Value *RValue = extractIntPart(R, Builder); 1264 return Builder.CreateICmp(Pred, LValue, RValue); 1265 } 1266 1267 /// Reduce logic-of-compares with equality to a constant by substituting a 1268 /// common operand with the constant. Callers are expected to call this with 1269 /// Cmp0/Cmp1 switched to handle logic op commutativity. 
1270 static Value *foldAndOrOfICmpsWithConstEq(ICmpInst *Cmp0, ICmpInst *Cmp1, 1271 bool IsAnd, bool IsLogical, 1272 InstCombiner::BuilderTy &Builder, 1273 const SimplifyQuery &Q) { 1274 // Match an equality compare with a non-poison constant as Cmp0. 1275 // Also, give up if the compare can be constant-folded to avoid looping. 1276 CmpPredicate Pred0; 1277 Value *X; 1278 Constant *C; 1279 if (!match(Cmp0, m_ICmp(Pred0, m_Value(X), m_Constant(C))) || 1280 !isGuaranteedNotToBeUndefOrPoison(C) || isa<Constant>(X)) 1281 return nullptr; 1282 if ((IsAnd && Pred0 != ICmpInst::ICMP_EQ) || 1283 (!IsAnd && Pred0 != ICmpInst::ICMP_NE)) 1284 return nullptr; 1285 1286 // The other compare must include a common operand (X). Canonicalize the 1287 // common operand as operand 1 (Pred1 is swapped if the common operand was 1288 // operand 0). 1289 Value *Y; 1290 CmpPredicate Pred1; 1291 if (!match(Cmp1, m_c_ICmp(Pred1, m_Value(Y), m_Specific(X)))) 1292 return nullptr; 1293 1294 // Replace variable with constant value equivalence to remove a variable use: 1295 // (X == C) && (Y Pred1 X) --> (X == C) && (Y Pred1 C) 1296 // (X != C) || (Y Pred1 X) --> (X != C) || (Y Pred1 C) 1297 // Can think of the 'or' substitution with the 'and' bool equivalent: 1298 // A || B --> A || (!A && B) 1299 Value *SubstituteCmp = simplifyICmpInst(Pred1, Y, C, Q); 1300 if (!SubstituteCmp) { 1301 // If we need to create a new instruction, require that the old compare can 1302 // be removed. 1303 if (!Cmp1->hasOneUse()) 1304 return nullptr; 1305 SubstituteCmp = Builder.CreateICmp(Pred1, Y, C); 1306 } 1307 if (IsLogical) 1308 return IsAnd ? Builder.CreateLogicalAnd(Cmp0, SubstituteCmp) 1309 : Builder.CreateLogicalOr(Cmp0, SubstituteCmp); 1310 return Builder.CreateBinOp(IsAnd ? Instruction::And : Instruction::Or, Cmp0, 1311 SubstituteCmp); 1312 } 1313 1314 /// Fold (icmp Pred1 V1, C1) & (icmp Pred2 V2, C2) 1315 /// or (icmp Pred1 V1, C1) | (icmp Pred2 V2, C2) 1316 /// into a single comparison using range-based reasoning. 1317 /// NOTE: This is also used for logical and/or, must be poison-safe! 1318 Value *InstCombinerImpl::foldAndOrOfICmpsUsingRanges(ICmpInst *ICmp1, 1319 ICmpInst *ICmp2, 1320 bool IsAnd) { 1321 CmpPredicate Pred1, Pred2; 1322 Value *V1, *V2; 1323 const APInt *C1, *C2; 1324 if (!match(ICmp1, m_ICmp(Pred1, m_Value(V1), m_APInt(C1))) || 1325 !match(ICmp2, m_ICmp(Pred2, m_Value(V2), m_APInt(C2)))) 1326 return nullptr; 1327 1328 // Look through add of a constant offset on V1, V2, or both operands. This 1329 // allows us to interpret the V + C' < C'' range idiom into a proper range. 1330 const APInt *Offset1 = nullptr, *Offset2 = nullptr; 1331 if (V1 != V2) { 1332 Value *X; 1333 if (match(V1, m_Add(m_Value(X), m_APInt(Offset1)))) 1334 V1 = X; 1335 if (match(V2, m_Add(m_Value(X), m_APInt(Offset2)))) 1336 V2 = X; 1337 } 1338 1339 if (V1 != V2) 1340 return nullptr; 1341 1342 ConstantRange CR1 = ConstantRange::makeExactICmpRegion( 1343 IsAnd ? ICmpInst::getInverseCmpPredicate(Pred1) : Pred1, *C1); 1344 if (Offset1) 1345 CR1 = CR1.subtract(*Offset1); 1346 1347 ConstantRange CR2 = ConstantRange::makeExactICmpRegion( 1348 IsAnd ? 
ICmpInst::getInverseCmpPredicate(Pred2) : Pred2, *C2); 1349 if (Offset2) 1350 CR2 = CR2.subtract(*Offset2); 1351 1352 Type *Ty = V1->getType(); 1353 Value *NewV = V1; 1354 std::optional<ConstantRange> CR = CR1.exactUnionWith(CR2); 1355 if (!CR) { 1356 if (!(ICmp1->hasOneUse() && ICmp2->hasOneUse()) || CR1.isWrappedSet() || 1357 CR2.isWrappedSet()) 1358 return nullptr; 1359 1360 // Check whether we have equal-size ranges that only differ by one bit. 1361 // In that case we can apply a mask to map one range onto the other. 1362 APInt LowerDiff = CR1.getLower() ^ CR2.getLower(); 1363 APInt UpperDiff = (CR1.getUpper() - 1) ^ (CR2.getUpper() - 1); 1364 APInt CR1Size = CR1.getUpper() - CR1.getLower(); 1365 if (!LowerDiff.isPowerOf2() || LowerDiff != UpperDiff || 1366 CR1Size != CR2.getUpper() - CR2.getLower()) 1367 return nullptr; 1368 1369 CR = CR1.getLower().ult(CR2.getLower()) ? CR1 : CR2; 1370 NewV = Builder.CreateAnd(NewV, ConstantInt::get(Ty, ~LowerDiff)); 1371 } 1372 1373 if (IsAnd) 1374 CR = CR->inverse(); 1375 1376 CmpInst::Predicate NewPred; 1377 APInt NewC, Offset; 1378 CR->getEquivalentICmp(NewPred, NewC, Offset); 1379 1380 if (Offset != 0) 1381 NewV = Builder.CreateAdd(NewV, ConstantInt::get(Ty, Offset)); 1382 return Builder.CreateICmp(NewPred, NewV, ConstantInt::get(Ty, NewC)); 1383 } 1384 1385 /// Ignore all operations which only change the sign of a value, returning the 1386 /// underlying magnitude value. 1387 static Value *stripSignOnlyFPOps(Value *Val) { 1388 match(Val, m_FNeg(m_Value(Val))); 1389 match(Val, m_FAbs(m_Value(Val))); 1390 match(Val, m_CopySign(m_Value(Val), m_Value())); 1391 return Val; 1392 } 1393 1394 /// Matches canonical form of isnan, fcmp ord x, 0 1395 static bool matchIsNotNaN(FCmpInst::Predicate P, Value *LHS, Value *RHS) { 1396 return P == FCmpInst::FCMP_ORD && match(RHS, m_AnyZeroFP()); 1397 } 1398 1399 /// Matches fcmp u__ x, +/-inf 1400 static bool matchUnorderedInfCompare(FCmpInst::Predicate P, Value *LHS, 1401 Value *RHS) { 1402 return FCmpInst::isUnordered(P) && match(RHS, m_Inf()); 1403 } 1404 1405 /// and (fcmp ord x, 0), (fcmp u* x, inf) -> fcmp o* x, inf 1406 /// 1407 /// Clang emits this pattern for doing an isfinite check in __builtin_isnormal. 1408 static Value *matchIsFiniteTest(InstCombiner::BuilderTy &Builder, FCmpInst *LHS, 1409 FCmpInst *RHS) { 1410 Value *LHS0 = LHS->getOperand(0), *LHS1 = LHS->getOperand(1); 1411 Value *RHS0 = RHS->getOperand(0), *RHS1 = RHS->getOperand(1); 1412 FCmpInst::Predicate PredL = LHS->getPredicate(), PredR = RHS->getPredicate(); 1413 1414 if (!matchIsNotNaN(PredL, LHS0, LHS1) || 1415 !matchUnorderedInfCompare(PredR, RHS0, RHS1)) 1416 return nullptr; 1417 1418 return Builder.CreateFCmpFMF(FCmpInst::getOrderedPredicate(PredR), RHS0, RHS1, 1419 FMFSource::intersect(LHS, RHS)); 1420 } 1421 1422 Value *InstCombinerImpl::foldLogicOfFCmps(FCmpInst *LHS, FCmpInst *RHS, 1423 bool IsAnd, bool IsLogicalSelect) { 1424 Value *LHS0 = LHS->getOperand(0), *LHS1 = LHS->getOperand(1); 1425 Value *RHS0 = RHS->getOperand(0), *RHS1 = RHS->getOperand(1); 1426 FCmpInst::Predicate PredL = LHS->getPredicate(), PredR = RHS->getPredicate(); 1427 1428 if (LHS0 == RHS1 && RHS0 == LHS1) { 1429 // Swap RHS operands to match LHS. 1430 PredR = FCmpInst::getSwappedPredicate(PredR); 1431 std::swap(RHS0, RHS1); 1432 } 1433 1434 // Simplify (fcmp cc0 x, y) & (fcmp cc1 x, y). 
1435 // Suppose the relation between x and y is R, where R is one of 1436 // U(1000), L(0100), G(0010) or E(0001), and CC0 and CC1 are the bitmasks for 1437 // testing the desired relations. 1438 // 1439 // Since (R & CC0) and (R & CC1) are either R or 0, we actually have this: 1440 // bool(R & CC0) && bool(R & CC1) 1441 // = bool((R & CC0) & (R & CC1)) 1442 // = bool(R & (CC0 & CC1)) <= by re-association, commutation, and idempotency 1443 // 1444 // Since (R & CC0) and (R & CC1) are either R or 0, we actually have this: 1445 // bool(R & CC0) || bool(R & CC1) 1446 // = bool((R & CC0) | (R & CC1)) 1447 // = bool(R & (CC0 | CC1)) <= by reversed distribution (contribution? ;) 1448 if (LHS0 == RHS0 && LHS1 == RHS1) { 1449 unsigned FCmpCodeL = getFCmpCode(PredL); 1450 unsigned FCmpCodeR = getFCmpCode(PredR); 1451 unsigned NewPred = IsAnd ? FCmpCodeL & FCmpCodeR : FCmpCodeL | FCmpCodeR; 1452 1453 // Intersect the fast math flags. 1454 // TODO: We can union the fast math flags unless this is a logical select. 1455 return getFCmpValue(NewPred, LHS0, LHS1, Builder, 1456 FMFSource::intersect(LHS, RHS)); 1457 } 1458 1459 // This transform is not valid for a logical select. 1460 if (!IsLogicalSelect && 1461 ((PredL == FCmpInst::FCMP_ORD && PredR == FCmpInst::FCMP_ORD && IsAnd) || 1462 (PredL == FCmpInst::FCMP_UNO && PredR == FCmpInst::FCMP_UNO && 1463 !IsAnd))) { 1464 if (LHS0->getType() != RHS0->getType()) 1465 return nullptr; 1466 1467 // FCmp canonicalization ensures that (fcmp ord/uno X, X) and 1468 // (fcmp ord/uno X, C) will be transformed to (fcmp X, +0.0). 1469 if (match(LHS1, m_PosZeroFP()) && match(RHS1, m_PosZeroFP())) { 1470 // Ignore the constants because they are obviously not NANs: 1471 // (fcmp ord x, 0.0) & (fcmp ord y, 0.0) -> (fcmp ord x, y) 1472 // (fcmp uno x, 0.0) | (fcmp uno y, 0.0) -> (fcmp uno x, y) 1473 return Builder.CreateFCmpFMF(PredL, LHS0, RHS0, 1474 FMFSource::intersect(LHS, RHS)); 1475 } 1476 } 1477 1478 if (IsAnd && stripSignOnlyFPOps(LHS0) == stripSignOnlyFPOps(RHS0)) { 1479 // and (fcmp ord x, 0), (fcmp u* x, inf) -> fcmp o* x, inf 1480 // and (fcmp ord x, 0), (fcmp u* fabs(x), inf) -> fcmp o* x, inf 1481 if (Value *Left = matchIsFiniteTest(Builder, LHS, RHS)) 1482 return Left; 1483 if (Value *Right = matchIsFiniteTest(Builder, RHS, LHS)) 1484 return Right; 1485 } 1486 1487 // Turn at least two fcmps with constants into llvm.is.fpclass. 1488 // 1489 // If we can represent a combined value test with one class call, we can 1490 // potentially eliminate 4-6 instructions. If we can represent a test with a 1491 // single fcmp with fneg and fabs, that's likely a better canonical form. 1492 if (LHS->hasOneUse() && RHS->hasOneUse()) { 1493 auto [ClassValRHS, ClassMaskRHS] = 1494 fcmpToClassTest(PredR, *RHS->getFunction(), RHS0, RHS1); 1495 if (ClassValRHS) { 1496 auto [ClassValLHS, ClassMaskLHS] = 1497 fcmpToClassTest(PredL, *LHS->getFunction(), LHS0, LHS1); 1498 if (ClassValLHS == ClassValRHS) { 1499 unsigned CombinedMask = IsAnd ? 
(ClassMaskLHS & ClassMaskRHS) 1500 : (ClassMaskLHS | ClassMaskRHS); 1501 return Builder.CreateIntrinsic( 1502 Intrinsic::is_fpclass, {ClassValLHS->getType()}, 1503 {ClassValLHS, Builder.getInt32(CombinedMask)}); 1504 } 1505 } 1506 } 1507 1508 // Canonicalize the range check idiom: 1509 // and (fcmp olt/ole/ult/ule x, C), (fcmp ogt/oge/ugt/uge x, -C) 1510 // --> fabs(x) olt/ole/ult/ule C 1511 // or (fcmp ogt/oge/ugt/uge x, C), (fcmp olt/ole/ult/ule x, -C) 1512 // --> fabs(x) ogt/oge/ugt/uge C 1513 // TODO: Generalize to handle a negated variable operand? 1514 const APFloat *LHSC, *RHSC; 1515 if (LHS0 == RHS0 && LHS->hasOneUse() && RHS->hasOneUse() && 1516 FCmpInst::getSwappedPredicate(PredL) == PredR && 1517 match(LHS1, m_APFloatAllowPoison(LHSC)) && 1518 match(RHS1, m_APFloatAllowPoison(RHSC)) && 1519 LHSC->bitwiseIsEqual(neg(*RHSC))) { 1520 auto IsLessThanOrLessEqual = [](FCmpInst::Predicate Pred) { 1521 switch (Pred) { 1522 case FCmpInst::FCMP_OLT: 1523 case FCmpInst::FCMP_OLE: 1524 case FCmpInst::FCMP_ULT: 1525 case FCmpInst::FCMP_ULE: 1526 return true; 1527 default: 1528 return false; 1529 } 1530 }; 1531 if (IsLessThanOrLessEqual(IsAnd ? PredR : PredL)) { 1532 std::swap(LHSC, RHSC); 1533 std::swap(PredL, PredR); 1534 } 1535 if (IsLessThanOrLessEqual(IsAnd ? PredL : PredR)) { 1536 FastMathFlags NewFlag = LHS->getFastMathFlags(); 1537 if (!IsLogicalSelect) 1538 NewFlag |= RHS->getFastMathFlags(); 1539 1540 Value *FAbs = 1541 Builder.CreateUnaryIntrinsic(Intrinsic::fabs, LHS0, NewFlag); 1542 return Builder.CreateFCmpFMF( 1543 PredL, FAbs, ConstantFP::get(LHS0->getType(), *LHSC), NewFlag); 1544 } 1545 } 1546 1547 return nullptr; 1548 } 1549 1550 /// Match an fcmp against a special value that performs a test possible by 1551 /// llvm.is.fpclass. 1552 static bool matchIsFPClassLikeFCmp(Value *Op, Value *&ClassVal, 1553 uint64_t &ClassMask) { 1554 auto *FCmp = dyn_cast<FCmpInst>(Op); 1555 if (!FCmp || !FCmp->hasOneUse()) 1556 return false; 1557 1558 std::tie(ClassVal, ClassMask) = 1559 fcmpToClassTest(FCmp->getPredicate(), *FCmp->getParent()->getParent(), 1560 FCmp->getOperand(0), FCmp->getOperand(1)); 1561 return ClassVal != nullptr; 1562 } 1563 1564 /// or (is_fpclass x, mask0), (is_fpclass x, mask1) 1565 /// -> is_fpclass x, (mask0 | mask1) 1566 /// and (is_fpclass x, mask0), (is_fpclass x, mask1) 1567 /// -> is_fpclass x, (mask0 & mask1) 1568 /// xor (is_fpclass x, mask0), (is_fpclass x, mask1) 1569 /// -> is_fpclass x, (mask0 ^ mask1) 1570 Instruction *InstCombinerImpl::foldLogicOfIsFPClass(BinaryOperator &BO, 1571 Value *Op0, Value *Op1) { 1572 Value *ClassVal0 = nullptr; 1573 Value *ClassVal1 = nullptr; 1574 uint64_t ClassMask0, ClassMask1; 1575 1576 // Restrict to folding one fcmp into one is.fpclass for now, don't introduce a 1577 // new class. 1578 // 1579 // TODO: Support forming is.fpclass out of 2 separate fcmps when codegen is 1580 // better. 
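// For example (mask values here are illustrative and use the usual
// llvm.is.fpclass test-bit encoding, where nan is 3 and inf is 516):
//   or (is_fpclass x, 3), (is_fpclass x, 516) --> is_fpclass x, 519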
1581
1582 bool IsLHSClass =
1583 match(Op0, m_OneUse(m_Intrinsic<Intrinsic::is_fpclass>(
1584 m_Value(ClassVal0), m_ConstantInt(ClassMask0))));
1585 bool IsRHSClass =
1586 match(Op1, m_OneUse(m_Intrinsic<Intrinsic::is_fpclass>(
1587 m_Value(ClassVal1), m_ConstantInt(ClassMask1))));
1588 if ((((IsLHSClass || matchIsFPClassLikeFCmp(Op0, ClassVal0, ClassMask0)) &&
1589 (IsRHSClass || matchIsFPClassLikeFCmp(Op1, ClassVal1, ClassMask1)))) &&
1590 ClassVal0 == ClassVal1) {
1591 unsigned NewClassMask;
1592 switch (BO.getOpcode()) {
1593 case Instruction::And:
1594 NewClassMask = ClassMask0 & ClassMask1;
1595 break;
1596 case Instruction::Or:
1597 NewClassMask = ClassMask0 | ClassMask1;
1598 break;
1599 case Instruction::Xor:
1600 NewClassMask = ClassMask0 ^ ClassMask1;
1601 break;
1602 default:
1603 llvm_unreachable("not a binary logic operator");
1604 }
1605
1606 if (IsLHSClass) {
1607 auto *II = cast<IntrinsicInst>(Op0);
1608 II->setArgOperand(
1609 1, ConstantInt::get(II->getArgOperand(1)->getType(), NewClassMask));
1610 return replaceInstUsesWith(BO, II);
1611 }
1612
1613 if (IsRHSClass) {
1614 auto *II = cast<IntrinsicInst>(Op1);
1615 II->setArgOperand(
1616 1, ConstantInt::get(II->getArgOperand(1)->getType(), NewClassMask));
1617 return replaceInstUsesWith(BO, II);
1618 }
1619
1620 CallInst *NewClass =
1621 Builder.CreateIntrinsic(Intrinsic::is_fpclass, {ClassVal0->getType()},
1622 {ClassVal0, Builder.getInt32(NewClassMask)});
1623 return replaceInstUsesWith(BO, NewClass);
1624 }
1625
1626 return nullptr;
1627 }
1628
1629 /// Look for the pattern that conditionally negates a value via math operations:
1630 /// cond.splat = sext i1 cond
1631 /// sub = add cond.splat, x
1632 /// xor = xor sub, cond.splat
1633 /// and rewrite it to do the same, but via logical operations:
1634 /// value.neg = sub 0, value
1635 /// res = select i1 cond, value.neg, value
1636 Instruction *InstCombinerImpl::canonicalizeConditionalNegationViaMathToSelect(
1637 BinaryOperator &I) {
1638 assert(I.getOpcode() == BinaryOperator::Xor && "Only for xor!");
1639 Value *Cond, *X;
1640 // As per complexity ordering, `xor` is not commutative here.
1641 if (!match(&I, m_c_BinOp(m_OneUse(m_Value()), m_Value())) ||
1642 !match(I.getOperand(1), m_SExt(m_Value(Cond))) ||
1643 !Cond->getType()->isIntOrIntVectorTy(1) ||
1644 !match(I.getOperand(0), m_c_Add(m_SExt(m_Specific(Cond)), m_Value(X))))
1645 return nullptr;
1646 return SelectInst::Create(Cond, Builder.CreateNeg(X, X->getName() + ".neg"),
1647 X);
1648 }
1649
1650 /// This is a limited reassociation for a special case (see above) where we are
1651 /// checking if two values are either both NAN (unordered) or not-NAN (ordered).
1652 /// This could be handled more generally in '-reassociation', but it seems like
1653 /// an unlikely pattern for a large number of logic ops and fcmps.
1654 static Instruction *reassociateFCmps(BinaryOperator &BO,
1655 InstCombiner::BuilderTy &Builder) {
1656 Instruction::BinaryOps Opcode = BO.getOpcode();
1657 assert((Opcode == Instruction::And || Opcode == Instruction::Or) &&
1658 "Expecting and/or op for fcmp transform");
1659
1660 // There are 4 commuted variants of the pattern. Canonicalize operands of this
1661 // logic op so an fcmp is operand 0 and a matching logic op is operand 1.
1662 Value *Op0 = BO.getOperand(0), *Op1 = BO.getOperand(1), *X;
1663 if (match(Op1, m_FCmp(m_Value(), m_AnyZeroFP())))
1664 std::swap(Op0, Op1);
1665
1666 // Match inner binop and the predicate for combining 2 NAN checks into 1.
1667 Value *BO10, *BO11; 1668 FCmpInst::Predicate NanPred = Opcode == Instruction::And ? FCmpInst::FCMP_ORD 1669 : FCmpInst::FCMP_UNO; 1670 if (!match(Op0, m_SpecificFCmp(NanPred, m_Value(X), m_AnyZeroFP())) || 1671 !match(Op1, m_BinOp(Opcode, m_Value(BO10), m_Value(BO11)))) 1672 return nullptr; 1673 1674 // The inner logic op must have a matching fcmp operand. 1675 Value *Y; 1676 if (!match(BO10, m_SpecificFCmp(NanPred, m_Value(Y), m_AnyZeroFP())) || 1677 X->getType() != Y->getType()) 1678 std::swap(BO10, BO11); 1679 1680 if (!match(BO10, m_SpecificFCmp(NanPred, m_Value(Y), m_AnyZeroFP())) || 1681 X->getType() != Y->getType()) 1682 return nullptr; 1683 1684 // and (fcmp ord X, 0), (and (fcmp ord Y, 0), Z) --> and (fcmp ord X, Y), Z 1685 // or (fcmp uno X, 0), (or (fcmp uno Y, 0), Z) --> or (fcmp uno X, Y), Z 1686 // Intersect FMF from the 2 source fcmps. 1687 Value *NewFCmp = 1688 Builder.CreateFCmpFMF(NanPred, X, Y, FMFSource::intersect(Op0, BO10)); 1689 return BinaryOperator::Create(Opcode, NewFCmp, BO11); 1690 } 1691 1692 /// Match variations of De Morgan's Laws: 1693 /// (~A & ~B) == (~(A | B)) 1694 /// (~A | ~B) == (~(A & B)) 1695 static Instruction *matchDeMorgansLaws(BinaryOperator &I, 1696 InstCombiner &IC) { 1697 const Instruction::BinaryOps Opcode = I.getOpcode(); 1698 assert((Opcode == Instruction::And || Opcode == Instruction::Or) && 1699 "Trying to match De Morgan's Laws with something other than and/or"); 1700 1701 // Flip the logic operation. 1702 const Instruction::BinaryOps FlippedOpcode = 1703 (Opcode == Instruction::And) ? Instruction::Or : Instruction::And; 1704 1705 Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); 1706 Value *A, *B; 1707 if (match(Op0, m_OneUse(m_Not(m_Value(A)))) && 1708 match(Op1, m_OneUse(m_Not(m_Value(B)))) && 1709 !IC.isFreeToInvert(A, A->hasOneUse()) && 1710 !IC.isFreeToInvert(B, B->hasOneUse())) { 1711 Value *AndOr = 1712 IC.Builder.CreateBinOp(FlippedOpcode, A, B, I.getName() + ".demorgan"); 1713 return BinaryOperator::CreateNot(AndOr); 1714 } 1715 1716 // The 'not' ops may require reassociation. 1717 // (A & ~B) & ~C --> A & ~(B | C) 1718 // (~B & A) & ~C --> A & ~(B | C) 1719 // (A | ~B) | ~C --> A | ~(B & C) 1720 // (~B | A) | ~C --> A | ~(B & C) 1721 Value *C; 1722 if (match(Op0, m_OneUse(m_c_BinOp(Opcode, m_Value(A), m_Not(m_Value(B))))) && 1723 match(Op1, m_Not(m_Value(C)))) { 1724 Value *FlippedBO = IC.Builder.CreateBinOp(FlippedOpcode, B, C); 1725 return BinaryOperator::Create(Opcode, A, IC.Builder.CreateNot(FlippedBO)); 1726 } 1727 1728 return nullptr; 1729 } 1730 1731 bool InstCombinerImpl::shouldOptimizeCast(CastInst *CI) { 1732 Value *CastSrc = CI->getOperand(0); 1733 1734 // Noop casts and casts of constants should be eliminated trivially. 1735 if (CI->getSrcTy() == CI->getDestTy() || isa<Constant>(CastSrc)) 1736 return false; 1737 1738 // If this cast is paired with another cast that can be eliminated, we prefer 1739 // to have it eliminated. 1740 if (const auto *PrecedingCI = dyn_cast<CastInst>(CastSrc)) 1741 if (isEliminableCastPair(PrecedingCI, CI)) 1742 return false; 1743 1744 return true; 1745 } 1746 1747 /// Fold {and,or,xor} (cast X), C. 
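/// For example (illustrative, assuming the constant survives truncation to the
/// source type unchanged):
///   and (zext i8 %x to i32), 15 --> zext (and i8 %x, 15) to i32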
1748 static Instruction *foldLogicCastConstant(BinaryOperator &Logic, CastInst *Cast, 1749 InstCombinerImpl &IC) { 1750 Constant *C = dyn_cast<Constant>(Logic.getOperand(1)); 1751 if (!C) 1752 return nullptr; 1753 1754 auto LogicOpc = Logic.getOpcode(); 1755 Type *DestTy = Logic.getType(); 1756 Type *SrcTy = Cast->getSrcTy(); 1757 1758 // Move the logic operation ahead of a zext or sext if the constant is 1759 // unchanged in the smaller source type. Performing the logic in a smaller 1760 // type may provide more information to later folds, and the smaller logic 1761 // instruction may be cheaper (particularly in the case of vectors). 1762 Value *X; 1763 if (match(Cast, m_OneUse(m_ZExt(m_Value(X))))) { 1764 if (Constant *TruncC = IC.getLosslessUnsignedTrunc(C, SrcTy)) { 1765 // LogicOpc (zext X), C --> zext (LogicOpc X, C) 1766 Value *NewOp = IC.Builder.CreateBinOp(LogicOpc, X, TruncC); 1767 return new ZExtInst(NewOp, DestTy); 1768 } 1769 } 1770 1771 if (match(Cast, m_OneUse(m_SExtLike(m_Value(X))))) { 1772 if (Constant *TruncC = IC.getLosslessSignedTrunc(C, SrcTy)) { 1773 // LogicOpc (sext X), C --> sext (LogicOpc X, C) 1774 Value *NewOp = IC.Builder.CreateBinOp(LogicOpc, X, TruncC); 1775 return new SExtInst(NewOp, DestTy); 1776 } 1777 } 1778 1779 return nullptr; 1780 } 1781 1782 /// Fold {and,or,xor} (cast X), Y. 1783 Instruction *InstCombinerImpl::foldCastedBitwiseLogic(BinaryOperator &I) { 1784 auto LogicOpc = I.getOpcode(); 1785 assert(I.isBitwiseLogicOp() && "Unexpected opcode for bitwise logic folding"); 1786 1787 Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); 1788 1789 // fold bitwise(A >> BW - 1, zext(icmp)) (BW is the scalar bits of the 1790 // type of A) 1791 // -> bitwise(zext(A < 0), zext(icmp)) 1792 // -> zext(bitwise(A < 0, icmp)) 1793 auto FoldBitwiseICmpZeroWithICmp = [&](Value *Op0, 1794 Value *Op1) -> Instruction * { 1795 Value *A; 1796 bool IsMatched = 1797 match(Op0, 1798 m_OneUse(m_LShr( 1799 m_Value(A), 1800 m_SpecificInt(Op0->getType()->getScalarSizeInBits() - 1)))) && 1801 match(Op1, m_OneUse(m_ZExt(m_ICmp(m_Value(), m_Value())))); 1802 1803 if (!IsMatched) 1804 return nullptr; 1805 1806 auto *ICmpL = 1807 Builder.CreateICmpSLT(A, Constant::getNullValue(A->getType())); 1808 auto *ICmpR = cast<ZExtInst>(Op1)->getOperand(0); 1809 auto *BitwiseOp = Builder.CreateBinOp(LogicOpc, ICmpL, ICmpR); 1810 1811 return new ZExtInst(BitwiseOp, Op0->getType()); 1812 }; 1813 1814 if (auto *Ret = FoldBitwiseICmpZeroWithICmp(Op0, Op1)) 1815 return Ret; 1816 1817 if (auto *Ret = FoldBitwiseICmpZeroWithICmp(Op1, Op0)) 1818 return Ret; 1819 1820 CastInst *Cast0 = dyn_cast<CastInst>(Op0); 1821 if (!Cast0) 1822 return nullptr; 1823 1824 // This must be a cast from an integer or integer vector source type to allow 1825 // transformation of the logic operation to the source type. 1826 Type *DestTy = I.getType(); 1827 Type *SrcTy = Cast0->getSrcTy(); 1828 if (!SrcTy->isIntOrIntVectorTy()) 1829 return nullptr; 1830 1831 if (Instruction *Ret = foldLogicCastConstant(I, Cast0, *this)) 1832 return Ret; 1833 1834 CastInst *Cast1 = dyn_cast<CastInst>(Op1); 1835 if (!Cast1) 1836 return nullptr; 1837 1838 // Both operands of the logic operation are casts. The casts must be the 1839 // same kind for reduction. 1840 Instruction::CastOps CastOpcode = Cast0->getOpcode(); 1841 if (CastOpcode != Cast1->getOpcode()) 1842 return nullptr; 1843 1844 // If the source types do not match, but the casts are matching extends, we 1845 // can still narrow the logic op. 
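// For example (widths are illustrative):
//   or (zext i8 %x to i64), (zext i16 %y to i64)
//   --> zext (or (zext i8 %x to i16), %y) to i64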
1846 if (SrcTy != Cast1->getSrcTy()) { 1847 Value *X, *Y; 1848 if (match(Cast0, m_OneUse(m_ZExtOrSExt(m_Value(X)))) && 1849 match(Cast1, m_OneUse(m_ZExtOrSExt(m_Value(Y))))) { 1850 // Cast the narrower source to the wider source type. 1851 unsigned XNumBits = X->getType()->getScalarSizeInBits(); 1852 unsigned YNumBits = Y->getType()->getScalarSizeInBits(); 1853 if (XNumBits < YNumBits) 1854 X = Builder.CreateCast(CastOpcode, X, Y->getType()); 1855 else 1856 Y = Builder.CreateCast(CastOpcode, Y, X->getType()); 1857 // Do the logic op in the intermediate width, then widen more. 1858 Value *NarrowLogic = Builder.CreateBinOp(LogicOpc, X, Y); 1859 return CastInst::Create(CastOpcode, NarrowLogic, DestTy); 1860 } 1861 1862 // Give up for other cast opcodes. 1863 return nullptr; 1864 } 1865 1866 Value *Cast0Src = Cast0->getOperand(0); 1867 Value *Cast1Src = Cast1->getOperand(0); 1868 1869 // fold logic(cast(A), cast(B)) -> cast(logic(A, B)) 1870 if ((Cast0->hasOneUse() || Cast1->hasOneUse()) && 1871 shouldOptimizeCast(Cast0) && shouldOptimizeCast(Cast1)) { 1872 Value *NewOp = Builder.CreateBinOp(LogicOpc, Cast0Src, Cast1Src, 1873 I.getName()); 1874 return CastInst::Create(CastOpcode, NewOp, DestTy); 1875 } 1876 1877 return nullptr; 1878 } 1879 1880 static Instruction *foldAndToXor(BinaryOperator &I, 1881 InstCombiner::BuilderTy &Builder) { 1882 assert(I.getOpcode() == Instruction::And); 1883 Value *Op0 = I.getOperand(0); 1884 Value *Op1 = I.getOperand(1); 1885 Value *A, *B; 1886 1887 // Operand complexity canonicalization guarantees that the 'or' is Op0. 1888 // (A | B) & ~(A & B) --> A ^ B 1889 // (A | B) & ~(B & A) --> A ^ B 1890 if (match(&I, m_BinOp(m_Or(m_Value(A), m_Value(B)), 1891 m_Not(m_c_And(m_Deferred(A), m_Deferred(B)))))) 1892 return BinaryOperator::CreateXor(A, B); 1893 1894 // (A | ~B) & (~A | B) --> ~(A ^ B) 1895 // (A | ~B) & (B | ~A) --> ~(A ^ B) 1896 // (~B | A) & (~A | B) --> ~(A ^ B) 1897 // (~B | A) & (B | ~A) --> ~(A ^ B) 1898 if (Op0->hasOneUse() || Op1->hasOneUse()) 1899 if (match(&I, m_BinOp(m_c_Or(m_Value(A), m_Not(m_Value(B))), 1900 m_c_Or(m_Not(m_Deferred(A)), m_Deferred(B))))) 1901 return BinaryOperator::CreateNot(Builder.CreateXor(A, B)); 1902 1903 return nullptr; 1904 } 1905 1906 static Instruction *foldOrToXor(BinaryOperator &I, 1907 InstCombiner::BuilderTy &Builder) { 1908 assert(I.getOpcode() == Instruction::Or); 1909 Value *Op0 = I.getOperand(0); 1910 Value *Op1 = I.getOperand(1); 1911 Value *A, *B; 1912 1913 // Operand complexity canonicalization guarantees that the 'and' is Op0. 1914 // (A & B) | ~(A | B) --> ~(A ^ B) 1915 // (A & B) | ~(B | A) --> ~(A ^ B) 1916 if (Op0->hasOneUse() || Op1->hasOneUse()) 1917 if (match(Op0, m_And(m_Value(A), m_Value(B))) && 1918 match(Op1, m_Not(m_c_Or(m_Specific(A), m_Specific(B))))) 1919 return BinaryOperator::CreateNot(Builder.CreateXor(A, B)); 1920 1921 // Operand complexity canonicalization guarantees that the 'xor' is Op0. 
1922 // (A ^ B) | ~(A | B) --> ~(A & B) 1923 // (A ^ B) | ~(B | A) --> ~(A & B) 1924 if (Op0->hasOneUse() || Op1->hasOneUse()) 1925 if (match(Op0, m_Xor(m_Value(A), m_Value(B))) && 1926 match(Op1, m_Not(m_c_Or(m_Specific(A), m_Specific(B))))) 1927 return BinaryOperator::CreateNot(Builder.CreateAnd(A, B)); 1928 1929 // (A & ~B) | (~A & B) --> A ^ B 1930 // (A & ~B) | (B & ~A) --> A ^ B 1931 // (~B & A) | (~A & B) --> A ^ B 1932 // (~B & A) | (B & ~A) --> A ^ B 1933 if (match(Op0, m_c_And(m_Value(A), m_Not(m_Value(B)))) && 1934 match(Op1, m_c_And(m_Not(m_Specific(A)), m_Specific(B)))) 1935 return BinaryOperator::CreateXor(A, B); 1936 1937 return nullptr; 1938 } 1939 1940 /// Return true if a constant shift amount is always less than the specified 1941 /// bit-width. If not, the shift could create poison in the narrower type. 1942 static bool canNarrowShiftAmt(Constant *C, unsigned BitWidth) { 1943 APInt Threshold(C->getType()->getScalarSizeInBits(), BitWidth); 1944 return match(C, m_SpecificInt_ICMP(ICmpInst::ICMP_ULT, Threshold)); 1945 } 1946 1947 /// Try to use narrower ops (sink zext ops) for an 'and' with binop operand and 1948 /// a common zext operand: and (binop (zext X), C), (zext X). 1949 Instruction *InstCombinerImpl::narrowMaskedBinOp(BinaryOperator &And) { 1950 // This transform could also apply to {or, and, xor}, but there are better 1951 // folds for those cases, so we don't expect those patterns here. AShr is not 1952 // handled because it should always be transformed to LShr in this sequence. 1953 // The subtract transform is different because it has a constant on the left. 1954 // Add/mul commute the constant to RHS; sub with constant RHS becomes add. 1955 Value *Op0 = And.getOperand(0), *Op1 = And.getOperand(1); 1956 Constant *C; 1957 if (!match(Op0, m_OneUse(m_Add(m_Specific(Op1), m_Constant(C)))) && 1958 !match(Op0, m_OneUse(m_Mul(m_Specific(Op1), m_Constant(C)))) && 1959 !match(Op0, m_OneUse(m_LShr(m_Specific(Op1), m_Constant(C)))) && 1960 !match(Op0, m_OneUse(m_Shl(m_Specific(Op1), m_Constant(C)))) && 1961 !match(Op0, m_OneUse(m_Sub(m_Constant(C), m_Specific(Op1))))) 1962 return nullptr; 1963 1964 Value *X; 1965 if (!match(Op1, m_ZExt(m_Value(X))) || Op1->hasNUsesOrMore(3)) 1966 return nullptr; 1967 1968 Type *Ty = And.getType(); 1969 if (!isa<VectorType>(Ty) && !shouldChangeType(Ty, X->getType())) 1970 return nullptr; 1971 1972 // If we're narrowing a shift, the shift amount must be safe (less than the 1973 // width) in the narrower type. If the shift amount is greater, instsimplify 1974 // usually handles that case, but we can't guarantee/assert it. 1975 Instruction::BinaryOps Opc = cast<BinaryOperator>(Op0)->getOpcode(); 1976 if (Opc == Instruction::LShr || Opc == Instruction::Shl) 1977 if (!canNarrowShiftAmt(C, X->getType()->getScalarSizeInBits())) 1978 return nullptr; 1979 1980 // and (sub C, (zext X)), (zext X) --> zext (and (sub C', X), X) 1981 // and (binop (zext X), C), (zext X) --> zext (and (binop X, C'), X) 1982 Value *NewC = ConstantExpr::getTrunc(C, X->getType()); 1983 Value *NewBO = Opc == Instruction::Sub ? Builder.CreateBinOp(Opc, NewC, X) 1984 : Builder.CreateBinOp(Opc, X, NewC); 1985 return new ZExtInst(Builder.CreateAnd(NewBO, X), Ty); 1986 } 1987 1988 /// Try folding relatively complex patterns for both And and Or operations 1989 /// with all And and Or swapped. 
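/// One representative case from the matchers below:
///   (~(A | B) & C) | (~(A | C) & B) --> (B ^ C) & ~A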
1990 static Instruction *foldComplexAndOrPatterns(BinaryOperator &I, 1991 InstCombiner::BuilderTy &Builder) { 1992 const Instruction::BinaryOps Opcode = I.getOpcode(); 1993 assert(Opcode == Instruction::And || Opcode == Instruction::Or); 1994 1995 // Flip the logic operation. 1996 const Instruction::BinaryOps FlippedOpcode = 1997 (Opcode == Instruction::And) ? Instruction::Or : Instruction::And; 1998 1999 Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); 2000 Value *A, *B, *C, *X, *Y, *Dummy; 2001 2002 // Match following expressions: 2003 // (~(A | B) & C) 2004 // (~(A & B) | C) 2005 // Captures X = ~(A | B) or ~(A & B) 2006 const auto matchNotOrAnd = 2007 [Opcode, FlippedOpcode](Value *Op, auto m_A, auto m_B, auto m_C, 2008 Value *&X, bool CountUses = false) -> bool { 2009 if (CountUses && !Op->hasOneUse()) 2010 return false; 2011 2012 if (match(Op, m_c_BinOp(FlippedOpcode, 2013 m_CombineAnd(m_Value(X), 2014 m_Not(m_c_BinOp(Opcode, m_A, m_B))), 2015 m_C))) 2016 return !CountUses || X->hasOneUse(); 2017 2018 return false; 2019 }; 2020 2021 // (~(A | B) & C) | ... --> ... 2022 // (~(A & B) | C) & ... --> ... 2023 // TODO: One use checks are conservative. We just need to check that a total 2024 // number of multiple used values does not exceed reduction 2025 // in operations. 2026 if (matchNotOrAnd(Op0, m_Value(A), m_Value(B), m_Value(C), X)) { 2027 // (~(A | B) & C) | (~(A | C) & B) --> (B ^ C) & ~A 2028 // (~(A & B) | C) & (~(A & C) | B) --> ~((B ^ C) & A) 2029 if (matchNotOrAnd(Op1, m_Specific(A), m_Specific(C), m_Specific(B), Dummy, 2030 true)) { 2031 Value *Xor = Builder.CreateXor(B, C); 2032 return (Opcode == Instruction::Or) 2033 ? BinaryOperator::CreateAnd(Xor, Builder.CreateNot(A)) 2034 : BinaryOperator::CreateNot(Builder.CreateAnd(Xor, A)); 2035 } 2036 2037 // (~(A | B) & C) | (~(B | C) & A) --> (A ^ C) & ~B 2038 // (~(A & B) | C) & (~(B & C) | A) --> ~((A ^ C) & B) 2039 if (matchNotOrAnd(Op1, m_Specific(B), m_Specific(C), m_Specific(A), Dummy, 2040 true)) { 2041 Value *Xor = Builder.CreateXor(A, C); 2042 return (Opcode == Instruction::Or) 2043 ? BinaryOperator::CreateAnd(Xor, Builder.CreateNot(B)) 2044 : BinaryOperator::CreateNot(Builder.CreateAnd(Xor, B)); 2045 } 2046 2047 // (~(A | B) & C) | ~(A | C) --> ~((B & C) | A) 2048 // (~(A & B) | C) & ~(A & C) --> ~((B | C) & A) 2049 if (match(Op1, m_OneUse(m_Not(m_OneUse( 2050 m_c_BinOp(Opcode, m_Specific(A), m_Specific(C))))))) 2051 return BinaryOperator::CreateNot(Builder.CreateBinOp( 2052 Opcode, Builder.CreateBinOp(FlippedOpcode, B, C), A)); 2053 2054 // (~(A | B) & C) | ~(B | C) --> ~((A & C) | B) 2055 // (~(A & B) | C) & ~(B & C) --> ~((A | C) & B) 2056 if (match(Op1, m_OneUse(m_Not(m_OneUse( 2057 m_c_BinOp(Opcode, m_Specific(B), m_Specific(C))))))) 2058 return BinaryOperator::CreateNot(Builder.CreateBinOp( 2059 Opcode, Builder.CreateBinOp(FlippedOpcode, A, C), B)); 2060 2061 // (~(A | B) & C) | ~(C | (A ^ B)) --> ~((A | B) & (C | (A ^ B))) 2062 // Note, the pattern with swapped and/or is not handled because the 2063 // result is more undefined than a source: 2064 // (~(A & B) | C) & ~(C & (A ^ B)) --> (A ^ B ^ C) | ~(A | C) is invalid. 
2065 if (Opcode == Instruction::Or && Op0->hasOneUse() &&
2066 match(Op1, m_OneUse(m_Not(m_CombineAnd(
2067 m_Value(Y),
2068 m_c_BinOp(Opcode, m_Specific(C),
2069 m_c_Xor(m_Specific(A), m_Specific(B)))))))) {
2070 // X = ~(A | B)
2071 // Y = (C | (A ^ B))
2072 Value *Or = cast<BinaryOperator>(X)->getOperand(0);
2073 return BinaryOperator::CreateNot(Builder.CreateAnd(Or, Y));
2074 }
2075 }
2076
2077 // (~A & B & C) | ... --> ...
2078 // (~A | B | C) & ... --> ...
2079 // TODO: One use checks are conservative. We just need to check that the
2080 // total number of multiply-used values does not exceed the reduction
2081 // in operations.
2082 if (match(Op0,
2083 m_OneUse(m_c_BinOp(FlippedOpcode,
2084 m_BinOp(FlippedOpcode, m_Value(B), m_Value(C)),
2085 m_CombineAnd(m_Value(X), m_Not(m_Value(A)))))) ||
2086 match(Op0, m_OneUse(m_c_BinOp(
2087 FlippedOpcode,
2088 m_c_BinOp(FlippedOpcode, m_Value(C),
2089 m_CombineAnd(m_Value(X), m_Not(m_Value(A)))),
2090 m_Value(B))))) {
2091 // X = ~A
2092 // (~A & B & C) | ~(A | B | C) --> ~(A | (B ^ C))
2093 // (~A | B | C) & ~(A & B & C) --> (~A | (B ^ C))
2094 if (match(Op1, m_OneUse(m_Not(m_c_BinOp(
2095 Opcode, m_c_BinOp(Opcode, m_Specific(A), m_Specific(B)),
2096 m_Specific(C))))) ||
2097 match(Op1, m_OneUse(m_Not(m_c_BinOp(
2098 Opcode, m_c_BinOp(Opcode, m_Specific(B), m_Specific(C)),
2099 m_Specific(A))))) ||
2100 match(Op1, m_OneUse(m_Not(m_c_BinOp(
2101 Opcode, m_c_BinOp(Opcode, m_Specific(A), m_Specific(C)),
2102 m_Specific(B)))))) {
2103 Value *Xor = Builder.CreateXor(B, C);
2104 return (Opcode == Instruction::Or)
2105 ? BinaryOperator::CreateNot(Builder.CreateOr(Xor, A))
2106 : BinaryOperator::CreateOr(Xor, X);
2107 }
2108
2109 // (~A & B & C) | ~(A | B) --> (C | ~B) & ~A
2110 // (~A | B | C) & ~(A & B) --> (C & ~B) | ~A
2111 if (match(Op1, m_OneUse(m_Not(m_OneUse(
2112 m_c_BinOp(Opcode, m_Specific(A), m_Specific(B)))))))
2113 return BinaryOperator::Create(
2114 FlippedOpcode, Builder.CreateBinOp(Opcode, C, Builder.CreateNot(B)),
2115 X);
2116
2117 // (~A & B & C) | ~(A | C) --> (B | ~C) & ~A
2118 // (~A | B | C) & ~(A & C) --> (B & ~C) | ~A
2119 if (match(Op1, m_OneUse(m_Not(m_OneUse(
2120 m_c_BinOp(Opcode, m_Specific(A), m_Specific(C)))))))
2121 return BinaryOperator::Create(
2122 FlippedOpcode, Builder.CreateBinOp(Opcode, B, Builder.CreateNot(C)),
2123 X);
2124 }
2125
2126 return nullptr;
2127 }
2128
2129 /// Try to reassociate a pair of binops so that values with one use only are
2130 /// part of the same instruction. This may enable folds that are limited with
2131 /// multi-use restrictions and makes it more likely to match other patterns that
2132 /// are looking for a common operand.
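/// For example, when X has extra uses but Y and Z are single-use:
///   (X & Y) & Z --> (Y & Z) & X
/// so the single-use values end up feeding the same instruction.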
2133 static Instruction *reassociateForUses(BinaryOperator &BO, 2134 InstCombinerImpl::BuilderTy &Builder) { 2135 Instruction::BinaryOps Opcode = BO.getOpcode(); 2136 Value *X, *Y, *Z; 2137 if (match(&BO, 2138 m_c_BinOp(Opcode, m_OneUse(m_BinOp(Opcode, m_Value(X), m_Value(Y))), 2139 m_OneUse(m_Value(Z))))) { 2140 if (!isa<Constant>(X) && !isa<Constant>(Y) && !isa<Constant>(Z)) { 2141 // (X op Y) op Z --> (Y op Z) op X 2142 if (!X->hasOneUse()) { 2143 Value *YZ = Builder.CreateBinOp(Opcode, Y, Z); 2144 return BinaryOperator::Create(Opcode, YZ, X); 2145 } 2146 // (X op Y) op Z --> (X op Z) op Y 2147 if (!Y->hasOneUse()) { 2148 Value *XZ = Builder.CreateBinOp(Opcode, X, Z); 2149 return BinaryOperator::Create(Opcode, XZ, Y); 2150 } 2151 } 2152 } 2153 2154 return nullptr; 2155 } 2156 2157 // Match 2158 // (X + C2) | C 2159 // (X + C2) ^ C 2160 // (X + C2) & C 2161 // and convert to do the bitwise logic first: 2162 // (X | C) + C2 2163 // (X ^ C) + C2 2164 // (X & C) + C2 2165 // iff bits affected by logic op are lower than last bit affected by math op 2166 static Instruction *canonicalizeLogicFirst(BinaryOperator &I, 2167 InstCombiner::BuilderTy &Builder) { 2168 Type *Ty = I.getType(); 2169 Instruction::BinaryOps OpC = I.getOpcode(); 2170 Value *Op0 = I.getOperand(0); 2171 Value *Op1 = I.getOperand(1); 2172 Value *X; 2173 const APInt *C, *C2; 2174 2175 if (!(match(Op0, m_OneUse(m_Add(m_Value(X), m_APInt(C2)))) && 2176 match(Op1, m_APInt(C)))) 2177 return nullptr; 2178 2179 unsigned Width = Ty->getScalarSizeInBits(); 2180 unsigned LastOneMath = Width - C2->countr_zero(); 2181 2182 switch (OpC) { 2183 case Instruction::And: 2184 if (C->countl_one() < LastOneMath) 2185 return nullptr; 2186 break; 2187 case Instruction::Xor: 2188 case Instruction::Or: 2189 if (C->countl_zero() < LastOneMath) 2190 return nullptr; 2191 break; 2192 default: 2193 llvm_unreachable("Unexpected BinaryOp!"); 2194 } 2195 2196 Value *NewBinOp = Builder.CreateBinOp(OpC, X, ConstantInt::get(Ty, *C)); 2197 return BinaryOperator::CreateWithCopiedFlags(Instruction::Add, NewBinOp, 2198 ConstantInt::get(Ty, *C2), Op0); 2199 } 2200 2201 // binop(shift(ShiftedC1, ShAmt), shift(ShiftedC2, add(ShAmt, AddC))) -> 2202 // shift(binop(ShiftedC1, shift(ShiftedC2, AddC)), ShAmt) 2203 // where both shifts are the same and AddC is a valid shift amount. 2204 Instruction *InstCombinerImpl::foldBinOpOfDisplacedShifts(BinaryOperator &I) { 2205 assert((I.isBitwiseLogicOp() || I.getOpcode() == Instruction::Add) && 2206 "Unexpected opcode"); 2207 2208 Value *ShAmt; 2209 Constant *ShiftedC1, *ShiftedC2, *AddC; 2210 Type *Ty = I.getType(); 2211 unsigned BitWidth = Ty->getScalarSizeInBits(); 2212 if (!match(&I, m_c_BinOp(m_Shift(m_ImmConstant(ShiftedC1), m_Value(ShAmt)), 2213 m_Shift(m_ImmConstant(ShiftedC2), 2214 m_AddLike(m_Deferred(ShAmt), 2215 m_ImmConstant(AddC)))))) 2216 return nullptr; 2217 2218 // Make sure the add constant is a valid shift amount. 2219 if (!match(AddC, 2220 m_SpecificInt_ICMP(ICmpInst::ICMP_ULT, APInt(BitWidth, BitWidth)))) 2221 return nullptr; 2222 2223 // Avoid constant expressions. 2224 auto *Op0Inst = dyn_cast<Instruction>(I.getOperand(0)); 2225 auto *Op1Inst = dyn_cast<Instruction>(I.getOperand(1)); 2226 if (!Op0Inst || !Op1Inst) 2227 return nullptr; 2228 2229 // Both shifts must be the same. 2230 Instruction::BinaryOps ShiftOp = 2231 static_cast<Instruction::BinaryOps>(Op0Inst->getOpcode()); 2232 if (ShiftOp != Op1Inst->getOpcode()) 2233 return nullptr; 2234 2235 // For adds, only left shifts are supported. 
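// A concrete instance of the fold, using 'or' and shl with illustrative
// constants:
//   or (shl 2, %x), (shl 8, (add %x, 2)) --> shl (2 | (8 << 2)), %x = shl 34, %x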
2236 if (I.getOpcode() == Instruction::Add && ShiftOp != Instruction::Shl) 2237 return nullptr; 2238 2239 Value *NewC = Builder.CreateBinOp( 2240 I.getOpcode(), ShiftedC1, Builder.CreateBinOp(ShiftOp, ShiftedC2, AddC)); 2241 return BinaryOperator::Create(ShiftOp, NewC, ShAmt); 2242 } 2243 2244 // Fold and/or/xor with two equal intrinsic IDs: 2245 // bitwise(fshl (A, B, ShAmt), fshl(C, D, ShAmt)) 2246 // -> fshl(bitwise(A, C), bitwise(B, D), ShAmt) 2247 // bitwise(fshr (A, B, ShAmt), fshr(C, D, ShAmt)) 2248 // -> fshr(bitwise(A, C), bitwise(B, D), ShAmt) 2249 // bitwise(bswap(A), bswap(B)) -> bswap(bitwise(A, B)) 2250 // bitwise(bswap(A), C) -> bswap(bitwise(A, bswap(C))) 2251 // bitwise(bitreverse(A), bitreverse(B)) -> bitreverse(bitwise(A, B)) 2252 // bitwise(bitreverse(A), C) -> bitreverse(bitwise(A, bitreverse(C))) 2253 static Instruction * 2254 foldBitwiseLogicWithIntrinsics(BinaryOperator &I, 2255 InstCombiner::BuilderTy &Builder) { 2256 assert(I.isBitwiseLogicOp() && "Should and/or/xor"); 2257 if (!I.getOperand(0)->hasOneUse()) 2258 return nullptr; 2259 IntrinsicInst *X = dyn_cast<IntrinsicInst>(I.getOperand(0)); 2260 if (!X) 2261 return nullptr; 2262 2263 IntrinsicInst *Y = dyn_cast<IntrinsicInst>(I.getOperand(1)); 2264 if (Y && (!Y->hasOneUse() || X->getIntrinsicID() != Y->getIntrinsicID())) 2265 return nullptr; 2266 2267 Intrinsic::ID IID = X->getIntrinsicID(); 2268 const APInt *RHSC; 2269 // Try to match constant RHS. 2270 if (!Y && (!(IID == Intrinsic::bswap || IID == Intrinsic::bitreverse) || 2271 !match(I.getOperand(1), m_APInt(RHSC)))) 2272 return nullptr; 2273 2274 switch (IID) { 2275 case Intrinsic::fshl: 2276 case Intrinsic::fshr: { 2277 if (X->getOperand(2) != Y->getOperand(2)) 2278 return nullptr; 2279 Value *NewOp0 = 2280 Builder.CreateBinOp(I.getOpcode(), X->getOperand(0), Y->getOperand(0)); 2281 Value *NewOp1 = 2282 Builder.CreateBinOp(I.getOpcode(), X->getOperand(1), Y->getOperand(1)); 2283 Function *F = 2284 Intrinsic::getOrInsertDeclaration(I.getModule(), IID, I.getType()); 2285 return CallInst::Create(F, {NewOp0, NewOp1, X->getOperand(2)}); 2286 } 2287 case Intrinsic::bswap: 2288 case Intrinsic::bitreverse: { 2289 Value *NewOp0 = Builder.CreateBinOp( 2290 I.getOpcode(), X->getOperand(0), 2291 Y ? Y->getOperand(0) 2292 : ConstantInt::get(I.getType(), IID == Intrinsic::bswap 2293 ? RHSC->byteSwap() 2294 : RHSC->reverseBits())); 2295 Function *F = 2296 Intrinsic::getOrInsertDeclaration(I.getModule(), IID, I.getType()); 2297 return CallInst::Create(F, {NewOp0}); 2298 } 2299 default: 2300 return nullptr; 2301 } 2302 } 2303 2304 // Try to simplify V by replacing occurrences of Op with RepOp, but only look 2305 // through bitwise operations. In particular, for X | Y we try to replace Y with 2306 // 0 inside X and for X & Y we try to replace Y with -1 inside X. 2307 // Return the simplified result of X if successful, and nullptr otherwise. 2308 // If SimplifyOnly is true, no new instructions will be created. 
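// For example, given ((X | Z) & X), replacing X with -1 inside (X | Z) folds
// that operand to -1, so the whole 'and' simplifies to X.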
2309 static Value *simplifyAndOrWithOpReplaced(Value *V, Value *Op, Value *RepOp, 2310 bool SimplifyOnly, 2311 InstCombinerImpl &IC, 2312 unsigned Depth = 0) { 2313 if (Op == RepOp) 2314 return nullptr; 2315 2316 if (V == Op) 2317 return RepOp; 2318 2319 auto *I = dyn_cast<BinaryOperator>(V); 2320 if (!I || !I->isBitwiseLogicOp() || Depth >= 3) 2321 return nullptr; 2322 2323 if (!I->hasOneUse()) 2324 SimplifyOnly = true; 2325 2326 Value *NewOp0 = simplifyAndOrWithOpReplaced(I->getOperand(0), Op, RepOp, 2327 SimplifyOnly, IC, Depth + 1); 2328 Value *NewOp1 = simplifyAndOrWithOpReplaced(I->getOperand(1), Op, RepOp, 2329 SimplifyOnly, IC, Depth + 1); 2330 if (!NewOp0 && !NewOp1) 2331 return nullptr; 2332 2333 if (!NewOp0) 2334 NewOp0 = I->getOperand(0); 2335 if (!NewOp1) 2336 NewOp1 = I->getOperand(1); 2337 2338 if (Value *Res = simplifyBinOp(I->getOpcode(), NewOp0, NewOp1, 2339 IC.getSimplifyQuery().getWithInstruction(I))) 2340 return Res; 2341 2342 if (SimplifyOnly) 2343 return nullptr; 2344 return IC.Builder.CreateBinOp(I->getOpcode(), NewOp0, NewOp1); 2345 } 2346 2347 /// Reassociate and/or expressions to see if we can fold the inner and/or ops. 2348 /// TODO: Make this recursive; it's a little tricky because an arbitrary 2349 /// number of and/or instructions might have to be created. 2350 Value *InstCombinerImpl::reassociateBooleanAndOr(Value *LHS, Value *X, Value *Y, 2351 Instruction &I, bool IsAnd, 2352 bool RHSIsLogical) { 2353 Instruction::BinaryOps Opcode = IsAnd ? Instruction::And : Instruction::Or; 2354 // LHS bop (X lop Y) --> (LHS bop X) lop Y 2355 // LHS bop (X bop Y) --> (LHS bop X) bop Y 2356 if (Value *Res = foldBooleanAndOr(LHS, X, I, IsAnd, /*IsLogical=*/false)) 2357 return RHSIsLogical ? Builder.CreateLogicalOp(Opcode, Res, Y) 2358 : Builder.CreateBinOp(Opcode, Res, Y); 2359 // LHS bop (X bop Y) --> X bop (LHS bop Y) 2360 // LHS bop (X lop Y) --> X lop (LHS bop Y) 2361 if (Value *Res = foldBooleanAndOr(LHS, Y, I, IsAnd, /*IsLogical=*/false)) 2362 return RHSIsLogical ? Builder.CreateLogicalOp(Opcode, X, Res) 2363 : Builder.CreateBinOp(Opcode, X, Res); 2364 return nullptr; 2365 } 2366 2367 // FIXME: We use commutative matchers (m_c_*) for some, but not all, matches 2368 // here. We should standardize that construct where it is needed or choose some 2369 // other way to ensure that commutated variants of patterns are not missed. 2370 Instruction *InstCombinerImpl::visitAnd(BinaryOperator &I) { 2371 Type *Ty = I.getType(); 2372 2373 if (Value *V = simplifyAndInst(I.getOperand(0), I.getOperand(1), 2374 SQ.getWithInstruction(&I))) 2375 return replaceInstUsesWith(I, V); 2376 2377 if (SimplifyAssociativeOrCommutative(I)) 2378 return &I; 2379 2380 if (Instruction *X = foldVectorBinop(I)) 2381 return X; 2382 2383 if (Instruction *Phi = foldBinopWithPhiOperands(I)) 2384 return Phi; 2385 2386 // See if we can simplify any instructions used by the instruction whose sole 2387 // purpose is to compute bits we don't care about. 2388 if (SimplifyDemandedInstructionBits(I)) 2389 return &I; 2390 2391 // Do this before using distributive laws to catch simple and/or/not patterns. 
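// For example, foldAndToXor handles (A | B) & ~(A & B) --> A ^ B.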
2392 if (Instruction *Xor = foldAndToXor(I, Builder)) 2393 return Xor; 2394 2395 if (Instruction *X = foldComplexAndOrPatterns(I, Builder)) 2396 return X; 2397 2398 // (A|B)&(A|C) -> A|(B&C) etc 2399 if (Value *V = foldUsingDistributiveLaws(I)) 2400 return replaceInstUsesWith(I, V); 2401 2402 if (Instruction *R = foldBinOpShiftWithShift(I)) 2403 return R; 2404 2405 Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); 2406 2407 Value *X, *Y; 2408 const APInt *C; 2409 if ((match(Op0, m_OneUse(m_LogicalShift(m_One(), m_Value(X)))) || 2410 (match(Op0, m_OneUse(m_Shl(m_APInt(C), m_Value(X)))) && (*C)[0])) && 2411 match(Op1, m_One())) { 2412 // (1 >> X) & 1 --> zext(X == 0) 2413 // (C << X) & 1 --> zext(X == 0), when C is odd 2414 Value *IsZero = Builder.CreateICmpEQ(X, ConstantInt::get(Ty, 0)); 2415 return new ZExtInst(IsZero, Ty); 2416 } 2417 2418 // (-(X & 1)) & Y --> (X & 1) == 0 ? 0 : Y 2419 Value *Neg; 2420 if (match(&I, 2421 m_c_And(m_CombineAnd(m_Value(Neg), 2422 m_OneUse(m_Neg(m_And(m_Value(), m_One())))), 2423 m_Value(Y)))) { 2424 Value *Cmp = Builder.CreateIsNull(Neg); 2425 return SelectInst::Create(Cmp, ConstantInt::getNullValue(Ty), Y); 2426 } 2427 2428 // Canonicalize: 2429 // (X +/- Y) & Y --> ~X & Y when Y is a power of 2. 2430 if (match(&I, m_c_And(m_Value(Y), m_OneUse(m_CombineOr( 2431 m_c_Add(m_Value(X), m_Deferred(Y)), 2432 m_Sub(m_Value(X), m_Deferred(Y)))))) && 2433 isKnownToBeAPowerOfTwo(Y, /*OrZero*/ true, /*Depth*/ 0, &I)) 2434 return BinaryOperator::CreateAnd(Builder.CreateNot(X), Y); 2435 2436 if (match(Op1, m_APInt(C))) { 2437 const APInt *XorC; 2438 if (match(Op0, m_OneUse(m_Xor(m_Value(X), m_APInt(XorC))))) { 2439 // (X ^ C1) & C2 --> (X & C2) ^ (C1&C2) 2440 Constant *NewC = ConstantInt::get(Ty, *C & *XorC); 2441 Value *And = Builder.CreateAnd(X, Op1); 2442 And->takeName(Op0); 2443 return BinaryOperator::CreateXor(And, NewC); 2444 } 2445 2446 const APInt *OrC; 2447 if (match(Op0, m_OneUse(m_Or(m_Value(X), m_APInt(OrC))))) { 2448 // (X | C1) & C2 --> (X & C2^(C1&C2)) | (C1&C2) 2449 // NOTE: This reduces the number of bits set in the & mask, which 2450 // can expose opportunities for store narrowing for scalars. 2451 // NOTE: SimplifyDemandedBits should have already removed bits from C1 2452 // that aren't set in C2. Meaning we can replace (C1&C2) with C1 in 2453 // above, but this feels safer. 
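// E.g. with illustrative constants: (X | 0x0F) & 0x3C --> (X & 0x30) | 0x0C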
2454 APInt Together = *C & *OrC; 2455 Value *And = Builder.CreateAnd(X, ConstantInt::get(Ty, Together ^ *C)); 2456 And->takeName(Op0); 2457 return BinaryOperator::CreateOr(And, ConstantInt::get(Ty, Together)); 2458 } 2459 2460 unsigned Width = Ty->getScalarSizeInBits(); 2461 const APInt *ShiftC; 2462 if (match(Op0, m_OneUse(m_SExt(m_AShr(m_Value(X), m_APInt(ShiftC))))) && 2463 ShiftC->ult(Width)) { 2464 if (*C == APInt::getLowBitsSet(Width, Width - ShiftC->getZExtValue())) { 2465 // We are clearing high bits that were potentially set by sext+ashr: 2466 // and (sext (ashr X, ShiftC)), C --> lshr (sext X), ShiftC 2467 Value *Sext = Builder.CreateSExt(X, Ty); 2468 Constant *ShAmtC = ConstantInt::get(Ty, ShiftC->zext(Width)); 2469 return BinaryOperator::CreateLShr(Sext, ShAmtC); 2470 } 2471 } 2472 2473 // If this 'and' clears the sign-bits added by ashr, replace with lshr: 2474 // and (ashr X, ShiftC), C --> lshr X, ShiftC 2475 if (match(Op0, m_AShr(m_Value(X), m_APInt(ShiftC))) && ShiftC->ult(Width) && 2476 C->isMask(Width - ShiftC->getZExtValue())) 2477 return BinaryOperator::CreateLShr(X, ConstantInt::get(Ty, *ShiftC)); 2478 2479 const APInt *AddC; 2480 if (match(Op0, m_Add(m_Value(X), m_APInt(AddC)))) { 2481 // If we are masking the result of the add down to exactly one bit and 2482 // the constant we are adding has no bits set below that bit, then the 2483 // add is flipping a single bit. Example: 2484 // (X + 4) & 4 --> (X & 4) ^ 4 2485 if (Op0->hasOneUse() && C->isPowerOf2() && (*AddC & (*C - 1)) == 0) { 2486 assert((*C & *AddC) != 0 && "Expected common bit"); 2487 Value *NewAnd = Builder.CreateAnd(X, Op1); 2488 return BinaryOperator::CreateXor(NewAnd, Op1); 2489 } 2490 } 2491 2492 // ((C1 OP zext(X)) & C2) -> zext((C1 OP X) & C2) if C2 fits in the 2493 // bitwidth of X and OP behaves well when given trunc(C1) and X. 2494 auto isNarrowableBinOpcode = [](BinaryOperator *B) { 2495 switch (B->getOpcode()) { 2496 case Instruction::Xor: 2497 case Instruction::Or: 2498 case Instruction::Mul: 2499 case Instruction::Add: 2500 case Instruction::Sub: 2501 return true; 2502 default: 2503 return false; 2504 } 2505 }; 2506 BinaryOperator *BO; 2507 if (match(Op0, m_OneUse(m_BinOp(BO))) && isNarrowableBinOpcode(BO)) { 2508 Instruction::BinaryOps BOpcode = BO->getOpcode(); 2509 Value *X; 2510 const APInt *C1; 2511 // TODO: The one-use restrictions could be relaxed a little if the AND 2512 // is going to be removed. 2513 // Try to narrow the 'and' and a binop with constant operand: 2514 // and (bo (zext X), C1), C --> zext (and (bo X, TruncC1), TruncC) 2515 if (match(BO, m_c_BinOp(m_OneUse(m_ZExt(m_Value(X))), m_APInt(C1))) && 2516 C->isIntN(X->getType()->getScalarSizeInBits())) { 2517 unsigned XWidth = X->getType()->getScalarSizeInBits(); 2518 Constant *TruncC1 = ConstantInt::get(X->getType(), C1->trunc(XWidth)); 2519 Value *BinOp = isa<ZExtInst>(BO->getOperand(0)) 2520 ? 
Builder.CreateBinOp(BOpcode, X, TruncC1) 2521 : Builder.CreateBinOp(BOpcode, TruncC1, X); 2522 Constant *TruncC = ConstantInt::get(X->getType(), C->trunc(XWidth)); 2523 Value *And = Builder.CreateAnd(BinOp, TruncC); 2524 return new ZExtInst(And, Ty); 2525 } 2526 2527 // Similar to above: if the mask matches the zext input width, then the 2528 // 'and' can be eliminated, so we can truncate the other variable op: 2529 // and (bo (zext X), Y), C --> zext (bo X, (trunc Y)) 2530 if (isa<Instruction>(BO->getOperand(0)) && 2531 match(BO->getOperand(0), m_OneUse(m_ZExt(m_Value(X)))) && 2532 C->isMask(X->getType()->getScalarSizeInBits())) { 2533 Y = BO->getOperand(1); 2534 Value *TrY = Builder.CreateTrunc(Y, X->getType(), Y->getName() + ".tr"); 2535 Value *NewBO = 2536 Builder.CreateBinOp(BOpcode, X, TrY, BO->getName() + ".narrow"); 2537 return new ZExtInst(NewBO, Ty); 2538 } 2539 // and (bo Y, (zext X)), C --> zext (bo (trunc Y), X) 2540 if (isa<Instruction>(BO->getOperand(1)) && 2541 match(BO->getOperand(1), m_OneUse(m_ZExt(m_Value(X)))) && 2542 C->isMask(X->getType()->getScalarSizeInBits())) { 2543 Y = BO->getOperand(0); 2544 Value *TrY = Builder.CreateTrunc(Y, X->getType(), Y->getName() + ".tr"); 2545 Value *NewBO = 2546 Builder.CreateBinOp(BOpcode, TrY, X, BO->getName() + ".narrow"); 2547 return new ZExtInst(NewBO, Ty); 2548 } 2549 } 2550 2551 // This is intentionally placed after the narrowing transforms for 2552 // efficiency (transform directly to the narrow logic op if possible). 2553 // If the mask is only needed on one incoming arm, push the 'and' op up. 2554 if (match(Op0, m_OneUse(m_Xor(m_Value(X), m_Value(Y)))) || 2555 match(Op0, m_OneUse(m_Or(m_Value(X), m_Value(Y))))) { 2556 APInt NotAndMask(~(*C)); 2557 BinaryOperator::BinaryOps BinOp = cast<BinaryOperator>(Op0)->getOpcode(); 2558 if (MaskedValueIsZero(X, NotAndMask, 0, &I)) { 2559 // Not masking anything out for the LHS, move mask to RHS. 2560 // and ({x}or X, Y), C --> {x}or X, (and Y, C) 2561 Value *NewRHS = Builder.CreateAnd(Y, Op1, Y->getName() + ".masked"); 2562 return BinaryOperator::Create(BinOp, X, NewRHS); 2563 } 2564 if (!isa<Constant>(Y) && MaskedValueIsZero(Y, NotAndMask, 0, &I)) { 2565 // Not masking anything out for the RHS, move mask to LHS. 2566 // and ({x}or X, Y), C --> {x}or (and X, C), Y 2567 Value *NewLHS = Builder.CreateAnd(X, Op1, X->getName() + ".masked"); 2568 return BinaryOperator::Create(BinOp, NewLHS, Y); 2569 } 2570 } 2571 2572 // When the mask is a power-of-2 constant and op0 is a shifted-power-of-2 2573 // constant, test if the shift amount equals the offset bit index: 2574 // (ShiftC << X) & C --> X == (log2(C) - log2(ShiftC)) ? C : 0 2575 // (ShiftC >> X) & C --> X == (log2(ShiftC) - log2(C)) ? C : 0 2576 if (C->isPowerOf2() && 2577 match(Op0, m_OneUse(m_LogicalShift(m_Power2(ShiftC), m_Value(X))))) { 2578 int Log2ShiftC = ShiftC->exactLogBase2(); 2579 int Log2C = C->exactLogBase2(); 2580 bool IsShiftLeft = 2581 cast<BinaryOperator>(Op0)->getOpcode() == Instruction::Shl; 2582 int BitNum = IsShiftLeft ? 
Log2C - Log2ShiftC : Log2ShiftC - Log2C; 2583 assert(BitNum >= 0 && "Expected demanded bits to handle impossible mask"); 2584 Value *Cmp = Builder.CreateICmpEQ(X, ConstantInt::get(Ty, BitNum)); 2585 return SelectInst::Create(Cmp, ConstantInt::get(Ty, *C), 2586 ConstantInt::getNullValue(Ty)); 2587 } 2588 2589 Constant *C1, *C2; 2590 const APInt *C3 = C; 2591 Value *X; 2592 if (C3->isPowerOf2()) { 2593 Constant *Log2C3 = ConstantInt::get(Ty, C3->countr_zero()); 2594 if (match(Op0, m_OneUse(m_LShr(m_Shl(m_ImmConstant(C1), m_Value(X)), 2595 m_ImmConstant(C2)))) && 2596 match(C1, m_Power2())) { 2597 Constant *Log2C1 = ConstantExpr::getExactLogBase2(C1); 2598 Constant *LshrC = ConstantExpr::getAdd(C2, Log2C3); 2599 KnownBits KnownLShrc = computeKnownBits(LshrC, 0, nullptr); 2600 if (KnownLShrc.getMaxValue().ult(Width)) { 2601 // iff C1,C3 is pow2 and C2 + cttz(C3) < BitWidth: 2602 // ((C1 << X) >> C2) & C3 -> X == (cttz(C3)+C2-cttz(C1)) ? C3 : 0 2603 Constant *CmpC = ConstantExpr::getSub(LshrC, Log2C1); 2604 Value *Cmp = Builder.CreateICmpEQ(X, CmpC); 2605 return SelectInst::Create(Cmp, ConstantInt::get(Ty, *C3), 2606 ConstantInt::getNullValue(Ty)); 2607 } 2608 } 2609 2610 if (match(Op0, m_OneUse(m_Shl(m_LShr(m_ImmConstant(C1), m_Value(X)), 2611 m_ImmConstant(C2)))) && 2612 match(C1, m_Power2())) { 2613 Constant *Log2C1 = ConstantExpr::getExactLogBase2(C1); 2614 Constant *Cmp = 2615 ConstantFoldCompareInstOperands(ICmpInst::ICMP_ULT, Log2C3, C2, DL); 2616 if (Cmp && Cmp->isZeroValue()) { 2617 // iff C1,C3 is pow2 and Log2(C3) >= C2: 2618 // ((C1 >> X) << C2) & C3 -> X == (cttz(C1)+C2-cttz(C3)) ? C3 : 0 2619 Constant *ShlC = ConstantExpr::getAdd(C2, Log2C1); 2620 Constant *CmpC = ConstantExpr::getSub(ShlC, Log2C3); 2621 Value *Cmp = Builder.CreateICmpEQ(X, CmpC); 2622 return SelectInst::Create(Cmp, ConstantInt::get(Ty, *C3), 2623 ConstantInt::getNullValue(Ty)); 2624 } 2625 } 2626 } 2627 } 2628 2629 // If we are clearing the sign bit of a floating-point value, convert this to 2630 // fabs, then cast back to integer. 2631 // 2632 // This is a generous interpretation for noimplicitfloat, this is not a true 2633 // floating-point operation. 2634 // 2635 // Assumes any IEEE-represented type has the sign bit in the high bit. 2636 // TODO: Unify with APInt matcher. This version allows undef unlike m_APInt 2637 Value *CastOp; 2638 if (match(Op0, m_ElementWiseBitCast(m_Value(CastOp))) && 2639 match(Op1, m_MaxSignedValue()) && 2640 !Builder.GetInsertBlock()->getParent()->hasFnAttribute( 2641 Attribute::NoImplicitFloat)) { 2642 Type *EltTy = CastOp->getType()->getScalarType(); 2643 if (EltTy->isFloatingPointTy() && EltTy->isIEEE()) { 2644 Value *FAbs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, CastOp); 2645 return new BitCastInst(FAbs, I.getType()); 2646 } 2647 } 2648 2649 // and(shl(zext(X), Y), SignMask) -> and(sext(X), SignMask) 2650 // where Y is a valid shift amount. 
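// For example (widths illustrative):
//   and (shl (zext i16 %x to i32), 16), 0x80000000
//   --> and (sext i16 %x to i32), 0x80000000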
2651 if (match(&I, m_And(m_OneUse(m_Shl(m_ZExt(m_Value(X)), m_Value(Y))), 2652 m_SignMask())) && 2653 match(Y, m_SpecificInt_ICMP( 2654 ICmpInst::Predicate::ICMP_EQ, 2655 APInt(Ty->getScalarSizeInBits(), 2656 Ty->getScalarSizeInBits() - 2657 X->getType()->getScalarSizeInBits())))) { 2658 auto *SExt = Builder.CreateSExt(X, Ty, X->getName() + ".signext"); 2659 return BinaryOperator::CreateAnd(SExt, Op1); 2660 } 2661 2662 if (Instruction *Z = narrowMaskedBinOp(I)) 2663 return Z; 2664 2665 if (I.getType()->isIntOrIntVectorTy(1)) { 2666 if (auto *SI0 = dyn_cast<SelectInst>(Op0)) { 2667 if (auto *R = 2668 foldAndOrOfSelectUsingImpliedCond(Op1, *SI0, /* IsAnd */ true)) 2669 return R; 2670 } 2671 if (auto *SI1 = dyn_cast<SelectInst>(Op1)) { 2672 if (auto *R = 2673 foldAndOrOfSelectUsingImpliedCond(Op0, *SI1, /* IsAnd */ true)) 2674 return R; 2675 } 2676 } 2677 2678 if (Instruction *FoldedLogic = foldBinOpIntoSelectOrPhi(I)) 2679 return FoldedLogic; 2680 2681 if (Instruction *DeMorgan = matchDeMorgansLaws(I, *this)) 2682 return DeMorgan; 2683 2684 { 2685 Value *A, *B, *C; 2686 // A & ~(A ^ B) --> A & B 2687 if (match(Op1, m_Not(m_c_Xor(m_Specific(Op0), m_Value(B))))) 2688 return BinaryOperator::CreateAnd(Op0, B); 2689 // ~(A ^ B) & A --> A & B 2690 if (match(Op0, m_Not(m_c_Xor(m_Specific(Op1), m_Value(B))))) 2691 return BinaryOperator::CreateAnd(Op1, B); 2692 2693 // (A ^ B) & ((B ^ C) ^ A) -> (A ^ B) & ~C 2694 if (match(Op0, m_Xor(m_Value(A), m_Value(B))) && 2695 match(Op1, m_Xor(m_Xor(m_Specific(B), m_Value(C)), m_Specific(A)))) { 2696 Value *NotC = Op1->hasOneUse() 2697 ? Builder.CreateNot(C) 2698 : getFreelyInverted(C, C->hasOneUse(), &Builder); 2699 if (NotC != nullptr) 2700 return BinaryOperator::CreateAnd(Op0, NotC); 2701 } 2702 2703 // ((A ^ C) ^ B) & (B ^ A) -> (B ^ A) & ~C 2704 if (match(Op0, m_Xor(m_Xor(m_Value(A), m_Value(C)), m_Value(B))) && 2705 match(Op1, m_Xor(m_Specific(B), m_Specific(A)))) { 2706 Value *NotC = Op0->hasOneUse() 2707 ? 
Builder.CreateNot(C)
2708 : getFreelyInverted(C, C->hasOneUse(), &Builder);
2709 if (NotC != nullptr)
2710 return BinaryOperator::CreateAnd(Op1, NotC);
2711 }
2712
2713 // (A | B) & (~A ^ B) -> A & B
2714 // (A | B) & (B ^ ~A) -> A & B
2715 // (B | A) & (~A ^ B) -> A & B
2716 // (B | A) & (B ^ ~A) -> A & B
2717 if (match(Op1, m_c_Xor(m_Not(m_Value(A)), m_Value(B))) &&
2718 match(Op0, m_c_Or(m_Specific(A), m_Specific(B))))
2719 return BinaryOperator::CreateAnd(A, B);
2720
2721 // (~A ^ B) & (A | B) -> A & B
2722 // (~A ^ B) & (B | A) -> A & B
2723 // (B ^ ~A) & (A | B) -> A & B
2724 // (B ^ ~A) & (B | A) -> A & B
2725 if (match(Op0, m_c_Xor(m_Not(m_Value(A)), m_Value(B))) &&
2726 match(Op1, m_c_Or(m_Specific(A), m_Specific(B))))
2727 return BinaryOperator::CreateAnd(A, B);
2728
2729 // (~A | B) & (A ^ B) -> ~A & B
2730 // (~A | B) & (B ^ A) -> ~A & B
2731 // (B | ~A) & (A ^ B) -> ~A & B
2732 // (B | ~A) & (B ^ A) -> ~A & B
2733 if (match(Op0, m_c_Or(m_Not(m_Value(A)), m_Value(B))) &&
2734 match(Op1, m_c_Xor(m_Specific(A), m_Specific(B))))
2735 return BinaryOperator::CreateAnd(Builder.CreateNot(A), B);
2736
2737 // (A ^ B) & (~A | B) -> ~A & B
2738 // (B ^ A) & (~A | B) -> ~A & B
2739 // (A ^ B) & (B | ~A) -> ~A & B
2740 // (B ^ A) & (B | ~A) -> ~A & B
2741 if (match(Op1, m_c_Or(m_Not(m_Value(A)), m_Value(B))) &&
2742 match(Op0, m_c_Xor(m_Specific(A), m_Specific(B))))
2743 return BinaryOperator::CreateAnd(Builder.CreateNot(A), B);
2744 }
2745
2746 if (Value *Res =
2747 foldBooleanAndOr(Op0, Op1, I, /*IsAnd=*/true, /*IsLogical=*/false))
2748 return replaceInstUsesWith(I, Res);
2749
2750 if (match(Op1, m_OneUse(m_LogicalAnd(m_Value(X), m_Value(Y))))) {
2751 bool IsLogical = isa<SelectInst>(Op1);
2752 if (auto *V = reassociateBooleanAndOr(Op0, X, Y, I, /*IsAnd=*/true,
2753 /*RHSIsLogical=*/IsLogical))
2754 return replaceInstUsesWith(I, V);
2755 }
2756 if (match(Op0, m_OneUse(m_LogicalAnd(m_Value(X), m_Value(Y))))) {
2757 bool IsLogical = isa<SelectInst>(Op0);
2758 if (auto *V = reassociateBooleanAndOr(Op1, X, Y, I, /*IsAnd=*/true,
2759 /*RHSIsLogical=*/IsLogical))
2760 return replaceInstUsesWith(I, V);
2761 }
2762
2763 if (Instruction *FoldedFCmps = reassociateFCmps(I, Builder))
2764 return FoldedFCmps;
2765
2766 if (Instruction *CastedAnd = foldCastedBitwiseLogic(I))
2767 return CastedAnd;
2768
2769 if (Instruction *Sel = foldBinopOfSextBoolToSelect(I))
2770 return Sel;
2771
2772 // and(sext(A), B) / and(B, sext(A)) --> A ? B : 0, where A is i1 or <N x i1>.
2773 // TODO: Move this into foldBinopOfSextBoolToSelect as a more generalized fold
2774 // with binop identity constant. But creating a select with non-constant
2775 // arm may not be reversible due to poison semantics. Is that a good
2776 // canonicalization?
2777 Value *A, *B;
2778 if (match(&I, m_c_And(m_SExt(m_Value(A)), m_Value(B))) &&
2779 A->getType()->isIntOrIntVectorTy(1))
2780 return SelectInst::Create(A, B, Constant::getNullValue(Ty));
2781
2782 // Similarly, a 'not' of the bool translates to a swap of the select arms:
2783 // ~sext(A) & B / B & ~sext(A) --> A ? 0 : B
2784 if (match(&I, m_c_And(m_Not(m_SExt(m_Value(A))), m_Value(B))) &&
2785 A->getType()->isIntOrIntVectorTy(1))
2786 return SelectInst::Create(A, Constant::getNullValue(Ty), B);
2787
2788 // and(zext(A), B) -> A ? (B & 1) : 0
2789 if (match(&I, m_c_And(m_OneUse(m_ZExt(m_Value(A))), m_Value(B))) &&
2790 A->getType()->isIntOrIntVectorTy(1))
2791 return SelectInst::Create(A, Builder.CreateAnd(B, ConstantInt::get(Ty, 1)),
2792 Constant::getNullValue(Ty));
2793
2794 // (-1 + A) & B --> A ? 0 : B where A is 0/1.
2795 if (match(&I, m_c_And(m_OneUse(m_Add(m_ZExtOrSelf(m_Value(A)), m_AllOnes())),
2796 m_Value(B)))) {
2797 if (A->getType()->isIntOrIntVectorTy(1))
2798 return SelectInst::Create(A, Constant::getNullValue(Ty), B);
2799 if (computeKnownBits(A, /* Depth */ 0, &I).countMaxActiveBits() <= 1) {
2800 return SelectInst::Create(
2801 Builder.CreateICmpEQ(A, Constant::getNullValue(A->getType())), B,
2802 Constant::getNullValue(Ty));
2803 }
2804 }
2805
2806 // (iN X s>> (N-1)) & Y --> (X s< 0) ? Y : 0 -- with optional sext
2807 if (match(&I, m_c_And(m_OneUse(m_SExtOrSelf(
2808 m_AShr(m_Value(X), m_APIntAllowPoison(C)))),
2809 m_Value(Y))) &&
2810 *C == X->getType()->getScalarSizeInBits() - 1) {
2811 Value *IsNeg = Builder.CreateIsNeg(X, "isneg");
2812 return SelectInst::Create(IsNeg, Y, ConstantInt::getNullValue(Ty));
2813 }
2814 // If there's a 'not' of the shifted value, swap the select operands:
2815 // ~(iN X s>> (N-1)) & Y --> (X s< 0) ? 0 : Y -- with optional sext
2816 if (match(&I, m_c_And(m_OneUse(m_SExtOrSelf(
2817 m_Not(m_AShr(m_Value(X), m_APIntAllowPoison(C))))),
2818 m_Value(Y))) &&
2819 *C == X->getType()->getScalarSizeInBits() - 1) {
2820 Value *IsNeg = Builder.CreateIsNeg(X, "isneg");
2821 return SelectInst::Create(IsNeg, ConstantInt::getNullValue(Ty), Y);
2822 }
2823
2824 // (~x) & y --> ~(x | (~y)) iff that gets rid of inversions
2825 if (sinkNotIntoOtherHandOfLogicalOp(I))
2826 return &I;
2827
2828 // An and recurrence w/ loop-invariant step is equivalent to (and start, step)
2829 PHINode *PN = nullptr;
2830 Value *Start = nullptr, *Step = nullptr;
2831 if (matchSimpleRecurrence(&I, PN, Start, Step) && DT.dominates(Step, PN))
2832 return replaceInstUsesWith(I, Builder.CreateAnd(Start, Step));
2833
2834 if (Instruction *R = reassociateForUses(I, Builder))
2835 return R;
2836
2837 if (Instruction *Canonicalized = canonicalizeLogicFirst(I, Builder))
2838 return Canonicalized;
2839
2840 if (Instruction *Folded = foldLogicOfIsFPClass(I, Op0, Op1))
2841 return Folded;
2842
2843 if (Instruction *Res = foldBinOpOfDisplacedShifts(I))
2844 return Res;
2845
2846 if (Instruction *Res = foldBitwiseLogicWithIntrinsics(I, Builder))
2847 return Res;
2848
2849 if (Value *V =
2850 simplifyAndOrWithOpReplaced(Op0, Op1, Constant::getAllOnesValue(Ty),
2851 /*SimplifyOnly*/ false, *this))
2852 return BinaryOperator::CreateAnd(V, Op1);
2853 if (Value *V =
2854 simplifyAndOrWithOpReplaced(Op1, Op0, Constant::getAllOnesValue(Ty),
2855 /*SimplifyOnly*/ false, *this))
2856 return BinaryOperator::CreateAnd(Op0, V);
2857
2858 return nullptr;
2859 }
2860
2861 Instruction *InstCombinerImpl::matchBSwapOrBitReverse(Instruction &I,
2862 bool MatchBSwaps,
2863 bool MatchBitReversals) {
2864 SmallVector<Instruction *, 4> Insts;
2865 if (!recognizeBSwapOrBitReverseIdiom(&I, MatchBSwaps, MatchBitReversals,
2866 Insts))
2867 return nullptr;
2868 Instruction *LastInst = Insts.pop_back_val();
2869 LastInst->removeFromParent();
2870
2871 for (auto *Inst : Insts) {
2872 Inst->setDebugLoc(I.getDebugLoc());
2873 Worklist.push(Inst);
2874 }
2875 return LastInst;
2876 }
2877
2878 std::optional<std::pair<Intrinsic::ID, SmallVector<Value *, 3>>>
2879 InstCombinerImpl::convertOrOfShiftsToFunnelShift(Instruction &Or) {
2880 // TODO: Can
we reduce the code duplication between this and the related 2881 // rotate matching code under visitSelect and visitTrunc? 2882 assert(Or.getOpcode() == BinaryOperator::Or && "Expecting or instruction"); 2883 2884 unsigned Width = Or.getType()->getScalarSizeInBits(); 2885 2886 Instruction *Or0, *Or1; 2887 if (!match(Or.getOperand(0), m_Instruction(Or0)) || 2888 !match(Or.getOperand(1), m_Instruction(Or1))) 2889 return std::nullopt; 2890 2891 bool IsFshl = true; // Sub on LSHR. 2892 SmallVector<Value *, 3> FShiftArgs; 2893 2894 // First, find an or'd pair of opposite shifts: 2895 // or (lshr ShVal0, ShAmt0), (shl ShVal1, ShAmt1) 2896 if (isa<BinaryOperator>(Or0) && isa<BinaryOperator>(Or1)) { 2897 Value *ShVal0, *ShVal1, *ShAmt0, *ShAmt1; 2898 if (!match(Or0, 2899 m_OneUse(m_LogicalShift(m_Value(ShVal0), m_Value(ShAmt0)))) || 2900 !match(Or1, 2901 m_OneUse(m_LogicalShift(m_Value(ShVal1), m_Value(ShAmt1)))) || 2902 Or0->getOpcode() == Or1->getOpcode()) 2903 return std::nullopt; 2904 2905 // Canonicalize to or(shl(ShVal0, ShAmt0), lshr(ShVal1, ShAmt1)). 2906 if (Or0->getOpcode() == BinaryOperator::LShr) { 2907 std::swap(Or0, Or1); 2908 std::swap(ShVal0, ShVal1); 2909 std::swap(ShAmt0, ShAmt1); 2910 } 2911 assert(Or0->getOpcode() == BinaryOperator::Shl && 2912 Or1->getOpcode() == BinaryOperator::LShr && 2913 "Illegal or(shift,shift) pair"); 2914 2915 // Match the shift amount operands for a funnel shift pattern. This always 2916 // matches a subtraction on the R operand. 2917 auto matchShiftAmount = [&](Value *L, Value *R, unsigned Width) -> Value * { 2918 // Check for constant shift amounts that sum to the bitwidth. 2919 const APInt *LI, *RI; 2920 if (match(L, m_APIntAllowPoison(LI)) && match(R, m_APIntAllowPoison(RI))) 2921 if (LI->ult(Width) && RI->ult(Width) && (*LI + *RI) == Width) 2922 return ConstantInt::get(L->getType(), *LI); 2923 2924 Constant *LC, *RC; 2925 if (match(L, m_Constant(LC)) && match(R, m_Constant(RC)) && 2926 match(L, 2927 m_SpecificInt_ICMP(ICmpInst::ICMP_ULT, APInt(Width, Width))) && 2928 match(R, 2929 m_SpecificInt_ICMP(ICmpInst::ICMP_ULT, APInt(Width, Width))) && 2930 match(ConstantExpr::getAdd(LC, RC), m_SpecificIntAllowPoison(Width))) 2931 return ConstantExpr::mergeUndefsWith(LC, RC); 2932 2933 // (shl ShVal, X) | (lshr ShVal, (Width - x)) iff X < Width. 2934 // We limit this to X < Width in case the backend re-expands the 2935 // intrinsic, and has to reintroduce a shift modulo operation (InstCombine 2936 // might remove it after this fold). This still doesn't guarantee that the 2937 // final codegen will match this original pattern. 2938 if (match(R, m_OneUse(m_Sub(m_SpecificInt(Width), m_Specific(L))))) { 2939 KnownBits KnownL = computeKnownBits(L, /*Depth*/ 0, &Or); 2940 return KnownL.getMaxValue().ult(Width) ? L : nullptr; 2941 } 2942 2943 // For non-constant cases, the following patterns currently only work for 2944 // rotation patterns. 2945 // TODO: Add general funnel-shift compatible patterns. 2946 if (ShVal0 != ShVal1) 2947 return nullptr; 2948 2949 // For non-constant cases we don't support non-pow2 shift masks. 2950 // TODO: Is it worth matching urem as well? 
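// Note that for a power-of-2 Width, masking the shift amount with (Width - 1)
// is the same as taking it modulo Width, matching the semantics of the
// fshl/fshr intrinsics.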
2951 if (!isPowerOf2_32(Width))
2952 return nullptr;
2953
2954 // The shift amount may be masked with negation:
2955 // (shl ShVal, (X & (Width - 1))) | (lshr ShVal, ((-X) & (Width - 1)))
2956 Value *X;
2957 unsigned Mask = Width - 1;
2958 if (match(L, m_And(m_Value(X), m_SpecificInt(Mask))) &&
2959 match(R, m_And(m_Neg(m_Specific(X)), m_SpecificInt(Mask))))
2960 return X;
2961
2962 // (shl ShVal, X) | (lshr ShVal, ((-X) & (Width - 1)))
2963 if (match(R, m_And(m_Neg(m_Specific(L)), m_SpecificInt(Mask))))
2964 return L;
2965
2966 // Similar to above, but the shift amount may be extended after masking,
2967 // so return the extended value as the parameter for the intrinsic.
2968 if (match(L, m_ZExt(m_And(m_Value(X), m_SpecificInt(Mask)))) &&
2969 match(R,
2970 m_And(m_Neg(m_ZExt(m_And(m_Specific(X), m_SpecificInt(Mask)))),
2971 m_SpecificInt(Mask))))
2972 return L;
2973
2974 if (match(L, m_ZExt(m_And(m_Value(X), m_SpecificInt(Mask)))) &&
2975 match(R, m_ZExt(m_And(m_Neg(m_Specific(X)), m_SpecificInt(Mask)))))
2976 return L;
2977
2978 return nullptr;
2979 };
2980
2981 Value *ShAmt = matchShiftAmount(ShAmt0, ShAmt1, Width);
2982 if (!ShAmt) {
2983 ShAmt = matchShiftAmount(ShAmt1, ShAmt0, Width);
2984 IsFshl = false; // Sub on SHL.
2985 }
2986 if (!ShAmt)
2987 return std::nullopt;
2988
2989 FShiftArgs = {ShVal0, ShVal1, ShAmt};
2990 } else if (isa<ZExtInst>(Or0) || isa<ZExtInst>(Or1)) {
2991 // If there are two 'or' instructions that concatenate variables in opposite order:
2992 //
2993 // Slot1 and Slot2 are all zero bits.
2994 // | Slot1 | Low | Slot2 | High |
2995 // LowHigh = or (shl (zext Low), ZextLowShlAmt), (zext High)
2996 // | Slot2 | High | Slot1 | Low |
2997 // HighLow = or (shl (zext High), ZextHighShlAmt), (zext Low)
2998 //
2999 // the latter 'or' can be safely converted to
3000 // -> HighLow = fshl LowHigh, LowHigh, ZextHighShlAmt
3001 // if ZextLowShlAmt + ZextHighShlAmt == Width.
3002 if (!isa<ZExtInst>(Or1))
3003 std::swap(Or0, Or1);
3004
3005 Value *High, *ZextHigh, *Low;
3006 const APInt *ZextHighShlAmt;
3007 if (!match(Or0,
3008 m_OneUse(m_Shl(m_Value(ZextHigh), m_APInt(ZextHighShlAmt)))))
3009 return std::nullopt;
3010
3011 if (!match(Or1, m_ZExt(m_Value(Low))) ||
3012 !match(ZextHigh, m_ZExt(m_Value(High))))
3013 return std::nullopt;
3014
3015 unsigned HighSize = High->getType()->getScalarSizeInBits();
3016 unsigned LowSize = Low->getType()->getScalarSizeInBits();
3017 // Make sure High does not overlap with Low and most significant bits of
3018 // High aren't shifted out.
3019 if (ZextHighShlAmt->ult(LowSize) || ZextHighShlAmt->ugt(Width - HighSize))
3020 return std::nullopt;
3021
3022 for (User *U : ZextHigh->users()) {
3023 Value *X, *Y;
3024 if (!match(U, m_Or(m_Value(X), m_Value(Y))))
3025 continue;
3026
3027 if (!isa<ZExtInst>(Y))
3028 std::swap(X, Y);
3029
3030 const APInt *ZextLowShlAmt;
3031 if (!match(X, m_Shl(m_Specific(Or1), m_APInt(ZextLowShlAmt))) ||
3032 !match(Y, m_Specific(ZextHigh)) || !DT.dominates(U, &Or))
3033 continue;
3034
3035 // HighLow is a good concat. If the sum of the two shift amounts equals
3036 // Width, LowHigh must also be a good concat.
3037 if (*ZextLowShlAmt + *ZextHighShlAmt != Width)
3038 continue;
3039
3040 // Low must not overlap with High and most significant bits of Low must
3041 // not be shifted out.
3042 assert(ZextLowShlAmt->uge(HighSize) && 3043 ZextLowShlAmt->ule(Width - LowSize) && "Invalid concat"); 3044 3045 FShiftArgs = {U, U, ConstantInt::get(Or0->getType(), *ZextHighShlAmt)}; 3046 break; 3047 } 3048 } 3049 3050 if (FShiftArgs.empty()) 3051 return std::nullopt; 3052 3053 Intrinsic::ID IID = IsFshl ? Intrinsic::fshl : Intrinsic::fshr; 3054 return std::make_pair(IID, FShiftArgs); 3055 } 3056 3057 /// Match UB-safe variants of the funnel shift intrinsic. 3058 static Instruction *matchFunnelShift(Instruction &Or, InstCombinerImpl &IC) { 3059 if (auto Opt = IC.convertOrOfShiftsToFunnelShift(Or)) { 3060 auto [IID, FShiftArgs] = *Opt; 3061 Function *F = 3062 Intrinsic::getOrInsertDeclaration(Or.getModule(), IID, Or.getType()); 3063 return CallInst::Create(F, FShiftArgs); 3064 } 3065 3066 return nullptr; 3067 } 3068 3069 /// Attempt to combine or(zext(x),shl(zext(y),bw/2) concat packing patterns. 3070 static Instruction *matchOrConcat(Instruction &Or, 3071 InstCombiner::BuilderTy &Builder) { 3072 assert(Or.getOpcode() == Instruction::Or && "bswap requires an 'or'"); 3073 Value *Op0 = Or.getOperand(0), *Op1 = Or.getOperand(1); 3074 Type *Ty = Or.getType(); 3075 3076 unsigned Width = Ty->getScalarSizeInBits(); 3077 if ((Width & 1) != 0) 3078 return nullptr; 3079 unsigned HalfWidth = Width / 2; 3080 3081 // Canonicalize zext (lower half) to LHS. 3082 if (!isa<ZExtInst>(Op0)) 3083 std::swap(Op0, Op1); 3084 3085 // Find lower/upper half. 3086 Value *LowerSrc, *ShlVal, *UpperSrc; 3087 const APInt *C; 3088 if (!match(Op0, m_OneUse(m_ZExt(m_Value(LowerSrc)))) || 3089 !match(Op1, m_OneUse(m_Shl(m_Value(ShlVal), m_APInt(C)))) || 3090 !match(ShlVal, m_OneUse(m_ZExt(m_Value(UpperSrc))))) 3091 return nullptr; 3092 if (*C != HalfWidth || LowerSrc->getType() != UpperSrc->getType() || 3093 LowerSrc->getType()->getScalarSizeInBits() != HalfWidth) 3094 return nullptr; 3095 3096 auto ConcatIntrinsicCalls = [&](Intrinsic::ID id, Value *Lo, Value *Hi) { 3097 Value *NewLower = Builder.CreateZExt(Lo, Ty); 3098 Value *NewUpper = Builder.CreateZExt(Hi, Ty); 3099 NewUpper = Builder.CreateShl(NewUpper, HalfWidth); 3100 Value *BinOp = Builder.CreateOr(NewLower, NewUpper); 3101 return Builder.CreateIntrinsic(id, Ty, BinOp); 3102 }; 3103 3104 // BSWAP: Push the concat down, swapping the lower/upper sources. 3105 // concat(bswap(x),bswap(y)) -> bswap(concat(x,y)) 3106 Value *LowerBSwap, *UpperBSwap; 3107 if (match(LowerSrc, m_BSwap(m_Value(LowerBSwap))) && 3108 match(UpperSrc, m_BSwap(m_Value(UpperBSwap)))) 3109 return ConcatIntrinsicCalls(Intrinsic::bswap, UpperBSwap, LowerBSwap); 3110 3111 // BITREVERSE: Push the concat down, swapping the lower/upper sources. 3112 // concat(bitreverse(x),bitreverse(y)) -> bitreverse(concat(x,y)) 3113 Value *LowerBRev, *UpperBRev; 3114 if (match(LowerSrc, m_BitReverse(m_Value(LowerBRev))) && 3115 match(UpperSrc, m_BitReverse(m_Value(UpperBRev)))) 3116 return ConcatIntrinsicCalls(Intrinsic::bitreverse, UpperBRev, LowerBRev); 3117 3118 return nullptr; 3119 } 3120 3121 /// If all elements of two constant vectors are 0/-1 and inverses, return true. 3122 static bool areInverseVectorBitmasks(Constant *C1, Constant *C2) { 3123 unsigned NumElts = cast<FixedVectorType>(C1->getType())->getNumElements(); 3124 for (unsigned i = 0; i != NumElts; ++i) { 3125 Constant *EltC1 = C1->getAggregateElement(i); 3126 Constant *EltC2 = C2->getAggregateElement(i); 3127 if (!EltC1 || !EltC2) 3128 return false; 3129 3130 // One element must be all ones, and the other must be all zeros. 
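// For example, C1 = <-1, 0, -1> and C2 = <0, -1, 0> are inverse bitmasks.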
3131 if (!((match(EltC1, m_Zero()) && match(EltC2, m_AllOnes())) || 3132 (match(EltC2, m_Zero()) && match(EltC1, m_AllOnes())))) 3133 return false; 3134 } 3135 return true; 3136 } 3137 3138 /// We have an expression of the form (A & C) | (B & D). If A is a scalar or 3139 /// vector composed of all-zeros or all-ones values and is the bitwise 'not' of 3140 /// B, it can be used as the condition operand of a select instruction. 3141 /// We will detect (A & C) | ~(B | D) when the flag ABIsTheSame enabled. 3142 Value *InstCombinerImpl::getSelectCondition(Value *A, Value *B, 3143 bool ABIsTheSame) { 3144 // We may have peeked through bitcasts in the caller. 3145 // Exit immediately if we don't have (vector) integer types. 3146 Type *Ty = A->getType(); 3147 if (!Ty->isIntOrIntVectorTy() || !B->getType()->isIntOrIntVectorTy()) 3148 return nullptr; 3149 3150 // If A is the 'not' operand of B and has enough signbits, we have our answer. 3151 if (ABIsTheSame ? (A == B) : match(B, m_Not(m_Specific(A)))) { 3152 // If these are scalars or vectors of i1, A can be used directly. 3153 if (Ty->isIntOrIntVectorTy(1)) 3154 return A; 3155 3156 // If we look through a vector bitcast, the caller will bitcast the operands 3157 // to match the condition's number of bits (N x i1). 3158 // To make this poison-safe, disallow bitcast from wide element to narrow 3159 // element. That could allow poison in lanes where it was not present in the 3160 // original code. 3161 A = peekThroughBitcast(A); 3162 if (A->getType()->isIntOrIntVectorTy()) { 3163 unsigned NumSignBits = ComputeNumSignBits(A); 3164 if (NumSignBits == A->getType()->getScalarSizeInBits() && 3165 NumSignBits <= Ty->getScalarSizeInBits()) 3166 return Builder.CreateTrunc(A, CmpInst::makeCmpResultType(A->getType())); 3167 } 3168 return nullptr; 3169 } 3170 3171 // TODO: add support for sext and constant case 3172 if (ABIsTheSame) 3173 return nullptr; 3174 3175 // If both operands are constants, see if the constants are inverse bitmasks. 3176 Constant *AConst, *BConst; 3177 if (match(A, m_Constant(AConst)) && match(B, m_Constant(BConst))) 3178 if (AConst == ConstantExpr::getNot(BConst) && 3179 ComputeNumSignBits(A) == Ty->getScalarSizeInBits()) 3180 return Builder.CreateZExtOrTrunc(A, CmpInst::makeCmpResultType(Ty)); 3181 3182 // Look for more complex patterns. The 'not' op may be hidden behind various 3183 // casts. Look through sexts and bitcasts to find the booleans. 3184 Value *Cond; 3185 Value *NotB; 3186 if (match(A, m_SExt(m_Value(Cond))) && 3187 Cond->getType()->isIntOrIntVectorTy(1)) { 3188 // A = sext i1 Cond; B = sext (not (i1 Cond)) 3189 if (match(B, m_SExt(m_Not(m_Specific(Cond))))) 3190 return Cond; 3191 3192 // A = sext i1 Cond; B = not ({bitcast} (sext (i1 Cond))) 3193 // TODO: The one-use checks are unnecessary or misplaced. If the caller 3194 // checked for uses on logic ops/casts, that should be enough to 3195 // make this transform worthwhile. 3196 if (match(B, m_OneUse(m_Not(m_Value(NotB))))) { 3197 NotB = peekThroughBitcast(NotB, true); 3198 if (match(NotB, m_SExt(m_Specific(Cond)))) 3199 return Cond; 3200 } 3201 } 3202 3203 // All scalar (and most vector) possibilities should be handled now. 3204 // Try more matches that only apply to non-splat constant vectors. 3205 if (!Ty->isVectorTy()) 3206 return nullptr; 3207 3208 // If both operands are xor'd with constants using the same sexted boolean 3209 // operand, see if the constants are inverse bitmasks. 3210 // TODO: Use ConstantExpr::getNot()? 
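// For example, with Cond : <2 x i1>, A = (sext Cond) ^ <-1, 0> and
// B = (sext Cond) ^ <0, -1> are xor'd with inverse bitmasks; A is all-ones
// exactly in the lanes where (Cond ^ <true, false>) is true, so that xor
// becomes the new select condition.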
3211 if (match(A, (m_Xor(m_SExt(m_Value(Cond)), m_Constant(AConst)))) && 3212 match(B, (m_Xor(m_SExt(m_Specific(Cond)), m_Constant(BConst)))) && 3213 Cond->getType()->isIntOrIntVectorTy(1) && 3214 areInverseVectorBitmasks(AConst, BConst)) { 3215 AConst = ConstantExpr::getTrunc(AConst, CmpInst::makeCmpResultType(Ty)); 3216 return Builder.CreateXor(Cond, AConst); 3217 } 3218 return nullptr; 3219 } 3220 3221 /// We have an expression of the form (A & B) | (C & D). Try to simplify this 3222 /// to "A' ? B : D", where A' is a boolean or vector of booleans. 3223 /// When InvertFalseVal is set to true, we try to match the pattern 3224 /// where we have peeked through a 'not' op and A and C are the same: 3225 /// (A & B) | ~(A | D) --> (A & B) | (~A & ~D) --> A' ? B : ~D 3226 Value *InstCombinerImpl::matchSelectFromAndOr(Value *A, Value *B, Value *C, 3227 Value *D, bool InvertFalseVal) { 3228 // The potential condition of the select may be bitcasted. In that case, look 3229 // through its bitcast and the corresponding bitcast of the 'not' condition. 3230 Type *OrigType = A->getType(); 3231 A = peekThroughBitcast(A, true); 3232 C = peekThroughBitcast(C, true); 3233 if (Value *Cond = getSelectCondition(A, C, InvertFalseVal)) { 3234 // ((bc Cond) & B) | ((bc ~Cond) & D) --> bc (select Cond, (bc B), (bc D)) 3235 // If this is a vector, we may need to cast to match the condition's length. 3236 // The bitcasts will either all exist or all not exist. The builder will 3237 // not create unnecessary casts if the types already match. 3238 Type *SelTy = A->getType(); 3239 if (auto *VecTy = dyn_cast<VectorType>(Cond->getType())) { 3240 // For a fixed or scalable vector get N from <{vscale x} N x iM> 3241 unsigned Elts = VecTy->getElementCount().getKnownMinValue(); 3242 // For a fixed or scalable vector, get the size in bits of N x iM; for a 3243 // scalar this is just M. 3244 unsigned SelEltSize = SelTy->getPrimitiveSizeInBits().getKnownMinValue(); 3245 Type *EltTy = Builder.getIntNTy(SelEltSize / Elts); 3246 SelTy = VectorType::get(EltTy, VecTy->getElementCount()); 3247 } 3248 Value *BitcastB = Builder.CreateBitCast(B, SelTy); 3249 if (InvertFalseVal) 3250 D = Builder.CreateNot(D); 3251 Value *BitcastD = Builder.CreateBitCast(D, SelTy); 3252 Value *Select = Builder.CreateSelect(Cond, BitcastB, BitcastD); 3253 return Builder.CreateBitCast(Select, OrigType); 3254 } 3255 3256 return nullptr; 3257 } 3258 3259 // (icmp eq X, C) | (icmp ult Other, (X - C)) -> (icmp ule Other, (X - (C + 1))) 3260 // (icmp ne X, C) & (icmp uge Other, (X - C)) -> (icmp ugt Other, (X - (C + 1))) 3261 static Value *foldAndOrOfICmpEqConstantAndICmp(ICmpInst *LHS, ICmpInst *RHS, 3262 bool IsAnd, bool IsLogical, 3263 IRBuilderBase &Builder) { 3264 Value *LHS0 = LHS->getOperand(0); 3265 Value *RHS0 = RHS->getOperand(0); 3266 Value *RHS1 = RHS->getOperand(1); 3267 3268 ICmpInst::Predicate LPred = 3269 IsAnd ? LHS->getInversePredicate() : LHS->getPredicate(); 3270 ICmpInst::Predicate RPred = 3271 IsAnd ? 
RHS->getInversePredicate() : RHS->getPredicate(); 3272 3273 const APInt *CInt; 3274 if (LPred != ICmpInst::ICMP_EQ || 3275 !match(LHS->getOperand(1), m_APIntAllowPoison(CInt)) || 3276 !LHS0->getType()->isIntOrIntVectorTy() || 3277 !(LHS->hasOneUse() || RHS->hasOneUse())) 3278 return nullptr; 3279 3280 auto MatchRHSOp = [LHS0, CInt](const Value *RHSOp) { 3281 return match(RHSOp, 3282 m_Add(m_Specific(LHS0), m_SpecificIntAllowPoison(-*CInt))) || 3283 (CInt->isZero() && RHSOp == LHS0); 3284 }; 3285 3286 Value *Other; 3287 if (RPred == ICmpInst::ICMP_ULT && MatchRHSOp(RHS1)) 3288 Other = RHS0; 3289 else if (RPred == ICmpInst::ICMP_UGT && MatchRHSOp(RHS0)) 3290 Other = RHS1; 3291 else 3292 return nullptr; 3293 3294 if (IsLogical) 3295 Other = Builder.CreateFreeze(Other); 3296 3297 return Builder.CreateICmp( 3298 IsAnd ? ICmpInst::ICMP_ULT : ICmpInst::ICMP_UGE, 3299 Builder.CreateSub(LHS0, ConstantInt::get(LHS0->getType(), *CInt + 1)), 3300 Other); 3301 } 3302 3303 /// Fold (icmp)&(icmp) or (icmp)|(icmp) if possible. 3304 /// If IsLogical is true, then the and/or is in select form and the transform 3305 /// must be poison-safe. 3306 Value *InstCombinerImpl::foldAndOrOfICmps(ICmpInst *LHS, ICmpInst *RHS, 3307 Instruction &I, bool IsAnd, 3308 bool IsLogical) { 3309 const SimplifyQuery Q = SQ.getWithInstruction(&I); 3310 3311 ICmpInst::Predicate PredL = LHS->getPredicate(), PredR = RHS->getPredicate(); 3312 Value *LHS0 = LHS->getOperand(0), *RHS0 = RHS->getOperand(0); 3313 Value *LHS1 = LHS->getOperand(1), *RHS1 = RHS->getOperand(1); 3314 3315 const APInt *LHSC = nullptr, *RHSC = nullptr; 3316 match(LHS1, m_APInt(LHSC)); 3317 match(RHS1, m_APInt(RHSC)); 3318 3319 // (icmp1 A, B) | (icmp2 A, B) --> (icmp3 A, B) 3320 // (icmp1 A, B) & (icmp2 A, B) --> (icmp3 A, B) 3321 if (predicatesFoldable(PredL, PredR)) { 3322 if (LHS0 == RHS1 && LHS1 == RHS0) { 3323 PredL = ICmpInst::getSwappedPredicate(PredL); 3324 std::swap(LHS0, LHS1); 3325 } 3326 if (LHS0 == RHS0 && LHS1 == RHS1) { 3327 unsigned Code = IsAnd ? getICmpCode(PredL) & getICmpCode(PredR) 3328 : getICmpCode(PredL) | getICmpCode(PredR); 3329 bool IsSigned = LHS->isSigned() || RHS->isSigned(); 3330 return getNewICmpValue(Code, IsSigned, LHS0, LHS1, Builder); 3331 } 3332 } 3333 3334 // handle (roughly): 3335 // (icmp ne (A & B), C) | (icmp ne (A & D), E) 3336 // (icmp eq (A & B), C) & (icmp eq (A & D), E) 3337 if (Value *V = foldLogOpOfMaskedICmps(LHS, RHS, IsAnd, IsLogical, Builder, Q)) 3338 return V; 3339 3340 if (Value *V = 3341 foldAndOrOfICmpEqConstantAndICmp(LHS, RHS, IsAnd, IsLogical, Builder)) 3342 return V; 3343 // We can treat logical like bitwise here, because both operands are used on 3344 // the LHS, and as such poison from both will propagate. 3345 if (Value *V = foldAndOrOfICmpEqConstantAndICmp(RHS, LHS, IsAnd, 3346 /*IsLogical*/ false, Builder)) 3347 return V; 3348 3349 if (Value *V = 3350 foldAndOrOfICmpsWithConstEq(LHS, RHS, IsAnd, IsLogical, Builder, Q)) 3351 return V; 3352 // We can convert this case to bitwise and, because both operands are used 3353 // on the LHS, and as such poison from both will propagate. 3354 if (Value *V = foldAndOrOfICmpsWithConstEq(RHS, LHS, IsAnd, 3355 /*IsLogical=*/false, Builder, Q)) { 3356 // If RHS is still used, we should drop samesign flag. 
3357 if (IsLogical && RHS->hasSameSign() && !RHS->use_empty()) { 3358 RHS->setSameSign(false); 3359 addToWorklist(RHS); 3360 } 3361 return V; 3362 } 3363 3364 if (Value *V = foldIsPowerOf2OrZero(LHS, RHS, IsAnd, Builder, *this)) 3365 return V; 3366 if (Value *V = foldIsPowerOf2OrZero(RHS, LHS, IsAnd, Builder, *this)) 3367 return V; 3368 3369 // TODO: One of these directions is fine with logical and/or, the other could 3370 // be supported by inserting freeze. 3371 if (!IsLogical) { 3372 // E.g. (icmp slt x, 0) | (icmp sgt x, n) --> icmp ugt x, n 3373 // E.g. (icmp sge x, 0) & (icmp slt x, n) --> icmp ult x, n 3374 if (Value *V = simplifyRangeCheck(LHS, RHS, /*Inverted=*/!IsAnd)) 3375 return V; 3376 3377 // E.g. (icmp sgt x, n) | (icmp slt x, 0) --> icmp ugt x, n 3378 // E.g. (icmp slt x, n) & (icmp sge x, 0) --> icmp ult x, n 3379 if (Value *V = simplifyRangeCheck(RHS, LHS, /*Inverted=*/!IsAnd)) 3380 return V; 3381 } 3382 3383 // TODO: Add conjugated or fold, check whether it is safe for logical and/or. 3384 if (IsAnd && !IsLogical) 3385 if (Value *V = foldSignedTruncationCheck(LHS, RHS, I, Builder)) 3386 return V; 3387 3388 if (Value *V = foldIsPowerOf2(LHS, RHS, IsAnd, Builder, *this)) 3389 return V; 3390 3391 if (Value *V = foldPowerOf2AndShiftedMask(LHS, RHS, IsAnd, Builder)) 3392 return V; 3393 3394 // TODO: Verify whether this is safe for logical and/or. 3395 if (!IsLogical) { 3396 if (Value *X = foldUnsignedUnderflowCheck(LHS, RHS, IsAnd, Q, Builder)) 3397 return X; 3398 if (Value *X = foldUnsignedUnderflowCheck(RHS, LHS, IsAnd, Q, Builder)) 3399 return X; 3400 } 3401 3402 // (icmp ne A, 0) | (icmp ne B, 0) --> (icmp ne (A|B), 0) 3403 // (icmp eq A, 0) & (icmp eq B, 0) --> (icmp eq (A|B), 0) 3404 // TODO: Remove this and below when foldLogOpOfMaskedICmps can handle undefs. 3405 if (PredL == (IsAnd ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE) && 3406 PredL == PredR && match(LHS1, m_ZeroInt()) && match(RHS1, m_ZeroInt()) && 3407 LHS0->getType() == RHS0->getType() && 3408 (!IsLogical || isGuaranteedNotToBePoison(RHS0))) { 3409 Value *NewOr = Builder.CreateOr(LHS0, RHS0); 3410 return Builder.CreateICmp(PredL, NewOr, 3411 Constant::getNullValue(NewOr->getType())); 3412 } 3413 3414 // (icmp ne A, -1) | (icmp ne B, -1) --> (icmp ne (A&B), -1) 3415 // (icmp eq A, -1) & (icmp eq B, -1) --> (icmp eq (A&B), -1) 3416 if (PredL == (IsAnd ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE) && 3417 PredL == PredR && match(LHS1, m_AllOnes()) && match(RHS1, m_AllOnes()) && 3418 LHS0->getType() == RHS0->getType() && 3419 (!IsLogical || isGuaranteedNotToBePoison(RHS0))) { 3420 Value *NewAnd = Builder.CreateAnd(LHS0, RHS0); 3421 return Builder.CreateICmp(PredL, NewAnd, 3422 Constant::getAllOnesValue(LHS0->getType())); 3423 } 3424 3425 if (!IsLogical) 3426 if (Value *V = 3427 foldAndOrOfICmpsWithPow2AndWithZero(Builder, LHS, RHS, IsAnd, Q)) 3428 return V; 3429 3430 // This only handles icmp of constants: (icmp1 A, C1) | (icmp2 B, C2). 3431 if (!LHSC || !RHSC) 3432 return nullptr; 3433 3434 // (trunc x) == C1 & (and x, CA) == C2 -> (and x, CA|CMAX) == C1|C2 3435 // (trunc x) != C1 | (and x, CA) != C2 -> (and x, CA|CMAX) != C1|C2 3436 // where CMAX is the all ones value for the truncated type, 3437 // iff the lower bits of C2 and CA are zero. 3438 if (PredL == (IsAnd ? 
ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE) && 3439 PredL == PredR && LHS->hasOneUse() && RHS->hasOneUse()) { 3440 Value *V; 3441 const APInt *AndC, *SmallC = nullptr, *BigC = nullptr; 3442 3443 // (trunc x) == C1 & (and x, CA) == C2 3444 // (and x, CA) == C2 & (trunc x) == C1 3445 if (match(RHS0, m_Trunc(m_Value(V))) && 3446 match(LHS0, m_And(m_Specific(V), m_APInt(AndC)))) { 3447 SmallC = RHSC; 3448 BigC = LHSC; 3449 } else if (match(LHS0, m_Trunc(m_Value(V))) && 3450 match(RHS0, m_And(m_Specific(V), m_APInt(AndC)))) { 3451 SmallC = LHSC; 3452 BigC = RHSC; 3453 } 3454 3455 if (SmallC && BigC) { 3456 unsigned BigBitSize = BigC->getBitWidth(); 3457 unsigned SmallBitSize = SmallC->getBitWidth(); 3458 3459 // Check that the low bits are zero. 3460 APInt Low = APInt::getLowBitsSet(BigBitSize, SmallBitSize); 3461 if ((Low & *AndC).isZero() && (Low & *BigC).isZero()) { 3462 Value *NewAnd = Builder.CreateAnd(V, Low | *AndC); 3463 APInt N = SmallC->zext(BigBitSize) | *BigC; 3464 Value *NewVal = ConstantInt::get(NewAnd->getType(), N); 3465 return Builder.CreateICmp(PredL, NewAnd, NewVal); 3466 } 3467 } 3468 } 3469 3470 // Match naive pattern (and its inverted form) for checking if two values 3471 // share same sign. An example of the pattern: 3472 // (icmp slt (X & Y), 0) | (icmp sgt (X | Y), -1) -> (icmp sgt (X ^ Y), -1) 3473 // Inverted form (example): 3474 // (icmp slt (X | Y), 0) & (icmp sgt (X & Y), -1) -> (icmp slt (X ^ Y), 0) 3475 bool TrueIfSignedL, TrueIfSignedR; 3476 if (isSignBitCheck(PredL, *LHSC, TrueIfSignedL) && 3477 isSignBitCheck(PredR, *RHSC, TrueIfSignedR) && 3478 (RHS->hasOneUse() || LHS->hasOneUse())) { 3479 Value *X, *Y; 3480 if (IsAnd) { 3481 if ((TrueIfSignedL && !TrueIfSignedR && 3482 match(LHS0, m_Or(m_Value(X), m_Value(Y))) && 3483 match(RHS0, m_c_And(m_Specific(X), m_Specific(Y)))) || 3484 (!TrueIfSignedL && TrueIfSignedR && 3485 match(LHS0, m_And(m_Value(X), m_Value(Y))) && 3486 match(RHS0, m_c_Or(m_Specific(X), m_Specific(Y))))) { 3487 Value *NewXor = Builder.CreateXor(X, Y); 3488 return Builder.CreateIsNeg(NewXor); 3489 } 3490 } else { 3491 if ((TrueIfSignedL && !TrueIfSignedR && 3492 match(LHS0, m_And(m_Value(X), m_Value(Y))) && 3493 match(RHS0, m_c_Or(m_Specific(X), m_Specific(Y)))) || 3494 (!TrueIfSignedL && TrueIfSignedR && 3495 match(LHS0, m_Or(m_Value(X), m_Value(Y))) && 3496 match(RHS0, m_c_And(m_Specific(X), m_Specific(Y))))) { 3497 Value *NewXor = Builder.CreateXor(X, Y); 3498 return Builder.CreateIsNotNeg(NewXor); 3499 } 3500 } 3501 } 3502 3503 return foldAndOrOfICmpsUsingRanges(LHS, RHS, IsAnd); 3504 } 3505 3506 /// If IsLogical is true, then the and/or is in select form and the transform 3507 /// must be poison-safe. 
3508 Value *InstCombinerImpl::foldBooleanAndOr(Value *LHS, Value *RHS, 3509 Instruction &I, bool IsAnd, 3510 bool IsLogical) { 3511 if (!LHS->getType()->isIntOrIntVectorTy(1)) 3512 return nullptr; 3513 3514 if (auto *LHSCmp = dyn_cast<ICmpInst>(LHS)) 3515 if (auto *RHSCmp = dyn_cast<ICmpInst>(RHS)) 3516 if (Value *Res = foldAndOrOfICmps(LHSCmp, RHSCmp, I, IsAnd, IsLogical)) 3517 return Res; 3518 3519 if (auto *LHSCmp = dyn_cast<FCmpInst>(LHS)) 3520 if (auto *RHSCmp = dyn_cast<FCmpInst>(RHS)) 3521 if (Value *Res = foldLogicOfFCmps(LHSCmp, RHSCmp, IsAnd, IsLogical)) 3522 return Res; 3523 3524 if (Value *Res = foldEqOfParts(LHS, RHS, IsAnd)) 3525 return Res; 3526 3527 return nullptr; 3528 } 3529 3530 static Value *foldOrOfInversions(BinaryOperator &I, 3531 InstCombiner::BuilderTy &Builder) { 3532 assert(I.getOpcode() == Instruction::Or && 3533 "Simplification only supports or at the moment."); 3534 3535 Value *Cmp1, *Cmp2, *Cmp3, *Cmp4; 3536 if (!match(I.getOperand(0), m_And(m_Value(Cmp1), m_Value(Cmp2))) || 3537 !match(I.getOperand(1), m_And(m_Value(Cmp3), m_Value(Cmp4)))) 3538 return nullptr; 3539 3540 // Check if any two pairs of the and operations are inversions of each other. 3541 if (isKnownInversion(Cmp1, Cmp3) && isKnownInversion(Cmp2, Cmp4)) 3542 return Builder.CreateXor(Cmp1, Cmp4); 3543 if (isKnownInversion(Cmp1, Cmp4) && isKnownInversion(Cmp2, Cmp3)) 3544 return Builder.CreateXor(Cmp1, Cmp3); 3545 3546 return nullptr; 3547 } 3548 3549 // FIXME: We use commutative matchers (m_c_*) for some, but not all, matches 3550 // here. We should standardize that construct where it is needed or choose some 3551 // other way to ensure that commutated variants of patterns are not missed. 3552 Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) { 3553 if (Value *V = simplifyOrInst(I.getOperand(0), I.getOperand(1), 3554 SQ.getWithInstruction(&I))) 3555 return replaceInstUsesWith(I, V); 3556 3557 if (SimplifyAssociativeOrCommutative(I)) 3558 return &I; 3559 3560 if (Instruction *X = foldVectorBinop(I)) 3561 return X; 3562 3563 if (Instruction *Phi = foldBinopWithPhiOperands(I)) 3564 return Phi; 3565 3566 // See if we can simplify any instructions used by the instruction whose sole 3567 // purpose is to compute bits we don't care about. 3568 if (SimplifyDemandedInstructionBits(I)) 3569 return &I; 3570 3571 // Do this before using distributive laws to catch simple and/or/not patterns. 
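// For example, foldOrToXor turns (A & ~B) | (~A & B) into A ^ B.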
3572 if (Instruction *Xor = foldOrToXor(I, Builder)) 3573 return Xor; 3574 3575 if (Instruction *X = foldComplexAndOrPatterns(I, Builder)) 3576 return X; 3577 3578 // (A & B) | (C & D) -> A ^ D where A == ~C && B == ~D 3579 // (A & B) | (C & D) -> A ^ C where A == ~D && B == ~C 3580 if (Value *V = foldOrOfInversions(I, Builder)) 3581 return replaceInstUsesWith(I, V); 3582 3583 // (A&B)|(A&C) -> A&(B|C) etc 3584 if (Value *V = foldUsingDistributiveLaws(I)) 3585 return replaceInstUsesWith(I, V); 3586 3587 Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); 3588 Type *Ty = I.getType(); 3589 if (Ty->isIntOrIntVectorTy(1)) { 3590 if (auto *SI0 = dyn_cast<SelectInst>(Op0)) { 3591 if (auto *R = 3592 foldAndOrOfSelectUsingImpliedCond(Op1, *SI0, /* IsAnd */ false)) 3593 return R; 3594 } 3595 if (auto *SI1 = dyn_cast<SelectInst>(Op1)) { 3596 if (auto *R = 3597 foldAndOrOfSelectUsingImpliedCond(Op0, *SI1, /* IsAnd */ false)) 3598 return R; 3599 } 3600 } 3601 3602 if (Instruction *FoldedLogic = foldBinOpIntoSelectOrPhi(I)) 3603 return FoldedLogic; 3604 3605 if (Instruction *BitOp = matchBSwapOrBitReverse(I, /*MatchBSwaps*/ true, 3606 /*MatchBitReversals*/ true)) 3607 return BitOp; 3608 3609 if (Instruction *Funnel = matchFunnelShift(I, *this)) 3610 return Funnel; 3611 3612 if (Instruction *Concat = matchOrConcat(I, Builder)) 3613 return replaceInstUsesWith(I, Concat); 3614 3615 if (Instruction *R = foldBinOpShiftWithShift(I)) 3616 return R; 3617 3618 if (Instruction *R = tryFoldInstWithCtpopWithNot(&I)) 3619 return R; 3620 3621 if (cast<PossiblyDisjointInst>(I).isDisjoint()) { 3622 if (Instruction *R = 3623 foldAddLikeCommutative(I.getOperand(0), I.getOperand(1), 3624 /*NSW=*/true, /*NUW=*/true)) 3625 return R; 3626 if (Instruction *R = 3627 foldAddLikeCommutative(I.getOperand(1), I.getOperand(0), 3628 /*NSW=*/true, /*NUW=*/true)) 3629 return R; 3630 } 3631 3632 Value *X, *Y; 3633 const APInt *CV; 3634 if (match(&I, m_c_Or(m_OneUse(m_Xor(m_Value(X), m_APInt(CV))), m_Value(Y))) && 3635 !CV->isAllOnes() && MaskedValueIsZero(Y, *CV, 0, &I)) { 3636 // (X ^ C) | Y -> (X | Y) ^ C iff Y & C == 0 3637 // The check for a 'not' op is for efficiency (if Y is known zero --> ~X). 
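// For example, with C == 8 and bit 3 of Y known zero:
// (X ^ 8) | Y --> (X | Y) ^ 8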
3638 Value *Or = Builder.CreateOr(X, Y); 3639 return BinaryOperator::CreateXor(Or, ConstantInt::get(Ty, *CV)); 3640 } 3641 3642 // If the operands have no common bits set: 3643 // or (mul X, Y), X --> add (mul X, Y), X --> mul X, (Y + 1) 3644 if (match(&I, m_c_DisjointOr(m_OneUse(m_Mul(m_Value(X), m_Value(Y))), 3645 m_Deferred(X)))) { 3646 Value *IncrementY = Builder.CreateAdd(Y, ConstantInt::get(Ty, 1)); 3647 return BinaryOperator::CreateMul(X, IncrementY); 3648 } 3649 3650 // (A & C) | (B & D) 3651 Value *A, *B, *C, *D; 3652 if (match(Op0, m_And(m_Value(A), m_Value(C))) && 3653 match(Op1, m_And(m_Value(B), m_Value(D)))) { 3654 3655 // (A & C0) | (B & C1) 3656 const APInt *C0, *C1; 3657 if (match(C, m_APInt(C0)) && match(D, m_APInt(C1))) { 3658 Value *X; 3659 if (*C0 == ~*C1) { 3660 // ((X | B) & MaskC) | (B & ~MaskC) -> (X & MaskC) | B 3661 if (match(A, m_c_Or(m_Value(X), m_Specific(B)))) 3662 return BinaryOperator::CreateOr(Builder.CreateAnd(X, *C0), B); 3663 // (A & MaskC) | ((X | A) & ~MaskC) -> (X & ~MaskC) | A 3664 if (match(B, m_c_Or(m_Specific(A), m_Value(X)))) 3665 return BinaryOperator::CreateOr(Builder.CreateAnd(X, *C1), A); 3666 3667 // ((X ^ B) & MaskC) | (B & ~MaskC) -> (X & MaskC) ^ B 3668 if (match(A, m_c_Xor(m_Value(X), m_Specific(B)))) 3669 return BinaryOperator::CreateXor(Builder.CreateAnd(X, *C0), B); 3670 // (A & MaskC) | ((X ^ A) & ~MaskC) -> (X & ~MaskC) ^ A 3671 if (match(B, m_c_Xor(m_Specific(A), m_Value(X)))) 3672 return BinaryOperator::CreateXor(Builder.CreateAnd(X, *C1), A); 3673 } 3674 3675 if ((*C0 & *C1).isZero()) { 3676 // ((X | B) & C0) | (B & C1) --> (X | B) & (C0 | C1) 3677 // iff (C0 & C1) == 0 and (X & ~C0) == 0 3678 if (match(A, m_c_Or(m_Value(X), m_Specific(B))) && 3679 MaskedValueIsZero(X, ~*C0, 0, &I)) { 3680 Constant *C01 = ConstantInt::get(Ty, *C0 | *C1); 3681 return BinaryOperator::CreateAnd(A, C01); 3682 } 3683 // (A & C0) | ((X | A) & C1) --> (X | A) & (C0 | C1) 3684 // iff (C0 & C1) == 0 and (X & ~C1) == 0 3685 if (match(B, m_c_Or(m_Value(X), m_Specific(A))) && 3686 MaskedValueIsZero(X, ~*C1, 0, &I)) { 3687 Constant *C01 = ConstantInt::get(Ty, *C0 | *C1); 3688 return BinaryOperator::CreateAnd(B, C01); 3689 } 3690 // ((X | C2) & C0) | ((X | C3) & C1) --> (X | C2 | C3) & (C0 | C1) 3691 // iff (C0 & C1) == 0 and (C2 & ~C0) == 0 and (C3 & ~C1) == 0. 3692 const APInt *C2, *C3; 3693 if (match(A, m_Or(m_Value(X), m_APInt(C2))) && 3694 match(B, m_Or(m_Specific(X), m_APInt(C3))) && 3695 (*C2 & ~*C0).isZero() && (*C3 & ~*C1).isZero()) { 3696 Value *Or = Builder.CreateOr(X, *C2 | *C3, "bitfield"); 3697 Constant *C01 = ConstantInt::get(Ty, *C0 | *C1); 3698 return BinaryOperator::CreateAnd(Or, C01); 3699 } 3700 } 3701 } 3702 3703 // Don't try to form a select if it's unlikely that we'll get rid of at 3704 // least one of the operands. A select is generally more expensive than the 3705 // 'or' that it is replacing. 3706 if (Op0->hasOneUse() || Op1->hasOneUse()) { 3707 // (Cond & C) | (~Cond & D) -> Cond ? C : D, and commuted variants. 
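// matchSelectFromAndOr succeeds only when the candidate condition operand is
// an all-ones/all-zeros value (e.g. a sign-extended i1) that is the bitwise
// 'not' of the condition on the other side; see getSelectCondition().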
3708 if (Value *V = matchSelectFromAndOr(A, C, B, D)) 3709 return replaceInstUsesWith(I, V); 3710 if (Value *V = matchSelectFromAndOr(A, C, D, B)) 3711 return replaceInstUsesWith(I, V); 3712 if (Value *V = matchSelectFromAndOr(C, A, B, D)) 3713 return replaceInstUsesWith(I, V); 3714 if (Value *V = matchSelectFromAndOr(C, A, D, B)) 3715 return replaceInstUsesWith(I, V); 3716 if (Value *V = matchSelectFromAndOr(B, D, A, C)) 3717 return replaceInstUsesWith(I, V); 3718 if (Value *V = matchSelectFromAndOr(B, D, C, A)) 3719 return replaceInstUsesWith(I, V); 3720 if (Value *V = matchSelectFromAndOr(D, B, A, C)) 3721 return replaceInstUsesWith(I, V); 3722 if (Value *V = matchSelectFromAndOr(D, B, C, A)) 3723 return replaceInstUsesWith(I, V); 3724 } 3725 } 3726 3727 if (match(Op0, m_And(m_Value(A), m_Value(C))) && 3728 match(Op1, m_Not(m_Or(m_Value(B), m_Value(D)))) && 3729 (Op0->hasOneUse() || Op1->hasOneUse())) { 3730 // (Cond & C) | ~(Cond | D) -> Cond ? C : ~D 3731 if (Value *V = matchSelectFromAndOr(A, C, B, D, true)) 3732 return replaceInstUsesWith(I, V); 3733 if (Value *V = matchSelectFromAndOr(A, C, D, B, true)) 3734 return replaceInstUsesWith(I, V); 3735 if (Value *V = matchSelectFromAndOr(C, A, B, D, true)) 3736 return replaceInstUsesWith(I, V); 3737 if (Value *V = matchSelectFromAndOr(C, A, D, B, true)) 3738 return replaceInstUsesWith(I, V); 3739 } 3740 3741 // (A ^ B) | ((B ^ C) ^ A) -> (A ^ B) | C 3742 if (match(Op0, m_Xor(m_Value(A), m_Value(B)))) 3743 if (match(Op1, 3744 m_c_Xor(m_c_Xor(m_Specific(B), m_Value(C)), m_Specific(A))) || 3745 match(Op1, m_c_Xor(m_c_Xor(m_Specific(A), m_Value(C)), m_Specific(B)))) 3746 return BinaryOperator::CreateOr(Op0, C); 3747 3748 // ((B ^ C) ^ A) | (A ^ B) -> (A ^ B) | C 3749 if (match(Op1, m_Xor(m_Value(A), m_Value(B)))) 3750 if (match(Op0, 3751 m_c_Xor(m_c_Xor(m_Specific(B), m_Value(C)), m_Specific(A))) || 3752 match(Op0, m_c_Xor(m_c_Xor(m_Specific(A), m_Value(C)), m_Specific(B)))) 3753 return BinaryOperator::CreateOr(Op1, C); 3754 3755 if (Instruction *DeMorgan = matchDeMorgansLaws(I, *this)) 3756 return DeMorgan; 3757 3758 // Canonicalize xor to the RHS. 3759 bool SwappedForXor = false; 3760 if (match(Op0, m_Xor(m_Value(), m_Value()))) { 3761 std::swap(Op0, Op1); 3762 SwappedForXor = true; 3763 } 3764 3765 if (match(Op1, m_Xor(m_Value(A), m_Value(B)))) { 3766 // (A | ?) | (A ^ B) --> (A | ?) | B 3767 // (B | ?) | (A ^ B) --> (B | ?) | A 3768 if (match(Op0, m_c_Or(m_Specific(A), m_Value()))) 3769 return BinaryOperator::CreateOr(Op0, B); 3770 if (match(Op0, m_c_Or(m_Specific(B), m_Value()))) 3771 return BinaryOperator::CreateOr(Op0, A); 3772 3773 // (A & B) | (A ^ B) --> A | B 3774 // (B & A) | (A ^ B) --> A | B 3775 if (match(Op0, m_c_And(m_Specific(A), m_Specific(B)))) 3776 return BinaryOperator::CreateOr(A, B); 3777 3778 // ~A | (A ^ B) --> ~(A & B) 3779 // ~B | (A ^ B) --> ~(A & B) 3780 // The swap above should always make Op0 the 'not'. 3781 if ((Op0->hasOneUse() || Op1->hasOneUse()) && 3782 (match(Op0, m_Not(m_Specific(A))) || match(Op0, m_Not(m_Specific(B))))) 3783 return BinaryOperator::CreateNot(Builder.CreateAnd(A, B)); 3784 3785 // Same as above, but peek through an 'and' to the common operand: 3786 // ~(A & ?) | (A ^ B) --> ~((A & ?) & B) 3787 // ~(B & ?) | (A ^ B) --> ~((B & ?) 
& A) 3788 Instruction *And; 3789 if ((Op0->hasOneUse() || Op1->hasOneUse()) && 3790 match(Op0, m_Not(m_CombineAnd(m_Instruction(And), 3791 m_c_And(m_Specific(A), m_Value()))))) 3792 return BinaryOperator::CreateNot(Builder.CreateAnd(And, B)); 3793 if ((Op0->hasOneUse() || Op1->hasOneUse()) && 3794 match(Op0, m_Not(m_CombineAnd(m_Instruction(And), 3795 m_c_And(m_Specific(B), m_Value()))))) 3796 return BinaryOperator::CreateNot(Builder.CreateAnd(And, A)); 3797 3798 // (~A | C) | (A ^ B) --> ~(A & B) | C 3799 // (~B | C) | (A ^ B) --> ~(A & B) | C 3800 if (Op0->hasOneUse() && Op1->hasOneUse() && 3801 (match(Op0, m_c_Or(m_Not(m_Specific(A)), m_Value(C))) || 3802 match(Op0, m_c_Or(m_Not(m_Specific(B)), m_Value(C))))) { 3803 Value *Nand = Builder.CreateNot(Builder.CreateAnd(A, B), "nand"); 3804 return BinaryOperator::CreateOr(Nand, C); 3805 } 3806 } 3807 3808 if (SwappedForXor) 3809 std::swap(Op0, Op1); 3810 3811 if (Value *Res = 3812 foldBooleanAndOr(Op0, Op1, I, /*IsAnd=*/false, /*IsLogical=*/false)) 3813 return replaceInstUsesWith(I, Res); 3814 3815 if (match(Op1, m_OneUse(m_LogicalOr(m_Value(X), m_Value(Y))))) { 3816 bool IsLogical = isa<SelectInst>(Op1); 3817 if (auto *V = reassociateBooleanAndOr(Op0, X, Y, I, /*IsAnd=*/false, 3818 /*RHSIsLogical=*/IsLogical)) 3819 return replaceInstUsesWith(I, V); 3820 } 3821 if (match(Op0, m_OneUse(m_LogicalOr(m_Value(X), m_Value(Y))))) { 3822 bool IsLogical = isa<SelectInst>(Op0); 3823 if (auto *V = reassociateBooleanAndOr(Op1, X, Y, I, /*IsAnd=*/false, 3824 /*RHSIsLogical=*/IsLogical)) 3825 return replaceInstUsesWith(I, V); 3826 } 3827 3828 if (Instruction *FoldedFCmps = reassociateFCmps(I, Builder)) 3829 return FoldedFCmps; 3830 3831 if (Instruction *CastedOr = foldCastedBitwiseLogic(I)) 3832 return CastedOr; 3833 3834 if (Instruction *Sel = foldBinopOfSextBoolToSelect(I)) 3835 return Sel; 3836 3837 // or(sext(A), B) / or(B, sext(A)) --> A ? -1 : B, where A is i1 or <N x i1>. 3838 // TODO: Move this into foldBinopOfSextBoolToSelect as a more generalized fold 3839 // with binop identity constant. But creating a select with non-constant 3840 // arm may not be reversible due to poison semantics. Is that a good 3841 // canonicalization? 3842 if (match(&I, m_c_Or(m_OneUse(m_SExt(m_Value(A))), m_Value(B))) && 3843 A->getType()->isIntOrIntVectorTy(1)) 3844 return SelectInst::Create(A, ConstantInt::getAllOnesValue(Ty), B); 3845 3846 // Note: If we've gotten to the point of visiting the outer OR, then the 3847 // inner one couldn't be simplified. If it was a constant, then it won't 3848 // be simplified by a later pass either, so we try swapping the inner/outer 3849 // ORs in the hopes that we'll be able to simplify it this way. 3850 // (X|C) | V --> (X|V) | C 3851 ConstantInt *CI; 3852 if (Op0->hasOneUse() && !match(Op1, m_ConstantInt()) && 3853 match(Op0, m_Or(m_Value(A), m_ConstantInt(CI)))) { 3854 Value *Inner = Builder.CreateOr(A, Op1); 3855 Inner->takeName(Op0); 3856 return BinaryOperator::CreateOr(Inner, CI); 3857 } 3858 3859 // Change (or (bool?A:B),(bool?C:D)) --> (bool?(or A,C):(or B,D)) 3860 // Since this OR statement hasn't been optimized further yet, we hope 3861 // that this transformation will allow the new ORs to be optimized. 
3862 { 3863 Value *X = nullptr, *Y = nullptr; 3864 if (Op0->hasOneUse() && Op1->hasOneUse() && 3865 match(Op0, m_Select(m_Value(X), m_Value(A), m_Value(B))) && 3866 match(Op1, m_Select(m_Value(Y), m_Value(C), m_Value(D))) && X == Y) { 3867 Value *orTrue = Builder.CreateOr(A, C); 3868 Value *orFalse = Builder.CreateOr(B, D); 3869 return SelectInst::Create(X, orTrue, orFalse); 3870 } 3871 } 3872 3873 // or(ashr(subNSW(Y, X), ScalarSizeInBits(Y) - 1), X) --> X s> Y ? -1 : X. 3874 { 3875 Value *X, *Y; 3876 if (match(&I, m_c_Or(m_OneUse(m_AShr( 3877 m_NSWSub(m_Value(Y), m_Value(X)), 3878 m_SpecificInt(Ty->getScalarSizeInBits() - 1))), 3879 m_Deferred(X)))) { 3880 Value *NewICmpInst = Builder.CreateICmpSGT(X, Y); 3881 Value *AllOnes = ConstantInt::getAllOnesValue(Ty); 3882 return SelectInst::Create(NewICmpInst, AllOnes, X); 3883 } 3884 } 3885 3886 { 3887 // ((A & B) ^ A) | ((A & B) ^ B) -> A ^ B 3888 // (A ^ (A & B)) | (B ^ (A & B)) -> A ^ B 3889 // ((A & B) ^ B) | ((A & B) ^ A) -> A ^ B 3890 // (B ^ (A & B)) | (A ^ (A & B)) -> A ^ B 3891 const auto TryXorOpt = [&](Value *Lhs, Value *Rhs) -> Instruction * { 3892 if (match(Lhs, m_c_Xor(m_And(m_Value(A), m_Value(B)), m_Deferred(A))) && 3893 match(Rhs, 3894 m_c_Xor(m_And(m_Specific(A), m_Specific(B)), m_Specific(B)))) { 3895 return BinaryOperator::CreateXor(A, B); 3896 } 3897 return nullptr; 3898 }; 3899 3900 if (Instruction *Result = TryXorOpt(Op0, Op1)) 3901 return Result; 3902 if (Instruction *Result = TryXorOpt(Op1, Op0)) 3903 return Result; 3904 } 3905 3906 if (Instruction *V = 3907 canonicalizeCondSignextOfHighBitExtractToSignextHighBitExtract(I)) 3908 return V; 3909 3910 CmpPredicate Pred; 3911 Value *Mul, *Ov, *MulIsNotZero, *UMulWithOv; 3912 // Check if the OR weakens the overflow condition for umul.with.overflow by 3913 // treating any non-zero result as overflow. In that case, we overflow if both 3914 // umul.with.overflow operands are != 0, as in that case the result can only 3915 // be 0, iff the multiplication overflows. 3916 if (match(&I, 3917 m_c_Or(m_CombineAnd(m_ExtractValue<1>(m_Value(UMulWithOv)), 3918 m_Value(Ov)), 3919 m_CombineAnd( 3920 m_SpecificICmp(ICmpInst::ICMP_NE, 3921 m_CombineAnd(m_ExtractValue<0>( 3922 m_Deferred(UMulWithOv)), 3923 m_Value(Mul)), 3924 m_ZeroInt()), 3925 m_Value(MulIsNotZero)))) && 3926 (Ov->hasOneUse() || (MulIsNotZero->hasOneUse() && Mul->hasOneUse()))) { 3927 Value *A, *B; 3928 if (match(UMulWithOv, m_Intrinsic<Intrinsic::umul_with_overflow>( 3929 m_Value(A), m_Value(B)))) { 3930 Value *NotNullA = Builder.CreateIsNotNull(A); 3931 Value *NotNullB = Builder.CreateIsNotNull(B); 3932 return BinaryOperator::CreateAnd(NotNullA, NotNullB); 3933 } 3934 } 3935 3936 /// Res, Overflow = xxx_with_overflow X, C1 3937 /// Try to canonicalize the pattern "Overflow | icmp pred Res, C2" into 3938 /// "Overflow | icmp pred X, C2 +/- C1". 3939 const WithOverflowInst *WO; 3940 const Value *WOV; 3941 const APInt *C1, *C2; 3942 if (match(&I, m_c_Or(m_CombineAnd(m_ExtractValue<1>(m_CombineAnd( 3943 m_WithOverflowInst(WO), m_Value(WOV))), 3944 m_Value(Ov)), 3945 m_OneUse(m_ICmp(Pred, m_ExtractValue<0>(m_Deferred(WOV)), 3946 m_APInt(C2))))) && 3947 (WO->getBinaryOp() == Instruction::Add || 3948 WO->getBinaryOp() == Instruction::Sub) && 3949 (ICmpInst::isEquality(Pred) || 3950 WO->isSigned() == ICmpInst::isSigned(Pred)) && 3951 match(WO->getRHS(), m_APInt(C1))) { 3952 bool Overflow; 3953 APInt NewC = WO->getBinaryOp() == Instruction::Add 3954 ? (ICmpInst::isSigned(Pred) ? 
C2->ssub_ov(*C1, Overflow)
3955 : C2->usub_ov(*C1, Overflow))
3956 : (ICmpInst::isSigned(Pred) ? C2->sadd_ov(*C1, Overflow)
3957 : C2->uadd_ov(*C1, Overflow));
3958 if (!Overflow || ICmpInst::isEquality(Pred)) {
3959 Value *NewCmp = Builder.CreateICmp(
3960 Pred, WO->getLHS(), ConstantInt::get(WO->getLHS()->getType(), NewC));
3961 return BinaryOperator::CreateOr(Ov, NewCmp);
3962 }
3963 }
3964
3965 // (~x) | y --> ~(x & (~y)) iff that gets rid of inversions
3966 if (sinkNotIntoOtherHandOfLogicalOp(I))
3967 return &I;
3968
3969 // Improve "get low bit mask up to and including bit X" pattern:
3970 // (1 << X) | ((1 << X) + -1) --> -1 l>> (bitwidth(x) - 1 - X)
3971 if (match(&I, m_c_Or(m_Add(m_Shl(m_One(), m_Value(X)), m_AllOnes()),
3972 m_Shl(m_One(), m_Deferred(X)))) &&
3973 match(&I, m_c_Or(m_OneUse(m_Value()), m_Value()))) {
3974 Value *Sub = Builder.CreateSub(
3975 ConstantInt::get(Ty, Ty->getScalarSizeInBits() - 1), X);
3976 return BinaryOperator::CreateLShr(Constant::getAllOnesValue(Ty), Sub);
3977 }
3978
3979 // An or recurrence w/loop invariant step is equivalent to (or start, step)
3980 PHINode *PN = nullptr;
3981 Value *Start = nullptr, *Step = nullptr;
3982 if (matchSimpleRecurrence(&I, PN, Start, Step) && DT.dominates(Step, PN))
3983 return replaceInstUsesWith(I, Builder.CreateOr(Start, Step));
3984
3985 // (A & B) | (C | D) or (C | D) | (A & B)
3986 // Can be combined if C or D is of type (A/B & X)
3987 if (match(&I, m_c_Or(m_OneUse(m_And(m_Value(A), m_Value(B))),
3988 m_OneUse(m_Or(m_Value(C), m_Value(D)))))) {
3989 // (A & B) | (C | ?) -> C | (? | (A & B))
3990 // (A & B) | (C | ?) -> C | (? | (A & B))
3991 // (A & B) | (C | ?) -> C | (? | (A & B))
3992 // (A & B) | (C | ?) -> C | (? | (A & B))
3993 // (C | ?) | (A & B) -> C | (? | (A & B))
3994 // (C | ?) | (A & B) -> C | (? | (A & B))
3995 // (C | ?) | (A & B) -> C | (? | (A & B))
3996 // (C | ?) | (A & B) -> C | (? | (A & B))
3997 if (match(D, m_OneUse(m_c_And(m_Specific(A), m_Value()))) ||
3998 match(D, m_OneUse(m_c_And(m_Specific(B), m_Value()))))
3999 return BinaryOperator::CreateOr(
4000 C, Builder.CreateOr(D, Builder.CreateAnd(A, B)));
4001 // (A & B) | (? | D) -> (? | (A & B)) | D
4002 // (A & B) | (? | D) -> (? | (A & B)) | D
4003 // (A & B) | (? | D) -> (? | (A & B)) | D
4004 // (A & B) | (? | D) -> (? | (A & B)) | D
4005 // (? | D) | (A & B) -> (? | (A & B)) | D
4006 // (? | D) | (A & B) -> (? | (A & B)) | D
4007 // (? | D) | (A & B) -> (? | (A & B)) | D
4008 // (? | D) | (A & B) -> (? | (A & B)) | D
4009 if (match(C, m_OneUse(m_c_And(m_Specific(A), m_Value()))) ||
4010 match(C, m_OneUse(m_c_And(m_Specific(B), m_Value()))))
4011 return BinaryOperator::CreateOr(
4012 Builder.CreateOr(C, Builder.CreateAnd(A, B)), D);
4013 }
4014
4015 if (Instruction *R = reassociateForUses(I, Builder))
4016 return R;
4017
4018 if (Instruction *Canonicalized = canonicalizeLogicFirst(I, Builder))
4019 return Canonicalized;
4020
4021 if (Instruction *Folded = foldLogicOfIsFPClass(I, Op0, Op1))
4022 return Folded;
4023
4024 if (Instruction *Res = foldBinOpOfDisplacedShifts(I))
4025 return Res;
4026
4027 // If we are setting the sign bit of a floating-point value, convert
4028 // this to fneg(fabs), then cast back to integer.
4029 //
4030 // If the result isn't immediately cast back to a float, this will increase
4031 // the number of instructions. This is still probably a better canonical form
4032 // as it enables FP value tracking.
4033 //
4034 // Assumes any IEEE-represented type has the sign bit in the high bit.
4035 // 4036 // This is generous interpretation of noimplicitfloat, this is not a true 4037 // floating-point operation. 4038 Value *CastOp; 4039 if (match(Op0, m_ElementWiseBitCast(m_Value(CastOp))) && 4040 match(Op1, m_SignMask()) && 4041 !Builder.GetInsertBlock()->getParent()->hasFnAttribute( 4042 Attribute::NoImplicitFloat)) { 4043 Type *EltTy = CastOp->getType()->getScalarType(); 4044 if (EltTy->isFloatingPointTy() && EltTy->isIEEE()) { 4045 Value *FAbs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, CastOp); 4046 Value *FNegFAbs = Builder.CreateFNeg(FAbs); 4047 return new BitCastInst(FNegFAbs, I.getType()); 4048 } 4049 } 4050 4051 // (X & C1) | C2 -> X & (C1 | C2) iff (X & C2) == C2 4052 if (match(Op0, m_OneUse(m_And(m_Value(X), m_APInt(C1)))) && 4053 match(Op1, m_APInt(C2))) { 4054 KnownBits KnownX = computeKnownBits(X, /*Depth*/ 0, &I); 4055 if ((KnownX.One & *C2) == *C2) 4056 return BinaryOperator::CreateAnd(X, ConstantInt::get(Ty, *C1 | *C2)); 4057 } 4058 4059 if (Instruction *Res = foldBitwiseLogicWithIntrinsics(I, Builder)) 4060 return Res; 4061 4062 if (Value *V = 4063 simplifyAndOrWithOpReplaced(Op0, Op1, Constant::getNullValue(Ty), 4064 /*SimplifyOnly*/ false, *this)) 4065 return BinaryOperator::CreateOr(V, Op1); 4066 if (Value *V = 4067 simplifyAndOrWithOpReplaced(Op1, Op0, Constant::getNullValue(Ty), 4068 /*SimplifyOnly*/ false, *this)) 4069 return BinaryOperator::CreateOr(Op0, V); 4070 4071 if (cast<PossiblyDisjointInst>(I).isDisjoint()) 4072 if (Value *V = SimplifyAddWithRemainder(I)) 4073 return replaceInstUsesWith(I, V); 4074 4075 return nullptr; 4076 } 4077 4078 /// A ^ B can be specified using other logic ops in a variety of patterns. We 4079 /// can fold these early and efficiently by morphing an existing instruction. 4080 static Instruction *foldXorToXor(BinaryOperator &I, 4081 InstCombiner::BuilderTy &Builder) { 4082 assert(I.getOpcode() == Instruction::Xor); 4083 Value *Op0 = I.getOperand(0); 4084 Value *Op1 = I.getOperand(1); 4085 Value *A, *B; 4086 4087 // There are 4 commuted variants for each of the basic patterns. 4088 4089 // (A & B) ^ (A | B) -> A ^ B 4090 // (A & B) ^ (B | A) -> A ^ B 4091 // (A | B) ^ (A & B) -> A ^ B 4092 // (A | B) ^ (B & A) -> A ^ B 4093 if (match(&I, m_c_Xor(m_And(m_Value(A), m_Value(B)), 4094 m_c_Or(m_Deferred(A), m_Deferred(B))))) 4095 return BinaryOperator::CreateXor(A, B); 4096 4097 // (A | ~B) ^ (~A | B) -> A ^ B 4098 // (~B | A) ^ (~A | B) -> A ^ B 4099 // (~A | B) ^ (A | ~B) -> A ^ B 4100 // (B | ~A) ^ (A | ~B) -> A ^ B 4101 if (match(&I, m_Xor(m_c_Or(m_Value(A), m_Not(m_Value(B))), 4102 m_c_Or(m_Not(m_Deferred(A)), m_Deferred(B))))) 4103 return BinaryOperator::CreateXor(A, B); 4104 4105 // (A & ~B) ^ (~A & B) -> A ^ B 4106 // (~B & A) ^ (~A & B) -> A ^ B 4107 // (~A & B) ^ (A & ~B) -> A ^ B 4108 // (B & ~A) ^ (A & ~B) -> A ^ B 4109 if (match(&I, m_Xor(m_c_And(m_Value(A), m_Not(m_Value(B))), 4110 m_c_And(m_Not(m_Deferred(A)), m_Deferred(B))))) 4111 return BinaryOperator::CreateXor(A, B); 4112 4113 // For the remaining cases we need to get rid of one of the operands. 4114 if (!Op0->hasOneUse() && !Op1->hasOneUse()) 4115 return nullptr; 4116 4117 // (A | B) ^ ~(A & B) -> ~(A ^ B) 4118 // (A | B) ^ ~(B & A) -> ~(A ^ B) 4119 // (A & B) ^ ~(A | B) -> ~(A ^ B) 4120 // (A & B) ^ ~(B | A) -> ~(A ^ B) 4121 // Complexity sorting ensures the not will be on the right side. 
4122 if ((match(Op0, m_Or(m_Value(A), m_Value(B))) && 4123 match(Op1, m_Not(m_c_And(m_Specific(A), m_Specific(B))))) || 4124 (match(Op0, m_And(m_Value(A), m_Value(B))) && 4125 match(Op1, m_Not(m_c_Or(m_Specific(A), m_Specific(B)))))) 4126 return BinaryOperator::CreateNot(Builder.CreateXor(A, B)); 4127 4128 return nullptr; 4129 } 4130 4131 Value *InstCombinerImpl::foldXorOfICmps(ICmpInst *LHS, ICmpInst *RHS, 4132 BinaryOperator &I) { 4133 assert(I.getOpcode() == Instruction::Xor && I.getOperand(0) == LHS && 4134 I.getOperand(1) == RHS && "Should be 'xor' with these operands"); 4135 4136 ICmpInst::Predicate PredL = LHS->getPredicate(), PredR = RHS->getPredicate(); 4137 Value *LHS0 = LHS->getOperand(0), *LHS1 = LHS->getOperand(1); 4138 Value *RHS0 = RHS->getOperand(0), *RHS1 = RHS->getOperand(1); 4139 4140 if (predicatesFoldable(PredL, PredR)) { 4141 if (LHS0 == RHS1 && LHS1 == RHS0) { 4142 std::swap(LHS0, LHS1); 4143 PredL = ICmpInst::getSwappedPredicate(PredL); 4144 } 4145 if (LHS0 == RHS0 && LHS1 == RHS1) { 4146 // (icmp1 A, B) ^ (icmp2 A, B) --> (icmp3 A, B) 4147 unsigned Code = getICmpCode(PredL) ^ getICmpCode(PredR); 4148 bool IsSigned = LHS->isSigned() || RHS->isSigned(); 4149 return getNewICmpValue(Code, IsSigned, LHS0, LHS1, Builder); 4150 } 4151 } 4152 4153 // TODO: This can be generalized to compares of non-signbits using 4154 // decomposeBitTestICmp(). It could be enhanced more by using (something like) 4155 // foldLogOpOfMaskedICmps(). 4156 const APInt *LC, *RC; 4157 if (match(LHS1, m_APInt(LC)) && match(RHS1, m_APInt(RC)) && 4158 LHS0->getType() == RHS0->getType() && 4159 LHS0->getType()->isIntOrIntVectorTy()) { 4160 // Convert xor of signbit tests to signbit test of xor'd values: 4161 // (X > -1) ^ (Y > -1) --> (X ^ Y) < 0 4162 // (X < 0) ^ (Y < 0) --> (X ^ Y) < 0 4163 // (X > -1) ^ (Y < 0) --> (X ^ Y) > -1 4164 // (X < 0) ^ (Y > -1) --> (X ^ Y) > -1 4165 bool TrueIfSignedL, TrueIfSignedR; 4166 if ((LHS->hasOneUse() || RHS->hasOneUse()) && 4167 isSignBitCheck(PredL, *LC, TrueIfSignedL) && 4168 isSignBitCheck(PredR, *RC, TrueIfSignedR)) { 4169 Value *XorLR = Builder.CreateXor(LHS0, RHS0); 4170 return TrueIfSignedL == TrueIfSignedR ? Builder.CreateIsNeg(XorLR) : 4171 Builder.CreateIsNotNeg(XorLR); 4172 } 4173 4174 // Fold (icmp pred1 X, C1) ^ (icmp pred2 X, C2) 4175 // into a single comparison using range-based reasoning. 4176 if (LHS0 == RHS0) { 4177 ConstantRange CR1 = ConstantRange::makeExactICmpRegion(PredL, *LC); 4178 ConstantRange CR2 = ConstantRange::makeExactICmpRegion(PredR, *RC); 4179 auto CRUnion = CR1.exactUnionWith(CR2); 4180 auto CRIntersect = CR1.exactIntersectWith(CR2); 4181 if (CRUnion && CRIntersect) 4182 if (auto CR = CRUnion->exactIntersectWith(CRIntersect->inverse())) { 4183 if (CR->isFullSet()) 4184 return ConstantInt::getTrue(I.getType()); 4185 if (CR->isEmptySet()) 4186 return ConstantInt::getFalse(I.getType()); 4187 4188 CmpInst::Predicate NewPred; 4189 APInt NewC, Offset; 4190 CR->getEquivalentICmp(NewPred, NewC, Offset); 4191 4192 if ((Offset.isZero() && (LHS->hasOneUse() || RHS->hasOneUse())) || 4193 (LHS->hasOneUse() && RHS->hasOneUse())) { 4194 Value *NewV = LHS0; 4195 Type *Ty = LHS0->getType(); 4196 if (!Offset.isZero()) 4197 NewV = Builder.CreateAdd(NewV, ConstantInt::get(Ty, Offset)); 4198 return Builder.CreateICmp(NewPred, NewV, 4199 ConstantInt::get(Ty, NewC)); 4200 } 4201 } 4202 } 4203 } 4204 4205 // Instead of trying to imitate the folds for and/or, decompose this 'xor' 4206 // into those logic ops. 
That is, try to turn this into an and-of-icmps 4207 // because we have many folds for that pattern. 4208 // 4209 // This is based on a truth table definition of xor: 4210 // X ^ Y --> (X | Y) & !(X & Y) 4211 if (Value *OrICmp = simplifyBinOp(Instruction::Or, LHS, RHS, SQ)) { 4212 // TODO: If OrICmp is true, then the definition of xor simplifies to !(X&Y). 4213 // TODO: If OrICmp is false, the whole thing is false (InstSimplify?). 4214 if (Value *AndICmp = simplifyBinOp(Instruction::And, LHS, RHS, SQ)) { 4215 // TODO: Independently handle cases where the 'and' side is a constant. 4216 ICmpInst *X = nullptr, *Y = nullptr; 4217 if (OrICmp == LHS && AndICmp == RHS) { 4218 // (LHS | RHS) & !(LHS & RHS) --> LHS & !RHS --> X & !Y 4219 X = LHS; 4220 Y = RHS; 4221 } 4222 if (OrICmp == RHS && AndICmp == LHS) { 4223 // !(LHS & RHS) & (LHS | RHS) --> !LHS & RHS --> !Y & X 4224 X = RHS; 4225 Y = LHS; 4226 } 4227 if (X && Y && (Y->hasOneUse() || canFreelyInvertAllUsersOf(Y, &I))) { 4228 // Invert the predicate of 'Y', thus inverting its output. 4229 Y->setPredicate(Y->getInversePredicate()); 4230 // So, are there other uses of Y? 4231 if (!Y->hasOneUse()) { 4232 // We need to adapt other uses of Y though. Get a value that matches 4233 // the original value of Y before inversion. While this increases 4234 // immediate instruction count, we have just ensured that all the 4235 // users are freely-invertible, so that 'not' *will* get folded away. 4236 BuilderTy::InsertPointGuard Guard(Builder); 4237 // Set insertion point to right after the Y. 4238 Builder.SetInsertPoint(Y->getParent(), ++(Y->getIterator())); 4239 Value *NotY = Builder.CreateNot(Y, Y->getName() + ".not"); 4240 // Replace all uses of Y (excluding the one in NotY!) with NotY. 4241 Worklist.pushUsersToWorkList(*Y); 4242 Y->replaceUsesWithIf(NotY, 4243 [NotY](Use &U) { return U.getUser() != NotY; }); 4244 } 4245 // All done. 4246 return Builder.CreateAnd(LHS, RHS); 4247 } 4248 } 4249 } 4250 4251 return nullptr; 4252 } 4253 4254 /// If we have a masked merge, in the canonical form of: 4255 /// (assuming that A only has one use.) 4256 /// | A | |B| 4257 /// ((x ^ y) & M) ^ y 4258 /// | D | 4259 /// * If M is inverted: 4260 /// | D | 4261 /// ((x ^ y) & ~M) ^ y 4262 /// We can canonicalize by swapping the final xor operand 4263 /// to eliminate the 'not' of the mask. 4264 /// ((x ^ y) & M) ^ x 4265 /// * If M is a constant, and D has one use, we transform to 'and' / 'or' ops 4266 /// because that shortens the dependency chain and improves analysis: 4267 /// (x & M) | (y & ~M) 4268 static Instruction *visitMaskedMerge(BinaryOperator &I, 4269 InstCombiner::BuilderTy &Builder) { 4270 Value *B, *X, *D; 4271 Value *M; 4272 if (!match(&I, m_c_Xor(m_Value(B), 4273 m_OneUse(m_c_And( 4274 m_CombineAnd(m_c_Xor(m_Deferred(B), m_Value(X)), 4275 m_Value(D)), 4276 m_Value(M)))))) 4277 return nullptr; 4278 4279 Value *NotM; 4280 if (match(M, m_Not(m_Value(NotM)))) { 4281 // De-invert the mask and swap the value in B part. 4282 Value *NewA = Builder.CreateAnd(D, NotM); 4283 return BinaryOperator::CreateXor(NewA, X); 4284 } 4285 4286 Constant *C; 4287 if (D->hasOneUse() && match(M, m_Constant(C))) { 4288 // Propagating undef is unsafe. Clamp undef elements to -1. 4289 Type *EltTy = C->getType()->getScalarType(); 4290 C = Constant::replaceUndefsWith(C, ConstantInt::getAllOnesValue(EltTy)); 4291 // Unfold. 
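// For example, with an i8 mask M == 0x0F:
// ((x ^ y) & 0x0F) ^ y --> (x & 0x0F) | (y & 0xF0)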
4292     Value *LHS = Builder.CreateAnd(X, C);
4293     Value *NotC = Builder.CreateNot(C);
4294     Value *RHS = Builder.CreateAnd(B, NotC);
4295     return BinaryOperator::CreateOr(LHS, RHS);
4296   }
4297
4298   return nullptr;
4299 }
4300
4301 static Instruction *foldNotXor(BinaryOperator &I,
4302                                InstCombiner::BuilderTy &Builder) {
4303   Value *X, *Y;
4304   // FIXME: one-use check is not needed in general, but currently we are unable
4305   // to fold 'not' into 'icmp', if that 'icmp' has multiple uses. (D35182)
4306   if (!match(&I, m_Not(m_OneUse(m_Xor(m_Value(X), m_Value(Y))))))
4307     return nullptr;
4308
4309   auto hasCommonOperand = [](Value *A, Value *B, Value *C, Value *D) {
4310     return A == C || A == D || B == C || B == D;
4311   };
4312
4313   Value *A, *B, *C, *D;
4314   // Canonicalize ~((A & B) ^ (A | ?)) -> (A & B) | ~(A | ?)
4315   // 4 commuted variants
4316   if (match(X, m_And(m_Value(A), m_Value(B))) &&
4317       match(Y, m_Or(m_Value(C), m_Value(D))) && hasCommonOperand(A, B, C, D)) {
4318     Value *NotY = Builder.CreateNot(Y);
4319     return BinaryOperator::CreateOr(X, NotY);
4320   };
4321
4322   // Canonicalize ~((A | ?) ^ (A & B)) -> (A & B) | ~(A | ?)
4323   // 4 commuted variants
4324   if (match(Y, m_And(m_Value(A), m_Value(B))) &&
4325       match(X, m_Or(m_Value(C), m_Value(D))) && hasCommonOperand(A, B, C, D)) {
4326     Value *NotX = Builder.CreateNot(X);
4327     return BinaryOperator::CreateOr(Y, NotX);
4328   };
4329
4330   return nullptr;
4331 }
4332
4333 /// Canonicalize a shifty way to code absolute value to the more common pattern
4334 /// that uses negation and select.
4335 static Instruction *canonicalizeAbs(BinaryOperator &Xor,
4336                                     InstCombiner::BuilderTy &Builder) {
4337   assert(Xor.getOpcode() == Instruction::Xor && "Expected an xor instruction.");
4338
4339   // There are 4 potential commuted variants. Move the 'ashr' candidate to Op1.
4340   // We're relying on the fact that we only do this transform when the shift has
4341   // exactly 2 uses and the add has exactly 1 use (otherwise, we might increase
4342   // instructions).
4343   Value *Op0 = Xor.getOperand(0), *Op1 = Xor.getOperand(1);
4344   if (Op0->hasNUses(2))
4345     std::swap(Op0, Op1);
4346
4347   Type *Ty = Xor.getType();
4348   Value *A;
4349   const APInt *ShAmt;
4350   if (match(Op1, m_AShr(m_Value(A), m_APInt(ShAmt))) &&
4351       Op1->hasNUses(2) && *ShAmt == Ty->getScalarSizeInBits() - 1 &&
4352       match(Op0, m_OneUse(m_c_Add(m_Specific(A), m_Specific(Op1))))) {
4353     // Op1 = ashr i32 A, 31   ; smear the sign bit
4354     // xor (add A, Op1), Op1  ; add -1 and flip bits if negative
4355     // --> (A < 0) ? -A : A
4356     Value *IsNeg = Builder.CreateIsNeg(A);
4357     // Copy the nsw flags from the add to the negate.
4358     auto *Add = cast<BinaryOperator>(Op0);
4359     Value *NegA = Add->hasNoUnsignedWrap()
4360                       ? Constant::getNullValue(A->getType())
4361                       : Builder.CreateNeg(A, "", Add->hasNoSignedWrap());
4362     return SelectInst::Create(IsNeg, NegA, A);
4363   }
4364   return nullptr;
4365 }
4366
4367 static bool canFreelyInvert(InstCombiner &IC, Value *Op,
4368                             Instruction *IgnoredUser) {
4369   auto *I = dyn_cast<Instruction>(Op);
4370   return I && IC.isFreeToInvert(I, /*WillInvertAllUses=*/true) &&
4371          IC.canFreelyInvertAllUsersOf(I, IgnoredUser);
4372 }
4373
4374 static Value *freelyInvert(InstCombinerImpl &IC, Value *Op,
4375                            Instruction *IgnoredUser) {
4376   auto *I = cast<Instruction>(Op);
4377   IC.Builder.SetInsertPoint(*I->getInsertionPointAfterDef());
4378   Value *NotOp = IC.Builder.CreateNot(Op, Op->getName() + ".not");
4379   Op->replaceUsesWithIf(NotOp,
4380                         [NotOp](Use &U) { return U.getUser() != NotOp; });
4381   IC.freelyInvertAllUsersOf(NotOp, IgnoredUser);
4382   return NotOp;
4383 }
4384
4385 // Transform
4386 //   z = ~(x &/| y)
4387 // into:
4388 //   z = ((~x) |/& (~y))
4389 // iff both x and y are free to invert and all uses of z can be freely updated.
4390 bool InstCombinerImpl::sinkNotIntoLogicalOp(Instruction &I) {
4391   Value *Op0, *Op1;
4392   if (!match(&I, m_LogicalOp(m_Value(Op0), m_Value(Op1))))
4393     return false;
4394
4395   // If this logic op has not been simplified yet, just bail out and let that
4396   // happen first. Otherwise, the code below may wrongly invert.
4397   if (Op0 == Op1)
4398     return false;
4399
4400   Instruction::BinaryOps NewOpc =
4401       match(&I, m_LogicalAnd()) ? Instruction::Or : Instruction::And;
4402   bool IsBinaryOp = isa<BinaryOperator>(I);
4403
4404   // Can our users be adapted?
4405   if (!InstCombiner::canFreelyInvertAllUsersOf(&I, /*IgnoredUser=*/nullptr))
4406     return false;
4407
4408   // And can the operands be adapted?
4409   if (!canFreelyInvert(*this, Op0, &I) || !canFreelyInvert(*this, Op1, &I))
4410     return false;
4411
4412   Op0 = freelyInvert(*this, Op0, &I);
4413   Op1 = freelyInvert(*this, Op1, &I);
4414
4415   Builder.SetInsertPoint(*I.getInsertionPointAfterDef());
4416   Value *NewLogicOp;
4417   if (IsBinaryOp)
4418     NewLogicOp = Builder.CreateBinOp(NewOpc, Op0, Op1, I.getName() + ".not");
4419   else
4420     NewLogicOp =
4421         Builder.CreateLogicalOp(NewOpc, Op0, Op1, I.getName() + ".not");
4422
4423   replaceInstUsesWith(I, NewLogicOp);
4424   // We cannot just create an outer `not`; it will most likely be immediately
4425   // folded back, reconstructing our initial pattern, and causing an
4426   // infinite combine loop, so immediately manually fold it away.
4427   freelyInvertAllUsersOf(NewLogicOp);
4428   return true;
4429 }
4430
4431 // Transform
4432 //   z = (~x) &/| y
4433 // into:
4434 //   z = ~(x |/& (~y))
4435 // iff y is free to invert and all uses of z can be freely updated.
4436 bool InstCombinerImpl::sinkNotIntoOtherHandOfLogicalOp(Instruction &I) {
4437   Value *Op0, *Op1;
4438   if (!match(&I, m_LogicalOp(m_Value(Op0), m_Value(Op1))))
4439     return false;
4440   Instruction::BinaryOps NewOpc =
4441       match(&I, m_LogicalAnd()) ? Instruction::Or : Instruction::And;
4442   bool IsBinaryOp = isa<BinaryOperator>(I);
4443
4444   Value *NotOp0 = nullptr;
4445   Value *NotOp1 = nullptr;
4446   Value **OpToInvert = nullptr;
4447   if (match(Op0, m_Not(m_Value(NotOp0))) && canFreelyInvert(*this, Op1, &I)) {
4448     Op0 = NotOp0;
4449     OpToInvert = &Op1;
4450   } else if (match(Op1, m_Not(m_Value(NotOp1))) &&
4451              canFreelyInvert(*this, Op0, &I)) {
4452     Op1 = NotOp1;
4453     OpToInvert = &Op0;
4454   } else
4455     return false;
4456
4457   // And can our users be adapted?
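  // (The inversion is only profitable when every user of I can absorb an
  // extra 'not' for free, e.g. by inverting a select or branch condition.)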
4458   if (!InstCombiner::canFreelyInvertAllUsersOf(&I, /*IgnoredUser=*/nullptr))
4459     return false;
4460
4461   *OpToInvert = freelyInvert(*this, *OpToInvert, &I);
4462
4463   Builder.SetInsertPoint(*I.getInsertionPointAfterDef());
4464   Value *NewBinOp;
4465   if (IsBinaryOp)
4466     NewBinOp = Builder.CreateBinOp(NewOpc, Op0, Op1, I.getName() + ".not");
4467   else
4468     NewBinOp = Builder.CreateLogicalOp(NewOpc, Op0, Op1, I.getName() + ".not");
4469   replaceInstUsesWith(I, NewBinOp);
4470   // We cannot just create an outer `not`; it will most likely be immediately
4471   // folded back, reconstructing our initial pattern, and causing an
4472   // infinite combine loop, so immediately manually fold it away.
4473   freelyInvertAllUsersOf(NewBinOp);
4474   return true;
4475 }
4476
4477 Instruction *InstCombinerImpl::foldNot(BinaryOperator &I) {
4478   Value *NotOp;
4479   if (!match(&I, m_Not(m_Value(NotOp))))
4480     return nullptr;
4481
4482   // Apply DeMorgan's Law for 'nand' / 'nor' logic with an inverted operand.
4483   // We must eliminate the and/or (one-use) for these transforms to not increase
4484   // the instruction count.
4485   //
4486   // ~(~X & Y) --> (X | ~Y)
4487   // ~(Y & ~X) --> (X | ~Y)
4488   //
4489   // Note: The logical matches do not check for the commuted patterns because
4490   // those are handled via SimplifySelectsFeedingBinaryOp().
4491   Type *Ty = I.getType();
4492   Value *X, *Y;
4493   if (match(NotOp, m_OneUse(m_c_And(m_Not(m_Value(X)), m_Value(Y))))) {
4494     Value *NotY = Builder.CreateNot(Y, Y->getName() + ".not");
4495     return BinaryOperator::CreateOr(X, NotY);
4496   }
4497   if (match(NotOp, m_OneUse(m_LogicalAnd(m_Not(m_Value(X)), m_Value(Y))))) {
4498     Value *NotY = Builder.CreateNot(Y, Y->getName() + ".not");
4499     return SelectInst::Create(X, ConstantInt::getTrue(Ty), NotY);
4500   }
4501
4502   // ~(~X | Y) --> (X & ~Y)
4503   // ~(Y | ~X) --> (X & ~Y)
4504   if (match(NotOp, m_OneUse(m_c_Or(m_Not(m_Value(X)), m_Value(Y))))) {
4505     Value *NotY = Builder.CreateNot(Y, Y->getName() + ".not");
4506     return BinaryOperator::CreateAnd(X, NotY);
4507   }
4508   if (match(NotOp, m_OneUse(m_LogicalOr(m_Not(m_Value(X)), m_Value(Y))))) {
4509     Value *NotY = Builder.CreateNot(Y, Y->getName() + ".not");
4510     return SelectInst::Create(X, NotY, ConstantInt::getFalse(Ty));
4511   }
4512
4513   // Is this a 'not' (~) fed by a binary operator?
4514   BinaryOperator *NotVal;
4515   if (match(NotOp, m_BinOp(NotVal))) {
4516     // ~((-X) | Y) --> (X - 1) & (~Y)
4517     if (match(NotVal,
4518               m_OneUse(m_c_Or(m_OneUse(m_Neg(m_Value(X))), m_Value(Y))))) {
4519       Value *DecX = Builder.CreateAdd(X, ConstantInt::getAllOnesValue(Ty));
4520       Value *NotY = Builder.CreateNot(Y);
4521       return BinaryOperator::CreateAnd(DecX, NotY);
4522     }
4523
4524     // ~(~X >>s Y) --> (X >>s Y)
4525     if (match(NotVal, m_AShr(m_Not(m_Value(X)), m_Value(Y))))
4526       return BinaryOperator::CreateAShr(X, Y);
4527
4528     // Treat lshr with non-negative operand as ashr.
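    // (When X is negative, ~X is non-negative, so its logical and arithmetic
    // right shifts agree; inverting the result then yields X >>s Y.)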
4529     // ~(~X >>u Y) --> (X >>s Y) iff X is known negative
4530     if (match(NotVal, m_LShr(m_Not(m_Value(X)), m_Value(Y))) &&
4531         isKnownNegative(X, SQ.getWithInstruction(NotVal)))
4532       return BinaryOperator::CreateAShr(X, Y);
4533
4534     // Bit-hack form of a signbit test for iN type:
4535     // ~(X >>s (N - 1)) --> sext i1 (X > -1) to iN
4536     unsigned FullShift = Ty->getScalarSizeInBits() - 1;
4537     if (match(NotVal, m_OneUse(m_AShr(m_Value(X), m_SpecificInt(FullShift))))) {
4538       Value *IsNotNeg = Builder.CreateIsNotNeg(X, "isnotneg");
4539       return new SExtInst(IsNotNeg, Ty);
4540     }
4541
4542     // If we are inverting a right-shifted constant, we may be able to eliminate
4543     // the 'not' by inverting the constant and using the opposite shift type.
4544     // Canonicalization rules ensure that only a negative constant uses 'ashr',
4545     // but we must check that in case that transform has not fired yet.
4546
4547     // ~(C >>s Y) --> ~C >>u Y (when inverting the replicated sign bits)
4548     Constant *C;
4549     if (match(NotVal, m_AShr(m_Constant(C), m_Value(Y))) &&
4550         match(C, m_Negative()))
4551       return BinaryOperator::CreateLShr(ConstantExpr::getNot(C), Y);
4552
4553     // ~(C >>u Y) --> ~C >>s Y (when inverting the replicated sign bits)
4554     if (match(NotVal, m_LShr(m_Constant(C), m_Value(Y))) &&
4555         match(C, m_NonNegative()))
4556       return BinaryOperator::CreateAShr(ConstantExpr::getNot(C), Y);
4557
4558     // ~(X + C) --> ~C - X
4559     if (match(NotVal, m_Add(m_Value(X), m_ImmConstant(C))))
4560       return BinaryOperator::CreateSub(ConstantExpr::getNot(C), X);
4561
4562     // ~(X - Y) --> ~X + Y
4563     // FIXME: is it really beneficial to sink the `not` here?
4564     if (match(NotVal, m_Sub(m_Value(X), m_Value(Y))))
4565       if (isa<Constant>(X) || NotVal->hasOneUse())
4566         return BinaryOperator::CreateAdd(Builder.CreateNot(X), Y);
4567
4568     // ~(~X + Y) --> X - Y
4569     if (match(NotVal, m_c_Add(m_Not(m_Value(X)), m_Value(Y))))
4570       return BinaryOperator::CreateWithCopiedFlags(Instruction::Sub, X, Y,
4571                                                    NotVal);
4572   }
4573
4574   // not (cmp A, B) = !cmp A, B
4575   CmpPredicate Pred;
4576   if (match(NotOp, m_Cmp(Pred, m_Value(), m_Value())) &&
4577       (NotOp->hasOneUse() ||
4578        InstCombiner::canFreelyInvertAllUsersOf(cast<Instruction>(NotOp),
4579                                                /*IgnoredUser=*/nullptr))) {
4580     cast<CmpInst>(NotOp)->setPredicate(CmpInst::getInversePredicate(Pred));
4581     freelyInvertAllUsersOf(NotOp);
4582     return &I;
4583   }
4584
4585   // Move a 'not' ahead of casts of a bool to enable logic reduction:
4586   // not (bitcast (sext i1 X)) --> bitcast (sext (not i1 X))
4587   if (match(NotOp, m_OneUse(m_BitCast(m_OneUse(m_SExt(m_Value(X)))))) && X->getType()->isIntOrIntVectorTy(1)) {
4588     Type *SextTy = cast<BitCastOperator>(NotOp)->getSrcTy();
4589     Value *NotX = Builder.CreateNot(X);
4590     Value *Sext = Builder.CreateSExt(NotX, SextTy);
4591     return new BitCastInst(Sext, Ty);
4592   }
4593
4594   if (auto *NotOpI = dyn_cast<Instruction>(NotOp))
4595     if (sinkNotIntoLogicalOp(*NotOpI))
4596       return &I;
4597
4598   // Eliminate a bitwise 'not' op of 'not' min/max by inverting the min/max:
4599   // ~min(~X, ~Y) --> max(X, Y)
4600   // ~max(~X, Y) --> min(X, ~Y)
4601   auto *II = dyn_cast<IntrinsicInst>(NotOp);
4602   if (II && II->hasOneUse()) {
4603     if (match(NotOp, m_c_MaxOrMin(m_Not(m_Value(X)), m_Value(Y)))) {
4604       Intrinsic::ID InvID = getInverseMinMaxIntrinsic(II->getIntrinsicID());
4605       Value *NotY = Builder.CreateNot(Y);
4606       Value *InvMaxMin = Builder.CreateBinaryIntrinsic(InvID, X, NotY);
4607       return replaceInstUsesWith(I, InvMaxMin);
4608     }
4609
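    // Fold the 'not' of an is_fpclass test by complementing its class mask:
    // ~is_fpclass(X, Mask) --> is_fpclass(X, ~Mask & fcAllFlags)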
4610     if (II->getIntrinsicID() == Intrinsic::is_fpclass) {
4611       ConstantInt *ClassMask = cast<ConstantInt>(II->getArgOperand(1));
4612       II->setArgOperand(
4613           1, ConstantInt::get(ClassMask->getType(),
4614                               ~ClassMask->getZExtValue() & fcAllFlags));
4615       return replaceInstUsesWith(I, II);
4616     }
4617   }
4618
4619   if (NotOp->hasOneUse()) {
4620     // Pull 'not' into operands of select if both operands are one-use compares
4621     // or one is one-use compare and the other one is a constant.
4622     // Inverting the predicates eliminates the 'not' operation.
4623     // Example:
4624     //   not (select ?, (cmp TPred, ?, ?), (cmp FPred, ?, ?)) -->
4625     //     select ?, (cmp InvTPred, ?, ?), (cmp InvFPred, ?, ?)
4626     //   not (select ?, (cmp TPred, ?, ?), true) -->
4627     //     select ?, (cmp InvTPred, ?, ?), false
4628     if (auto *Sel = dyn_cast<SelectInst>(NotOp)) {
4629       Value *TV = Sel->getTrueValue();
4630       Value *FV = Sel->getFalseValue();
4631       auto *CmpT = dyn_cast<CmpInst>(TV);
4632       auto *CmpF = dyn_cast<CmpInst>(FV);
4633       bool InvertibleT = (CmpT && CmpT->hasOneUse()) || isa<Constant>(TV);
4634       bool InvertibleF = (CmpF && CmpF->hasOneUse()) || isa<Constant>(FV);
4635       if (InvertibleT && InvertibleF) {
4636         if (CmpT)
4637           CmpT->setPredicate(CmpT->getInversePredicate());
4638         else
4639           Sel->setTrueValue(ConstantExpr::getNot(cast<Constant>(TV)));
4640         if (CmpF)
4641           CmpF->setPredicate(CmpF->getInversePredicate());
4642         else
4643           Sel->setFalseValue(ConstantExpr::getNot(cast<Constant>(FV)));
4644         return replaceInstUsesWith(I, Sel);
4645       }
4646     }
4647   }
4648
4649   if (Instruction *NewXor = foldNotXor(I, Builder))
4650     return NewXor;
4651
4652   // TODO: Could handle multi-use better by checking if all uses of NotOp (other
4653   // than I) can be inverted.
4654   if (Value *R = getFreelyInverted(NotOp, NotOp->hasOneUse(), &Builder))
4655     return replaceInstUsesWith(I, R);
4656
4657   return nullptr;
4658 }
4659
4660 // FIXME: We use commutative matchers (m_c_*) for some, but not all, matches
4661 // here. We should standardize that construct where it is needed or choose some
4662 // other way to ensure that commutated variants of patterns are not missed.
4663 Instruction *InstCombinerImpl::visitXor(BinaryOperator &I) {
4664   if (Value *V = simplifyXorInst(I.getOperand(0), I.getOperand(1),
4665                                  SQ.getWithInstruction(&I)))
4666     return replaceInstUsesWith(I, V);
4667
4668   if (SimplifyAssociativeOrCommutative(I))
4669     return &I;
4670
4671   if (Instruction *X = foldVectorBinop(I))
4672     return X;
4673
4674   if (Instruction *Phi = foldBinopWithPhiOperands(I))
4675     return Phi;
4676
4677   if (Instruction *NewXor = foldXorToXor(I, Builder))
4678     return NewXor;
4679
4680   // (A&B)^(A&C) -> A&(B^C) etc
4681   if (Value *V = foldUsingDistributiveLaws(I))
4682     return replaceInstUsesWith(I, V);
4683
4684   // See if we can simplify any instructions used by the instruction whose sole
4685   // purpose is to compute bits we don't care about.
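  // (For example, operand bits that never influence any user of this xor are
  // treated as don't-care, which can shrink or remove the feeding ops.)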
4686   if (SimplifyDemandedInstructionBits(I))
4687     return &I;
4688
4689   if (Instruction *R = foldNot(I))
4690     return R;
4691
4692   if (Instruction *R = foldBinOpShiftWithShift(I))
4693     return R;
4694
4695   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
4696   Value *X, *Y, *M;
4697
4698   // (X | Y) ^ M -> (X ^ M) ^ Y
4699   // (X | Y) ^ M -> (Y ^ M) ^ X
4700   if (match(&I, m_c_Xor(m_OneUse(m_DisjointOr(m_Value(X), m_Value(Y))),
4701                         m_Value(M)))) {
4702     if (Value *XorAC = simplifyXorInst(X, M, SQ.getWithInstruction(&I)))
4703       return BinaryOperator::CreateXor(XorAC, Y);
4704
4705     if (Value *XorBC = simplifyXorInst(Y, M, SQ.getWithInstruction(&I)))
4706       return BinaryOperator::CreateXor(XorBC, X);
4707   }
4708
4709   // Fold (X & M) ^ (Y & ~M) -> (X & M) | (Y & ~M)
4710   // This is a special case in haveNoCommonBitsSet, but the computeKnownBits
4711   // calls in there are unnecessary as SimplifyDemandedInstructionBits should
4712   // have already taken care of those cases.
4713   if (match(&I, m_c_Xor(m_c_And(m_Not(m_Value(M)), m_Value()),
4714                         m_c_And(m_Deferred(M), m_Value())))) {
4715     if (isGuaranteedNotToBeUndef(M))
4716       return BinaryOperator::CreateDisjointOr(Op0, Op1);
4717     else
4718       return BinaryOperator::CreateOr(Op0, Op1);
4719   }
4720
4721   if (Instruction *Xor = visitMaskedMerge(I, Builder))
4722     return Xor;
4723
4724   Constant *C1;
4725   if (match(Op1, m_Constant(C1))) {
4726     Constant *C2;
4727
4728     if (match(Op0, m_OneUse(m_Or(m_Value(X), m_ImmConstant(C2)))) &&
4729         match(C1, m_ImmConstant())) {
4730       // (X | C2) ^ C1 --> (X & ~C2) ^ (C1^C2)
4731       C2 = Constant::replaceUndefsWith(
4732           C2, Constant::getAllOnesValue(C2->getType()->getScalarType()));
4733       Value *And = Builder.CreateAnd(
4734           X, Constant::mergeUndefsWith(ConstantExpr::getNot(C2), C1));
4735       return BinaryOperator::CreateXor(
4736           And, Constant::mergeUndefsWith(ConstantExpr::getXor(C1, C2), C1));
4737     }
4738
4739     // Use DeMorgan and reassociation to eliminate a 'not' op.
4740     if (match(Op0, m_OneUse(m_Or(m_Not(m_Value(X)), m_Constant(C2))))) {
4741       // (~X | C2) ^ C1 --> ((X & ~C2) ^ -1) ^ C1 --> (X & ~C2) ^ ~C1
4742       Value *And = Builder.CreateAnd(X, ConstantExpr::getNot(C2));
4743       return BinaryOperator::CreateXor(And, ConstantExpr::getNot(C1));
4744     }
4745     if (match(Op0, m_OneUse(m_And(m_Not(m_Value(X)), m_Constant(C2))))) {
4746       // (~X & C2) ^ C1 --> ((X | ~C2) ^ -1) ^ C1 --> (X | ~C2) ^ ~C1
4747       Value *Or = Builder.CreateOr(X, ConstantExpr::getNot(C2));
4748       return BinaryOperator::CreateXor(Or, ConstantExpr::getNot(C1));
4749     }
4750
4751     // Convert xor ([trunc] (ashr X, BW-1)), C =>
4752     //   select(X >s -1, C, ~C)
4753     // The ashr creates "AllZeroOrAllOne's", which then optionally inverts the
4754     // constant depending on whether this input is less than 0.
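    // e.g. (ashr i8 %x, 7) ^ 42 --> select (icmp sgt i8 %x, -1), i8 42, i8 -43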
4755     const APInt *CA;
4756     if (match(Op0, m_OneUse(m_TruncOrSelf(
4757                        m_AShr(m_Value(X), m_APIntAllowPoison(CA))))) &&
4758         *CA == X->getType()->getScalarSizeInBits() - 1 &&
4759         !match(C1, m_AllOnes())) {
4760       assert(!C1->isZeroValue() && "Unexpected xor with 0");
4761       Value *IsNotNeg = Builder.CreateIsNotNeg(X);
4762       return SelectInst::Create(IsNotNeg, Op1, Builder.CreateNot(Op1));
4763     }
4764   }
4765
4766   Type *Ty = I.getType();
4767   {
4768     const APInt *RHSC;
4769     if (match(Op1, m_APInt(RHSC))) {
4770       Value *X;
4771       const APInt *C;
4772       // (C - X) ^ signmaskC --> (C + signmaskC) - X
4773       if (RHSC->isSignMask() && match(Op0, m_Sub(m_APInt(C), m_Value(X))))
4774         return BinaryOperator::CreateSub(ConstantInt::get(Ty, *C + *RHSC), X);
4775
4776       // (X + C) ^ signmaskC --> X + (C + signmaskC)
4777       if (RHSC->isSignMask() && match(Op0, m_Add(m_Value(X), m_APInt(C))))
4778         return BinaryOperator::CreateAdd(X, ConstantInt::get(Ty, *C + *RHSC));
4779
4780       // (X | C) ^ RHSC --> X ^ (C ^ RHSC) iff X & C == 0
4781       if (match(Op0, m_Or(m_Value(X), m_APInt(C))) &&
4782           MaskedValueIsZero(X, *C, 0, &I))
4783         return BinaryOperator::CreateXor(X, ConstantInt::get(Ty, *C ^ *RHSC));
4784
4785       // When X is a power-of-two or zero and zero input is poison:
4786       // ctlz(i32 X) ^ 31 --> cttz(X)
4787       // cttz(i32 X) ^ 31 --> ctlz(X)
4788       auto *II = dyn_cast<IntrinsicInst>(Op0);
4789       if (II && II->hasOneUse() && *RHSC == Ty->getScalarSizeInBits() - 1) {
4790         Intrinsic::ID IID = II->getIntrinsicID();
4791         if ((IID == Intrinsic::ctlz || IID == Intrinsic::cttz) &&
4792             match(II->getArgOperand(1), m_One()) &&
4793             isKnownToBeAPowerOfTwo(II->getArgOperand(0), /*OrZero*/ true)) {
4794           IID = (IID == Intrinsic::ctlz) ? Intrinsic::cttz : Intrinsic::ctlz;
4795           Function *F =
4796               Intrinsic::getOrInsertDeclaration(II->getModule(), IID, Ty);
4797           return CallInst::Create(F, {II->getArgOperand(0), Builder.getTrue()});
4798         }
4799       }
4800
4801       // If RHSC is inverting the remaining bits of shifted X,
4802       // canonicalize to a 'not' before the shift to help SCEV and codegen:
4803       // (X << C) ^ RHSC --> ~X << C
4804       if (match(Op0, m_OneUse(m_Shl(m_Value(X), m_APInt(C)))) &&
4805           *RHSC == APInt::getAllOnes(Ty->getScalarSizeInBits()).shl(*C)) {
4806         Value *NotX = Builder.CreateNot(X);
4807         return BinaryOperator::CreateShl(NotX, ConstantInt::get(Ty, *C));
4808       }
4809       // (X >>u C) ^ RHSC --> ~X >>u C
4810       if (match(Op0, m_OneUse(m_LShr(m_Value(X), m_APInt(C)))) &&
4811           *RHSC == APInt::getAllOnes(Ty->getScalarSizeInBits()).lshr(*C)) {
4812         Value *NotX = Builder.CreateNot(X);
4813         return BinaryOperator::CreateLShr(NotX, ConstantInt::get(Ty, *C));
4814       }
4815       // TODO: We could handle 'ashr' here as well. That would be matching
4816       // a 'not' op and moving it before the shift. Doing that requires
4817       // preventing the inverse fold in canShiftBinOpWithConstantRHS().
4818     }
4819
4820     // If we are XORing the sign bit of a floating-point value, convert
4821     // this to fneg, then cast back to integer.
4822     //
4823     // This is a generous interpretation of noimplicitfloat; this is not a true
4824     // floating-point operation.
4825     //
4826     // Assumes any IEEE-represented type has the sign bit in the high bit.
4827     // TODO: Unify with APInt matcher. This version allows undef unlike m_APInt
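    // e.g. (bitcast float %f to i32) ^ 0x80000000
    //        --> bitcast (fneg float %f) to i32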
4828     Value *CastOp;
4829     if (match(Op0, m_ElementWiseBitCast(m_Value(CastOp))) &&
4830         match(Op1, m_SignMask()) &&
4831         !Builder.GetInsertBlock()->getParent()->hasFnAttribute(
4832             Attribute::NoImplicitFloat)) {
4833       Type *EltTy = CastOp->getType()->getScalarType();
4834       if (EltTy->isFloatingPointTy() && EltTy->isIEEE()) {
4835         Value *FNeg = Builder.CreateFNeg(CastOp);
4836         return new BitCastInst(FNeg, I.getType());
4837       }
4838     }
4839   }
4840
4841   // FIXME: This should not be limited to scalar (pull into APInt match above).
4842   {
4843     Value *X;
4844     ConstantInt *C1, *C2, *C3;
4845     // ((X^C1) >> C2) ^ C3 -> (X>>C2) ^ ((C1>>C2)^C3)
4846     if (match(Op1, m_ConstantInt(C3)) &&
4847         match(Op0, m_LShr(m_Xor(m_Value(X), m_ConstantInt(C1)),
4848                           m_ConstantInt(C2))) &&
4849         Op0->hasOneUse()) {
4850       // fold (C1 >> C2) ^ C3
4851       APInt FoldConst = C1->getValue().lshr(C2->getValue());
4852       FoldConst ^= C3->getValue();
4853       // Prepare the two operands.
4854       auto *Opnd0 = Builder.CreateLShr(X, C2);
4855       Opnd0->takeName(Op0);
4856       return BinaryOperator::CreateXor(Opnd0, ConstantInt::get(Ty, FoldConst));
4857     }
4858   }
4859
4860   if (Instruction *FoldedLogic = foldBinOpIntoSelectOrPhi(I))
4861     return FoldedLogic;
4862
4863   // Y ^ (X | Y) --> X & ~Y
4864   // Y ^ (Y | X) --> X & ~Y
4865   if (match(Op1, m_OneUse(m_c_Or(m_Value(X), m_Specific(Op0)))))
4866     return BinaryOperator::CreateAnd(X, Builder.CreateNot(Op0));
4867   // (X | Y) ^ Y --> X & ~Y
4868   // (Y | X) ^ Y --> X & ~Y
4869   if (match(Op0, m_OneUse(m_c_Or(m_Value(X), m_Specific(Op1)))))
4870     return BinaryOperator::CreateAnd(X, Builder.CreateNot(Op1));
4871
4872   // Y ^ (X & Y) --> ~X & Y
4873   // Y ^ (Y & X) --> ~X & Y
4874   if (match(Op1, m_OneUse(m_c_And(m_Value(X), m_Specific(Op0)))))
4875     return BinaryOperator::CreateAnd(Op0, Builder.CreateNot(X));
4876   // (X & Y) ^ Y --> ~X & Y
4877   // (Y & X) ^ Y --> ~X & Y
4878   // Canonical form is (X & C) ^ C; don't touch that.
4879   // TODO: A 'not' op is better for analysis and codegen, but demanded bits must
4880   // be fixed to prefer that (otherwise we get infinite looping).
4881   if (!match(Op1, m_Constant()) &&
4882       match(Op0, m_OneUse(m_c_And(m_Value(X), m_Specific(Op1)))))
4883     return BinaryOperator::CreateAnd(Op1, Builder.CreateNot(X));
4884
4885   Value *A, *B, *C;
4886   // (A ^ B) ^ (A | C) --> (~A & C) ^ B -- There are 4 commuted variants.
4887   if (match(&I, m_c_Xor(m_OneUse(m_Xor(m_Value(A), m_Value(B))),
4888                         m_OneUse(m_c_Or(m_Deferred(A), m_Value(C))))))
4889     return BinaryOperator::CreateXor(
4890         Builder.CreateAnd(Builder.CreateNot(A), C), B);
4891
4892   // (A ^ B) ^ (B | C) --> (~B & C) ^ A -- There are 4 commuted variants.
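  // (Per-bit: when B is set both sides reduce to A; when B is clear both
  // reduce to A ^ C.)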
4893   if (match(&I, m_c_Xor(m_OneUse(m_Xor(m_Value(A), m_Value(B))),
4894                         m_OneUse(m_c_Or(m_Deferred(B), m_Value(C))))))
4895     return BinaryOperator::CreateXor(
4896         Builder.CreateAnd(Builder.CreateNot(B), C), A);
4897
4898   // (A & B) ^ (A ^ B) -> (A | B)
4899   if (match(Op0, m_And(m_Value(A), m_Value(B))) &&
4900       match(Op1, m_c_Xor(m_Specific(A), m_Specific(B))))
4901     return BinaryOperator::CreateOr(A, B);
4902   // (A ^ B) ^ (A & B) -> (A | B)
4903   if (match(Op0, m_Xor(m_Value(A), m_Value(B))) &&
4904       match(Op1, m_c_And(m_Specific(A), m_Specific(B))))
4905     return BinaryOperator::CreateOr(A, B);
4906
4907   // (A & ~B) ^ ~A -> ~(A & B)
4908   // (~B & A) ^ ~A -> ~(A & B)
4909   if (match(Op0, m_c_And(m_Value(A), m_Not(m_Value(B)))) &&
4910       match(Op1, m_Not(m_Specific(A))))
4911     return BinaryOperator::CreateNot(Builder.CreateAnd(A, B));
4912
4913   // (~A & B) ^ A --> A | B -- There are 4 commuted variants.
4914   if (match(&I, m_c_Xor(m_c_And(m_Not(m_Value(A)), m_Value(B)), m_Deferred(A))))
4915     return BinaryOperator::CreateOr(A, B);
4916
4917   // (~A | B) ^ A --> ~(A & B)
4918   if (match(Op0, m_OneUse(m_c_Or(m_Not(m_Specific(Op1)), m_Value(B)))))
4919     return BinaryOperator::CreateNot(Builder.CreateAnd(Op1, B));
4920
4921   // A ^ (~A | B) --> ~(A & B)
4922   if (match(Op1, m_OneUse(m_c_Or(m_Not(m_Specific(Op0)), m_Value(B)))))
4923     return BinaryOperator::CreateNot(Builder.CreateAnd(Op0, B));
4924
4925   // (A | B) ^ (A | C) --> (B ^ C) & ~A -- There are 4 commuted variants.
4926   // TODO: Loosen one-use restriction if common operand is a constant.
4927   Value *D;
4928   if (match(Op0, m_OneUse(m_Or(m_Value(A), m_Value(B)))) &&
4929       match(Op1, m_OneUse(m_Or(m_Value(C), m_Value(D))))) {
4930     if (B == C || B == D)
4931       std::swap(A, B);
4932     if (A == C)
4933       std::swap(C, D);
4934     if (A == D) {
4935       Value *NotA = Builder.CreateNot(A);
4936       return BinaryOperator::CreateAnd(Builder.CreateXor(B, C), NotA);
4937     }
4938   }
4939
4940   // (A & B) ^ (A | C) --> A ? ~B : C -- There are 4 commuted variants.
4941   if (I.getType()->isIntOrIntVectorTy(1) &&
4942       match(&I, m_c_Xor(m_OneUse(m_LogicalAnd(m_Value(A), m_Value(B))),
4943                         m_OneUse(m_LogicalOr(m_Value(C), m_Value(D)))))) {
4944     bool NeedFreeze = isa<SelectInst>(Op0) && isa<SelectInst>(Op1) && B == D;
4945     if (B == C || B == D)
4946       std::swap(A, B);
4947     if (A == C)
4948       std::swap(C, D);
4949     if (A == D) {
4950       if (NeedFreeze)
4951         A = Builder.CreateFreeze(A);
4952       Value *NotB = Builder.CreateNot(B);
4953       return SelectInst::Create(A, NotB, C);
4954     }
4955   }
4956
4957   if (auto *LHS = dyn_cast<ICmpInst>(I.getOperand(0)))
4958     if (auto *RHS = dyn_cast<ICmpInst>(I.getOperand(1)))
4959       if (Value *V = foldXorOfICmps(LHS, RHS, I))
4960         return replaceInstUsesWith(I, V);
4961
4962   if (Instruction *CastedXor = foldCastedBitwiseLogic(I))
4963     return CastedXor;
4964
4965   if (Instruction *Abs = canonicalizeAbs(I, Builder))
4966     return Abs;
4967
4968   // Otherwise, if all else failed, try to hoist the xor-by-constant:
4969   // (X ^ C) ^ Y --> (X ^ Y) ^ C
4970   // Just like we do in other places, we completely avoid the fold
4971   // for constantexprs, at least to avoid an endless combine loop.
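  // e.g. ((X ^ 5) ^ Y) --> ((X ^ Y) ^ 5), keeping the constant outermost where
  // later folds can consume it.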
4972   if (match(&I, m_c_Xor(m_OneUse(m_Xor(m_CombineAnd(m_Value(X),
4973                                                     m_Unless(m_ConstantExpr())),
4974                                        m_ImmConstant(C1))),
4975                         m_Value(Y))))
4976     return BinaryOperator::CreateXor(Builder.CreateXor(X, Y), C1);
4977
4978   if (Instruction *R = reassociateForUses(I, Builder))
4979     return R;
4980
4981   if (Instruction *Canonicalized = canonicalizeLogicFirst(I, Builder))
4982     return Canonicalized;
4983
4984   if (Instruction *Folded = foldLogicOfIsFPClass(I, Op0, Op1))
4985     return Folded;
4986
4987   if (Instruction *Folded = canonicalizeConditionalNegationViaMathToSelect(I))
4988     return Folded;
4989
4990   if (Instruction *Res = foldBinOpOfDisplacedShifts(I))
4991     return Res;
4992
4993   if (Instruction *Res = foldBitwiseLogicWithIntrinsics(I, Builder))
4994     return Res;
4995
4996   return nullptr;
4997 }
4998