10b57cec5SDimitry Andric //===- AggressiveInstCombine.cpp ------------------------------------------===// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric // 90b57cec5SDimitry Andric // This file implements the aggressive expression pattern combiner classes. 100b57cec5SDimitry Andric // Currently, it handles expression patterns for: 110b57cec5SDimitry Andric // * Truncate instruction 120b57cec5SDimitry Andric // 130b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 140b57cec5SDimitry Andric 150b57cec5SDimitry Andric #include "llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h" 160b57cec5SDimitry Andric #include "AggressiveInstCombineInternal.h" 175ffd83dbSDimitry Andric #include "llvm/ADT/Statistic.h" 180b57cec5SDimitry Andric #include "llvm/Analysis/AliasAnalysis.h" 19349cc55cSDimitry Andric #include "llvm/Analysis/AssumptionCache.h" 200b57cec5SDimitry Andric #include "llvm/Analysis/BasicAliasAnalysis.h" 2106c3fb27SDimitry Andric #include "llvm/Analysis/ConstantFolding.h" 220fca6ea1SDimitry Andric #include "llvm/Analysis/DomTreeUpdater.h" 230b57cec5SDimitry Andric #include "llvm/Analysis/GlobalsModRef.h" 240b57cec5SDimitry Andric #include "llvm/Analysis/TargetLibraryInfo.h" 2581ad6265SDimitry Andric #include "llvm/Analysis/TargetTransformInfo.h" 26e8d8bef9SDimitry Andric #include "llvm/Analysis/ValueTracking.h" 27bdd1243dSDimitry Andric #include "llvm/IR/DataLayout.h" 280b57cec5SDimitry Andric #include "llvm/IR/Dominators.h" 29e8d8bef9SDimitry Andric #include "llvm/IR/Function.h" 
300b57cec5SDimitry Andric #include "llvm/IR/IRBuilder.h" 310b57cec5SDimitry Andric #include "llvm/IR/PatternMatch.h" 320fca6ea1SDimitry Andric #include "llvm/Transforms/Utils/BasicBlockUtils.h" 33972a253aSDimitry Andric #include "llvm/Transforms/Utils/BuildLibCalls.h" 340b57cec5SDimitry Andric #include "llvm/Transforms/Utils/Local.h" 355ffd83dbSDimitry Andric 360b57cec5SDimitry Andric using namespace llvm; 370b57cec5SDimitry Andric using namespace PatternMatch; 380b57cec5SDimitry Andric 390b57cec5SDimitry Andric #define DEBUG_TYPE "aggressive-instcombine" 400b57cec5SDimitry Andric 415ffd83dbSDimitry Andric STATISTIC(NumAnyOrAllBitsSet, "Number of any/all-bits-set patterns folded"); 425ffd83dbSDimitry Andric STATISTIC(NumGuardedRotates, 435ffd83dbSDimitry Andric "Number of guarded rotates transformed into funnel shifts"); 44e8d8bef9SDimitry Andric STATISTIC(NumGuardedFunnelShifts, 45e8d8bef9SDimitry Andric "Number of guarded funnel shifts transformed into funnel shifts"); 465ffd83dbSDimitry Andric STATISTIC(NumPopCountRecognized, "Number of popcount idioms recognized"); 475ffd83dbSDimitry Andric 48bdd1243dSDimitry Andric static cl::opt<unsigned> MaxInstrsToScan( 49bdd1243dSDimitry Andric "aggressive-instcombine-max-scan-instrs", cl::init(64), cl::Hidden, 50bdd1243dSDimitry Andric cl::desc("Max number of instructions to scan for aggressive instcombine.")); 510b57cec5SDimitry Andric 520fca6ea1SDimitry Andric static cl::opt<unsigned> StrNCmpInlineThreshold( 530fca6ea1SDimitry Andric "strncmp-inline-threshold", cl::init(3), cl::Hidden, 540fca6ea1SDimitry Andric cl::desc("The maximum length of a constant string for a builtin string cmp " 550fca6ea1SDimitry Andric "call eligible for inlining. 
The default value is 3."));

// Maximum constant-string length for which a memchr call is expanded inline.
static cl::opt<unsigned>
    MemChrInlineThreshold("memchr-inline-threshold", cl::init(3), cl::Hidden,
                          cl::desc("The maximum length of a constant string to "
                                   "inline a memchr call."));

/// Match a pattern for a bitwise funnel/rotate operation that partially guards
/// against undefined behavior by branching around the funnel-shift/rotation
/// when the shift amount is 0.
///
/// \param I  candidate instruction; only a two-operand phi can match.
/// \param DT dominator tree used to verify the shift operands are available at
///           the guard block's terminator.
/// \returns true if the phi was replaced by a funnel-shift intrinsic call.
static bool foldGuardedFunnelShift(Instruction &I, const DominatorTree &DT) {
  if (I.getOpcode() != Instruction::PHI || I.getNumOperands() != 2)
    return false;

  // As with the one-use checks below, this is not strictly necessary, but we
  // are being cautious to avoid potential perf regressions on targets that
  // do not actually have a funnel/rotate instruction (where the funnel shift
  // would be expanded back into math/shift/logic ops).
  if (!isPowerOf2_32(I.getType()->getScalarSizeInBits()))
    return false;

  // Match V to funnel shift left/right and capture the source operands and
  // shift amount (all returned via the by-reference out-parameters). Returns
  // the matched intrinsic ID, or not_intrinsic on failure.
  auto matchFunnelShift = [](Value *V, Value *&ShVal0, Value *&ShVal1,
                             Value *&ShAmt) {
    unsigned Width = V->getType()->getScalarSizeInBits();

    // fshl(ShVal0, ShVal1, ShAmt)
    // == (ShVal0 << ShAmt) | (ShVal1 >> (Width - ShAmt))
    if (match(V, m_OneUse(m_c_Or(
                     m_Shl(m_Value(ShVal0), m_Value(ShAmt)),
                     m_LShr(m_Value(ShVal1),
                            m_Sub(m_SpecificInt(Width), m_Deferred(ShAmt))))))) {
      return Intrinsic::fshl;
    }

    // fshr(ShVal0, ShVal1, ShAmt)
    // == (ShVal0 >> ShAmt) | (ShVal1 << (Width - ShAmt))
    if (match(V,
              m_OneUse(m_c_Or(m_Shl(m_Value(ShVal0), m_Sub(m_SpecificInt(Width),
                                                           m_Value(ShAmt))),
                              m_LShr(m_Value(ShVal1), m_Deferred(ShAmt)))))) {
      return Intrinsic::fshr;
    }

    return Intrinsic::not_intrinsic;
  };

  // One phi operand must be a funnel/rotate operation, and the other phi
  // operand must be the source value of that funnel/rotate operation:
  // phi [ rotate(RotSrc, ShAmt), FunnelBB ], [ RotSrc, GuardBB ]
  // phi [ fshl(ShVal0, ShVal1, ShAmt), FunnelBB ], [ ShVal0, GuardBB ]
  // phi [ fshr(ShVal0, ShVal1, ShAmt), FunnelBB ], [ ShVal1, GuardBB ]
  PHINode &Phi = cast<PHINode>(I);
  unsigned FunnelOp = 0, GuardOp = 1;
  Value *P0 = Phi.getOperand(0), *P1 = Phi.getOperand(1);
  Value *ShVal0, *ShVal1, *ShAmt;
  // Try operand 0 as the funnel shift first; if that fails (or its source
  // does not come through the other phi arm), retry with the operands swapped.
  Intrinsic::ID IID = matchFunnelShift(P0, ShVal0, ShVal1, ShAmt);
  if (IID == Intrinsic::not_intrinsic ||
      (IID == Intrinsic::fshl && ShVal0 != P1) ||
      (IID == Intrinsic::fshr && ShVal1 != P1)) {
    IID = matchFunnelShift(P1, ShVal0, ShVal1, ShAmt);
    if (IID == Intrinsic::not_intrinsic ||
        (IID == Intrinsic::fshl && ShVal0 != P0) ||
        (IID == Intrinsic::fshr && ShVal1 != P0))
      return false;
    assert((IID == Intrinsic::fshl || IID == Intrinsic::fshr) &&
           "Pattern must match funnel shift left or right");
    std::swap(FunnelOp, GuardOp);
  }

  // The incoming block with our source operand must be the "guard" block.
  // That must contain a cmp+branch to avoid the funnel/rotate when the shift
  // amount is equal to 0. The other incoming block is the block with the
  // funnel/rotate.
  BasicBlock *GuardBB = Phi.getIncomingBlock(GuardOp);
  BasicBlock *FunnelBB = Phi.getIncomingBlock(FunnelOp);
  Instruction *TermI = GuardBB->getTerminator();

  // Ensure that the shift values dominate each block.
  if (!DT.dominates(ShVal0, TermI) || !DT.dominates(ShVal1, TermI))
    return false;

  // The guard must be exactly "icmp eq ShAmt, 0" branching to the phi block
  // on equality and into the funnel-shift block otherwise.
  ICmpInst::Predicate Pred;
  BasicBlock *PhiBB = Phi.getParent();
  if (!match(TermI, m_Br(m_ICmp(Pred, m_Specific(ShAmt), m_ZeroInt()),
                         m_SpecificBB(PhiBB), m_SpecificBB(FunnelBB))))
    return false;

  if (Pred != CmpInst::ICMP_EQ)
    return false;

  IRBuilder<> Builder(PhiBB, PhiBB->getFirstInsertionPt());

  // A "rotate" is a funnel shift whose two data operands are the same value.
  if (ShVal0 == ShVal1)
    ++NumGuardedRotates;
  else
    ++NumGuardedFunnelShifts;

  // If this is not a rotate then the select was blocking poison from the
  // 'shift-by-zero' non-TVal, but a funnel shift won't - so freeze it.
  bool IsFshl = IID == Intrinsic::fshl;
  if (ShVal0 != ShVal1) {
    if (IsFshl && !llvm::isGuaranteedNotToBePoison(ShVal1))
      ShVal1 = Builder.CreateFreeze(ShVal1);
    else if (!IsFshl && !llvm::isGuaranteedNotToBePoison(ShVal0))
      ShVal0 = Builder.CreateFreeze(ShVal0);
  }

  // We matched a variation of this IR pattern:
  // GuardBB:
  //   %cmp = icmp eq i32 %ShAmt, 0
  //   br i1 %cmp, label %PhiBB, label %FunnelBB
  // FunnelBB:
  //   %sub = sub i32 32, %ShAmt
  //   %shr = lshr i32 %ShVal1, %sub
  //   %shl = shl i32 %ShVal0, %ShAmt
  //   %fsh = or i32 %shr, %shl
  //   br label %PhiBB
  // PhiBB:
  //   %cond = phi i32 [ %fsh, %FunnelBB ], [ %ShVal0, %GuardBB ]
  // -->
  // llvm.fshl.i32(i32 %ShVal0, i32 %ShVal1, i32 %ShAmt)
  Function *F = Intrinsic::getDeclaration(Phi.getModule(), IID, Phi.getType());
  Phi.replaceAllUsesWith(Builder.CreateCall(F, {ShVal0, ShVal1, ShAmt}));
  return true;
}

/// This is used by foldAnyOrAllBitsSet() to capture a source value (Root) and
/// the bit indexes (Mask) needed by a masked compare. If we're matching a chain
/// of 'and' ops, then we also need to capture the fact that we saw an
/// "and X, 1", so that's an extra return value for that case.
1870b57cec5SDimitry Andric struct MaskOps { 18881ad6265SDimitry Andric Value *Root = nullptr; 1890b57cec5SDimitry Andric APInt Mask; 1900b57cec5SDimitry Andric bool MatchAndChain; 19181ad6265SDimitry Andric bool FoundAnd1 = false; 1920b57cec5SDimitry Andric 1930b57cec5SDimitry Andric MaskOps(unsigned BitWidth, bool MatchAnds) 19481ad6265SDimitry Andric : Mask(APInt::getZero(BitWidth)), MatchAndChain(MatchAnds) {} 1950b57cec5SDimitry Andric }; 1960b57cec5SDimitry Andric 1970b57cec5SDimitry Andric /// This is a recursive helper for foldAnyOrAllBitsSet() that walks through a 1980b57cec5SDimitry Andric /// chain of 'and' or 'or' instructions looking for shift ops of a common source 1990b57cec5SDimitry Andric /// value. Examples: 2000b57cec5SDimitry Andric /// or (or (or X, (X >> 3)), (X >> 5)), (X >> 8) 2010b57cec5SDimitry Andric /// returns { X, 0x129 } 2020b57cec5SDimitry Andric /// and (and (X >> 1), 1), (X >> 4) 2030b57cec5SDimitry Andric /// returns { X, 0x12 } 2040b57cec5SDimitry Andric static bool matchAndOrChain(Value *V, MaskOps &MOps) { 2050b57cec5SDimitry Andric Value *Op0, *Op1; 2060b57cec5SDimitry Andric if (MOps.MatchAndChain) { 2070b57cec5SDimitry Andric // Recurse through a chain of 'and' operands. This requires an extra check 2080b57cec5SDimitry Andric // vs. the 'or' matcher: we must find an "and X, 1" instruction somewhere 2090b57cec5SDimitry Andric // in the chain to know that all of the high bits are cleared. 2100b57cec5SDimitry Andric if (match(V, m_And(m_Value(Op0), m_One()))) { 2110b57cec5SDimitry Andric MOps.FoundAnd1 = true; 2120b57cec5SDimitry Andric return matchAndOrChain(Op0, MOps); 2130b57cec5SDimitry Andric } 2140b57cec5SDimitry Andric if (match(V, m_And(m_Value(Op0), m_Value(Op1)))) 2150b57cec5SDimitry Andric return matchAndOrChain(Op0, MOps) && matchAndOrChain(Op1, MOps); 2160b57cec5SDimitry Andric } else { 2170b57cec5SDimitry Andric // Recurse through a chain of 'or' operands. 
2180b57cec5SDimitry Andric if (match(V, m_Or(m_Value(Op0), m_Value(Op1)))) 2190b57cec5SDimitry Andric return matchAndOrChain(Op0, MOps) && matchAndOrChain(Op1, MOps); 2200b57cec5SDimitry Andric } 2210b57cec5SDimitry Andric 2220b57cec5SDimitry Andric // We need a shift-right or a bare value representing a compare of bit 0 of 2230b57cec5SDimitry Andric // the original source operand. 2240b57cec5SDimitry Andric Value *Candidate; 225e8d8bef9SDimitry Andric const APInt *BitIndex = nullptr; 226e8d8bef9SDimitry Andric if (!match(V, m_LShr(m_Value(Candidate), m_APInt(BitIndex)))) 2270b57cec5SDimitry Andric Candidate = V; 2280b57cec5SDimitry Andric 2290b57cec5SDimitry Andric // Initialize result source operand. 2300b57cec5SDimitry Andric if (!MOps.Root) 2310b57cec5SDimitry Andric MOps.Root = Candidate; 2320b57cec5SDimitry Andric 2330b57cec5SDimitry Andric // The shift constant is out-of-range? This code hasn't been simplified. 234e8d8bef9SDimitry Andric if (BitIndex && BitIndex->uge(MOps.Mask.getBitWidth())) 2350b57cec5SDimitry Andric return false; 2360b57cec5SDimitry Andric 2370b57cec5SDimitry Andric // Fill in the mask bit derived from the shift constant. 238e8d8bef9SDimitry Andric MOps.Mask.setBit(BitIndex ? BitIndex->getZExtValue() : 0); 2390b57cec5SDimitry Andric return MOps.Root == Candidate; 2400b57cec5SDimitry Andric } 2410b57cec5SDimitry Andric 2420b57cec5SDimitry Andric /// Match patterns that correspond to "any-bits-set" and "all-bits-set". 2430b57cec5SDimitry Andric /// These will include a chain of 'or' or 'and'-shifted bits from a 2440b57cec5SDimitry Andric /// common source value: 2450b57cec5SDimitry Andric /// and (or (lshr X, C), ...), 1 --> (X & CMask) != 0 2460b57cec5SDimitry Andric /// and (and (lshr X, C), ...), 1 --> (X & CMask) == CMask 2470b57cec5SDimitry Andric /// Note: "any-bits-clear" and "all-bits-clear" are variations of these patterns 2480b57cec5SDimitry Andric /// that differ only with a final 'not' of the result. 
/// We expect that final
/// 'not' to be folded with the compare that we create here (invert predicate).
static bool foldAnyOrAllBitsSet(Instruction &I) {
  // The 'any-bits-set' ('or' chain) pattern is simpler to match because the
  // final "and X, 1" instruction must be the final op in the sequence.
  bool MatchAllBitsSet;
  if (match(&I, m_c_And(m_OneUse(m_And(m_Value(), m_Value())), m_Value())))
    MatchAllBitsSet = true;
  else if (match(&I, m_And(m_OneUse(m_Or(m_Value(), m_Value())), m_One())))
    MatchAllBitsSet = false;
  else
    return false;

  MaskOps MOps(I.getType()->getScalarSizeInBits(), MatchAllBitsSet);
  if (MatchAllBitsSet) {
    // The whole 'and' chain (including this final "and X, 1") is walked; the
    // chain is only valid if an "and X, 1" was seen somewhere inside it.
    if (!matchAndOrChain(cast<BinaryOperator>(&I), MOps) || !MOps.FoundAnd1)
      return false;
  } else {
    // For the 'or' chain, only the first operand is the chain; the "and 1"
    // is this instruction itself.
    if (!matchAndOrChain(cast<BinaryOperator>(&I)->getOperand(0), MOps))
      return false;
  }

  // The pattern was found. Create a masked compare that replaces all of the
  // shift and logic ops.
  IRBuilder<> Builder(&I);
  Constant *Mask = ConstantInt::get(I.getType(), MOps.Mask);
  Value *And = Builder.CreateAnd(MOps.Root, Mask);
  Value *Cmp = MatchAllBitsSet ? Builder.CreateICmpEQ(And, Mask)
                               : Builder.CreateIsNotNull(And);
  Value *Zext = Builder.CreateZExt(Cmp, I.getType());
  I.replaceAllUsesWith(Zext);
  ++NumAnyOrAllBitsSet;
  return true;
}

// Try to recognize below function as popcount intrinsic.
// This is the "best" algorithm from
// http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
// Also used in TargetLowering::expandCTPOP().
//
// int popcount(unsigned int i) {
//   i = i - ((i >> 1) & 0x55555555);
//   i = (i & 0x33333333) + ((i >> 2) & 0x33333333);
//   i = ((i + (i >> 4)) & 0x0F0F0F0F);
//   return (i * 0x01010101) >> 24;
// }
static bool tryToRecognizePopCount(Instruction &I) {
  // The root of the matched pattern is the final ">> (Len - 8)".
  if (I.getOpcode() != Instruction::LShr)
    return false;

  Type *Ty = I.getType();
  if (!Ty->isIntOrIntVectorTy())
    return false;

  unsigned Len = Ty->getScalarSizeInBits();
  // FIXME: fix Len == 8 and other irregular type lengths.
  if (!(Len <= 128 && Len > 8 && Len % 8 == 0))
    return false;

  // Byte-splat constants used by the bithack, widened to the scalar width.
  APInt Mask55 = APInt::getSplat(Len, APInt(8, 0x55));
  APInt Mask33 = APInt::getSplat(Len, APInt(8, 0x33));
  APInt Mask0F = APInt::getSplat(Len, APInt(8, 0x0F));
  APInt Mask01 = APInt::getSplat(Len, APInt(8, 0x01));
  APInt MaskShift = APInt(Len, Len - 8);

  Value *Op0 = I.getOperand(0);
  Value *Op1 = I.getOperand(1);
  Value *MulOp0;
  // Matching "(i * 0x01010101...) >> 24".
  if ((match(Op0, m_Mul(m_Value(MulOp0), m_SpecificInt(Mask01)))) &&
      match(Op1, m_SpecificInt(MaskShift))) {
    Value *ShiftOp0;
    // Matching "((i + (i >> 4)) & 0x0F0F0F0F...)".
    if (match(MulOp0, m_And(m_c_Add(m_LShr(m_Value(ShiftOp0), m_SpecificInt(4)),
                                    m_Deferred(ShiftOp0)),
                            m_SpecificInt(Mask0F)))) {
      Value *AndOp0;
      // Matching "(i & 0x33333333...) + ((i >> 2) & 0x33333333...)".
      if (match(ShiftOp0,
                m_c_Add(m_And(m_Value(AndOp0), m_SpecificInt(Mask33)),
                        m_And(m_LShr(m_Deferred(AndOp0), m_SpecificInt(2)),
                              m_SpecificInt(Mask33))))) {
        Value *Root, *SubOp1;
        // Matching "i - ((i >> 1) & 0x55555555...)".
        if (match(AndOp0, m_Sub(m_Value(Root), m_Value(SubOp1))) &&
            match(SubOp1, m_And(m_LShr(m_Specific(Root), m_SpecificInt(1)),
                                m_SpecificInt(Mask55)))) {
          LLVM_DEBUG(dbgs() << "Recognized popcount intrinsic\n");
          IRBuilder<> Builder(&I);
          Function *Func = Intrinsic::getDeclaration(
              I.getModule(), Intrinsic::ctpop, I.getType());
          I.replaceAllUsesWith(Builder.CreateCall(Func, {Root}));
          ++NumPopCountRecognized;
          return true;
        }
      }
    }
  }

  return false;
}

/// Fold smin(smax(fptosi(x), C1), C2) to llvm.fptosi.sat(x), providing C1 and
/// C2 saturate the value of the fp conversion. The transform is not reversable
/// as the fptosi.sat is more defined than the input - all values produce a
/// valid value for the fptosi.sat, where as some produce poison for original
/// that were out of range of the integer conversion. The reversed pattern may
/// use fmax and fmin instead. As we cannot directly reverse the transform, and
/// it is not always profitable, we make it conditional on the cost being
/// reported as lower by TTI.
static bool tryToFPToSat(Instruction &I, TargetTransformInfo &TTI) {
  // Look for min(max(fptosi, converting to fptosi_sat.
  // Both nesting orders (smax-of-smin and smin-of-smax) are accepted.
  Value *In;
  const APInt *MinC, *MaxC;
  if (!match(&I, m_SMax(m_OneUse(m_SMin(m_OneUse(m_FPToSI(m_Value(In))),
                                        m_APInt(MinC))),
                        m_APInt(MaxC))) &&
      !match(&I, m_SMin(m_OneUse(m_SMax(m_OneUse(m_FPToSI(m_Value(In))),
                                        m_APInt(MaxC))),
                        m_APInt(MinC))))
    return false;

  // Check that the constants clamp a saturate, i.e. the bounds are exactly
  // [-2^(k-1), 2^(k-1)-1] for some bit width k.
  if (!(*MinC + 1).isPowerOf2() || -*MaxC != *MinC + 1)
    return false;

  Type *IntTy = I.getType();
  Type *FpTy = In->getType();
  // SatTy is the narrow type implied by the clamp constants; the result is
  // sign-extended back to the original integer type.
  Type *SatTy =
      IntegerType::get(IntTy->getContext(), (*MinC + 1).exactLogBase2() + 1);
  if (auto *VecTy = dyn_cast<VectorType>(IntTy))
    SatTy = VectorType::get(SatTy, VecTy->getElementCount());

  // Get the cost of the intrinsic, and check that against the cost of
  // fptosi+smin+smax
  InstructionCost SatCost = TTI.getIntrinsicInstrCost(
      IntrinsicCostAttributes(Intrinsic::fptosi_sat, SatTy, {In}, {FpTy}),
      TTI::TCK_RecipThroughput);
  SatCost += TTI.getCastInstrCost(Instruction::SExt, IntTy, SatTy,
                                  TTI::CastContextHint::None,
                                  TTI::TCK_RecipThroughput);

  InstructionCost MinMaxCost = TTI.getCastInstrCost(
      Instruction::FPToSI, IntTy, FpTy, TTI::CastContextHint::None,
      TTI::TCK_RecipThroughput);
  MinMaxCost += TTI.getIntrinsicInstrCost(
      IntrinsicCostAttributes(Intrinsic::smin, IntTy, {IntTy}),
      TTI::TCK_RecipThroughput);
  MinMaxCost += TTI.getIntrinsicInstrCost(
      IntrinsicCostAttributes(Intrinsic::smax, IntTy, {IntTy}),
      TTI::TCK_RecipThroughput);

  // Only transform when TTI reports the saturating form as strictly cheaper.
  if (SatCost >= MinMaxCost)
    return false;

  IRBuilder<> Builder(&I);
  Function *Fn = Intrinsic::getDeclaration(I.getModule(), Intrinsic::fptosi_sat,
                                           {SatTy, FpTy});
  Value *Sat = Builder.CreateCall(Fn, In);
  I.replaceAllUsesWith(Builder.CreateSExt(Sat, IntTy));
  return true;
}

/// Try to replace a mathlib call to sqrt with the LLVM intrinsic. This avoids
/// pessimistic codegen that has to account for setting errno and can enable
/// vectorization.
static bool foldSqrt(CallInst *Call, LibFunc Func, TargetTransformInfo &TTI,
                     TargetLibraryInfo &TLI, AssumptionCache &AC,
                     DominatorTree &DT) {
  // NOTE(review): \p Func is unused in this body — presumably the caller has
  // already established that this call is a sqrt-family libcall; confirm at
  // the call site.

  Module *M = Call->getModule();

  // If (1) this is a sqrt libcall, (2) we can assume that NAN is not created
  // (because NNAN or the operand arg must not be less than -0.0) and (2) we
  // would not end up lowering to a libcall anyway (which could change the value
  // of errno), then:
  // (1) errno won't be set.
  // (2) it is safe to convert this to an intrinsic call.
  Type *Ty = Call->getType();
  Value *Arg = Call->getArgOperand(0);
  if (TTI.haveFastSqrt(Ty) &&
      (Call->hasNoNaNs() ||
       cannotBeOrderedLessThanZero(
           Arg, 0,
           SimplifyQuery(Call->getDataLayout(), &TLI, &DT, &AC, Call)))) {
    IRBuilder<> Builder(Call);
    // Propagate the original call's fast-math flags onto the new intrinsic.
    IRBuilderBase::FastMathFlagGuard Guard(Builder);
    Builder.setFastMathFlags(Call->getFastMathFlags());

    Function *Sqrt = Intrinsic::getDeclaration(M, Intrinsic::sqrt, Ty);
    Value *NewSqrt = Builder.CreateCall(Sqrt, Arg, "sqrt");
    Call->replaceAllUsesWith(NewSqrt);

    // Explicitly erase the old call because a call with side effects is not
    // trivially dead.
    Call->eraseFromParent();
    return true;
  }

  return false;
}

// Check if this array of constants represents a cttz table.
// Iterate over the elements from \p Table by trying to find/match all
// the numbers from 0 to \p InputBits that should represent cttz results.
static bool isCTTZTable(const ConstantDataArray &Table, uint64_t Mul,
                        uint64_t Shift, uint64_t InputBits) {
  unsigned Length = Table.getNumElements();
  // The table must hold at least one entry per possible cttz result, and (to
  // allow padded tables) at most twice that many.
  if (Length < InputBits || Length > InputBits * 2)
    return false;

  // Mask selects the index bits that survive ">> Shift" of an InputBits-wide
  // product.
  APInt Mask = APInt::getBitsSetFrom(InputBits, Shift);
  unsigned Matched = 0;

  for (unsigned i = 0; i < Length; i++) {
    uint64_t Element = Table.getElementAsInteger(i);
    // Entries >= InputBits can never be a valid cttz result; skip them (they
    // may be padding in oversized tables).
    if (Element >= InputBits)
      continue;

    // Check if \p Element matches a concrete answer. It could fail for some
    // elements that are never accessed, so we keep iterating over each element
    // from the table. The number of matched elements should be equal to the
    // number of potential right answers which is \p InputBits actually.
    if ((((Mul << Element) & Mask.getZExtValue()) >> Shift) == i)
      Matched++;
  }

  return Matched == InputBits;
}

// Try to recognize table-based ctz implementation.
// E.g., an example in C (for more cases please see the llvm/tests):
// int f(unsigned x) {
//    static const char table[32] =
//      {0, 1, 28, 2, 29, 14, 24, 3, 30,
//       22, 20, 15, 25, 17, 4, 8, 31, 27,
//       13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9};
//    return table[((unsigned)((x & -x) * 0x077CB531U)) >> 27];
// }
// this can be lowered to `cttz` instruction.
// There is also a special case when the element is 0.
//
// Here are some examples of LLVM IR for a 64-bit target:
//
// CASE 1:
// %sub = sub i32 0, %x
// %and = and i32 %sub, %x
// %mul = mul i32 %and, 125613361
// %shr = lshr i32 %mul, 27
// %idxprom = zext i32 %shr to i64
// %arrayidx = getelementptr inbounds [32 x i8], [32 x i8]* @ctz1.table, i64 0,
// i64 %idxprom
// %0 = load i8, i8* %arrayidx, align 1, !tbaa !8
//
// CASE 2:
// %sub = sub i32 0, %x
// %and = and i32 %sub, %x
// %mul = mul i32 %and, 72416175
// %shr = lshr i32 %mul, 26
// %idxprom = zext i32 %shr to i64
// %arrayidx = getelementptr inbounds [64 x i16], [64 x i16]* @ctz2.table,
// i64 0, i64 %idxprom
// %0 = load i16, i16* %arrayidx, align 2, !tbaa !8
//
// CASE 3:
// %sub = sub i32 0, %x
// %and = and i32 %sub, %x
// %mul = mul i32 %and, 81224991
// %shr = lshr i32 %mul, 27
// %idxprom = zext i32 %shr to i64
// %arrayidx = getelementptr inbounds [32 x i32], [32 x i32]* @ctz3.table,
// i64 0, i64 %idxprom
// %0 = load i32, i32* %arrayidx, align 4, !tbaa !8
//
// CASE 4:
// %sub = sub i64 0, %x
// %and = and i64 %sub, %x
// %mul = mul i64 %and, 283881067100198605
// %shr = lshr i64 %mul, 58
// %arrayidx = getelementptr inbounds [64 x i8], [64 x i8]* @table, i64 0,
// i64 %shr
// %0 = load i8, i8* %arrayidx, align 1, !tbaa !8
//
// All this can be lowered to @llvm.cttz.i32/64 intrinsic.
static bool tryToRecognizeTableBasedCttz(Instruction &I) {
  // The root of the pattern is the load from the lookup table.
  LoadInst *LI = dyn_cast<LoadInst>(&I);
  if (!LI)
    return false;

  // The loaded table element must be an integer (i8/i16/i32 in the examples
  // above).
  Type *AccessType = LI->getType();
  if (!AccessType->isIntegerTy())
    return false;

  // Expect exactly a two-index inbounds GEP: [N x iM]* table, 0, idx.
  GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(LI->getPointerOperand());
  if (!GEP || !GEP->isInBounds() || GEP->getNumIndices() != 2)
    return false;

  if (!GEP->getSourceElementType()->isArrayTy())
    return false;

  // Table size must match the supported input widths (32 or 64 entries).
  uint64_t ArraySize = GEP->getSourceElementType()->getArrayNumElements();
  if (ArraySize != 32 && ArraySize != 64)
    return false;

  // The table itself must be a constant global whose initializer we can read.
  GlobalVariable *GVTable = dyn_cast<GlobalVariable>(GEP->getPointerOperand());
  if (!GVTable || !GVTable->hasInitializer() || !GVTable->isConstant())
    return false;

  ConstantDataArray *ConstData =
      dyn_cast<ConstantDataArray>(GVTable->getInitializer());
  if (!ConstData)
    return false;

  // First GEP index must be the zero "array decay" index.
  if (!match(GEP->idx_begin()->get(), m_ZeroInt()))
    return false;

  // Second index carries the de Bruijn-style hash:
  // zext?(((x & -x) * MulConst) >> ShiftConst).
  Value *Idx2 = std::next(GEP->idx_begin())->get();
  Value *X1;
  uint64_t MulConst, ShiftConst;
  // FIXME: 64-bit targets have `i64` type for the GEP index, so this match will
  // probably fail for other (e.g. 32-bit) targets.
  if (!match(Idx2, m_ZExtOrSelf(
                       m_LShr(m_Mul(m_c_And(m_Neg(m_Value(X1)), m_Deferred(X1)),
                                    m_ConstantInt(MulConst)),
                              m_ConstantInt(ShiftConst)))))
    return false;

  unsigned InputBits = X1->getType()->getScalarSizeInBits();
  if (InputBits != 32 && InputBits != 64)
    return false;

  // Shift should extract top 5..7 bits.
  if (InputBits - Log2_32(InputBits) != ShiftConst &&
      InputBits - Log2_32(InputBits) - 1 != ShiftConst)
    return false;

  // Verify that the table really maps every single-bit input to its trailing
  // zero count under this multiplier/shift.
  if (!isCTTZTable(*ConstData, MulConst, ShiftConst, InputBits))
    return false;

  // table[0] is what the idiom produces for x == 0; if it equals InputBits,
  // the table agrees with cttz(0) and no special-casing is needed.
  auto ZeroTableElem = ConstData->getElementAsInteger(0);
  bool DefinedForZero = ZeroTableElem == InputBits;

  IRBuilder<> B(LI);
  // cttz's second operand is the "is zero poison" flag: poison only when the
  // table does NOT define the zero case (we select the table value instead).
  ConstantInt *BoolConst = B.getInt1(!DefinedForZero);
  Type *XType = X1->getType();
  auto Cttz = B.CreateIntrinsic(Intrinsic::cttz, {XType}, {X1, BoolConst});
  Value *ZExtOrTrunc = nullptr;

  if (DefinedForZero) {
    ZExtOrTrunc = B.CreateZExtOrTrunc(Cttz, AccessType);
  } else {
    // If the value in elem 0 isn't the same as InputBits, we still want to
    // produce the value from the table.
    auto Cmp = B.CreateICmpEQ(X1, ConstantInt::get(XType, 0));
    auto Select =
        B.CreateSelect(Cmp, ConstantInt::get(XType, ZeroTableElem), Cttz);

    // NOTE: If the table[0] is 0, but the cttz(0) is defined by the Target
    // it should be handled as: `cttz(x) & (typeSize - 1)`.

    ZExtOrTrunc = B.CreateZExtOrTrunc(Select, AccessType);
  }

  // Replace the table load with the computed cttz-based value; the dead
  // load/GEP chain is left for later cleanup.
  LI->replaceAllUsesWith(ZExtOrTrunc);

  return true;
}

/// This is used by foldLoadsRecursive() to capture a Root Load node which is
/// of type or(load, load) and recursively build the wide load. Also capture the
/// shift amount, zero extend type and loadSize.
struct LoadOps {
  LoadInst *Root = nullptr;       // Lowest-address load of the merged chain.
  LoadInst *RootInsert = nullptr; // Earliest load; insertion point for NewLoad.
  bool FoundRoot = false;         // Set once the first pair has been merged.
  uint64_t LoadSize = 0;          // Total width (bits) merged so far.
  const APInt *Shift = nullptr;   // Shift amount applied to the merged value.
  Type *ZextType;                 // Type the or-chain is zero-extended to.
  AAMDNodes AATags;               // Concatenated alias metadata of all loads.
};

// Identify and Merge consecutive loads recursively which is of the form
// (ZExt(L1) << shift1) | (ZExt(L2) << shift2) -> ZExt(L3) << shift1
// (ZExt(L1) << shift1) | ZExt(L2) -> ZExt(L3)
static bool foldLoadsRecursive(Value *V, LoadOps &LOps, const DataLayout &DL,
                               AliasAnalysis &AA) {
  const APInt *ShAmt2 = nullptr;
  Value *X;
  Instruction *L1, *L2;

  // Go to the last node with loads.
  if (match(V, m_OneUse(m_c_Or(
                   m_Value(X),
                   m_OneUse(m_Shl(m_OneUse(m_ZExt(m_OneUse(m_Instruction(L2)))),
                                  m_APInt(ShAmt2)))))) ||
      match(V, m_OneUse(m_Or(m_Value(X),
                             m_OneUse(m_ZExt(m_OneUse(m_Instruction(L2)))))))) {
    if (!foldLoadsRecursive(X, LOps, DL, AA) && LOps.FoundRoot)
      // Avoid Partial chain merge.
      return false;
  } else
    return false;

  // Check if the pattern has loads. Before the first merge (FoundRoot is
  // false) the left operand X must itself be a (possibly shifted) zext'd load.
  LoadInst *LI1 = LOps.Root;
  const APInt *ShAmt1 = LOps.Shift;
  if (LOps.FoundRoot == false &&
      (match(X, m_OneUse(m_ZExt(m_Instruction(L1)))) ||
       match(X, m_OneUse(m_Shl(m_OneUse(m_ZExt(m_OneUse(m_Instruction(L1)))),
                               m_APInt(ShAmt1)))))) {
    LI1 = dyn_cast<LoadInst>(L1);
  }
  LoadInst *LI2 = dyn_cast<LoadInst>(L2);

  // Check if loads are same, atomic, volatile and having same address space.
  if (LI1 == LI2 || !LI1 || !LI2 || !LI1->isSimple() || !LI2->isSimple() ||
      LI1->getPointerAddressSpace() != LI2->getPointerAddressSpace())
    return false;

  // Check if Loads come from same BB.
  if (LI1->getParent() != LI2->getParent())
    return false;

  // Find the data layout
  bool IsBigEndian = DL.isBigEndian();

  // Check if loads are consecutive and same size: strip both pointers down to
  // a common base plus constant byte offsets.
  Value *Load1Ptr = LI1->getPointerOperand();
  APInt Offset1(DL.getIndexTypeSizeInBits(Load1Ptr->getType()), 0);
  Load1Ptr =
      Load1Ptr->stripAndAccumulateConstantOffsets(DL, Offset1,
                                                  /* AllowNonInbounds */ true);

  Value *Load2Ptr = LI2->getPointerOperand();
  APInt Offset2(DL.getIndexTypeSizeInBits(Load2Ptr->getType()), 0);
  Load2Ptr =
      Load2Ptr->stripAndAccumulateConstantOffsets(DL, Offset2,
                                                  /* AllowNonInbounds */ true);

  // Verify if both loads have same base pointers and load sizes are same.
  uint64_t LoadSize1 = LI1->getType()->getPrimitiveSizeInBits();
  uint64_t LoadSize2 = LI2->getType()->getPrimitiveSizeInBits();
  if (Load1Ptr != Load2Ptr || LoadSize1 != LoadSize2)
    return false;

  // Support Loadsizes greater or equal to 8bits and only power of 2.
  if (LoadSize1 < 8 || !isPowerOf2_64(LoadSize1))
    return false;

  // Alias Analysis to check for stores b/w the loads.
  LoadInst *Start = LOps.FoundRoot ? LOps.RootInsert : LI1, *End = LI2;
  MemoryLocation Loc;
  if (!Start->comesBefore(End)) {
    std::swap(Start, End);
    Loc = MemoryLocation::get(End);
    if (LOps.FoundRoot)
      Loc = Loc.getWithNewSize(LOps.LoadSize);
  } else
    Loc = MemoryLocation::get(End);
  unsigned NumScanned = 0;
  for (Instruction &Inst :
       make_range(Start->getIterator(), End->getIterator())) {
    // Any intervening write that may alias the merged location blocks the
    // transform.
    if (Inst.mayWriteToMemory() && isModSet(AA.getModRefInfo(&Inst, Loc)))
      return false;

    // Ignore debug info so that's not counted against MaxInstrsToScan.
    // Otherwise debug info could affect codegen.
    if (!isa<DbgInfoIntrinsic>(Inst) && ++NumScanned > MaxInstrsToScan)
      return false;
  }

  // Make sure Load with lower Offset is at LI1
  bool Reverse = false;
  if (Offset2.slt(Offset1)) {
    std::swap(LI1, LI2);
    std::swap(ShAmt1, ShAmt2);
    std::swap(Offset1, Offset2);
    std::swap(Load1Ptr, Load2Ptr);
    std::swap(LoadSize1, LoadSize2);
    Reverse = true;
  }

  // Big endian swap the shifts
  if (IsBigEndian)
    std::swap(ShAmt1, ShAmt2);

  // Find Shifts values. A missing shift amount means shift by zero.
  uint64_t Shift1 = 0, Shift2 = 0;
  if (ShAmt1)
    Shift1 = ShAmt1->getZExtValue();
  if (ShAmt2)
    Shift2 = ShAmt2->getZExtValue();

  // First load is always LI1. This is where we put the new load.
  // Use the merged load size available from LI1 for forward loads.
  if (LOps.FoundRoot) {
    if (!Reverse)
      LoadSize1 = LOps.LoadSize;
    else
      LoadSize2 = LOps.LoadSize;
  }

  // Verify if shift amount and load index aligns and verifies that loads
  // are consecutive.
  uint64_t ShiftDiff = IsBigEndian ? LoadSize2 : LoadSize1;
  uint64_t PrevSize =
      DL.getTypeStoreSize(IntegerType::get(LI1->getContext(), LoadSize1));
  if ((Shift2 - Shift1) != ShiftDiff || (Offset2 - Offset1) != PrevSize)
    return false;

  // Update LOps
  AAMDNodes AATags1 = LOps.AATags;
  AAMDNodes AATags2 = LI2->getAAMetadata();
  if (LOps.FoundRoot == false) {
    LOps.FoundRoot = true;
    AATags1 = LI1->getAAMetadata();
  }
  LOps.LoadSize = LoadSize1 + LoadSize2;
  LOps.RootInsert = Start;

  // Concatenate the AATags of the Merged Loads.
  LOps.AATags = AATags1.concat(AATags2);

  LOps.Root = LI1;
  LOps.Shift = ShAmt1;
  LOps.ZextType = X->getType();
  return true;
}

// For a given BB instruction, evaluate all loads in the chain that form a
// pattern which suggests that the loads can be combined. The one and only use
// of the loads is to form a wider load.
static bool foldConsecutiveLoads(Instruction &I, const DataLayout &DL,
                                 TargetTransformInfo &TTI, AliasAnalysis &AA,
                                 const DominatorTree &DT) {
  // Only consider load chains of scalar values.
  if (isa<VectorType>(I.getType()))
    return false;

  // Collect the whole or-of-shifted-loads chain rooted at I.
  LoadOps LOps;
  if (!foldLoadsRecursive(&I, LOps, DL, AA) || !LOps.FoundRoot)
    return false;

  IRBuilder<> Builder(&I);
  LoadInst *NewLoad = nullptr, *LI1 = LOps.Root;

  IntegerType *WiderType = IntegerType::get(I.getContext(), LOps.LoadSize);
  // TTI based checks if we want to proceed with wider load
  bool Allowed = TTI.isTypeLegal(WiderType);
  if (!Allowed)
    return false;

  // The merged load keeps the first load's alignment, so the target must
  // support (fast) misaligned access at the wider size.
  unsigned AS = LI1->getPointerAddressSpace();
  unsigned Fast = 0;
  Allowed = TTI.allowsMisalignedMemoryAccesses(I.getContext(), LOps.LoadSize,
                                               AS, LI1->getAlign(), &Fast);
  if (!Allowed || !Fast)
    return false;

  // Get the Index and Ptr for the new GEP.
  Value *Load1Ptr = LI1->getPointerOperand();
  Builder.SetInsertPoint(LOps.RootInsert);
  if (!DT.dominates(Load1Ptr, LOps.RootInsert)) {
    // The root load's pointer is not available at the insertion point;
    // rebuild it from the stripped base plus the accumulated constant offset.
    APInt Offset1(DL.getIndexTypeSizeInBits(Load1Ptr->getType()), 0);
    Load1Ptr = Load1Ptr->stripAndAccumulateConstantOffsets(
        DL, Offset1, /* AllowNonInbounds */ true);
    Load1Ptr = Builder.CreatePtrAdd(Load1Ptr, Builder.getInt(Offset1));
  }
  // Generate wider load.
  NewLoad = Builder.CreateAlignedLoad(WiderType, Load1Ptr, LI1->getAlign(),
                                      LI1->isVolatile(), "");
  NewLoad->takeName(LI1);
  // Set the New Load AATags Metadata.
  if (LOps.AATags)
    NewLoad->setAAMetadata(LOps.AATags);

  Value *NewOp = NewLoad;
  // Check if zero extend needed.
  if (LOps.ZextType)
    NewOp = Builder.CreateZExt(NewOp, LOps.ZextType);

  // Check if shift needed. We need to shift with the amount of load1
  // shift if not zero.
  if (LOps.Shift)
    NewOp =
        Builder.CreateShl(NewOp, ConstantInt::get(I.getContext(), *LOps.Shift));
  I.replaceAllUsesWith(NewOp);

  return true;
}

// Calculate GEP Stride and accumulated const ModOffset. Return Stride and
// ModOffset
static std::pair<APInt, APInt>
getStrideAndModOffsetOfGEP(Value *PtrOp, const DataLayout &DL) {
  unsigned BW = DL.getIndexTypeSizeInBits(PtrOp->getType());
  std::optional<APInt> Stride;
  APInt ModOffset(BW, 0);
  // Return a minimum gep stride, greatest common divisor of consecutive gep
  // index scales (c.f. Bezout's identity). Walk the GEP chain towards the
  // base object, folding constant offsets into ModOffset.
  while (auto *GEP = dyn_cast<GEPOperator>(PtrOp)) {
    MapVector<Value *, APInt> VarOffsets;
    if (!GEP->collectOffset(DL, BW, VarOffsets, ModOffset))
      break;

    for (auto [V, Scale] : VarOffsets) {
      // Only keep a power of two factor for non-inbounds: without inbounds
      // the index may wrap, so only the low-bit factor of the scale holds.
      if (!GEP->isInBounds())
        Scale = APInt::getOneBitSet(Scale.getBitWidth(), Scale.countr_zero());

      if (!Stride)
        Stride = Scale;
      else
        Stride = APIntOps::GreatestCommonDivisor(*Stride, Scale);
    }

    PtrOp = GEP->getPointerOperand();
  }

  // Check
  // whether pointer arrives back at Global Variable via at least one GEP.
  // Even if it doesn't, we can check by alignment.
  if (!isa<GlobalVariable>(PtrOp) || !Stride)
    return {APInt(BW, 1), APInt(BW, 0)};

  // In consideration of signed GEP indices, non-negligible offset become
  // remainder of division by minimum GEP stride; normalize into [0, Stride).
  ModOffset = ModOffset.srem(*Stride);
  if (ModOffset.isNegative())
    ModOffset += *Stride;

  return {*Stride, ModOffset};
}

/// If C is a constant patterned array and all valid loaded results for given
/// alignment are same to a constant, return that constant.
static bool foldPatternedLoads(Instruction &I, const DataLayout &DL) {
  auto *LI = dyn_cast<LoadInst>(&I);
  if (!LI || LI->isVolatile())
    return false;

  // We can only fold the load if it is from a constant global with definitive
  // initializer. Skip expensive logic if this is not the case.
  auto *PtrOp = LI->getPointerOperand();
  auto *GV = dyn_cast<GlobalVariable>(getUnderlyingObject(PtrOp));
  if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer())
    return false;

  // Bail for large initializers in excess of 4K to avoid too many scans.
  Constant *C = GV->getInitializer();
  uint64_t GVSize = DL.getTypeAllocSize(C->getType());
  if (!GVSize || 4096 < GVSize)
    return false;

  Type *LoadTy = LI->getType();
  unsigned BW = DL.getIndexTypeSizeInBits(PtrOp->getType());
  auto [Stride, ConstOffset] = getStrideAndModOffsetOfGEP(PtrOp, DL);

  // Any possible offset could be multiple of GEP stride. And any valid
  // offset is multiple of load alignment, so checking only multiples of bigger
  // one is sufficient to say results' equality.
  if (auto LA = LI->getAlign();
      LA <= GV->getAlign().valueOrOne() && Stride.getZExtValue() < LA.value()) {
    ConstOffset = APInt(BW, 0);
    Stride = APInt(BW, LA.value());
  }

  // Fold the load at the first reachable offset, then require every other
  // reachable offset to fold to the very same constant.
  Constant *Ca = ConstantFoldLoadFromConst(C, LoadTy, ConstOffset, DL);
  if (!Ca)
    return false;

  unsigned E = GVSize - DL.getTypeStoreSize(LoadTy);
  for (; ConstOffset.getZExtValue() <= E; ConstOffset += Stride)
    if (Ca != ConstantFoldLoadFromConst(C, LoadTy, ConstOffset, DL))
      return false;

  I.replaceAllUsesWith(Ca);

  return true;
}

namespace {
/// Expands small constant-bounded strncmp/strcmp calls into an inline
/// byte-by-byte comparison (see optimizeStrNCmp / inlineCompare below).
class StrNCmpInliner {
public:
  StrNCmpInliner(CallInst *CI, LibFunc Func, DomTreeUpdater *DTU,
                 const DataLayout &DL)
      : CI(CI), Func(Func), DTU(DTU), DL(DL) {}

  bool optimizeStrNCmp();

private:
  void inlineCompare(Value *LHS, StringRef RHS, uint64_t N, bool Swapped);

  CallInst *CI;          // The strcmp/strncmp call being expanded.
  LibFunc Func;          // Which libcall: LibFunc_strcmp or LibFunc_strncmp.
  DomTreeUpdater *DTU;   // May be null; updated when CFG is split.
  const DataLayout &DL;
};

} // namespace

/// First we normalize calls to strncmp/strcmp to the form of
/// compare(s1, s2, N), which means comparing first N bytes of s1 and s2
/// (without considering '\0').
///
/// Examples:
///
/// \code
///   strncmp(s, "a", 3) -> compare(s, "a", 2)
///   strncmp(s, "abc", 3) -> compare(s, "abc", 3)
///   strncmp(s, "a\0b", 3) -> compare(s, "a\0b", 2)
///   strcmp(s, "a") -> compare(s, "a", 2)
///
///   char s2[] = {'a'}
///   strncmp(s, s2, 3) -> compare(s, s2, 3)
///
///   char s2[] = {'a', 'b', 'c', 'd'}
///   strncmp(s, s2, 3) -> compare(s, s2, 3)
/// \endcode
///
/// We only handle cases where N and exactly one of s1 and s2 are constant.
/// Cases that s1 and s2 are both constant are already handled by the
/// instcombine pass.
///
/// We do not handle cases where N > StrNCmpInlineThreshold.
///
/// We also do not handle cases where N < 2, which are already
/// handled by the instcombine pass.
///
bool StrNCmpInliner::optimizeStrNCmp() {
  if (StrNCmpInlineThreshold < 2)
    return false;

  // Only profitable when the result is merely compared against zero.
  if (!isOnlyUsedInZeroComparison(CI))
    return false;

  Value *Str1P = CI->getArgOperand(0);
  Value *Str2P = CI->getArgOperand(1);
  // Should be handled elsewhere.
  if (Str1P == Str2P)
    return false;

  // Exactly one side must be a constant string.
  StringRef Str1, Str2;
  bool HasStr1 = getConstantStringInfo(Str1P, Str1, /*TrimAtNul=*/false);
  bool HasStr2 = getConstantStringInfo(Str2P, Str2, /*TrimAtNul=*/false);
  if (HasStr1 == HasStr2)
    return false;

  // Note that '\0' and characters after it are not trimmed.
  StringRef Str = HasStr1 ? Str1 : Str2;
  Value *StrP = HasStr1 ? Str2P : Str1P;

  // Comparing past the first NUL cannot change the result, so cap N at the
  // NUL position (inclusive).
  size_t Idx = Str.find('\0');
  uint64_t N = Idx == StringRef::npos ? UINT64_MAX : Idx + 1;
  if (Func == LibFunc_strncmp) {
    if (auto *ConstInt = dyn_cast<ConstantInt>(CI->getArgOperand(2)))
      N = std::min(N, ConstInt->getZExtValue());
    else
      return false;
  }
  // Now N means how many bytes we need to compare at most.
  if (N > Str.size() || N < 2 || N > StrNCmpInlineThreshold)
    return false;

  // Cases where StrP has two or more dereferenceable bytes might be better
  // optimized elsewhere.
  bool CanBeNull = false, CanBeFreed = false;
  if (StrP->getPointerDereferenceableBytes(DL, CanBeNull, CanBeFreed) > 1)
    return false;
  inlineCompare(StrP, Str, N, HasStr1);
  return true;
}

/// Convert
///
/// \code
///   ret = compare(s1, s2, N)
/// \endcode
///
/// into
///
/// \code
///   ret = (int)s1[0] - (int)s2[0]
///   if (ret != 0)
///     goto NE
///   ...
///   ret = (int)s1[N-2] - (int)s2[N-2]
///   if (ret != 0)
///     goto NE
///   ret = (int)s1[N-1] - (int)s2[N-1]
/// NE:
/// \endcode
///
/// CFG before and after the transformation:
///
/// (before)
/// BBCI
///
/// (after)
/// BBCI -> BBSubs[0] (sub,icmp) --NE-> BBNE -> BBTail
///         |                            ^
///         E                            |
///         |                            |
///         BBSubs[1] (sub,icmp) --NE----+
///          ...                         |
///         BBSubs[N-1]    (sub) --------+
///
void StrNCmpInliner::inlineCompare(Value *LHS, StringRef RHS, uint64_t N,
                                   bool Swapped) {
  auto &Ctx = CI->getContext();
  IRBuilder<> B(Ctx);

  // Split off everything after the call into BBTail; the comparison chain is
  // inserted between BBCI and BBTail.
  BasicBlock *BBCI = CI->getParent();
  BasicBlock *BBTail =
      SplitBlock(BBCI, CI, DTU, nullptr, nullptr, BBCI->getName() + ".tail");

  // One block per compared byte, plus a common "not equal" join block.
  SmallVector<BasicBlock *> BBSubs;
  for (uint64_t I = 0; I < N; ++I)
    BBSubs.push_back(
        BasicBlock::Create(Ctx, "sub_" + Twine(I), BBCI->getParent(), BBTail));
  BasicBlock *BBNE = BasicBlock::Create(Ctx, "ne", BBCI->getParent(), BBTail);

  cast<BranchInst>(BBCI->getTerminator())->setSuccessor(0, BBSubs[0]);

  // BBNE collects the first non-zero difference (or the last difference).
  B.SetInsertPoint(BBNE);
  PHINode *Phi = B.CreatePHI(CI->getType(), N);
  B.CreateBr(BBTail);

  Value *Base = LHS;
  for (uint64_t i = 0; i < N; ++i) {
    B.SetInsertPoint(BBSubs[i]);
    Value *VL =
        B.CreateZExt(B.CreateLoad(B.getInt8Ty(),
                                  B.CreateInBoundsPtrAdd(Base, B.getInt64(i))),
                     CI->getType());
    Value *VR =
        ConstantInt::get(CI->getType(), static_cast<unsigned char>(RHS[i]));
    // Swapped records whether the constant string was the first argument, so
    // the subtraction keeps the original operand order.
    Value *Sub = Swapped ? B.CreateSub(VR, VL) : B.CreateSub(VL, VR);
    if (i < N - 1)
      B.CreateCondBr(B.CreateICmpNE(Sub, ConstantInt::get(CI->getType(), 0)),
                     BBNE, BBSubs[i + 1]);
    else
      B.CreateBr(BBNE);

    Phi->addIncoming(Sub, BBSubs[i]);
  }

  CI->replaceAllUsesWith(Phi);
  CI->eraseFromParent();

  // Mirror every CFG edge added/removed above in the dominator tree.
  if (DTU) {
    SmallVector<DominatorTree::UpdateType, 8> Updates;
    Updates.push_back({DominatorTree::Insert, BBCI, BBSubs[0]});
    for (uint64_t i = 0; i < N; ++i) {
      if (i < N - 1)
        Updates.push_back({DominatorTree::Insert, BBSubs[i], BBSubs[i + 1]});
      Updates.push_back({DominatorTree::Insert, BBSubs[i], BBNE});
    }
    Updates.push_back({DominatorTree::Insert, BBNE, BBTail});
    Updates.push_back({DominatorTree::Delete, BBCI, BBTail});
    DTU->applyUpdates(Updates);
  }
}

/// Convert memchr with a small constant string into a switch
static bool foldMemChr(CallInst *Call, DomTreeUpdater *DTU,
                       const DataLayout &DL) {
  // A constant needle is left to other folds; only a variable needle against
  // a constant haystack is handled here.
  if (isa<Constant>(Call->getArgOperand(1)))
    return false;

  StringRef Str;
  Value *Base = Call->getArgOperand(0);
  if (!getConstantStringInfo(Base, Str, /*TrimAtNul=*/false))
    return false;

  // The length argument must be a known constant.
  uint64_t N = Str.size();
  if (auto *ConstInt = dyn_cast<ConstantInt>(Call->getArgOperand(2))) {
    uint64_t Val = ConstInt->getZExtValue();
    // Ignore the case that n is larger than the size of string.
11250fca6ea1SDimitry Andric if (Val > N) 11260fca6ea1SDimitry Andric return false; 11270fca6ea1SDimitry Andric N = Val; 11280fca6ea1SDimitry Andric } else 11290fca6ea1SDimitry Andric return false; 11300fca6ea1SDimitry Andric 11310fca6ea1SDimitry Andric if (N > MemChrInlineThreshold) 11320fca6ea1SDimitry Andric return false; 11330fca6ea1SDimitry Andric 11340fca6ea1SDimitry Andric BasicBlock *BB = Call->getParent(); 11350fca6ea1SDimitry Andric BasicBlock *BBNext = SplitBlock(BB, Call, DTU); 11360fca6ea1SDimitry Andric IRBuilder<> IRB(BB); 11370fca6ea1SDimitry Andric IntegerType *ByteTy = IRB.getInt8Ty(); 11380fca6ea1SDimitry Andric BB->getTerminator()->eraseFromParent(); 11390fca6ea1SDimitry Andric SwitchInst *SI = IRB.CreateSwitch( 11400fca6ea1SDimitry Andric IRB.CreateTrunc(Call->getArgOperand(1), ByteTy), BBNext, N); 11410fca6ea1SDimitry Andric Type *IndexTy = DL.getIndexType(Call->getType()); 11420fca6ea1SDimitry Andric SmallVector<DominatorTree::UpdateType, 8> Updates; 11430fca6ea1SDimitry Andric 11440fca6ea1SDimitry Andric BasicBlock *BBSuccess = BasicBlock::Create( 11450fca6ea1SDimitry Andric Call->getContext(), "memchr.success", BB->getParent(), BBNext); 11460fca6ea1SDimitry Andric IRB.SetInsertPoint(BBSuccess); 11470fca6ea1SDimitry Andric PHINode *IndexPHI = IRB.CreatePHI(IndexTy, N, "memchr.idx"); 11480fca6ea1SDimitry Andric Value *FirstOccursLocation = IRB.CreateInBoundsPtrAdd(Base, IndexPHI); 11490fca6ea1SDimitry Andric IRB.CreateBr(BBNext); 11500fca6ea1SDimitry Andric if (DTU) 11510fca6ea1SDimitry Andric Updates.push_back({DominatorTree::Insert, BBSuccess, BBNext}); 11520fca6ea1SDimitry Andric 11530fca6ea1SDimitry Andric SmallPtrSet<ConstantInt *, 4> Cases; 11540fca6ea1SDimitry Andric for (uint64_t I = 0; I < N; ++I) { 11550fca6ea1SDimitry Andric ConstantInt *CaseVal = ConstantInt::get(ByteTy, Str[I]); 11560fca6ea1SDimitry Andric if (!Cases.insert(CaseVal).second) 11570fca6ea1SDimitry Andric continue; 11580fca6ea1SDimitry Andric 11590fca6ea1SDimitry 
Andric BasicBlock *BBCase = BasicBlock::Create(Call->getContext(), "memchr.case", 11600fca6ea1SDimitry Andric BB->getParent(), BBSuccess); 11610fca6ea1SDimitry Andric SI->addCase(CaseVal, BBCase); 11620fca6ea1SDimitry Andric IRB.SetInsertPoint(BBCase); 11630fca6ea1SDimitry Andric IndexPHI->addIncoming(ConstantInt::get(IndexTy, I), BBCase); 11640fca6ea1SDimitry Andric IRB.CreateBr(BBSuccess); 11650fca6ea1SDimitry Andric if (DTU) { 11660fca6ea1SDimitry Andric Updates.push_back({DominatorTree::Insert, BB, BBCase}); 11670fca6ea1SDimitry Andric Updates.push_back({DominatorTree::Insert, BBCase, BBSuccess}); 11680fca6ea1SDimitry Andric } 11690fca6ea1SDimitry Andric } 11700fca6ea1SDimitry Andric 11710fca6ea1SDimitry Andric PHINode *PHI = 11720fca6ea1SDimitry Andric PHINode::Create(Call->getType(), 2, Call->getName(), BBNext->begin()); 11730fca6ea1SDimitry Andric PHI->addIncoming(Constant::getNullValue(Call->getType()), BB); 11740fca6ea1SDimitry Andric PHI->addIncoming(FirstOccursLocation, BBSuccess); 11750fca6ea1SDimitry Andric 11760fca6ea1SDimitry Andric Call->replaceAllUsesWith(PHI); 11770fca6ea1SDimitry Andric Call->eraseFromParent(); 11780fca6ea1SDimitry Andric 11790fca6ea1SDimitry Andric if (DTU) 11800fca6ea1SDimitry Andric DTU->applyUpdates(Updates); 11810fca6ea1SDimitry Andric 11820fca6ea1SDimitry Andric return true; 11830fca6ea1SDimitry Andric } 11840fca6ea1SDimitry Andric 11850fca6ea1SDimitry Andric static bool foldLibCalls(Instruction &I, TargetTransformInfo &TTI, 11860fca6ea1SDimitry Andric TargetLibraryInfo &TLI, AssumptionCache &AC, 11870fca6ea1SDimitry Andric DominatorTree &DT, const DataLayout &DL, 11880fca6ea1SDimitry Andric bool &MadeCFGChange) { 11890fca6ea1SDimitry Andric 11900fca6ea1SDimitry Andric auto *CI = dyn_cast<CallInst>(&I); 11910fca6ea1SDimitry Andric if (!CI || CI->isNoBuiltin()) 11920fca6ea1SDimitry Andric return false; 11930fca6ea1SDimitry Andric 11940fca6ea1SDimitry Andric Function *CalledFunc = CI->getCalledFunction(); 11950fca6ea1SDimitry 
Andric if (!CalledFunc) 11960fca6ea1SDimitry Andric return false; 11970fca6ea1SDimitry Andric 11980fca6ea1SDimitry Andric LibFunc LF; 11990fca6ea1SDimitry Andric if (!TLI.getLibFunc(*CalledFunc, LF) || 12000fca6ea1SDimitry Andric !isLibFuncEmittable(CI->getModule(), &TLI, LF)) 12010fca6ea1SDimitry Andric return false; 12020fca6ea1SDimitry Andric 12030fca6ea1SDimitry Andric DomTreeUpdater DTU(&DT, DomTreeUpdater::UpdateStrategy::Lazy); 12040fca6ea1SDimitry Andric 12050fca6ea1SDimitry Andric switch (LF) { 12060fca6ea1SDimitry Andric case LibFunc_sqrt: 12070fca6ea1SDimitry Andric case LibFunc_sqrtf: 12080fca6ea1SDimitry Andric case LibFunc_sqrtl: 12090fca6ea1SDimitry Andric return foldSqrt(CI, LF, TTI, TLI, AC, DT); 12100fca6ea1SDimitry Andric case LibFunc_strcmp: 12110fca6ea1SDimitry Andric case LibFunc_strncmp: 12120fca6ea1SDimitry Andric if (StrNCmpInliner(CI, LF, &DTU, DL).optimizeStrNCmp()) { 12130fca6ea1SDimitry Andric MadeCFGChange = true; 12140fca6ea1SDimitry Andric return true; 12150fca6ea1SDimitry Andric } 12160fca6ea1SDimitry Andric break; 12170fca6ea1SDimitry Andric case LibFunc_memchr: 12180fca6ea1SDimitry Andric if (foldMemChr(CI, &DTU, DL)) { 12190fca6ea1SDimitry Andric MadeCFGChange = true; 12200fca6ea1SDimitry Andric return true; 12210fca6ea1SDimitry Andric } 12220fca6ea1SDimitry Andric break; 12230fca6ea1SDimitry Andric default:; 12240fca6ea1SDimitry Andric } 12250fca6ea1SDimitry Andric return false; 12260fca6ea1SDimitry Andric } 12270fca6ea1SDimitry Andric 12280b57cec5SDimitry Andric /// This is the entry point for folds that could be implemented in regular 12290b57cec5SDimitry Andric /// InstCombine, but they are separated because they are not expected to 12300b57cec5SDimitry Andric /// occur frequently and/or have more than a constant-length pattern match. 
123181ad6265SDimitry Andric static bool foldUnusualPatterns(Function &F, DominatorTree &DT, 1232972a253aSDimitry Andric TargetTransformInfo &TTI, 123306c3fb27SDimitry Andric TargetLibraryInfo &TLI, AliasAnalysis &AA, 12340fca6ea1SDimitry Andric AssumptionCache &AC, bool &MadeCFGChange) { 12350b57cec5SDimitry Andric bool MadeChange = false; 12360b57cec5SDimitry Andric for (BasicBlock &BB : F) { 12370b57cec5SDimitry Andric // Ignore unreachable basic blocks. 12380b57cec5SDimitry Andric if (!DT.isReachableFromEntry(&BB)) 12390b57cec5SDimitry Andric continue; 1240972a253aSDimitry Andric 12410fca6ea1SDimitry Andric const DataLayout &DL = F.getDataLayout(); 1242bdd1243dSDimitry Andric 12430b57cec5SDimitry Andric // Walk the block backwards for efficiency. We're matching a chain of 12440b57cec5SDimitry Andric // use->defs, so we're more likely to succeed by starting from the bottom. 12450b57cec5SDimitry Andric // Also, we want to avoid matching partial patterns. 12460b57cec5SDimitry Andric // TODO: It would be more efficient if we removed dead instructions 12470b57cec5SDimitry Andric // iteratively in this loop rather than waiting until the end. 1248972a253aSDimitry Andric for (Instruction &I : make_early_inc_range(llvm::reverse(BB))) { 12490b57cec5SDimitry Andric MadeChange |= foldAnyOrAllBitsSet(I); 1250e8d8bef9SDimitry Andric MadeChange |= foldGuardedFunnelShift(I, DT); 12518bcb0991SDimitry Andric MadeChange |= tryToRecognizePopCount(I); 125281ad6265SDimitry Andric MadeChange |= tryToFPToSat(I, TTI); 1253bdd1243dSDimitry Andric MadeChange |= tryToRecognizeTableBasedCttz(I); 125406c3fb27SDimitry Andric MadeChange |= foldConsecutiveLoads(I, DL, TTI, AA, DT); 125506c3fb27SDimitry Andric MadeChange |= foldPatternedLoads(I, DL); 1256bdd1243dSDimitry Andric // NOTE: This function introduces erasing of the instruction `I`, so it 1257bdd1243dSDimitry Andric // needs to be called at the end of this sequence, otherwise we may make 1258bdd1243dSDimitry Andric // bugs. 
12590fca6ea1SDimitry Andric MadeChange |= foldLibCalls(I, TTI, TLI, AC, DT, DL, MadeCFGChange); 12600b57cec5SDimitry Andric } 12610b57cec5SDimitry Andric } 12620b57cec5SDimitry Andric 12630b57cec5SDimitry Andric // We're done with transforms, so remove dead instructions. 12640b57cec5SDimitry Andric if (MadeChange) 12650b57cec5SDimitry Andric for (BasicBlock &BB : F) 12660b57cec5SDimitry Andric SimplifyInstructionsInBlock(&BB); 12670b57cec5SDimitry Andric 12680b57cec5SDimitry Andric return MadeChange; 12690b57cec5SDimitry Andric } 12700b57cec5SDimitry Andric 12710b57cec5SDimitry Andric /// This is the entry point for all transforms. Pass manager differences are 12720b57cec5SDimitry Andric /// handled in the callers of this function. 127381ad6265SDimitry Andric static bool runImpl(Function &F, AssumptionCache &AC, TargetTransformInfo &TTI, 1274bdd1243dSDimitry Andric TargetLibraryInfo &TLI, DominatorTree &DT, 12750fca6ea1SDimitry Andric AliasAnalysis &AA, bool &MadeCFGChange) { 12760b57cec5SDimitry Andric bool MadeChange = false; 12770fca6ea1SDimitry Andric const DataLayout &DL = F.getDataLayout(); 1278349cc55cSDimitry Andric TruncInstCombine TIC(AC, TLI, DL, DT); 12790b57cec5SDimitry Andric MadeChange |= TIC.run(F); 12800fca6ea1SDimitry Andric MadeChange |= foldUnusualPatterns(F, DT, TTI, TLI, AA, AC, MadeCFGChange); 12810b57cec5SDimitry Andric return MadeChange; 12820b57cec5SDimitry Andric } 12830b57cec5SDimitry Andric 12840b57cec5SDimitry Andric PreservedAnalyses AggressiveInstCombinePass::run(Function &F, 12850b57cec5SDimitry Andric FunctionAnalysisManager &AM) { 1286349cc55cSDimitry Andric auto &AC = AM.getResult<AssumptionAnalysis>(F); 12870b57cec5SDimitry Andric auto &TLI = AM.getResult<TargetLibraryAnalysis>(F); 12880b57cec5SDimitry Andric auto &DT = AM.getResult<DominatorTreeAnalysis>(F); 128981ad6265SDimitry Andric auto &TTI = AM.getResult<TargetIRAnalysis>(F); 1290bdd1243dSDimitry Andric auto &AA = AM.getResult<AAManager>(F); 12910fca6ea1SDimitry Andric 
bool MadeCFGChange = false; 12920fca6ea1SDimitry Andric if (!runImpl(F, AC, TTI, TLI, DT, AA, MadeCFGChange)) { 12930b57cec5SDimitry Andric // No changes, all analyses are preserved. 12940b57cec5SDimitry Andric return PreservedAnalyses::all(); 12950b57cec5SDimitry Andric } 12960b57cec5SDimitry Andric // Mark all the analyses that instcombine updates as preserved. 12970b57cec5SDimitry Andric PreservedAnalyses PA; 12980fca6ea1SDimitry Andric if (MadeCFGChange) 12990fca6ea1SDimitry Andric PA.preserve<DominatorTreeAnalysis>(); 13000fca6ea1SDimitry Andric else 13010b57cec5SDimitry Andric PA.preserveSet<CFGAnalyses>(); 13020b57cec5SDimitry Andric return PA; 13030b57cec5SDimitry Andric } 1304