10b57cec5SDimitry Andric //===- InstCombineVectorOps.cpp -------------------------------------------===// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric // 90b57cec5SDimitry Andric // This file implements instcombine for ExtractElement, InsertElement and 100b57cec5SDimitry Andric // ShuffleVector. 110b57cec5SDimitry Andric // 120b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 130b57cec5SDimitry Andric 140b57cec5SDimitry Andric #include "InstCombineInternal.h" 150b57cec5SDimitry Andric #include "llvm/ADT/APInt.h" 160b57cec5SDimitry Andric #include "llvm/ADT/ArrayRef.h" 170b57cec5SDimitry Andric #include "llvm/ADT/DenseMap.h" 180b57cec5SDimitry Andric #include "llvm/ADT/STLExtras.h" 195ffd83dbSDimitry Andric #include "llvm/ADT/SmallBitVector.h" 200b57cec5SDimitry Andric #include "llvm/ADT/SmallVector.h" 21e8d8bef9SDimitry Andric #include "llvm/ADT/Statistic.h" 220b57cec5SDimitry Andric #include "llvm/Analysis/InstructionSimplify.h" 230b57cec5SDimitry Andric #include "llvm/Analysis/VectorUtils.h" 240b57cec5SDimitry Andric #include "llvm/IR/BasicBlock.h" 250b57cec5SDimitry Andric #include "llvm/IR/Constant.h" 260b57cec5SDimitry Andric #include "llvm/IR/Constants.h" 270b57cec5SDimitry Andric #include "llvm/IR/DerivedTypes.h" 280b57cec5SDimitry Andric #include "llvm/IR/InstrTypes.h" 290b57cec5SDimitry Andric #include "llvm/IR/Instruction.h" 300b57cec5SDimitry Andric #include "llvm/IR/Instructions.h" 310b57cec5SDimitry Andric #include "llvm/IR/Operator.h" 320b57cec5SDimitry Andric #include "llvm/IR/PatternMatch.h" 330b57cec5SDimitry Andric #include "llvm/IR/Type.h" 340b57cec5SDimitry Andric #include "llvm/IR/User.h" 350b57cec5SDimitry Andric #include "llvm/IR/Value.h" 360b57cec5SDimitry Andric #include "llvm/Support/Casting.h" 370b57cec5SDimitry Andric #include "llvm/Support/ErrorHandling.h" 38e8d8bef9SDimitry Andric #include "llvm/Transforms/InstCombine/InstCombiner.h" 390b57cec5SDimitry Andric #include <cassert> 400b57cec5SDimitry Andric #include <cstdint> 410b57cec5SDimitry Andric #include <iterator> 420b57cec5SDimitry Andric #include <utility> 430b57cec5SDimitry Andric 44349cc55cSDimitry Andric #define DEBUG_TYPE "instcombine" 45349cc55cSDimitry Andric 460b57cec5SDimitry Andric using namespace llvm; 470b57cec5SDimitry Andric using namespace PatternMatch; 480b57cec5SDimitry Andric 49e8d8bef9SDimitry Andric STATISTIC(NumAggregateReconstructionsSimplified, 50e8d8bef9SDimitry Andric "Number of aggregate reconstructions turned into reuse of the " 51e8d8bef9SDimitry Andric "original aggregate"); 52e8d8bef9SDimitry Andric 530b57cec5SDimitry Andric /// Return true if the value is cheaper to scalarize than it is to leave as a 54349cc55cSDimitry Andric /// vector operation. If the extract index \p EI is a constant integer then 55349cc55cSDimitry Andric /// some operations may be cheap to scalarize. 560b57cec5SDimitry Andric /// 570b57cec5SDimitry Andric /// FIXME: It's possible to create more instructions than previously existed. 58349cc55cSDimitry Andric static bool cheapToScalarize(Value *V, Value *EI) { 59349cc55cSDimitry Andric ConstantInt *CEI = dyn_cast<ConstantInt>(EI); 60349cc55cSDimitry Andric 610b57cec5SDimitry Andric // If we can pick a scalar constant value out of a vector, that is free. 620b57cec5SDimitry Andric if (auto *C = dyn_cast<Constant>(V)) 63349cc55cSDimitry Andric return CEI || C->getSplatValue(); 64349cc55cSDimitry Andric 65349cc55cSDimitry Andric if (CEI && match(V, m_Intrinsic<Intrinsic::experimental_stepvector>())) { 66349cc55cSDimitry Andric ElementCount EC = cast<VectorType>(V->getType())->getElementCount(); 67349cc55cSDimitry Andric // Index needs to be lower than the minimum size of the vector, because 68349cc55cSDimitry Andric // for scalable vector, the vector size is known at run time. 69349cc55cSDimitry Andric return CEI->getValue().ult(EC.getKnownMinValue()); 70349cc55cSDimitry Andric } 710b57cec5SDimitry Andric 720b57cec5SDimitry Andric // An insertelement to the same constant index as our extract will simplify 730b57cec5SDimitry Andric // to the scalar inserted element. An insertelement to a different constant 740b57cec5SDimitry Andric // index is irrelevant to our extract. 755ffd83dbSDimitry Andric if (match(V, m_InsertElt(m_Value(), m_Value(), m_ConstantInt()))) 76349cc55cSDimitry Andric return CEI; 770b57cec5SDimitry Andric 780b57cec5SDimitry Andric if (match(V, m_OneUse(m_Load(m_Value())))) 790b57cec5SDimitry Andric return true; 800b57cec5SDimitry Andric 815ffd83dbSDimitry Andric if (match(V, m_OneUse(m_UnOp()))) 825ffd83dbSDimitry Andric return true; 835ffd83dbSDimitry Andric 840b57cec5SDimitry Andric Value *V0, *V1; 850b57cec5SDimitry Andric if (match(V, m_OneUse(m_BinOp(m_Value(V0), m_Value(V1))))) 86349cc55cSDimitry Andric if (cheapToScalarize(V0, EI) || cheapToScalarize(V1, EI)) 870b57cec5SDimitry Andric return true; 880b57cec5SDimitry Andric 890b57cec5SDimitry Andric CmpInst::Predicate UnusedPred; 900b57cec5SDimitry Andric if (match(V, m_OneUse(m_Cmp(UnusedPred, m_Value(V0), m_Value(V1))))) 91349cc55cSDimitry Andric if (cheapToScalarize(V0, EI) || cheapToScalarize(V1, EI)) 920b57cec5SDimitry Andric return true; 930b57cec5SDimitry Andric 940b57cec5SDimitry Andric return false; 950b57cec5SDimitry Andric } 960b57cec5SDimitry Andric 970b57cec5SDimitry Andric // If we have a PHI node with a vector type that is only used to feed 980b57cec5SDimitry Andric // itself and be an operand of extractelement at a constant location, 990b57cec5SDimitry Andric // try to replace the PHI of the vector type with a PHI of a scalar type. 100e8d8bef9SDimitry Andric Instruction *InstCombinerImpl::scalarizePHI(ExtractElementInst &EI, 101e8d8bef9SDimitry Andric PHINode *PN) { 1020b57cec5SDimitry Andric SmallVector<Instruction *, 2> Extracts; 1030b57cec5SDimitry Andric // The users we want the PHI to have are: 1040b57cec5SDimitry Andric // 1) The EI ExtractElement (we already know this) 1050b57cec5SDimitry Andric // 2) Possibly more ExtractElements with the same index. 1060b57cec5SDimitry Andric // 3) Another operand, which will feed back into the PHI. 1070b57cec5SDimitry Andric Instruction *PHIUser = nullptr; 108bdd1243dSDimitry Andric for (auto *U : PN->users()) { 1090b57cec5SDimitry Andric if (ExtractElementInst *EU = dyn_cast<ExtractElementInst>(U)) { 1100b57cec5SDimitry Andric if (EI.getIndexOperand() == EU->getIndexOperand()) 1110b57cec5SDimitry Andric Extracts.push_back(EU); 1120b57cec5SDimitry Andric else 1130b57cec5SDimitry Andric return nullptr; 1140b57cec5SDimitry Andric } else if (!PHIUser) { 1150b57cec5SDimitry Andric PHIUser = cast<Instruction>(U); 1160b57cec5SDimitry Andric } else { 1170b57cec5SDimitry Andric return nullptr; 1180b57cec5SDimitry Andric } 1190b57cec5SDimitry Andric } 1200b57cec5SDimitry Andric 1210b57cec5SDimitry Andric if (!PHIUser) 1220b57cec5SDimitry Andric return nullptr; 1230b57cec5SDimitry Andric 1240b57cec5SDimitry Andric // Verify that this PHI user has one use, which is the PHI itself, 1250b57cec5SDimitry Andric // and that it is a binary operation which is cheap to scalarize. 1260b57cec5SDimitry Andric // otherwise return nullptr. 1270b57cec5SDimitry Andric if (!PHIUser->hasOneUse() || !(PHIUser->user_back() == PN) || 128349cc55cSDimitry Andric !(isa<BinaryOperator>(PHIUser)) || 129349cc55cSDimitry Andric !cheapToScalarize(PHIUser, EI.getIndexOperand())) 1300b57cec5SDimitry Andric return nullptr; 1310b57cec5SDimitry Andric 1320b57cec5SDimitry Andric // Create a scalar PHI node that will replace the vector PHI node 1330b57cec5SDimitry Andric // just before the current PHI node. 1340b57cec5SDimitry Andric PHINode *scalarPHI = cast<PHINode>(InsertNewInstWith( 1355f757f3fSDimitry Andric PHINode::Create(EI.getType(), PN->getNumIncomingValues(), ""), PN->getIterator())); 1360b57cec5SDimitry Andric // Scalarize each PHI operand. 1370b57cec5SDimitry Andric for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) { 1380b57cec5SDimitry Andric Value *PHIInVal = PN->getIncomingValue(i); 1390b57cec5SDimitry Andric BasicBlock *inBB = PN->getIncomingBlock(i); 1400b57cec5SDimitry Andric Value *Elt = EI.getIndexOperand(); 1410b57cec5SDimitry Andric // If the operand is the PHI induction variable: 1420b57cec5SDimitry Andric if (PHIInVal == PHIUser) { 1430b57cec5SDimitry Andric // Scalarize the binary operation. Its first operand is the 1440b57cec5SDimitry Andric // scalar PHI, and the second operand is extracted from the other 1450b57cec5SDimitry Andric // vector operand. 1460b57cec5SDimitry Andric BinaryOperator *B0 = cast<BinaryOperator>(PHIUser); 1470b57cec5SDimitry Andric unsigned opId = (B0->getOperand(0) == PN) ? 1 : 0; 1480b57cec5SDimitry Andric Value *Op = InsertNewInstWith( 1490b57cec5SDimitry Andric ExtractElementInst::Create(B0->getOperand(opId), Elt, 1500b57cec5SDimitry Andric B0->getOperand(opId)->getName() + ".Elt"), 1515f757f3fSDimitry Andric B0->getIterator()); 1520b57cec5SDimitry Andric Value *newPHIUser = InsertNewInstWith( 1530b57cec5SDimitry Andric BinaryOperator::CreateWithCopiedFlags(B0->getOpcode(), 1545f757f3fSDimitry Andric scalarPHI, Op, B0), B0->getIterator()); 1550b57cec5SDimitry Andric scalarPHI->addIncoming(newPHIUser, inBB); 1560b57cec5SDimitry Andric } else { 1570b57cec5SDimitry Andric // Scalarize PHI input: 1580b57cec5SDimitry Andric Instruction *newEI = ExtractElementInst::Create(PHIInVal, Elt, ""); 1590b57cec5SDimitry Andric // Insert the new instruction into the predecessor basic block. 1600b57cec5SDimitry Andric Instruction *pos = dyn_cast<Instruction>(PHIInVal); 1610b57cec5SDimitry Andric BasicBlock::iterator InsertPos; 1620b57cec5SDimitry Andric if (pos && !isa<PHINode>(pos)) { 1630b57cec5SDimitry Andric InsertPos = ++pos->getIterator(); 1640b57cec5SDimitry Andric } else { 1650b57cec5SDimitry Andric InsertPos = inBB->getFirstInsertionPt(); 1660b57cec5SDimitry Andric } 1670b57cec5SDimitry Andric 1685f757f3fSDimitry Andric InsertNewInstWith(newEI, InsertPos); 1690b57cec5SDimitry Andric 1700b57cec5SDimitry Andric scalarPHI->addIncoming(newEI, inBB); 1710b57cec5SDimitry Andric } 1720b57cec5SDimitry Andric } 1730b57cec5SDimitry Andric 17406c3fb27SDimitry Andric for (auto *E : Extracts) { 1750b57cec5SDimitry Andric replaceInstUsesWith(*E, scalarPHI); 17606c3fb27SDimitry Andric // Add old extract to worklist for DCE. 17706c3fb27SDimitry Andric addToWorklist(E); 17806c3fb27SDimitry Andric } 1790b57cec5SDimitry Andric 1800b57cec5SDimitry Andric return &EI; 1810b57cec5SDimitry Andric } 1820b57cec5SDimitry Andric 183349cc55cSDimitry Andric Instruction *InstCombinerImpl::foldBitcastExtElt(ExtractElementInst &Ext) { 1840b57cec5SDimitry Andric Value *X; 1850b57cec5SDimitry Andric uint64_t ExtIndexC; 1860b57cec5SDimitry Andric if (!match(Ext.getVectorOperand(), m_BitCast(m_Value(X))) || 1870b57cec5SDimitry Andric !match(Ext.getIndexOperand(), m_ConstantInt(ExtIndexC))) 1880b57cec5SDimitry Andric return nullptr; 1890b57cec5SDimitry Andric 190349cc55cSDimitry Andric ElementCount NumElts = 191349cc55cSDimitry Andric cast<VectorType>(Ext.getVectorOperandType())->getElementCount(); 192349cc55cSDimitry Andric Type *DestTy = Ext.getType(); 193bdd1243dSDimitry Andric unsigned DestWidth = DestTy->getPrimitiveSizeInBits(); 194349cc55cSDimitry Andric bool IsBigEndian = DL.isBigEndian(); 195349cc55cSDimitry Andric 196349cc55cSDimitry Andric // If we are casting an integer to vector and extracting a portion, that is 197349cc55cSDimitry Andric // a shift-right and truncate. 198bdd1243dSDimitry Andric if (X->getType()->isIntegerTy()) { 199349cc55cSDimitry Andric assert(isa<FixedVectorType>(Ext.getVectorOperand()->getType()) && 200349cc55cSDimitry Andric "Expected fixed vector type for bitcast from scalar integer"); 201349cc55cSDimitry Andric 202349cc55cSDimitry Andric // Big endian requires adjusting the extract index since MSB is at index 0. 203349cc55cSDimitry Andric // LittleEndian: extelt (bitcast i32 X to v4i8), 0 -> trunc i32 X to i8 204349cc55cSDimitry Andric // BigEndian: extelt (bitcast i32 X to v4i8), 0 -> trunc i32 (X >> 24) to i8 205349cc55cSDimitry Andric if (IsBigEndian) 206349cc55cSDimitry Andric ExtIndexC = NumElts.getKnownMinValue() - 1 - ExtIndexC; 207bdd1243dSDimitry Andric unsigned ShiftAmountC = ExtIndexC * DestWidth; 208bdd1243dSDimitry Andric if (!ShiftAmountC || 209bdd1243dSDimitry Andric (isDesirableIntType(X->getType()->getPrimitiveSizeInBits()) && 210bdd1243dSDimitry Andric Ext.getVectorOperand()->hasOneUse())) { 211bdd1243dSDimitry Andric if (ShiftAmountC) 212bdd1243dSDimitry Andric X = Builder.CreateLShr(X, ShiftAmountC, "extelt.offset"); 213bdd1243dSDimitry Andric if (DestTy->isFloatingPointTy()) { 214bdd1243dSDimitry Andric Type *DstIntTy = IntegerType::getIntNTy(X->getContext(), DestWidth); 215bdd1243dSDimitry Andric Value *Trunc = Builder.CreateTrunc(X, DstIntTy); 216bdd1243dSDimitry Andric return new BitCastInst(Trunc, DestTy); 217bdd1243dSDimitry Andric } 218bdd1243dSDimitry Andric return new TruncInst(X, DestTy); 219349cc55cSDimitry Andric } 220349cc55cSDimitry Andric } 221349cc55cSDimitry Andric 222349cc55cSDimitry Andric if (!X->getType()->isVectorTy()) 223349cc55cSDimitry Andric return nullptr; 224349cc55cSDimitry Andric 2250b57cec5SDimitry Andric // If this extractelement is using a bitcast from a vector of the same number 2260b57cec5SDimitry Andric // of elements, see if we can find the source element from the source vector: 2270b57cec5SDimitry Andric // extelt (bitcast VecX), IndexC --> bitcast X[IndexC] 2285ffd83dbSDimitry Andric auto *SrcTy = cast<VectorType>(X->getType()); 229e8d8bef9SDimitry Andric ElementCount NumSrcElts = SrcTy->getElementCount(); 2300b57cec5SDimitry Andric if (NumSrcElts == NumElts) 2310b57cec5SDimitry Andric if (Value *Elt = findScalarElement(X, ExtIndexC)) 2320b57cec5SDimitry Andric return new BitCastInst(Elt, DestTy); 2330b57cec5SDimitry Andric 234e8d8bef9SDimitry Andric assert(NumSrcElts.isScalable() == NumElts.isScalable() && 235e8d8bef9SDimitry Andric "Src and Dst must be the same sort of vector type"); 236e8d8bef9SDimitry Andric 2370b57cec5SDimitry Andric // If the source elements are wider than the destination, try to shift and 2380b57cec5SDimitry Andric // truncate a subset of scalar bits of an insert op. 239e8d8bef9SDimitry Andric if (NumSrcElts.getKnownMinValue() < NumElts.getKnownMinValue()) { 2400b57cec5SDimitry Andric Value *Scalar; 241753f127fSDimitry Andric Value *Vec; 2420b57cec5SDimitry Andric uint64_t InsIndexC; 243753f127fSDimitry Andric if (!match(X, m_InsertElt(m_Value(Vec), m_Value(Scalar), 2440b57cec5SDimitry Andric m_ConstantInt(InsIndexC)))) 2450b57cec5SDimitry Andric return nullptr; 2460b57cec5SDimitry Andric 2470b57cec5SDimitry Andric // The extract must be from the subset of vector elements that we inserted 2480b57cec5SDimitry Andric // into. Example: if we inserted element 1 of a <2 x i64> and we are 2490b57cec5SDimitry Andric // extracting an i16 (narrowing ratio = 4), then this extract must be from 1 2500b57cec5SDimitry Andric // of elements 4-7 of the bitcasted vector. 251e8d8bef9SDimitry Andric unsigned NarrowingRatio = 252e8d8bef9SDimitry Andric NumElts.getKnownMinValue() / NumSrcElts.getKnownMinValue(); 253753f127fSDimitry Andric 254753f127fSDimitry Andric if (ExtIndexC / NarrowingRatio != InsIndexC) { 255753f127fSDimitry Andric // Remove insertelement, if we don't use the inserted element. 256753f127fSDimitry Andric // extractelement (bitcast (insertelement (Vec, b)), a) -> 257753f127fSDimitry Andric // extractelement (bitcast (Vec), a) 258753f127fSDimitry Andric // FIXME: this should be removed to SimplifyDemandedVectorElts, 259753f127fSDimitry Andric // once scale vectors are supported. 260753f127fSDimitry Andric if (X->hasOneUse() && Ext.getVectorOperand()->hasOneUse()) { 261753f127fSDimitry Andric Value *NewBC = Builder.CreateBitCast(Vec, Ext.getVectorOperandType()); 262753f127fSDimitry Andric return ExtractElementInst::Create(NewBC, Ext.getIndexOperand()); 263753f127fSDimitry Andric } 2640b57cec5SDimitry Andric return nullptr; 265753f127fSDimitry Andric } 2660b57cec5SDimitry Andric 2670b57cec5SDimitry Andric // We are extracting part of the original scalar. How that scalar is 2680b57cec5SDimitry Andric // inserted into the vector depends on the endian-ness. Example: 2690b57cec5SDimitry Andric // Vector Byte Elt Index: 0 1 2 3 4 5 6 7 2700b57cec5SDimitry Andric // +--+--+--+--+--+--+--+--+ 2710b57cec5SDimitry Andric // inselt <2 x i32> V, <i32> S, 1: |V0|V1|V2|V3|S0|S1|S2|S3| 2720b57cec5SDimitry Andric // extelt <4 x i16> V', 3: | |S2|S3| 2730b57cec5SDimitry Andric // +--+--+--+--+--+--+--+--+ 2740b57cec5SDimitry Andric // If this is little-endian, S2|S3 are the MSB of the 32-bit 'S' value. 2750b57cec5SDimitry Andric // If this is big-endian, S2|S3 are the LSB of the 32-bit 'S' value. 2760b57cec5SDimitry Andric // In this example, we must right-shift little-endian. Big-endian is just a 2770b57cec5SDimitry Andric // truncate. 2780b57cec5SDimitry Andric unsigned Chunk = ExtIndexC % NarrowingRatio; 2790b57cec5SDimitry Andric if (IsBigEndian) 2800b57cec5SDimitry Andric Chunk = NarrowingRatio - 1 - Chunk; 2810b57cec5SDimitry Andric 2820b57cec5SDimitry Andric // Bail out if this is an FP vector to FP vector sequence. That would take 2830b57cec5SDimitry Andric // more instructions than we started with unless there is no shift, and it 2840b57cec5SDimitry Andric // may not be handled as well in the backend. 2850b57cec5SDimitry Andric bool NeedSrcBitcast = SrcTy->getScalarType()->isFloatingPointTy(); 2860b57cec5SDimitry Andric bool NeedDestBitcast = DestTy->isFloatingPointTy(); 2870b57cec5SDimitry Andric if (NeedSrcBitcast && NeedDestBitcast) 2880b57cec5SDimitry Andric return nullptr; 2890b57cec5SDimitry Andric 2900b57cec5SDimitry Andric unsigned SrcWidth = SrcTy->getScalarSizeInBits(); 2910b57cec5SDimitry Andric unsigned ShAmt = Chunk * DestWidth; 2920b57cec5SDimitry Andric 2930b57cec5SDimitry Andric // TODO: This limitation is more strict than necessary. We could sum the 2940b57cec5SDimitry Andric // number of new instructions and subtract the number eliminated to know if 2950b57cec5SDimitry Andric // we can proceed. 2960b57cec5SDimitry Andric if (!X->hasOneUse() || !Ext.getVectorOperand()->hasOneUse()) 2970b57cec5SDimitry Andric if (NeedSrcBitcast || NeedDestBitcast) 2980b57cec5SDimitry Andric return nullptr; 2990b57cec5SDimitry Andric 3000b57cec5SDimitry Andric if (NeedSrcBitcast) { 3010b57cec5SDimitry Andric Type *SrcIntTy = IntegerType::getIntNTy(Scalar->getContext(), SrcWidth); 3020b57cec5SDimitry Andric Scalar = Builder.CreateBitCast(Scalar, SrcIntTy); 3030b57cec5SDimitry Andric } 3040b57cec5SDimitry Andric 3050b57cec5SDimitry Andric if (ShAmt) { 3060b57cec5SDimitry Andric // Bail out if we could end with more instructions than we started with. 3070b57cec5SDimitry Andric if (!Ext.getVectorOperand()->hasOneUse()) 3080b57cec5SDimitry Andric return nullptr; 3090b57cec5SDimitry Andric Scalar = Builder.CreateLShr(Scalar, ShAmt); 3100b57cec5SDimitry Andric } 3110b57cec5SDimitry Andric 3120b57cec5SDimitry Andric if (NeedDestBitcast) { 3130b57cec5SDimitry Andric Type *DestIntTy = IntegerType::getIntNTy(Scalar->getContext(), DestWidth); 3140b57cec5SDimitry Andric return new BitCastInst(Builder.CreateTrunc(Scalar, DestIntTy), DestTy); 3150b57cec5SDimitry Andric } 3160b57cec5SDimitry Andric return new TruncInst(Scalar, DestTy); 3170b57cec5SDimitry Andric } 3180b57cec5SDimitry Andric 3190b57cec5SDimitry Andric return nullptr; 3200b57cec5SDimitry Andric } 3210b57cec5SDimitry Andric 3228bcb0991SDimitry Andric /// Find elements of V demanded by UserInstr. 3238bcb0991SDimitry Andric static APInt findDemandedEltsBySingleUser(Value *V, Instruction *UserInstr) { 324e8d8bef9SDimitry Andric unsigned VWidth = cast<FixedVectorType>(V->getType())->getNumElements(); 3258bcb0991SDimitry Andric 3268bcb0991SDimitry Andric // Conservatively assume that all elements are needed. 327349cc55cSDimitry Andric APInt UsedElts(APInt::getAllOnes(VWidth)); 3288bcb0991SDimitry Andric 3298bcb0991SDimitry Andric switch (UserInstr->getOpcode()) { 3308bcb0991SDimitry Andric case Instruction::ExtractElement: { 3318bcb0991SDimitry Andric ExtractElementInst *EEI = cast<ExtractElementInst>(UserInstr); 3328bcb0991SDimitry Andric assert(EEI->getVectorOperand() == V); 3338bcb0991SDimitry Andric ConstantInt *EEIIndexC = dyn_cast<ConstantInt>(EEI->getIndexOperand()); 3348bcb0991SDimitry Andric if (EEIIndexC && EEIIndexC->getValue().ult(VWidth)) { 3358bcb0991SDimitry Andric UsedElts = APInt::getOneBitSet(VWidth, EEIIndexC->getZExtValue()); 3368bcb0991SDimitry Andric } 3378bcb0991SDimitry Andric break; 3388bcb0991SDimitry Andric } 3398bcb0991SDimitry Andric case Instruction::ShuffleVector: { 3408bcb0991SDimitry Andric ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(UserInstr); 3415ffd83dbSDimitry Andric unsigned MaskNumElts = 342e8d8bef9SDimitry Andric cast<FixedVectorType>(UserInstr->getType())->getNumElements(); 3438bcb0991SDimitry Andric 3448bcb0991SDimitry Andric UsedElts = APInt(VWidth, 0); 3458bcb0991SDimitry Andric for (unsigned i = 0; i < MaskNumElts; i++) { 3468bcb0991SDimitry Andric unsigned MaskVal = Shuffle->getMaskValue(i); 3478bcb0991SDimitry Andric if (MaskVal == -1u || MaskVal >= 2 * VWidth) 3488bcb0991SDimitry Andric continue; 3498bcb0991SDimitry Andric if (Shuffle->getOperand(0) == V && (MaskVal < VWidth)) 3508bcb0991SDimitry Andric UsedElts.setBit(MaskVal); 3518bcb0991SDimitry Andric if (Shuffle->getOperand(1) == V && 3528bcb0991SDimitry Andric ((MaskVal >= VWidth) && (MaskVal < 2 * VWidth))) 3538bcb0991SDimitry Andric UsedElts.setBit(MaskVal - VWidth); 3548bcb0991SDimitry Andric } 3558bcb0991SDimitry Andric break; 3568bcb0991SDimitry Andric } 3578bcb0991SDimitry Andric default: 3588bcb0991SDimitry Andric break; 3598bcb0991SDimitry Andric } 3608bcb0991SDimitry Andric return UsedElts; 3618bcb0991SDimitry Andric } 3628bcb0991SDimitry Andric 3638bcb0991SDimitry Andric /// Find union of elements of V demanded by all its users. 3648bcb0991SDimitry Andric /// If it is known by querying findDemandedEltsBySingleUser that 3658bcb0991SDimitry Andric /// no user demands an element of V, then the corresponding bit 3668bcb0991SDimitry Andric /// remains unset in the returned value. 3678bcb0991SDimitry Andric static APInt findDemandedEltsByAllUsers(Value *V) { 368e8d8bef9SDimitry Andric unsigned VWidth = cast<FixedVectorType>(V->getType())->getNumElements(); 3698bcb0991SDimitry Andric 3708bcb0991SDimitry Andric APInt UnionUsedElts(VWidth, 0); 3718bcb0991SDimitry Andric for (const Use &U : V->uses()) { 3728bcb0991SDimitry Andric if (Instruction *I = dyn_cast<Instruction>(U.getUser())) { 3738bcb0991SDimitry Andric UnionUsedElts |= findDemandedEltsBySingleUser(V, I); 3748bcb0991SDimitry Andric } else { 375349cc55cSDimitry Andric UnionUsedElts = APInt::getAllOnes(VWidth); 3768bcb0991SDimitry Andric break; 3778bcb0991SDimitry Andric } 3788bcb0991SDimitry Andric 379349cc55cSDimitry Andric if (UnionUsedElts.isAllOnes()) 3808bcb0991SDimitry Andric break; 3818bcb0991SDimitry Andric } 3828bcb0991SDimitry Andric 3838bcb0991SDimitry Andric return UnionUsedElts; 3848bcb0991SDimitry Andric } 3858bcb0991SDimitry Andric 3860eae32dcSDimitry Andric /// Given a constant index for a extractelement or insertelement instruction, 3870eae32dcSDimitry Andric /// return it with the canonical type if it isn't already canonical. We 3880eae32dcSDimitry Andric /// arbitrarily pick 64 bit as our canonical type. The actual bitwidth doesn't 3890eae32dcSDimitry Andric /// matter, we just want a consistent type to simplify CSE. 39006c3fb27SDimitry Andric static ConstantInt *getPreferredVectorIndex(ConstantInt *IndexC) { 391cb14a3feSDimitry Andric const unsigned IndexBW = IndexC->getBitWidth(); 3920eae32dcSDimitry Andric if (IndexBW == 64 || IndexC->getValue().getActiveBits() > 64) 3930eae32dcSDimitry Andric return nullptr; 3940eae32dcSDimitry Andric return ConstantInt::get(IndexC->getContext(), 3950eae32dcSDimitry Andric IndexC->getValue().zextOrTrunc(64)); 3960eae32dcSDimitry Andric } 3970eae32dcSDimitry Andric 398e8d8bef9SDimitry Andric Instruction *InstCombinerImpl::visitExtractElementInst(ExtractElementInst &EI) { 3990b57cec5SDimitry Andric Value *SrcVec = EI.getVectorOperand(); 4000b57cec5SDimitry Andric Value *Index = EI.getIndexOperand(); 40181ad6265SDimitry Andric if (Value *V = simplifyExtractElementInst(SrcVec, Index, 4020b57cec5SDimitry Andric SQ.getWithInstruction(&EI))) 4030b57cec5SDimitry Andric return replaceInstUsesWith(EI, V); 4040b57cec5SDimitry Andric 405bdd1243dSDimitry Andric // extractelt (select %x, %vec1, %vec2), %const -> 406bdd1243dSDimitry Andric // select %x, %vec1[%const], %vec2[%const] 407bdd1243dSDimitry Andric // TODO: Support constant folding of multiple select operands: 408bdd1243dSDimitry Andric // extractelt (select %x, %vec1, %vec2), (select %x, %c1, %c2) 409bdd1243dSDimitry Andric // If the extractelement will for instance try to do out of bounds accesses 410bdd1243dSDimitry Andric // because of the values of %c1 and/or %c2, the sequence could be optimized 411bdd1243dSDimitry Andric // early. This is currently not possible because constant folding will reach 412bdd1243dSDimitry Andric // an unreachable assertion if it doesn't find a constant operand. 413bdd1243dSDimitry Andric if (SelectInst *SI = dyn_cast<SelectInst>(EI.getVectorOperand())) 414bdd1243dSDimitry Andric if (SI->getCondition()->getType()->isIntegerTy() && 415bdd1243dSDimitry Andric isa<Constant>(EI.getIndexOperand())) 416bdd1243dSDimitry Andric if (Instruction *R = FoldOpIntoSelect(EI, SI)) 417bdd1243dSDimitry Andric return R; 418bdd1243dSDimitry Andric 4190b57cec5SDimitry Andric // If extracting a specified index from the vector, see if we can recursively 4200b57cec5SDimitry Andric // find a previously computed scalar that was inserted into the vector. 4210b57cec5SDimitry Andric auto *IndexC = dyn_cast<ConstantInt>(Index); 422*0fca6ea1SDimitry Andric bool HasKnownValidIndex = false; 4230b57cec5SDimitry Andric if (IndexC) { 4240eae32dcSDimitry Andric // Canonicalize type of constant indices to i64 to simplify CSE 4250eae32dcSDimitry Andric if (auto *NewIdx = getPreferredVectorIndex(IndexC)) 4260eae32dcSDimitry Andric return replaceOperand(EI, 1, NewIdx); 4270eae32dcSDimitry Andric 4285ffd83dbSDimitry Andric ElementCount EC = EI.getVectorOperandType()->getElementCount(); 429e8d8bef9SDimitry Andric unsigned NumElts = EC.getKnownMinValue(); 430*0fca6ea1SDimitry Andric HasKnownValidIndex = IndexC->getValue().ult(NumElts); 4310b57cec5SDimitry Andric 432fe6060f1SDimitry Andric if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(SrcVec)) { 433fe6060f1SDimitry Andric Intrinsic::ID IID = II->getIntrinsicID(); 434fe6060f1SDimitry Andric // Index needs to be lower than the minimum size of the vector, because 435fe6060f1SDimitry Andric // for scalable vector, the vector size is known at run time. 436fe6060f1SDimitry Andric if (IID == Intrinsic::experimental_stepvector && 437fe6060f1SDimitry Andric IndexC->getValue().ult(NumElts)) { 438fe6060f1SDimitry Andric Type *Ty = EI.getType(); 439fe6060f1SDimitry Andric unsigned BitWidth = Ty->getIntegerBitWidth(); 440fe6060f1SDimitry Andric Value *Idx; 441fe6060f1SDimitry Andric // Return index when its value does not exceed the allowed limit 442fe6060f1SDimitry Andric // for the element type of the vector, otherwise return undefined. 443fe6060f1SDimitry Andric if (IndexC->getValue().getActiveBits() <= BitWidth) 444fe6060f1SDimitry Andric Idx = ConstantInt::get(Ty, IndexC->getValue().zextOrTrunc(BitWidth)); 445fe6060f1SDimitry Andric else 4465f757f3fSDimitry Andric Idx = PoisonValue::get(Ty); 447fe6060f1SDimitry Andric return replaceInstUsesWith(EI, Idx); 448fe6060f1SDimitry Andric } 449fe6060f1SDimitry Andric } 450fe6060f1SDimitry Andric 4510b57cec5SDimitry Andric // InstSimplify should handle cases where the index is invalid. 4525ffd83dbSDimitry Andric // For fixed-length vector, it's invalid to extract out-of-range element. 453e8d8bef9SDimitry Andric if (!EC.isScalable() && IndexC->getValue().uge(NumElts)) 4540b57cec5SDimitry Andric return nullptr; 4550b57cec5SDimitry Andric 456349cc55cSDimitry Andric if (Instruction *I = foldBitcastExtElt(EI)) 4570b57cec5SDimitry Andric return I; 4580b57cec5SDimitry Andric 4590b57cec5SDimitry Andric // If there's a vector PHI feeding a scalar use through this extractelement 4600b57cec5SDimitry Andric // instruction, try to scalarize the PHI. 4610b57cec5SDimitry Andric if (auto *Phi = dyn_cast<PHINode>(SrcVec)) 4620b57cec5SDimitry Andric if (Instruction *ScalarPHI = scalarizePHI(EI, Phi)) 4630b57cec5SDimitry Andric return ScalarPHI; 4640b57cec5SDimitry Andric } 4650b57cec5SDimitry Andric 4665ffd83dbSDimitry Andric // TODO come up with a n-ary matcher that subsumes both unary and 4675ffd83dbSDimitry Andric // binary matchers. 4685ffd83dbSDimitry Andric UnaryOperator *UO; 469349cc55cSDimitry Andric if (match(SrcVec, m_UnOp(UO)) && cheapToScalarize(SrcVec, Index)) { 4705ffd83dbSDimitry Andric // extelt (unop X), Index --> unop (extelt X, Index) 4715ffd83dbSDimitry Andric Value *X = UO->getOperand(0); 4725ffd83dbSDimitry Andric Value *E = Builder.CreateExtractElement(X, Index); 4735ffd83dbSDimitry Andric return UnaryOperator::CreateWithCopiedFlags(UO->getOpcode(), E, UO); 4745ffd83dbSDimitry Andric } 4755ffd83dbSDimitry Andric 476*0fca6ea1SDimitry Andric // If the binop is not speculatable, we cannot hoist the extractelement if 477*0fca6ea1SDimitry Andric // it may make the operand poison. 4780b57cec5SDimitry Andric BinaryOperator *BO; 479*0fca6ea1SDimitry Andric if (match(SrcVec, m_BinOp(BO)) && cheapToScalarize(SrcVec, Index) && 480*0fca6ea1SDimitry Andric (HasKnownValidIndex || isSafeToSpeculativelyExecute(BO))) { 4810b57cec5SDimitry Andric // extelt (binop X, Y), Index --> binop (extelt X, Index), (extelt Y, Index) 4820b57cec5SDimitry Andric Value *X = BO->getOperand(0), *Y = BO->getOperand(1); 4830b57cec5SDimitry Andric Value *E0 = Builder.CreateExtractElement(X, Index); 4840b57cec5SDimitry Andric Value *E1 = Builder.CreateExtractElement(Y, Index); 4850b57cec5SDimitry Andric return BinaryOperator::CreateWithCopiedFlags(BO->getOpcode(), E0, E1, BO); 4860b57cec5SDimitry Andric } 4870b57cec5SDimitry Andric 4880b57cec5SDimitry Andric Value *X, *Y; 4890b57cec5SDimitry Andric CmpInst::Predicate Pred; 4900b57cec5SDimitry Andric if (match(SrcVec, m_Cmp(Pred, m_Value(X), m_Value(Y))) && 491349cc55cSDimitry Andric cheapToScalarize(SrcVec, Index)) { 4920b57cec5SDimitry Andric // extelt (cmp X, Y), Index --> cmp (extelt X, Index), (extelt Y, Index) 4930b57cec5SDimitry Andric Value *E0 = Builder.CreateExtractElement(X, Index); 4940b57cec5SDimitry Andric Value *E1 = Builder.CreateExtractElement(Y, Index); 495*0fca6ea1SDimitry Andric CmpInst *SrcCmpInst = cast<CmpInst>(SrcVec); 496*0fca6ea1SDimitry Andric return CmpInst::CreateWithCopiedFlags(SrcCmpInst->getOpcode(), Pred, E0, E1, 497*0fca6ea1SDimitry Andric SrcCmpInst); 4980b57cec5SDimitry Andric } 4990b57cec5SDimitry Andric 5000b57cec5SDimitry Andric if (auto *I = dyn_cast<Instruction>(SrcVec)) { 5010b57cec5SDimitry Andric if (auto *IE = dyn_cast<InsertElementInst>(I)) { 5020eae32dcSDimitry Andric // instsimplify already handled the case where the indices are constants 5030eae32dcSDimitry Andric // and equal by value, if both are constants, they must not be the same 5040eae32dcSDimitry Andric // value, extract from the pre-inserted value instead. 5055ffd83dbSDimitry Andric if (isa<Constant>(IE->getOperand(2)) && IndexC) 5065ffd83dbSDimitry Andric return replaceOperand(EI, 0, IE->getOperand(0)); 507fe6060f1SDimitry Andric } else if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) { 508fe6060f1SDimitry Andric auto *VecType = cast<VectorType>(GEP->getType()); 509fe6060f1SDimitry Andric ElementCount EC = VecType->getElementCount(); 510fe6060f1SDimitry Andric uint64_t IdxVal = IndexC ? IndexC->getZExtValue() : 0; 511fe6060f1SDimitry Andric if (IndexC && IdxVal < EC.getKnownMinValue() && GEP->hasOneUse()) { 512fe6060f1SDimitry Andric // Find out why we have a vector result - these are a few examples: 513fe6060f1SDimitry Andric // 1. We have a scalar pointer and a vector of indices, or 514fe6060f1SDimitry Andric // 2. We have a vector of pointers and a scalar index, or 515fe6060f1SDimitry Andric // 3. We have a vector of pointers and a vector of indices, etc. 516fe6060f1SDimitry Andric // Here we only consider combining when there is exactly one vector 517fe6060f1SDimitry Andric // operand, since the optimization is less obviously a win due to 518fe6060f1SDimitry Andric // needing more than one extractelements. 519fe6060f1SDimitry Andric 520fe6060f1SDimitry Andric unsigned VectorOps = 521fe6060f1SDimitry Andric llvm::count_if(GEP->operands(), [](const Value *V) { 522fe6060f1SDimitry Andric return isa<VectorType>(V->getType()); 523fe6060f1SDimitry Andric }); 5240eae32dcSDimitry Andric if (VectorOps == 1) { 525fe6060f1SDimitry Andric Value *NewPtr = GEP->getPointerOperand(); 526fe6060f1SDimitry Andric if (isa<VectorType>(NewPtr->getType())) 527fe6060f1SDimitry Andric NewPtr = Builder.CreateExtractElement(NewPtr, IndexC); 528fe6060f1SDimitry Andric 529fe6060f1SDimitry Andric SmallVector<Value *> NewOps; 530fe6060f1SDimitry Andric for (unsigned I = 1; I != GEP->getNumOperands(); ++I) { 531fe6060f1SDimitry Andric Value *Op = GEP->getOperand(I); 532fe6060f1SDimitry Andric if (isa<VectorType>(Op->getType())) 533fe6060f1SDimitry Andric NewOps.push_back(Builder.CreateExtractElement(Op, IndexC)); 534fe6060f1SDimitry Andric else 535fe6060f1SDimitry Andric NewOps.push_back(Op); 536fe6060f1SDimitry Andric } 537fe6060f1SDimitry Andric 538fe6060f1SDimitry Andric GetElementPtrInst *NewGEP = GetElementPtrInst::Create( 53904eeddc0SDimitry Andric GEP->getSourceElementType(), NewPtr, NewOps); 540fe6060f1SDimitry Andric NewGEP->setIsInBounds(GEP->isInBounds()); 541fe6060f1SDimitry Andric return NewGEP; 542fe6060f1SDimitry Andric } 5430eae32dcSDimitry Andric } 5440b57cec5SDimitry Andric } else if (auto *SVI = dyn_cast<ShuffleVectorInst>(I)) { 5450b57cec5SDimitry Andric // If this is extracting an element from a shufflevector, figure out where 5460b57cec5SDimitry Andric // it came from and extract from the appropriate input element instead. 5475ffd83dbSDimitry Andric // Restrict the following transformation to fixed-length vector. 5485ffd83dbSDimitry Andric if (isa<FixedVectorType>(SVI->getType()) && isa<ConstantInt>(Index)) { 5495ffd83dbSDimitry Andric int SrcIdx = 5505ffd83dbSDimitry Andric SVI->getMaskValue(cast<ConstantInt>(Index)->getZExtValue()); 5510b57cec5SDimitry Andric Value *Src; 5525ffd83dbSDimitry Andric unsigned LHSWidth = cast<FixedVectorType>(SVI->getOperand(0)->getType()) 5535ffd83dbSDimitry Andric ->getNumElements(); 5540b57cec5SDimitry Andric 5550b57cec5SDimitry Andric if (SrcIdx < 0) 55606c3fb27SDimitry Andric return replaceInstUsesWith(EI, PoisonValue::get(EI.getType())); 5570b57cec5SDimitry Andric if (SrcIdx < (int)LHSWidth) 5580b57cec5SDimitry Andric Src = SVI->getOperand(0); 5590b57cec5SDimitry Andric else { 5600b57cec5SDimitry Andric SrcIdx -= LHSWidth; 5610b57cec5SDimitry Andric Src = SVI->getOperand(1); 5620b57cec5SDimitry Andric } 56306c3fb27SDimitry Andric Type *Int64Ty = Type::getInt64Ty(EI.getContext()); 5645ffd83dbSDimitry Andric return ExtractElementInst::Create( 56506c3fb27SDimitry Andric Src, ConstantInt::get(Int64Ty, SrcIdx, false)); 5660b57cec5SDimitry Andric } 5670b57cec5SDimitry Andric } else if (auto *CI = dyn_cast<CastInst>(I)) { 5680b57cec5SDimitry Andric // Canonicalize extractelement(cast) -> cast(extractelement). 5690b57cec5SDimitry Andric // Bitcasts can change the number of vector elements, and they cost 5700b57cec5SDimitry Andric // nothing. 5710b57cec5SDimitry Andric if (CI->hasOneUse() && (CI->getOpcode() != Instruction::BitCast)) { 5720b57cec5SDimitry Andric Value *EE = Builder.CreateExtractElement(CI->getOperand(0), Index); 5730b57cec5SDimitry Andric return CastInst::Create(CI->getOpcode(), EE, EI.getType()); 5740b57cec5SDimitry Andric } 5750b57cec5SDimitry Andric } 5760b57cec5SDimitry Andric } 5770eae32dcSDimitry Andric 5780eae32dcSDimitry Andric // Run demanded elements after other transforms as this can drop flags on 5790eae32dcSDimitry Andric // binops. If there's two paths to the same final result, we prefer the 5800eae32dcSDimitry Andric // one which doesn't force us to drop flags. 5810eae32dcSDimitry Andric if (IndexC) { 5820eae32dcSDimitry Andric ElementCount EC = EI.getVectorOperandType()->getElementCount(); 5830eae32dcSDimitry Andric unsigned NumElts = EC.getKnownMinValue(); 5840eae32dcSDimitry Andric // This instruction only demands the single element from the input vector. 5850eae32dcSDimitry Andric // Skip for scalable type, the number of elements is unknown at 5860eae32dcSDimitry Andric // compile-time. 5870eae32dcSDimitry Andric if (!EC.isScalable() && NumElts != 1) { 5880eae32dcSDimitry Andric // If the input vector has a single use, simplify it based on this use 5890eae32dcSDimitry Andric // property. 5900eae32dcSDimitry Andric if (SrcVec->hasOneUse()) { 591cb14a3feSDimitry Andric APInt PoisonElts(NumElts, 0); 5920eae32dcSDimitry Andric APInt DemandedElts(NumElts, 0); 5930eae32dcSDimitry Andric DemandedElts.setBit(IndexC->getZExtValue()); 5940eae32dcSDimitry Andric if (Value *V = 595cb14a3feSDimitry Andric SimplifyDemandedVectorElts(SrcVec, DemandedElts, PoisonElts)) 5960eae32dcSDimitry Andric return replaceOperand(EI, 0, V); 5970eae32dcSDimitry Andric } else { 5980eae32dcSDimitry Andric // If the input vector has multiple uses, simplify it based on a union 5990eae32dcSDimitry Andric // of all elements used. 6000eae32dcSDimitry Andric APInt DemandedElts = findDemandedEltsByAllUsers(SrcVec); 6010eae32dcSDimitry Andric if (!DemandedElts.isAllOnes()) { 602cb14a3feSDimitry Andric APInt PoisonElts(NumElts, 0); 6030eae32dcSDimitry Andric if (Value *V = SimplifyDemandedVectorElts( 604cb14a3feSDimitry Andric SrcVec, DemandedElts, PoisonElts, 0 /* Depth */, 6050eae32dcSDimitry Andric true /* AllowMultipleUsers */)) { 6060eae32dcSDimitry Andric if (V != SrcVec) { 60706c3fb27SDimitry Andric Worklist.addValue(SrcVec); 6080eae32dcSDimitry Andric SrcVec->replaceAllUsesWith(V); 6090eae32dcSDimitry Andric return &EI; 6100eae32dcSDimitry Andric } 6110eae32dcSDimitry Andric } 6120eae32dcSDimitry Andric } 6130eae32dcSDimitry Andric } 6140eae32dcSDimitry Andric } 6150eae32dcSDimitry Andric } 6160b57cec5SDimitry Andric return nullptr; 6170b57cec5SDimitry Andric } 6180b57cec5SDimitry Andric 6190b57cec5SDimitry Andric /// If V is a shuffle of values that ONLY returns elements from either LHS or 6200b57cec5SDimitry Andric /// RHS, return the shuffle mask and true. Otherwise, return false. 6210b57cec5SDimitry Andric static bool collectSingleShuffleElements(Value *V, Value *LHS, Value *RHS, 6225ffd83dbSDimitry Andric SmallVectorImpl<int> &Mask) { 6230b57cec5SDimitry Andric assert(LHS->getType() == RHS->getType() && 6240b57cec5SDimitry Andric "Invalid CollectSingleShuffleElements"); 625e8d8bef9SDimitry Andric unsigned NumElts = cast<FixedVectorType>(V->getType())->getNumElements(); 6260b57cec5SDimitry Andric 627*0fca6ea1SDimitry Andric if (match(V, m_Poison())) { 6285ffd83dbSDimitry Andric Mask.assign(NumElts, -1); 6290b57cec5SDimitry Andric return true; 6300b57cec5SDimitry Andric } 6310b57cec5SDimitry Andric 6320b57cec5SDimitry Andric if (V == LHS) { 6330b57cec5SDimitry Andric for (unsigned i = 0; i != NumElts; ++i) 6345ffd83dbSDimitry Andric Mask.push_back(i); 6350b57cec5SDimitry Andric return true; 6360b57cec5SDimitry Andric } 6370b57cec5SDimitry Andric 6380b57cec5SDimitry Andric if (V == RHS) { 6390b57cec5SDimitry Andric for (unsigned i = 0; i != NumElts; ++i) 6405ffd83dbSDimitry Andric Mask.push_back(i + NumElts); 6410b57cec5SDimitry Andric return true; 6420b57cec5SDimitry Andric } 6430b57cec5SDimitry Andric 6440b57cec5SDimitry Andric if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) { 6450b57cec5SDimitry Andric // If this is an insert of an extract from some other vector, include it. 6460b57cec5SDimitry Andric Value *VecOp = IEI->getOperand(0); 6470b57cec5SDimitry Andric Value *ScalarOp = IEI->getOperand(1); 6480b57cec5SDimitry Andric Value *IdxOp = IEI->getOperand(2); 6490b57cec5SDimitry Andric 6500b57cec5SDimitry Andric if (!isa<ConstantInt>(IdxOp)) 6510b57cec5SDimitry Andric return false; 6520b57cec5SDimitry Andric unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue(); 6530b57cec5SDimitry Andric 65406c3fb27SDimitry Andric if (isa<PoisonValue>(ScalarOp)) { // inserting poison into vector. 6550b57cec5SDimitry Andric // We can handle this if the vector we are inserting into is 6560b57cec5SDimitry Andric // transitively ok. 6570b57cec5SDimitry Andric if (collectSingleShuffleElements(VecOp, LHS, RHS, Mask)) { 65806c3fb27SDimitry Andric // If so, update the mask to reflect the inserted poison. 6595ffd83dbSDimitry Andric Mask[InsertedIdx] = -1; 6600b57cec5SDimitry Andric return true; 6610b57cec5SDimitry Andric } 6620b57cec5SDimitry Andric } else if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)){ 6630b57cec5SDimitry Andric if (isa<ConstantInt>(EI->getOperand(1))) { 6640b57cec5SDimitry Andric unsigned ExtractedIdx = 6650b57cec5SDimitry Andric cast<ConstantInt>(EI->getOperand(1))->getZExtValue(); 6665ffd83dbSDimitry Andric unsigned NumLHSElts = 667e8d8bef9SDimitry Andric cast<FixedVectorType>(LHS->getType())->getNumElements(); 6680b57cec5SDimitry Andric 6690b57cec5SDimitry Andric // This must be extracting from either LHS or RHS. 6700b57cec5SDimitry Andric if (EI->getOperand(0) == LHS || EI->getOperand(0) == RHS) { 6710b57cec5SDimitry Andric // We can handle this if the vector we are inserting into is 6720b57cec5SDimitry Andric // transitively ok. 6730b57cec5SDimitry Andric if (collectSingleShuffleElements(VecOp, LHS, RHS, Mask)) { 6740b57cec5SDimitry Andric // If so, update the mask to reflect the inserted value. 6750b57cec5SDimitry Andric if (EI->getOperand(0) == LHS) { 6765ffd83dbSDimitry Andric Mask[InsertedIdx % NumElts] = ExtractedIdx; 6770b57cec5SDimitry Andric } else { 6780b57cec5SDimitry Andric assert(EI->getOperand(0) == RHS); 6795ffd83dbSDimitry Andric Mask[InsertedIdx % NumElts] = ExtractedIdx + NumLHSElts; 6800b57cec5SDimitry Andric } 6810b57cec5SDimitry Andric return true; 6820b57cec5SDimitry Andric } 6830b57cec5SDimitry Andric } 6840b57cec5SDimitry Andric } 6850b57cec5SDimitry Andric } 6860b57cec5SDimitry Andric } 6870b57cec5SDimitry Andric 6880b57cec5SDimitry Andric return false; 6890b57cec5SDimitry Andric } 6900b57cec5SDimitry Andric 6910b57cec5SDimitry Andric /// If we have insertion into a vector that is wider than the vector that we 6920b57cec5SDimitry Andric /// are extracting from, try to widen the source vector to allow a single 6930b57cec5SDimitry Andric /// shufflevector to replace one or more insert/extract pairs. 69406c3fb27SDimitry Andric static bool replaceExtractElements(InsertElementInst *InsElt, 6950b57cec5SDimitry Andric ExtractElementInst *ExtElt, 696e8d8bef9SDimitry Andric InstCombinerImpl &IC) { 697e8d8bef9SDimitry Andric auto *InsVecType = cast<FixedVectorType>(InsElt->getType()); 698e8d8bef9SDimitry Andric auto *ExtVecType = cast<FixedVectorType>(ExtElt->getVectorOperandType()); 6995ffd83dbSDimitry Andric unsigned NumInsElts = InsVecType->getNumElements(); 7005ffd83dbSDimitry Andric unsigned NumExtElts = ExtVecType->getNumElements(); 7010b57cec5SDimitry Andric 7020b57cec5SDimitry Andric // The inserted-to vector must be wider than the extracted-from vector. 7030b57cec5SDimitry Andric if (InsVecType->getElementType() != ExtVecType->getElementType() || 7040b57cec5SDimitry Andric NumExtElts >= NumInsElts) 70506c3fb27SDimitry Andric return false; 7060b57cec5SDimitry Andric 707fe6060f1SDimitry Andric // Create a shuffle mask to widen the extended-from vector using poison 7080b57cec5SDimitry Andric // values. The mask selects all of the values of the original vector followed 709fe6060f1SDimitry Andric // by as many poison values as needed to create a vector of the same length 7100b57cec5SDimitry Andric // as the inserted-to vector. 7115ffd83dbSDimitry Andric SmallVector<int, 16> ExtendMask; 7120b57cec5SDimitry Andric for (unsigned i = 0; i < NumExtElts; ++i) 7135ffd83dbSDimitry Andric ExtendMask.push_back(i); 7140b57cec5SDimitry Andric for (unsigned i = NumExtElts; i < NumInsElts; ++i) 7155ffd83dbSDimitry Andric ExtendMask.push_back(-1); 7160b57cec5SDimitry Andric 7170b57cec5SDimitry Andric Value *ExtVecOp = ExtElt->getVectorOperand(); 7180b57cec5SDimitry Andric auto *ExtVecOpInst = dyn_cast<Instruction>(ExtVecOp); 7190b57cec5SDimitry Andric BasicBlock *InsertionBlock = (ExtVecOpInst && !isa<PHINode>(ExtVecOpInst)) 7200b57cec5SDimitry Andric ? ExtVecOpInst->getParent() 7210b57cec5SDimitry Andric : ExtElt->getParent(); 7220b57cec5SDimitry Andric 7230b57cec5SDimitry Andric // TODO: This restriction matches the basic block check below when creating 7240b57cec5SDimitry Andric // new extractelement instructions. If that limitation is removed, this one 7250b57cec5SDimitry Andric // could also be removed. But for now, we just bail out to ensure that we 7260b57cec5SDimitry Andric // will replace the extractelement instruction that is feeding our 7270b57cec5SDimitry Andric // insertelement instruction. This allows the insertelement to then be 7280b57cec5SDimitry Andric // replaced by a shufflevector. If the insertelement is not replaced, we can 7290b57cec5SDimitry Andric // induce infinite looping because there's an optimization for extractelement 7300b57cec5SDimitry Andric // that will delete our widening shuffle. This would trigger another attempt 7310b57cec5SDimitry Andric // here to create that shuffle, and we spin forever. 7320b57cec5SDimitry Andric if (InsertionBlock != InsElt->getParent()) 73306c3fb27SDimitry Andric return false; 7340b57cec5SDimitry Andric 7350b57cec5SDimitry Andric // TODO: This restriction matches the check in visitInsertElementInst() and 7360b57cec5SDimitry Andric // prevents an infinite loop caused by not turning the extract/insert pair 7370b57cec5SDimitry Andric // into a shuffle. We really should not need either check, but we're lacking 7380b57cec5SDimitry Andric // folds for shufflevectors because we're afraid to generate shuffle masks 7390b57cec5SDimitry Andric // that the backend can't handle. 7400b57cec5SDimitry Andric if (InsElt->hasOneUse() && isa<InsertElementInst>(InsElt->user_back())) 74106c3fb27SDimitry Andric return false; 7420b57cec5SDimitry Andric 743349cc55cSDimitry Andric auto *WideVec = new ShuffleVectorInst(ExtVecOp, ExtendMask); 7440b57cec5SDimitry Andric 7450b57cec5SDimitry Andric // Insert the new shuffle after the vector operand of the extract is defined 7460b57cec5SDimitry Andric // (as long as it's not a PHI) or at the start of the basic block of the 7470b57cec5SDimitry Andric // extract, so any subsequent extracts in the same basic block can use it. 7480b57cec5SDimitry Andric // TODO: Insert before the earliest ExtractElementInst that is replaced. 7490b57cec5SDimitry Andric if (ExtVecOpInst && !isa<PHINode>(ExtVecOpInst)) 7500b57cec5SDimitry Andric WideVec->insertAfter(ExtVecOpInst); 7510b57cec5SDimitry Andric else 7525f757f3fSDimitry Andric IC.InsertNewInstWith(WideVec, ExtElt->getParent()->getFirstInsertionPt()); 7530b57cec5SDimitry Andric 7540b57cec5SDimitry Andric // Replace extracts from the original narrow vector with extracts from the new 7550b57cec5SDimitry Andric // wide vector. 7560b57cec5SDimitry Andric for (User *U : ExtVecOp->users()) { 7570b57cec5SDimitry Andric ExtractElementInst *OldExt = dyn_cast<ExtractElementInst>(U); 7580b57cec5SDimitry Andric if (!OldExt || OldExt->getParent() != WideVec->getParent()) 7590b57cec5SDimitry Andric continue; 7600b57cec5SDimitry Andric auto *NewExt = ExtractElementInst::Create(WideVec, OldExt->getOperand(1)); 7615f757f3fSDimitry Andric IC.InsertNewInstWith(NewExt, OldExt->getIterator()); 7620b57cec5SDimitry Andric IC.replaceInstUsesWith(*OldExt, NewExt); 76306c3fb27SDimitry Andric // Add the old extracts to the worklist for DCE. We can't remove the 76406c3fb27SDimitry Andric // extracts directly, because they may still be used by the calling code. 76506c3fb27SDimitry Andric IC.addToWorklist(OldExt); 7660b57cec5SDimitry Andric } 76706c3fb27SDimitry Andric 76806c3fb27SDimitry Andric return true; 7690b57cec5SDimitry Andric } 7700b57cec5SDimitry Andric 7710b57cec5SDimitry Andric /// We are building a shuffle to create V, which is a sequence of insertelement, 7720b57cec5SDimitry Andric /// extractelement pairs. If PermittedRHS is set, then we must either use it or 7730b57cec5SDimitry Andric /// not rely on the second vector source. Return a std::pair containing the 7740b57cec5SDimitry Andric /// left and right vectors of the proposed shuffle (or 0), and set the Mask 7750b57cec5SDimitry Andric /// parameter as required. 7760b57cec5SDimitry Andric /// 7770b57cec5SDimitry Andric /// Note: we intentionally don't try to fold earlier shuffles since they have 7780b57cec5SDimitry Andric /// often been chosen carefully to be efficiently implementable on the target. 7790b57cec5SDimitry Andric using ShuffleOps = std::pair<Value *, Value *>; 7800b57cec5SDimitry Andric 7815ffd83dbSDimitry Andric static ShuffleOps collectShuffleElements(Value *V, SmallVectorImpl<int> &Mask, 7820b57cec5SDimitry Andric Value *PermittedRHS, 78306c3fb27SDimitry Andric InstCombinerImpl &IC, bool &Rerun) { 7840b57cec5SDimitry Andric assert(V->getType()->isVectorTy() && "Invalid shuffle!"); 7855ffd83dbSDimitry Andric unsigned NumElts = cast<FixedVectorType>(V->getType())->getNumElements(); 7860b57cec5SDimitry Andric 787cb14a3feSDimitry Andric if (match(V, m_Poison())) { 7885ffd83dbSDimitry Andric Mask.assign(NumElts, -1); 7890b57cec5SDimitry Andric return std::make_pair( 790cb14a3feSDimitry Andric PermittedRHS ? PoisonValue::get(PermittedRHS->getType()) : V, nullptr); 7910b57cec5SDimitry Andric } 7920b57cec5SDimitry Andric 7930b57cec5SDimitry Andric if (isa<ConstantAggregateZero>(V)) { 7945ffd83dbSDimitry Andric Mask.assign(NumElts, 0); 7950b57cec5SDimitry Andric return std::make_pair(V, nullptr); 7960b57cec5SDimitry Andric } 7970b57cec5SDimitry Andric 7980b57cec5SDimitry Andric if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) { 7990b57cec5SDimitry Andric // If this is an insert of an extract from some other vector, include it. 8000b57cec5SDimitry Andric Value *VecOp = IEI->getOperand(0); 8010b57cec5SDimitry Andric Value *ScalarOp = IEI->getOperand(1); 8020b57cec5SDimitry Andric Value *IdxOp = IEI->getOperand(2); 8030b57cec5SDimitry Andric 8040b57cec5SDimitry Andric if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)) { 8050b57cec5SDimitry Andric if (isa<ConstantInt>(EI->getOperand(1)) && isa<ConstantInt>(IdxOp)) { 8060b57cec5SDimitry Andric unsigned ExtractedIdx = 8070b57cec5SDimitry Andric cast<ConstantInt>(EI->getOperand(1))->getZExtValue(); 8080b57cec5SDimitry Andric unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue(); 8090b57cec5SDimitry Andric 8100b57cec5SDimitry Andric // Either the extracted from or inserted into vector must be RHSVec, 8110b57cec5SDimitry Andric // otherwise we'd end up with a shuffle of three inputs. 8120b57cec5SDimitry Andric if (EI->getOperand(0) == PermittedRHS || PermittedRHS == nullptr) { 8130b57cec5SDimitry Andric Value *RHS = EI->getOperand(0); 81406c3fb27SDimitry Andric ShuffleOps LR = collectShuffleElements(VecOp, Mask, RHS, IC, Rerun); 8150b57cec5SDimitry Andric assert(LR.second == nullptr || LR.second == RHS); 8160b57cec5SDimitry Andric 8170b57cec5SDimitry Andric if (LR.first->getType() != RHS->getType()) { 8180b57cec5SDimitry Andric // Although we are giving up for now, see if we can create extracts 8190b57cec5SDimitry Andric // that match the inserts for another round of combining. 82006c3fb27SDimitry Andric if (replaceExtractElements(IEI, EI, IC)) 82106c3fb27SDimitry Andric Rerun = true; 8220b57cec5SDimitry Andric 8230b57cec5SDimitry Andric // We tried our best, but we can't find anything compatible with RHS 8240b57cec5SDimitry Andric // further up the chain. Return a trivial shuffle. 8250b57cec5SDimitry Andric for (unsigned i = 0; i < NumElts; ++i) 8265ffd83dbSDimitry Andric Mask[i] = i; 8270b57cec5SDimitry Andric return std::make_pair(V, nullptr); 8280b57cec5SDimitry Andric } 8290b57cec5SDimitry Andric 8305ffd83dbSDimitry Andric unsigned NumLHSElts = 831e8d8bef9SDimitry Andric cast<FixedVectorType>(RHS->getType())->getNumElements(); 8325ffd83dbSDimitry Andric Mask[InsertedIdx % NumElts] = NumLHSElts + ExtractedIdx; 8330b57cec5SDimitry Andric return std::make_pair(LR.first, RHS); 8340b57cec5SDimitry Andric } 8350b57cec5SDimitry Andric 8360b57cec5SDimitry Andric if (VecOp == PermittedRHS) { 8370b57cec5SDimitry Andric // We've gone as far as we can: anything on the other side of the 8380b57cec5SDimitry Andric // extractelement will already have been converted into a shuffle. 8390b57cec5SDimitry Andric unsigned NumLHSElts = 840e8d8bef9SDimitry Andric cast<FixedVectorType>(EI->getOperand(0)->getType()) 841e8d8bef9SDimitry Andric ->getNumElements(); 8420b57cec5SDimitry Andric for (unsigned i = 0; i != NumElts; ++i) 8435ffd83dbSDimitry Andric Mask.push_back(i == InsertedIdx ? ExtractedIdx : NumLHSElts + i); 8440b57cec5SDimitry Andric return std::make_pair(EI->getOperand(0), PermittedRHS); 8450b57cec5SDimitry Andric } 8460b57cec5SDimitry Andric 8470b57cec5SDimitry Andric // If this insertelement is a chain that comes from exactly these two 8480b57cec5SDimitry Andric // vectors, return the vector and the effective shuffle. 8490b57cec5SDimitry Andric if (EI->getOperand(0)->getType() == PermittedRHS->getType() && 8500b57cec5SDimitry Andric collectSingleShuffleElements(IEI, EI->getOperand(0), PermittedRHS, 8510b57cec5SDimitry Andric Mask)) 8520b57cec5SDimitry Andric return std::make_pair(EI->getOperand(0), PermittedRHS); 8530b57cec5SDimitry Andric } 8540b57cec5SDimitry Andric } 8550b57cec5SDimitry Andric } 8560b57cec5SDimitry Andric 8570b57cec5SDimitry Andric // Otherwise, we can't do anything fancy. Return an identity vector. 8580b57cec5SDimitry Andric for (unsigned i = 0; i != NumElts; ++i) 8595ffd83dbSDimitry Andric Mask.push_back(i); 8600b57cec5SDimitry Andric return std::make_pair(V, nullptr); 8610b57cec5SDimitry Andric } 8620b57cec5SDimitry Andric 863e8d8bef9SDimitry Andric /// Look for chain of insertvalue's that fully define an aggregate, and trace 864e8d8bef9SDimitry Andric /// back the values inserted, see if they are all were extractvalue'd from 865e8d8bef9SDimitry Andric /// the same source aggregate from the exact same element indexes. 866e8d8bef9SDimitry Andric /// If they were, just reuse the source aggregate. 867e8d8bef9SDimitry Andric /// This potentially deals with PHI indirections. 868e8d8bef9SDimitry Andric Instruction *InstCombinerImpl::foldAggregateConstructionIntoAggregateReuse( 869e8d8bef9SDimitry Andric InsertValueInst &OrigIVI) { 870e8d8bef9SDimitry Andric Type *AggTy = OrigIVI.getType(); 871e8d8bef9SDimitry Andric unsigned NumAggElts; 872e8d8bef9SDimitry Andric switch (AggTy->getTypeID()) { 873e8d8bef9SDimitry Andric case Type::StructTyID: 874e8d8bef9SDimitry Andric NumAggElts = AggTy->getStructNumElements(); 875e8d8bef9SDimitry Andric break; 876e8d8bef9SDimitry Andric case Type::ArrayTyID: 877e8d8bef9SDimitry Andric NumAggElts = AggTy->getArrayNumElements(); 878e8d8bef9SDimitry Andric break; 879e8d8bef9SDimitry Andric default: 880e8d8bef9SDimitry Andric llvm_unreachable("Unhandled aggregate type?"); 881e8d8bef9SDimitry Andric } 882e8d8bef9SDimitry Andric 883e8d8bef9SDimitry Andric // Arbitrary aggregate size cut-off. Motivation for limit of 2 is to be able 884e8d8bef9SDimitry Andric // to handle clang C++ exception struct (which is hardcoded as {i8*, i32}), 885e8d8bef9SDimitry Andric // FIXME: any interesting patterns to be caught with larger limit? 886e8d8bef9SDimitry Andric assert(NumAggElts > 0 && "Aggregate should have elements."); 887e8d8bef9SDimitry Andric if (NumAggElts > 2) 888e8d8bef9SDimitry Andric return nullptr; 889e8d8bef9SDimitry Andric 890bdd1243dSDimitry Andric static constexpr auto NotFound = std::nullopt; 891e8d8bef9SDimitry Andric static constexpr auto FoundMismatch = nullptr; 892e8d8bef9SDimitry Andric 893e8d8bef9SDimitry Andric // Try to find a value of each element of an aggregate. 894e8d8bef9SDimitry Andric // FIXME: deal with more complex, not one-dimensional, aggregate types 895bdd1243dSDimitry Andric SmallVector<std::optional<Instruction *>, 2> AggElts(NumAggElts, NotFound); 896e8d8bef9SDimitry Andric 897e8d8bef9SDimitry Andric // Do we know values for each element of the aggregate? 898e8d8bef9SDimitry Andric auto KnowAllElts = [&AggElts]() { 899bdd1243dSDimitry Andric return !llvm::is_contained(AggElts, NotFound); 900e8d8bef9SDimitry Andric }; 901e8d8bef9SDimitry Andric 902e8d8bef9SDimitry Andric int Depth = 0; 903e8d8bef9SDimitry Andric 904e8d8bef9SDimitry Andric // Arbitrary `insertvalue` visitation depth limit. Let's be okay with 905e8d8bef9SDimitry Andric // every element being overwritten twice, which should never happen. 906e8d8bef9SDimitry Andric static const int DepthLimit = 2 * NumAggElts; 907e8d8bef9SDimitry Andric 908e8d8bef9SDimitry Andric // Recurse up the chain of `insertvalue` aggregate operands until either we've 909e8d8bef9SDimitry Andric // reconstructed full initializer or can't visit any more `insertvalue`'s. 910e8d8bef9SDimitry Andric for (InsertValueInst *CurrIVI = &OrigIVI; 911e8d8bef9SDimitry Andric Depth < DepthLimit && CurrIVI && !KnowAllElts(); 912e8d8bef9SDimitry Andric CurrIVI = dyn_cast<InsertValueInst>(CurrIVI->getAggregateOperand()), 913e8d8bef9SDimitry Andric ++Depth) { 914fe6060f1SDimitry Andric auto *InsertedValue = 915fe6060f1SDimitry Andric dyn_cast<Instruction>(CurrIVI->getInsertedValueOperand()); 916fe6060f1SDimitry Andric if (!InsertedValue) 917fe6060f1SDimitry Andric return nullptr; // Inserted value must be produced by an instruction. 918fe6060f1SDimitry Andric 919e8d8bef9SDimitry Andric ArrayRef<unsigned int> Indices = CurrIVI->getIndices(); 920e8d8bef9SDimitry Andric 921e8d8bef9SDimitry Andric // Don't bother with more than single-level aggregates. 922e8d8bef9SDimitry Andric if (Indices.size() != 1) 923e8d8bef9SDimitry Andric return nullptr; // FIXME: deal with more complex aggregates? 924e8d8bef9SDimitry Andric 925e8d8bef9SDimitry Andric // Now, we may have already previously recorded the value for this element 926e8d8bef9SDimitry Andric // of an aggregate. If we did, that means the CurrIVI will later be 927e8d8bef9SDimitry Andric // overwritten with the already-recorded value. But if not, let's record it! 928bdd1243dSDimitry Andric std::optional<Instruction *> &Elt = AggElts[Indices.front()]; 92981ad6265SDimitry Andric Elt = Elt.value_or(InsertedValue); 930e8d8bef9SDimitry Andric 931e8d8bef9SDimitry Andric // FIXME: should we handle chain-terminating undef base operand? 932e8d8bef9SDimitry Andric } 933e8d8bef9SDimitry Andric 934e8d8bef9SDimitry Andric // Was that sufficient to deduce the full initializer for the aggregate? 935e8d8bef9SDimitry Andric if (!KnowAllElts()) 936e8d8bef9SDimitry Andric return nullptr; // Give up then. 937e8d8bef9SDimitry Andric 938e8d8bef9SDimitry Andric // We now want to find the source[s] of the aggregate elements we've found. 939e8d8bef9SDimitry Andric // And with "source" we mean the original aggregate[s] from which 940e8d8bef9SDimitry Andric // the inserted elements were extracted. This may require PHI translation. 941e8d8bef9SDimitry Andric 942e8d8bef9SDimitry Andric enum class AggregateDescription { 943e8d8bef9SDimitry Andric /// When analyzing the value that was inserted into an aggregate, we did 944e8d8bef9SDimitry Andric /// not manage to find defining `extractvalue` instruction to analyze. 945e8d8bef9SDimitry Andric NotFound, 946e8d8bef9SDimitry Andric /// When analyzing the value that was inserted into an aggregate, we did 947e8d8bef9SDimitry Andric /// manage to find defining `extractvalue` instruction[s], and everything 948e8d8bef9SDimitry Andric /// matched perfectly - aggregate type, element insertion/extraction index. 949e8d8bef9SDimitry Andric Found, 950e8d8bef9SDimitry Andric /// When analyzing the value that was inserted into an aggregate, we did 951e8d8bef9SDimitry Andric /// manage to find defining `extractvalue` instruction, but there was 952e8d8bef9SDimitry Andric /// a mismatch: either the source type from which the extraction was didn't 953e8d8bef9SDimitry Andric /// match the aggregate type into which the insertion was, 954e8d8bef9SDimitry Andric /// or the extraction/insertion channels mismatched, 955e8d8bef9SDimitry Andric /// or different elements had different source aggregates. 956e8d8bef9SDimitry Andric FoundMismatch 957e8d8bef9SDimitry Andric }; 958bdd1243dSDimitry Andric auto Describe = [](std::optional<Value *> SourceAggregate) { 959e8d8bef9SDimitry Andric if (SourceAggregate == NotFound) 960e8d8bef9SDimitry Andric return AggregateDescription::NotFound; 961e8d8bef9SDimitry Andric if (*SourceAggregate == FoundMismatch) 962e8d8bef9SDimitry Andric return AggregateDescription::FoundMismatch; 963e8d8bef9SDimitry Andric return AggregateDescription::Found; 964e8d8bef9SDimitry Andric }; 965e8d8bef9SDimitry Andric 966e8d8bef9SDimitry Andric // Given the value \p Elt that was being inserted into element \p EltIdx of an 967e8d8bef9SDimitry Andric // aggregate AggTy, see if \p Elt was originally defined by an 968e8d8bef9SDimitry Andric // appropriate extractvalue (same element index, same aggregate type). 969e8d8bef9SDimitry Andric // If found, return the source aggregate from which the extraction was. 970e8d8bef9SDimitry Andric // If \p PredBB is provided, does PHI translation of an \p Elt first. 971e8d8bef9SDimitry Andric auto FindSourceAggregate = 972bdd1243dSDimitry Andric [&](Instruction *Elt, unsigned EltIdx, std::optional<BasicBlock *> UseBB, 973bdd1243dSDimitry Andric std::optional<BasicBlock *> PredBB) -> std::optional<Value *> { 974e8d8bef9SDimitry Andric // For now(?), only deal with, at most, a single level of PHI indirection. 975e8d8bef9SDimitry Andric if (UseBB && PredBB) 976fe6060f1SDimitry Andric Elt = dyn_cast<Instruction>(Elt->DoPHITranslation(*UseBB, *PredBB)); 977e8d8bef9SDimitry Andric // FIXME: deal with multiple levels of PHI indirection? 978e8d8bef9SDimitry Andric 979e8d8bef9SDimitry Andric // Did we find an extraction? 980fe6060f1SDimitry Andric auto *EVI = dyn_cast_or_null<ExtractValueInst>(Elt); 981e8d8bef9SDimitry Andric if (!EVI) 982e8d8bef9SDimitry Andric return NotFound; 983e8d8bef9SDimitry Andric 984e8d8bef9SDimitry Andric Value *SourceAggregate = EVI->getAggregateOperand(); 985e8d8bef9SDimitry Andric 986e8d8bef9SDimitry Andric // Is the extraction from the same type into which the insertion was? 987e8d8bef9SDimitry Andric if (SourceAggregate->getType() != AggTy) 988e8d8bef9SDimitry Andric return FoundMismatch; 989e8d8bef9SDimitry Andric // And the element index doesn't change between extraction and insertion? 990e8d8bef9SDimitry Andric if (EVI->getNumIndices() != 1 || EltIdx != EVI->getIndices().front()) 991e8d8bef9SDimitry Andric return FoundMismatch; 992e8d8bef9SDimitry Andric 993e8d8bef9SDimitry Andric return SourceAggregate; // AggregateDescription::Found 994e8d8bef9SDimitry Andric }; 995e8d8bef9SDimitry Andric 996e8d8bef9SDimitry Andric // Given elements AggElts that were constructing an aggregate OrigIVI, 997e8d8bef9SDimitry Andric // see if we can find appropriate source aggregate for each of the elements, 998e8d8bef9SDimitry Andric // and see it's the same aggregate for each element. If so, return it. 999e8d8bef9SDimitry Andric auto FindCommonSourceAggregate = 1000bdd1243dSDimitry Andric [&](std::optional<BasicBlock *> UseBB, 1001bdd1243dSDimitry Andric std::optional<BasicBlock *> PredBB) -> std::optional<Value *> { 1002bdd1243dSDimitry Andric std::optional<Value *> SourceAggregate; 1003e8d8bef9SDimitry Andric 1004e8d8bef9SDimitry Andric for (auto I : enumerate(AggElts)) { 1005e8d8bef9SDimitry Andric assert(Describe(SourceAggregate) != AggregateDescription::FoundMismatch && 1006e8d8bef9SDimitry Andric "We don't store nullptr in SourceAggregate!"); 1007e8d8bef9SDimitry Andric assert((Describe(SourceAggregate) == AggregateDescription::Found) == 1008e8d8bef9SDimitry Andric (I.index() != 0) && 1009349cc55cSDimitry Andric "SourceAggregate should be valid after the first element,"); 1010e8d8bef9SDimitry Andric 1011e8d8bef9SDimitry Andric // For this element, is there a plausible source aggregate? 1012e8d8bef9SDimitry Andric // FIXME: we could special-case undef element, IFF we know that in the 1013e8d8bef9SDimitry Andric // source aggregate said element isn't poison. 1014bdd1243dSDimitry Andric std::optional<Value *> SourceAggregateForElement = 1015e8d8bef9SDimitry Andric FindSourceAggregate(*I.value(), I.index(), UseBB, PredBB); 1016e8d8bef9SDimitry Andric 1017e8d8bef9SDimitry Andric // Okay, what have we found? Does that correlate with previous findings? 1018e8d8bef9SDimitry Andric 1019e8d8bef9SDimitry Andric // Regardless of whether or not we have previously found source 1020e8d8bef9SDimitry Andric // aggregate for previous elements (if any), if we didn't find one for 1021e8d8bef9SDimitry Andric // this element, passthrough whatever we have just found. 1022e8d8bef9SDimitry Andric if (Describe(SourceAggregateForElement) != AggregateDescription::Found) 1023e8d8bef9SDimitry Andric return SourceAggregateForElement; 1024e8d8bef9SDimitry Andric 1025e8d8bef9SDimitry Andric // Okay, we have found source aggregate for this element. 1026e8d8bef9SDimitry Andric // Let's see what we already know from previous elements, if any. 1027e8d8bef9SDimitry Andric switch (Describe(SourceAggregate)) { 1028e8d8bef9SDimitry Andric case AggregateDescription::NotFound: 1029e8d8bef9SDimitry Andric // This is apparently the first element that we have examined. 1030e8d8bef9SDimitry Andric SourceAggregate = SourceAggregateForElement; // Record the aggregate! 1031e8d8bef9SDimitry Andric continue; // Great, now look at next element. 1032e8d8bef9SDimitry Andric case AggregateDescription::Found: 1033e8d8bef9SDimitry Andric // We have previously already successfully examined other elements. 1034e8d8bef9SDimitry Andric // Is this the same source aggregate we've found for other elements? 1035e8d8bef9SDimitry Andric if (*SourceAggregateForElement != *SourceAggregate) 1036e8d8bef9SDimitry Andric return FoundMismatch; 1037e8d8bef9SDimitry Andric continue; // Still the same aggregate, look at next element. 1038e8d8bef9SDimitry Andric case AggregateDescription::FoundMismatch: 1039e8d8bef9SDimitry Andric llvm_unreachable("Can't happen. We would have early-exited then."); 1040e8d8bef9SDimitry Andric }; 1041e8d8bef9SDimitry Andric } 1042e8d8bef9SDimitry Andric 1043e8d8bef9SDimitry Andric assert(Describe(SourceAggregate) == AggregateDescription::Found && 1044e8d8bef9SDimitry Andric "Must be a valid Value"); 1045e8d8bef9SDimitry Andric return *SourceAggregate; 1046e8d8bef9SDimitry Andric }; 1047e8d8bef9SDimitry Andric 1048bdd1243dSDimitry Andric std::optional<Value *> SourceAggregate; 1049e8d8bef9SDimitry Andric 1050e8d8bef9SDimitry Andric // Can we find the source aggregate without looking at predecessors? 1051bdd1243dSDimitry Andric SourceAggregate = FindCommonSourceAggregate(/*UseBB=*/std::nullopt, 1052bdd1243dSDimitry Andric /*PredBB=*/std::nullopt); 1053e8d8bef9SDimitry Andric if (Describe(SourceAggregate) != AggregateDescription::NotFound) { 1054e8d8bef9SDimitry Andric if (Describe(SourceAggregate) == AggregateDescription::FoundMismatch) 1055e8d8bef9SDimitry Andric return nullptr; // Conflicting source aggregates! 1056e8d8bef9SDimitry Andric ++NumAggregateReconstructionsSimplified; 1057e8d8bef9SDimitry Andric return replaceInstUsesWith(OrigIVI, *SourceAggregate); 1058e8d8bef9SDimitry Andric } 1059e8d8bef9SDimitry Andric 1060e8d8bef9SDimitry Andric // Okay, apparently we need to look at predecessors. 1061e8d8bef9SDimitry Andric 1062e8d8bef9SDimitry Andric // We should be smart about picking the "use" basic block, which will be the 1063e8d8bef9SDimitry Andric // merge point for aggregate, where we'll insert the final PHI that will be 1064e8d8bef9SDimitry Andric // used instead of OrigIVI. Basic block of OrigIVI is *not* the right choice. 1065e8d8bef9SDimitry Andric // We should look in which blocks each of the AggElts is being defined, 1066e8d8bef9SDimitry Andric // they all should be defined in the same basic block. 1067e8d8bef9SDimitry Andric BasicBlock *UseBB = nullptr; 1068e8d8bef9SDimitry Andric 1069bdd1243dSDimitry Andric for (const std::optional<Instruction *> &I : AggElts) { 1070fe6060f1SDimitry Andric BasicBlock *BB = (*I)->getParent(); 1071e8d8bef9SDimitry Andric // If it's the first instruction we've encountered, record the basic block. 1072e8d8bef9SDimitry Andric if (!UseBB) { 1073e8d8bef9SDimitry Andric UseBB = BB; 1074e8d8bef9SDimitry Andric continue; 1075e8d8bef9SDimitry Andric } 1076e8d8bef9SDimitry Andric // Otherwise, this must be the same basic block we've seen previously. 1077e8d8bef9SDimitry Andric if (UseBB != BB) 1078e8d8bef9SDimitry Andric return nullptr; 1079e8d8bef9SDimitry Andric } 1080e8d8bef9SDimitry Andric 1081e8d8bef9SDimitry Andric // If *all* of the elements are basic-block-independent, meaning they are 1082e8d8bef9SDimitry Andric // either function arguments, or constant expressions, then if we didn't 1083e8d8bef9SDimitry Andric // handle them without predecessor-aware handling, we won't handle them now. 1084e8d8bef9SDimitry Andric if (!UseBB) 1085e8d8bef9SDimitry Andric return nullptr; 1086e8d8bef9SDimitry Andric 1087e8d8bef9SDimitry Andric // If we didn't manage to find source aggregate without looking at 1088e8d8bef9SDimitry Andric // predecessors, and there are no predecessors to look at, then we're done. 1089e8d8bef9SDimitry Andric if (pred_empty(UseBB)) 1090e8d8bef9SDimitry Andric return nullptr; 1091e8d8bef9SDimitry Andric 1092e8d8bef9SDimitry Andric // Arbitrary predecessor count limit. 1093e8d8bef9SDimitry Andric static const int PredCountLimit = 64; 1094e8d8bef9SDimitry Andric 1095e8d8bef9SDimitry Andric // Cache the (non-uniqified!) list of predecessors in a vector, 1096e8d8bef9SDimitry Andric // checking the limit at the same time for efficiency. 1097e8d8bef9SDimitry Andric SmallVector<BasicBlock *, 4> Preds; // May have duplicates! 1098e8d8bef9SDimitry Andric for (BasicBlock *Pred : predecessors(UseBB)) { 1099e8d8bef9SDimitry Andric // Don't bother if there are too many predecessors. 1100e8d8bef9SDimitry Andric if (Preds.size() >= PredCountLimit) // FIXME: only count duplicates once? 1101e8d8bef9SDimitry Andric return nullptr; 1102e8d8bef9SDimitry Andric Preds.emplace_back(Pred); 1103e8d8bef9SDimitry Andric } 1104e8d8bef9SDimitry Andric 1105e8d8bef9SDimitry Andric // For each predecessor, what is the source aggregate, 1106e8d8bef9SDimitry Andric // from which all the elements were originally extracted from? 1107e8d8bef9SDimitry Andric // Note that we want for the map to have stable iteration order! 1108e8d8bef9SDimitry Andric SmallDenseMap<BasicBlock *, Value *, 4> SourceAggregates; 1109e8d8bef9SDimitry Andric for (BasicBlock *Pred : Preds) { 1110e8d8bef9SDimitry Andric std::pair<decltype(SourceAggregates)::iterator, bool> IV = 1111e8d8bef9SDimitry Andric SourceAggregates.insert({Pred, nullptr}); 1112e8d8bef9SDimitry Andric // Did we already evaluate this predecessor? 1113e8d8bef9SDimitry Andric if (!IV.second) 1114e8d8bef9SDimitry Andric continue; 1115e8d8bef9SDimitry Andric 1116e8d8bef9SDimitry Andric // Let's hope that when coming from predecessor Pred, all elements of the 1117e8d8bef9SDimitry Andric // aggregate produced by OrigIVI must have been originally extracted from 1118e8d8bef9SDimitry Andric // the same aggregate. Is that so? Can we find said original aggregate? 1119e8d8bef9SDimitry Andric SourceAggregate = FindCommonSourceAggregate(UseBB, Pred); 1120e8d8bef9SDimitry Andric if (Describe(SourceAggregate) != AggregateDescription::Found) 1121e8d8bef9SDimitry Andric return nullptr; // Give up. 1122e8d8bef9SDimitry Andric IV.first->second = *SourceAggregate; 1123e8d8bef9SDimitry Andric } 1124e8d8bef9SDimitry Andric 1125e8d8bef9SDimitry Andric // All good! Now we just need to thread the source aggregates here. 1126e8d8bef9SDimitry Andric // Note that we have to insert the new PHI here, ourselves, because we can't 1127e8d8bef9SDimitry Andric // rely on InstCombinerImpl::run() inserting it into the right basic block. 1128e8d8bef9SDimitry Andric // Note that the same block can be a predecessor more than once, 1129e8d8bef9SDimitry Andric // and we need to preserve that invariant for the PHI node. 1130e8d8bef9SDimitry Andric BuilderTy::InsertPointGuard Guard(Builder); 11315f757f3fSDimitry Andric Builder.SetInsertPoint(UseBB, UseBB->getFirstNonPHIIt()); 1132e8d8bef9SDimitry Andric auto *PHI = 1133e8d8bef9SDimitry Andric Builder.CreatePHI(AggTy, Preds.size(), OrigIVI.getName() + ".merged"); 1134e8d8bef9SDimitry Andric for (BasicBlock *Pred : Preds) 1135e8d8bef9SDimitry Andric PHI->addIncoming(SourceAggregates[Pred], Pred); 1136e8d8bef9SDimitry Andric 1137e8d8bef9SDimitry Andric ++NumAggregateReconstructionsSimplified; 1138e8d8bef9SDimitry Andric return replaceInstUsesWith(OrigIVI, PHI); 1139e8d8bef9SDimitry Andric } 1140e8d8bef9SDimitry Andric 11410b57cec5SDimitry Andric /// Try to find redundant insertvalue instructions, like the following ones: 11420b57cec5SDimitry Andric /// %0 = insertvalue { i8, i32 } undef, i8 %x, 0 11430b57cec5SDimitry Andric /// %1 = insertvalue { i8, i32 } %0, i8 %y, 0 11440b57cec5SDimitry Andric /// Here the second instruction inserts values at the same indices, as the 11450b57cec5SDimitry Andric /// first one, making the first one redundant. 11460b57cec5SDimitry Andric /// It should be transformed to: 11470b57cec5SDimitry Andric /// %0 = insertvalue { i8, i32 } undef, i8 %y, 0 1148e8d8bef9SDimitry Andric Instruction *InstCombinerImpl::visitInsertValueInst(InsertValueInst &I) { 114906c3fb27SDimitry Andric if (Value *V = simplifyInsertValueInst( 115006c3fb27SDimitry Andric I.getAggregateOperand(), I.getInsertedValueOperand(), I.getIndices(), 115106c3fb27SDimitry Andric SQ.getWithInstruction(&I))) 115206c3fb27SDimitry Andric return replaceInstUsesWith(I, V); 115306c3fb27SDimitry Andric 11540b57cec5SDimitry Andric bool IsRedundant = false; 11550b57cec5SDimitry Andric ArrayRef<unsigned int> FirstIndices = I.getIndices(); 11560b57cec5SDimitry Andric 11570b57cec5SDimitry Andric // If there is a chain of insertvalue instructions (each of them except the 11580b57cec5SDimitry Andric // last one has only one use and it's another insertvalue insn from this 11590b57cec5SDimitry Andric // chain), check if any of the 'children' uses the same indices as the first 11600b57cec5SDimitry Andric // instruction. In this case, the first one is redundant. 11610b57cec5SDimitry Andric Value *V = &I; 11620b57cec5SDimitry Andric unsigned Depth = 0; 11630b57cec5SDimitry Andric while (V->hasOneUse() && Depth < 10) { 11640b57cec5SDimitry Andric User *U = V->user_back(); 11650b57cec5SDimitry Andric auto UserInsInst = dyn_cast<InsertValueInst>(U); 11660b57cec5SDimitry Andric if (!UserInsInst || U->getOperand(0) != V) 11670b57cec5SDimitry Andric break; 11680b57cec5SDimitry Andric if (UserInsInst->getIndices() == FirstIndices) { 11690b57cec5SDimitry Andric IsRedundant = true; 11700b57cec5SDimitry Andric break; 11710b57cec5SDimitry Andric } 11720b57cec5SDimitry Andric V = UserInsInst; 11730b57cec5SDimitry Andric Depth++; 11740b57cec5SDimitry Andric } 11750b57cec5SDimitry Andric 11760b57cec5SDimitry Andric if (IsRedundant) 11770b57cec5SDimitry Andric return replaceInstUsesWith(I, I.getOperand(0)); 1178e8d8bef9SDimitry Andric 1179e8d8bef9SDimitry Andric if (Instruction *NewI = foldAggregateConstructionIntoAggregateReuse(I)) 1180e8d8bef9SDimitry Andric return NewI; 1181e8d8bef9SDimitry Andric 11820b57cec5SDimitry Andric return nullptr; 11830b57cec5SDimitry Andric } 11840b57cec5SDimitry Andric 11850b57cec5SDimitry Andric static bool isShuffleEquivalentToSelect(ShuffleVectorInst &Shuf) { 11865ffd83dbSDimitry Andric // Can not analyze scalable type, the number of elements is not a compile-time 11875ffd83dbSDimitry Andric // constant. 11885ffd83dbSDimitry Andric if (isa<ScalableVectorType>(Shuf.getOperand(0)->getType())) 11895ffd83dbSDimitry Andric return false; 11905ffd83dbSDimitry Andric 11915ffd83dbSDimitry Andric int MaskSize = Shuf.getShuffleMask().size(); 11925ffd83dbSDimitry Andric int VecSize = 11935ffd83dbSDimitry Andric cast<FixedVectorType>(Shuf.getOperand(0)->getType())->getNumElements(); 11940b57cec5SDimitry Andric 11950b57cec5SDimitry Andric // A vector select does not change the size of the operands. 11960b57cec5SDimitry Andric if (MaskSize != VecSize) 11970b57cec5SDimitry Andric return false; 11980b57cec5SDimitry Andric 11990b57cec5SDimitry Andric // Each mask element must be undefined or choose a vector element from one of 12000b57cec5SDimitry Andric // the source operands without crossing vector lanes. 12010b57cec5SDimitry Andric for (int i = 0; i != MaskSize; ++i) { 12020b57cec5SDimitry Andric int Elt = Shuf.getMaskValue(i); 12030b57cec5SDimitry Andric if (Elt != -1 && Elt != i && Elt != i + VecSize) 12040b57cec5SDimitry Andric return false; 12050b57cec5SDimitry Andric } 12060b57cec5SDimitry Andric 12070b57cec5SDimitry Andric return true; 12080b57cec5SDimitry Andric } 12090b57cec5SDimitry Andric 12100b57cec5SDimitry Andric /// Turn a chain of inserts that splats a value into an insert + shuffle: 12110b57cec5SDimitry Andric /// insertelt(insertelt(insertelt(insertelt X, %k, 0), %k, 1), %k, 2) ... -> 1212fe6060f1SDimitry Andric /// shufflevector(insertelt(X, %k, 0), poison, zero) 12130b57cec5SDimitry Andric static Instruction *foldInsSequenceIntoSplat(InsertElementInst &InsElt) { 12140b57cec5SDimitry Andric // We are interested in the last insert in a chain. So if this insert has a 12150b57cec5SDimitry Andric // single user and that user is an insert, bail. 12160b57cec5SDimitry Andric if (InsElt.hasOneUse() && isa<InsertElementInst>(InsElt.user_back())) 12170b57cec5SDimitry Andric return nullptr; 12180b57cec5SDimitry Andric 12195ffd83dbSDimitry Andric VectorType *VecTy = InsElt.getType(); 12205ffd83dbSDimitry Andric // Can not handle scalable type, the number of elements is not a compile-time 12215ffd83dbSDimitry Andric // constant. 12225ffd83dbSDimitry Andric if (isa<ScalableVectorType>(VecTy)) 12235ffd83dbSDimitry Andric return nullptr; 12245ffd83dbSDimitry Andric unsigned NumElements = cast<FixedVectorType>(VecTy)->getNumElements(); 12250b57cec5SDimitry Andric 12260b57cec5SDimitry Andric // Do not try to do this for a one-element vector, since that's a nop, 12270b57cec5SDimitry Andric // and will cause an inf-loop. 12280b57cec5SDimitry Andric if (NumElements == 1) 12290b57cec5SDimitry Andric return nullptr; 12300b57cec5SDimitry Andric 12310b57cec5SDimitry Andric Value *SplatVal = InsElt.getOperand(1); 12320b57cec5SDimitry Andric InsertElementInst *CurrIE = &InsElt; 12335ffd83dbSDimitry Andric SmallBitVector ElementPresent(NumElements, false); 12340b57cec5SDimitry Andric InsertElementInst *FirstIE = nullptr; 12350b57cec5SDimitry Andric 12360b57cec5SDimitry Andric // Walk the chain backwards, keeping track of which indices we inserted into, 12370b57cec5SDimitry Andric // until we hit something that isn't an insert of the splatted value. 12380b57cec5SDimitry Andric while (CurrIE) { 12390b57cec5SDimitry Andric auto *Idx = dyn_cast<ConstantInt>(CurrIE->getOperand(2)); 12400b57cec5SDimitry Andric if (!Idx || CurrIE->getOperand(1) != SplatVal) 12410b57cec5SDimitry Andric return nullptr; 12420b57cec5SDimitry Andric 12430b57cec5SDimitry Andric auto *NextIE = dyn_cast<InsertElementInst>(CurrIE->getOperand(0)); 12440b57cec5SDimitry Andric // Check none of the intermediate steps have any additional uses, except 12450b57cec5SDimitry Andric // for the root insertelement instruction, which can be re-used, if it 12460b57cec5SDimitry Andric // inserts at position 0. 12470b57cec5SDimitry Andric if (CurrIE != &InsElt && 12480b57cec5SDimitry Andric (!CurrIE->hasOneUse() && (NextIE != nullptr || !Idx->isZero()))) 12490b57cec5SDimitry Andric return nullptr; 12500b57cec5SDimitry Andric 12510b57cec5SDimitry Andric ElementPresent[Idx->getZExtValue()] = true; 12520b57cec5SDimitry Andric FirstIE = CurrIE; 12530b57cec5SDimitry Andric CurrIE = NextIE; 12540b57cec5SDimitry Andric } 12550b57cec5SDimitry Andric 12560b57cec5SDimitry Andric // If this is just a single insertelement (not a sequence), we are done. 12570b57cec5SDimitry Andric if (FirstIE == &InsElt) 12580b57cec5SDimitry Andric return nullptr; 12590b57cec5SDimitry Andric 126006c3fb27SDimitry Andric // If we are not inserting into a poison vector, make sure we've seen an 12610b57cec5SDimitry Andric // insert into every element. 12620b57cec5SDimitry Andric // TODO: If the base vector is not undef, it might be better to create a splat 12630b57cec5SDimitry Andric // and then a select-shuffle (blend) with the base vector. 126406c3fb27SDimitry Andric if (!match(FirstIE->getOperand(0), m_Poison())) 12655ffd83dbSDimitry Andric if (!ElementPresent.all()) 12660b57cec5SDimitry Andric return nullptr; 12670b57cec5SDimitry Andric 12680b57cec5SDimitry Andric // Create the insert + shuffle. 126906c3fb27SDimitry Andric Type *Int64Ty = Type::getInt64Ty(InsElt.getContext()); 1270fe6060f1SDimitry Andric PoisonValue *PoisonVec = PoisonValue::get(VecTy); 127106c3fb27SDimitry Andric Constant *Zero = ConstantInt::get(Int64Ty, 0); 12720b57cec5SDimitry Andric if (!cast<ConstantInt>(FirstIE->getOperand(2))->isZero()) 1273*0fca6ea1SDimitry Andric FirstIE = InsertElementInst::Create(PoisonVec, SplatVal, Zero, "", 1274*0fca6ea1SDimitry Andric InsElt.getIterator()); 12750b57cec5SDimitry Andric 127606c3fb27SDimitry Andric // Splat from element 0, but replace absent elements with poison in the mask. 12775ffd83dbSDimitry Andric SmallVector<int, 16> Mask(NumElements, 0); 12780b57cec5SDimitry Andric for (unsigned i = 0; i != NumElements; ++i) 12790b57cec5SDimitry Andric if (!ElementPresent[i]) 12805ffd83dbSDimitry Andric Mask[i] = -1; 12810b57cec5SDimitry Andric 1282349cc55cSDimitry Andric return new ShuffleVectorInst(FirstIE, Mask); 12830b57cec5SDimitry Andric } 12840b57cec5SDimitry Andric 12850b57cec5SDimitry Andric /// Try to fold an insert element into an existing splat shuffle by changing 12860b57cec5SDimitry Andric /// the shuffle's mask to include the index of this insert element. 12870b57cec5SDimitry Andric static Instruction *foldInsEltIntoSplat(InsertElementInst &InsElt) { 12880b57cec5SDimitry Andric // Check if the vector operand of this insert is a canonical splat shuffle. 12890b57cec5SDimitry Andric auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0)); 12900b57cec5SDimitry Andric if (!Shuf || !Shuf->isZeroEltSplat()) 12910b57cec5SDimitry Andric return nullptr; 12920b57cec5SDimitry Andric 12935ffd83dbSDimitry Andric // Bail out early if shuffle is scalable type. The number of elements in 12945ffd83dbSDimitry Andric // shuffle mask is unknown at compile-time. 12955ffd83dbSDimitry Andric if (isa<ScalableVectorType>(Shuf->getType())) 12965ffd83dbSDimitry Andric return nullptr; 12975ffd83dbSDimitry Andric 12980b57cec5SDimitry Andric // Check for a constant insertion index. 12990b57cec5SDimitry Andric uint64_t IdxC; 13000b57cec5SDimitry Andric if (!match(InsElt.getOperand(2), m_ConstantInt(IdxC))) 13010b57cec5SDimitry Andric return nullptr; 13020b57cec5SDimitry Andric 13030b57cec5SDimitry Andric // Check if the splat shuffle's input is the same as this insert's scalar op. 13040b57cec5SDimitry Andric Value *X = InsElt.getOperand(1); 13050b57cec5SDimitry Andric Value *Op0 = Shuf->getOperand(0); 13065ffd83dbSDimitry Andric if (!match(Op0, m_InsertElt(m_Undef(), m_Specific(X), m_ZeroInt()))) 13070b57cec5SDimitry Andric return nullptr; 13080b57cec5SDimitry Andric 13090b57cec5SDimitry Andric // Replace the shuffle mask element at the index of this insert with a zero. 13100b57cec5SDimitry Andric // For example: 1311349cc55cSDimitry Andric // inselt (shuf (inselt undef, X, 0), _, <0,undef,0,undef>), X, 1 1312349cc55cSDimitry Andric // --> shuf (inselt undef, X, 0), poison, <0,0,0,undef> 1313e8d8bef9SDimitry Andric unsigned NumMaskElts = 1314e8d8bef9SDimitry Andric cast<FixedVectorType>(Shuf->getType())->getNumElements(); 13155ffd83dbSDimitry Andric SmallVector<int, 16> NewMask(NumMaskElts); 13160b57cec5SDimitry Andric for (unsigned i = 0; i != NumMaskElts; ++i) 13175ffd83dbSDimitry Andric NewMask[i] = i == IdxC ? 0 : Shuf->getMaskValue(i); 13180b57cec5SDimitry Andric 1319349cc55cSDimitry Andric return new ShuffleVectorInst(Op0, NewMask); 13200b57cec5SDimitry Andric } 13210b57cec5SDimitry Andric 13228bcb0991SDimitry Andric /// Try to fold an extract+insert element into an existing identity shuffle by 13238bcb0991SDimitry Andric /// changing the shuffle's mask to include the index of this insert element. 13248bcb0991SDimitry Andric static Instruction *foldInsEltIntoIdentityShuffle(InsertElementInst &InsElt) { 13258bcb0991SDimitry Andric // Check if the vector operand of this insert is an identity shuffle. 13268bcb0991SDimitry Andric auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0)); 1327*0fca6ea1SDimitry Andric if (!Shuf || !match(Shuf->getOperand(1), m_Poison()) || 13288bcb0991SDimitry Andric !(Shuf->isIdentityWithExtract() || Shuf->isIdentityWithPadding())) 13298bcb0991SDimitry Andric return nullptr; 13308bcb0991SDimitry Andric 13315ffd83dbSDimitry Andric // Bail out early if shuffle is scalable type. The number of elements in 13325ffd83dbSDimitry Andric // shuffle mask is unknown at compile-time. 13335ffd83dbSDimitry Andric if (isa<ScalableVectorType>(Shuf->getType())) 13345ffd83dbSDimitry Andric return nullptr; 13355ffd83dbSDimitry Andric 13368bcb0991SDimitry Andric // Check for a constant insertion index. 13378bcb0991SDimitry Andric uint64_t IdxC; 13388bcb0991SDimitry Andric if (!match(InsElt.getOperand(2), m_ConstantInt(IdxC))) 13398bcb0991SDimitry Andric return nullptr; 13408bcb0991SDimitry Andric 13418bcb0991SDimitry Andric // Check if this insert's scalar op is extracted from the identity shuffle's 13428bcb0991SDimitry Andric // input vector. 13438bcb0991SDimitry Andric Value *Scalar = InsElt.getOperand(1); 13448bcb0991SDimitry Andric Value *X = Shuf->getOperand(0); 13455ffd83dbSDimitry Andric if (!match(Scalar, m_ExtractElt(m_Specific(X), m_SpecificInt(IdxC)))) 13468bcb0991SDimitry Andric return nullptr; 13478bcb0991SDimitry Andric 13488bcb0991SDimitry Andric // Replace the shuffle mask element at the index of this extract+insert with 13498bcb0991SDimitry Andric // that same index value. 13508bcb0991SDimitry Andric // For example: 13518bcb0991SDimitry Andric // inselt (shuf X, IdMask), (extelt X, IdxC), IdxC --> shuf X, IdMask' 1352e8d8bef9SDimitry Andric unsigned NumMaskElts = 1353e8d8bef9SDimitry Andric cast<FixedVectorType>(Shuf->getType())->getNumElements(); 13545ffd83dbSDimitry Andric SmallVector<int, 16> NewMask(NumMaskElts); 13555ffd83dbSDimitry Andric ArrayRef<int> OldMask = Shuf->getShuffleMask(); 13568bcb0991SDimitry Andric for (unsigned i = 0; i != NumMaskElts; ++i) { 13578bcb0991SDimitry Andric if (i != IdxC) { 13588bcb0991SDimitry Andric // All mask elements besides the inserted element remain the same. 13595ffd83dbSDimitry Andric NewMask[i] = OldMask[i]; 13605ffd83dbSDimitry Andric } else if (OldMask[i] == (int)IdxC) { 13618bcb0991SDimitry Andric // If the mask element was already set, there's nothing to do 13628bcb0991SDimitry Andric // (demanded elements analysis may unset it later). 13638bcb0991SDimitry Andric return nullptr; 13648bcb0991SDimitry Andric } else { 136506c3fb27SDimitry Andric assert(OldMask[i] == PoisonMaskElem && 13668bcb0991SDimitry Andric "Unexpected shuffle mask element for identity shuffle"); 13675ffd83dbSDimitry Andric NewMask[i] = IdxC; 13688bcb0991SDimitry Andric } 13698bcb0991SDimitry Andric } 13708bcb0991SDimitry Andric 13718bcb0991SDimitry Andric return new ShuffleVectorInst(X, Shuf->getOperand(1), NewMask); 13728bcb0991SDimitry Andric } 13738bcb0991SDimitry Andric 13740b57cec5SDimitry Andric /// If we have an insertelement instruction feeding into another insertelement 13750b57cec5SDimitry Andric /// and the 2nd is inserting a constant into the vector, canonicalize that 13760b57cec5SDimitry Andric /// constant insertion before the insertion of a variable: 13770b57cec5SDimitry Andric /// 13780b57cec5SDimitry Andric /// insertelement (insertelement X, Y, IdxC1), ScalarC, IdxC2 --> 13790b57cec5SDimitry Andric /// insertelement (insertelement X, ScalarC, IdxC2), Y, IdxC1 13800b57cec5SDimitry Andric /// 13810b57cec5SDimitry Andric /// This has the potential of eliminating the 2nd insertelement instruction 13820b57cec5SDimitry Andric /// via constant folding of the scalar constant into a vector constant. 13830b57cec5SDimitry Andric static Instruction *hoistInsEltConst(InsertElementInst &InsElt2, 13840b57cec5SDimitry Andric InstCombiner::BuilderTy &Builder) { 13850b57cec5SDimitry Andric auto *InsElt1 = dyn_cast<InsertElementInst>(InsElt2.getOperand(0)); 13860b57cec5SDimitry Andric if (!InsElt1 || !InsElt1->hasOneUse()) 13870b57cec5SDimitry Andric return nullptr; 13880b57cec5SDimitry Andric 13890b57cec5SDimitry Andric Value *X, *Y; 13900b57cec5SDimitry Andric Constant *ScalarC; 13910b57cec5SDimitry Andric ConstantInt *IdxC1, *IdxC2; 13920b57cec5SDimitry Andric if (match(InsElt1->getOperand(0), m_Value(X)) && 13930b57cec5SDimitry Andric match(InsElt1->getOperand(1), m_Value(Y)) && !isa<Constant>(Y) && 13940b57cec5SDimitry Andric match(InsElt1->getOperand(2), m_ConstantInt(IdxC1)) && 13950b57cec5SDimitry Andric match(InsElt2.getOperand(1), m_Constant(ScalarC)) && 13960b57cec5SDimitry Andric match(InsElt2.getOperand(2), m_ConstantInt(IdxC2)) && IdxC1 != IdxC2) { 13970b57cec5SDimitry Andric Value *NewInsElt1 = Builder.CreateInsertElement(X, ScalarC, IdxC2); 13980b57cec5SDimitry Andric return InsertElementInst::Create(NewInsElt1, Y, IdxC1); 13990b57cec5SDimitry Andric } 14000b57cec5SDimitry Andric 14010b57cec5SDimitry Andric return nullptr; 14020b57cec5SDimitry Andric } 14030b57cec5SDimitry Andric 14040b57cec5SDimitry Andric /// insertelt (shufflevector X, CVec, Mask|insertelt X, C1, CIndex1), C, CIndex 14050b57cec5SDimitry Andric /// --> shufflevector X, CVec', Mask' 14060b57cec5SDimitry Andric static Instruction *foldConstantInsEltIntoShuffle(InsertElementInst &InsElt) { 14070b57cec5SDimitry Andric auto *Inst = dyn_cast<Instruction>(InsElt.getOperand(0)); 14080b57cec5SDimitry Andric // Bail out if the parent has more than one use. In that case, we'd be 14090b57cec5SDimitry Andric // replacing the insertelt with a shuffle, and that's not a clear win. 14100b57cec5SDimitry Andric if (!Inst || !Inst->hasOneUse()) 14110b57cec5SDimitry Andric return nullptr; 14120b57cec5SDimitry Andric if (auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0))) { 14130b57cec5SDimitry Andric // The shuffle must have a constant vector operand. The insertelt must have 14140b57cec5SDimitry Andric // a constant scalar being inserted at a constant position in the vector. 14150b57cec5SDimitry Andric Constant *ShufConstVec, *InsEltScalar; 14160b57cec5SDimitry Andric uint64_t InsEltIndex; 14170b57cec5SDimitry Andric if (!match(Shuf->getOperand(1), m_Constant(ShufConstVec)) || 14180b57cec5SDimitry Andric !match(InsElt.getOperand(1), m_Constant(InsEltScalar)) || 14190b57cec5SDimitry Andric !match(InsElt.getOperand(2), m_ConstantInt(InsEltIndex))) 14200b57cec5SDimitry Andric return nullptr; 14210b57cec5SDimitry Andric 14220b57cec5SDimitry Andric // Adding an element to an arbitrary shuffle could be expensive, but a 14230b57cec5SDimitry Andric // shuffle that selects elements from vectors without crossing lanes is 14240b57cec5SDimitry Andric // assumed cheap. 14250b57cec5SDimitry Andric // If we're just adding a constant into that shuffle, it will still be 14260b57cec5SDimitry Andric // cheap. 14270b57cec5SDimitry Andric if (!isShuffleEquivalentToSelect(*Shuf)) 14280b57cec5SDimitry Andric return nullptr; 14290b57cec5SDimitry Andric 14300b57cec5SDimitry Andric // From the above 'select' check, we know that the mask has the same number 14310b57cec5SDimitry Andric // of elements as the vector input operands. We also know that each constant 14320b57cec5SDimitry Andric // input element is used in its lane and can not be used more than once by 14330b57cec5SDimitry Andric // the shuffle. Therefore, replace the constant in the shuffle's constant 14340b57cec5SDimitry Andric // vector with the insertelt constant. Replace the constant in the shuffle's 14350b57cec5SDimitry Andric // mask vector with the insertelt index plus the length of the vector 14360b57cec5SDimitry Andric // (because the constant vector operand of a shuffle is always the 2nd 14370b57cec5SDimitry Andric // operand). 14385ffd83dbSDimitry Andric ArrayRef<int> Mask = Shuf->getShuffleMask(); 14395ffd83dbSDimitry Andric unsigned NumElts = Mask.size(); 14400b57cec5SDimitry Andric SmallVector<Constant *, 16> NewShufElts(NumElts); 14415ffd83dbSDimitry Andric SmallVector<int, 16> NewMaskElts(NumElts); 14420b57cec5SDimitry Andric for (unsigned I = 0; I != NumElts; ++I) { 14430b57cec5SDimitry Andric if (I == InsEltIndex) { 14440b57cec5SDimitry Andric NewShufElts[I] = InsEltScalar; 14455ffd83dbSDimitry Andric NewMaskElts[I] = InsEltIndex + NumElts; 14460b57cec5SDimitry Andric } else { 14470b57cec5SDimitry Andric // Copy over the existing values. 14480b57cec5SDimitry Andric NewShufElts[I] = ShufConstVec->getAggregateElement(I); 14495ffd83dbSDimitry Andric NewMaskElts[I] = Mask[I]; 14500b57cec5SDimitry Andric } 1451349cc55cSDimitry Andric 1452349cc55cSDimitry Andric // Bail if we failed to find an element. 1453349cc55cSDimitry Andric if (!NewShufElts[I]) 1454349cc55cSDimitry Andric return nullptr; 14550b57cec5SDimitry Andric } 14560b57cec5SDimitry Andric 14570b57cec5SDimitry Andric // Create new operands for a shuffle that includes the constant of the 14580b57cec5SDimitry Andric // original insertelt. The old shuffle will be dead now. 14590b57cec5SDimitry Andric return new ShuffleVectorInst(Shuf->getOperand(0), 14605ffd83dbSDimitry Andric ConstantVector::get(NewShufElts), NewMaskElts); 14610b57cec5SDimitry Andric } else if (auto *IEI = dyn_cast<InsertElementInst>(Inst)) { 14620b57cec5SDimitry Andric // Transform sequences of insertelements ops with constant data/indexes into 14630b57cec5SDimitry Andric // a single shuffle op. 14645ffd83dbSDimitry Andric // Can not handle scalable type, the number of elements needed to create 14655ffd83dbSDimitry Andric // shuffle mask is not a compile-time constant. 14665ffd83dbSDimitry Andric if (isa<ScalableVectorType>(InsElt.getType())) 14675ffd83dbSDimitry Andric return nullptr; 14685ffd83dbSDimitry Andric unsigned NumElts = 14695ffd83dbSDimitry Andric cast<FixedVectorType>(InsElt.getType())->getNumElements(); 14700b57cec5SDimitry Andric 14710b57cec5SDimitry Andric uint64_t InsertIdx[2]; 14720b57cec5SDimitry Andric Constant *Val[2]; 14730b57cec5SDimitry Andric if (!match(InsElt.getOperand(2), m_ConstantInt(InsertIdx[0])) || 14740b57cec5SDimitry Andric !match(InsElt.getOperand(1), m_Constant(Val[0])) || 14750b57cec5SDimitry Andric !match(IEI->getOperand(2), m_ConstantInt(InsertIdx[1])) || 14760b57cec5SDimitry Andric !match(IEI->getOperand(1), m_Constant(Val[1]))) 14770b57cec5SDimitry Andric return nullptr; 14780b57cec5SDimitry Andric SmallVector<Constant *, 16> Values(NumElts); 14795ffd83dbSDimitry Andric SmallVector<int, 16> Mask(NumElts); 14800b57cec5SDimitry Andric auto ValI = std::begin(Val); 14810b57cec5SDimitry Andric // Generate new constant vector and mask. 14820b57cec5SDimitry Andric // We have 2 values/masks from the insertelements instructions. Insert them 14830b57cec5SDimitry Andric // into new value/mask vectors. 14840b57cec5SDimitry Andric for (uint64_t I : InsertIdx) { 14850b57cec5SDimitry Andric if (!Values[I]) { 14860b57cec5SDimitry Andric Values[I] = *ValI; 14875ffd83dbSDimitry Andric Mask[I] = NumElts + I; 14880b57cec5SDimitry Andric } 14890b57cec5SDimitry Andric ++ValI; 14900b57cec5SDimitry Andric } 149106c3fb27SDimitry Andric // Remaining values are filled with 'poison' values. 14920b57cec5SDimitry Andric for (unsigned I = 0; I < NumElts; ++I) { 14930b57cec5SDimitry Andric if (!Values[I]) { 149406c3fb27SDimitry Andric Values[I] = PoisonValue::get(InsElt.getType()->getElementType()); 14955ffd83dbSDimitry Andric Mask[I] = I; 14960b57cec5SDimitry Andric } 14970b57cec5SDimitry Andric } 14980b57cec5SDimitry Andric // Create new operands for a shuffle that includes the constant of the 14990b57cec5SDimitry Andric // original insertelt. 15000b57cec5SDimitry Andric return new ShuffleVectorInst(IEI->getOperand(0), 15015ffd83dbSDimitry Andric ConstantVector::get(Values), Mask); 15020b57cec5SDimitry Andric } 15030b57cec5SDimitry Andric return nullptr; 15040b57cec5SDimitry Andric } 15050b57cec5SDimitry Andric 1506349cc55cSDimitry Andric /// If both the base vector and the inserted element are extended from the same 1507349cc55cSDimitry Andric /// type, do the insert element in the narrow source type followed by extend. 1508349cc55cSDimitry Andric /// TODO: This can be extended to include other cast opcodes, but particularly 1509349cc55cSDimitry Andric /// if we create a wider insertelement, make sure codegen is not harmed. 1510349cc55cSDimitry Andric static Instruction *narrowInsElt(InsertElementInst &InsElt, 1511349cc55cSDimitry Andric InstCombiner::BuilderTy &Builder) { 1512349cc55cSDimitry Andric // We are creating a vector extend. If the original vector extend has another 1513349cc55cSDimitry Andric // use, that would mean we end up with 2 vector extends, so avoid that. 1514349cc55cSDimitry Andric // TODO: We could ease the use-clause to "if at least one op has one use" 1515349cc55cSDimitry Andric // (assuming that the source types match - see next TODO comment). 1516349cc55cSDimitry Andric Value *Vec = InsElt.getOperand(0); 1517349cc55cSDimitry Andric if (!Vec->hasOneUse()) 1518349cc55cSDimitry Andric return nullptr; 1519349cc55cSDimitry Andric 1520349cc55cSDimitry Andric Value *Scalar = InsElt.getOperand(1); 1521349cc55cSDimitry Andric Value *X, *Y; 1522349cc55cSDimitry Andric CastInst::CastOps CastOpcode; 1523349cc55cSDimitry Andric if (match(Vec, m_FPExt(m_Value(X))) && match(Scalar, m_FPExt(m_Value(Y)))) 1524349cc55cSDimitry Andric CastOpcode = Instruction::FPExt; 1525349cc55cSDimitry Andric else if (match(Vec, m_SExt(m_Value(X))) && match(Scalar, m_SExt(m_Value(Y)))) 1526349cc55cSDimitry Andric CastOpcode = Instruction::SExt; 1527349cc55cSDimitry Andric else if (match(Vec, m_ZExt(m_Value(X))) && match(Scalar, m_ZExt(m_Value(Y)))) 1528349cc55cSDimitry Andric CastOpcode = Instruction::ZExt; 1529349cc55cSDimitry Andric else 1530349cc55cSDimitry Andric return nullptr; 1531349cc55cSDimitry Andric 1532349cc55cSDimitry Andric // TODO: We can allow mismatched types by creating an intermediate cast. 1533349cc55cSDimitry Andric if (X->getType()->getScalarType() != Y->getType()) 1534349cc55cSDimitry Andric return nullptr; 1535349cc55cSDimitry Andric 1536349cc55cSDimitry Andric // inselt (ext X), (ext Y), Index --> ext (inselt X, Y, Index) 1537349cc55cSDimitry Andric Value *NewInsElt = Builder.CreateInsertElement(X, Y, InsElt.getOperand(2)); 1538349cc55cSDimitry Andric return CastInst::Create(CastOpcode, NewInsElt, InsElt.getType()); 1539349cc55cSDimitry Andric } 1540349cc55cSDimitry Andric 1541bdd1243dSDimitry Andric /// If we are inserting 2 halves of a value into adjacent elements of a vector, 1542bdd1243dSDimitry Andric /// try to convert to a single insert with appropriate bitcasts. 1543bdd1243dSDimitry Andric static Instruction *foldTruncInsEltPair(InsertElementInst &InsElt, 1544bdd1243dSDimitry Andric bool IsBigEndian, 1545bdd1243dSDimitry Andric InstCombiner::BuilderTy &Builder) { 1546bdd1243dSDimitry Andric Value *VecOp = InsElt.getOperand(0); 1547bdd1243dSDimitry Andric Value *ScalarOp = InsElt.getOperand(1); 1548bdd1243dSDimitry Andric Value *IndexOp = InsElt.getOperand(2); 1549bdd1243dSDimitry Andric 1550bdd1243dSDimitry Andric // Pattern depends on endian because we expect lower index is inserted first. 1551bdd1243dSDimitry Andric // Big endian: 1552bdd1243dSDimitry Andric // inselt (inselt BaseVec, (trunc (lshr X, BW/2), Index0), (trunc X), Index1 1553bdd1243dSDimitry Andric // Little endian: 1554bdd1243dSDimitry Andric // inselt (inselt BaseVec, (trunc X), Index0), (trunc (lshr X, BW/2)), Index1 1555bdd1243dSDimitry Andric // Note: It is not safe to do this transform with an arbitrary base vector 1556bdd1243dSDimitry Andric // because the bitcast of that vector to fewer/larger elements could 1557bdd1243dSDimitry Andric // allow poison to spill into an element that was not poison before. 1558bdd1243dSDimitry Andric // TODO: Detect smaller fractions of the scalar. 1559bdd1243dSDimitry Andric // TODO: One-use checks are conservative. 1560bdd1243dSDimitry Andric auto *VTy = dyn_cast<FixedVectorType>(InsElt.getType()); 1561bdd1243dSDimitry Andric Value *Scalar0, *BaseVec; 1562bdd1243dSDimitry Andric uint64_t Index0, Index1; 1563bdd1243dSDimitry Andric if (!VTy || (VTy->getNumElements() & 1) || 1564bdd1243dSDimitry Andric !match(IndexOp, m_ConstantInt(Index1)) || 1565bdd1243dSDimitry Andric !match(VecOp, m_InsertElt(m_Value(BaseVec), m_Value(Scalar0), 1566bdd1243dSDimitry Andric m_ConstantInt(Index0))) || 1567bdd1243dSDimitry Andric !match(BaseVec, m_Undef())) 1568bdd1243dSDimitry Andric return nullptr; 1569bdd1243dSDimitry Andric 1570bdd1243dSDimitry Andric // The first insert must be to the index one less than this one, and 1571bdd1243dSDimitry Andric // the first insert must be to an even index. 1572bdd1243dSDimitry Andric if (Index0 + 1 != Index1 || Index0 & 1) 1573bdd1243dSDimitry Andric return nullptr; 1574bdd1243dSDimitry Andric 1575bdd1243dSDimitry Andric // For big endian, the high half of the value should be inserted first. 1576bdd1243dSDimitry Andric // For little endian, the low half of the value should be inserted first. 1577bdd1243dSDimitry Andric Value *X; 1578bdd1243dSDimitry Andric uint64_t ShAmt; 1579bdd1243dSDimitry Andric if (IsBigEndian) { 1580bdd1243dSDimitry Andric if (!match(ScalarOp, m_Trunc(m_Value(X))) || 1581bdd1243dSDimitry Andric !match(Scalar0, m_Trunc(m_LShr(m_Specific(X), m_ConstantInt(ShAmt))))) 1582bdd1243dSDimitry Andric return nullptr; 1583bdd1243dSDimitry Andric } else { 1584bdd1243dSDimitry Andric if (!match(Scalar0, m_Trunc(m_Value(X))) || 1585bdd1243dSDimitry Andric !match(ScalarOp, m_Trunc(m_LShr(m_Specific(X), m_ConstantInt(ShAmt))))) 1586bdd1243dSDimitry Andric return nullptr; 1587bdd1243dSDimitry Andric } 1588bdd1243dSDimitry Andric 1589bdd1243dSDimitry Andric Type *SrcTy = X->getType(); 1590bdd1243dSDimitry Andric unsigned ScalarWidth = SrcTy->getScalarSizeInBits(); 1591bdd1243dSDimitry Andric unsigned VecEltWidth = VTy->getScalarSizeInBits(); 1592bdd1243dSDimitry Andric if (ScalarWidth != VecEltWidth * 2 || ShAmt != VecEltWidth) 1593bdd1243dSDimitry Andric return nullptr; 1594bdd1243dSDimitry Andric 1595bdd1243dSDimitry Andric // Bitcast the base vector to a vector type with the source element type. 1596bdd1243dSDimitry Andric Type *CastTy = FixedVectorType::get(SrcTy, VTy->getNumElements() / 2); 1597bdd1243dSDimitry Andric Value *CastBaseVec = Builder.CreateBitCast(BaseVec, CastTy); 1598bdd1243dSDimitry Andric 1599bdd1243dSDimitry Andric // Scale the insert index for a vector with half as many elements. 1600bdd1243dSDimitry Andric // bitcast (inselt (bitcast BaseVec), X, NewIndex) 1601bdd1243dSDimitry Andric uint64_t NewIndex = IsBigEndian ? Index1 / 2 : Index0 / 2; 1602bdd1243dSDimitry Andric Value *NewInsert = Builder.CreateInsertElement(CastBaseVec, X, NewIndex); 1603bdd1243dSDimitry Andric return new BitCastInst(NewInsert, VTy); 1604bdd1243dSDimitry Andric } 1605bdd1243dSDimitry Andric 1606e8d8bef9SDimitry Andric Instruction *InstCombinerImpl::visitInsertElementInst(InsertElementInst &IE) { 16070b57cec5SDimitry Andric Value *VecOp = IE.getOperand(0); 16080b57cec5SDimitry Andric Value *ScalarOp = IE.getOperand(1); 16090b57cec5SDimitry Andric Value *IdxOp = IE.getOperand(2); 16100b57cec5SDimitry Andric 161181ad6265SDimitry Andric if (auto *V = simplifyInsertElementInst( 16120b57cec5SDimitry Andric VecOp, ScalarOp, IdxOp, SQ.getWithInstruction(&IE))) 16130b57cec5SDimitry Andric return replaceInstUsesWith(IE, V); 16140b57cec5SDimitry Andric 16150eae32dcSDimitry Andric // Canonicalize type of constant indices to i64 to simplify CSE 1616bdd1243dSDimitry Andric if (auto *IndexC = dyn_cast<ConstantInt>(IdxOp)) { 16170eae32dcSDimitry Andric if (auto *NewIdx = getPreferredVectorIndex(IndexC)) 16180eae32dcSDimitry Andric return replaceOperand(IE, 2, NewIdx); 16190eae32dcSDimitry Andric 1620bdd1243dSDimitry Andric Value *BaseVec, *OtherScalar; 1621bdd1243dSDimitry Andric uint64_t OtherIndexVal; 1622bdd1243dSDimitry Andric if (match(VecOp, m_OneUse(m_InsertElt(m_Value(BaseVec), 1623bdd1243dSDimitry Andric m_Value(OtherScalar), 1624bdd1243dSDimitry Andric m_ConstantInt(OtherIndexVal)))) && 1625bdd1243dSDimitry Andric !isa<Constant>(OtherScalar) && OtherIndexVal > IndexC->getZExtValue()) { 1626bdd1243dSDimitry Andric Value *NewIns = Builder.CreateInsertElement(BaseVec, ScalarOp, IdxOp); 1627bdd1243dSDimitry Andric return InsertElementInst::Create(NewIns, OtherScalar, 1628bdd1243dSDimitry Andric Builder.getInt64(OtherIndexVal)); 1629bdd1243dSDimitry Andric } 1630bdd1243dSDimitry Andric } 1631bdd1243dSDimitry Andric 16325ffd83dbSDimitry Andric // If the scalar is bitcast and inserted into undef, do the insert in the 16335ffd83dbSDimitry Andric // source type followed by bitcast. 16345ffd83dbSDimitry Andric // TODO: Generalize for insert into any constant, not just undef? 16355ffd83dbSDimitry Andric Value *ScalarSrc; 16365ffd83dbSDimitry Andric if (match(VecOp, m_Undef()) && 16375ffd83dbSDimitry Andric match(ScalarOp, m_OneUse(m_BitCast(m_Value(ScalarSrc)))) && 16385ffd83dbSDimitry Andric (ScalarSrc->getType()->isIntegerTy() || 16395ffd83dbSDimitry Andric ScalarSrc->getType()->isFloatingPointTy())) { 16405ffd83dbSDimitry Andric // inselt undef, (bitcast ScalarSrc), IdxOp --> 16415ffd83dbSDimitry Andric // bitcast (inselt undef, ScalarSrc, IdxOp) 16425ffd83dbSDimitry Andric Type *ScalarTy = ScalarSrc->getType(); 16435ffd83dbSDimitry Andric Type *VecTy = VectorType::get(ScalarTy, IE.getType()->getElementCount()); 1644cb14a3feSDimitry Andric Constant *NewUndef = isa<PoisonValue>(VecOp) ? PoisonValue::get(VecTy) 1645cb14a3feSDimitry Andric : UndefValue::get(VecTy); 16465ffd83dbSDimitry Andric Value *NewInsElt = Builder.CreateInsertElement(NewUndef, ScalarSrc, IdxOp); 16475ffd83dbSDimitry Andric return new BitCastInst(NewInsElt, IE.getType()); 16485ffd83dbSDimitry Andric } 16495ffd83dbSDimitry Andric 16500b57cec5SDimitry Andric // If the vector and scalar are both bitcast from the same element type, do 16510b57cec5SDimitry Andric // the insert in that source type followed by bitcast. 16525ffd83dbSDimitry Andric Value *VecSrc; 16530b57cec5SDimitry Andric if (match(VecOp, m_BitCast(m_Value(VecSrc))) && 16540b57cec5SDimitry Andric match(ScalarOp, m_BitCast(m_Value(ScalarSrc))) && 16550b57cec5SDimitry Andric (VecOp->hasOneUse() || ScalarOp->hasOneUse()) && 16560b57cec5SDimitry Andric VecSrc->getType()->isVectorTy() && !ScalarSrc->getType()->isVectorTy() && 16575ffd83dbSDimitry Andric cast<VectorType>(VecSrc->getType())->getElementType() == 16585ffd83dbSDimitry Andric ScalarSrc->getType()) { 16590b57cec5SDimitry Andric // inselt (bitcast VecSrc), (bitcast ScalarSrc), IdxOp --> 16600b57cec5SDimitry Andric // bitcast (inselt VecSrc, ScalarSrc, IdxOp) 16610b57cec5SDimitry Andric Value *NewInsElt = Builder.CreateInsertElement(VecSrc, ScalarSrc, IdxOp); 16620b57cec5SDimitry Andric return new BitCastInst(NewInsElt, IE.getType()); 16630b57cec5SDimitry Andric } 16640b57cec5SDimitry Andric 16655ffd83dbSDimitry Andric // If the inserted element was extracted from some other fixed-length vector 16665ffd83dbSDimitry Andric // and both indexes are valid constants, try to turn this into a shuffle. 16675ffd83dbSDimitry Andric // Can not handle scalable vector type, the number of elements needed to 16685ffd83dbSDimitry Andric // create shuffle mask is not a compile-time constant. 16690b57cec5SDimitry Andric uint64_t InsertedIdx, ExtractedIdx; 16700b57cec5SDimitry Andric Value *ExtVecOp; 16715ffd83dbSDimitry Andric if (isa<FixedVectorType>(IE.getType()) && 16725ffd83dbSDimitry Andric match(IdxOp, m_ConstantInt(InsertedIdx)) && 16735ffd83dbSDimitry Andric match(ScalarOp, 16745ffd83dbSDimitry Andric m_ExtractElt(m_Value(ExtVecOp), m_ConstantInt(ExtractedIdx))) && 16755ffd83dbSDimitry Andric isa<FixedVectorType>(ExtVecOp->getType()) && 16765ffd83dbSDimitry Andric ExtractedIdx < 16775ffd83dbSDimitry Andric cast<FixedVectorType>(ExtVecOp->getType())->getNumElements()) { 16780b57cec5SDimitry Andric // TODO: Looking at the user(s) to determine if this insert is a 16790b57cec5SDimitry Andric // fold-to-shuffle opportunity does not match the usual instcombine 16800b57cec5SDimitry Andric // constraints. We should decide if the transform is worthy based only 16810b57cec5SDimitry Andric // on this instruction and its operands, but that may not work currently. 16820b57cec5SDimitry Andric // 16830b57cec5SDimitry Andric // Here, we are trying to avoid creating shuffles before reaching 16840b57cec5SDimitry Andric // the end of a chain of extract-insert pairs. This is complicated because 16850b57cec5SDimitry Andric // we do not generally form arbitrary shuffle masks in instcombine 16860b57cec5SDimitry Andric // (because those may codegen poorly), but collectShuffleElements() does 16870b57cec5SDimitry Andric // exactly that. 16880b57cec5SDimitry Andric // 16890b57cec5SDimitry Andric // The rules for determining what is an acceptable target-independent 16900b57cec5SDimitry Andric // shuffle mask are fuzzy because they evolve based on the backend's 16910b57cec5SDimitry Andric // capabilities and real-world impact. 16920b57cec5SDimitry Andric auto isShuffleRootCandidate = [](InsertElementInst &Insert) { 16930b57cec5SDimitry Andric if (!Insert.hasOneUse()) 16940b57cec5SDimitry Andric return true; 16950b57cec5SDimitry Andric auto *InsertUser = dyn_cast<InsertElementInst>(Insert.user_back()); 16960b57cec5SDimitry Andric if (!InsertUser) 16970b57cec5SDimitry Andric return true; 16980b57cec5SDimitry Andric return false; 16990b57cec5SDimitry Andric }; 17000b57cec5SDimitry Andric 17010b57cec5SDimitry Andric // Try to form a shuffle from a chain of extract-insert ops. 17020b57cec5SDimitry Andric if (isShuffleRootCandidate(IE)) { 170306c3fb27SDimitry Andric bool Rerun = true; 170406c3fb27SDimitry Andric while (Rerun) { 170506c3fb27SDimitry Andric Rerun = false; 170606c3fb27SDimitry Andric 17075ffd83dbSDimitry Andric SmallVector<int, 16> Mask; 170806c3fb27SDimitry Andric ShuffleOps LR = 170906c3fb27SDimitry Andric collectShuffleElements(&IE, Mask, nullptr, *this, Rerun); 17100b57cec5SDimitry Andric 17110b57cec5SDimitry Andric // The proposed shuffle may be trivial, in which case we shouldn't 17120b57cec5SDimitry Andric // perform the combine. 17130b57cec5SDimitry Andric if (LR.first != &IE && LR.second != &IE) { 17140b57cec5SDimitry Andric // We now have a shuffle of LHS, RHS, Mask. 17150b57cec5SDimitry Andric if (LR.second == nullptr) 171606c3fb27SDimitry Andric LR.second = PoisonValue::get(LR.first->getType()); 17175ffd83dbSDimitry Andric return new ShuffleVectorInst(LR.first, LR.second, Mask); 17180b57cec5SDimitry Andric } 17190b57cec5SDimitry Andric } 17200b57cec5SDimitry Andric } 172106c3fb27SDimitry Andric } 17220b57cec5SDimitry Andric 17235ffd83dbSDimitry Andric if (auto VecTy = dyn_cast<FixedVectorType>(VecOp->getType())) { 17245ffd83dbSDimitry Andric unsigned VWidth = VecTy->getNumElements(); 1725cb14a3feSDimitry Andric APInt PoisonElts(VWidth, 0); 1726349cc55cSDimitry Andric APInt AllOnesEltMask(APInt::getAllOnes(VWidth)); 1727cb14a3feSDimitry Andric if (Value *V = SimplifyDemandedVectorElts(&IE, AllOnesEltMask, 1728cb14a3feSDimitry Andric PoisonElts)) { 17290b57cec5SDimitry Andric if (V != &IE) 17300b57cec5SDimitry Andric return replaceInstUsesWith(IE, V); 17310b57cec5SDimitry Andric return &IE; 17320b57cec5SDimitry Andric } 17335ffd83dbSDimitry Andric } 17340b57cec5SDimitry Andric 17350b57cec5SDimitry Andric if (Instruction *Shuf = foldConstantInsEltIntoShuffle(IE)) 17360b57cec5SDimitry Andric return Shuf; 17370b57cec5SDimitry Andric 17380b57cec5SDimitry Andric if (Instruction *NewInsElt = hoistInsEltConst(IE, Builder)) 17390b57cec5SDimitry Andric return NewInsElt; 17400b57cec5SDimitry Andric 17410b57cec5SDimitry Andric if (Instruction *Broadcast = foldInsSequenceIntoSplat(IE)) 17420b57cec5SDimitry Andric return Broadcast; 17430b57cec5SDimitry Andric 17440b57cec5SDimitry Andric if (Instruction *Splat = foldInsEltIntoSplat(IE)) 17450b57cec5SDimitry Andric return Splat; 17460b57cec5SDimitry Andric 17478bcb0991SDimitry Andric if (Instruction *IdentityShuf = foldInsEltIntoIdentityShuffle(IE)) 17488bcb0991SDimitry Andric return IdentityShuf; 17498bcb0991SDimitry Andric 1750349cc55cSDimitry Andric if (Instruction *Ext = narrowInsElt(IE, Builder)) 1751349cc55cSDimitry Andric return Ext; 1752349cc55cSDimitry Andric 1753bdd1243dSDimitry Andric if (Instruction *Ext = foldTruncInsEltPair(IE, DL.isBigEndian(), Builder)) 1754bdd1243dSDimitry Andric return Ext; 1755bdd1243dSDimitry Andric 17560b57cec5SDimitry Andric return nullptr; 17570b57cec5SDimitry Andric } 17580b57cec5SDimitry Andric 17590b57cec5SDimitry Andric /// Return true if we can evaluate the specified expression tree if the vector 17600b57cec5SDimitry Andric /// elements were shuffled in a different order. 17610b57cec5SDimitry Andric static bool canEvaluateShuffled(Value *V, ArrayRef<int> Mask, 17620b57cec5SDimitry Andric unsigned Depth = 5) { 17630b57cec5SDimitry Andric // We can always reorder the elements of a constant. 17640b57cec5SDimitry Andric if (isa<Constant>(V)) 17650b57cec5SDimitry Andric return true; 17660b57cec5SDimitry Andric 17670b57cec5SDimitry Andric // We won't reorder vector arguments. No IPO here. 17680b57cec5SDimitry Andric Instruction *I = dyn_cast<Instruction>(V); 17690b57cec5SDimitry Andric if (!I) return false; 17700b57cec5SDimitry Andric 17710b57cec5SDimitry Andric // Two users may expect different orders of the elements. Don't try it. 17720b57cec5SDimitry Andric if (!I->hasOneUse()) 17730b57cec5SDimitry Andric return false; 17740b57cec5SDimitry Andric 17750b57cec5SDimitry Andric if (Depth == 0) return false; 17760b57cec5SDimitry Andric 17770b57cec5SDimitry Andric switch (I->getOpcode()) { 17788bcb0991SDimitry Andric case Instruction::UDiv: 17798bcb0991SDimitry Andric case Instruction::SDiv: 17808bcb0991SDimitry Andric case Instruction::URem: 17818bcb0991SDimitry Andric case Instruction::SRem: 17828bcb0991SDimitry Andric // Propagating an undefined shuffle mask element to integer div/rem is not 17838bcb0991SDimitry Andric // allowed because those opcodes can create immediate undefined behavior 17848bcb0991SDimitry Andric // from an undefined element in an operand. 1785e8d8bef9SDimitry Andric if (llvm::is_contained(Mask, -1)) 17868bcb0991SDimitry Andric return false; 1787bdd1243dSDimitry Andric [[fallthrough]]; 17880b57cec5SDimitry Andric case Instruction::Add: 17890b57cec5SDimitry Andric case Instruction::FAdd: 17900b57cec5SDimitry Andric case Instruction::Sub: 17910b57cec5SDimitry Andric case Instruction::FSub: 17920b57cec5SDimitry Andric case Instruction::Mul: 17930b57cec5SDimitry Andric case Instruction::FMul: 17940b57cec5SDimitry Andric case Instruction::FDiv: 17950b57cec5SDimitry Andric case Instruction::FRem: 17960b57cec5SDimitry Andric case Instruction::Shl: 17970b57cec5SDimitry Andric case Instruction::LShr: 17980b57cec5SDimitry Andric case Instruction::AShr: 17990b57cec5SDimitry Andric case Instruction::And: 18000b57cec5SDimitry Andric case Instruction::Or: 18010b57cec5SDimitry Andric case Instruction::Xor: 18020b57cec5SDimitry Andric case Instruction::ICmp: 18030b57cec5SDimitry Andric case Instruction::FCmp: 18040b57cec5SDimitry Andric case Instruction::Trunc: 18050b57cec5SDimitry Andric case Instruction::ZExt: 18060b57cec5SDimitry Andric case Instruction::SExt: 18070b57cec5SDimitry Andric case Instruction::FPToUI: 18080b57cec5SDimitry Andric case Instruction::FPToSI: 18090b57cec5SDimitry Andric case Instruction::UIToFP: 18100b57cec5SDimitry Andric case Instruction::SIToFP: 18110b57cec5SDimitry Andric case Instruction::FPTrunc: 18120b57cec5SDimitry Andric case Instruction::FPExt: 18130b57cec5SDimitry Andric case Instruction::GetElementPtr: { 18140b57cec5SDimitry Andric // Bail out if we would create longer vector ops. We could allow creating 18158bcb0991SDimitry Andric // longer vector ops, but that may result in more expensive codegen. 18160b57cec5SDimitry Andric Type *ITy = I->getType(); 18175ffd83dbSDimitry Andric if (ITy->isVectorTy() && 1818e8d8bef9SDimitry Andric Mask.size() > cast<FixedVectorType>(ITy)->getNumElements()) 18190b57cec5SDimitry Andric return false; 18200b57cec5SDimitry Andric for (Value *Operand : I->operands()) { 18210b57cec5SDimitry Andric if (!canEvaluateShuffled(Operand, Mask, Depth - 1)) 18220b57cec5SDimitry Andric return false; 18230b57cec5SDimitry Andric } 18240b57cec5SDimitry Andric return true; 18250b57cec5SDimitry Andric } 18260b57cec5SDimitry Andric case Instruction::InsertElement: { 18270b57cec5SDimitry Andric ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(2)); 18280b57cec5SDimitry Andric if (!CI) return false; 18290b57cec5SDimitry Andric int ElementNumber = CI->getLimitedValue(); 18300b57cec5SDimitry Andric 18310b57cec5SDimitry Andric // Verify that 'CI' does not occur twice in Mask. A single 'insertelement' 18320b57cec5SDimitry Andric // can't put an element into multiple indices. 18330b57cec5SDimitry Andric bool SeenOnce = false; 1834bdd1243dSDimitry Andric for (int I : Mask) { 1835bdd1243dSDimitry Andric if (I == ElementNumber) { 18360b57cec5SDimitry Andric if (SeenOnce) 18370b57cec5SDimitry Andric return false; 18380b57cec5SDimitry Andric SeenOnce = true; 18390b57cec5SDimitry Andric } 18400b57cec5SDimitry Andric } 18410b57cec5SDimitry Andric return canEvaluateShuffled(I->getOperand(0), Mask, Depth - 1); 18420b57cec5SDimitry Andric } 18430b57cec5SDimitry Andric } 18440b57cec5SDimitry Andric return false; 18450b57cec5SDimitry Andric } 18460b57cec5SDimitry Andric 18470b57cec5SDimitry Andric /// Rebuild a new instruction just like 'I' but with the new operands given. 18480b57cec5SDimitry Andric /// In the event of type mismatch, the type of the operands is correct. 184906c3fb27SDimitry Andric static Value *buildNew(Instruction *I, ArrayRef<Value*> NewOps, 185006c3fb27SDimitry Andric IRBuilderBase &Builder) { 185106c3fb27SDimitry Andric Builder.SetInsertPoint(I); 18520b57cec5SDimitry Andric switch (I->getOpcode()) { 18530b57cec5SDimitry Andric case Instruction::Add: 18540b57cec5SDimitry Andric case Instruction::FAdd: 18550b57cec5SDimitry Andric case Instruction::Sub: 18560b57cec5SDimitry Andric case Instruction::FSub: 18570b57cec5SDimitry Andric case Instruction::Mul: 18580b57cec5SDimitry Andric case Instruction::FMul: 18590b57cec5SDimitry Andric case Instruction::UDiv: 18600b57cec5SDimitry Andric case Instruction::SDiv: 18610b57cec5SDimitry Andric case Instruction::FDiv: 18620b57cec5SDimitry Andric case Instruction::URem: 18630b57cec5SDimitry Andric case Instruction::SRem: 18640b57cec5SDimitry Andric case Instruction::FRem: 18650b57cec5SDimitry Andric case Instruction::Shl: 18660b57cec5SDimitry Andric case Instruction::LShr: 18670b57cec5SDimitry Andric case Instruction::AShr: 18680b57cec5SDimitry Andric case Instruction::And: 18690b57cec5SDimitry Andric case Instruction::Or: 18700b57cec5SDimitry Andric case Instruction::Xor: { 18710b57cec5SDimitry Andric BinaryOperator *BO = cast<BinaryOperator>(I); 18720b57cec5SDimitry Andric assert(NewOps.size() == 2 && "binary operator with #ops != 2"); 187306c3fb27SDimitry Andric Value *New = Builder.CreateBinOp(cast<BinaryOperator>(I)->getOpcode(), 187406c3fb27SDimitry Andric NewOps[0], NewOps[1]); 187506c3fb27SDimitry Andric if (auto *NewI = dyn_cast<Instruction>(New)) { 18760b57cec5SDimitry Andric if (isa<OverflowingBinaryOperator>(BO)) { 187706c3fb27SDimitry Andric NewI->setHasNoUnsignedWrap(BO->hasNoUnsignedWrap()); 187806c3fb27SDimitry Andric NewI->setHasNoSignedWrap(BO->hasNoSignedWrap()); 18790b57cec5SDimitry Andric } 18800b57cec5SDimitry Andric if (isa<PossiblyExactOperator>(BO)) { 188106c3fb27SDimitry Andric NewI->setIsExact(BO->isExact()); 18820b57cec5SDimitry Andric } 18830b57cec5SDimitry Andric if (isa<FPMathOperator>(BO)) 188406c3fb27SDimitry Andric NewI->copyFastMathFlags(I); 188506c3fb27SDimitry Andric } 18860b57cec5SDimitry Andric return New; 18870b57cec5SDimitry Andric } 18880b57cec5SDimitry Andric case Instruction::ICmp: 18890b57cec5SDimitry Andric assert(NewOps.size() == 2 && "icmp with #ops != 2"); 189006c3fb27SDimitry Andric return Builder.CreateICmp(cast<ICmpInst>(I)->getPredicate(), NewOps[0], 189106c3fb27SDimitry Andric NewOps[1]); 18920b57cec5SDimitry Andric case Instruction::FCmp: 18930b57cec5SDimitry Andric assert(NewOps.size() == 2 && "fcmp with #ops != 2"); 189406c3fb27SDimitry Andric return Builder.CreateFCmp(cast<FCmpInst>(I)->getPredicate(), NewOps[0], 189506c3fb27SDimitry Andric NewOps[1]); 18960b57cec5SDimitry Andric case Instruction::Trunc: 18970b57cec5SDimitry Andric case Instruction::ZExt: 18980b57cec5SDimitry Andric case Instruction::SExt: 18990b57cec5SDimitry Andric case Instruction::FPToUI: 19000b57cec5SDimitry Andric case Instruction::FPToSI: 19010b57cec5SDimitry Andric case Instruction::UIToFP: 19020b57cec5SDimitry Andric case Instruction::SIToFP: 19030b57cec5SDimitry Andric case Instruction::FPTrunc: 19040b57cec5SDimitry Andric case Instruction::FPExt: { 19050b57cec5SDimitry Andric // It's possible that the mask has a different number of elements from 19060b57cec5SDimitry Andric // the original cast. We recompute the destination type to match the mask. 19075ffd83dbSDimitry Andric Type *DestTy = VectorType::get( 19085ffd83dbSDimitry Andric I->getType()->getScalarType(), 19095ffd83dbSDimitry Andric cast<VectorType>(NewOps[0]->getType())->getElementCount()); 19100b57cec5SDimitry Andric assert(NewOps.size() == 1 && "cast with #ops != 1"); 191106c3fb27SDimitry Andric return Builder.CreateCast(cast<CastInst>(I)->getOpcode(), NewOps[0], 191206c3fb27SDimitry Andric DestTy); 19130b57cec5SDimitry Andric } 19140b57cec5SDimitry Andric case Instruction::GetElementPtr: { 19150b57cec5SDimitry Andric Value *Ptr = NewOps[0]; 19160b57cec5SDimitry Andric ArrayRef<Value*> Idx = NewOps.slice(1); 191706c3fb27SDimitry Andric return Builder.CreateGEP(cast<GEPOperator>(I)->getSourceElementType(), 191806c3fb27SDimitry Andric Ptr, Idx, "", 191906c3fb27SDimitry Andric cast<GEPOperator>(I)->isInBounds()); 19200b57cec5SDimitry Andric } 19210b57cec5SDimitry Andric } 19220b57cec5SDimitry Andric llvm_unreachable("failed to rebuild vector instructions"); 19230b57cec5SDimitry Andric } 19240b57cec5SDimitry Andric 192506c3fb27SDimitry Andric static Value *evaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask, 192606c3fb27SDimitry Andric IRBuilderBase &Builder) { 19270b57cec5SDimitry Andric // Mask.size() does not need to be equal to the number of vector elements. 19280b57cec5SDimitry Andric 19290b57cec5SDimitry Andric assert(V->getType()->isVectorTy() && "can't reorder non-vector elements"); 19300b57cec5SDimitry Andric Type *EltTy = V->getType()->getScalarType(); 1931cb14a3feSDimitry Andric 1932cb14a3feSDimitry Andric if (isa<PoisonValue>(V)) 1933cb14a3feSDimitry Andric return PoisonValue::get(FixedVectorType::get(EltTy, Mask.size())); 1934cb14a3feSDimitry Andric 1935fe6060f1SDimitry Andric if (match(V, m_Undef())) 19365ffd83dbSDimitry Andric return UndefValue::get(FixedVectorType::get(EltTy, Mask.size())); 19370b57cec5SDimitry Andric 19380b57cec5SDimitry Andric if (isa<ConstantAggregateZero>(V)) 19395ffd83dbSDimitry Andric return ConstantAggregateZero::get(FixedVectorType::get(EltTy, Mask.size())); 19400b57cec5SDimitry Andric 19415ffd83dbSDimitry Andric if (Constant *C = dyn_cast<Constant>(V)) 1942fe6060f1SDimitry Andric return ConstantExpr::getShuffleVector(C, PoisonValue::get(C->getType()), 19435ffd83dbSDimitry Andric Mask); 19440b57cec5SDimitry Andric 19450b57cec5SDimitry Andric Instruction *I = cast<Instruction>(V); 19460b57cec5SDimitry Andric switch (I->getOpcode()) { 19470b57cec5SDimitry Andric case Instruction::Add: 19480b57cec5SDimitry Andric case Instruction::FAdd: 19490b57cec5SDimitry Andric case Instruction::Sub: 19500b57cec5SDimitry Andric case Instruction::FSub: 19510b57cec5SDimitry Andric case Instruction::Mul: 19520b57cec5SDimitry Andric case Instruction::FMul: 19530b57cec5SDimitry Andric case Instruction::UDiv: 19540b57cec5SDimitry Andric case Instruction::SDiv: 19550b57cec5SDimitry Andric case Instruction::FDiv: 19560b57cec5SDimitry Andric case Instruction::URem: 19570b57cec5SDimitry Andric case Instruction::SRem: 19580b57cec5SDimitry Andric case Instruction::FRem: 19590b57cec5SDimitry Andric case Instruction::Shl: 19600b57cec5SDimitry Andric case Instruction::LShr: 19610b57cec5SDimitry Andric case Instruction::AShr: 19620b57cec5SDimitry Andric case Instruction::And: 19630b57cec5SDimitry Andric case Instruction::Or: 19640b57cec5SDimitry Andric case Instruction::Xor: 19650b57cec5SDimitry Andric case Instruction::ICmp: 19660b57cec5SDimitry Andric case Instruction::FCmp: 19670b57cec5SDimitry Andric case Instruction::Trunc: 19680b57cec5SDimitry Andric case Instruction::ZExt: 19690b57cec5SDimitry Andric case Instruction::SExt: 19700b57cec5SDimitry Andric case Instruction::FPToUI: 19710b57cec5SDimitry Andric case Instruction::FPToSI: 19720b57cec5SDimitry Andric case Instruction::UIToFP: 19730b57cec5SDimitry Andric case Instruction::SIToFP: 19740b57cec5SDimitry Andric case Instruction::FPTrunc: 19750b57cec5SDimitry Andric case Instruction::FPExt: 19760b57cec5SDimitry Andric case Instruction::Select: 19770b57cec5SDimitry Andric case Instruction::GetElementPtr: { 19780b57cec5SDimitry Andric SmallVector<Value*, 8> NewOps; 19795ffd83dbSDimitry Andric bool NeedsRebuild = 1980e8d8bef9SDimitry Andric (Mask.size() != 1981e8d8bef9SDimitry Andric cast<FixedVectorType>(I->getType())->getNumElements()); 19820b57cec5SDimitry Andric for (int i = 0, e = I->getNumOperands(); i != e; ++i) { 19830b57cec5SDimitry Andric Value *V; 19840b57cec5SDimitry Andric // Recursively call evaluateInDifferentElementOrder on vector arguments 19850b57cec5SDimitry Andric // as well. E.g. GetElementPtr may have scalar operands even if the 19860b57cec5SDimitry Andric // return value is a vector, so we need to examine the operand type. 19870b57cec5SDimitry Andric if (I->getOperand(i)->getType()->isVectorTy()) 198806c3fb27SDimitry Andric V = evaluateInDifferentElementOrder(I->getOperand(i), Mask, Builder); 19890b57cec5SDimitry Andric else 19900b57cec5SDimitry Andric V = I->getOperand(i); 19910b57cec5SDimitry Andric NewOps.push_back(V); 19920b57cec5SDimitry Andric NeedsRebuild |= (V != I->getOperand(i)); 19930b57cec5SDimitry Andric } 199406c3fb27SDimitry Andric if (NeedsRebuild) 199506c3fb27SDimitry Andric return buildNew(I, NewOps, Builder); 19960b57cec5SDimitry Andric return I; 19970b57cec5SDimitry Andric } 19980b57cec5SDimitry Andric case Instruction::InsertElement: { 19990b57cec5SDimitry Andric int Element = cast<ConstantInt>(I->getOperand(2))->getLimitedValue(); 20000b57cec5SDimitry Andric 20010b57cec5SDimitry Andric // The insertelement was inserting at Element. Figure out which element 20020b57cec5SDimitry Andric // that becomes after shuffling. The answer is guaranteed to be unique 20030b57cec5SDimitry Andric // by CanEvaluateShuffled. 20040b57cec5SDimitry Andric bool Found = false; 20050b57cec5SDimitry Andric int Index = 0; 20060b57cec5SDimitry Andric for (int e = Mask.size(); Index != e; ++Index) { 20070b57cec5SDimitry Andric if (Mask[Index] == Element) { 20080b57cec5SDimitry Andric Found = true; 20090b57cec5SDimitry Andric break; 20100b57cec5SDimitry Andric } 20110b57cec5SDimitry Andric } 20120b57cec5SDimitry Andric 20130b57cec5SDimitry Andric // If element is not in Mask, no need to handle the operand 1 (element to 20140b57cec5SDimitry Andric // be inserted). Just evaluate values in operand 0 according to Mask. 20150b57cec5SDimitry Andric if (!Found) 201606c3fb27SDimitry Andric return evaluateInDifferentElementOrder(I->getOperand(0), Mask, Builder); 20170b57cec5SDimitry Andric 201806c3fb27SDimitry Andric Value *V = evaluateInDifferentElementOrder(I->getOperand(0), Mask, 201906c3fb27SDimitry Andric Builder); 202006c3fb27SDimitry Andric Builder.SetInsertPoint(I); 202106c3fb27SDimitry Andric return Builder.CreateInsertElement(V, I->getOperand(1), Index); 20220b57cec5SDimitry Andric } 20230b57cec5SDimitry Andric } 20240b57cec5SDimitry Andric llvm_unreachable("failed to reorder elements of vector instruction!"); 20250b57cec5SDimitry Andric } 20260b57cec5SDimitry Andric 20270b57cec5SDimitry Andric // Returns true if the shuffle is extracting a contiguous range of values from 20280b57cec5SDimitry Andric // LHS, for example: 20290b57cec5SDimitry Andric // +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ 20300b57cec5SDimitry Andric // Input: |AA|BB|CC|DD|EE|FF|GG|HH|II|JJ|KK|LL|MM|NN|OO|PP| 20310b57cec5SDimitry Andric // Shuffles to: |EE|FF|GG|HH| 20320b57cec5SDimitry Andric // +--+--+--+--+ 20330b57cec5SDimitry Andric static bool isShuffleExtractingFromLHS(ShuffleVectorInst &SVI, 20345ffd83dbSDimitry Andric ArrayRef<int> Mask) { 20355ffd83dbSDimitry Andric unsigned LHSElems = 2036e8d8bef9SDimitry Andric cast<FixedVectorType>(SVI.getOperand(0)->getType())->getNumElements(); 20370b57cec5SDimitry Andric unsigned MaskElems = Mask.size(); 20380b57cec5SDimitry Andric unsigned BegIdx = Mask.front(); 20390b57cec5SDimitry Andric unsigned EndIdx = Mask.back(); 20400b57cec5SDimitry Andric if (BegIdx > EndIdx || EndIdx >= LHSElems || EndIdx - BegIdx != MaskElems - 1) 20410b57cec5SDimitry Andric return false; 20420b57cec5SDimitry Andric for (unsigned I = 0; I != MaskElems; ++I) 20430b57cec5SDimitry Andric if (static_cast<unsigned>(Mask[I]) != BegIdx + I) 20440b57cec5SDimitry Andric return false; 20450b57cec5SDimitry Andric return true; 20460b57cec5SDimitry Andric } 20470b57cec5SDimitry Andric 20480b57cec5SDimitry Andric /// These are the ingredients in an alternate form binary operator as described 20490b57cec5SDimitry Andric /// below. 20500b57cec5SDimitry Andric struct BinopElts { 20510b57cec5SDimitry Andric BinaryOperator::BinaryOps Opcode; 20520b57cec5SDimitry Andric Value *Op0; 20530b57cec5SDimitry Andric Value *Op1; 20540b57cec5SDimitry Andric BinopElts(BinaryOperator::BinaryOps Opc = (BinaryOperator::BinaryOps)0, 20550b57cec5SDimitry Andric Value *V0 = nullptr, Value *V1 = nullptr) : 20560b57cec5SDimitry Andric Opcode(Opc), Op0(V0), Op1(V1) {} 20570b57cec5SDimitry Andric operator bool() const { return Opcode != 0; } 20580b57cec5SDimitry Andric }; 20590b57cec5SDimitry Andric 20600b57cec5SDimitry Andric /// Binops may be transformed into binops with different opcodes and operands. 20610b57cec5SDimitry Andric /// Reverse the usual canonicalization to enable folds with the non-canonical 20620b57cec5SDimitry Andric /// form of the binop. If a transform is possible, return the elements of the 20630b57cec5SDimitry Andric /// new binop. If not, return invalid elements. 20640b57cec5SDimitry Andric static BinopElts getAlternateBinop(BinaryOperator *BO, const DataLayout &DL) { 20650b57cec5SDimitry Andric Value *BO0 = BO->getOperand(0), *BO1 = BO->getOperand(1); 20660b57cec5SDimitry Andric Type *Ty = BO->getType(); 20670b57cec5SDimitry Andric switch (BO->getOpcode()) { 20680b57cec5SDimitry Andric case Instruction::Shl: { 20690b57cec5SDimitry Andric // shl X, C --> mul X, (1 << C) 20700b57cec5SDimitry Andric Constant *C; 2071*0fca6ea1SDimitry Andric if (match(BO1, m_ImmConstant(C))) { 2072*0fca6ea1SDimitry Andric Constant *ShlOne = ConstantFoldBinaryOpOperands( 2073*0fca6ea1SDimitry Andric Instruction::Shl, ConstantInt::get(Ty, 1), C, DL); 2074*0fca6ea1SDimitry Andric assert(ShlOne && "Constant folding of immediate constants failed"); 20750b57cec5SDimitry Andric return {Instruction::Mul, BO0, ShlOne}; 20760b57cec5SDimitry Andric } 20770b57cec5SDimitry Andric break; 20780b57cec5SDimitry Andric } 20790b57cec5SDimitry Andric case Instruction::Or: { 2080*0fca6ea1SDimitry Andric // or disjoin X, C --> add X, C 2081*0fca6ea1SDimitry Andric if (cast<PossiblyDisjointInst>(BO)->isDisjoint()) 20820b57cec5SDimitry Andric return {Instruction::Add, BO0, BO1}; 20830b57cec5SDimitry Andric break; 20840b57cec5SDimitry Andric } 208581ad6265SDimitry Andric case Instruction::Sub: 208681ad6265SDimitry Andric // sub 0, X --> mul X, -1 208781ad6265SDimitry Andric if (match(BO0, m_ZeroInt())) 208881ad6265SDimitry Andric return {Instruction::Mul, BO1, ConstantInt::getAllOnesValue(Ty)}; 208981ad6265SDimitry Andric break; 20900b57cec5SDimitry Andric default: 20910b57cec5SDimitry Andric break; 20920b57cec5SDimitry Andric } 20930b57cec5SDimitry Andric return {}; 20940b57cec5SDimitry Andric } 20950b57cec5SDimitry Andric 2096bdd1243dSDimitry Andric /// A select shuffle of a select shuffle with a shared operand can be reduced 2097bdd1243dSDimitry Andric /// to a single select shuffle. This is an obvious improvement in IR, and the 2098bdd1243dSDimitry Andric /// backend is expected to lower select shuffles efficiently. 2099bdd1243dSDimitry Andric static Instruction *foldSelectShuffleOfSelectShuffle(ShuffleVectorInst &Shuf) { 2100bdd1243dSDimitry Andric assert(Shuf.isSelect() && "Must have select-equivalent shuffle"); 2101bdd1243dSDimitry Andric 2102bdd1243dSDimitry Andric Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1); 2103bdd1243dSDimitry Andric SmallVector<int, 16> Mask; 2104bdd1243dSDimitry Andric Shuf.getShuffleMask(Mask); 2105bdd1243dSDimitry Andric unsigned NumElts = Mask.size(); 2106bdd1243dSDimitry Andric 2107bdd1243dSDimitry Andric // Canonicalize a select shuffle with common operand as Op1. 2108bdd1243dSDimitry Andric auto *ShufOp = dyn_cast<ShuffleVectorInst>(Op0); 2109bdd1243dSDimitry Andric if (ShufOp && ShufOp->isSelect() && 2110bdd1243dSDimitry Andric (ShufOp->getOperand(0) == Op1 || ShufOp->getOperand(1) == Op1)) { 2111bdd1243dSDimitry Andric std::swap(Op0, Op1); 2112bdd1243dSDimitry Andric ShuffleVectorInst::commuteShuffleMask(Mask, NumElts); 2113bdd1243dSDimitry Andric } 2114bdd1243dSDimitry Andric 2115bdd1243dSDimitry Andric ShufOp = dyn_cast<ShuffleVectorInst>(Op1); 2116bdd1243dSDimitry Andric if (!ShufOp || !ShufOp->isSelect() || 2117bdd1243dSDimitry Andric (ShufOp->getOperand(0) != Op0 && ShufOp->getOperand(1) != Op0)) 2118bdd1243dSDimitry Andric return nullptr; 2119bdd1243dSDimitry Andric 2120bdd1243dSDimitry Andric Value *X = ShufOp->getOperand(0), *Y = ShufOp->getOperand(1); 2121bdd1243dSDimitry Andric SmallVector<int, 16> Mask1; 2122bdd1243dSDimitry Andric ShufOp->getShuffleMask(Mask1); 2123bdd1243dSDimitry Andric assert(Mask1.size() == NumElts && "Vector size changed with select shuffle"); 2124bdd1243dSDimitry Andric 2125bdd1243dSDimitry Andric // Canonicalize common operand (Op0) as X (first operand of first shuffle). 2126bdd1243dSDimitry Andric if (Y == Op0) { 2127bdd1243dSDimitry Andric std::swap(X, Y); 2128bdd1243dSDimitry Andric ShuffleVectorInst::commuteShuffleMask(Mask1, NumElts); 2129bdd1243dSDimitry Andric } 2130bdd1243dSDimitry Andric 2131bdd1243dSDimitry Andric // If the mask chooses from X (operand 0), it stays the same. 2132bdd1243dSDimitry Andric // If the mask chooses from the earlier shuffle, the other mask value is 2133bdd1243dSDimitry Andric // transferred to the combined select shuffle: 2134bdd1243dSDimitry Andric // shuf X, (shuf X, Y, M1), M --> shuf X, Y, M' 2135bdd1243dSDimitry Andric SmallVector<int, 16> NewMask(NumElts); 2136bdd1243dSDimitry Andric for (unsigned i = 0; i != NumElts; ++i) 2137bdd1243dSDimitry Andric NewMask[i] = Mask[i] < (signed)NumElts ? Mask[i] : Mask1[i]; 2138bdd1243dSDimitry Andric 2139bdd1243dSDimitry Andric // A select mask with undef elements might look like an identity mask. 21405f757f3fSDimitry Andric assert((ShuffleVectorInst::isSelectMask(NewMask, NumElts) || 21415f757f3fSDimitry Andric ShuffleVectorInst::isIdentityMask(NewMask, NumElts)) && 2142bdd1243dSDimitry Andric "Unexpected shuffle mask"); 2143bdd1243dSDimitry Andric return new ShuffleVectorInst(X, Y, NewMask); 2144bdd1243dSDimitry Andric } 2145bdd1243dSDimitry Andric 2146*0fca6ea1SDimitry Andric static Instruction *foldSelectShuffleWith1Binop(ShuffleVectorInst &Shuf, 2147*0fca6ea1SDimitry Andric const SimplifyQuery &SQ) { 21480b57cec5SDimitry Andric assert(Shuf.isSelect() && "Must have select-equivalent shuffle"); 21490b57cec5SDimitry Andric 21500b57cec5SDimitry Andric // Are we shuffling together some value and that same value after it has been 21510b57cec5SDimitry Andric // modified by a binop with a constant? 21520b57cec5SDimitry Andric Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1); 21530b57cec5SDimitry Andric Constant *C; 21540b57cec5SDimitry Andric bool Op0IsBinop; 21550b57cec5SDimitry Andric if (match(Op0, m_BinOp(m_Specific(Op1), m_Constant(C)))) 21560b57cec5SDimitry Andric Op0IsBinop = true; 21570b57cec5SDimitry Andric else if (match(Op1, m_BinOp(m_Specific(Op0), m_Constant(C)))) 21580b57cec5SDimitry Andric Op0IsBinop = false; 21590b57cec5SDimitry Andric else 21600b57cec5SDimitry Andric return nullptr; 21610b57cec5SDimitry Andric 21620b57cec5SDimitry Andric // The identity constant for a binop leaves a variable operand unchanged. For 21630b57cec5SDimitry Andric // a vector, this is a splat of something like 0, -1, or 1. 21640b57cec5SDimitry Andric // If there's no identity constant for this binop, we're done. 21650b57cec5SDimitry Andric auto *BO = cast<BinaryOperator>(Op0IsBinop ? Op0 : Op1); 21660b57cec5SDimitry Andric BinaryOperator::BinaryOps BOpcode = BO->getOpcode(); 21670b57cec5SDimitry Andric Constant *IdC = ConstantExpr::getBinOpIdentity(BOpcode, Shuf.getType(), true); 21680b57cec5SDimitry Andric if (!IdC) 21690b57cec5SDimitry Andric return nullptr; 21700b57cec5SDimitry Andric 2171*0fca6ea1SDimitry Andric Value *X = Op0IsBinop ? Op1 : Op0; 2172*0fca6ea1SDimitry Andric 2173*0fca6ea1SDimitry Andric // Prevent folding in the case the non-binop operand might have NaN values. 2174*0fca6ea1SDimitry Andric // If X can have NaN elements then we have that the floating point math 2175*0fca6ea1SDimitry Andric // operation in the transformed code may not preserve the exact NaN 2176*0fca6ea1SDimitry Andric // bit-pattern -- e.g. `fadd sNaN, 0.0 -> qNaN`. 2177*0fca6ea1SDimitry Andric // This makes the transformation incorrect since the original program would 2178*0fca6ea1SDimitry Andric // have preserved the exact NaN bit-pattern. 2179*0fca6ea1SDimitry Andric // Avoid the folding if X can have NaN elements. 2180*0fca6ea1SDimitry Andric if (Shuf.getType()->getElementType()->isFloatingPointTy() && 2181*0fca6ea1SDimitry Andric !isKnownNeverNaN(X, 0, SQ)) 2182*0fca6ea1SDimitry Andric return nullptr; 2183*0fca6ea1SDimitry Andric 21840b57cec5SDimitry Andric // Shuffle identity constants into the lanes that return the original value. 21850b57cec5SDimitry Andric // Example: shuf (mul X, {-1,-2,-3,-4}), X, {0,5,6,3} --> mul X, {-1,1,1,-4} 21860b57cec5SDimitry Andric // Example: shuf X, (add X, {-1,-2,-3,-4}), {0,1,6,7} --> add X, {0,0,-3,-4} 21870b57cec5SDimitry Andric // The existing binop constant vector remains in the same operand position. 21885ffd83dbSDimitry Andric ArrayRef<int> Mask = Shuf.getShuffleMask(); 21890b57cec5SDimitry Andric Constant *NewC = Op0IsBinop ? ConstantExpr::getShuffleVector(C, IdC, Mask) : 21900b57cec5SDimitry Andric ConstantExpr::getShuffleVector(IdC, C, Mask); 21910b57cec5SDimitry Andric 21920b57cec5SDimitry Andric bool MightCreatePoisonOrUB = 219306c3fb27SDimitry Andric is_contained(Mask, PoisonMaskElem) && 21940b57cec5SDimitry Andric (Instruction::isIntDivRem(BOpcode) || Instruction::isShift(BOpcode)); 21950b57cec5SDimitry Andric if (MightCreatePoisonOrUB) 2196e8d8bef9SDimitry Andric NewC = InstCombiner::getSafeVectorConstantForBinop(BOpcode, NewC, true); 21970b57cec5SDimitry Andric 21980b57cec5SDimitry Andric // shuf (bop X, C), X, M --> bop X, C' 21990b57cec5SDimitry Andric // shuf X, (bop X, C), M --> bop X, C' 22000b57cec5SDimitry Andric Instruction *NewBO = BinaryOperator::Create(BOpcode, X, NewC); 22010b57cec5SDimitry Andric NewBO->copyIRFlags(BO); 22020b57cec5SDimitry Andric 22030b57cec5SDimitry Andric // An undef shuffle mask element may propagate as an undef constant element in 22040b57cec5SDimitry Andric // the new binop. That would produce poison where the original code might not. 22050b57cec5SDimitry Andric // If we already made a safe constant, then there's no danger. 220606c3fb27SDimitry Andric if (is_contained(Mask, PoisonMaskElem) && !MightCreatePoisonOrUB) 22070b57cec5SDimitry Andric NewBO->dropPoisonGeneratingFlags(); 22080b57cec5SDimitry Andric return NewBO; 22090b57cec5SDimitry Andric } 22100b57cec5SDimitry Andric 22110b57cec5SDimitry Andric /// If we have an insert of a scalar to a non-zero element of an undefined 22120b57cec5SDimitry Andric /// vector and then shuffle that value, that's the same as inserting to the zero 22130b57cec5SDimitry Andric /// element and shuffling. Splatting from the zero element is recognized as the 22140b57cec5SDimitry Andric /// canonical form of splat. 22150b57cec5SDimitry Andric static Instruction *canonicalizeInsertSplat(ShuffleVectorInst &Shuf, 22160b57cec5SDimitry Andric InstCombiner::BuilderTy &Builder) { 22170b57cec5SDimitry Andric Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1); 22185ffd83dbSDimitry Andric ArrayRef<int> Mask = Shuf.getShuffleMask(); 22190b57cec5SDimitry Andric Value *X; 22200b57cec5SDimitry Andric uint64_t IndexC; 22210b57cec5SDimitry Andric 22220b57cec5SDimitry Andric // Match a shuffle that is a splat to a non-zero element. 2223*0fca6ea1SDimitry Andric if (!match(Op0, m_OneUse(m_InsertElt(m_Poison(), m_Value(X), 22240b57cec5SDimitry Andric m_ConstantInt(IndexC)))) || 2225*0fca6ea1SDimitry Andric !match(Op1, m_Poison()) || match(Mask, m_ZeroMask()) || IndexC == 0) 22260b57cec5SDimitry Andric return nullptr; 22270b57cec5SDimitry Andric 22285f757f3fSDimitry Andric // Insert into element 0 of a poison vector. 22295f757f3fSDimitry Andric PoisonValue *PoisonVec = PoisonValue::get(Shuf.getType()); 22305f757f3fSDimitry Andric Value *NewIns = Builder.CreateInsertElement(PoisonVec, X, (uint64_t)0); 22310b57cec5SDimitry Andric 2232*0fca6ea1SDimitry Andric // Splat from element 0. Any mask element that is poison remains poison. 22330b57cec5SDimitry Andric // For example: 2234*0fca6ea1SDimitry Andric // shuf (inselt poison, X, 2), _, <2,2,undef> 2235*0fca6ea1SDimitry Andric // --> shuf (inselt poison, X, 0), poison, <0,0,undef> 2236e8d8bef9SDimitry Andric unsigned NumMaskElts = 2237e8d8bef9SDimitry Andric cast<FixedVectorType>(Shuf.getType())->getNumElements(); 22385ffd83dbSDimitry Andric SmallVector<int, 16> NewMask(NumMaskElts, 0); 22390b57cec5SDimitry Andric for (unsigned i = 0; i != NumMaskElts; ++i) 224006c3fb27SDimitry Andric if (Mask[i] == PoisonMaskElem) 22415ffd83dbSDimitry Andric NewMask[i] = Mask[i]; 22420b57cec5SDimitry Andric 2243349cc55cSDimitry Andric return new ShuffleVectorInst(NewIns, NewMask); 22440b57cec5SDimitry Andric } 22450b57cec5SDimitry Andric 22460b57cec5SDimitry Andric /// Try to fold shuffles that are the equivalent of a vector select. 22470eae32dcSDimitry Andric Instruction *InstCombinerImpl::foldSelectShuffle(ShuffleVectorInst &Shuf) { 22480b57cec5SDimitry Andric if (!Shuf.isSelect()) 22490b57cec5SDimitry Andric return nullptr; 22500b57cec5SDimitry Andric 2251480093f4SDimitry Andric // Canonicalize to choose from operand 0 first unless operand 1 is undefined. 2252480093f4SDimitry Andric // Commuting undef to operand 0 conflicts with another canonicalization. 2253e8d8bef9SDimitry Andric unsigned NumElts = cast<FixedVectorType>(Shuf.getType())->getNumElements(); 2254fe6060f1SDimitry Andric if (!match(Shuf.getOperand(1), m_Undef()) && 2255480093f4SDimitry Andric Shuf.getMaskValue(0) >= (int)NumElts) { 22560b57cec5SDimitry Andric // TODO: Can we assert that both operands of a shuffle-select are not undef 22570b57cec5SDimitry Andric // (otherwise, it would have been folded by instsimplify? 22580b57cec5SDimitry Andric Shuf.commute(); 22590b57cec5SDimitry Andric return &Shuf; 22600b57cec5SDimitry Andric } 22610b57cec5SDimitry Andric 2262bdd1243dSDimitry Andric if (Instruction *I = foldSelectShuffleOfSelectShuffle(Shuf)) 2263bdd1243dSDimitry Andric return I; 2264bdd1243dSDimitry Andric 2265*0fca6ea1SDimitry Andric if (Instruction *I = foldSelectShuffleWith1Binop( 2266*0fca6ea1SDimitry Andric Shuf, getSimplifyQuery().getWithInstruction(&Shuf))) 22670b57cec5SDimitry Andric return I; 22680b57cec5SDimitry Andric 22690b57cec5SDimitry Andric BinaryOperator *B0, *B1; 22700b57cec5SDimitry Andric if (!match(Shuf.getOperand(0), m_BinOp(B0)) || 22710b57cec5SDimitry Andric !match(Shuf.getOperand(1), m_BinOp(B1))) 22720b57cec5SDimitry Andric return nullptr; 22730b57cec5SDimitry Andric 227481ad6265SDimitry Andric // If one operand is "0 - X", allow that to be viewed as "X * -1" 227581ad6265SDimitry Andric // (ConstantsAreOp1) by getAlternateBinop below. If the neg is not paired 227681ad6265SDimitry Andric // with a multiply, we will exit because C0/C1 will not be set. 22770b57cec5SDimitry Andric Value *X, *Y; 227881ad6265SDimitry Andric Constant *C0 = nullptr, *C1 = nullptr; 22790b57cec5SDimitry Andric bool ConstantsAreOp1; 228081ad6265SDimitry Andric if (match(B0, m_BinOp(m_Constant(C0), m_Value(X))) && 22810b57cec5SDimitry Andric match(B1, m_BinOp(m_Constant(C1), m_Value(Y)))) 22820b57cec5SDimitry Andric ConstantsAreOp1 = false; 228381ad6265SDimitry Andric else if (match(B0, m_CombineOr(m_BinOp(m_Value(X), m_Constant(C0)), 228481ad6265SDimitry Andric m_Neg(m_Value(X)))) && 228581ad6265SDimitry Andric match(B1, m_CombineOr(m_BinOp(m_Value(Y), m_Constant(C1)), 228681ad6265SDimitry Andric m_Neg(m_Value(Y))))) 228781ad6265SDimitry Andric ConstantsAreOp1 = true; 22880b57cec5SDimitry Andric else 22890b57cec5SDimitry Andric return nullptr; 22900b57cec5SDimitry Andric 22910b57cec5SDimitry Andric // We need matching binops to fold the lanes together. 22920b57cec5SDimitry Andric BinaryOperator::BinaryOps Opc0 = B0->getOpcode(); 22930b57cec5SDimitry Andric BinaryOperator::BinaryOps Opc1 = B1->getOpcode(); 22940b57cec5SDimitry Andric bool DropNSW = false; 22950b57cec5SDimitry Andric if (ConstantsAreOp1 && Opc0 != Opc1) { 22960b57cec5SDimitry Andric // TODO: We drop "nsw" if shift is converted into multiply because it may 22970b57cec5SDimitry Andric // not be correct when the shift amount is BitWidth - 1. We could examine 22980b57cec5SDimitry Andric // each vector element to determine if it is safe to keep that flag. 22990b57cec5SDimitry Andric if (Opc0 == Instruction::Shl || Opc1 == Instruction::Shl) 23000b57cec5SDimitry Andric DropNSW = true; 23010b57cec5SDimitry Andric if (BinopElts AltB0 = getAlternateBinop(B0, DL)) { 23020b57cec5SDimitry Andric assert(isa<Constant>(AltB0.Op1) && "Expecting constant with alt binop"); 23030b57cec5SDimitry Andric Opc0 = AltB0.Opcode; 23040b57cec5SDimitry Andric C0 = cast<Constant>(AltB0.Op1); 23050b57cec5SDimitry Andric } else if (BinopElts AltB1 = getAlternateBinop(B1, DL)) { 23060b57cec5SDimitry Andric assert(isa<Constant>(AltB1.Op1) && "Expecting constant with alt binop"); 23070b57cec5SDimitry Andric Opc1 = AltB1.Opcode; 23080b57cec5SDimitry Andric C1 = cast<Constant>(AltB1.Op1); 23090b57cec5SDimitry Andric } 23100b57cec5SDimitry Andric } 23110b57cec5SDimitry Andric 231281ad6265SDimitry Andric if (Opc0 != Opc1 || !C0 || !C1) 23130b57cec5SDimitry Andric return nullptr; 23140b57cec5SDimitry Andric 23150b57cec5SDimitry Andric // The opcodes must be the same. Use a new name to make that clear. 23160b57cec5SDimitry Andric BinaryOperator::BinaryOps BOpc = Opc0; 23170b57cec5SDimitry Andric 23180b57cec5SDimitry Andric // Select the constant elements needed for the single binop. 23195ffd83dbSDimitry Andric ArrayRef<int> Mask = Shuf.getShuffleMask(); 23200b57cec5SDimitry Andric Constant *NewC = ConstantExpr::getShuffleVector(C0, C1, Mask); 23210b57cec5SDimitry Andric 23220b57cec5SDimitry Andric // We are moving a binop after a shuffle. When a shuffle has an undefined 23230b57cec5SDimitry Andric // mask element, the result is undefined, but it is not poison or undefined 23240b57cec5SDimitry Andric // behavior. That is not necessarily true for div/rem/shift. 23250b57cec5SDimitry Andric bool MightCreatePoisonOrUB = 232606c3fb27SDimitry Andric is_contained(Mask, PoisonMaskElem) && 23270b57cec5SDimitry Andric (Instruction::isIntDivRem(BOpc) || Instruction::isShift(BOpc)); 23280b57cec5SDimitry Andric if (MightCreatePoisonOrUB) 2329e8d8bef9SDimitry Andric NewC = InstCombiner::getSafeVectorConstantForBinop(BOpc, NewC, 2330e8d8bef9SDimitry Andric ConstantsAreOp1); 23310b57cec5SDimitry Andric 23320b57cec5SDimitry Andric Value *V; 23330b57cec5SDimitry Andric if (X == Y) { 23340b57cec5SDimitry Andric // Remove a binop and the shuffle by rearranging the constant: 23350b57cec5SDimitry Andric // shuffle (op V, C0), (op V, C1), M --> op V, C' 23360b57cec5SDimitry Andric // shuffle (op C0, V), (op C1, V), M --> op C', V 23370b57cec5SDimitry Andric V = X; 23380b57cec5SDimitry Andric } else { 23390b57cec5SDimitry Andric // If there are 2 different variable operands, we must create a new shuffle 23400b57cec5SDimitry Andric // (select) first, so check uses to ensure that we don't end up with more 23410b57cec5SDimitry Andric // instructions than we started with. 23420b57cec5SDimitry Andric if (!B0->hasOneUse() && !B1->hasOneUse()) 23430b57cec5SDimitry Andric return nullptr; 23440b57cec5SDimitry Andric 23450b57cec5SDimitry Andric // If we use the original shuffle mask and op1 is *variable*, we would be 23460b57cec5SDimitry Andric // putting an undef into operand 1 of div/rem/shift. This is either UB or 23470b57cec5SDimitry Andric // poison. We do not have to guard against UB when *constants* are op1 23480b57cec5SDimitry Andric // because safe constants guarantee that we do not overflow sdiv/srem (and 23490b57cec5SDimitry Andric // there's no danger for other opcodes). 23500b57cec5SDimitry Andric // TODO: To allow this case, create a new shuffle mask with no undefs. 23510b57cec5SDimitry Andric if (MightCreatePoisonOrUB && !ConstantsAreOp1) 23520b57cec5SDimitry Andric return nullptr; 23530b57cec5SDimitry Andric 23540b57cec5SDimitry Andric // Note: In general, we do not create new shuffles in InstCombine because we 23550b57cec5SDimitry Andric // do not know if a target can lower an arbitrary shuffle optimally. In this 23560b57cec5SDimitry Andric // case, the shuffle uses the existing mask, so there is no additional risk. 23570b57cec5SDimitry Andric 23580b57cec5SDimitry Andric // Select the variable vectors first, then perform the binop: 23590b57cec5SDimitry Andric // shuffle (op X, C0), (op Y, C1), M --> op (shuffle X, Y, M), C' 23600b57cec5SDimitry Andric // shuffle (op C0, X), (op C1, Y), M --> op C', (shuffle X, Y, M) 23610b57cec5SDimitry Andric V = Builder.CreateShuffleVector(X, Y, Mask); 23620b57cec5SDimitry Andric } 23630b57cec5SDimitry Andric 23640eae32dcSDimitry Andric Value *NewBO = ConstantsAreOp1 ? Builder.CreateBinOp(BOpc, V, NewC) : 23650eae32dcSDimitry Andric Builder.CreateBinOp(BOpc, NewC, V); 23660b57cec5SDimitry Andric 23670b57cec5SDimitry Andric // Flags are intersected from the 2 source binops. But there are 2 exceptions: 23680b57cec5SDimitry Andric // 1. If we changed an opcode, poison conditions might have changed. 23690b57cec5SDimitry Andric // 2. If the shuffle had undef mask elements, the new binop might have undefs 23700b57cec5SDimitry Andric // where the original code did not. But if we already made a safe constant, 23710b57cec5SDimitry Andric // then there's no danger. 23720eae32dcSDimitry Andric if (auto *NewI = dyn_cast<Instruction>(NewBO)) { 23730eae32dcSDimitry Andric NewI->copyIRFlags(B0); 23740eae32dcSDimitry Andric NewI->andIRFlags(B1); 23750b57cec5SDimitry Andric if (DropNSW) 23760eae32dcSDimitry Andric NewI->setHasNoSignedWrap(false); 237706c3fb27SDimitry Andric if (is_contained(Mask, PoisonMaskElem) && !MightCreatePoisonOrUB) 23780eae32dcSDimitry Andric NewI->dropPoisonGeneratingFlags(); 23790eae32dcSDimitry Andric } 23800eae32dcSDimitry Andric return replaceInstUsesWith(Shuf, NewBO); 23810b57cec5SDimitry Andric } 23820b57cec5SDimitry Andric 23835ffd83dbSDimitry Andric /// Convert a narrowing shuffle of a bitcasted vector into a vector truncate. 23845ffd83dbSDimitry Andric /// Example (little endian): 23855ffd83dbSDimitry Andric /// shuf (bitcast <4 x i16> X to <8 x i8>), <0, 2, 4, 6> --> trunc X to <4 x i8> 23865ffd83dbSDimitry Andric static Instruction *foldTruncShuffle(ShuffleVectorInst &Shuf, 23875ffd83dbSDimitry Andric bool IsBigEndian) { 23885ffd83dbSDimitry Andric // This must be a bitcasted shuffle of 1 vector integer operand. 23895ffd83dbSDimitry Andric Type *DestType = Shuf.getType(); 23905ffd83dbSDimitry Andric Value *X; 23915ffd83dbSDimitry Andric if (!match(Shuf.getOperand(0), m_BitCast(m_Value(X))) || 2392*0fca6ea1SDimitry Andric !match(Shuf.getOperand(1), m_Poison()) || !DestType->isIntOrIntVectorTy()) 23935ffd83dbSDimitry Andric return nullptr; 23945ffd83dbSDimitry Andric 23955ffd83dbSDimitry Andric // The source type must have the same number of elements as the shuffle, 23965ffd83dbSDimitry Andric // and the source element type must be larger than the shuffle element type. 23975ffd83dbSDimitry Andric Type *SrcType = X->getType(); 23985ffd83dbSDimitry Andric if (!SrcType->isVectorTy() || !SrcType->isIntOrIntVectorTy() || 2399e8d8bef9SDimitry Andric cast<FixedVectorType>(SrcType)->getNumElements() != 2400e8d8bef9SDimitry Andric cast<FixedVectorType>(DestType)->getNumElements() || 24015ffd83dbSDimitry Andric SrcType->getScalarSizeInBits() % DestType->getScalarSizeInBits() != 0) 24025ffd83dbSDimitry Andric return nullptr; 24035ffd83dbSDimitry Andric 24045ffd83dbSDimitry Andric assert(Shuf.changesLength() && !Shuf.increasesLength() && 24055ffd83dbSDimitry Andric "Expected a shuffle that decreases length"); 24065ffd83dbSDimitry Andric 24075ffd83dbSDimitry Andric // Last, check that the mask chooses the correct low bits for each narrow 24085ffd83dbSDimitry Andric // element in the result. 24095ffd83dbSDimitry Andric uint64_t TruncRatio = 24105ffd83dbSDimitry Andric SrcType->getScalarSizeInBits() / DestType->getScalarSizeInBits(); 24115ffd83dbSDimitry Andric ArrayRef<int> Mask = Shuf.getShuffleMask(); 24125ffd83dbSDimitry Andric for (unsigned i = 0, e = Mask.size(); i != e; ++i) { 241306c3fb27SDimitry Andric if (Mask[i] == PoisonMaskElem) 24145ffd83dbSDimitry Andric continue; 24155ffd83dbSDimitry Andric uint64_t LSBIndex = IsBigEndian ? (i + 1) * TruncRatio - 1 : i * TruncRatio; 2416e8d8bef9SDimitry Andric assert(LSBIndex <= INT32_MAX && "Overflowed 32-bits"); 24175ffd83dbSDimitry Andric if (Mask[i] != (int)LSBIndex) 24185ffd83dbSDimitry Andric return nullptr; 24195ffd83dbSDimitry Andric } 24205ffd83dbSDimitry Andric 24215ffd83dbSDimitry Andric return new TruncInst(X, DestType); 24225ffd83dbSDimitry Andric } 24235ffd83dbSDimitry Andric 24240b57cec5SDimitry Andric /// Match a shuffle-select-shuffle pattern where the shuffles are widening and 2425*0fca6ea1SDimitry Andric /// narrowing (concatenating with poison and extracting back to the original 24260b57cec5SDimitry Andric /// length). This allows replacing the wide select with a narrow select. 24270b57cec5SDimitry Andric static Instruction *narrowVectorSelect(ShuffleVectorInst &Shuf, 24280b57cec5SDimitry Andric InstCombiner::BuilderTy &Builder) { 24290b57cec5SDimitry Andric // This must be a narrowing identity shuffle. It extracts the 1st N elements 24300b57cec5SDimitry Andric // of the 1st vector operand of a shuffle. 2431*0fca6ea1SDimitry Andric if (!match(Shuf.getOperand(1), m_Poison()) || !Shuf.isIdentityWithExtract()) 24320b57cec5SDimitry Andric return nullptr; 24330b57cec5SDimitry Andric 24340b57cec5SDimitry Andric // The vector being shuffled must be a vector select that we can eliminate. 24350b57cec5SDimitry Andric // TODO: The one-use requirement could be eased if X and/or Y are constants. 24360b57cec5SDimitry Andric Value *Cond, *X, *Y; 24370b57cec5SDimitry Andric if (!match(Shuf.getOperand(0), 24380b57cec5SDimitry Andric m_OneUse(m_Select(m_Value(Cond), m_Value(X), m_Value(Y))))) 24390b57cec5SDimitry Andric return nullptr; 24400b57cec5SDimitry Andric 2441*0fca6ea1SDimitry Andric // We need a narrow condition value. It must be extended with poison elements 24420b57cec5SDimitry Andric // and have the same number of elements as this shuffle. 2443e8d8bef9SDimitry Andric unsigned NarrowNumElts = 2444e8d8bef9SDimitry Andric cast<FixedVectorType>(Shuf.getType())->getNumElements(); 24450b57cec5SDimitry Andric Value *NarrowCond; 2446*0fca6ea1SDimitry Andric if (!match(Cond, m_OneUse(m_Shuffle(m_Value(NarrowCond), m_Poison()))) || 2447e8d8bef9SDimitry Andric cast<FixedVectorType>(NarrowCond->getType())->getNumElements() != 24485ffd83dbSDimitry Andric NarrowNumElts || 24490b57cec5SDimitry Andric !cast<ShuffleVectorInst>(Cond)->isIdentityWithPadding()) 24500b57cec5SDimitry Andric return nullptr; 24510b57cec5SDimitry Andric 2452*0fca6ea1SDimitry Andric // shuf (sel (shuf NarrowCond, poison, WideMask), X, Y), poison, NarrowMask) 2453*0fca6ea1SDimitry Andric // --> 2454*0fca6ea1SDimitry Andric // sel NarrowCond, (shuf X, poison, NarrowMask), (shuf Y, poison, NarrowMask) 2455e8d8bef9SDimitry Andric Value *NarrowX = Builder.CreateShuffleVector(X, Shuf.getShuffleMask()); 2456e8d8bef9SDimitry Andric Value *NarrowY = Builder.CreateShuffleVector(Y, Shuf.getShuffleMask()); 24570b57cec5SDimitry Andric return SelectInst::Create(NarrowCond, NarrowX, NarrowY); 24580b57cec5SDimitry Andric } 24590b57cec5SDimitry Andric 246006c3fb27SDimitry Andric /// Canonicalize FP negate/abs after shuffle. 246106c3fb27SDimitry Andric static Instruction *foldShuffleOfUnaryOps(ShuffleVectorInst &Shuf, 246281ad6265SDimitry Andric InstCombiner::BuilderTy &Builder) { 246306c3fb27SDimitry Andric auto *S0 = dyn_cast<Instruction>(Shuf.getOperand(0)); 246481ad6265SDimitry Andric Value *X; 246506c3fb27SDimitry Andric if (!S0 || !match(S0, m_CombineOr(m_FNeg(m_Value(X)), m_FAbs(m_Value(X))))) 246681ad6265SDimitry Andric return nullptr; 246781ad6265SDimitry Andric 246806c3fb27SDimitry Andric bool IsFNeg = S0->getOpcode() == Instruction::FNeg; 246906c3fb27SDimitry Andric 247006c3fb27SDimitry Andric // Match 1-input (unary) shuffle. 247106c3fb27SDimitry Andric // shuffle (fneg/fabs X), Mask --> fneg/fabs (shuffle X, Mask) 2472*0fca6ea1SDimitry Andric if (S0->hasOneUse() && match(Shuf.getOperand(1), m_Poison())) { 247381ad6265SDimitry Andric Value *NewShuf = Builder.CreateShuffleVector(X, Shuf.getShuffleMask()); 247406c3fb27SDimitry Andric if (IsFNeg) 247506c3fb27SDimitry Andric return UnaryOperator::CreateFNegFMF(NewShuf, S0); 247606c3fb27SDimitry Andric 247706c3fb27SDimitry Andric Function *FAbs = Intrinsic::getDeclaration(Shuf.getModule(), 247806c3fb27SDimitry Andric Intrinsic::fabs, Shuf.getType()); 247906c3fb27SDimitry Andric CallInst *NewF = CallInst::Create(FAbs, {NewShuf}); 248006c3fb27SDimitry Andric NewF->setFastMathFlags(S0->getFastMathFlags()); 248106c3fb27SDimitry Andric return NewF; 248281ad6265SDimitry Andric } 248381ad6265SDimitry Andric 248406c3fb27SDimitry Andric // Match 2-input (binary) shuffle. 248506c3fb27SDimitry Andric auto *S1 = dyn_cast<Instruction>(Shuf.getOperand(1)); 248681ad6265SDimitry Andric Value *Y; 248706c3fb27SDimitry Andric if (!S1 || !match(S1, m_CombineOr(m_FNeg(m_Value(Y)), m_FAbs(m_Value(Y)))) || 248806c3fb27SDimitry Andric S0->getOpcode() != S1->getOpcode() || 248906c3fb27SDimitry Andric (!S0->hasOneUse() && !S1->hasOneUse())) 249081ad6265SDimitry Andric return nullptr; 249181ad6265SDimitry Andric 249206c3fb27SDimitry Andric // shuf (fneg/fabs X), (fneg/fabs Y), Mask --> fneg/fabs (shuf X, Y, Mask) 249381ad6265SDimitry Andric Value *NewShuf = Builder.CreateShuffleVector(X, Y, Shuf.getShuffleMask()); 249406c3fb27SDimitry Andric Instruction *NewF; 249506c3fb27SDimitry Andric if (IsFNeg) { 249606c3fb27SDimitry Andric NewF = UnaryOperator::CreateFNeg(NewShuf); 249706c3fb27SDimitry Andric } else { 249806c3fb27SDimitry Andric Function *FAbs = Intrinsic::getDeclaration(Shuf.getModule(), 249906c3fb27SDimitry Andric Intrinsic::fabs, Shuf.getType()); 250006c3fb27SDimitry Andric NewF = CallInst::Create(FAbs, {NewShuf}); 250181ad6265SDimitry Andric } 250206c3fb27SDimitry Andric NewF->copyIRFlags(S0); 250306c3fb27SDimitry Andric NewF->andIRFlags(S1); 250406c3fb27SDimitry Andric return NewF; 250581ad6265SDimitry Andric } 250681ad6265SDimitry Andric 250781ad6265SDimitry Andric /// Canonicalize casts after shuffle. 250881ad6265SDimitry Andric static Instruction *foldCastShuffle(ShuffleVectorInst &Shuf, 250981ad6265SDimitry Andric InstCombiner::BuilderTy &Builder) { 251081ad6265SDimitry Andric // Do we have 2 matching cast operands? 251181ad6265SDimitry Andric auto *Cast0 = dyn_cast<CastInst>(Shuf.getOperand(0)); 251281ad6265SDimitry Andric auto *Cast1 = dyn_cast<CastInst>(Shuf.getOperand(1)); 251381ad6265SDimitry Andric if (!Cast0 || !Cast1 || Cast0->getOpcode() != Cast1->getOpcode() || 251481ad6265SDimitry Andric Cast0->getSrcTy() != Cast1->getSrcTy()) 251581ad6265SDimitry Andric return nullptr; 251681ad6265SDimitry Andric 251781ad6265SDimitry Andric // TODO: Allow other opcodes? That would require easing the type restrictions 251881ad6265SDimitry Andric // below here. 251981ad6265SDimitry Andric CastInst::CastOps CastOpcode = Cast0->getOpcode(); 252081ad6265SDimitry Andric switch (CastOpcode) { 252181ad6265SDimitry Andric case Instruction::FPToSI: 252281ad6265SDimitry Andric case Instruction::FPToUI: 252381ad6265SDimitry Andric case Instruction::SIToFP: 252481ad6265SDimitry Andric case Instruction::UIToFP: 252581ad6265SDimitry Andric break; 252681ad6265SDimitry Andric default: 252781ad6265SDimitry Andric return nullptr; 252881ad6265SDimitry Andric } 252981ad6265SDimitry Andric 253081ad6265SDimitry Andric VectorType *ShufTy = Shuf.getType(); 253181ad6265SDimitry Andric VectorType *ShufOpTy = cast<VectorType>(Shuf.getOperand(0)->getType()); 253281ad6265SDimitry Andric VectorType *CastSrcTy = cast<VectorType>(Cast0->getSrcTy()); 253381ad6265SDimitry Andric 253481ad6265SDimitry Andric // TODO: Allow length-increasing shuffles? 253581ad6265SDimitry Andric if (ShufTy->getElementCount().getKnownMinValue() > 253681ad6265SDimitry Andric ShufOpTy->getElementCount().getKnownMinValue()) 253781ad6265SDimitry Andric return nullptr; 253881ad6265SDimitry Andric 253981ad6265SDimitry Andric // TODO: Allow element-size-decreasing casts (ex: fptosi float to i8)? 254081ad6265SDimitry Andric assert(isa<FixedVectorType>(CastSrcTy) && isa<FixedVectorType>(ShufOpTy) && 254181ad6265SDimitry Andric "Expected fixed vector operands for casts and binary shuffle"); 254281ad6265SDimitry Andric if (CastSrcTy->getPrimitiveSizeInBits() > ShufOpTy->getPrimitiveSizeInBits()) 254381ad6265SDimitry Andric return nullptr; 254481ad6265SDimitry Andric 254581ad6265SDimitry Andric // At least one of the operands must have only one use (the shuffle). 254681ad6265SDimitry Andric if (!Cast0->hasOneUse() && !Cast1->hasOneUse()) 254781ad6265SDimitry Andric return nullptr; 254881ad6265SDimitry Andric 254981ad6265SDimitry Andric // shuffle (cast X), (cast Y), Mask --> cast (shuffle X, Y, Mask) 255081ad6265SDimitry Andric Value *X = Cast0->getOperand(0); 255181ad6265SDimitry Andric Value *Y = Cast1->getOperand(0); 255281ad6265SDimitry Andric Value *NewShuf = Builder.CreateShuffleVector(X, Y, Shuf.getShuffleMask()); 255381ad6265SDimitry Andric return CastInst::Create(CastOpcode, NewShuf, ShufTy); 255481ad6265SDimitry Andric } 255581ad6265SDimitry Andric 2556fe6060f1SDimitry Andric /// Try to fold an extract subvector operation. 25570b57cec5SDimitry Andric static Instruction *foldIdentityExtractShuffle(ShuffleVectorInst &Shuf) { 25580b57cec5SDimitry Andric Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1); 2559*0fca6ea1SDimitry Andric if (!Shuf.isIdentityWithExtract() || !match(Op1, m_Poison())) 25600b57cec5SDimitry Andric return nullptr; 25610b57cec5SDimitry Andric 2562fe6060f1SDimitry Andric // Check if we are extracting all bits of an inserted scalar: 2563fe6060f1SDimitry Andric // extract-subvec (bitcast (inselt ?, X, 0) --> bitcast X to subvec type 2564fe6060f1SDimitry Andric Value *X; 2565fe6060f1SDimitry Andric if (match(Op0, m_BitCast(m_InsertElt(m_Value(), m_Value(X), m_Zero()))) && 2566fe6060f1SDimitry Andric X->getType()->getPrimitiveSizeInBits() == 2567fe6060f1SDimitry Andric Shuf.getType()->getPrimitiveSizeInBits()) 2568fe6060f1SDimitry Andric return new BitCastInst(X, Shuf.getType()); 2569fe6060f1SDimitry Andric 2570fe6060f1SDimitry Andric // Try to combine 2 shuffles into 1 shuffle by concatenating a shuffle mask. 2571fe6060f1SDimitry Andric Value *Y; 25725ffd83dbSDimitry Andric ArrayRef<int> Mask; 25735ffd83dbSDimitry Andric if (!match(Op0, m_Shuffle(m_Value(X), m_Value(Y), m_Mask(Mask)))) 25740b57cec5SDimitry Andric return nullptr; 25750b57cec5SDimitry Andric 25760b57cec5SDimitry Andric // Be conservative with shuffle transforms. If we can't kill the 1st shuffle, 25770b57cec5SDimitry Andric // then combining may result in worse codegen. 25780b57cec5SDimitry Andric if (!Op0->hasOneUse()) 25790b57cec5SDimitry Andric return nullptr; 25800b57cec5SDimitry Andric 25810b57cec5SDimitry Andric // We are extracting a subvector from a shuffle. Remove excess elements from 25820b57cec5SDimitry Andric // the 1st shuffle mask to eliminate the extract. 25830b57cec5SDimitry Andric // 25840b57cec5SDimitry Andric // This transform is conservatively limited to identity extracts because we do 25850b57cec5SDimitry Andric // not allow arbitrary shuffle mask creation as a target-independent transform 25860b57cec5SDimitry Andric // (because we can't guarantee that will lower efficiently). 25870b57cec5SDimitry Andric // 2588*0fca6ea1SDimitry Andric // If the extracting shuffle has an poison mask element, it transfers to the 25890b57cec5SDimitry Andric // new shuffle mask. Otherwise, copy the original mask element. Example: 2590*0fca6ea1SDimitry Andric // shuf (shuf X, Y, <C0, C1, C2, poison, C4>), poison, <0, poison, 2, 3> --> 2591*0fca6ea1SDimitry Andric // shuf X, Y, <C0, poison, C2, poison> 2592e8d8bef9SDimitry Andric unsigned NumElts = cast<FixedVectorType>(Shuf.getType())->getNumElements(); 25935ffd83dbSDimitry Andric SmallVector<int, 16> NewMask(NumElts); 25945ffd83dbSDimitry Andric assert(NumElts < Mask.size() && 25950b57cec5SDimitry Andric "Identity with extract must have less elements than its inputs"); 25960b57cec5SDimitry Andric 25970b57cec5SDimitry Andric for (unsigned i = 0; i != NumElts; ++i) { 25985ffd83dbSDimitry Andric int ExtractMaskElt = Shuf.getMaskValue(i); 25995ffd83dbSDimitry Andric int MaskElt = Mask[i]; 260006c3fb27SDimitry Andric NewMask[i] = ExtractMaskElt == PoisonMaskElem ? ExtractMaskElt : MaskElt; 26010b57cec5SDimitry Andric } 26025ffd83dbSDimitry Andric return new ShuffleVectorInst(X, Y, NewMask); 26030b57cec5SDimitry Andric } 26040b57cec5SDimitry Andric 2605480093f4SDimitry Andric /// Try to replace a shuffle with an insertelement or try to replace a shuffle 2606480093f4SDimitry Andric /// operand with the operand of an insertelement. 26075ffd83dbSDimitry Andric static Instruction *foldShuffleWithInsert(ShuffleVectorInst &Shuf, 2608e8d8bef9SDimitry Andric InstCombinerImpl &IC) { 26090b57cec5SDimitry Andric Value *V0 = Shuf.getOperand(0), *V1 = Shuf.getOperand(1); 26105ffd83dbSDimitry Andric SmallVector<int, 16> Mask; 26115ffd83dbSDimitry Andric Shuf.getShuffleMask(Mask); 26120b57cec5SDimitry Andric 26130b57cec5SDimitry Andric int NumElts = Mask.size(); 2614349cc55cSDimitry Andric int InpNumElts = cast<FixedVectorType>(V0->getType())->getNumElements(); 26150b57cec5SDimitry Andric 2616480093f4SDimitry Andric // This is a specialization of a fold in SimplifyDemandedVectorElts. We may 2617480093f4SDimitry Andric // not be able to handle it there if the insertelement has >1 use. 2618480093f4SDimitry Andric // If the shuffle has an insertelement operand but does not choose the 2619480093f4SDimitry Andric // inserted scalar element from that value, then we can replace that shuffle 2620480093f4SDimitry Andric // operand with the source vector of the insertelement. 2621480093f4SDimitry Andric Value *X; 2622480093f4SDimitry Andric uint64_t IdxC; 26235ffd83dbSDimitry Andric if (match(V0, m_InsertElt(m_Value(X), m_Value(), m_ConstantInt(IdxC)))) { 2624480093f4SDimitry Andric // shuf (inselt X, ?, IdxC), ?, Mask --> shuf X, ?, Mask 2625e8d8bef9SDimitry Andric if (!is_contained(Mask, (int)IdxC)) 26265ffd83dbSDimitry Andric return IC.replaceOperand(Shuf, 0, X); 2627480093f4SDimitry Andric } 26285ffd83dbSDimitry Andric if (match(V1, m_InsertElt(m_Value(X), m_Value(), m_ConstantInt(IdxC)))) { 2629480093f4SDimitry Andric // Offset the index constant by the vector width because we are checking for 2630480093f4SDimitry Andric // accesses to the 2nd vector input of the shuffle. 2631349cc55cSDimitry Andric IdxC += InpNumElts; 2632480093f4SDimitry Andric // shuf ?, (inselt X, ?, IdxC), Mask --> shuf ?, X, Mask 2633e8d8bef9SDimitry Andric if (!is_contained(Mask, (int)IdxC)) 26345ffd83dbSDimitry Andric return IC.replaceOperand(Shuf, 1, X); 2635480093f4SDimitry Andric } 2636349cc55cSDimitry Andric // For the rest of the transform, the shuffle must not change vector sizes. 2637349cc55cSDimitry Andric // TODO: This restriction could be removed if the insert has only one use 2638349cc55cSDimitry Andric // (because the transform would require a new length-changing shuffle). 2639349cc55cSDimitry Andric if (NumElts != InpNumElts) 2640349cc55cSDimitry Andric return nullptr; 2641480093f4SDimitry Andric 26420b57cec5SDimitry Andric // shuffle (insert ?, Scalar, IndexC), V1, Mask --> insert V1, Scalar, IndexC' 26430b57cec5SDimitry Andric auto isShufflingScalarIntoOp1 = [&](Value *&Scalar, ConstantInt *&IndexC) { 26440b57cec5SDimitry Andric // We need an insertelement with a constant index. 26455ffd83dbSDimitry Andric if (!match(V0, m_InsertElt(m_Value(), m_Value(Scalar), 26460b57cec5SDimitry Andric m_ConstantInt(IndexC)))) 26470b57cec5SDimitry Andric return false; 26480b57cec5SDimitry Andric 26490b57cec5SDimitry Andric // Test the shuffle mask to see if it splices the inserted scalar into the 26500b57cec5SDimitry Andric // operand 1 vector of the shuffle. 26510b57cec5SDimitry Andric int NewInsIndex = -1; 26520b57cec5SDimitry Andric for (int i = 0; i != NumElts; ++i) { 26530b57cec5SDimitry Andric // Ignore undef mask elements. 26540b57cec5SDimitry Andric if (Mask[i] == -1) 26550b57cec5SDimitry Andric continue; 26560b57cec5SDimitry Andric 26570b57cec5SDimitry Andric // The shuffle takes elements of operand 1 without lane changes. 26580b57cec5SDimitry Andric if (Mask[i] == NumElts + i) 26590b57cec5SDimitry Andric continue; 26600b57cec5SDimitry Andric 26610b57cec5SDimitry Andric // The shuffle must choose the inserted scalar exactly once. 26620b57cec5SDimitry Andric if (NewInsIndex != -1 || Mask[i] != IndexC->getSExtValue()) 26630b57cec5SDimitry Andric return false; 26640b57cec5SDimitry Andric 26650b57cec5SDimitry Andric // The shuffle is placing the inserted scalar into element i. 26660b57cec5SDimitry Andric NewInsIndex = i; 26670b57cec5SDimitry Andric } 26680b57cec5SDimitry Andric 26690b57cec5SDimitry Andric assert(NewInsIndex != -1 && "Did not fold shuffle with unused operand?"); 26700b57cec5SDimitry Andric 26710b57cec5SDimitry Andric // Index is updated to the potentially translated insertion lane. 2672cb14a3feSDimitry Andric IndexC = ConstantInt::get(IndexC->getIntegerType(), NewInsIndex); 26730b57cec5SDimitry Andric return true; 26740b57cec5SDimitry Andric }; 26750b57cec5SDimitry Andric 26760b57cec5SDimitry Andric // If the shuffle is unnecessary, insert the scalar operand directly into 26770b57cec5SDimitry Andric // operand 1 of the shuffle. Example: 26780b57cec5SDimitry Andric // shuffle (insert ?, S, 1), V1, <1, 5, 6, 7> --> insert V1, S, 0 26790b57cec5SDimitry Andric Value *Scalar; 26800b57cec5SDimitry Andric ConstantInt *IndexC; 26810b57cec5SDimitry Andric if (isShufflingScalarIntoOp1(Scalar, IndexC)) 26820b57cec5SDimitry Andric return InsertElementInst::Create(V1, Scalar, IndexC); 26830b57cec5SDimitry Andric 26840b57cec5SDimitry Andric // Try again after commuting shuffle. Example: 26850b57cec5SDimitry Andric // shuffle V0, (insert ?, S, 0), <0, 1, 2, 4> --> 26860b57cec5SDimitry Andric // shuffle (insert ?, S, 0), V0, <4, 5, 6, 0> --> insert V0, S, 3 26870b57cec5SDimitry Andric std::swap(V0, V1); 26880b57cec5SDimitry Andric ShuffleVectorInst::commuteShuffleMask(Mask, NumElts); 26890b57cec5SDimitry Andric if (isShufflingScalarIntoOp1(Scalar, IndexC)) 26900b57cec5SDimitry Andric return InsertElementInst::Create(V1, Scalar, IndexC); 26910b57cec5SDimitry Andric 26920b57cec5SDimitry Andric return nullptr; 26930b57cec5SDimitry Andric } 26940b57cec5SDimitry Andric 26950b57cec5SDimitry Andric static Instruction *foldIdentityPaddedShuffles(ShuffleVectorInst &Shuf) { 26960b57cec5SDimitry Andric // Match the operands as identity with padding (also known as concatenation 26970b57cec5SDimitry Andric // with undef) shuffles of the same source type. The backend is expected to 26980b57cec5SDimitry Andric // recreate these concatenations from a shuffle of narrow operands. 26990b57cec5SDimitry Andric auto *Shuffle0 = dyn_cast<ShuffleVectorInst>(Shuf.getOperand(0)); 27000b57cec5SDimitry Andric auto *Shuffle1 = dyn_cast<ShuffleVectorInst>(Shuf.getOperand(1)); 27010b57cec5SDimitry Andric if (!Shuffle0 || !Shuffle0->isIdentityWithPadding() || 27020b57cec5SDimitry Andric !Shuffle1 || !Shuffle1->isIdentityWithPadding()) 27030b57cec5SDimitry Andric return nullptr; 27040b57cec5SDimitry Andric 27050b57cec5SDimitry Andric // We limit this transform to power-of-2 types because we expect that the 27060b57cec5SDimitry Andric // backend can convert the simplified IR patterns to identical nodes as the 27070b57cec5SDimitry Andric // original IR. 27080b57cec5SDimitry Andric // TODO: If we can verify the same behavior for arbitrary types, the 27090b57cec5SDimitry Andric // power-of-2 checks can be removed. 27100b57cec5SDimitry Andric Value *X = Shuffle0->getOperand(0); 27110b57cec5SDimitry Andric Value *Y = Shuffle1->getOperand(0); 27120b57cec5SDimitry Andric if (X->getType() != Y->getType() || 2713e8d8bef9SDimitry Andric !isPowerOf2_32(cast<FixedVectorType>(Shuf.getType())->getNumElements()) || 2714e8d8bef9SDimitry Andric !isPowerOf2_32( 2715e8d8bef9SDimitry Andric cast<FixedVectorType>(Shuffle0->getType())->getNumElements()) || 2716e8d8bef9SDimitry Andric !isPowerOf2_32(cast<FixedVectorType>(X->getType())->getNumElements()) || 2717fe6060f1SDimitry Andric match(X, m_Undef()) || match(Y, m_Undef())) 27180b57cec5SDimitry Andric return nullptr; 2719fe6060f1SDimitry Andric assert(match(Shuffle0->getOperand(1), m_Undef()) && 2720fe6060f1SDimitry Andric match(Shuffle1->getOperand(1), m_Undef()) && 27210b57cec5SDimitry Andric "Unexpected operand for identity shuffle"); 27220b57cec5SDimitry Andric 27230b57cec5SDimitry Andric // This is a shuffle of 2 widening shuffles. We can shuffle the narrow source 27240b57cec5SDimitry Andric // operands directly by adjusting the shuffle mask to account for the narrower 27250b57cec5SDimitry Andric // types: 27260b57cec5SDimitry Andric // shuf (widen X), (widen Y), Mask --> shuf X, Y, Mask' 2727e8d8bef9SDimitry Andric int NarrowElts = cast<FixedVectorType>(X->getType())->getNumElements(); 2728e8d8bef9SDimitry Andric int WideElts = cast<FixedVectorType>(Shuffle0->getType())->getNumElements(); 27290b57cec5SDimitry Andric assert(WideElts > NarrowElts && "Unexpected types for identity with padding"); 27300b57cec5SDimitry Andric 27315ffd83dbSDimitry Andric ArrayRef<int> Mask = Shuf.getShuffleMask(); 27325ffd83dbSDimitry Andric SmallVector<int, 16> NewMask(Mask.size(), -1); 27330b57cec5SDimitry Andric for (int i = 0, e = Mask.size(); i != e; ++i) { 27340b57cec5SDimitry Andric if (Mask[i] == -1) 27350b57cec5SDimitry Andric continue; 27360b57cec5SDimitry Andric 27370b57cec5SDimitry Andric // If this shuffle is choosing an undef element from 1 of the sources, that 27380b57cec5SDimitry Andric // element is undef. 27390b57cec5SDimitry Andric if (Mask[i] < WideElts) { 27400b57cec5SDimitry Andric if (Shuffle0->getMaskValue(Mask[i]) == -1) 27410b57cec5SDimitry Andric continue; 27420b57cec5SDimitry Andric } else { 27430b57cec5SDimitry Andric if (Shuffle1->getMaskValue(Mask[i] - WideElts) == -1) 27440b57cec5SDimitry Andric continue; 27450b57cec5SDimitry Andric } 27460b57cec5SDimitry Andric 27470b57cec5SDimitry Andric // If this shuffle is choosing from the 1st narrow op, the mask element is 27480b57cec5SDimitry Andric // the same. If this shuffle is choosing from the 2nd narrow op, the mask 27490b57cec5SDimitry Andric // element is offset down to adjust for the narrow vector widths. 27500b57cec5SDimitry Andric if (Mask[i] < WideElts) { 27510b57cec5SDimitry Andric assert(Mask[i] < NarrowElts && "Unexpected shuffle mask"); 27525ffd83dbSDimitry Andric NewMask[i] = Mask[i]; 27530b57cec5SDimitry Andric } else { 27540b57cec5SDimitry Andric assert(Mask[i] < (WideElts + NarrowElts) && "Unexpected shuffle mask"); 27555ffd83dbSDimitry Andric NewMask[i] = Mask[i] - (WideElts - NarrowElts); 27560b57cec5SDimitry Andric } 27570b57cec5SDimitry Andric } 27585ffd83dbSDimitry Andric return new ShuffleVectorInst(X, Y, NewMask); 27590b57cec5SDimitry Andric } 27600b57cec5SDimitry Andric 2761bdd1243dSDimitry Andric // Splatting the first element of the result of a BinOp, where any of the 2762bdd1243dSDimitry Andric // BinOp's operands are the result of a first element splat can be simplified to 2763bdd1243dSDimitry Andric // splatting the first element of the result of the BinOp 2764bdd1243dSDimitry Andric Instruction *InstCombinerImpl::simplifyBinOpSplats(ShuffleVectorInst &SVI) { 2765*0fca6ea1SDimitry Andric if (!match(SVI.getOperand(1), m_Poison()) || 276606c3fb27SDimitry Andric !match(SVI.getShuffleMask(), m_ZeroMask()) || 276706c3fb27SDimitry Andric !SVI.getOperand(0)->hasOneUse()) 2768bdd1243dSDimitry Andric return nullptr; 2769bdd1243dSDimitry Andric 2770bdd1243dSDimitry Andric Value *Op0 = SVI.getOperand(0); 2771bdd1243dSDimitry Andric Value *X, *Y; 2772*0fca6ea1SDimitry Andric if (!match(Op0, m_BinOp(m_Shuffle(m_Value(X), m_Poison(), m_ZeroMask()), 2773bdd1243dSDimitry Andric m_Value(Y))) && 2774bdd1243dSDimitry Andric !match(Op0, m_BinOp(m_Value(X), 2775*0fca6ea1SDimitry Andric m_Shuffle(m_Value(Y), m_Poison(), m_ZeroMask())))) 2776bdd1243dSDimitry Andric return nullptr; 2777bdd1243dSDimitry Andric if (X->getType() != Y->getType()) 2778bdd1243dSDimitry Andric return nullptr; 2779bdd1243dSDimitry Andric 2780bdd1243dSDimitry Andric auto *BinOp = cast<BinaryOperator>(Op0); 2781bdd1243dSDimitry Andric if (!isSafeToSpeculativelyExecute(BinOp)) 2782bdd1243dSDimitry Andric return nullptr; 2783bdd1243dSDimitry Andric 2784bdd1243dSDimitry Andric Value *NewBO = Builder.CreateBinOp(BinOp->getOpcode(), X, Y); 2785bdd1243dSDimitry Andric if (auto NewBOI = dyn_cast<Instruction>(NewBO)) 2786bdd1243dSDimitry Andric NewBOI->copyIRFlags(BinOp); 2787bdd1243dSDimitry Andric 2788bdd1243dSDimitry Andric return new ShuffleVectorInst(NewBO, SVI.getShuffleMask()); 2789bdd1243dSDimitry Andric } 2790bdd1243dSDimitry Andric 2791e8d8bef9SDimitry Andric Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) { 27920b57cec5SDimitry Andric Value *LHS = SVI.getOperand(0); 27930b57cec5SDimitry Andric Value *RHS = SVI.getOperand(1); 27945ffd83dbSDimitry Andric SimplifyQuery ShufQuery = SQ.getWithInstruction(&SVI); 279581ad6265SDimitry Andric if (auto *V = simplifyShuffleVectorInst(LHS, RHS, SVI.getShuffleMask(), 27965ffd83dbSDimitry Andric SVI.getType(), ShufQuery)) 27970b57cec5SDimitry Andric return replaceInstUsesWith(SVI, V); 27980b57cec5SDimitry Andric 2799bdd1243dSDimitry Andric if (Instruction *I = simplifyBinOpSplats(SVI)) 2800bdd1243dSDimitry Andric return I; 2801bdd1243dSDimitry Andric 2802cb14a3feSDimitry Andric // Canonicalize splat shuffle to use poison RHS. Handle this explicitly in 2803cb14a3feSDimitry Andric // order to support scalable vectors. 2804cb14a3feSDimitry Andric if (match(SVI.getShuffleMask(), m_ZeroMask()) && !isa<PoisonValue>(RHS)) 2805cb14a3feSDimitry Andric return replaceOperand(SVI, 1, PoisonValue::get(RHS->getType())); 2806cb14a3feSDimitry Andric 2807e8d8bef9SDimitry Andric if (isa<ScalableVectorType>(LHS->getType())) 2808e8d8bef9SDimitry Andric return nullptr; 2809e8d8bef9SDimitry Andric 2810e8d8bef9SDimitry Andric unsigned VWidth = cast<FixedVectorType>(SVI.getType())->getNumElements(); 2811e8d8bef9SDimitry Andric unsigned LHSWidth = cast<FixedVectorType>(LHS->getType())->getNumElements(); 2812fe6060f1SDimitry Andric 2813fe6060f1SDimitry Andric // shuffle (bitcast X), (bitcast Y), Mask --> bitcast (shuffle X, Y, Mask) 2814fe6060f1SDimitry Andric // 2815fe6060f1SDimitry Andric // if X and Y are of the same (vector) type, and the element size is not 2816fe6060f1SDimitry Andric // changed by the bitcasts, we can distribute the bitcasts through the 2817fe6060f1SDimitry Andric // shuffle, hopefully reducing the number of instructions. We make sure that 2818fe6060f1SDimitry Andric // at least one bitcast only has one use, so we don't *increase* the number of 2819fe6060f1SDimitry Andric // instructions here. 2820fe6060f1SDimitry Andric Value *X, *Y; 2821fe6060f1SDimitry Andric if (match(LHS, m_BitCast(m_Value(X))) && match(RHS, m_BitCast(m_Value(Y))) && 2822fe6060f1SDimitry Andric X->getType()->isVectorTy() && X->getType() == Y->getType() && 2823fe6060f1SDimitry Andric X->getType()->getScalarSizeInBits() == 2824fe6060f1SDimitry Andric SVI.getType()->getScalarSizeInBits() && 2825fe6060f1SDimitry Andric (LHS->hasOneUse() || RHS->hasOneUse())) { 2826fe6060f1SDimitry Andric Value *V = Builder.CreateShuffleVector(X, Y, SVI.getShuffleMask(), 2827fe6060f1SDimitry Andric SVI.getName() + ".uncasted"); 2828fe6060f1SDimitry Andric return new BitCastInst(V, SVI.getType()); 2829fe6060f1SDimitry Andric } 2830fe6060f1SDimitry Andric 28315ffd83dbSDimitry Andric ArrayRef<int> Mask = SVI.getShuffleMask(); 28325ffd83dbSDimitry Andric 28335ffd83dbSDimitry Andric // Peek through a bitcasted shuffle operand by scaling the mask. If the 28345ffd83dbSDimitry Andric // simulated shuffle can simplify, then this shuffle is unnecessary: 28355ffd83dbSDimitry Andric // shuf (bitcast X), undef, Mask --> bitcast X' 28365ffd83dbSDimitry Andric // TODO: This could be extended to allow length-changing shuffles. 28375ffd83dbSDimitry Andric // The transform might also be obsoleted if we allowed canonicalization 28385ffd83dbSDimitry Andric // of bitcasted shuffles. 28395ffd83dbSDimitry Andric if (match(LHS, m_BitCast(m_Value(X))) && match(RHS, m_Undef()) && 28405ffd83dbSDimitry Andric X->getType()->isVectorTy() && VWidth == LHSWidth) { 28415ffd83dbSDimitry Andric // Try to create a scaled mask constant. 2842e8d8bef9SDimitry Andric auto *XType = cast<FixedVectorType>(X->getType()); 28435ffd83dbSDimitry Andric unsigned XNumElts = XType->getNumElements(); 28445ffd83dbSDimitry Andric SmallVector<int, 16> ScaledMask; 2845*0fca6ea1SDimitry Andric if (scaleShuffleMaskElts(XNumElts, Mask, ScaledMask)) { 28465ffd83dbSDimitry Andric // If the shuffled source vector simplifies, cast that value to this 28475ffd83dbSDimitry Andric // shuffle's type. 284881ad6265SDimitry Andric if (auto *V = simplifyShuffleVectorInst(X, UndefValue::get(XType), 28495ffd83dbSDimitry Andric ScaledMask, XType, ShufQuery)) 28505ffd83dbSDimitry Andric return BitCastInst::Create(Instruction::BitCast, V, SVI.getType()); 28515ffd83dbSDimitry Andric } 28525ffd83dbSDimitry Andric } 28535ffd83dbSDimitry Andric 2854fe6060f1SDimitry Andric // shuffle x, x, mask --> shuffle x, undef, mask' 2855480093f4SDimitry Andric if (LHS == RHS) { 2856fe6060f1SDimitry Andric assert(!match(RHS, m_Undef()) && 2857fe6060f1SDimitry Andric "Shuffle with 2 undef ops not simplified?"); 2858349cc55cSDimitry Andric return new ShuffleVectorInst(LHS, createUnaryMask(Mask, LHSWidth)); 28590b57cec5SDimitry Andric } 28600b57cec5SDimitry Andric 2861480093f4SDimitry Andric // shuffle undef, x, mask --> shuffle x, undef, mask' 2862fe6060f1SDimitry Andric if (match(LHS, m_Undef())) { 2863480093f4SDimitry Andric SVI.commute(); 2864480093f4SDimitry Andric return &SVI; 2865480093f4SDimitry Andric } 2866480093f4SDimitry Andric 28670b57cec5SDimitry Andric if (Instruction *I = canonicalizeInsertSplat(SVI, Builder)) 28680b57cec5SDimitry Andric return I; 28690b57cec5SDimitry Andric 28700eae32dcSDimitry Andric if (Instruction *I = foldSelectShuffle(SVI)) 28710b57cec5SDimitry Andric return I; 28720b57cec5SDimitry Andric 28735ffd83dbSDimitry Andric if (Instruction *I = foldTruncShuffle(SVI, DL.isBigEndian())) 28745ffd83dbSDimitry Andric return I; 28755ffd83dbSDimitry Andric 28760b57cec5SDimitry Andric if (Instruction *I = narrowVectorSelect(SVI, Builder)) 28770b57cec5SDimitry Andric return I; 28780b57cec5SDimitry Andric 287906c3fb27SDimitry Andric if (Instruction *I = foldShuffleOfUnaryOps(SVI, Builder)) 288081ad6265SDimitry Andric return I; 288181ad6265SDimitry Andric 288281ad6265SDimitry Andric if (Instruction *I = foldCastShuffle(SVI, Builder)) 288381ad6265SDimitry Andric return I; 288481ad6265SDimitry Andric 2885cb14a3feSDimitry Andric APInt PoisonElts(VWidth, 0); 2886349cc55cSDimitry Andric APInt AllOnesEltMask(APInt::getAllOnes(VWidth)); 2887cb14a3feSDimitry Andric if (Value *V = SimplifyDemandedVectorElts(&SVI, AllOnesEltMask, PoisonElts)) { 28880b57cec5SDimitry Andric if (V != &SVI) 28890b57cec5SDimitry Andric return replaceInstUsesWith(SVI, V); 28900b57cec5SDimitry Andric return &SVI; 28910b57cec5SDimitry Andric } 28920b57cec5SDimitry Andric 28930b57cec5SDimitry Andric if (Instruction *I = foldIdentityExtractShuffle(SVI)) 28940b57cec5SDimitry Andric return I; 28950b57cec5SDimitry Andric 28960b57cec5SDimitry Andric // These transforms have the potential to lose undef knowledge, so they are 28970b57cec5SDimitry Andric // intentionally placed after SimplifyDemandedVectorElts(). 28985ffd83dbSDimitry Andric if (Instruction *I = foldShuffleWithInsert(SVI, *this)) 28990b57cec5SDimitry Andric return I; 29000b57cec5SDimitry Andric if (Instruction *I = foldIdentityPaddedShuffles(SVI)) 29010b57cec5SDimitry Andric return I; 29020b57cec5SDimitry Andric 2903*0fca6ea1SDimitry Andric if (match(RHS, m_Poison()) && canEvaluateShuffled(LHS, Mask)) { 290406c3fb27SDimitry Andric Value *V = evaluateInDifferentElementOrder(LHS, Mask, Builder); 29050b57cec5SDimitry Andric return replaceInstUsesWith(SVI, V); 29060b57cec5SDimitry Andric } 29070b57cec5SDimitry Andric 29080b57cec5SDimitry Andric // SROA generates shuffle+bitcast when the extracted sub-vector is bitcast to 29090b57cec5SDimitry Andric // a non-vector type. We can instead bitcast the original vector followed by 29100b57cec5SDimitry Andric // an extract of the desired element: 29110b57cec5SDimitry Andric // 29120b57cec5SDimitry Andric // %sroa = shufflevector <16 x i8> %in, <16 x i8> undef, 29130b57cec5SDimitry Andric // <4 x i32> <i32 0, i32 1, i32 2, i32 3> 29140b57cec5SDimitry Andric // %1 = bitcast <4 x i8> %sroa to i32 29150b57cec5SDimitry Andric // Becomes: 29160b57cec5SDimitry Andric // %bc = bitcast <16 x i8> %in to <4 x i32> 29170b57cec5SDimitry Andric // %ext = extractelement <4 x i32> %bc, i32 0 29180b57cec5SDimitry Andric // 29190b57cec5SDimitry Andric // If the shuffle is extracting a contiguous range of values from the input 29200b57cec5SDimitry Andric // vector then each use which is a bitcast of the extracted size can be 29210b57cec5SDimitry Andric // replaced. This will work if the vector types are compatible, and the begin 29220b57cec5SDimitry Andric // index is aligned to a value in the casted vector type. If the begin index 29230b57cec5SDimitry Andric // isn't aligned then we can shuffle the original vector (keeping the same 29240b57cec5SDimitry Andric // vector type) before extracting. 29250b57cec5SDimitry Andric // 29260b57cec5SDimitry Andric // This code will bail out if the target type is fundamentally incompatible 29270b57cec5SDimitry Andric // with vectors of the source type. 29280b57cec5SDimitry Andric // 29290b57cec5SDimitry Andric // Example of <16 x i8>, target type i32: 29300b57cec5SDimitry Andric // Index range [4,8): v-----------v Will work. 29310b57cec5SDimitry Andric // +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ 29320b57cec5SDimitry Andric // <16 x i8>: | | | | | | | | | | | | | | | | | 29330b57cec5SDimitry Andric // <4 x i32>: | | | | | 29340b57cec5SDimitry Andric // +-----------+-----------+-----------+-----------+ 29350b57cec5SDimitry Andric // Index range [6,10): ^-----------^ Needs an extra shuffle. 29360b57cec5SDimitry Andric // Target type i40: ^--------------^ Won't work, bail. 29370b57cec5SDimitry Andric bool MadeChange = false; 29380b57cec5SDimitry Andric if (isShuffleExtractingFromLHS(SVI, Mask)) { 29390b57cec5SDimitry Andric Value *V = LHS; 29400b57cec5SDimitry Andric unsigned MaskElems = Mask.size(); 2941e8d8bef9SDimitry Andric auto *SrcTy = cast<FixedVectorType>(V->getType()); 2942bdd1243dSDimitry Andric unsigned VecBitWidth = SrcTy->getPrimitiveSizeInBits().getFixedValue(); 29430b57cec5SDimitry Andric unsigned SrcElemBitWidth = DL.getTypeSizeInBits(SrcTy->getElementType()); 29440b57cec5SDimitry Andric assert(SrcElemBitWidth && "vector elements must have a bitwidth"); 29450b57cec5SDimitry Andric unsigned SrcNumElems = SrcTy->getNumElements(); 29460b57cec5SDimitry Andric SmallVector<BitCastInst *, 8> BCs; 29470b57cec5SDimitry Andric DenseMap<Type *, Value *> NewBCs; 29480b57cec5SDimitry Andric for (User *U : SVI.users()) 29490b57cec5SDimitry Andric if (BitCastInst *BC = dyn_cast<BitCastInst>(U)) 29500b57cec5SDimitry Andric if (!BC->use_empty()) 29510b57cec5SDimitry Andric // Only visit bitcasts that weren't previously handled. 29520b57cec5SDimitry Andric BCs.push_back(BC); 29530b57cec5SDimitry Andric for (BitCastInst *BC : BCs) { 29540b57cec5SDimitry Andric unsigned BegIdx = Mask.front(); 29550b57cec5SDimitry Andric Type *TgtTy = BC->getDestTy(); 29560b57cec5SDimitry Andric unsigned TgtElemBitWidth = DL.getTypeSizeInBits(TgtTy); 29570b57cec5SDimitry Andric if (!TgtElemBitWidth) 29580b57cec5SDimitry Andric continue; 29590b57cec5SDimitry Andric unsigned TgtNumElems = VecBitWidth / TgtElemBitWidth; 29600b57cec5SDimitry Andric bool VecBitWidthsEqual = VecBitWidth == TgtNumElems * TgtElemBitWidth; 29610b57cec5SDimitry Andric bool BegIsAligned = 0 == ((SrcElemBitWidth * BegIdx) % TgtElemBitWidth); 29620b57cec5SDimitry Andric if (!VecBitWidthsEqual) 29630b57cec5SDimitry Andric continue; 29640b57cec5SDimitry Andric if (!VectorType::isValidElementType(TgtTy)) 29650b57cec5SDimitry Andric continue; 29665ffd83dbSDimitry Andric auto *CastSrcTy = FixedVectorType::get(TgtTy, TgtNumElems); 29670b57cec5SDimitry Andric if (!BegIsAligned) { 29680b57cec5SDimitry Andric // Shuffle the input so [0,NumElements) contains the output, and 29690b57cec5SDimitry Andric // [NumElems,SrcNumElems) is undef. 29705ffd83dbSDimitry Andric SmallVector<int, 16> ShuffleMask(SrcNumElems, -1); 29710b57cec5SDimitry Andric for (unsigned I = 0, E = MaskElems, Idx = BegIdx; I != E; ++Idx, ++I) 29725ffd83dbSDimitry Andric ShuffleMask[I] = Idx; 2973e8d8bef9SDimitry Andric V = Builder.CreateShuffleVector(V, ShuffleMask, 29740b57cec5SDimitry Andric SVI.getName() + ".extract"); 29750b57cec5SDimitry Andric BegIdx = 0; 29760b57cec5SDimitry Andric } 29770b57cec5SDimitry Andric unsigned SrcElemsPerTgtElem = TgtElemBitWidth / SrcElemBitWidth; 29780b57cec5SDimitry Andric assert(SrcElemsPerTgtElem); 29790b57cec5SDimitry Andric BegIdx /= SrcElemsPerTgtElem; 298006c3fb27SDimitry Andric bool BCAlreadyExists = NewBCs.contains(CastSrcTy); 29810b57cec5SDimitry Andric auto *NewBC = 29820b57cec5SDimitry Andric BCAlreadyExists 29830b57cec5SDimitry Andric ? NewBCs[CastSrcTy] 29840b57cec5SDimitry Andric : Builder.CreateBitCast(V, CastSrcTy, SVI.getName() + ".bc"); 29850b57cec5SDimitry Andric if (!BCAlreadyExists) 29860b57cec5SDimitry Andric NewBCs[CastSrcTy] = NewBC; 298706c3fb27SDimitry Andric auto *Ext = Builder.CreateExtractElement(NewBC, BegIdx, 298806c3fb27SDimitry Andric SVI.getName() + ".extract"); 29890b57cec5SDimitry Andric // The shufflevector isn't being replaced: the bitcast that used it 29900b57cec5SDimitry Andric // is. InstCombine will visit the newly-created instructions. 29910b57cec5SDimitry Andric replaceInstUsesWith(*BC, Ext); 29920b57cec5SDimitry Andric MadeChange = true; 29930b57cec5SDimitry Andric } 29940b57cec5SDimitry Andric } 29950b57cec5SDimitry Andric 29960b57cec5SDimitry Andric // If the LHS is a shufflevector itself, see if we can combine it with this 29970b57cec5SDimitry Andric // one without producing an unusual shuffle. 29980b57cec5SDimitry Andric // Cases that might be simplified: 29990b57cec5SDimitry Andric // 1. 30000b57cec5SDimitry Andric // x1=shuffle(v1,v2,mask1) 30010b57cec5SDimitry Andric // x=shuffle(x1,undef,mask) 30020b57cec5SDimitry Andric // ==> 30030b57cec5SDimitry Andric // x=shuffle(v1,undef,newMask) 30040b57cec5SDimitry Andric // newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : -1 30050b57cec5SDimitry Andric // 2. 30060b57cec5SDimitry Andric // x1=shuffle(v1,undef,mask1) 30070b57cec5SDimitry Andric // x=shuffle(x1,x2,mask) 30080b57cec5SDimitry Andric // where v1.size() == mask1.size() 30090b57cec5SDimitry Andric // ==> 30100b57cec5SDimitry Andric // x=shuffle(v1,x2,newMask) 30110b57cec5SDimitry Andric // newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : mask[i] 30120b57cec5SDimitry Andric // 3. 30130b57cec5SDimitry Andric // x2=shuffle(v2,undef,mask2) 30140b57cec5SDimitry Andric // x=shuffle(x1,x2,mask) 30150b57cec5SDimitry Andric // where v2.size() == mask2.size() 30160b57cec5SDimitry Andric // ==> 30170b57cec5SDimitry Andric // x=shuffle(x1,v2,newMask) 30180b57cec5SDimitry Andric // newMask[i] = (mask[i] < x1.size()) 30190b57cec5SDimitry Andric // ? mask[i] : mask2[mask[i]-x1.size()]+x1.size() 30200b57cec5SDimitry Andric // 4. 30210b57cec5SDimitry Andric // x1=shuffle(v1,undef,mask1) 30220b57cec5SDimitry Andric // x2=shuffle(v2,undef,mask2) 30230b57cec5SDimitry Andric // x=shuffle(x1,x2,mask) 30240b57cec5SDimitry Andric // where v1.size() == v2.size() 30250b57cec5SDimitry Andric // ==> 30260b57cec5SDimitry Andric // x=shuffle(v1,v2,newMask) 30270b57cec5SDimitry Andric // newMask[i] = (mask[i] < x1.size()) 30280b57cec5SDimitry Andric // ? mask1[mask[i]] : mask2[mask[i]-x1.size()]+v1.size() 30290b57cec5SDimitry Andric // 30300b57cec5SDimitry Andric // Here we are really conservative: 30310b57cec5SDimitry Andric // we are absolutely afraid of producing a shuffle mask not in the input 30320b57cec5SDimitry Andric // program, because the code gen may not be smart enough to turn a merged 30330b57cec5SDimitry Andric // shuffle into two specific shuffles: it may produce worse code. As such, 30340b57cec5SDimitry Andric // we only merge two shuffles if the result is either a splat or one of the 30350b57cec5SDimitry Andric // input shuffle masks. In this case, merging the shuffles just removes 30360b57cec5SDimitry Andric // one instruction, which we know is safe. This is good for things like 30370b57cec5SDimitry Andric // turning: (splat(splat)) -> splat, or 30380b57cec5SDimitry Andric // merge(V[0..n], V[n+1..2n]) -> V[0..2n] 30390b57cec5SDimitry Andric ShuffleVectorInst* LHSShuffle = dyn_cast<ShuffleVectorInst>(LHS); 30400b57cec5SDimitry Andric ShuffleVectorInst* RHSShuffle = dyn_cast<ShuffleVectorInst>(RHS); 30410b57cec5SDimitry Andric if (LHSShuffle) 3042cb14a3feSDimitry Andric if (!match(LHSShuffle->getOperand(1), m_Poison()) && 3043cb14a3feSDimitry Andric !match(RHS, m_Poison())) 30440b57cec5SDimitry Andric LHSShuffle = nullptr; 30450b57cec5SDimitry Andric if (RHSShuffle) 3046cb14a3feSDimitry Andric if (!match(RHSShuffle->getOperand(1), m_Poison())) 30470b57cec5SDimitry Andric RHSShuffle = nullptr; 30480b57cec5SDimitry Andric if (!LHSShuffle && !RHSShuffle) 30490b57cec5SDimitry Andric return MadeChange ? &SVI : nullptr; 30500b57cec5SDimitry Andric 30510b57cec5SDimitry Andric Value* LHSOp0 = nullptr; 30520b57cec5SDimitry Andric Value* LHSOp1 = nullptr; 30530b57cec5SDimitry Andric Value* RHSOp0 = nullptr; 30540b57cec5SDimitry Andric unsigned LHSOp0Width = 0; 30550b57cec5SDimitry Andric unsigned RHSOp0Width = 0; 30560b57cec5SDimitry Andric if (LHSShuffle) { 30570b57cec5SDimitry Andric LHSOp0 = LHSShuffle->getOperand(0); 30580b57cec5SDimitry Andric LHSOp1 = LHSShuffle->getOperand(1); 3059e8d8bef9SDimitry Andric LHSOp0Width = cast<FixedVectorType>(LHSOp0->getType())->getNumElements(); 30600b57cec5SDimitry Andric } 30610b57cec5SDimitry Andric if (RHSShuffle) { 30620b57cec5SDimitry Andric RHSOp0 = RHSShuffle->getOperand(0); 3063e8d8bef9SDimitry Andric RHSOp0Width = cast<FixedVectorType>(RHSOp0->getType())->getNumElements(); 30640b57cec5SDimitry Andric } 30650b57cec5SDimitry Andric Value* newLHS = LHS; 30660b57cec5SDimitry Andric Value* newRHS = RHS; 30670b57cec5SDimitry Andric if (LHSShuffle) { 30680b57cec5SDimitry Andric // case 1 3069cb14a3feSDimitry Andric if (match(RHS, m_Poison())) { 30700b57cec5SDimitry Andric newLHS = LHSOp0; 30710b57cec5SDimitry Andric newRHS = LHSOp1; 30720b57cec5SDimitry Andric } 30730b57cec5SDimitry Andric // case 2 or 4 30740b57cec5SDimitry Andric else if (LHSOp0Width == LHSWidth) { 30750b57cec5SDimitry Andric newLHS = LHSOp0; 30760b57cec5SDimitry Andric } 30770b57cec5SDimitry Andric } 30780b57cec5SDimitry Andric // case 3 or 4 30790b57cec5SDimitry Andric if (RHSShuffle && RHSOp0Width == LHSWidth) { 30800b57cec5SDimitry Andric newRHS = RHSOp0; 30810b57cec5SDimitry Andric } 30820b57cec5SDimitry Andric // case 4 30830b57cec5SDimitry Andric if (LHSOp0 == RHSOp0) { 30840b57cec5SDimitry Andric newLHS = LHSOp0; 30850b57cec5SDimitry Andric newRHS = nullptr; 30860b57cec5SDimitry Andric } 30870b57cec5SDimitry Andric 30880b57cec5SDimitry Andric if (newLHS == LHS && newRHS == RHS) 30890b57cec5SDimitry Andric return MadeChange ? &SVI : nullptr; 30900b57cec5SDimitry Andric 30915ffd83dbSDimitry Andric ArrayRef<int> LHSMask; 30925ffd83dbSDimitry Andric ArrayRef<int> RHSMask; 30930b57cec5SDimitry Andric if (newLHS != LHS) 30940b57cec5SDimitry Andric LHSMask = LHSShuffle->getShuffleMask(); 30950b57cec5SDimitry Andric if (RHSShuffle && newRHS != RHS) 30960b57cec5SDimitry Andric RHSMask = RHSShuffle->getShuffleMask(); 30970b57cec5SDimitry Andric 30980b57cec5SDimitry Andric unsigned newLHSWidth = (newLHS != LHS) ? LHSOp0Width : LHSWidth; 30990b57cec5SDimitry Andric SmallVector<int, 16> newMask; 31000b57cec5SDimitry Andric bool isSplat = true; 31010b57cec5SDimitry Andric int SplatElt = -1; 31020b57cec5SDimitry Andric // Create a new mask for the new ShuffleVectorInst so that the new 31030b57cec5SDimitry Andric // ShuffleVectorInst is equivalent to the original one. 31040b57cec5SDimitry Andric for (unsigned i = 0; i < VWidth; ++i) { 31050b57cec5SDimitry Andric int eltMask; 31060b57cec5SDimitry Andric if (Mask[i] < 0) { 310706c3fb27SDimitry Andric // This element is a poison value. 31080b57cec5SDimitry Andric eltMask = -1; 31090b57cec5SDimitry Andric } else if (Mask[i] < (int)LHSWidth) { 31100b57cec5SDimitry Andric // This element is from left hand side vector operand. 31110b57cec5SDimitry Andric // 31120b57cec5SDimitry Andric // If LHS is going to be replaced (case 1, 2, or 4), calculate the 31130b57cec5SDimitry Andric // new mask value for the element. 31140b57cec5SDimitry Andric if (newLHS != LHS) { 31150b57cec5SDimitry Andric eltMask = LHSMask[Mask[i]]; 311606c3fb27SDimitry Andric // If the value selected is an poison value, explicitly specify it 31170b57cec5SDimitry Andric // with a -1 mask value. 311806c3fb27SDimitry Andric if (eltMask >= (int)LHSOp0Width && isa<PoisonValue>(LHSOp1)) 31190b57cec5SDimitry Andric eltMask = -1; 31200b57cec5SDimitry Andric } else 31210b57cec5SDimitry Andric eltMask = Mask[i]; 31220b57cec5SDimitry Andric } else { 31230b57cec5SDimitry Andric // This element is from right hand side vector operand 31240b57cec5SDimitry Andric // 312506c3fb27SDimitry Andric // If the value selected is a poison value, explicitly specify it 31260b57cec5SDimitry Andric // with a -1 mask value. (case 1) 312706c3fb27SDimitry Andric if (match(RHS, m_Poison())) 31280b57cec5SDimitry Andric eltMask = -1; 31290b57cec5SDimitry Andric // If RHS is going to be replaced (case 3 or 4), calculate the 31300b57cec5SDimitry Andric // new mask value for the element. 31310b57cec5SDimitry Andric else if (newRHS != RHS) { 31320b57cec5SDimitry Andric eltMask = RHSMask[Mask[i]-LHSWidth]; 313306c3fb27SDimitry Andric // If the value selected is an poison value, explicitly specify it 31340b57cec5SDimitry Andric // with a -1 mask value. 31350b57cec5SDimitry Andric if (eltMask >= (int)RHSOp0Width) { 313606c3fb27SDimitry Andric assert(match(RHSShuffle->getOperand(1), m_Poison()) && 3137fe6060f1SDimitry Andric "should have been check above"); 31380b57cec5SDimitry Andric eltMask = -1; 31390b57cec5SDimitry Andric } 31400b57cec5SDimitry Andric } else 31410b57cec5SDimitry Andric eltMask = Mask[i]-LHSWidth; 31420b57cec5SDimitry Andric 31430b57cec5SDimitry Andric // If LHS's width is changed, shift the mask value accordingly. 31440b57cec5SDimitry Andric // If newRHS == nullptr, i.e. LHSOp0 == RHSOp0, we want to remap any 31450b57cec5SDimitry Andric // references from RHSOp0 to LHSOp0, so we don't need to shift the mask. 31460b57cec5SDimitry Andric // If newRHS == newLHS, we want to remap any references from newRHS to 31470b57cec5SDimitry Andric // newLHS so that we can properly identify splats that may occur due to 31480b57cec5SDimitry Andric // obfuscation across the two vectors. 31490b57cec5SDimitry Andric if (eltMask >= 0 && newRHS != nullptr && newLHS != newRHS) 31500b57cec5SDimitry Andric eltMask += newLHSWidth; 31510b57cec5SDimitry Andric } 31520b57cec5SDimitry Andric 31530b57cec5SDimitry Andric // Check if this could still be a splat. 31540b57cec5SDimitry Andric if (eltMask >= 0) { 31550b57cec5SDimitry Andric if (SplatElt >= 0 && SplatElt != eltMask) 31560b57cec5SDimitry Andric isSplat = false; 31570b57cec5SDimitry Andric SplatElt = eltMask; 31580b57cec5SDimitry Andric } 31590b57cec5SDimitry Andric 31600b57cec5SDimitry Andric newMask.push_back(eltMask); 31610b57cec5SDimitry Andric } 31620b57cec5SDimitry Andric 31630b57cec5SDimitry Andric // If the result mask is equal to one of the original shuffle masks, 31640b57cec5SDimitry Andric // or is a splat, do the replacement. 31650b57cec5SDimitry Andric if (isSplat || newMask == LHSMask || newMask == RHSMask || newMask == Mask) { 31660b57cec5SDimitry Andric if (!newRHS) 316706c3fb27SDimitry Andric newRHS = PoisonValue::get(newLHS->getType()); 3168e8d8bef9SDimitry Andric return new ShuffleVectorInst(newLHS, newRHS, newMask); 31690b57cec5SDimitry Andric } 31700b57cec5SDimitry Andric 31710b57cec5SDimitry Andric return MadeChange ? &SVI : nullptr; 31720b57cec5SDimitry Andric } 3173