xref: /freebsd-src/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
10b57cec5SDimitry Andric //===- InstCombineVectorOps.cpp -------------------------------------------===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric //
90b57cec5SDimitry Andric // This file implements instcombine for ExtractElement, InsertElement and
100b57cec5SDimitry Andric // ShuffleVector.
110b57cec5SDimitry Andric //
120b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
130b57cec5SDimitry Andric 
140b57cec5SDimitry Andric #include "InstCombineInternal.h"
150b57cec5SDimitry Andric #include "llvm/ADT/APInt.h"
160b57cec5SDimitry Andric #include "llvm/ADT/ArrayRef.h"
170b57cec5SDimitry Andric #include "llvm/ADT/DenseMap.h"
180b57cec5SDimitry Andric #include "llvm/ADT/STLExtras.h"
195ffd83dbSDimitry Andric #include "llvm/ADT/SmallBitVector.h"
200b57cec5SDimitry Andric #include "llvm/ADT/SmallVector.h"
21e8d8bef9SDimitry Andric #include "llvm/ADT/Statistic.h"
220b57cec5SDimitry Andric #include "llvm/Analysis/InstructionSimplify.h"
230b57cec5SDimitry Andric #include "llvm/Analysis/VectorUtils.h"
240b57cec5SDimitry Andric #include "llvm/IR/BasicBlock.h"
250b57cec5SDimitry Andric #include "llvm/IR/Constant.h"
260b57cec5SDimitry Andric #include "llvm/IR/Constants.h"
270b57cec5SDimitry Andric #include "llvm/IR/DerivedTypes.h"
280b57cec5SDimitry Andric #include "llvm/IR/InstrTypes.h"
290b57cec5SDimitry Andric #include "llvm/IR/Instruction.h"
300b57cec5SDimitry Andric #include "llvm/IR/Instructions.h"
310b57cec5SDimitry Andric #include "llvm/IR/Operator.h"
320b57cec5SDimitry Andric #include "llvm/IR/PatternMatch.h"
330b57cec5SDimitry Andric #include "llvm/IR/Type.h"
340b57cec5SDimitry Andric #include "llvm/IR/User.h"
350b57cec5SDimitry Andric #include "llvm/IR/Value.h"
360b57cec5SDimitry Andric #include "llvm/Support/Casting.h"
370b57cec5SDimitry Andric #include "llvm/Support/ErrorHandling.h"
38e8d8bef9SDimitry Andric #include "llvm/Transforms/InstCombine/InstCombiner.h"
390b57cec5SDimitry Andric #include <cassert>
400b57cec5SDimitry Andric #include <cstdint>
410b57cec5SDimitry Andric #include <iterator>
420b57cec5SDimitry Andric #include <utility>
430b57cec5SDimitry Andric 
44349cc55cSDimitry Andric #define DEBUG_TYPE "instcombine"
45349cc55cSDimitry Andric 
460b57cec5SDimitry Andric using namespace llvm;
470b57cec5SDimitry Andric using namespace PatternMatch;
480b57cec5SDimitry Andric 
49e8d8bef9SDimitry Andric STATISTIC(NumAggregateReconstructionsSimplified,
50e8d8bef9SDimitry Andric           "Number of aggregate reconstructions turned into reuse of the "
51e8d8bef9SDimitry Andric           "original aggregate");
52e8d8bef9SDimitry Andric 
530b57cec5SDimitry Andric /// Return true if the value is cheaper to scalarize than it is to leave as a
54349cc55cSDimitry Andric /// vector operation. If the extract index \p EI is a constant integer then
55349cc55cSDimitry Andric /// some operations may be cheap to scalarize.
560b57cec5SDimitry Andric ///
570b57cec5SDimitry Andric /// FIXME: It's possible to create more instructions than previously existed.
58349cc55cSDimitry Andric static bool cheapToScalarize(Value *V, Value *EI) {
59349cc55cSDimitry Andric   ConstantInt *CEI = dyn_cast<ConstantInt>(EI);
60349cc55cSDimitry Andric 
610b57cec5SDimitry Andric   // If we can pick a scalar constant value out of a vector, that is free.
620b57cec5SDimitry Andric   if (auto *C = dyn_cast<Constant>(V))
63349cc55cSDimitry Andric     return CEI || C->getSplatValue();
64349cc55cSDimitry Andric 
65349cc55cSDimitry Andric   if (CEI && match(V, m_Intrinsic<Intrinsic::experimental_stepvector>())) {
66349cc55cSDimitry Andric     ElementCount EC = cast<VectorType>(V->getType())->getElementCount();
67349cc55cSDimitry Andric     // Index needs to be lower than the minimum size of the vector, because
68349cc55cSDimitry Andric     // for scalable vector, the vector size is known at run time.
69349cc55cSDimitry Andric     return CEI->getValue().ult(EC.getKnownMinValue());
70349cc55cSDimitry Andric   }
710b57cec5SDimitry Andric 
720b57cec5SDimitry Andric   // An insertelement to the same constant index as our extract will simplify
730b57cec5SDimitry Andric   // to the scalar inserted element. An insertelement to a different constant
740b57cec5SDimitry Andric   // index is irrelevant to our extract.
755ffd83dbSDimitry Andric   if (match(V, m_InsertElt(m_Value(), m_Value(), m_ConstantInt())))
76349cc55cSDimitry Andric     return CEI;
770b57cec5SDimitry Andric 
780b57cec5SDimitry Andric   if (match(V, m_OneUse(m_Load(m_Value()))))
790b57cec5SDimitry Andric     return true;
800b57cec5SDimitry Andric 
815ffd83dbSDimitry Andric   if (match(V, m_OneUse(m_UnOp())))
825ffd83dbSDimitry Andric     return true;
835ffd83dbSDimitry Andric 
840b57cec5SDimitry Andric   Value *V0, *V1;
850b57cec5SDimitry Andric   if (match(V, m_OneUse(m_BinOp(m_Value(V0), m_Value(V1)))))
86349cc55cSDimitry Andric     if (cheapToScalarize(V0, EI) || cheapToScalarize(V1, EI))
870b57cec5SDimitry Andric       return true;
880b57cec5SDimitry Andric 
890b57cec5SDimitry Andric   CmpInst::Predicate UnusedPred;
900b57cec5SDimitry Andric   if (match(V, m_OneUse(m_Cmp(UnusedPred, m_Value(V0), m_Value(V1)))))
91349cc55cSDimitry Andric     if (cheapToScalarize(V0, EI) || cheapToScalarize(V1, EI))
920b57cec5SDimitry Andric       return true;
930b57cec5SDimitry Andric 
940b57cec5SDimitry Andric   return false;
950b57cec5SDimitry Andric }
960b57cec5SDimitry Andric 
970b57cec5SDimitry Andric // If we have a PHI node with a vector type that is only used to feed
980b57cec5SDimitry Andric // itself and be an operand of extractelement at a constant location,
990b57cec5SDimitry Andric // try to replace the PHI of the vector type with a PHI of a scalar type.
100e8d8bef9SDimitry Andric Instruction *InstCombinerImpl::scalarizePHI(ExtractElementInst &EI,
101e8d8bef9SDimitry Andric                                             PHINode *PN) {
1020b57cec5SDimitry Andric   SmallVector<Instruction *, 2> Extracts;
1030b57cec5SDimitry Andric   // The users we want the PHI to have are:
1040b57cec5SDimitry Andric   // 1) The EI ExtractElement (we already know this)
1050b57cec5SDimitry Andric   // 2) Possibly more ExtractElements with the same index.
1060b57cec5SDimitry Andric   // 3) Another operand, which will feed back into the PHI.
1070b57cec5SDimitry Andric   Instruction *PHIUser = nullptr;
108bdd1243dSDimitry Andric   for (auto *U : PN->users()) {
1090b57cec5SDimitry Andric     if (ExtractElementInst *EU = dyn_cast<ExtractElementInst>(U)) {
1100b57cec5SDimitry Andric       if (EI.getIndexOperand() == EU->getIndexOperand())
1110b57cec5SDimitry Andric         Extracts.push_back(EU);
1120b57cec5SDimitry Andric       else
1130b57cec5SDimitry Andric         return nullptr;
1140b57cec5SDimitry Andric     } else if (!PHIUser) {
1150b57cec5SDimitry Andric       PHIUser = cast<Instruction>(U);
1160b57cec5SDimitry Andric     } else {
1170b57cec5SDimitry Andric       return nullptr;
1180b57cec5SDimitry Andric     }
1190b57cec5SDimitry Andric   }
1200b57cec5SDimitry Andric 
1210b57cec5SDimitry Andric   if (!PHIUser)
1220b57cec5SDimitry Andric     return nullptr;
1230b57cec5SDimitry Andric 
1240b57cec5SDimitry Andric   // Verify that this PHI user has one use, which is the PHI itself,
1250b57cec5SDimitry Andric   // and that it is a binary operation which is cheap to scalarize.
1260b57cec5SDimitry Andric   // otherwise return nullptr.
1270b57cec5SDimitry Andric   if (!PHIUser->hasOneUse() || !(PHIUser->user_back() == PN) ||
128349cc55cSDimitry Andric       !(isa<BinaryOperator>(PHIUser)) ||
129349cc55cSDimitry Andric       !cheapToScalarize(PHIUser, EI.getIndexOperand()))
1300b57cec5SDimitry Andric     return nullptr;
1310b57cec5SDimitry Andric 
1320b57cec5SDimitry Andric   // Create a scalar PHI node that will replace the vector PHI node
1330b57cec5SDimitry Andric   // just before the current PHI node.
1340b57cec5SDimitry Andric   PHINode *scalarPHI = cast<PHINode>(InsertNewInstWith(
1355f757f3fSDimitry Andric       PHINode::Create(EI.getType(), PN->getNumIncomingValues(), ""), PN->getIterator()));
1360b57cec5SDimitry Andric   // Scalarize each PHI operand.
1370b57cec5SDimitry Andric   for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) {
1380b57cec5SDimitry Andric     Value *PHIInVal = PN->getIncomingValue(i);
1390b57cec5SDimitry Andric     BasicBlock *inBB = PN->getIncomingBlock(i);
1400b57cec5SDimitry Andric     Value *Elt = EI.getIndexOperand();
1410b57cec5SDimitry Andric     // If the operand is the PHI induction variable:
1420b57cec5SDimitry Andric     if (PHIInVal == PHIUser) {
1430b57cec5SDimitry Andric       // Scalarize the binary operation. Its first operand is the
1440b57cec5SDimitry Andric       // scalar PHI, and the second operand is extracted from the other
1450b57cec5SDimitry Andric       // vector operand.
1460b57cec5SDimitry Andric       BinaryOperator *B0 = cast<BinaryOperator>(PHIUser);
1470b57cec5SDimitry Andric       unsigned opId = (B0->getOperand(0) == PN) ? 1 : 0;
1480b57cec5SDimitry Andric       Value *Op = InsertNewInstWith(
1490b57cec5SDimitry Andric           ExtractElementInst::Create(B0->getOperand(opId), Elt,
1500b57cec5SDimitry Andric                                      B0->getOperand(opId)->getName() + ".Elt"),
1515f757f3fSDimitry Andric           B0->getIterator());
1520b57cec5SDimitry Andric       Value *newPHIUser = InsertNewInstWith(
1530b57cec5SDimitry Andric           BinaryOperator::CreateWithCopiedFlags(B0->getOpcode(),
1545f757f3fSDimitry Andric                                                 scalarPHI, Op, B0), B0->getIterator());
1550b57cec5SDimitry Andric       scalarPHI->addIncoming(newPHIUser, inBB);
1560b57cec5SDimitry Andric     } else {
1570b57cec5SDimitry Andric       // Scalarize PHI input:
1580b57cec5SDimitry Andric       Instruction *newEI = ExtractElementInst::Create(PHIInVal, Elt, "");
1590b57cec5SDimitry Andric       // Insert the new instruction into the predecessor basic block.
1600b57cec5SDimitry Andric       Instruction *pos = dyn_cast<Instruction>(PHIInVal);
1610b57cec5SDimitry Andric       BasicBlock::iterator InsertPos;
1620b57cec5SDimitry Andric       if (pos && !isa<PHINode>(pos)) {
1630b57cec5SDimitry Andric         InsertPos = ++pos->getIterator();
1640b57cec5SDimitry Andric       } else {
1650b57cec5SDimitry Andric         InsertPos = inBB->getFirstInsertionPt();
1660b57cec5SDimitry Andric       }
1670b57cec5SDimitry Andric 
1685f757f3fSDimitry Andric       InsertNewInstWith(newEI, InsertPos);
1690b57cec5SDimitry Andric 
1700b57cec5SDimitry Andric       scalarPHI->addIncoming(newEI, inBB);
1710b57cec5SDimitry Andric     }
1720b57cec5SDimitry Andric   }
1730b57cec5SDimitry Andric 
17406c3fb27SDimitry Andric   for (auto *E : Extracts) {
1750b57cec5SDimitry Andric     replaceInstUsesWith(*E, scalarPHI);
17606c3fb27SDimitry Andric     // Add old extract to worklist for DCE.
17706c3fb27SDimitry Andric     addToWorklist(E);
17806c3fb27SDimitry Andric   }
1790b57cec5SDimitry Andric 
1800b57cec5SDimitry Andric   return &EI;
1810b57cec5SDimitry Andric }
1820b57cec5SDimitry Andric 
183349cc55cSDimitry Andric Instruction *InstCombinerImpl::foldBitcastExtElt(ExtractElementInst &Ext) {
1840b57cec5SDimitry Andric   Value *X;
1850b57cec5SDimitry Andric   uint64_t ExtIndexC;
1860b57cec5SDimitry Andric   if (!match(Ext.getVectorOperand(), m_BitCast(m_Value(X))) ||
1870b57cec5SDimitry Andric       !match(Ext.getIndexOperand(), m_ConstantInt(ExtIndexC)))
1880b57cec5SDimitry Andric     return nullptr;
1890b57cec5SDimitry Andric 
190349cc55cSDimitry Andric   ElementCount NumElts =
191349cc55cSDimitry Andric       cast<VectorType>(Ext.getVectorOperandType())->getElementCount();
192349cc55cSDimitry Andric   Type *DestTy = Ext.getType();
193bdd1243dSDimitry Andric   unsigned DestWidth = DestTy->getPrimitiveSizeInBits();
194349cc55cSDimitry Andric   bool IsBigEndian = DL.isBigEndian();
195349cc55cSDimitry Andric 
196349cc55cSDimitry Andric   // If we are casting an integer to vector and extracting a portion, that is
197349cc55cSDimitry Andric   // a shift-right and truncate.
198bdd1243dSDimitry Andric   if (X->getType()->isIntegerTy()) {
199349cc55cSDimitry Andric     assert(isa<FixedVectorType>(Ext.getVectorOperand()->getType()) &&
200349cc55cSDimitry Andric            "Expected fixed vector type for bitcast from scalar integer");
201349cc55cSDimitry Andric 
202349cc55cSDimitry Andric     // Big endian requires adjusting the extract index since MSB is at index 0.
203349cc55cSDimitry Andric     // LittleEndian: extelt (bitcast i32 X to v4i8), 0 -> trunc i32 X to i8
204349cc55cSDimitry Andric     // BigEndian: extelt (bitcast i32 X to v4i8), 0 -> trunc i32 (X >> 24) to i8
205349cc55cSDimitry Andric     if (IsBigEndian)
206349cc55cSDimitry Andric       ExtIndexC = NumElts.getKnownMinValue() - 1 - ExtIndexC;
207bdd1243dSDimitry Andric     unsigned ShiftAmountC = ExtIndexC * DestWidth;
208bdd1243dSDimitry Andric     if (!ShiftAmountC ||
209bdd1243dSDimitry Andric         (isDesirableIntType(X->getType()->getPrimitiveSizeInBits()) &&
210bdd1243dSDimitry Andric         Ext.getVectorOperand()->hasOneUse())) {
211bdd1243dSDimitry Andric       if (ShiftAmountC)
212bdd1243dSDimitry Andric         X = Builder.CreateLShr(X, ShiftAmountC, "extelt.offset");
213bdd1243dSDimitry Andric       if (DestTy->isFloatingPointTy()) {
214bdd1243dSDimitry Andric         Type *DstIntTy = IntegerType::getIntNTy(X->getContext(), DestWidth);
215bdd1243dSDimitry Andric         Value *Trunc = Builder.CreateTrunc(X, DstIntTy);
216bdd1243dSDimitry Andric         return new BitCastInst(Trunc, DestTy);
217bdd1243dSDimitry Andric       }
218bdd1243dSDimitry Andric       return new TruncInst(X, DestTy);
219349cc55cSDimitry Andric     }
220349cc55cSDimitry Andric   }
221349cc55cSDimitry Andric 
222349cc55cSDimitry Andric   if (!X->getType()->isVectorTy())
223349cc55cSDimitry Andric     return nullptr;
224349cc55cSDimitry Andric 
2250b57cec5SDimitry Andric   // If this extractelement is using a bitcast from a vector of the same number
2260b57cec5SDimitry Andric   // of elements, see if we can find the source element from the source vector:
2270b57cec5SDimitry Andric   // extelt (bitcast VecX), IndexC --> bitcast X[IndexC]
2285ffd83dbSDimitry Andric   auto *SrcTy = cast<VectorType>(X->getType());
229e8d8bef9SDimitry Andric   ElementCount NumSrcElts = SrcTy->getElementCount();
2300b57cec5SDimitry Andric   if (NumSrcElts == NumElts)
2310b57cec5SDimitry Andric     if (Value *Elt = findScalarElement(X, ExtIndexC))
2320b57cec5SDimitry Andric       return new BitCastInst(Elt, DestTy);
2330b57cec5SDimitry Andric 
234e8d8bef9SDimitry Andric   assert(NumSrcElts.isScalable() == NumElts.isScalable() &&
235e8d8bef9SDimitry Andric          "Src and Dst must be the same sort of vector type");
236e8d8bef9SDimitry Andric 
2370b57cec5SDimitry Andric   // If the source elements are wider than the destination, try to shift and
2380b57cec5SDimitry Andric   // truncate a subset of scalar bits of an insert op.
239e8d8bef9SDimitry Andric   if (NumSrcElts.getKnownMinValue() < NumElts.getKnownMinValue()) {
2400b57cec5SDimitry Andric     Value *Scalar;
241753f127fSDimitry Andric     Value *Vec;
2420b57cec5SDimitry Andric     uint64_t InsIndexC;
243753f127fSDimitry Andric     if (!match(X, m_InsertElt(m_Value(Vec), m_Value(Scalar),
2440b57cec5SDimitry Andric                               m_ConstantInt(InsIndexC))))
2450b57cec5SDimitry Andric       return nullptr;
2460b57cec5SDimitry Andric 
2470b57cec5SDimitry Andric     // The extract must be from the subset of vector elements that we inserted
2480b57cec5SDimitry Andric     // into. Example: if we inserted element 1 of a <2 x i64> and we are
2490b57cec5SDimitry Andric     // extracting an i16 (narrowing ratio = 4), then this extract must be from 1
2500b57cec5SDimitry Andric     // of elements 4-7 of the bitcasted vector.
251e8d8bef9SDimitry Andric     unsigned NarrowingRatio =
252e8d8bef9SDimitry Andric         NumElts.getKnownMinValue() / NumSrcElts.getKnownMinValue();
253753f127fSDimitry Andric 
254753f127fSDimitry Andric     if (ExtIndexC / NarrowingRatio != InsIndexC) {
255753f127fSDimitry Andric       // Remove insertelement, if we don't use the inserted element.
256753f127fSDimitry Andric       // extractelement (bitcast (insertelement (Vec, b)), a) ->
257753f127fSDimitry Andric       // extractelement (bitcast (Vec), a)
258753f127fSDimitry Andric       // FIXME: this should be removed to SimplifyDemandedVectorElts,
259753f127fSDimitry Andric       // once scale vectors are supported.
260753f127fSDimitry Andric       if (X->hasOneUse() && Ext.getVectorOperand()->hasOneUse()) {
261753f127fSDimitry Andric         Value *NewBC = Builder.CreateBitCast(Vec, Ext.getVectorOperandType());
262753f127fSDimitry Andric         return ExtractElementInst::Create(NewBC, Ext.getIndexOperand());
263753f127fSDimitry Andric       }
2640b57cec5SDimitry Andric       return nullptr;
265753f127fSDimitry Andric     }
2660b57cec5SDimitry Andric 
2670b57cec5SDimitry Andric     // We are extracting part of the original scalar. How that scalar is
2680b57cec5SDimitry Andric     // inserted into the vector depends on the endian-ness. Example:
2690b57cec5SDimitry Andric     //              Vector Byte Elt Index:    0  1  2  3  4  5  6  7
2700b57cec5SDimitry Andric     //                                       +--+--+--+--+--+--+--+--+
2710b57cec5SDimitry Andric     // inselt <2 x i32> V, <i32> S, 1:       |V0|V1|V2|V3|S0|S1|S2|S3|
2720b57cec5SDimitry Andric     // extelt <4 x i16> V', 3:               |                 |S2|S3|
2730b57cec5SDimitry Andric     //                                       +--+--+--+--+--+--+--+--+
2740b57cec5SDimitry Andric     // If this is little-endian, S2|S3 are the MSB of the 32-bit 'S' value.
2750b57cec5SDimitry Andric     // If this is big-endian, S2|S3 are the LSB of the 32-bit 'S' value.
2760b57cec5SDimitry Andric     // In this example, we must right-shift little-endian. Big-endian is just a
2770b57cec5SDimitry Andric     // truncate.
2780b57cec5SDimitry Andric     unsigned Chunk = ExtIndexC % NarrowingRatio;
2790b57cec5SDimitry Andric     if (IsBigEndian)
2800b57cec5SDimitry Andric       Chunk = NarrowingRatio - 1 - Chunk;
2810b57cec5SDimitry Andric 
2820b57cec5SDimitry Andric     // Bail out if this is an FP vector to FP vector sequence. That would take
2830b57cec5SDimitry Andric     // more instructions than we started with unless there is no shift, and it
2840b57cec5SDimitry Andric     // may not be handled as well in the backend.
2850b57cec5SDimitry Andric     bool NeedSrcBitcast = SrcTy->getScalarType()->isFloatingPointTy();
2860b57cec5SDimitry Andric     bool NeedDestBitcast = DestTy->isFloatingPointTy();
2870b57cec5SDimitry Andric     if (NeedSrcBitcast && NeedDestBitcast)
2880b57cec5SDimitry Andric       return nullptr;
2890b57cec5SDimitry Andric 
2900b57cec5SDimitry Andric     unsigned SrcWidth = SrcTy->getScalarSizeInBits();
2910b57cec5SDimitry Andric     unsigned ShAmt = Chunk * DestWidth;
2920b57cec5SDimitry Andric 
2930b57cec5SDimitry Andric     // TODO: This limitation is more strict than necessary. We could sum the
2940b57cec5SDimitry Andric     // number of new instructions and subtract the number eliminated to know if
2950b57cec5SDimitry Andric     // we can proceed.
2960b57cec5SDimitry Andric     if (!X->hasOneUse() || !Ext.getVectorOperand()->hasOneUse())
2970b57cec5SDimitry Andric       if (NeedSrcBitcast || NeedDestBitcast)
2980b57cec5SDimitry Andric         return nullptr;
2990b57cec5SDimitry Andric 
3000b57cec5SDimitry Andric     if (NeedSrcBitcast) {
3010b57cec5SDimitry Andric       Type *SrcIntTy = IntegerType::getIntNTy(Scalar->getContext(), SrcWidth);
3020b57cec5SDimitry Andric       Scalar = Builder.CreateBitCast(Scalar, SrcIntTy);
3030b57cec5SDimitry Andric     }
3040b57cec5SDimitry Andric 
3050b57cec5SDimitry Andric     if (ShAmt) {
3060b57cec5SDimitry Andric       // Bail out if we could end with more instructions than we started with.
3070b57cec5SDimitry Andric       if (!Ext.getVectorOperand()->hasOneUse())
3080b57cec5SDimitry Andric         return nullptr;
3090b57cec5SDimitry Andric       Scalar = Builder.CreateLShr(Scalar, ShAmt);
3100b57cec5SDimitry Andric     }
3110b57cec5SDimitry Andric 
3120b57cec5SDimitry Andric     if (NeedDestBitcast) {
3130b57cec5SDimitry Andric       Type *DestIntTy = IntegerType::getIntNTy(Scalar->getContext(), DestWidth);
3140b57cec5SDimitry Andric       return new BitCastInst(Builder.CreateTrunc(Scalar, DestIntTy), DestTy);
3150b57cec5SDimitry Andric     }
3160b57cec5SDimitry Andric     return new TruncInst(Scalar, DestTy);
3170b57cec5SDimitry Andric   }
3180b57cec5SDimitry Andric 
3190b57cec5SDimitry Andric   return nullptr;
3200b57cec5SDimitry Andric }
3210b57cec5SDimitry Andric 
3228bcb0991SDimitry Andric /// Find elements of V demanded by UserInstr.
3238bcb0991SDimitry Andric static APInt findDemandedEltsBySingleUser(Value *V, Instruction *UserInstr) {
324e8d8bef9SDimitry Andric   unsigned VWidth = cast<FixedVectorType>(V->getType())->getNumElements();
3258bcb0991SDimitry Andric 
3268bcb0991SDimitry Andric   // Conservatively assume that all elements are needed.
327349cc55cSDimitry Andric   APInt UsedElts(APInt::getAllOnes(VWidth));
3288bcb0991SDimitry Andric 
3298bcb0991SDimitry Andric   switch (UserInstr->getOpcode()) {
3308bcb0991SDimitry Andric   case Instruction::ExtractElement: {
3318bcb0991SDimitry Andric     ExtractElementInst *EEI = cast<ExtractElementInst>(UserInstr);
3328bcb0991SDimitry Andric     assert(EEI->getVectorOperand() == V);
3338bcb0991SDimitry Andric     ConstantInt *EEIIndexC = dyn_cast<ConstantInt>(EEI->getIndexOperand());
3348bcb0991SDimitry Andric     if (EEIIndexC && EEIIndexC->getValue().ult(VWidth)) {
3358bcb0991SDimitry Andric       UsedElts = APInt::getOneBitSet(VWidth, EEIIndexC->getZExtValue());
3368bcb0991SDimitry Andric     }
3378bcb0991SDimitry Andric     break;
3388bcb0991SDimitry Andric   }
3398bcb0991SDimitry Andric   case Instruction::ShuffleVector: {
3408bcb0991SDimitry Andric     ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(UserInstr);
3415ffd83dbSDimitry Andric     unsigned MaskNumElts =
342e8d8bef9SDimitry Andric         cast<FixedVectorType>(UserInstr->getType())->getNumElements();
3438bcb0991SDimitry Andric 
3448bcb0991SDimitry Andric     UsedElts = APInt(VWidth, 0);
3458bcb0991SDimitry Andric     for (unsigned i = 0; i < MaskNumElts; i++) {
3468bcb0991SDimitry Andric       unsigned MaskVal = Shuffle->getMaskValue(i);
3478bcb0991SDimitry Andric       if (MaskVal == -1u || MaskVal >= 2 * VWidth)
3488bcb0991SDimitry Andric         continue;
3498bcb0991SDimitry Andric       if (Shuffle->getOperand(0) == V && (MaskVal < VWidth))
3508bcb0991SDimitry Andric         UsedElts.setBit(MaskVal);
3518bcb0991SDimitry Andric       if (Shuffle->getOperand(1) == V &&
3528bcb0991SDimitry Andric           ((MaskVal >= VWidth) && (MaskVal < 2 * VWidth)))
3538bcb0991SDimitry Andric         UsedElts.setBit(MaskVal - VWidth);
3548bcb0991SDimitry Andric     }
3558bcb0991SDimitry Andric     break;
3568bcb0991SDimitry Andric   }
3578bcb0991SDimitry Andric   default:
3588bcb0991SDimitry Andric     break;
3598bcb0991SDimitry Andric   }
3608bcb0991SDimitry Andric   return UsedElts;
3618bcb0991SDimitry Andric }
3628bcb0991SDimitry Andric 
3638bcb0991SDimitry Andric /// Find union of elements of V demanded by all its users.
3648bcb0991SDimitry Andric /// If it is known by querying findDemandedEltsBySingleUser that
3658bcb0991SDimitry Andric /// no user demands an element of V, then the corresponding bit
3668bcb0991SDimitry Andric /// remains unset in the returned value.
3678bcb0991SDimitry Andric static APInt findDemandedEltsByAllUsers(Value *V) {
368e8d8bef9SDimitry Andric   unsigned VWidth = cast<FixedVectorType>(V->getType())->getNumElements();
3698bcb0991SDimitry Andric 
3708bcb0991SDimitry Andric   APInt UnionUsedElts(VWidth, 0);
3718bcb0991SDimitry Andric   for (const Use &U : V->uses()) {
3728bcb0991SDimitry Andric     if (Instruction *I = dyn_cast<Instruction>(U.getUser())) {
3738bcb0991SDimitry Andric       UnionUsedElts |= findDemandedEltsBySingleUser(V, I);
3748bcb0991SDimitry Andric     } else {
375349cc55cSDimitry Andric       UnionUsedElts = APInt::getAllOnes(VWidth);
3768bcb0991SDimitry Andric       break;
3778bcb0991SDimitry Andric     }
3788bcb0991SDimitry Andric 
379349cc55cSDimitry Andric     if (UnionUsedElts.isAllOnes())
3808bcb0991SDimitry Andric       break;
3818bcb0991SDimitry Andric   }
3828bcb0991SDimitry Andric 
3838bcb0991SDimitry Andric   return UnionUsedElts;
3848bcb0991SDimitry Andric }
3858bcb0991SDimitry Andric 
3860eae32dcSDimitry Andric /// Given a constant index for a extractelement or insertelement instruction,
3870eae32dcSDimitry Andric /// return it with the canonical type if it isn't already canonical.  We
3880eae32dcSDimitry Andric /// arbitrarily pick 64 bit as our canonical type.  The actual bitwidth doesn't
3890eae32dcSDimitry Andric /// matter, we just want a consistent type to simplify CSE.
39006c3fb27SDimitry Andric static ConstantInt *getPreferredVectorIndex(ConstantInt *IndexC) {
391cb14a3feSDimitry Andric   const unsigned IndexBW = IndexC->getBitWidth();
3920eae32dcSDimitry Andric   if (IndexBW == 64 || IndexC->getValue().getActiveBits() > 64)
3930eae32dcSDimitry Andric     return nullptr;
3940eae32dcSDimitry Andric   return ConstantInt::get(IndexC->getContext(),
3950eae32dcSDimitry Andric                           IndexC->getValue().zextOrTrunc(64));
3960eae32dcSDimitry Andric }
3970eae32dcSDimitry Andric 
398e8d8bef9SDimitry Andric Instruction *InstCombinerImpl::visitExtractElementInst(ExtractElementInst &EI) {
3990b57cec5SDimitry Andric   Value *SrcVec = EI.getVectorOperand();
4000b57cec5SDimitry Andric   Value *Index = EI.getIndexOperand();
40181ad6265SDimitry Andric   if (Value *V = simplifyExtractElementInst(SrcVec, Index,
4020b57cec5SDimitry Andric                                             SQ.getWithInstruction(&EI)))
4030b57cec5SDimitry Andric     return replaceInstUsesWith(EI, V);
4040b57cec5SDimitry Andric 
405bdd1243dSDimitry Andric   // extractelt (select %x, %vec1, %vec2), %const ->
406bdd1243dSDimitry Andric   // select %x, %vec1[%const], %vec2[%const]
407bdd1243dSDimitry Andric   // TODO: Support constant folding of multiple select operands:
408bdd1243dSDimitry Andric   // extractelt (select %x, %vec1, %vec2), (select %x, %c1, %c2)
409bdd1243dSDimitry Andric   // If the extractelement will for instance try to do out of bounds accesses
410bdd1243dSDimitry Andric   // because of the values of %c1 and/or %c2, the sequence could be optimized
411bdd1243dSDimitry Andric   // early. This is currently not possible because constant folding will reach
412bdd1243dSDimitry Andric   // an unreachable assertion if it doesn't find a constant operand.
413bdd1243dSDimitry Andric   if (SelectInst *SI = dyn_cast<SelectInst>(EI.getVectorOperand()))
414bdd1243dSDimitry Andric     if (SI->getCondition()->getType()->isIntegerTy() &&
415bdd1243dSDimitry Andric         isa<Constant>(EI.getIndexOperand()))
416bdd1243dSDimitry Andric       if (Instruction *R = FoldOpIntoSelect(EI, SI))
417bdd1243dSDimitry Andric         return R;
418bdd1243dSDimitry Andric 
4190b57cec5SDimitry Andric   // If extracting a specified index from the vector, see if we can recursively
4200b57cec5SDimitry Andric   // find a previously computed scalar that was inserted into the vector.
4210b57cec5SDimitry Andric   auto *IndexC = dyn_cast<ConstantInt>(Index);
422*0fca6ea1SDimitry Andric   bool HasKnownValidIndex = false;
4230b57cec5SDimitry Andric   if (IndexC) {
4240eae32dcSDimitry Andric     // Canonicalize type of constant indices to i64 to simplify CSE
4250eae32dcSDimitry Andric     if (auto *NewIdx = getPreferredVectorIndex(IndexC))
4260eae32dcSDimitry Andric       return replaceOperand(EI, 1, NewIdx);
4270eae32dcSDimitry Andric 
4285ffd83dbSDimitry Andric     ElementCount EC = EI.getVectorOperandType()->getElementCount();
429e8d8bef9SDimitry Andric     unsigned NumElts = EC.getKnownMinValue();
430*0fca6ea1SDimitry Andric     HasKnownValidIndex = IndexC->getValue().ult(NumElts);
4310b57cec5SDimitry Andric 
432fe6060f1SDimitry Andric     if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(SrcVec)) {
433fe6060f1SDimitry Andric       Intrinsic::ID IID = II->getIntrinsicID();
434fe6060f1SDimitry Andric       // Index needs to be lower than the minimum size of the vector, because
435fe6060f1SDimitry Andric       // for scalable vector, the vector size is known at run time.
436fe6060f1SDimitry Andric       if (IID == Intrinsic::experimental_stepvector &&
437fe6060f1SDimitry Andric           IndexC->getValue().ult(NumElts)) {
438fe6060f1SDimitry Andric         Type *Ty = EI.getType();
439fe6060f1SDimitry Andric         unsigned BitWidth = Ty->getIntegerBitWidth();
440fe6060f1SDimitry Andric         Value *Idx;
441fe6060f1SDimitry Andric         // Return index when its value does not exceed the allowed limit
442fe6060f1SDimitry Andric         // for the element type of the vector, otherwise return undefined.
443fe6060f1SDimitry Andric         if (IndexC->getValue().getActiveBits() <= BitWidth)
444fe6060f1SDimitry Andric           Idx = ConstantInt::get(Ty, IndexC->getValue().zextOrTrunc(BitWidth));
445fe6060f1SDimitry Andric         else
4465f757f3fSDimitry Andric           Idx = PoisonValue::get(Ty);
447fe6060f1SDimitry Andric         return replaceInstUsesWith(EI, Idx);
448fe6060f1SDimitry Andric       }
449fe6060f1SDimitry Andric     }
450fe6060f1SDimitry Andric 
4510b57cec5SDimitry Andric     // InstSimplify should handle cases where the index is invalid.
4525ffd83dbSDimitry Andric     // For fixed-length vector, it's invalid to extract out-of-range element.
453e8d8bef9SDimitry Andric     if (!EC.isScalable() && IndexC->getValue().uge(NumElts))
4540b57cec5SDimitry Andric       return nullptr;
4550b57cec5SDimitry Andric 
456349cc55cSDimitry Andric     if (Instruction *I = foldBitcastExtElt(EI))
4570b57cec5SDimitry Andric       return I;
4580b57cec5SDimitry Andric 
4590b57cec5SDimitry Andric     // If there's a vector PHI feeding a scalar use through this extractelement
4600b57cec5SDimitry Andric     // instruction, try to scalarize the PHI.
4610b57cec5SDimitry Andric     if (auto *Phi = dyn_cast<PHINode>(SrcVec))
4620b57cec5SDimitry Andric       if (Instruction *ScalarPHI = scalarizePHI(EI, Phi))
4630b57cec5SDimitry Andric         return ScalarPHI;
4640b57cec5SDimitry Andric   }
4650b57cec5SDimitry Andric 
4665ffd83dbSDimitry Andric   // TODO come up with a n-ary matcher that subsumes both unary and
4675ffd83dbSDimitry Andric   // binary matchers.
4685ffd83dbSDimitry Andric   UnaryOperator *UO;
469349cc55cSDimitry Andric   if (match(SrcVec, m_UnOp(UO)) && cheapToScalarize(SrcVec, Index)) {
4705ffd83dbSDimitry Andric     // extelt (unop X), Index --> unop (extelt X, Index)
4715ffd83dbSDimitry Andric     Value *X = UO->getOperand(0);
4725ffd83dbSDimitry Andric     Value *E = Builder.CreateExtractElement(X, Index);
4735ffd83dbSDimitry Andric     return UnaryOperator::CreateWithCopiedFlags(UO->getOpcode(), E, UO);
4745ffd83dbSDimitry Andric   }
4755ffd83dbSDimitry Andric 
476*0fca6ea1SDimitry Andric   // If the binop is not speculatable, we cannot hoist the extractelement if
477*0fca6ea1SDimitry Andric   // it may make the operand poison.
4780b57cec5SDimitry Andric   BinaryOperator *BO;
479*0fca6ea1SDimitry Andric   if (match(SrcVec, m_BinOp(BO)) && cheapToScalarize(SrcVec, Index) &&
480*0fca6ea1SDimitry Andric       (HasKnownValidIndex || isSafeToSpeculativelyExecute(BO))) {
4810b57cec5SDimitry Andric     // extelt (binop X, Y), Index --> binop (extelt X, Index), (extelt Y, Index)
4820b57cec5SDimitry Andric     Value *X = BO->getOperand(0), *Y = BO->getOperand(1);
4830b57cec5SDimitry Andric     Value *E0 = Builder.CreateExtractElement(X, Index);
4840b57cec5SDimitry Andric     Value *E1 = Builder.CreateExtractElement(Y, Index);
4850b57cec5SDimitry Andric     return BinaryOperator::CreateWithCopiedFlags(BO->getOpcode(), E0, E1, BO);
4860b57cec5SDimitry Andric   }
4870b57cec5SDimitry Andric 
4880b57cec5SDimitry Andric   Value *X, *Y;
4890b57cec5SDimitry Andric   CmpInst::Predicate Pred;
4900b57cec5SDimitry Andric   if (match(SrcVec, m_Cmp(Pred, m_Value(X), m_Value(Y))) &&
491349cc55cSDimitry Andric       cheapToScalarize(SrcVec, Index)) {
4920b57cec5SDimitry Andric     // extelt (cmp X, Y), Index --> cmp (extelt X, Index), (extelt Y, Index)
4930b57cec5SDimitry Andric     Value *E0 = Builder.CreateExtractElement(X, Index);
4940b57cec5SDimitry Andric     Value *E1 = Builder.CreateExtractElement(Y, Index);
495*0fca6ea1SDimitry Andric     CmpInst *SrcCmpInst = cast<CmpInst>(SrcVec);
496*0fca6ea1SDimitry Andric     return CmpInst::CreateWithCopiedFlags(SrcCmpInst->getOpcode(), Pred, E0, E1,
497*0fca6ea1SDimitry Andric                                           SrcCmpInst);
4980b57cec5SDimitry Andric   }
4990b57cec5SDimitry Andric 
5000b57cec5SDimitry Andric   if (auto *I = dyn_cast<Instruction>(SrcVec)) {
5010b57cec5SDimitry Andric     if (auto *IE = dyn_cast<InsertElementInst>(I)) {
5020eae32dcSDimitry Andric       // instsimplify already handled the case where the indices are constants
5030eae32dcSDimitry Andric       // and equal by value, if both are constants, they must not be the same
5040eae32dcSDimitry Andric       // value, extract from the pre-inserted value instead.
5055ffd83dbSDimitry Andric       if (isa<Constant>(IE->getOperand(2)) && IndexC)
5065ffd83dbSDimitry Andric         return replaceOperand(EI, 0, IE->getOperand(0));
507fe6060f1SDimitry Andric     } else if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) {
508fe6060f1SDimitry Andric       auto *VecType = cast<VectorType>(GEP->getType());
509fe6060f1SDimitry Andric       ElementCount EC = VecType->getElementCount();
510fe6060f1SDimitry Andric       uint64_t IdxVal = IndexC ? IndexC->getZExtValue() : 0;
511fe6060f1SDimitry Andric       if (IndexC && IdxVal < EC.getKnownMinValue() && GEP->hasOneUse()) {
512fe6060f1SDimitry Andric         // Find out why we have a vector result - these are a few examples:
513fe6060f1SDimitry Andric         //  1. We have a scalar pointer and a vector of indices, or
514fe6060f1SDimitry Andric         //  2. We have a vector of pointers and a scalar index, or
515fe6060f1SDimitry Andric         //  3. We have a vector of pointers and a vector of indices, etc.
516fe6060f1SDimitry Andric         // Here we only consider combining when there is exactly one vector
517fe6060f1SDimitry Andric         // operand, since the optimization is less obviously a win due to
518fe6060f1SDimitry Andric         // needing more than one extractelements.
519fe6060f1SDimitry Andric 
520fe6060f1SDimitry Andric         unsigned VectorOps =
521fe6060f1SDimitry Andric             llvm::count_if(GEP->operands(), [](const Value *V) {
522fe6060f1SDimitry Andric               return isa<VectorType>(V->getType());
523fe6060f1SDimitry Andric             });
5240eae32dcSDimitry Andric         if (VectorOps == 1) {
525fe6060f1SDimitry Andric           Value *NewPtr = GEP->getPointerOperand();
526fe6060f1SDimitry Andric           if (isa<VectorType>(NewPtr->getType()))
527fe6060f1SDimitry Andric             NewPtr = Builder.CreateExtractElement(NewPtr, IndexC);
528fe6060f1SDimitry Andric 
529fe6060f1SDimitry Andric           SmallVector<Value *> NewOps;
530fe6060f1SDimitry Andric           for (unsigned I = 1; I != GEP->getNumOperands(); ++I) {
531fe6060f1SDimitry Andric             Value *Op = GEP->getOperand(I);
532fe6060f1SDimitry Andric             if (isa<VectorType>(Op->getType()))
533fe6060f1SDimitry Andric               NewOps.push_back(Builder.CreateExtractElement(Op, IndexC));
534fe6060f1SDimitry Andric             else
535fe6060f1SDimitry Andric               NewOps.push_back(Op);
536fe6060f1SDimitry Andric           }
537fe6060f1SDimitry Andric 
538fe6060f1SDimitry Andric           GetElementPtrInst *NewGEP = GetElementPtrInst::Create(
53904eeddc0SDimitry Andric               GEP->getSourceElementType(), NewPtr, NewOps);
540fe6060f1SDimitry Andric           NewGEP->setIsInBounds(GEP->isInBounds());
541fe6060f1SDimitry Andric           return NewGEP;
542fe6060f1SDimitry Andric         }
5430eae32dcSDimitry Andric       }
5440b57cec5SDimitry Andric     } else if (auto *SVI = dyn_cast<ShuffleVectorInst>(I)) {
5450b57cec5SDimitry Andric       // If this is extracting an element from a shufflevector, figure out where
5460b57cec5SDimitry Andric       // it came from and extract from the appropriate input element instead.
5475ffd83dbSDimitry Andric       // Restrict the following transformation to fixed-length vector.
5485ffd83dbSDimitry Andric       if (isa<FixedVectorType>(SVI->getType()) && isa<ConstantInt>(Index)) {
5495ffd83dbSDimitry Andric         int SrcIdx =
5505ffd83dbSDimitry Andric             SVI->getMaskValue(cast<ConstantInt>(Index)->getZExtValue());
5510b57cec5SDimitry Andric         Value *Src;
5525ffd83dbSDimitry Andric         unsigned LHSWidth = cast<FixedVectorType>(SVI->getOperand(0)->getType())
5535ffd83dbSDimitry Andric                                 ->getNumElements();
5540b57cec5SDimitry Andric 
5550b57cec5SDimitry Andric         if (SrcIdx < 0)
55606c3fb27SDimitry Andric           return replaceInstUsesWith(EI, PoisonValue::get(EI.getType()));
5570b57cec5SDimitry Andric         if (SrcIdx < (int)LHSWidth)
5580b57cec5SDimitry Andric           Src = SVI->getOperand(0);
5590b57cec5SDimitry Andric         else {
5600b57cec5SDimitry Andric           SrcIdx -= LHSWidth;
5610b57cec5SDimitry Andric           Src = SVI->getOperand(1);
5620b57cec5SDimitry Andric         }
56306c3fb27SDimitry Andric         Type *Int64Ty = Type::getInt64Ty(EI.getContext());
5645ffd83dbSDimitry Andric         return ExtractElementInst::Create(
56506c3fb27SDimitry Andric             Src, ConstantInt::get(Int64Ty, SrcIdx, false));
5660b57cec5SDimitry Andric       }
5670b57cec5SDimitry Andric     } else if (auto *CI = dyn_cast<CastInst>(I)) {
5680b57cec5SDimitry Andric       // Canonicalize extractelement(cast) -> cast(extractelement).
5690b57cec5SDimitry Andric       // Bitcasts can change the number of vector elements, and they cost
5700b57cec5SDimitry Andric       // nothing.
5710b57cec5SDimitry Andric       if (CI->hasOneUse() && (CI->getOpcode() != Instruction::BitCast)) {
5720b57cec5SDimitry Andric         Value *EE = Builder.CreateExtractElement(CI->getOperand(0), Index);
5730b57cec5SDimitry Andric         return CastInst::Create(CI->getOpcode(), EE, EI.getType());
5740b57cec5SDimitry Andric       }
5750b57cec5SDimitry Andric     }
5760b57cec5SDimitry Andric   }
5770eae32dcSDimitry Andric 
5780eae32dcSDimitry Andric   // Run demanded elements after other transforms as this can drop flags on
5790eae32dcSDimitry Andric   // binops.  If there's two paths to the same final result, we prefer the
5800eae32dcSDimitry Andric   // one which doesn't force us to drop flags.
5810eae32dcSDimitry Andric   if (IndexC) {
5820eae32dcSDimitry Andric     ElementCount EC = EI.getVectorOperandType()->getElementCount();
5830eae32dcSDimitry Andric     unsigned NumElts = EC.getKnownMinValue();
5840eae32dcSDimitry Andric     // This instruction only demands the single element from the input vector.
5850eae32dcSDimitry Andric     // Skip for scalable type, the number of elements is unknown at
5860eae32dcSDimitry Andric     // compile-time.
5870eae32dcSDimitry Andric     if (!EC.isScalable() && NumElts != 1) {
5880eae32dcSDimitry Andric       // If the input vector has a single use, simplify it based on this use
5890eae32dcSDimitry Andric       // property.
5900eae32dcSDimitry Andric       if (SrcVec->hasOneUse()) {
591cb14a3feSDimitry Andric         APInt PoisonElts(NumElts, 0);
5920eae32dcSDimitry Andric         APInt DemandedElts(NumElts, 0);
5930eae32dcSDimitry Andric         DemandedElts.setBit(IndexC->getZExtValue());
5940eae32dcSDimitry Andric         if (Value *V =
595cb14a3feSDimitry Andric                 SimplifyDemandedVectorElts(SrcVec, DemandedElts, PoisonElts))
5960eae32dcSDimitry Andric           return replaceOperand(EI, 0, V);
5970eae32dcSDimitry Andric       } else {
5980eae32dcSDimitry Andric         // If the input vector has multiple uses, simplify it based on a union
5990eae32dcSDimitry Andric         // of all elements used.
6000eae32dcSDimitry Andric         APInt DemandedElts = findDemandedEltsByAllUsers(SrcVec);
6010eae32dcSDimitry Andric         if (!DemandedElts.isAllOnes()) {
602cb14a3feSDimitry Andric           APInt PoisonElts(NumElts, 0);
6030eae32dcSDimitry Andric           if (Value *V = SimplifyDemandedVectorElts(
604cb14a3feSDimitry Andric                   SrcVec, DemandedElts, PoisonElts, 0 /* Depth */,
6050eae32dcSDimitry Andric                   true /* AllowMultipleUsers */)) {
6060eae32dcSDimitry Andric             if (V != SrcVec) {
60706c3fb27SDimitry Andric               Worklist.addValue(SrcVec);
6080eae32dcSDimitry Andric               SrcVec->replaceAllUsesWith(V);
6090eae32dcSDimitry Andric               return &EI;
6100eae32dcSDimitry Andric             }
6110eae32dcSDimitry Andric           }
6120eae32dcSDimitry Andric         }
6130eae32dcSDimitry Andric       }
6140eae32dcSDimitry Andric     }
6150eae32dcSDimitry Andric   }
6160b57cec5SDimitry Andric   return nullptr;
6170b57cec5SDimitry Andric }
6180b57cec5SDimitry Andric 
6190b57cec5SDimitry Andric /// If V is a shuffle of values that ONLY returns elements from either LHS or
6200b57cec5SDimitry Andric /// RHS, return the shuffle mask and true. Otherwise, return false.
6210b57cec5SDimitry Andric static bool collectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
6225ffd83dbSDimitry Andric                                          SmallVectorImpl<int> &Mask) {
6230b57cec5SDimitry Andric   assert(LHS->getType() == RHS->getType() &&
6240b57cec5SDimitry Andric          "Invalid CollectSingleShuffleElements");
625e8d8bef9SDimitry Andric   unsigned NumElts = cast<FixedVectorType>(V->getType())->getNumElements();
6260b57cec5SDimitry Andric 
627*0fca6ea1SDimitry Andric   if (match(V, m_Poison())) {
6285ffd83dbSDimitry Andric     Mask.assign(NumElts, -1);
6290b57cec5SDimitry Andric     return true;
6300b57cec5SDimitry Andric   }
6310b57cec5SDimitry Andric 
6320b57cec5SDimitry Andric   if (V == LHS) {
6330b57cec5SDimitry Andric     for (unsigned i = 0; i != NumElts; ++i)
6345ffd83dbSDimitry Andric       Mask.push_back(i);
6350b57cec5SDimitry Andric     return true;
6360b57cec5SDimitry Andric   }
6370b57cec5SDimitry Andric 
6380b57cec5SDimitry Andric   if (V == RHS) {
6390b57cec5SDimitry Andric     for (unsigned i = 0; i != NumElts; ++i)
6405ffd83dbSDimitry Andric       Mask.push_back(i + NumElts);
6410b57cec5SDimitry Andric     return true;
6420b57cec5SDimitry Andric   }
6430b57cec5SDimitry Andric 
6440b57cec5SDimitry Andric   if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {
6450b57cec5SDimitry Andric     // If this is an insert of an extract from some other vector, include it.
6460b57cec5SDimitry Andric     Value *VecOp    = IEI->getOperand(0);
6470b57cec5SDimitry Andric     Value *ScalarOp = IEI->getOperand(1);
6480b57cec5SDimitry Andric     Value *IdxOp    = IEI->getOperand(2);
6490b57cec5SDimitry Andric 
6500b57cec5SDimitry Andric     if (!isa<ConstantInt>(IdxOp))
6510b57cec5SDimitry Andric       return false;
6520b57cec5SDimitry Andric     unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();
6530b57cec5SDimitry Andric 
65406c3fb27SDimitry Andric     if (isa<PoisonValue>(ScalarOp)) {  // inserting poison into vector.
6550b57cec5SDimitry Andric       // We can handle this if the vector we are inserting into is
6560b57cec5SDimitry Andric       // transitively ok.
6570b57cec5SDimitry Andric       if (collectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
65806c3fb27SDimitry Andric         // If so, update the mask to reflect the inserted poison.
6595ffd83dbSDimitry Andric         Mask[InsertedIdx] = -1;
6600b57cec5SDimitry Andric         return true;
6610b57cec5SDimitry Andric       }
6620b57cec5SDimitry Andric     } else if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)){
6630b57cec5SDimitry Andric       if (isa<ConstantInt>(EI->getOperand(1))) {
6640b57cec5SDimitry Andric         unsigned ExtractedIdx =
6650b57cec5SDimitry Andric         cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
6665ffd83dbSDimitry Andric         unsigned NumLHSElts =
667e8d8bef9SDimitry Andric             cast<FixedVectorType>(LHS->getType())->getNumElements();
6680b57cec5SDimitry Andric 
6690b57cec5SDimitry Andric         // This must be extracting from either LHS or RHS.
6700b57cec5SDimitry Andric         if (EI->getOperand(0) == LHS || EI->getOperand(0) == RHS) {
6710b57cec5SDimitry Andric           // We can handle this if the vector we are inserting into is
6720b57cec5SDimitry Andric           // transitively ok.
6730b57cec5SDimitry Andric           if (collectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
6740b57cec5SDimitry Andric             // If so, update the mask to reflect the inserted value.
6750b57cec5SDimitry Andric             if (EI->getOperand(0) == LHS) {
6765ffd83dbSDimitry Andric               Mask[InsertedIdx % NumElts] = ExtractedIdx;
6770b57cec5SDimitry Andric             } else {
6780b57cec5SDimitry Andric               assert(EI->getOperand(0) == RHS);
6795ffd83dbSDimitry Andric               Mask[InsertedIdx % NumElts] = ExtractedIdx + NumLHSElts;
6800b57cec5SDimitry Andric             }
6810b57cec5SDimitry Andric             return true;
6820b57cec5SDimitry Andric           }
6830b57cec5SDimitry Andric         }
6840b57cec5SDimitry Andric       }
6850b57cec5SDimitry Andric     }
6860b57cec5SDimitry Andric   }
6870b57cec5SDimitry Andric 
6880b57cec5SDimitry Andric   return false;
6890b57cec5SDimitry Andric }
6900b57cec5SDimitry Andric 
6910b57cec5SDimitry Andric /// If we have insertion into a vector that is wider than the vector that we
6920b57cec5SDimitry Andric /// are extracting from, try to widen the source vector to allow a single
6930b57cec5SDimitry Andric /// shufflevector to replace one or more insert/extract pairs.
69406c3fb27SDimitry Andric static bool replaceExtractElements(InsertElementInst *InsElt,
6950b57cec5SDimitry Andric                                    ExtractElementInst *ExtElt,
696e8d8bef9SDimitry Andric                                    InstCombinerImpl &IC) {
697e8d8bef9SDimitry Andric   auto *InsVecType = cast<FixedVectorType>(InsElt->getType());
698e8d8bef9SDimitry Andric   auto *ExtVecType = cast<FixedVectorType>(ExtElt->getVectorOperandType());
6995ffd83dbSDimitry Andric   unsigned NumInsElts = InsVecType->getNumElements();
7005ffd83dbSDimitry Andric   unsigned NumExtElts = ExtVecType->getNumElements();
7010b57cec5SDimitry Andric 
7020b57cec5SDimitry Andric   // The inserted-to vector must be wider than the extracted-from vector.
7030b57cec5SDimitry Andric   if (InsVecType->getElementType() != ExtVecType->getElementType() ||
7040b57cec5SDimitry Andric       NumExtElts >= NumInsElts)
70506c3fb27SDimitry Andric     return false;
7060b57cec5SDimitry Andric 
707fe6060f1SDimitry Andric   // Create a shuffle mask to widen the extended-from vector using poison
7080b57cec5SDimitry Andric   // values. The mask selects all of the values of the original vector followed
709fe6060f1SDimitry Andric   // by as many poison values as needed to create a vector of the same length
7100b57cec5SDimitry Andric   // as the inserted-to vector.
7115ffd83dbSDimitry Andric   SmallVector<int, 16> ExtendMask;
7120b57cec5SDimitry Andric   for (unsigned i = 0; i < NumExtElts; ++i)
7135ffd83dbSDimitry Andric     ExtendMask.push_back(i);
7140b57cec5SDimitry Andric   for (unsigned i = NumExtElts; i < NumInsElts; ++i)
7155ffd83dbSDimitry Andric     ExtendMask.push_back(-1);
7160b57cec5SDimitry Andric 
7170b57cec5SDimitry Andric   Value *ExtVecOp = ExtElt->getVectorOperand();
7180b57cec5SDimitry Andric   auto *ExtVecOpInst = dyn_cast<Instruction>(ExtVecOp);
7190b57cec5SDimitry Andric   BasicBlock *InsertionBlock = (ExtVecOpInst && !isa<PHINode>(ExtVecOpInst))
7200b57cec5SDimitry Andric                                    ? ExtVecOpInst->getParent()
7210b57cec5SDimitry Andric                                    : ExtElt->getParent();
7220b57cec5SDimitry Andric 
7230b57cec5SDimitry Andric   // TODO: This restriction matches the basic block check below when creating
7240b57cec5SDimitry Andric   // new extractelement instructions. If that limitation is removed, this one
7250b57cec5SDimitry Andric   // could also be removed. But for now, we just bail out to ensure that we
7260b57cec5SDimitry Andric   // will replace the extractelement instruction that is feeding our
7270b57cec5SDimitry Andric   // insertelement instruction. This allows the insertelement to then be
7280b57cec5SDimitry Andric   // replaced by a shufflevector. If the insertelement is not replaced, we can
7290b57cec5SDimitry Andric   // induce infinite looping because there's an optimization for extractelement
7300b57cec5SDimitry Andric   // that will delete our widening shuffle. This would trigger another attempt
7310b57cec5SDimitry Andric   // here to create that shuffle, and we spin forever.
7320b57cec5SDimitry Andric   if (InsertionBlock != InsElt->getParent())
73306c3fb27SDimitry Andric     return false;
7340b57cec5SDimitry Andric 
7350b57cec5SDimitry Andric   // TODO: This restriction matches the check in visitInsertElementInst() and
7360b57cec5SDimitry Andric   // prevents an infinite loop caused by not turning the extract/insert pair
7370b57cec5SDimitry Andric   // into a shuffle. We really should not need either check, but we're lacking
7380b57cec5SDimitry Andric   // folds for shufflevectors because we're afraid to generate shuffle masks
7390b57cec5SDimitry Andric   // that the backend can't handle.
7400b57cec5SDimitry Andric   if (InsElt->hasOneUse() && isa<InsertElementInst>(InsElt->user_back()))
74106c3fb27SDimitry Andric     return false;
7420b57cec5SDimitry Andric 
743349cc55cSDimitry Andric   auto *WideVec = new ShuffleVectorInst(ExtVecOp, ExtendMask);
7440b57cec5SDimitry Andric 
7450b57cec5SDimitry Andric   // Insert the new shuffle after the vector operand of the extract is defined
7460b57cec5SDimitry Andric   // (as long as it's not a PHI) or at the start of the basic block of the
7470b57cec5SDimitry Andric   // extract, so any subsequent extracts in the same basic block can use it.
7480b57cec5SDimitry Andric   // TODO: Insert before the earliest ExtractElementInst that is replaced.
7490b57cec5SDimitry Andric   if (ExtVecOpInst && !isa<PHINode>(ExtVecOpInst))
7500b57cec5SDimitry Andric     WideVec->insertAfter(ExtVecOpInst);
7510b57cec5SDimitry Andric   else
7525f757f3fSDimitry Andric     IC.InsertNewInstWith(WideVec, ExtElt->getParent()->getFirstInsertionPt());
7530b57cec5SDimitry Andric 
7540b57cec5SDimitry Andric   // Replace extracts from the original narrow vector with extracts from the new
7550b57cec5SDimitry Andric   // wide vector.
7560b57cec5SDimitry Andric   for (User *U : ExtVecOp->users()) {
7570b57cec5SDimitry Andric     ExtractElementInst *OldExt = dyn_cast<ExtractElementInst>(U);
7580b57cec5SDimitry Andric     if (!OldExt || OldExt->getParent() != WideVec->getParent())
7590b57cec5SDimitry Andric       continue;
7600b57cec5SDimitry Andric     auto *NewExt = ExtractElementInst::Create(WideVec, OldExt->getOperand(1));
7615f757f3fSDimitry Andric     IC.InsertNewInstWith(NewExt, OldExt->getIterator());
7620b57cec5SDimitry Andric     IC.replaceInstUsesWith(*OldExt, NewExt);
76306c3fb27SDimitry Andric     // Add the old extracts to the worklist for DCE. We can't remove the
76406c3fb27SDimitry Andric     // extracts directly, because they may still be used by the calling code.
76506c3fb27SDimitry Andric     IC.addToWorklist(OldExt);
7660b57cec5SDimitry Andric   }
76706c3fb27SDimitry Andric 
76806c3fb27SDimitry Andric   return true;
7690b57cec5SDimitry Andric }
7700b57cec5SDimitry Andric 
7710b57cec5SDimitry Andric /// We are building a shuffle to create V, which is a sequence of insertelement,
7720b57cec5SDimitry Andric /// extractelement pairs. If PermittedRHS is set, then we must either use it or
7730b57cec5SDimitry Andric /// not rely on the second vector source. Return a std::pair containing the
7740b57cec5SDimitry Andric /// left and right vectors of the proposed shuffle (or 0), and set the Mask
7750b57cec5SDimitry Andric /// parameter as required.
7760b57cec5SDimitry Andric ///
7770b57cec5SDimitry Andric /// Note: we intentionally don't try to fold earlier shuffles since they have
7780b57cec5SDimitry Andric /// often been chosen carefully to be efficiently implementable on the target.
7790b57cec5SDimitry Andric using ShuffleOps = std::pair<Value *, Value *>;
7800b57cec5SDimitry Andric 
7815ffd83dbSDimitry Andric static ShuffleOps collectShuffleElements(Value *V, SmallVectorImpl<int> &Mask,
7820b57cec5SDimitry Andric                                          Value *PermittedRHS,
78306c3fb27SDimitry Andric                                          InstCombinerImpl &IC, bool &Rerun) {
7840b57cec5SDimitry Andric   assert(V->getType()->isVectorTy() && "Invalid shuffle!");
7855ffd83dbSDimitry Andric   unsigned NumElts = cast<FixedVectorType>(V->getType())->getNumElements();
7860b57cec5SDimitry Andric 
787cb14a3feSDimitry Andric   if (match(V, m_Poison())) {
7885ffd83dbSDimitry Andric     Mask.assign(NumElts, -1);
7890b57cec5SDimitry Andric     return std::make_pair(
790cb14a3feSDimitry Andric         PermittedRHS ? PoisonValue::get(PermittedRHS->getType()) : V, nullptr);
7910b57cec5SDimitry Andric   }
7920b57cec5SDimitry Andric 
7930b57cec5SDimitry Andric   if (isa<ConstantAggregateZero>(V)) {
7945ffd83dbSDimitry Andric     Mask.assign(NumElts, 0);
7950b57cec5SDimitry Andric     return std::make_pair(V, nullptr);
7960b57cec5SDimitry Andric   }
7970b57cec5SDimitry Andric 
7980b57cec5SDimitry Andric   if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {
7990b57cec5SDimitry Andric     // If this is an insert of an extract from some other vector, include it.
8000b57cec5SDimitry Andric     Value *VecOp    = IEI->getOperand(0);
8010b57cec5SDimitry Andric     Value *ScalarOp = IEI->getOperand(1);
8020b57cec5SDimitry Andric     Value *IdxOp    = IEI->getOperand(2);
8030b57cec5SDimitry Andric 
8040b57cec5SDimitry Andric     if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)) {
8050b57cec5SDimitry Andric       if (isa<ConstantInt>(EI->getOperand(1)) && isa<ConstantInt>(IdxOp)) {
8060b57cec5SDimitry Andric         unsigned ExtractedIdx =
8070b57cec5SDimitry Andric           cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
8080b57cec5SDimitry Andric         unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();
8090b57cec5SDimitry Andric 
8100b57cec5SDimitry Andric         // Either the extracted from or inserted into vector must be RHSVec,
8110b57cec5SDimitry Andric         // otherwise we'd end up with a shuffle of three inputs.
8120b57cec5SDimitry Andric         if (EI->getOperand(0) == PermittedRHS || PermittedRHS == nullptr) {
8130b57cec5SDimitry Andric           Value *RHS = EI->getOperand(0);
81406c3fb27SDimitry Andric           ShuffleOps LR = collectShuffleElements(VecOp, Mask, RHS, IC, Rerun);
8150b57cec5SDimitry Andric           assert(LR.second == nullptr || LR.second == RHS);
8160b57cec5SDimitry Andric 
8170b57cec5SDimitry Andric           if (LR.first->getType() != RHS->getType()) {
8180b57cec5SDimitry Andric             // Although we are giving up for now, see if we can create extracts
8190b57cec5SDimitry Andric             // that match the inserts for another round of combining.
82006c3fb27SDimitry Andric             if (replaceExtractElements(IEI, EI, IC))
82106c3fb27SDimitry Andric               Rerun = true;
8220b57cec5SDimitry Andric 
8230b57cec5SDimitry Andric             // We tried our best, but we can't find anything compatible with RHS
8240b57cec5SDimitry Andric             // further up the chain. Return a trivial shuffle.
8250b57cec5SDimitry Andric             for (unsigned i = 0; i < NumElts; ++i)
8265ffd83dbSDimitry Andric               Mask[i] = i;
8270b57cec5SDimitry Andric             return std::make_pair(V, nullptr);
8280b57cec5SDimitry Andric           }
8290b57cec5SDimitry Andric 
8305ffd83dbSDimitry Andric           unsigned NumLHSElts =
831e8d8bef9SDimitry Andric               cast<FixedVectorType>(RHS->getType())->getNumElements();
8325ffd83dbSDimitry Andric           Mask[InsertedIdx % NumElts] = NumLHSElts + ExtractedIdx;
8330b57cec5SDimitry Andric           return std::make_pair(LR.first, RHS);
8340b57cec5SDimitry Andric         }
8350b57cec5SDimitry Andric 
8360b57cec5SDimitry Andric         if (VecOp == PermittedRHS) {
8370b57cec5SDimitry Andric           // We've gone as far as we can: anything on the other side of the
8380b57cec5SDimitry Andric           // extractelement will already have been converted into a shuffle.
8390b57cec5SDimitry Andric           unsigned NumLHSElts =
840e8d8bef9SDimitry Andric               cast<FixedVectorType>(EI->getOperand(0)->getType())
841e8d8bef9SDimitry Andric                   ->getNumElements();
8420b57cec5SDimitry Andric           for (unsigned i = 0; i != NumElts; ++i)
8435ffd83dbSDimitry Andric             Mask.push_back(i == InsertedIdx ? ExtractedIdx : NumLHSElts + i);
8440b57cec5SDimitry Andric           return std::make_pair(EI->getOperand(0), PermittedRHS);
8450b57cec5SDimitry Andric         }
8460b57cec5SDimitry Andric 
8470b57cec5SDimitry Andric         // If this insertelement is a chain that comes from exactly these two
8480b57cec5SDimitry Andric         // vectors, return the vector and the effective shuffle.
8490b57cec5SDimitry Andric         if (EI->getOperand(0)->getType() == PermittedRHS->getType() &&
8500b57cec5SDimitry Andric             collectSingleShuffleElements(IEI, EI->getOperand(0), PermittedRHS,
8510b57cec5SDimitry Andric                                          Mask))
8520b57cec5SDimitry Andric           return std::make_pair(EI->getOperand(0), PermittedRHS);
8530b57cec5SDimitry Andric       }
8540b57cec5SDimitry Andric     }
8550b57cec5SDimitry Andric   }
8560b57cec5SDimitry Andric 
8570b57cec5SDimitry Andric   // Otherwise, we can't do anything fancy. Return an identity vector.
8580b57cec5SDimitry Andric   for (unsigned i = 0; i != NumElts; ++i)
8595ffd83dbSDimitry Andric     Mask.push_back(i);
8600b57cec5SDimitry Andric   return std::make_pair(V, nullptr);
8610b57cec5SDimitry Andric }
8620b57cec5SDimitry Andric 
863e8d8bef9SDimitry Andric /// Look for chain of insertvalue's that fully define an aggregate, and trace
864e8d8bef9SDimitry Andric /// back the values inserted, see if they are all were extractvalue'd from
865e8d8bef9SDimitry Andric /// the same source aggregate from the exact same element indexes.
866e8d8bef9SDimitry Andric /// If they were, just reuse the source aggregate.
867e8d8bef9SDimitry Andric /// This potentially deals with PHI indirections.
868e8d8bef9SDimitry Andric Instruction *InstCombinerImpl::foldAggregateConstructionIntoAggregateReuse(
869e8d8bef9SDimitry Andric     InsertValueInst &OrigIVI) {
870e8d8bef9SDimitry Andric   Type *AggTy = OrigIVI.getType();
871e8d8bef9SDimitry Andric   unsigned NumAggElts;
872e8d8bef9SDimitry Andric   switch (AggTy->getTypeID()) {
873e8d8bef9SDimitry Andric   case Type::StructTyID:
874e8d8bef9SDimitry Andric     NumAggElts = AggTy->getStructNumElements();
875e8d8bef9SDimitry Andric     break;
876e8d8bef9SDimitry Andric   case Type::ArrayTyID:
877e8d8bef9SDimitry Andric     NumAggElts = AggTy->getArrayNumElements();
878e8d8bef9SDimitry Andric     break;
879e8d8bef9SDimitry Andric   default:
880e8d8bef9SDimitry Andric     llvm_unreachable("Unhandled aggregate type?");
881e8d8bef9SDimitry Andric   }
882e8d8bef9SDimitry Andric 
883e8d8bef9SDimitry Andric   // Arbitrary aggregate size cut-off. Motivation for limit of 2 is to be able
884e8d8bef9SDimitry Andric   // to handle clang C++ exception struct (which is hardcoded as {i8*, i32}),
885e8d8bef9SDimitry Andric   // FIXME: any interesting patterns to be caught with larger limit?
886e8d8bef9SDimitry Andric   assert(NumAggElts > 0 && "Aggregate should have elements.");
887e8d8bef9SDimitry Andric   if (NumAggElts > 2)
888e8d8bef9SDimitry Andric     return nullptr;
889e8d8bef9SDimitry Andric 
890bdd1243dSDimitry Andric   static constexpr auto NotFound = std::nullopt;
891e8d8bef9SDimitry Andric   static constexpr auto FoundMismatch = nullptr;
892e8d8bef9SDimitry Andric 
893e8d8bef9SDimitry Andric   // Try to find a value of each element of an aggregate.
894e8d8bef9SDimitry Andric   // FIXME: deal with more complex, not one-dimensional, aggregate types
895bdd1243dSDimitry Andric   SmallVector<std::optional<Instruction *>, 2> AggElts(NumAggElts, NotFound);
896e8d8bef9SDimitry Andric 
897e8d8bef9SDimitry Andric   // Do we know values for each element of the aggregate?
898e8d8bef9SDimitry Andric   auto KnowAllElts = [&AggElts]() {
899bdd1243dSDimitry Andric     return !llvm::is_contained(AggElts, NotFound);
900e8d8bef9SDimitry Andric   };
901e8d8bef9SDimitry Andric 
902e8d8bef9SDimitry Andric   int Depth = 0;
903e8d8bef9SDimitry Andric 
904e8d8bef9SDimitry Andric   // Arbitrary `insertvalue` visitation depth limit. Let's be okay with
905e8d8bef9SDimitry Andric   // every element being overwritten twice, which should never happen.
906e8d8bef9SDimitry Andric   static const int DepthLimit = 2 * NumAggElts;
907e8d8bef9SDimitry Andric 
908e8d8bef9SDimitry Andric   // Recurse up the chain of `insertvalue` aggregate operands until either we've
909e8d8bef9SDimitry Andric   // reconstructed full initializer or can't visit any more `insertvalue`'s.
910e8d8bef9SDimitry Andric   for (InsertValueInst *CurrIVI = &OrigIVI;
911e8d8bef9SDimitry Andric        Depth < DepthLimit && CurrIVI && !KnowAllElts();
912e8d8bef9SDimitry Andric        CurrIVI = dyn_cast<InsertValueInst>(CurrIVI->getAggregateOperand()),
913e8d8bef9SDimitry Andric                        ++Depth) {
914fe6060f1SDimitry Andric     auto *InsertedValue =
915fe6060f1SDimitry Andric         dyn_cast<Instruction>(CurrIVI->getInsertedValueOperand());
916fe6060f1SDimitry Andric     if (!InsertedValue)
917fe6060f1SDimitry Andric       return nullptr; // Inserted value must be produced by an instruction.
918fe6060f1SDimitry Andric 
919e8d8bef9SDimitry Andric     ArrayRef<unsigned int> Indices = CurrIVI->getIndices();
920e8d8bef9SDimitry Andric 
921e8d8bef9SDimitry Andric     // Don't bother with more than single-level aggregates.
922e8d8bef9SDimitry Andric     if (Indices.size() != 1)
923e8d8bef9SDimitry Andric       return nullptr; // FIXME: deal with more complex aggregates?
924e8d8bef9SDimitry Andric 
925e8d8bef9SDimitry Andric     // Now, we may have already previously recorded the value for this element
926e8d8bef9SDimitry Andric     // of an aggregate. If we did, that means the CurrIVI will later be
927e8d8bef9SDimitry Andric     // overwritten with the already-recorded value. But if not, let's record it!
928bdd1243dSDimitry Andric     std::optional<Instruction *> &Elt = AggElts[Indices.front()];
92981ad6265SDimitry Andric     Elt = Elt.value_or(InsertedValue);
930e8d8bef9SDimitry Andric 
931e8d8bef9SDimitry Andric     // FIXME: should we handle chain-terminating undef base operand?
932e8d8bef9SDimitry Andric   }
933e8d8bef9SDimitry Andric 
934e8d8bef9SDimitry Andric   // Was that sufficient to deduce the full initializer for the aggregate?
935e8d8bef9SDimitry Andric   if (!KnowAllElts())
936e8d8bef9SDimitry Andric     return nullptr; // Give up then.
937e8d8bef9SDimitry Andric 
938e8d8bef9SDimitry Andric   // We now want to find the source[s] of the aggregate elements we've found.
939e8d8bef9SDimitry Andric   // And with "source" we mean the original aggregate[s] from which
940e8d8bef9SDimitry Andric   // the inserted elements were extracted. This may require PHI translation.
941e8d8bef9SDimitry Andric 
942e8d8bef9SDimitry Andric   enum class AggregateDescription {
943e8d8bef9SDimitry Andric     /// When analyzing the value that was inserted into an aggregate, we did
944e8d8bef9SDimitry Andric     /// not manage to find defining `extractvalue` instruction to analyze.
945e8d8bef9SDimitry Andric     NotFound,
946e8d8bef9SDimitry Andric     /// When analyzing the value that was inserted into an aggregate, we did
947e8d8bef9SDimitry Andric     /// manage to find defining `extractvalue` instruction[s], and everything
948e8d8bef9SDimitry Andric     /// matched perfectly - aggregate type, element insertion/extraction index.
949e8d8bef9SDimitry Andric     Found,
950e8d8bef9SDimitry Andric     /// When analyzing the value that was inserted into an aggregate, we did
951e8d8bef9SDimitry Andric     /// manage to find defining `extractvalue` instruction, but there was
952e8d8bef9SDimitry Andric     /// a mismatch: either the source type from which the extraction was didn't
953e8d8bef9SDimitry Andric     /// match the aggregate type into which the insertion was,
954e8d8bef9SDimitry Andric     /// or the extraction/insertion channels mismatched,
955e8d8bef9SDimitry Andric     /// or different elements had different source aggregates.
956e8d8bef9SDimitry Andric     FoundMismatch
957e8d8bef9SDimitry Andric   };
958bdd1243dSDimitry Andric   auto Describe = [](std::optional<Value *> SourceAggregate) {
959e8d8bef9SDimitry Andric     if (SourceAggregate == NotFound)
960e8d8bef9SDimitry Andric       return AggregateDescription::NotFound;
961e8d8bef9SDimitry Andric     if (*SourceAggregate == FoundMismatch)
962e8d8bef9SDimitry Andric       return AggregateDescription::FoundMismatch;
963e8d8bef9SDimitry Andric     return AggregateDescription::Found;
964e8d8bef9SDimitry Andric   };
965e8d8bef9SDimitry Andric 
966e8d8bef9SDimitry Andric   // Given the value \p Elt that was being inserted into element \p EltIdx of an
967e8d8bef9SDimitry Andric   // aggregate AggTy, see if \p Elt was originally defined by an
968e8d8bef9SDimitry Andric   // appropriate extractvalue (same element index, same aggregate type).
969e8d8bef9SDimitry Andric   // If found, return the source aggregate from which the extraction was.
970e8d8bef9SDimitry Andric   // If \p PredBB is provided, does PHI translation of an \p Elt first.
971e8d8bef9SDimitry Andric   auto FindSourceAggregate =
972bdd1243dSDimitry Andric       [&](Instruction *Elt, unsigned EltIdx, std::optional<BasicBlock *> UseBB,
973bdd1243dSDimitry Andric           std::optional<BasicBlock *> PredBB) -> std::optional<Value *> {
974e8d8bef9SDimitry Andric     // For now(?), only deal with, at most, a single level of PHI indirection.
975e8d8bef9SDimitry Andric     if (UseBB && PredBB)
976fe6060f1SDimitry Andric       Elt = dyn_cast<Instruction>(Elt->DoPHITranslation(*UseBB, *PredBB));
977e8d8bef9SDimitry Andric     // FIXME: deal with multiple levels of PHI indirection?
978e8d8bef9SDimitry Andric 
979e8d8bef9SDimitry Andric     // Did we find an extraction?
980fe6060f1SDimitry Andric     auto *EVI = dyn_cast_or_null<ExtractValueInst>(Elt);
981e8d8bef9SDimitry Andric     if (!EVI)
982e8d8bef9SDimitry Andric       return NotFound;
983e8d8bef9SDimitry Andric 
984e8d8bef9SDimitry Andric     Value *SourceAggregate = EVI->getAggregateOperand();
985e8d8bef9SDimitry Andric 
986e8d8bef9SDimitry Andric     // Is the extraction from the same type into which the insertion was?
987e8d8bef9SDimitry Andric     if (SourceAggregate->getType() != AggTy)
988e8d8bef9SDimitry Andric       return FoundMismatch;
989e8d8bef9SDimitry Andric     // And the element index doesn't change between extraction and insertion?
990e8d8bef9SDimitry Andric     if (EVI->getNumIndices() != 1 || EltIdx != EVI->getIndices().front())
991e8d8bef9SDimitry Andric       return FoundMismatch;
992e8d8bef9SDimitry Andric 
993e8d8bef9SDimitry Andric     return SourceAggregate; // AggregateDescription::Found
994e8d8bef9SDimitry Andric   };
995e8d8bef9SDimitry Andric 
996e8d8bef9SDimitry Andric   // Given elements AggElts that were constructing an aggregate OrigIVI,
997e8d8bef9SDimitry Andric   // see if we can find appropriate source aggregate for each of the elements,
998e8d8bef9SDimitry Andric   // and see it's the same aggregate for each element. If so, return it.
999e8d8bef9SDimitry Andric   auto FindCommonSourceAggregate =
1000bdd1243dSDimitry Andric       [&](std::optional<BasicBlock *> UseBB,
1001bdd1243dSDimitry Andric           std::optional<BasicBlock *> PredBB) -> std::optional<Value *> {
1002bdd1243dSDimitry Andric     std::optional<Value *> SourceAggregate;
1003e8d8bef9SDimitry Andric 
1004e8d8bef9SDimitry Andric     for (auto I : enumerate(AggElts)) {
1005e8d8bef9SDimitry Andric       assert(Describe(SourceAggregate) != AggregateDescription::FoundMismatch &&
1006e8d8bef9SDimitry Andric              "We don't store nullptr in SourceAggregate!");
1007e8d8bef9SDimitry Andric       assert((Describe(SourceAggregate) == AggregateDescription::Found) ==
1008e8d8bef9SDimitry Andric                  (I.index() != 0) &&
1009349cc55cSDimitry Andric              "SourceAggregate should be valid after the first element,");
1010e8d8bef9SDimitry Andric 
1011e8d8bef9SDimitry Andric       // For this element, is there a plausible source aggregate?
1012e8d8bef9SDimitry Andric       // FIXME: we could special-case undef element, IFF we know that in the
1013e8d8bef9SDimitry Andric       //        source aggregate said element isn't poison.
1014bdd1243dSDimitry Andric       std::optional<Value *> SourceAggregateForElement =
1015e8d8bef9SDimitry Andric           FindSourceAggregate(*I.value(), I.index(), UseBB, PredBB);
1016e8d8bef9SDimitry Andric 
1017e8d8bef9SDimitry Andric       // Okay, what have we found? Does that correlate with previous findings?
1018e8d8bef9SDimitry Andric 
1019e8d8bef9SDimitry Andric       // Regardless of whether or not we have previously found source
1020e8d8bef9SDimitry Andric       // aggregate for previous elements (if any), if we didn't find one for
1021e8d8bef9SDimitry Andric       // this element, passthrough whatever we have just found.
1022e8d8bef9SDimitry Andric       if (Describe(SourceAggregateForElement) != AggregateDescription::Found)
1023e8d8bef9SDimitry Andric         return SourceAggregateForElement;
1024e8d8bef9SDimitry Andric 
1025e8d8bef9SDimitry Andric       // Okay, we have found source aggregate for this element.
1026e8d8bef9SDimitry Andric       // Let's see what we already know from previous elements, if any.
1027e8d8bef9SDimitry Andric       switch (Describe(SourceAggregate)) {
1028e8d8bef9SDimitry Andric       case AggregateDescription::NotFound:
1029e8d8bef9SDimitry Andric         // This is apparently the first element that we have examined.
1030e8d8bef9SDimitry Andric         SourceAggregate = SourceAggregateForElement; // Record the aggregate!
1031e8d8bef9SDimitry Andric         continue; // Great, now look at next element.
1032e8d8bef9SDimitry Andric       case AggregateDescription::Found:
1033e8d8bef9SDimitry Andric         // We have previously already successfully examined other elements.
1034e8d8bef9SDimitry Andric         // Is this the same source aggregate we've found for other elements?
1035e8d8bef9SDimitry Andric         if (*SourceAggregateForElement != *SourceAggregate)
1036e8d8bef9SDimitry Andric           return FoundMismatch;
1037e8d8bef9SDimitry Andric         continue; // Still the same aggregate, look at next element.
1038e8d8bef9SDimitry Andric       case AggregateDescription::FoundMismatch:
1039e8d8bef9SDimitry Andric         llvm_unreachable("Can't happen. We would have early-exited then.");
1040e8d8bef9SDimitry Andric       };
1041e8d8bef9SDimitry Andric     }
1042e8d8bef9SDimitry Andric 
1043e8d8bef9SDimitry Andric     assert(Describe(SourceAggregate) == AggregateDescription::Found &&
1044e8d8bef9SDimitry Andric            "Must be a valid Value");
1045e8d8bef9SDimitry Andric     return *SourceAggregate;
1046e8d8bef9SDimitry Andric   };
1047e8d8bef9SDimitry Andric 
1048bdd1243dSDimitry Andric   std::optional<Value *> SourceAggregate;
1049e8d8bef9SDimitry Andric 
1050e8d8bef9SDimitry Andric   // Can we find the source aggregate without looking at predecessors?
1051bdd1243dSDimitry Andric   SourceAggregate = FindCommonSourceAggregate(/*UseBB=*/std::nullopt,
1052bdd1243dSDimitry Andric                                               /*PredBB=*/std::nullopt);
1053e8d8bef9SDimitry Andric   if (Describe(SourceAggregate) != AggregateDescription::NotFound) {
1054e8d8bef9SDimitry Andric     if (Describe(SourceAggregate) == AggregateDescription::FoundMismatch)
1055e8d8bef9SDimitry Andric       return nullptr; // Conflicting source aggregates!
1056e8d8bef9SDimitry Andric     ++NumAggregateReconstructionsSimplified;
1057e8d8bef9SDimitry Andric     return replaceInstUsesWith(OrigIVI, *SourceAggregate);
1058e8d8bef9SDimitry Andric   }
1059e8d8bef9SDimitry Andric 
1060e8d8bef9SDimitry Andric   // Okay, apparently we need to look at predecessors.
1061e8d8bef9SDimitry Andric 
1062e8d8bef9SDimitry Andric   // We should be smart about picking the "use" basic block, which will be the
1063e8d8bef9SDimitry Andric   // merge point for aggregate, where we'll insert the final PHI that will be
1064e8d8bef9SDimitry Andric   // used instead of OrigIVI. Basic block of OrigIVI is *not* the right choice.
1065e8d8bef9SDimitry Andric   // We should look in which blocks each of the AggElts is being defined,
1066e8d8bef9SDimitry Andric   // they all should be defined in the same basic block.
1067e8d8bef9SDimitry Andric   BasicBlock *UseBB = nullptr;
1068e8d8bef9SDimitry Andric 
1069bdd1243dSDimitry Andric   for (const std::optional<Instruction *> &I : AggElts) {
1070fe6060f1SDimitry Andric     BasicBlock *BB = (*I)->getParent();
1071e8d8bef9SDimitry Andric     // If it's the first instruction we've encountered, record the basic block.
1072e8d8bef9SDimitry Andric     if (!UseBB) {
1073e8d8bef9SDimitry Andric       UseBB = BB;
1074e8d8bef9SDimitry Andric       continue;
1075e8d8bef9SDimitry Andric     }
1076e8d8bef9SDimitry Andric     // Otherwise, this must be the same basic block we've seen previously.
1077e8d8bef9SDimitry Andric     if (UseBB != BB)
1078e8d8bef9SDimitry Andric       return nullptr;
1079e8d8bef9SDimitry Andric   }
1080e8d8bef9SDimitry Andric 
1081e8d8bef9SDimitry Andric   // If *all* of the elements are basic-block-independent, meaning they are
1082e8d8bef9SDimitry Andric   // either function arguments, or constant expressions, then if we didn't
1083e8d8bef9SDimitry Andric   // handle them without predecessor-aware handling, we won't handle them now.
1084e8d8bef9SDimitry Andric   if (!UseBB)
1085e8d8bef9SDimitry Andric     return nullptr;
1086e8d8bef9SDimitry Andric 
1087e8d8bef9SDimitry Andric   // If we didn't manage to find source aggregate without looking at
1088e8d8bef9SDimitry Andric   // predecessors, and there are no predecessors to look at, then we're done.
1089e8d8bef9SDimitry Andric   if (pred_empty(UseBB))
1090e8d8bef9SDimitry Andric     return nullptr;
1091e8d8bef9SDimitry Andric 
1092e8d8bef9SDimitry Andric   // Arbitrary predecessor count limit.
1093e8d8bef9SDimitry Andric   static const int PredCountLimit = 64;
1094e8d8bef9SDimitry Andric 
1095e8d8bef9SDimitry Andric   // Cache the (non-uniqified!) list of predecessors in a vector,
1096e8d8bef9SDimitry Andric   // checking the limit at the same time for efficiency.
1097e8d8bef9SDimitry Andric   SmallVector<BasicBlock *, 4> Preds; // May have duplicates!
1098e8d8bef9SDimitry Andric   for (BasicBlock *Pred : predecessors(UseBB)) {
1099e8d8bef9SDimitry Andric     // Don't bother if there are too many predecessors.
1100e8d8bef9SDimitry Andric     if (Preds.size() >= PredCountLimit) // FIXME: only count duplicates once?
1101e8d8bef9SDimitry Andric       return nullptr;
1102e8d8bef9SDimitry Andric     Preds.emplace_back(Pred);
1103e8d8bef9SDimitry Andric   }
1104e8d8bef9SDimitry Andric 
1105e8d8bef9SDimitry Andric   // For each predecessor, what is the source aggregate,
1106e8d8bef9SDimitry Andric   // from which all the elements were originally extracted from?
1107e8d8bef9SDimitry Andric   // Note that we want for the map to have stable iteration order!
1108e8d8bef9SDimitry Andric   SmallDenseMap<BasicBlock *, Value *, 4> SourceAggregates;
1109e8d8bef9SDimitry Andric   for (BasicBlock *Pred : Preds) {
1110e8d8bef9SDimitry Andric     std::pair<decltype(SourceAggregates)::iterator, bool> IV =
1111e8d8bef9SDimitry Andric         SourceAggregates.insert({Pred, nullptr});
1112e8d8bef9SDimitry Andric     // Did we already evaluate this predecessor?
1113e8d8bef9SDimitry Andric     if (!IV.second)
1114e8d8bef9SDimitry Andric       continue;
1115e8d8bef9SDimitry Andric 
1116e8d8bef9SDimitry Andric     // Let's hope that when coming from predecessor Pred, all elements of the
1117e8d8bef9SDimitry Andric     // aggregate produced by OrigIVI must have been originally extracted from
1118e8d8bef9SDimitry Andric     // the same aggregate. Is that so? Can we find said original aggregate?
1119e8d8bef9SDimitry Andric     SourceAggregate = FindCommonSourceAggregate(UseBB, Pred);
1120e8d8bef9SDimitry Andric     if (Describe(SourceAggregate) != AggregateDescription::Found)
1121e8d8bef9SDimitry Andric       return nullptr; // Give up.
1122e8d8bef9SDimitry Andric     IV.first->second = *SourceAggregate;
1123e8d8bef9SDimitry Andric   }
1124e8d8bef9SDimitry Andric 
1125e8d8bef9SDimitry Andric   // All good! Now we just need to thread the source aggregates here.
1126e8d8bef9SDimitry Andric   // Note that we have to insert the new PHI here, ourselves, because we can't
1127e8d8bef9SDimitry Andric   // rely on InstCombinerImpl::run() inserting it into the right basic block.
1128e8d8bef9SDimitry Andric   // Note that the same block can be a predecessor more than once,
1129e8d8bef9SDimitry Andric   // and we need to preserve that invariant for the PHI node.
1130e8d8bef9SDimitry Andric   BuilderTy::InsertPointGuard Guard(Builder);
11315f757f3fSDimitry Andric   Builder.SetInsertPoint(UseBB, UseBB->getFirstNonPHIIt());
1132e8d8bef9SDimitry Andric   auto *PHI =
1133e8d8bef9SDimitry Andric       Builder.CreatePHI(AggTy, Preds.size(), OrigIVI.getName() + ".merged");
1134e8d8bef9SDimitry Andric   for (BasicBlock *Pred : Preds)
1135e8d8bef9SDimitry Andric     PHI->addIncoming(SourceAggregates[Pred], Pred);
1136e8d8bef9SDimitry Andric 
1137e8d8bef9SDimitry Andric   ++NumAggregateReconstructionsSimplified;
1138e8d8bef9SDimitry Andric   return replaceInstUsesWith(OrigIVI, PHI);
1139e8d8bef9SDimitry Andric }
1140e8d8bef9SDimitry Andric 
11410b57cec5SDimitry Andric /// Try to find redundant insertvalue instructions, like the following ones:
11420b57cec5SDimitry Andric ///  %0 = insertvalue { i8, i32 } undef, i8 %x, 0
11430b57cec5SDimitry Andric ///  %1 = insertvalue { i8, i32 } %0,    i8 %y, 0
11440b57cec5SDimitry Andric /// Here the second instruction inserts values at the same indices, as the
11450b57cec5SDimitry Andric /// first one, making the first one redundant.
11460b57cec5SDimitry Andric /// It should be transformed to:
11470b57cec5SDimitry Andric ///  %0 = insertvalue { i8, i32 } undef, i8 %y, 0
1148e8d8bef9SDimitry Andric Instruction *InstCombinerImpl::visitInsertValueInst(InsertValueInst &I) {
114906c3fb27SDimitry Andric   if (Value *V = simplifyInsertValueInst(
115006c3fb27SDimitry Andric           I.getAggregateOperand(), I.getInsertedValueOperand(), I.getIndices(),
115106c3fb27SDimitry Andric           SQ.getWithInstruction(&I)))
115206c3fb27SDimitry Andric     return replaceInstUsesWith(I, V);
115306c3fb27SDimitry Andric 
11540b57cec5SDimitry Andric   bool IsRedundant = false;
11550b57cec5SDimitry Andric   ArrayRef<unsigned int> FirstIndices = I.getIndices();
11560b57cec5SDimitry Andric 
11570b57cec5SDimitry Andric   // If there is a chain of insertvalue instructions (each of them except the
11580b57cec5SDimitry Andric   // last one has only one use and it's another insertvalue insn from this
11590b57cec5SDimitry Andric   // chain), check if any of the 'children' uses the same indices as the first
11600b57cec5SDimitry Andric   // instruction. In this case, the first one is redundant.
11610b57cec5SDimitry Andric   Value *V = &I;
11620b57cec5SDimitry Andric   unsigned Depth = 0;
11630b57cec5SDimitry Andric   while (V->hasOneUse() && Depth < 10) {
11640b57cec5SDimitry Andric     User *U = V->user_back();
11650b57cec5SDimitry Andric     auto UserInsInst = dyn_cast<InsertValueInst>(U);
11660b57cec5SDimitry Andric     if (!UserInsInst || U->getOperand(0) != V)
11670b57cec5SDimitry Andric       break;
11680b57cec5SDimitry Andric     if (UserInsInst->getIndices() == FirstIndices) {
11690b57cec5SDimitry Andric       IsRedundant = true;
11700b57cec5SDimitry Andric       break;
11710b57cec5SDimitry Andric     }
11720b57cec5SDimitry Andric     V = UserInsInst;
11730b57cec5SDimitry Andric     Depth++;
11740b57cec5SDimitry Andric   }
11750b57cec5SDimitry Andric 
11760b57cec5SDimitry Andric   if (IsRedundant)
11770b57cec5SDimitry Andric     return replaceInstUsesWith(I, I.getOperand(0));
1178e8d8bef9SDimitry Andric 
1179e8d8bef9SDimitry Andric   if (Instruction *NewI = foldAggregateConstructionIntoAggregateReuse(I))
1180e8d8bef9SDimitry Andric     return NewI;
1181e8d8bef9SDimitry Andric 
11820b57cec5SDimitry Andric   return nullptr;
11830b57cec5SDimitry Andric }
11840b57cec5SDimitry Andric 
11850b57cec5SDimitry Andric static bool isShuffleEquivalentToSelect(ShuffleVectorInst &Shuf) {
11865ffd83dbSDimitry Andric   // Can not analyze scalable type, the number of elements is not a compile-time
11875ffd83dbSDimitry Andric   // constant.
11885ffd83dbSDimitry Andric   if (isa<ScalableVectorType>(Shuf.getOperand(0)->getType()))
11895ffd83dbSDimitry Andric     return false;
11905ffd83dbSDimitry Andric 
11915ffd83dbSDimitry Andric   int MaskSize = Shuf.getShuffleMask().size();
11925ffd83dbSDimitry Andric   int VecSize =
11935ffd83dbSDimitry Andric       cast<FixedVectorType>(Shuf.getOperand(0)->getType())->getNumElements();
11940b57cec5SDimitry Andric 
11950b57cec5SDimitry Andric   // A vector select does not change the size of the operands.
11960b57cec5SDimitry Andric   if (MaskSize != VecSize)
11970b57cec5SDimitry Andric     return false;
11980b57cec5SDimitry Andric 
11990b57cec5SDimitry Andric   // Each mask element must be undefined or choose a vector element from one of
12000b57cec5SDimitry Andric   // the source operands without crossing vector lanes.
12010b57cec5SDimitry Andric   for (int i = 0; i != MaskSize; ++i) {
12020b57cec5SDimitry Andric     int Elt = Shuf.getMaskValue(i);
12030b57cec5SDimitry Andric     if (Elt != -1 && Elt != i && Elt != i + VecSize)
12040b57cec5SDimitry Andric       return false;
12050b57cec5SDimitry Andric   }
12060b57cec5SDimitry Andric 
12070b57cec5SDimitry Andric   return true;
12080b57cec5SDimitry Andric }
12090b57cec5SDimitry Andric 
12100b57cec5SDimitry Andric /// Turn a chain of inserts that splats a value into an insert + shuffle:
12110b57cec5SDimitry Andric /// insertelt(insertelt(insertelt(insertelt X, %k, 0), %k, 1), %k, 2) ... ->
1212fe6060f1SDimitry Andric /// shufflevector(insertelt(X, %k, 0), poison, zero)
12130b57cec5SDimitry Andric static Instruction *foldInsSequenceIntoSplat(InsertElementInst &InsElt) {
12140b57cec5SDimitry Andric   // We are interested in the last insert in a chain. So if this insert has a
12150b57cec5SDimitry Andric   // single user and that user is an insert, bail.
12160b57cec5SDimitry Andric   if (InsElt.hasOneUse() && isa<InsertElementInst>(InsElt.user_back()))
12170b57cec5SDimitry Andric     return nullptr;
12180b57cec5SDimitry Andric 
12195ffd83dbSDimitry Andric   VectorType *VecTy = InsElt.getType();
12205ffd83dbSDimitry Andric   // Can not handle scalable type, the number of elements is not a compile-time
12215ffd83dbSDimitry Andric   // constant.
12225ffd83dbSDimitry Andric   if (isa<ScalableVectorType>(VecTy))
12235ffd83dbSDimitry Andric     return nullptr;
12245ffd83dbSDimitry Andric   unsigned NumElements = cast<FixedVectorType>(VecTy)->getNumElements();
12250b57cec5SDimitry Andric 
12260b57cec5SDimitry Andric   // Do not try to do this for a one-element vector, since that's a nop,
12270b57cec5SDimitry Andric   // and will cause an inf-loop.
12280b57cec5SDimitry Andric   if (NumElements == 1)
12290b57cec5SDimitry Andric     return nullptr;
12300b57cec5SDimitry Andric 
12310b57cec5SDimitry Andric   Value *SplatVal = InsElt.getOperand(1);
12320b57cec5SDimitry Andric   InsertElementInst *CurrIE = &InsElt;
12335ffd83dbSDimitry Andric   SmallBitVector ElementPresent(NumElements, false);
12340b57cec5SDimitry Andric   InsertElementInst *FirstIE = nullptr;
12350b57cec5SDimitry Andric 
12360b57cec5SDimitry Andric   // Walk the chain backwards, keeping track of which indices we inserted into,
12370b57cec5SDimitry Andric   // until we hit something that isn't an insert of the splatted value.
12380b57cec5SDimitry Andric   while (CurrIE) {
12390b57cec5SDimitry Andric     auto *Idx = dyn_cast<ConstantInt>(CurrIE->getOperand(2));
12400b57cec5SDimitry Andric     if (!Idx || CurrIE->getOperand(1) != SplatVal)
12410b57cec5SDimitry Andric       return nullptr;
12420b57cec5SDimitry Andric 
12430b57cec5SDimitry Andric     auto *NextIE = dyn_cast<InsertElementInst>(CurrIE->getOperand(0));
12440b57cec5SDimitry Andric     // Check none of the intermediate steps have any additional uses, except
12450b57cec5SDimitry Andric     // for the root insertelement instruction, which can be re-used, if it
12460b57cec5SDimitry Andric     // inserts at position 0.
12470b57cec5SDimitry Andric     if (CurrIE != &InsElt &&
12480b57cec5SDimitry Andric         (!CurrIE->hasOneUse() && (NextIE != nullptr || !Idx->isZero())))
12490b57cec5SDimitry Andric       return nullptr;
12500b57cec5SDimitry Andric 
12510b57cec5SDimitry Andric     ElementPresent[Idx->getZExtValue()] = true;
12520b57cec5SDimitry Andric     FirstIE = CurrIE;
12530b57cec5SDimitry Andric     CurrIE = NextIE;
12540b57cec5SDimitry Andric   }
12550b57cec5SDimitry Andric 
12560b57cec5SDimitry Andric   // If this is just a single insertelement (not a sequence), we are done.
12570b57cec5SDimitry Andric   if (FirstIE == &InsElt)
12580b57cec5SDimitry Andric     return nullptr;
12590b57cec5SDimitry Andric 
126006c3fb27SDimitry Andric   // If we are not inserting into a poison vector, make sure we've seen an
12610b57cec5SDimitry Andric   // insert into every element.
12620b57cec5SDimitry Andric   // TODO: If the base vector is not undef, it might be better to create a splat
12630b57cec5SDimitry Andric   //       and then a select-shuffle (blend) with the base vector.
126406c3fb27SDimitry Andric   if (!match(FirstIE->getOperand(0), m_Poison()))
12655ffd83dbSDimitry Andric     if (!ElementPresent.all())
12660b57cec5SDimitry Andric       return nullptr;
12670b57cec5SDimitry Andric 
12680b57cec5SDimitry Andric   // Create the insert + shuffle.
126906c3fb27SDimitry Andric   Type *Int64Ty = Type::getInt64Ty(InsElt.getContext());
1270fe6060f1SDimitry Andric   PoisonValue *PoisonVec = PoisonValue::get(VecTy);
127106c3fb27SDimitry Andric   Constant *Zero = ConstantInt::get(Int64Ty, 0);
12720b57cec5SDimitry Andric   if (!cast<ConstantInt>(FirstIE->getOperand(2))->isZero())
1273*0fca6ea1SDimitry Andric     FirstIE = InsertElementInst::Create(PoisonVec, SplatVal, Zero, "",
1274*0fca6ea1SDimitry Andric                                         InsElt.getIterator());
12750b57cec5SDimitry Andric 
127606c3fb27SDimitry Andric   // Splat from element 0, but replace absent elements with poison in the mask.
12775ffd83dbSDimitry Andric   SmallVector<int, 16> Mask(NumElements, 0);
12780b57cec5SDimitry Andric   for (unsigned i = 0; i != NumElements; ++i)
12790b57cec5SDimitry Andric     if (!ElementPresent[i])
12805ffd83dbSDimitry Andric       Mask[i] = -1;
12810b57cec5SDimitry Andric 
1282349cc55cSDimitry Andric   return new ShuffleVectorInst(FirstIE, Mask);
12830b57cec5SDimitry Andric }
12840b57cec5SDimitry Andric 
12850b57cec5SDimitry Andric /// Try to fold an insert element into an existing splat shuffle by changing
12860b57cec5SDimitry Andric /// the shuffle's mask to include the index of this insert element.
12870b57cec5SDimitry Andric static Instruction *foldInsEltIntoSplat(InsertElementInst &InsElt) {
12880b57cec5SDimitry Andric   // Check if the vector operand of this insert is a canonical splat shuffle.
12890b57cec5SDimitry Andric   auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0));
12900b57cec5SDimitry Andric   if (!Shuf || !Shuf->isZeroEltSplat())
12910b57cec5SDimitry Andric     return nullptr;
12920b57cec5SDimitry Andric 
12935ffd83dbSDimitry Andric   // Bail out early if shuffle is scalable type. The number of elements in
12945ffd83dbSDimitry Andric   // shuffle mask is unknown at compile-time.
12955ffd83dbSDimitry Andric   if (isa<ScalableVectorType>(Shuf->getType()))
12965ffd83dbSDimitry Andric     return nullptr;
12975ffd83dbSDimitry Andric 
12980b57cec5SDimitry Andric   // Check for a constant insertion index.
12990b57cec5SDimitry Andric   uint64_t IdxC;
13000b57cec5SDimitry Andric   if (!match(InsElt.getOperand(2), m_ConstantInt(IdxC)))
13010b57cec5SDimitry Andric     return nullptr;
13020b57cec5SDimitry Andric 
13030b57cec5SDimitry Andric   // Check if the splat shuffle's input is the same as this insert's scalar op.
13040b57cec5SDimitry Andric   Value *X = InsElt.getOperand(1);
13050b57cec5SDimitry Andric   Value *Op0 = Shuf->getOperand(0);
13065ffd83dbSDimitry Andric   if (!match(Op0, m_InsertElt(m_Undef(), m_Specific(X), m_ZeroInt())))
13070b57cec5SDimitry Andric     return nullptr;
13080b57cec5SDimitry Andric 
13090b57cec5SDimitry Andric   // Replace the shuffle mask element at the index of this insert with a zero.
13100b57cec5SDimitry Andric   // For example:
1311349cc55cSDimitry Andric   // inselt (shuf (inselt undef, X, 0), _, <0,undef,0,undef>), X, 1
1312349cc55cSDimitry Andric   //   --> shuf (inselt undef, X, 0), poison, <0,0,0,undef>
1313e8d8bef9SDimitry Andric   unsigned NumMaskElts =
1314e8d8bef9SDimitry Andric       cast<FixedVectorType>(Shuf->getType())->getNumElements();
13155ffd83dbSDimitry Andric   SmallVector<int, 16> NewMask(NumMaskElts);
13160b57cec5SDimitry Andric   for (unsigned i = 0; i != NumMaskElts; ++i)
13175ffd83dbSDimitry Andric     NewMask[i] = i == IdxC ? 0 : Shuf->getMaskValue(i);
13180b57cec5SDimitry Andric 
1319349cc55cSDimitry Andric   return new ShuffleVectorInst(Op0, NewMask);
13200b57cec5SDimitry Andric }
13210b57cec5SDimitry Andric 
13228bcb0991SDimitry Andric /// Try to fold an extract+insert element into an existing identity shuffle by
13238bcb0991SDimitry Andric /// changing the shuffle's mask to include the index of this insert element.
13248bcb0991SDimitry Andric static Instruction *foldInsEltIntoIdentityShuffle(InsertElementInst &InsElt) {
13258bcb0991SDimitry Andric   // Check if the vector operand of this insert is an identity shuffle.
13268bcb0991SDimitry Andric   auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0));
1327*0fca6ea1SDimitry Andric   if (!Shuf || !match(Shuf->getOperand(1), m_Poison()) ||
13288bcb0991SDimitry Andric       !(Shuf->isIdentityWithExtract() || Shuf->isIdentityWithPadding()))
13298bcb0991SDimitry Andric     return nullptr;
13308bcb0991SDimitry Andric 
13315ffd83dbSDimitry Andric   // Bail out early if shuffle is scalable type. The number of elements in
13325ffd83dbSDimitry Andric   // shuffle mask is unknown at compile-time.
13335ffd83dbSDimitry Andric   if (isa<ScalableVectorType>(Shuf->getType()))
13345ffd83dbSDimitry Andric     return nullptr;
13355ffd83dbSDimitry Andric 
13368bcb0991SDimitry Andric   // Check for a constant insertion index.
13378bcb0991SDimitry Andric   uint64_t IdxC;
13388bcb0991SDimitry Andric   if (!match(InsElt.getOperand(2), m_ConstantInt(IdxC)))
13398bcb0991SDimitry Andric     return nullptr;
13408bcb0991SDimitry Andric 
13418bcb0991SDimitry Andric   // Check if this insert's scalar op is extracted from the identity shuffle's
13428bcb0991SDimitry Andric   // input vector.
13438bcb0991SDimitry Andric   Value *Scalar = InsElt.getOperand(1);
13448bcb0991SDimitry Andric   Value *X = Shuf->getOperand(0);
13455ffd83dbSDimitry Andric   if (!match(Scalar, m_ExtractElt(m_Specific(X), m_SpecificInt(IdxC))))
13468bcb0991SDimitry Andric     return nullptr;
13478bcb0991SDimitry Andric 
13488bcb0991SDimitry Andric   // Replace the shuffle mask element at the index of this extract+insert with
13498bcb0991SDimitry Andric   // that same index value.
13508bcb0991SDimitry Andric   // For example:
13518bcb0991SDimitry Andric   // inselt (shuf X, IdMask), (extelt X, IdxC), IdxC --> shuf X, IdMask'
1352e8d8bef9SDimitry Andric   unsigned NumMaskElts =
1353e8d8bef9SDimitry Andric       cast<FixedVectorType>(Shuf->getType())->getNumElements();
13545ffd83dbSDimitry Andric   SmallVector<int, 16> NewMask(NumMaskElts);
13555ffd83dbSDimitry Andric   ArrayRef<int> OldMask = Shuf->getShuffleMask();
13568bcb0991SDimitry Andric   for (unsigned i = 0; i != NumMaskElts; ++i) {
13578bcb0991SDimitry Andric     if (i != IdxC) {
13588bcb0991SDimitry Andric       // All mask elements besides the inserted element remain the same.
13595ffd83dbSDimitry Andric       NewMask[i] = OldMask[i];
13605ffd83dbSDimitry Andric     } else if (OldMask[i] == (int)IdxC) {
13618bcb0991SDimitry Andric       // If the mask element was already set, there's nothing to do
13628bcb0991SDimitry Andric       // (demanded elements analysis may unset it later).
13638bcb0991SDimitry Andric       return nullptr;
13648bcb0991SDimitry Andric     } else {
136506c3fb27SDimitry Andric       assert(OldMask[i] == PoisonMaskElem &&
13668bcb0991SDimitry Andric              "Unexpected shuffle mask element for identity shuffle");
13675ffd83dbSDimitry Andric       NewMask[i] = IdxC;
13688bcb0991SDimitry Andric     }
13698bcb0991SDimitry Andric   }
13708bcb0991SDimitry Andric 
13718bcb0991SDimitry Andric   return new ShuffleVectorInst(X, Shuf->getOperand(1), NewMask);
13728bcb0991SDimitry Andric }
13738bcb0991SDimitry Andric 
13740b57cec5SDimitry Andric /// If we have an insertelement instruction feeding into another insertelement
13750b57cec5SDimitry Andric /// and the 2nd is inserting a constant into the vector, canonicalize that
13760b57cec5SDimitry Andric /// constant insertion before the insertion of a variable:
13770b57cec5SDimitry Andric ///
13780b57cec5SDimitry Andric /// insertelement (insertelement X, Y, IdxC1), ScalarC, IdxC2 -->
13790b57cec5SDimitry Andric /// insertelement (insertelement X, ScalarC, IdxC2), Y, IdxC1
13800b57cec5SDimitry Andric ///
13810b57cec5SDimitry Andric /// This has the potential of eliminating the 2nd insertelement instruction
13820b57cec5SDimitry Andric /// via constant folding of the scalar constant into a vector constant.
13830b57cec5SDimitry Andric static Instruction *hoistInsEltConst(InsertElementInst &InsElt2,
13840b57cec5SDimitry Andric                                      InstCombiner::BuilderTy &Builder) {
13850b57cec5SDimitry Andric   auto *InsElt1 = dyn_cast<InsertElementInst>(InsElt2.getOperand(0));
13860b57cec5SDimitry Andric   if (!InsElt1 || !InsElt1->hasOneUse())
13870b57cec5SDimitry Andric     return nullptr;
13880b57cec5SDimitry Andric 
13890b57cec5SDimitry Andric   Value *X, *Y;
13900b57cec5SDimitry Andric   Constant *ScalarC;
13910b57cec5SDimitry Andric   ConstantInt *IdxC1, *IdxC2;
13920b57cec5SDimitry Andric   if (match(InsElt1->getOperand(0), m_Value(X)) &&
13930b57cec5SDimitry Andric       match(InsElt1->getOperand(1), m_Value(Y)) && !isa<Constant>(Y) &&
13940b57cec5SDimitry Andric       match(InsElt1->getOperand(2), m_ConstantInt(IdxC1)) &&
13950b57cec5SDimitry Andric       match(InsElt2.getOperand(1), m_Constant(ScalarC)) &&
13960b57cec5SDimitry Andric       match(InsElt2.getOperand(2), m_ConstantInt(IdxC2)) && IdxC1 != IdxC2) {
13970b57cec5SDimitry Andric     Value *NewInsElt1 = Builder.CreateInsertElement(X, ScalarC, IdxC2);
13980b57cec5SDimitry Andric     return InsertElementInst::Create(NewInsElt1, Y, IdxC1);
13990b57cec5SDimitry Andric   }
14000b57cec5SDimitry Andric 
14010b57cec5SDimitry Andric   return nullptr;
14020b57cec5SDimitry Andric }
14030b57cec5SDimitry Andric 
14040b57cec5SDimitry Andric /// insertelt (shufflevector X, CVec, Mask|insertelt X, C1, CIndex1), C, CIndex
14050b57cec5SDimitry Andric /// --> shufflevector X, CVec', Mask'
14060b57cec5SDimitry Andric static Instruction *foldConstantInsEltIntoShuffle(InsertElementInst &InsElt) {
14070b57cec5SDimitry Andric   auto *Inst = dyn_cast<Instruction>(InsElt.getOperand(0));
14080b57cec5SDimitry Andric   // Bail out if the parent has more than one use. In that case, we'd be
14090b57cec5SDimitry Andric   // replacing the insertelt with a shuffle, and that's not a clear win.
14100b57cec5SDimitry Andric   if (!Inst || !Inst->hasOneUse())
14110b57cec5SDimitry Andric     return nullptr;
14120b57cec5SDimitry Andric   if (auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0))) {
14130b57cec5SDimitry Andric     // The shuffle must have a constant vector operand. The insertelt must have
14140b57cec5SDimitry Andric     // a constant scalar being inserted at a constant position in the vector.
14150b57cec5SDimitry Andric     Constant *ShufConstVec, *InsEltScalar;
14160b57cec5SDimitry Andric     uint64_t InsEltIndex;
14170b57cec5SDimitry Andric     if (!match(Shuf->getOperand(1), m_Constant(ShufConstVec)) ||
14180b57cec5SDimitry Andric         !match(InsElt.getOperand(1), m_Constant(InsEltScalar)) ||
14190b57cec5SDimitry Andric         !match(InsElt.getOperand(2), m_ConstantInt(InsEltIndex)))
14200b57cec5SDimitry Andric       return nullptr;
14210b57cec5SDimitry Andric 
14220b57cec5SDimitry Andric     // Adding an element to an arbitrary shuffle could be expensive, but a
14230b57cec5SDimitry Andric     // shuffle that selects elements from vectors without crossing lanes is
14240b57cec5SDimitry Andric     // assumed cheap.
14250b57cec5SDimitry Andric     // If we're just adding a constant into that shuffle, it will still be
14260b57cec5SDimitry Andric     // cheap.
14270b57cec5SDimitry Andric     if (!isShuffleEquivalentToSelect(*Shuf))
14280b57cec5SDimitry Andric       return nullptr;
14290b57cec5SDimitry Andric 
14300b57cec5SDimitry Andric     // From the above 'select' check, we know that the mask has the same number
14310b57cec5SDimitry Andric     // of elements as the vector input operands. We also know that each constant
14320b57cec5SDimitry Andric     // input element is used in its lane and can not be used more than once by
14330b57cec5SDimitry Andric     // the shuffle. Therefore, replace the constant in the shuffle's constant
14340b57cec5SDimitry Andric     // vector with the insertelt constant. Replace the constant in the shuffle's
14350b57cec5SDimitry Andric     // mask vector with the insertelt index plus the length of the vector
14360b57cec5SDimitry Andric     // (because the constant vector operand of a shuffle is always the 2nd
14370b57cec5SDimitry Andric     // operand).
14385ffd83dbSDimitry Andric     ArrayRef<int> Mask = Shuf->getShuffleMask();
14395ffd83dbSDimitry Andric     unsigned NumElts = Mask.size();
14400b57cec5SDimitry Andric     SmallVector<Constant *, 16> NewShufElts(NumElts);
14415ffd83dbSDimitry Andric     SmallVector<int, 16> NewMaskElts(NumElts);
14420b57cec5SDimitry Andric     for (unsigned I = 0; I != NumElts; ++I) {
14430b57cec5SDimitry Andric       if (I == InsEltIndex) {
14440b57cec5SDimitry Andric         NewShufElts[I] = InsEltScalar;
14455ffd83dbSDimitry Andric         NewMaskElts[I] = InsEltIndex + NumElts;
14460b57cec5SDimitry Andric       } else {
14470b57cec5SDimitry Andric         // Copy over the existing values.
14480b57cec5SDimitry Andric         NewShufElts[I] = ShufConstVec->getAggregateElement(I);
14495ffd83dbSDimitry Andric         NewMaskElts[I] = Mask[I];
14500b57cec5SDimitry Andric       }
1451349cc55cSDimitry Andric 
1452349cc55cSDimitry Andric       // Bail if we failed to find an element.
1453349cc55cSDimitry Andric       if (!NewShufElts[I])
1454349cc55cSDimitry Andric         return nullptr;
14550b57cec5SDimitry Andric     }
14560b57cec5SDimitry Andric 
14570b57cec5SDimitry Andric     // Create new operands for a shuffle that includes the constant of the
14580b57cec5SDimitry Andric     // original insertelt. The old shuffle will be dead now.
14590b57cec5SDimitry Andric     return new ShuffleVectorInst(Shuf->getOperand(0),
14605ffd83dbSDimitry Andric                                  ConstantVector::get(NewShufElts), NewMaskElts);
14610b57cec5SDimitry Andric   } else if (auto *IEI = dyn_cast<InsertElementInst>(Inst)) {
14620b57cec5SDimitry Andric     // Transform sequences of insertelements ops with constant data/indexes into
14630b57cec5SDimitry Andric     // a single shuffle op.
14645ffd83dbSDimitry Andric     // Can not handle scalable type, the number of elements needed to create
14655ffd83dbSDimitry Andric     // shuffle mask is not a compile-time constant.
14665ffd83dbSDimitry Andric     if (isa<ScalableVectorType>(InsElt.getType()))
14675ffd83dbSDimitry Andric       return nullptr;
14685ffd83dbSDimitry Andric     unsigned NumElts =
14695ffd83dbSDimitry Andric         cast<FixedVectorType>(InsElt.getType())->getNumElements();
14700b57cec5SDimitry Andric 
14710b57cec5SDimitry Andric     uint64_t InsertIdx[2];
14720b57cec5SDimitry Andric     Constant *Val[2];
14730b57cec5SDimitry Andric     if (!match(InsElt.getOperand(2), m_ConstantInt(InsertIdx[0])) ||
14740b57cec5SDimitry Andric         !match(InsElt.getOperand(1), m_Constant(Val[0])) ||
14750b57cec5SDimitry Andric         !match(IEI->getOperand(2), m_ConstantInt(InsertIdx[1])) ||
14760b57cec5SDimitry Andric         !match(IEI->getOperand(1), m_Constant(Val[1])))
14770b57cec5SDimitry Andric       return nullptr;
14780b57cec5SDimitry Andric     SmallVector<Constant *, 16> Values(NumElts);
14795ffd83dbSDimitry Andric     SmallVector<int, 16> Mask(NumElts);
14800b57cec5SDimitry Andric     auto ValI = std::begin(Val);
14810b57cec5SDimitry Andric     // Generate new constant vector and mask.
14820b57cec5SDimitry Andric     // We have 2 values/masks from the insertelements instructions. Insert them
14830b57cec5SDimitry Andric     // into new value/mask vectors.
14840b57cec5SDimitry Andric     for (uint64_t I : InsertIdx) {
14850b57cec5SDimitry Andric       if (!Values[I]) {
14860b57cec5SDimitry Andric         Values[I] = *ValI;
14875ffd83dbSDimitry Andric         Mask[I] = NumElts + I;
14880b57cec5SDimitry Andric       }
14890b57cec5SDimitry Andric       ++ValI;
14900b57cec5SDimitry Andric     }
149106c3fb27SDimitry Andric     // Remaining values are filled with 'poison' values.
14920b57cec5SDimitry Andric     for (unsigned I = 0; I < NumElts; ++I) {
14930b57cec5SDimitry Andric       if (!Values[I]) {
149406c3fb27SDimitry Andric         Values[I] = PoisonValue::get(InsElt.getType()->getElementType());
14955ffd83dbSDimitry Andric         Mask[I] = I;
14960b57cec5SDimitry Andric       }
14970b57cec5SDimitry Andric     }
14980b57cec5SDimitry Andric     // Create new operands for a shuffle that includes the constant of the
14990b57cec5SDimitry Andric     // original insertelt.
15000b57cec5SDimitry Andric     return new ShuffleVectorInst(IEI->getOperand(0),
15015ffd83dbSDimitry Andric                                  ConstantVector::get(Values), Mask);
15020b57cec5SDimitry Andric   }
15030b57cec5SDimitry Andric   return nullptr;
15040b57cec5SDimitry Andric }
15050b57cec5SDimitry Andric 
1506349cc55cSDimitry Andric /// If both the base vector and the inserted element are extended from the same
1507349cc55cSDimitry Andric /// type, do the insert element in the narrow source type followed by extend.
1508349cc55cSDimitry Andric /// TODO: This can be extended to include other cast opcodes, but particularly
1509349cc55cSDimitry Andric ///       if we create a wider insertelement, make sure codegen is not harmed.
1510349cc55cSDimitry Andric static Instruction *narrowInsElt(InsertElementInst &InsElt,
1511349cc55cSDimitry Andric                                  InstCombiner::BuilderTy &Builder) {
1512349cc55cSDimitry Andric   // We are creating a vector extend. If the original vector extend has another
1513349cc55cSDimitry Andric   // use, that would mean we end up with 2 vector extends, so avoid that.
1514349cc55cSDimitry Andric   // TODO: We could ease the use-clause to "if at least one op has one use"
1515349cc55cSDimitry Andric   //       (assuming that the source types match - see next TODO comment).
1516349cc55cSDimitry Andric   Value *Vec = InsElt.getOperand(0);
1517349cc55cSDimitry Andric   if (!Vec->hasOneUse())
1518349cc55cSDimitry Andric     return nullptr;
1519349cc55cSDimitry Andric 
1520349cc55cSDimitry Andric   Value *Scalar = InsElt.getOperand(1);
1521349cc55cSDimitry Andric   Value *X, *Y;
1522349cc55cSDimitry Andric   CastInst::CastOps CastOpcode;
1523349cc55cSDimitry Andric   if (match(Vec, m_FPExt(m_Value(X))) && match(Scalar, m_FPExt(m_Value(Y))))
1524349cc55cSDimitry Andric     CastOpcode = Instruction::FPExt;
1525349cc55cSDimitry Andric   else if (match(Vec, m_SExt(m_Value(X))) && match(Scalar, m_SExt(m_Value(Y))))
1526349cc55cSDimitry Andric     CastOpcode = Instruction::SExt;
1527349cc55cSDimitry Andric   else if (match(Vec, m_ZExt(m_Value(X))) && match(Scalar, m_ZExt(m_Value(Y))))
1528349cc55cSDimitry Andric     CastOpcode = Instruction::ZExt;
1529349cc55cSDimitry Andric   else
1530349cc55cSDimitry Andric     return nullptr;
1531349cc55cSDimitry Andric 
1532349cc55cSDimitry Andric   // TODO: We can allow mismatched types by creating an intermediate cast.
1533349cc55cSDimitry Andric   if (X->getType()->getScalarType() != Y->getType())
1534349cc55cSDimitry Andric     return nullptr;
1535349cc55cSDimitry Andric 
1536349cc55cSDimitry Andric   // inselt (ext X), (ext Y), Index --> ext (inselt X, Y, Index)
1537349cc55cSDimitry Andric   Value *NewInsElt = Builder.CreateInsertElement(X, Y, InsElt.getOperand(2));
1538349cc55cSDimitry Andric   return CastInst::Create(CastOpcode, NewInsElt, InsElt.getType());
1539349cc55cSDimitry Andric }
1540349cc55cSDimitry Andric 
1541bdd1243dSDimitry Andric /// If we are inserting 2 halves of a value into adjacent elements of a vector,
1542bdd1243dSDimitry Andric /// try to convert to a single insert with appropriate bitcasts.
1543bdd1243dSDimitry Andric static Instruction *foldTruncInsEltPair(InsertElementInst &InsElt,
1544bdd1243dSDimitry Andric                                         bool IsBigEndian,
1545bdd1243dSDimitry Andric                                         InstCombiner::BuilderTy &Builder) {
1546bdd1243dSDimitry Andric   Value *VecOp    = InsElt.getOperand(0);
1547bdd1243dSDimitry Andric   Value *ScalarOp = InsElt.getOperand(1);
1548bdd1243dSDimitry Andric   Value *IndexOp  = InsElt.getOperand(2);
1549bdd1243dSDimitry Andric 
1550bdd1243dSDimitry Andric   // Pattern depends on endian because we expect lower index is inserted first.
1551bdd1243dSDimitry Andric   // Big endian:
1552bdd1243dSDimitry Andric   // inselt (inselt BaseVec, (trunc (lshr X, BW/2), Index0), (trunc X), Index1
1553bdd1243dSDimitry Andric   // Little endian:
1554bdd1243dSDimitry Andric   // inselt (inselt BaseVec, (trunc X), Index0), (trunc (lshr X, BW/2)), Index1
1555bdd1243dSDimitry Andric   // Note: It is not safe to do this transform with an arbitrary base vector
1556bdd1243dSDimitry Andric   //       because the bitcast of that vector to fewer/larger elements could
1557bdd1243dSDimitry Andric   //       allow poison to spill into an element that was not poison before.
1558bdd1243dSDimitry Andric   // TODO: Detect smaller fractions of the scalar.
1559bdd1243dSDimitry Andric   // TODO: One-use checks are conservative.
1560bdd1243dSDimitry Andric   auto *VTy = dyn_cast<FixedVectorType>(InsElt.getType());
1561bdd1243dSDimitry Andric   Value *Scalar0, *BaseVec;
1562bdd1243dSDimitry Andric   uint64_t Index0, Index1;
1563bdd1243dSDimitry Andric   if (!VTy || (VTy->getNumElements() & 1) ||
1564bdd1243dSDimitry Andric       !match(IndexOp, m_ConstantInt(Index1)) ||
1565bdd1243dSDimitry Andric       !match(VecOp, m_InsertElt(m_Value(BaseVec), m_Value(Scalar0),
1566bdd1243dSDimitry Andric                                 m_ConstantInt(Index0))) ||
1567bdd1243dSDimitry Andric       !match(BaseVec, m_Undef()))
1568bdd1243dSDimitry Andric     return nullptr;
1569bdd1243dSDimitry Andric 
1570bdd1243dSDimitry Andric   // The first insert must be to the index one less than this one, and
1571bdd1243dSDimitry Andric   // the first insert must be to an even index.
1572bdd1243dSDimitry Andric   if (Index0 + 1 != Index1 || Index0 & 1)
1573bdd1243dSDimitry Andric     return nullptr;
1574bdd1243dSDimitry Andric 
1575bdd1243dSDimitry Andric   // For big endian, the high half of the value should be inserted first.
1576bdd1243dSDimitry Andric   // For little endian, the low half of the value should be inserted first.
1577bdd1243dSDimitry Andric   Value *X;
1578bdd1243dSDimitry Andric   uint64_t ShAmt;
1579bdd1243dSDimitry Andric   if (IsBigEndian) {
1580bdd1243dSDimitry Andric     if (!match(ScalarOp, m_Trunc(m_Value(X))) ||
1581bdd1243dSDimitry Andric         !match(Scalar0, m_Trunc(m_LShr(m_Specific(X), m_ConstantInt(ShAmt)))))
1582bdd1243dSDimitry Andric       return nullptr;
1583bdd1243dSDimitry Andric   } else {
1584bdd1243dSDimitry Andric     if (!match(Scalar0, m_Trunc(m_Value(X))) ||
1585bdd1243dSDimitry Andric         !match(ScalarOp, m_Trunc(m_LShr(m_Specific(X), m_ConstantInt(ShAmt)))))
1586bdd1243dSDimitry Andric       return nullptr;
1587bdd1243dSDimitry Andric   }
1588bdd1243dSDimitry Andric 
1589bdd1243dSDimitry Andric   Type *SrcTy = X->getType();
1590bdd1243dSDimitry Andric   unsigned ScalarWidth = SrcTy->getScalarSizeInBits();
1591bdd1243dSDimitry Andric   unsigned VecEltWidth = VTy->getScalarSizeInBits();
1592bdd1243dSDimitry Andric   if (ScalarWidth != VecEltWidth * 2 || ShAmt != VecEltWidth)
1593bdd1243dSDimitry Andric     return nullptr;
1594bdd1243dSDimitry Andric 
1595bdd1243dSDimitry Andric   // Bitcast the base vector to a vector type with the source element type.
1596bdd1243dSDimitry Andric   Type *CastTy = FixedVectorType::get(SrcTy, VTy->getNumElements() / 2);
1597bdd1243dSDimitry Andric   Value *CastBaseVec = Builder.CreateBitCast(BaseVec, CastTy);
1598bdd1243dSDimitry Andric 
1599bdd1243dSDimitry Andric   // Scale the insert index for a vector with half as many elements.
1600bdd1243dSDimitry Andric   // bitcast (inselt (bitcast BaseVec), X, NewIndex)
1601bdd1243dSDimitry Andric   uint64_t NewIndex = IsBigEndian ? Index1 / 2 : Index0 / 2;
1602bdd1243dSDimitry Andric   Value *NewInsert = Builder.CreateInsertElement(CastBaseVec, X, NewIndex);
1603bdd1243dSDimitry Andric   return new BitCastInst(NewInsert, VTy);
1604bdd1243dSDimitry Andric }
1605bdd1243dSDimitry Andric 
1606e8d8bef9SDimitry Andric Instruction *InstCombinerImpl::visitInsertElementInst(InsertElementInst &IE) {
16070b57cec5SDimitry Andric   Value *VecOp    = IE.getOperand(0);
16080b57cec5SDimitry Andric   Value *ScalarOp = IE.getOperand(1);
16090b57cec5SDimitry Andric   Value *IdxOp    = IE.getOperand(2);
16100b57cec5SDimitry Andric 
161181ad6265SDimitry Andric   if (auto *V = simplifyInsertElementInst(
16120b57cec5SDimitry Andric           VecOp, ScalarOp, IdxOp, SQ.getWithInstruction(&IE)))
16130b57cec5SDimitry Andric     return replaceInstUsesWith(IE, V);
16140b57cec5SDimitry Andric 
16150eae32dcSDimitry Andric   // Canonicalize type of constant indices to i64 to simplify CSE
1616bdd1243dSDimitry Andric   if (auto *IndexC = dyn_cast<ConstantInt>(IdxOp)) {
16170eae32dcSDimitry Andric     if (auto *NewIdx = getPreferredVectorIndex(IndexC))
16180eae32dcSDimitry Andric       return replaceOperand(IE, 2, NewIdx);
16190eae32dcSDimitry Andric 
1620bdd1243dSDimitry Andric     Value *BaseVec, *OtherScalar;
1621bdd1243dSDimitry Andric     uint64_t OtherIndexVal;
1622bdd1243dSDimitry Andric     if (match(VecOp, m_OneUse(m_InsertElt(m_Value(BaseVec),
1623bdd1243dSDimitry Andric                                           m_Value(OtherScalar),
1624bdd1243dSDimitry Andric                                           m_ConstantInt(OtherIndexVal)))) &&
1625bdd1243dSDimitry Andric         !isa<Constant>(OtherScalar) && OtherIndexVal > IndexC->getZExtValue()) {
1626bdd1243dSDimitry Andric       Value *NewIns = Builder.CreateInsertElement(BaseVec, ScalarOp, IdxOp);
1627bdd1243dSDimitry Andric       return InsertElementInst::Create(NewIns, OtherScalar,
1628bdd1243dSDimitry Andric                                        Builder.getInt64(OtherIndexVal));
1629bdd1243dSDimitry Andric     }
1630bdd1243dSDimitry Andric   }
1631bdd1243dSDimitry Andric 
16325ffd83dbSDimitry Andric   // If the scalar is bitcast and inserted into undef, do the insert in the
16335ffd83dbSDimitry Andric   // source type followed by bitcast.
16345ffd83dbSDimitry Andric   // TODO: Generalize for insert into any constant, not just undef?
16355ffd83dbSDimitry Andric   Value *ScalarSrc;
16365ffd83dbSDimitry Andric   if (match(VecOp, m_Undef()) &&
16375ffd83dbSDimitry Andric       match(ScalarOp, m_OneUse(m_BitCast(m_Value(ScalarSrc)))) &&
16385ffd83dbSDimitry Andric       (ScalarSrc->getType()->isIntegerTy() ||
16395ffd83dbSDimitry Andric        ScalarSrc->getType()->isFloatingPointTy())) {
16405ffd83dbSDimitry Andric     // inselt undef, (bitcast ScalarSrc), IdxOp -->
16415ffd83dbSDimitry Andric     //   bitcast (inselt undef, ScalarSrc, IdxOp)
16425ffd83dbSDimitry Andric     Type *ScalarTy = ScalarSrc->getType();
16435ffd83dbSDimitry Andric     Type *VecTy = VectorType::get(ScalarTy, IE.getType()->getElementCount());
1644cb14a3feSDimitry Andric     Constant *NewUndef = isa<PoisonValue>(VecOp) ? PoisonValue::get(VecTy)
1645cb14a3feSDimitry Andric                                                  : UndefValue::get(VecTy);
16465ffd83dbSDimitry Andric     Value *NewInsElt = Builder.CreateInsertElement(NewUndef, ScalarSrc, IdxOp);
16475ffd83dbSDimitry Andric     return new BitCastInst(NewInsElt, IE.getType());
16485ffd83dbSDimitry Andric   }
16495ffd83dbSDimitry Andric 
16500b57cec5SDimitry Andric   // If the vector and scalar are both bitcast from the same element type, do
16510b57cec5SDimitry Andric   // the insert in that source type followed by bitcast.
16525ffd83dbSDimitry Andric   Value *VecSrc;
16530b57cec5SDimitry Andric   if (match(VecOp, m_BitCast(m_Value(VecSrc))) &&
16540b57cec5SDimitry Andric       match(ScalarOp, m_BitCast(m_Value(ScalarSrc))) &&
16550b57cec5SDimitry Andric       (VecOp->hasOneUse() || ScalarOp->hasOneUse()) &&
16560b57cec5SDimitry Andric       VecSrc->getType()->isVectorTy() && !ScalarSrc->getType()->isVectorTy() &&
16575ffd83dbSDimitry Andric       cast<VectorType>(VecSrc->getType())->getElementType() ==
16585ffd83dbSDimitry Andric           ScalarSrc->getType()) {
16590b57cec5SDimitry Andric     // inselt (bitcast VecSrc), (bitcast ScalarSrc), IdxOp -->
16600b57cec5SDimitry Andric     //   bitcast (inselt VecSrc, ScalarSrc, IdxOp)
16610b57cec5SDimitry Andric     Value *NewInsElt = Builder.CreateInsertElement(VecSrc, ScalarSrc, IdxOp);
16620b57cec5SDimitry Andric     return new BitCastInst(NewInsElt, IE.getType());
16630b57cec5SDimitry Andric   }
16640b57cec5SDimitry Andric 
16655ffd83dbSDimitry Andric   // If the inserted element was extracted from some other fixed-length vector
16665ffd83dbSDimitry Andric   // and both indexes are valid constants, try to turn this into a shuffle.
16675ffd83dbSDimitry Andric   // Can not handle scalable vector type, the number of elements needed to
16685ffd83dbSDimitry Andric   // create shuffle mask is not a compile-time constant.
16690b57cec5SDimitry Andric   uint64_t InsertedIdx, ExtractedIdx;
16700b57cec5SDimitry Andric   Value *ExtVecOp;
16715ffd83dbSDimitry Andric   if (isa<FixedVectorType>(IE.getType()) &&
16725ffd83dbSDimitry Andric       match(IdxOp, m_ConstantInt(InsertedIdx)) &&
16735ffd83dbSDimitry Andric       match(ScalarOp,
16745ffd83dbSDimitry Andric             m_ExtractElt(m_Value(ExtVecOp), m_ConstantInt(ExtractedIdx))) &&
16755ffd83dbSDimitry Andric       isa<FixedVectorType>(ExtVecOp->getType()) &&
16765ffd83dbSDimitry Andric       ExtractedIdx <
16775ffd83dbSDimitry Andric           cast<FixedVectorType>(ExtVecOp->getType())->getNumElements()) {
16780b57cec5SDimitry Andric     // TODO: Looking at the user(s) to determine if this insert is a
16790b57cec5SDimitry Andric     // fold-to-shuffle opportunity does not match the usual instcombine
16800b57cec5SDimitry Andric     // constraints. We should decide if the transform is worthy based only
16810b57cec5SDimitry Andric     // on this instruction and its operands, but that may not work currently.
16820b57cec5SDimitry Andric     //
16830b57cec5SDimitry Andric     // Here, we are trying to avoid creating shuffles before reaching
16840b57cec5SDimitry Andric     // the end of a chain of extract-insert pairs. This is complicated because
16850b57cec5SDimitry Andric     // we do not generally form arbitrary shuffle masks in instcombine
16860b57cec5SDimitry Andric     // (because those may codegen poorly), but collectShuffleElements() does
16870b57cec5SDimitry Andric     // exactly that.
16880b57cec5SDimitry Andric     //
16890b57cec5SDimitry Andric     // The rules for determining what is an acceptable target-independent
16900b57cec5SDimitry Andric     // shuffle mask are fuzzy because they evolve based on the backend's
16910b57cec5SDimitry Andric     // capabilities and real-world impact.
16920b57cec5SDimitry Andric     auto isShuffleRootCandidate = [](InsertElementInst &Insert) {
16930b57cec5SDimitry Andric       if (!Insert.hasOneUse())
16940b57cec5SDimitry Andric         return true;
16950b57cec5SDimitry Andric       auto *InsertUser = dyn_cast<InsertElementInst>(Insert.user_back());
16960b57cec5SDimitry Andric       if (!InsertUser)
16970b57cec5SDimitry Andric         return true;
16980b57cec5SDimitry Andric       return false;
16990b57cec5SDimitry Andric     };
17000b57cec5SDimitry Andric 
17010b57cec5SDimitry Andric     // Try to form a shuffle from a chain of extract-insert ops.
17020b57cec5SDimitry Andric     if (isShuffleRootCandidate(IE)) {
170306c3fb27SDimitry Andric       bool Rerun = true;
170406c3fb27SDimitry Andric       while (Rerun) {
170506c3fb27SDimitry Andric         Rerun = false;
170606c3fb27SDimitry Andric 
17075ffd83dbSDimitry Andric         SmallVector<int, 16> Mask;
170806c3fb27SDimitry Andric         ShuffleOps LR =
170906c3fb27SDimitry Andric             collectShuffleElements(&IE, Mask, nullptr, *this, Rerun);
17100b57cec5SDimitry Andric 
17110b57cec5SDimitry Andric         // The proposed shuffle may be trivial, in which case we shouldn't
17120b57cec5SDimitry Andric         // perform the combine.
17130b57cec5SDimitry Andric         if (LR.first != &IE && LR.second != &IE) {
17140b57cec5SDimitry Andric           // We now have a shuffle of LHS, RHS, Mask.
17150b57cec5SDimitry Andric           if (LR.second == nullptr)
171606c3fb27SDimitry Andric             LR.second = PoisonValue::get(LR.first->getType());
17175ffd83dbSDimitry Andric           return new ShuffleVectorInst(LR.first, LR.second, Mask);
17180b57cec5SDimitry Andric         }
17190b57cec5SDimitry Andric       }
17200b57cec5SDimitry Andric     }
172106c3fb27SDimitry Andric   }
17220b57cec5SDimitry Andric 
17235ffd83dbSDimitry Andric   if (auto VecTy = dyn_cast<FixedVectorType>(VecOp->getType())) {
17245ffd83dbSDimitry Andric     unsigned VWidth = VecTy->getNumElements();
1725cb14a3feSDimitry Andric     APInt PoisonElts(VWidth, 0);
1726349cc55cSDimitry Andric     APInt AllOnesEltMask(APInt::getAllOnes(VWidth));
1727cb14a3feSDimitry Andric     if (Value *V = SimplifyDemandedVectorElts(&IE, AllOnesEltMask,
1728cb14a3feSDimitry Andric                                               PoisonElts)) {
17290b57cec5SDimitry Andric       if (V != &IE)
17300b57cec5SDimitry Andric         return replaceInstUsesWith(IE, V);
17310b57cec5SDimitry Andric       return &IE;
17320b57cec5SDimitry Andric     }
17335ffd83dbSDimitry Andric   }
17340b57cec5SDimitry Andric 
17350b57cec5SDimitry Andric   if (Instruction *Shuf = foldConstantInsEltIntoShuffle(IE))
17360b57cec5SDimitry Andric     return Shuf;
17370b57cec5SDimitry Andric 
17380b57cec5SDimitry Andric   if (Instruction *NewInsElt = hoistInsEltConst(IE, Builder))
17390b57cec5SDimitry Andric     return NewInsElt;
17400b57cec5SDimitry Andric 
17410b57cec5SDimitry Andric   if (Instruction *Broadcast = foldInsSequenceIntoSplat(IE))
17420b57cec5SDimitry Andric     return Broadcast;
17430b57cec5SDimitry Andric 
17440b57cec5SDimitry Andric   if (Instruction *Splat = foldInsEltIntoSplat(IE))
17450b57cec5SDimitry Andric     return Splat;
17460b57cec5SDimitry Andric 
17478bcb0991SDimitry Andric   if (Instruction *IdentityShuf = foldInsEltIntoIdentityShuffle(IE))
17488bcb0991SDimitry Andric     return IdentityShuf;
17498bcb0991SDimitry Andric 
1750349cc55cSDimitry Andric   if (Instruction *Ext = narrowInsElt(IE, Builder))
1751349cc55cSDimitry Andric     return Ext;
1752349cc55cSDimitry Andric 
1753bdd1243dSDimitry Andric   if (Instruction *Ext = foldTruncInsEltPair(IE, DL.isBigEndian(), Builder))
1754bdd1243dSDimitry Andric     return Ext;
1755bdd1243dSDimitry Andric 
17560b57cec5SDimitry Andric   return nullptr;
17570b57cec5SDimitry Andric }
17580b57cec5SDimitry Andric 
17590b57cec5SDimitry Andric /// Return true if we can evaluate the specified expression tree if the vector
17600b57cec5SDimitry Andric /// elements were shuffled in a different order.
17610b57cec5SDimitry Andric static bool canEvaluateShuffled(Value *V, ArrayRef<int> Mask,
17620b57cec5SDimitry Andric                                 unsigned Depth = 5) {
17630b57cec5SDimitry Andric   // We can always reorder the elements of a constant.
17640b57cec5SDimitry Andric   if (isa<Constant>(V))
17650b57cec5SDimitry Andric     return true;
17660b57cec5SDimitry Andric 
17670b57cec5SDimitry Andric   // We won't reorder vector arguments. No IPO here.
17680b57cec5SDimitry Andric   Instruction *I = dyn_cast<Instruction>(V);
17690b57cec5SDimitry Andric   if (!I) return false;
17700b57cec5SDimitry Andric 
17710b57cec5SDimitry Andric   // Two users may expect different orders of the elements. Don't try it.
17720b57cec5SDimitry Andric   if (!I->hasOneUse())
17730b57cec5SDimitry Andric     return false;
17740b57cec5SDimitry Andric 
17750b57cec5SDimitry Andric   if (Depth == 0) return false;
17760b57cec5SDimitry Andric 
17770b57cec5SDimitry Andric   switch (I->getOpcode()) {
17788bcb0991SDimitry Andric     case Instruction::UDiv:
17798bcb0991SDimitry Andric     case Instruction::SDiv:
17808bcb0991SDimitry Andric     case Instruction::URem:
17818bcb0991SDimitry Andric     case Instruction::SRem:
17828bcb0991SDimitry Andric       // Propagating an undefined shuffle mask element to integer div/rem is not
17838bcb0991SDimitry Andric       // allowed because those opcodes can create immediate undefined behavior
17848bcb0991SDimitry Andric       // from an undefined element in an operand.
1785e8d8bef9SDimitry Andric       if (llvm::is_contained(Mask, -1))
17868bcb0991SDimitry Andric         return false;
1787bdd1243dSDimitry Andric       [[fallthrough]];
17880b57cec5SDimitry Andric     case Instruction::Add:
17890b57cec5SDimitry Andric     case Instruction::FAdd:
17900b57cec5SDimitry Andric     case Instruction::Sub:
17910b57cec5SDimitry Andric     case Instruction::FSub:
17920b57cec5SDimitry Andric     case Instruction::Mul:
17930b57cec5SDimitry Andric     case Instruction::FMul:
17940b57cec5SDimitry Andric     case Instruction::FDiv:
17950b57cec5SDimitry Andric     case Instruction::FRem:
17960b57cec5SDimitry Andric     case Instruction::Shl:
17970b57cec5SDimitry Andric     case Instruction::LShr:
17980b57cec5SDimitry Andric     case Instruction::AShr:
17990b57cec5SDimitry Andric     case Instruction::And:
18000b57cec5SDimitry Andric     case Instruction::Or:
18010b57cec5SDimitry Andric     case Instruction::Xor:
18020b57cec5SDimitry Andric     case Instruction::ICmp:
18030b57cec5SDimitry Andric     case Instruction::FCmp:
18040b57cec5SDimitry Andric     case Instruction::Trunc:
18050b57cec5SDimitry Andric     case Instruction::ZExt:
18060b57cec5SDimitry Andric     case Instruction::SExt:
18070b57cec5SDimitry Andric     case Instruction::FPToUI:
18080b57cec5SDimitry Andric     case Instruction::FPToSI:
18090b57cec5SDimitry Andric     case Instruction::UIToFP:
18100b57cec5SDimitry Andric     case Instruction::SIToFP:
18110b57cec5SDimitry Andric     case Instruction::FPTrunc:
18120b57cec5SDimitry Andric     case Instruction::FPExt:
18130b57cec5SDimitry Andric     case Instruction::GetElementPtr: {
18140b57cec5SDimitry Andric       // Bail out if we would create longer vector ops. We could allow creating
18158bcb0991SDimitry Andric       // longer vector ops, but that may result in more expensive codegen.
18160b57cec5SDimitry Andric       Type *ITy = I->getType();
18175ffd83dbSDimitry Andric       if (ITy->isVectorTy() &&
1818e8d8bef9SDimitry Andric           Mask.size() > cast<FixedVectorType>(ITy)->getNumElements())
18190b57cec5SDimitry Andric         return false;
18200b57cec5SDimitry Andric       for (Value *Operand : I->operands()) {
18210b57cec5SDimitry Andric         if (!canEvaluateShuffled(Operand, Mask, Depth - 1))
18220b57cec5SDimitry Andric           return false;
18230b57cec5SDimitry Andric       }
18240b57cec5SDimitry Andric       return true;
18250b57cec5SDimitry Andric     }
18260b57cec5SDimitry Andric     case Instruction::InsertElement: {
18270b57cec5SDimitry Andric       ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(2));
18280b57cec5SDimitry Andric       if (!CI) return false;
18290b57cec5SDimitry Andric       int ElementNumber = CI->getLimitedValue();
18300b57cec5SDimitry Andric 
18310b57cec5SDimitry Andric       // Verify that 'CI' does not occur twice in Mask. A single 'insertelement'
18320b57cec5SDimitry Andric       // can't put an element into multiple indices.
18330b57cec5SDimitry Andric       bool SeenOnce = false;
1834bdd1243dSDimitry Andric       for (int I : Mask) {
1835bdd1243dSDimitry Andric         if (I == ElementNumber) {
18360b57cec5SDimitry Andric           if (SeenOnce)
18370b57cec5SDimitry Andric             return false;
18380b57cec5SDimitry Andric           SeenOnce = true;
18390b57cec5SDimitry Andric         }
18400b57cec5SDimitry Andric       }
18410b57cec5SDimitry Andric       return canEvaluateShuffled(I->getOperand(0), Mask, Depth - 1);
18420b57cec5SDimitry Andric     }
18430b57cec5SDimitry Andric   }
18440b57cec5SDimitry Andric   return false;
18450b57cec5SDimitry Andric }
18460b57cec5SDimitry Andric 
18470b57cec5SDimitry Andric /// Rebuild a new instruction just like 'I' but with the new operands given.
18480b57cec5SDimitry Andric /// In the event of type mismatch, the type of the operands is correct.
184906c3fb27SDimitry Andric static Value *buildNew(Instruction *I, ArrayRef<Value*> NewOps,
185006c3fb27SDimitry Andric                        IRBuilderBase &Builder) {
185106c3fb27SDimitry Andric   Builder.SetInsertPoint(I);
18520b57cec5SDimitry Andric   switch (I->getOpcode()) {
18530b57cec5SDimitry Andric     case Instruction::Add:
18540b57cec5SDimitry Andric     case Instruction::FAdd:
18550b57cec5SDimitry Andric     case Instruction::Sub:
18560b57cec5SDimitry Andric     case Instruction::FSub:
18570b57cec5SDimitry Andric     case Instruction::Mul:
18580b57cec5SDimitry Andric     case Instruction::FMul:
18590b57cec5SDimitry Andric     case Instruction::UDiv:
18600b57cec5SDimitry Andric     case Instruction::SDiv:
18610b57cec5SDimitry Andric     case Instruction::FDiv:
18620b57cec5SDimitry Andric     case Instruction::URem:
18630b57cec5SDimitry Andric     case Instruction::SRem:
18640b57cec5SDimitry Andric     case Instruction::FRem:
18650b57cec5SDimitry Andric     case Instruction::Shl:
18660b57cec5SDimitry Andric     case Instruction::LShr:
18670b57cec5SDimitry Andric     case Instruction::AShr:
18680b57cec5SDimitry Andric     case Instruction::And:
18690b57cec5SDimitry Andric     case Instruction::Or:
18700b57cec5SDimitry Andric     case Instruction::Xor: {
18710b57cec5SDimitry Andric       BinaryOperator *BO = cast<BinaryOperator>(I);
18720b57cec5SDimitry Andric       assert(NewOps.size() == 2 && "binary operator with #ops != 2");
187306c3fb27SDimitry Andric       Value *New = Builder.CreateBinOp(cast<BinaryOperator>(I)->getOpcode(),
187406c3fb27SDimitry Andric                                        NewOps[0], NewOps[1]);
187506c3fb27SDimitry Andric       if (auto *NewI = dyn_cast<Instruction>(New)) {
18760b57cec5SDimitry Andric         if (isa<OverflowingBinaryOperator>(BO)) {
187706c3fb27SDimitry Andric           NewI->setHasNoUnsignedWrap(BO->hasNoUnsignedWrap());
187806c3fb27SDimitry Andric           NewI->setHasNoSignedWrap(BO->hasNoSignedWrap());
18790b57cec5SDimitry Andric         }
18800b57cec5SDimitry Andric         if (isa<PossiblyExactOperator>(BO)) {
188106c3fb27SDimitry Andric           NewI->setIsExact(BO->isExact());
18820b57cec5SDimitry Andric         }
18830b57cec5SDimitry Andric         if (isa<FPMathOperator>(BO))
188406c3fb27SDimitry Andric           NewI->copyFastMathFlags(I);
188506c3fb27SDimitry Andric       }
18860b57cec5SDimitry Andric       return New;
18870b57cec5SDimitry Andric     }
18880b57cec5SDimitry Andric     case Instruction::ICmp:
18890b57cec5SDimitry Andric       assert(NewOps.size() == 2 && "icmp with #ops != 2");
189006c3fb27SDimitry Andric       return Builder.CreateICmp(cast<ICmpInst>(I)->getPredicate(), NewOps[0],
189106c3fb27SDimitry Andric                                 NewOps[1]);
18920b57cec5SDimitry Andric     case Instruction::FCmp:
18930b57cec5SDimitry Andric       assert(NewOps.size() == 2 && "fcmp with #ops != 2");
189406c3fb27SDimitry Andric       return Builder.CreateFCmp(cast<FCmpInst>(I)->getPredicate(), NewOps[0],
189506c3fb27SDimitry Andric                                 NewOps[1]);
18960b57cec5SDimitry Andric     case Instruction::Trunc:
18970b57cec5SDimitry Andric     case Instruction::ZExt:
18980b57cec5SDimitry Andric     case Instruction::SExt:
18990b57cec5SDimitry Andric     case Instruction::FPToUI:
19000b57cec5SDimitry Andric     case Instruction::FPToSI:
19010b57cec5SDimitry Andric     case Instruction::UIToFP:
19020b57cec5SDimitry Andric     case Instruction::SIToFP:
19030b57cec5SDimitry Andric     case Instruction::FPTrunc:
19040b57cec5SDimitry Andric     case Instruction::FPExt: {
19050b57cec5SDimitry Andric       // It's possible that the mask has a different number of elements from
19060b57cec5SDimitry Andric       // the original cast. We recompute the destination type to match the mask.
19075ffd83dbSDimitry Andric       Type *DestTy = VectorType::get(
19085ffd83dbSDimitry Andric           I->getType()->getScalarType(),
19095ffd83dbSDimitry Andric           cast<VectorType>(NewOps[0]->getType())->getElementCount());
19100b57cec5SDimitry Andric       assert(NewOps.size() == 1 && "cast with #ops != 1");
191106c3fb27SDimitry Andric       return Builder.CreateCast(cast<CastInst>(I)->getOpcode(), NewOps[0],
191206c3fb27SDimitry Andric                                 DestTy);
19130b57cec5SDimitry Andric     }
19140b57cec5SDimitry Andric     case Instruction::GetElementPtr: {
19150b57cec5SDimitry Andric       Value *Ptr = NewOps[0];
19160b57cec5SDimitry Andric       ArrayRef<Value*> Idx = NewOps.slice(1);
191706c3fb27SDimitry Andric       return Builder.CreateGEP(cast<GEPOperator>(I)->getSourceElementType(),
191806c3fb27SDimitry Andric                                Ptr, Idx, "",
191906c3fb27SDimitry Andric                                cast<GEPOperator>(I)->isInBounds());
19200b57cec5SDimitry Andric     }
19210b57cec5SDimitry Andric   }
19220b57cec5SDimitry Andric   llvm_unreachable("failed to rebuild vector instructions");
19230b57cec5SDimitry Andric }
19240b57cec5SDimitry Andric 
192506c3fb27SDimitry Andric static Value *evaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask,
192606c3fb27SDimitry Andric                                               IRBuilderBase &Builder) {
19270b57cec5SDimitry Andric   // Mask.size() does not need to be equal to the number of vector elements.
19280b57cec5SDimitry Andric 
19290b57cec5SDimitry Andric   assert(V->getType()->isVectorTy() && "can't reorder non-vector elements");
19300b57cec5SDimitry Andric   Type *EltTy = V->getType()->getScalarType();
1931cb14a3feSDimitry Andric 
1932cb14a3feSDimitry Andric   if (isa<PoisonValue>(V))
1933cb14a3feSDimitry Andric     return PoisonValue::get(FixedVectorType::get(EltTy, Mask.size()));
1934cb14a3feSDimitry Andric 
1935fe6060f1SDimitry Andric   if (match(V, m_Undef()))
19365ffd83dbSDimitry Andric     return UndefValue::get(FixedVectorType::get(EltTy, Mask.size()));
19370b57cec5SDimitry Andric 
19380b57cec5SDimitry Andric   if (isa<ConstantAggregateZero>(V))
19395ffd83dbSDimitry Andric     return ConstantAggregateZero::get(FixedVectorType::get(EltTy, Mask.size()));
19400b57cec5SDimitry Andric 
19415ffd83dbSDimitry Andric   if (Constant *C = dyn_cast<Constant>(V))
1942fe6060f1SDimitry Andric     return ConstantExpr::getShuffleVector(C, PoisonValue::get(C->getType()),
19435ffd83dbSDimitry Andric                                           Mask);
19440b57cec5SDimitry Andric 
19450b57cec5SDimitry Andric   Instruction *I = cast<Instruction>(V);
19460b57cec5SDimitry Andric   switch (I->getOpcode()) {
19470b57cec5SDimitry Andric     case Instruction::Add:
19480b57cec5SDimitry Andric     case Instruction::FAdd:
19490b57cec5SDimitry Andric     case Instruction::Sub:
19500b57cec5SDimitry Andric     case Instruction::FSub:
19510b57cec5SDimitry Andric     case Instruction::Mul:
19520b57cec5SDimitry Andric     case Instruction::FMul:
19530b57cec5SDimitry Andric     case Instruction::UDiv:
19540b57cec5SDimitry Andric     case Instruction::SDiv:
19550b57cec5SDimitry Andric     case Instruction::FDiv:
19560b57cec5SDimitry Andric     case Instruction::URem:
19570b57cec5SDimitry Andric     case Instruction::SRem:
19580b57cec5SDimitry Andric     case Instruction::FRem:
19590b57cec5SDimitry Andric     case Instruction::Shl:
19600b57cec5SDimitry Andric     case Instruction::LShr:
19610b57cec5SDimitry Andric     case Instruction::AShr:
19620b57cec5SDimitry Andric     case Instruction::And:
19630b57cec5SDimitry Andric     case Instruction::Or:
19640b57cec5SDimitry Andric     case Instruction::Xor:
19650b57cec5SDimitry Andric     case Instruction::ICmp:
19660b57cec5SDimitry Andric     case Instruction::FCmp:
19670b57cec5SDimitry Andric     case Instruction::Trunc:
19680b57cec5SDimitry Andric     case Instruction::ZExt:
19690b57cec5SDimitry Andric     case Instruction::SExt:
19700b57cec5SDimitry Andric     case Instruction::FPToUI:
19710b57cec5SDimitry Andric     case Instruction::FPToSI:
19720b57cec5SDimitry Andric     case Instruction::UIToFP:
19730b57cec5SDimitry Andric     case Instruction::SIToFP:
19740b57cec5SDimitry Andric     case Instruction::FPTrunc:
19750b57cec5SDimitry Andric     case Instruction::FPExt:
19760b57cec5SDimitry Andric     case Instruction::Select:
19770b57cec5SDimitry Andric     case Instruction::GetElementPtr: {
19780b57cec5SDimitry Andric       SmallVector<Value*, 8> NewOps;
19795ffd83dbSDimitry Andric       bool NeedsRebuild =
1980e8d8bef9SDimitry Andric           (Mask.size() !=
1981e8d8bef9SDimitry Andric            cast<FixedVectorType>(I->getType())->getNumElements());
19820b57cec5SDimitry Andric       for (int i = 0, e = I->getNumOperands(); i != e; ++i) {
19830b57cec5SDimitry Andric         Value *V;
19840b57cec5SDimitry Andric         // Recursively call evaluateInDifferentElementOrder on vector arguments
19850b57cec5SDimitry Andric         // as well. E.g. GetElementPtr may have scalar operands even if the
19860b57cec5SDimitry Andric         // return value is a vector, so we need to examine the operand type.
19870b57cec5SDimitry Andric         if (I->getOperand(i)->getType()->isVectorTy())
198806c3fb27SDimitry Andric           V = evaluateInDifferentElementOrder(I->getOperand(i), Mask, Builder);
19890b57cec5SDimitry Andric         else
19900b57cec5SDimitry Andric           V = I->getOperand(i);
19910b57cec5SDimitry Andric         NewOps.push_back(V);
19920b57cec5SDimitry Andric         NeedsRebuild |= (V != I->getOperand(i));
19930b57cec5SDimitry Andric       }
199406c3fb27SDimitry Andric       if (NeedsRebuild)
199506c3fb27SDimitry Andric         return buildNew(I, NewOps, Builder);
19960b57cec5SDimitry Andric       return I;
19970b57cec5SDimitry Andric     }
19980b57cec5SDimitry Andric     case Instruction::InsertElement: {
19990b57cec5SDimitry Andric       int Element = cast<ConstantInt>(I->getOperand(2))->getLimitedValue();
20000b57cec5SDimitry Andric 
20010b57cec5SDimitry Andric       // The insertelement was inserting at Element. Figure out which element
20020b57cec5SDimitry Andric       // that becomes after shuffling. The answer is guaranteed to be unique
20030b57cec5SDimitry Andric       // by CanEvaluateShuffled.
20040b57cec5SDimitry Andric       bool Found = false;
20050b57cec5SDimitry Andric       int Index = 0;
20060b57cec5SDimitry Andric       for (int e = Mask.size(); Index != e; ++Index) {
20070b57cec5SDimitry Andric         if (Mask[Index] == Element) {
20080b57cec5SDimitry Andric           Found = true;
20090b57cec5SDimitry Andric           break;
20100b57cec5SDimitry Andric         }
20110b57cec5SDimitry Andric       }
20120b57cec5SDimitry Andric 
20130b57cec5SDimitry Andric       // If element is not in Mask, no need to handle the operand 1 (element to
20140b57cec5SDimitry Andric       // be inserted). Just evaluate values in operand 0 according to Mask.
20150b57cec5SDimitry Andric       if (!Found)
201606c3fb27SDimitry Andric         return evaluateInDifferentElementOrder(I->getOperand(0), Mask, Builder);
20170b57cec5SDimitry Andric 
201806c3fb27SDimitry Andric       Value *V = evaluateInDifferentElementOrder(I->getOperand(0), Mask,
201906c3fb27SDimitry Andric                                                  Builder);
202006c3fb27SDimitry Andric       Builder.SetInsertPoint(I);
202106c3fb27SDimitry Andric       return Builder.CreateInsertElement(V, I->getOperand(1), Index);
20220b57cec5SDimitry Andric     }
20230b57cec5SDimitry Andric   }
20240b57cec5SDimitry Andric   llvm_unreachable("failed to reorder elements of vector instruction!");
20250b57cec5SDimitry Andric }
20260b57cec5SDimitry Andric 
20270b57cec5SDimitry Andric // Returns true if the shuffle is extracting a contiguous range of values from
20280b57cec5SDimitry Andric // LHS, for example:
20290b57cec5SDimitry Andric //                 +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
20300b57cec5SDimitry Andric //   Input:        |AA|BB|CC|DD|EE|FF|GG|HH|II|JJ|KK|LL|MM|NN|OO|PP|
20310b57cec5SDimitry Andric //   Shuffles to:  |EE|FF|GG|HH|
20320b57cec5SDimitry Andric //                 +--+--+--+--+
20330b57cec5SDimitry Andric static bool isShuffleExtractingFromLHS(ShuffleVectorInst &SVI,
20345ffd83dbSDimitry Andric                                        ArrayRef<int> Mask) {
20355ffd83dbSDimitry Andric   unsigned LHSElems =
2036e8d8bef9SDimitry Andric       cast<FixedVectorType>(SVI.getOperand(0)->getType())->getNumElements();
20370b57cec5SDimitry Andric   unsigned MaskElems = Mask.size();
20380b57cec5SDimitry Andric   unsigned BegIdx = Mask.front();
20390b57cec5SDimitry Andric   unsigned EndIdx = Mask.back();
20400b57cec5SDimitry Andric   if (BegIdx > EndIdx || EndIdx >= LHSElems || EndIdx - BegIdx != MaskElems - 1)
20410b57cec5SDimitry Andric     return false;
20420b57cec5SDimitry Andric   for (unsigned I = 0; I != MaskElems; ++I)
20430b57cec5SDimitry Andric     if (static_cast<unsigned>(Mask[I]) != BegIdx + I)
20440b57cec5SDimitry Andric       return false;
20450b57cec5SDimitry Andric   return true;
20460b57cec5SDimitry Andric }
20470b57cec5SDimitry Andric 
20480b57cec5SDimitry Andric /// These are the ingredients in an alternate form binary operator as described
20490b57cec5SDimitry Andric /// below.
20500b57cec5SDimitry Andric struct BinopElts {
20510b57cec5SDimitry Andric   BinaryOperator::BinaryOps Opcode;
20520b57cec5SDimitry Andric   Value *Op0;
20530b57cec5SDimitry Andric   Value *Op1;
20540b57cec5SDimitry Andric   BinopElts(BinaryOperator::BinaryOps Opc = (BinaryOperator::BinaryOps)0,
20550b57cec5SDimitry Andric             Value *V0 = nullptr, Value *V1 = nullptr) :
20560b57cec5SDimitry Andric       Opcode(Opc), Op0(V0), Op1(V1) {}
20570b57cec5SDimitry Andric   operator bool() const { return Opcode != 0; }
20580b57cec5SDimitry Andric };
20590b57cec5SDimitry Andric 
20600b57cec5SDimitry Andric /// Binops may be transformed into binops with different opcodes and operands.
20610b57cec5SDimitry Andric /// Reverse the usual canonicalization to enable folds with the non-canonical
20620b57cec5SDimitry Andric /// form of the binop. If a transform is possible, return the elements of the
20630b57cec5SDimitry Andric /// new binop. If not, return invalid elements.
20640b57cec5SDimitry Andric static BinopElts getAlternateBinop(BinaryOperator *BO, const DataLayout &DL) {
20650b57cec5SDimitry Andric   Value *BO0 = BO->getOperand(0), *BO1 = BO->getOperand(1);
20660b57cec5SDimitry Andric   Type *Ty = BO->getType();
20670b57cec5SDimitry Andric   switch (BO->getOpcode()) {
20680b57cec5SDimitry Andric   case Instruction::Shl: {
20690b57cec5SDimitry Andric     // shl X, C --> mul X, (1 << C)
20700b57cec5SDimitry Andric     Constant *C;
2071*0fca6ea1SDimitry Andric     if (match(BO1, m_ImmConstant(C))) {
2072*0fca6ea1SDimitry Andric       Constant *ShlOne = ConstantFoldBinaryOpOperands(
2073*0fca6ea1SDimitry Andric           Instruction::Shl, ConstantInt::get(Ty, 1), C, DL);
2074*0fca6ea1SDimitry Andric       assert(ShlOne && "Constant folding of immediate constants failed");
20750b57cec5SDimitry Andric       return {Instruction::Mul, BO0, ShlOne};
20760b57cec5SDimitry Andric     }
20770b57cec5SDimitry Andric     break;
20780b57cec5SDimitry Andric   }
20790b57cec5SDimitry Andric   case Instruction::Or: {
2080*0fca6ea1SDimitry Andric     // or disjoin X, C --> add X, C
2081*0fca6ea1SDimitry Andric     if (cast<PossiblyDisjointInst>(BO)->isDisjoint())
20820b57cec5SDimitry Andric       return {Instruction::Add, BO0, BO1};
20830b57cec5SDimitry Andric     break;
20840b57cec5SDimitry Andric   }
208581ad6265SDimitry Andric   case Instruction::Sub:
208681ad6265SDimitry Andric     // sub 0, X --> mul X, -1
208781ad6265SDimitry Andric     if (match(BO0, m_ZeroInt()))
208881ad6265SDimitry Andric       return {Instruction::Mul, BO1, ConstantInt::getAllOnesValue(Ty)};
208981ad6265SDimitry Andric     break;
20900b57cec5SDimitry Andric   default:
20910b57cec5SDimitry Andric     break;
20920b57cec5SDimitry Andric   }
20930b57cec5SDimitry Andric   return {};
20940b57cec5SDimitry Andric }
20950b57cec5SDimitry Andric 
2096bdd1243dSDimitry Andric /// A select shuffle of a select shuffle with a shared operand can be reduced
2097bdd1243dSDimitry Andric /// to a single select shuffle. This is an obvious improvement in IR, and the
2098bdd1243dSDimitry Andric /// backend is expected to lower select shuffles efficiently.
2099bdd1243dSDimitry Andric static Instruction *foldSelectShuffleOfSelectShuffle(ShuffleVectorInst &Shuf) {
2100bdd1243dSDimitry Andric   assert(Shuf.isSelect() && "Must have select-equivalent shuffle");
2101bdd1243dSDimitry Andric 
2102bdd1243dSDimitry Andric   Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
2103bdd1243dSDimitry Andric   SmallVector<int, 16> Mask;
2104bdd1243dSDimitry Andric   Shuf.getShuffleMask(Mask);
2105bdd1243dSDimitry Andric   unsigned NumElts = Mask.size();
2106bdd1243dSDimitry Andric 
2107bdd1243dSDimitry Andric   // Canonicalize a select shuffle with common operand as Op1.
2108bdd1243dSDimitry Andric   auto *ShufOp = dyn_cast<ShuffleVectorInst>(Op0);
2109bdd1243dSDimitry Andric   if (ShufOp && ShufOp->isSelect() &&
2110bdd1243dSDimitry Andric       (ShufOp->getOperand(0) == Op1 || ShufOp->getOperand(1) == Op1)) {
2111bdd1243dSDimitry Andric     std::swap(Op0, Op1);
2112bdd1243dSDimitry Andric     ShuffleVectorInst::commuteShuffleMask(Mask, NumElts);
2113bdd1243dSDimitry Andric   }
2114bdd1243dSDimitry Andric 
2115bdd1243dSDimitry Andric   ShufOp = dyn_cast<ShuffleVectorInst>(Op1);
2116bdd1243dSDimitry Andric   if (!ShufOp || !ShufOp->isSelect() ||
2117bdd1243dSDimitry Andric       (ShufOp->getOperand(0) != Op0 && ShufOp->getOperand(1) != Op0))
2118bdd1243dSDimitry Andric     return nullptr;
2119bdd1243dSDimitry Andric 
2120bdd1243dSDimitry Andric   Value *X = ShufOp->getOperand(0), *Y = ShufOp->getOperand(1);
2121bdd1243dSDimitry Andric   SmallVector<int, 16> Mask1;
2122bdd1243dSDimitry Andric   ShufOp->getShuffleMask(Mask1);
2123bdd1243dSDimitry Andric   assert(Mask1.size() == NumElts && "Vector size changed with select shuffle");
2124bdd1243dSDimitry Andric 
2125bdd1243dSDimitry Andric   // Canonicalize common operand (Op0) as X (first operand of first shuffle).
2126bdd1243dSDimitry Andric   if (Y == Op0) {
2127bdd1243dSDimitry Andric     std::swap(X, Y);
2128bdd1243dSDimitry Andric     ShuffleVectorInst::commuteShuffleMask(Mask1, NumElts);
2129bdd1243dSDimitry Andric   }
2130bdd1243dSDimitry Andric 
2131bdd1243dSDimitry Andric   // If the mask chooses from X (operand 0), it stays the same.
2132bdd1243dSDimitry Andric   // If the mask chooses from the earlier shuffle, the other mask value is
2133bdd1243dSDimitry Andric   // transferred to the combined select shuffle:
2134bdd1243dSDimitry Andric   // shuf X, (shuf X, Y, M1), M --> shuf X, Y, M'
2135bdd1243dSDimitry Andric   SmallVector<int, 16> NewMask(NumElts);
2136bdd1243dSDimitry Andric   for (unsigned i = 0; i != NumElts; ++i)
2137bdd1243dSDimitry Andric     NewMask[i] = Mask[i] < (signed)NumElts ? Mask[i] : Mask1[i];
2138bdd1243dSDimitry Andric 
2139bdd1243dSDimitry Andric   // A select mask with undef elements might look like an identity mask.
21405f757f3fSDimitry Andric   assert((ShuffleVectorInst::isSelectMask(NewMask, NumElts) ||
21415f757f3fSDimitry Andric           ShuffleVectorInst::isIdentityMask(NewMask, NumElts)) &&
2142bdd1243dSDimitry Andric          "Unexpected shuffle mask");
2143bdd1243dSDimitry Andric   return new ShuffleVectorInst(X, Y, NewMask);
2144bdd1243dSDimitry Andric }
2145bdd1243dSDimitry Andric 
2146*0fca6ea1SDimitry Andric static Instruction *foldSelectShuffleWith1Binop(ShuffleVectorInst &Shuf,
2147*0fca6ea1SDimitry Andric                                                 const SimplifyQuery &SQ) {
21480b57cec5SDimitry Andric   assert(Shuf.isSelect() && "Must have select-equivalent shuffle");
21490b57cec5SDimitry Andric 
21500b57cec5SDimitry Andric   // Are we shuffling together some value and that same value after it has been
21510b57cec5SDimitry Andric   // modified by a binop with a constant?
21520b57cec5SDimitry Andric   Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
21530b57cec5SDimitry Andric   Constant *C;
21540b57cec5SDimitry Andric   bool Op0IsBinop;
21550b57cec5SDimitry Andric   if (match(Op0, m_BinOp(m_Specific(Op1), m_Constant(C))))
21560b57cec5SDimitry Andric     Op0IsBinop = true;
21570b57cec5SDimitry Andric   else if (match(Op1, m_BinOp(m_Specific(Op0), m_Constant(C))))
21580b57cec5SDimitry Andric     Op0IsBinop = false;
21590b57cec5SDimitry Andric   else
21600b57cec5SDimitry Andric     return nullptr;
21610b57cec5SDimitry Andric 
21620b57cec5SDimitry Andric   // The identity constant for a binop leaves a variable operand unchanged. For
21630b57cec5SDimitry Andric   // a vector, this is a splat of something like 0, -1, or 1.
21640b57cec5SDimitry Andric   // If there's no identity constant for this binop, we're done.
21650b57cec5SDimitry Andric   auto *BO = cast<BinaryOperator>(Op0IsBinop ? Op0 : Op1);
21660b57cec5SDimitry Andric   BinaryOperator::BinaryOps BOpcode = BO->getOpcode();
21670b57cec5SDimitry Andric   Constant *IdC = ConstantExpr::getBinOpIdentity(BOpcode, Shuf.getType(), true);
21680b57cec5SDimitry Andric   if (!IdC)
21690b57cec5SDimitry Andric     return nullptr;
21700b57cec5SDimitry Andric 
2171*0fca6ea1SDimitry Andric   Value *X = Op0IsBinop ? Op1 : Op0;
2172*0fca6ea1SDimitry Andric 
2173*0fca6ea1SDimitry Andric   // Prevent folding in the case the non-binop operand might have NaN values.
2174*0fca6ea1SDimitry Andric   // If X can have NaN elements then we have that the floating point math
2175*0fca6ea1SDimitry Andric   // operation in the transformed code may not preserve the exact NaN
2176*0fca6ea1SDimitry Andric   // bit-pattern -- e.g. `fadd sNaN, 0.0 -> qNaN`.
2177*0fca6ea1SDimitry Andric   // This makes the transformation incorrect since the original program would
2178*0fca6ea1SDimitry Andric   // have preserved the exact NaN bit-pattern.
2179*0fca6ea1SDimitry Andric   // Avoid the folding if X can have NaN elements.
2180*0fca6ea1SDimitry Andric   if (Shuf.getType()->getElementType()->isFloatingPointTy() &&
2181*0fca6ea1SDimitry Andric       !isKnownNeverNaN(X, 0, SQ))
2182*0fca6ea1SDimitry Andric     return nullptr;
2183*0fca6ea1SDimitry Andric 
21840b57cec5SDimitry Andric   // Shuffle identity constants into the lanes that return the original value.
21850b57cec5SDimitry Andric   // Example: shuf (mul X, {-1,-2,-3,-4}), X, {0,5,6,3} --> mul X, {-1,1,1,-4}
21860b57cec5SDimitry Andric   // Example: shuf X, (add X, {-1,-2,-3,-4}), {0,1,6,7} --> add X, {0,0,-3,-4}
21870b57cec5SDimitry Andric   // The existing binop constant vector remains in the same operand position.
21885ffd83dbSDimitry Andric   ArrayRef<int> Mask = Shuf.getShuffleMask();
21890b57cec5SDimitry Andric   Constant *NewC = Op0IsBinop ? ConstantExpr::getShuffleVector(C, IdC, Mask) :
21900b57cec5SDimitry Andric                                 ConstantExpr::getShuffleVector(IdC, C, Mask);
21910b57cec5SDimitry Andric 
21920b57cec5SDimitry Andric   bool MightCreatePoisonOrUB =
219306c3fb27SDimitry Andric       is_contained(Mask, PoisonMaskElem) &&
21940b57cec5SDimitry Andric       (Instruction::isIntDivRem(BOpcode) || Instruction::isShift(BOpcode));
21950b57cec5SDimitry Andric   if (MightCreatePoisonOrUB)
2196e8d8bef9SDimitry Andric     NewC = InstCombiner::getSafeVectorConstantForBinop(BOpcode, NewC, true);
21970b57cec5SDimitry Andric 
21980b57cec5SDimitry Andric   // shuf (bop X, C), X, M --> bop X, C'
21990b57cec5SDimitry Andric   // shuf X, (bop X, C), M --> bop X, C'
22000b57cec5SDimitry Andric   Instruction *NewBO = BinaryOperator::Create(BOpcode, X, NewC);
22010b57cec5SDimitry Andric   NewBO->copyIRFlags(BO);
22020b57cec5SDimitry Andric 
22030b57cec5SDimitry Andric   // An undef shuffle mask element may propagate as an undef constant element in
22040b57cec5SDimitry Andric   // the new binop. That would produce poison where the original code might not.
22050b57cec5SDimitry Andric   // If we already made a safe constant, then there's no danger.
220606c3fb27SDimitry Andric   if (is_contained(Mask, PoisonMaskElem) && !MightCreatePoisonOrUB)
22070b57cec5SDimitry Andric     NewBO->dropPoisonGeneratingFlags();
22080b57cec5SDimitry Andric   return NewBO;
22090b57cec5SDimitry Andric }
22100b57cec5SDimitry Andric 
22110b57cec5SDimitry Andric /// If we have an insert of a scalar to a non-zero element of an undefined
22120b57cec5SDimitry Andric /// vector and then shuffle that value, that's the same as inserting to the zero
22130b57cec5SDimitry Andric /// element and shuffling. Splatting from the zero element is recognized as the
22140b57cec5SDimitry Andric /// canonical form of splat.
22150b57cec5SDimitry Andric static Instruction *canonicalizeInsertSplat(ShuffleVectorInst &Shuf,
22160b57cec5SDimitry Andric                                             InstCombiner::BuilderTy &Builder) {
22170b57cec5SDimitry Andric   Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
22185ffd83dbSDimitry Andric   ArrayRef<int> Mask = Shuf.getShuffleMask();
22190b57cec5SDimitry Andric   Value *X;
22200b57cec5SDimitry Andric   uint64_t IndexC;
22210b57cec5SDimitry Andric 
22220b57cec5SDimitry Andric   // Match a shuffle that is a splat to a non-zero element.
2223*0fca6ea1SDimitry Andric   if (!match(Op0, m_OneUse(m_InsertElt(m_Poison(), m_Value(X),
22240b57cec5SDimitry Andric                                        m_ConstantInt(IndexC)))) ||
2225*0fca6ea1SDimitry Andric       !match(Op1, m_Poison()) || match(Mask, m_ZeroMask()) || IndexC == 0)
22260b57cec5SDimitry Andric     return nullptr;
22270b57cec5SDimitry Andric 
22285f757f3fSDimitry Andric   // Insert into element 0 of a poison vector.
22295f757f3fSDimitry Andric   PoisonValue *PoisonVec = PoisonValue::get(Shuf.getType());
22305f757f3fSDimitry Andric   Value *NewIns = Builder.CreateInsertElement(PoisonVec, X, (uint64_t)0);
22310b57cec5SDimitry Andric 
2232*0fca6ea1SDimitry Andric   // Splat from element 0. Any mask element that is poison remains poison.
22330b57cec5SDimitry Andric   // For example:
2234*0fca6ea1SDimitry Andric   // shuf (inselt poison, X, 2), _, <2,2,undef>
2235*0fca6ea1SDimitry Andric   //   --> shuf (inselt poison, X, 0), poison, <0,0,undef>
2236e8d8bef9SDimitry Andric   unsigned NumMaskElts =
2237e8d8bef9SDimitry Andric       cast<FixedVectorType>(Shuf.getType())->getNumElements();
22385ffd83dbSDimitry Andric   SmallVector<int, 16> NewMask(NumMaskElts, 0);
22390b57cec5SDimitry Andric   for (unsigned i = 0; i != NumMaskElts; ++i)
224006c3fb27SDimitry Andric     if (Mask[i] == PoisonMaskElem)
22415ffd83dbSDimitry Andric       NewMask[i] = Mask[i];
22420b57cec5SDimitry Andric 
2243349cc55cSDimitry Andric   return new ShuffleVectorInst(NewIns, NewMask);
22440b57cec5SDimitry Andric }
22450b57cec5SDimitry Andric 
22460b57cec5SDimitry Andric /// Try to fold shuffles that are the equivalent of a vector select.
22470eae32dcSDimitry Andric Instruction *InstCombinerImpl::foldSelectShuffle(ShuffleVectorInst &Shuf) {
22480b57cec5SDimitry Andric   if (!Shuf.isSelect())
22490b57cec5SDimitry Andric     return nullptr;
22500b57cec5SDimitry Andric 
2251480093f4SDimitry Andric   // Canonicalize to choose from operand 0 first unless operand 1 is undefined.
2252480093f4SDimitry Andric   // Commuting undef to operand 0 conflicts with another canonicalization.
2253e8d8bef9SDimitry Andric   unsigned NumElts = cast<FixedVectorType>(Shuf.getType())->getNumElements();
2254fe6060f1SDimitry Andric   if (!match(Shuf.getOperand(1), m_Undef()) &&
2255480093f4SDimitry Andric       Shuf.getMaskValue(0) >= (int)NumElts) {
22560b57cec5SDimitry Andric     // TODO: Can we assert that both operands of a shuffle-select are not undef
22570b57cec5SDimitry Andric     // (otherwise, it would have been folded by instsimplify?
22580b57cec5SDimitry Andric     Shuf.commute();
22590b57cec5SDimitry Andric     return &Shuf;
22600b57cec5SDimitry Andric   }
22610b57cec5SDimitry Andric 
2262bdd1243dSDimitry Andric   if (Instruction *I = foldSelectShuffleOfSelectShuffle(Shuf))
2263bdd1243dSDimitry Andric     return I;
2264bdd1243dSDimitry Andric 
2265*0fca6ea1SDimitry Andric   if (Instruction *I = foldSelectShuffleWith1Binop(
2266*0fca6ea1SDimitry Andric           Shuf, getSimplifyQuery().getWithInstruction(&Shuf)))
22670b57cec5SDimitry Andric     return I;
22680b57cec5SDimitry Andric 
22690b57cec5SDimitry Andric   BinaryOperator *B0, *B1;
22700b57cec5SDimitry Andric   if (!match(Shuf.getOperand(0), m_BinOp(B0)) ||
22710b57cec5SDimitry Andric       !match(Shuf.getOperand(1), m_BinOp(B1)))
22720b57cec5SDimitry Andric     return nullptr;
22730b57cec5SDimitry Andric 
227481ad6265SDimitry Andric   // If one operand is "0 - X", allow that to be viewed as "X * -1"
227581ad6265SDimitry Andric   // (ConstantsAreOp1) by getAlternateBinop below. If the neg is not paired
227681ad6265SDimitry Andric   // with a multiply, we will exit because C0/C1 will not be set.
22770b57cec5SDimitry Andric   Value *X, *Y;
227881ad6265SDimitry Andric   Constant *C0 = nullptr, *C1 = nullptr;
22790b57cec5SDimitry Andric   bool ConstantsAreOp1;
228081ad6265SDimitry Andric   if (match(B0, m_BinOp(m_Constant(C0), m_Value(X))) &&
22810b57cec5SDimitry Andric       match(B1, m_BinOp(m_Constant(C1), m_Value(Y))))
22820b57cec5SDimitry Andric     ConstantsAreOp1 = false;
228381ad6265SDimitry Andric   else if (match(B0, m_CombineOr(m_BinOp(m_Value(X), m_Constant(C0)),
228481ad6265SDimitry Andric                                  m_Neg(m_Value(X)))) &&
228581ad6265SDimitry Andric            match(B1, m_CombineOr(m_BinOp(m_Value(Y), m_Constant(C1)),
228681ad6265SDimitry Andric                                  m_Neg(m_Value(Y)))))
228781ad6265SDimitry Andric     ConstantsAreOp1 = true;
22880b57cec5SDimitry Andric   else
22890b57cec5SDimitry Andric     return nullptr;
22900b57cec5SDimitry Andric 
22910b57cec5SDimitry Andric   // We need matching binops to fold the lanes together.
22920b57cec5SDimitry Andric   BinaryOperator::BinaryOps Opc0 = B0->getOpcode();
22930b57cec5SDimitry Andric   BinaryOperator::BinaryOps Opc1 = B1->getOpcode();
22940b57cec5SDimitry Andric   bool DropNSW = false;
22950b57cec5SDimitry Andric   if (ConstantsAreOp1 && Opc0 != Opc1) {
22960b57cec5SDimitry Andric     // TODO: We drop "nsw" if shift is converted into multiply because it may
22970b57cec5SDimitry Andric     // not be correct when the shift amount is BitWidth - 1. We could examine
22980b57cec5SDimitry Andric     // each vector element to determine if it is safe to keep that flag.
22990b57cec5SDimitry Andric     if (Opc0 == Instruction::Shl || Opc1 == Instruction::Shl)
23000b57cec5SDimitry Andric       DropNSW = true;
23010b57cec5SDimitry Andric     if (BinopElts AltB0 = getAlternateBinop(B0, DL)) {
23020b57cec5SDimitry Andric       assert(isa<Constant>(AltB0.Op1) && "Expecting constant with alt binop");
23030b57cec5SDimitry Andric       Opc0 = AltB0.Opcode;
23040b57cec5SDimitry Andric       C0 = cast<Constant>(AltB0.Op1);
23050b57cec5SDimitry Andric     } else if (BinopElts AltB1 = getAlternateBinop(B1, DL)) {
23060b57cec5SDimitry Andric       assert(isa<Constant>(AltB1.Op1) && "Expecting constant with alt binop");
23070b57cec5SDimitry Andric       Opc1 = AltB1.Opcode;
23080b57cec5SDimitry Andric       C1 = cast<Constant>(AltB1.Op1);
23090b57cec5SDimitry Andric     }
23100b57cec5SDimitry Andric   }
23110b57cec5SDimitry Andric 
231281ad6265SDimitry Andric   if (Opc0 != Opc1 || !C0 || !C1)
23130b57cec5SDimitry Andric     return nullptr;
23140b57cec5SDimitry Andric 
23150b57cec5SDimitry Andric   // The opcodes must be the same. Use a new name to make that clear.
23160b57cec5SDimitry Andric   BinaryOperator::BinaryOps BOpc = Opc0;
23170b57cec5SDimitry Andric 
23180b57cec5SDimitry Andric   // Select the constant elements needed for the single binop.
23195ffd83dbSDimitry Andric   ArrayRef<int> Mask = Shuf.getShuffleMask();
23200b57cec5SDimitry Andric   Constant *NewC = ConstantExpr::getShuffleVector(C0, C1, Mask);
23210b57cec5SDimitry Andric 
23220b57cec5SDimitry Andric   // We are moving a binop after a shuffle. When a shuffle has an undefined
23230b57cec5SDimitry Andric   // mask element, the result is undefined, but it is not poison or undefined
23240b57cec5SDimitry Andric   // behavior. That is not necessarily true for div/rem/shift.
23250b57cec5SDimitry Andric   bool MightCreatePoisonOrUB =
232606c3fb27SDimitry Andric       is_contained(Mask, PoisonMaskElem) &&
23270b57cec5SDimitry Andric       (Instruction::isIntDivRem(BOpc) || Instruction::isShift(BOpc));
23280b57cec5SDimitry Andric   if (MightCreatePoisonOrUB)
2329e8d8bef9SDimitry Andric     NewC = InstCombiner::getSafeVectorConstantForBinop(BOpc, NewC,
2330e8d8bef9SDimitry Andric                                                        ConstantsAreOp1);
23310b57cec5SDimitry Andric 
23320b57cec5SDimitry Andric   Value *V;
23330b57cec5SDimitry Andric   if (X == Y) {
23340b57cec5SDimitry Andric     // Remove a binop and the shuffle by rearranging the constant:
23350b57cec5SDimitry Andric     // shuffle (op V, C0), (op V, C1), M --> op V, C'
23360b57cec5SDimitry Andric     // shuffle (op C0, V), (op C1, V), M --> op C', V
23370b57cec5SDimitry Andric     V = X;
23380b57cec5SDimitry Andric   } else {
23390b57cec5SDimitry Andric     // If there are 2 different variable operands, we must create a new shuffle
23400b57cec5SDimitry Andric     // (select) first, so check uses to ensure that we don't end up with more
23410b57cec5SDimitry Andric     // instructions than we started with.
23420b57cec5SDimitry Andric     if (!B0->hasOneUse() && !B1->hasOneUse())
23430b57cec5SDimitry Andric       return nullptr;
23440b57cec5SDimitry Andric 
23450b57cec5SDimitry Andric     // If we use the original shuffle mask and op1 is *variable*, we would be
23460b57cec5SDimitry Andric     // putting an undef into operand 1 of div/rem/shift. This is either UB or
23470b57cec5SDimitry Andric     // poison. We do not have to guard against UB when *constants* are op1
23480b57cec5SDimitry Andric     // because safe constants guarantee that we do not overflow sdiv/srem (and
23490b57cec5SDimitry Andric     // there's no danger for other opcodes).
23500b57cec5SDimitry Andric     // TODO: To allow this case, create a new shuffle mask with no undefs.
23510b57cec5SDimitry Andric     if (MightCreatePoisonOrUB && !ConstantsAreOp1)
23520b57cec5SDimitry Andric       return nullptr;
23530b57cec5SDimitry Andric 
23540b57cec5SDimitry Andric     // Note: In general, we do not create new shuffles in InstCombine because we
23550b57cec5SDimitry Andric     // do not know if a target can lower an arbitrary shuffle optimally. In this
23560b57cec5SDimitry Andric     // case, the shuffle uses the existing mask, so there is no additional risk.
23570b57cec5SDimitry Andric 
23580b57cec5SDimitry Andric     // Select the variable vectors first, then perform the binop:
23590b57cec5SDimitry Andric     // shuffle (op X, C0), (op Y, C1), M --> op (shuffle X, Y, M), C'
23600b57cec5SDimitry Andric     // shuffle (op C0, X), (op C1, Y), M --> op C', (shuffle X, Y, M)
23610b57cec5SDimitry Andric     V = Builder.CreateShuffleVector(X, Y, Mask);
23620b57cec5SDimitry Andric   }
23630b57cec5SDimitry Andric 
23640eae32dcSDimitry Andric   Value *NewBO = ConstantsAreOp1 ? Builder.CreateBinOp(BOpc, V, NewC) :
23650eae32dcSDimitry Andric                                    Builder.CreateBinOp(BOpc, NewC, V);
23660b57cec5SDimitry Andric 
23670b57cec5SDimitry Andric   // Flags are intersected from the 2 source binops. But there are 2 exceptions:
23680b57cec5SDimitry Andric   // 1. If we changed an opcode, poison conditions might have changed.
23690b57cec5SDimitry Andric   // 2. If the shuffle had undef mask elements, the new binop might have undefs
23700b57cec5SDimitry Andric   //    where the original code did not. But if we already made a safe constant,
23710b57cec5SDimitry Andric   //    then there's no danger.
23720eae32dcSDimitry Andric   if (auto *NewI = dyn_cast<Instruction>(NewBO)) {
23730eae32dcSDimitry Andric     NewI->copyIRFlags(B0);
23740eae32dcSDimitry Andric     NewI->andIRFlags(B1);
23750b57cec5SDimitry Andric     if (DropNSW)
23760eae32dcSDimitry Andric       NewI->setHasNoSignedWrap(false);
237706c3fb27SDimitry Andric     if (is_contained(Mask, PoisonMaskElem) && !MightCreatePoisonOrUB)
23780eae32dcSDimitry Andric       NewI->dropPoisonGeneratingFlags();
23790eae32dcSDimitry Andric   }
23800eae32dcSDimitry Andric   return replaceInstUsesWith(Shuf, NewBO);
23810b57cec5SDimitry Andric }
23820b57cec5SDimitry Andric 
23835ffd83dbSDimitry Andric /// Convert a narrowing shuffle of a bitcasted vector into a vector truncate.
23845ffd83dbSDimitry Andric /// Example (little endian):
23855ffd83dbSDimitry Andric /// shuf (bitcast <4 x i16> X to <8 x i8>), <0, 2, 4, 6> --> trunc X to <4 x i8>
23865ffd83dbSDimitry Andric static Instruction *foldTruncShuffle(ShuffleVectorInst &Shuf,
23875ffd83dbSDimitry Andric                                      bool IsBigEndian) {
23885ffd83dbSDimitry Andric   // This must be a bitcasted shuffle of 1 vector integer operand.
23895ffd83dbSDimitry Andric   Type *DestType = Shuf.getType();
23905ffd83dbSDimitry Andric   Value *X;
23915ffd83dbSDimitry Andric   if (!match(Shuf.getOperand(0), m_BitCast(m_Value(X))) ||
2392*0fca6ea1SDimitry Andric       !match(Shuf.getOperand(1), m_Poison()) || !DestType->isIntOrIntVectorTy())
23935ffd83dbSDimitry Andric     return nullptr;
23945ffd83dbSDimitry Andric 
23955ffd83dbSDimitry Andric   // The source type must have the same number of elements as the shuffle,
23965ffd83dbSDimitry Andric   // and the source element type must be larger than the shuffle element type.
23975ffd83dbSDimitry Andric   Type *SrcType = X->getType();
23985ffd83dbSDimitry Andric   if (!SrcType->isVectorTy() || !SrcType->isIntOrIntVectorTy() ||
2399e8d8bef9SDimitry Andric       cast<FixedVectorType>(SrcType)->getNumElements() !=
2400e8d8bef9SDimitry Andric           cast<FixedVectorType>(DestType)->getNumElements() ||
24015ffd83dbSDimitry Andric       SrcType->getScalarSizeInBits() % DestType->getScalarSizeInBits() != 0)
24025ffd83dbSDimitry Andric     return nullptr;
24035ffd83dbSDimitry Andric 
24045ffd83dbSDimitry Andric   assert(Shuf.changesLength() && !Shuf.increasesLength() &&
24055ffd83dbSDimitry Andric          "Expected a shuffle that decreases length");
24065ffd83dbSDimitry Andric 
24075ffd83dbSDimitry Andric   // Last, check that the mask chooses the correct low bits for each narrow
24085ffd83dbSDimitry Andric   // element in the result.
24095ffd83dbSDimitry Andric   uint64_t TruncRatio =
24105ffd83dbSDimitry Andric       SrcType->getScalarSizeInBits() / DestType->getScalarSizeInBits();
24115ffd83dbSDimitry Andric   ArrayRef<int> Mask = Shuf.getShuffleMask();
24125ffd83dbSDimitry Andric   for (unsigned i = 0, e = Mask.size(); i != e; ++i) {
241306c3fb27SDimitry Andric     if (Mask[i] == PoisonMaskElem)
24145ffd83dbSDimitry Andric       continue;
24155ffd83dbSDimitry Andric     uint64_t LSBIndex = IsBigEndian ? (i + 1) * TruncRatio - 1 : i * TruncRatio;
2416e8d8bef9SDimitry Andric     assert(LSBIndex <= INT32_MAX && "Overflowed 32-bits");
24175ffd83dbSDimitry Andric     if (Mask[i] != (int)LSBIndex)
24185ffd83dbSDimitry Andric       return nullptr;
24195ffd83dbSDimitry Andric   }
24205ffd83dbSDimitry Andric 
24215ffd83dbSDimitry Andric   return new TruncInst(X, DestType);
24225ffd83dbSDimitry Andric }
24235ffd83dbSDimitry Andric 
24240b57cec5SDimitry Andric /// Match a shuffle-select-shuffle pattern where the shuffles are widening and
2425*0fca6ea1SDimitry Andric /// narrowing (concatenating with poison and extracting back to the original
24260b57cec5SDimitry Andric /// length). This allows replacing the wide select with a narrow select.
24270b57cec5SDimitry Andric static Instruction *narrowVectorSelect(ShuffleVectorInst &Shuf,
24280b57cec5SDimitry Andric                                        InstCombiner::BuilderTy &Builder) {
24290b57cec5SDimitry Andric   // This must be a narrowing identity shuffle. It extracts the 1st N elements
24300b57cec5SDimitry Andric   // of the 1st vector operand of a shuffle.
2431*0fca6ea1SDimitry Andric   if (!match(Shuf.getOperand(1), m_Poison()) || !Shuf.isIdentityWithExtract())
24320b57cec5SDimitry Andric     return nullptr;
24330b57cec5SDimitry Andric 
24340b57cec5SDimitry Andric   // The vector being shuffled must be a vector select that we can eliminate.
24350b57cec5SDimitry Andric   // TODO: The one-use requirement could be eased if X and/or Y are constants.
24360b57cec5SDimitry Andric   Value *Cond, *X, *Y;
24370b57cec5SDimitry Andric   if (!match(Shuf.getOperand(0),
24380b57cec5SDimitry Andric              m_OneUse(m_Select(m_Value(Cond), m_Value(X), m_Value(Y)))))
24390b57cec5SDimitry Andric     return nullptr;
24400b57cec5SDimitry Andric 
2441*0fca6ea1SDimitry Andric   // We need a narrow condition value. It must be extended with poison elements
24420b57cec5SDimitry Andric   // and have the same number of elements as this shuffle.
2443e8d8bef9SDimitry Andric   unsigned NarrowNumElts =
2444e8d8bef9SDimitry Andric       cast<FixedVectorType>(Shuf.getType())->getNumElements();
24450b57cec5SDimitry Andric   Value *NarrowCond;
2446*0fca6ea1SDimitry Andric   if (!match(Cond, m_OneUse(m_Shuffle(m_Value(NarrowCond), m_Poison()))) ||
2447e8d8bef9SDimitry Andric       cast<FixedVectorType>(NarrowCond->getType())->getNumElements() !=
24485ffd83dbSDimitry Andric           NarrowNumElts ||
24490b57cec5SDimitry Andric       !cast<ShuffleVectorInst>(Cond)->isIdentityWithPadding())
24500b57cec5SDimitry Andric     return nullptr;
24510b57cec5SDimitry Andric 
2452*0fca6ea1SDimitry Andric   // shuf (sel (shuf NarrowCond, poison, WideMask), X, Y), poison, NarrowMask)
2453*0fca6ea1SDimitry Andric   // -->
2454*0fca6ea1SDimitry Andric   // sel NarrowCond, (shuf X, poison, NarrowMask), (shuf Y, poison, NarrowMask)
2455e8d8bef9SDimitry Andric   Value *NarrowX = Builder.CreateShuffleVector(X, Shuf.getShuffleMask());
2456e8d8bef9SDimitry Andric   Value *NarrowY = Builder.CreateShuffleVector(Y, Shuf.getShuffleMask());
24570b57cec5SDimitry Andric   return SelectInst::Create(NarrowCond, NarrowX, NarrowY);
24580b57cec5SDimitry Andric }
24590b57cec5SDimitry Andric 
246006c3fb27SDimitry Andric /// Canonicalize FP negate/abs after shuffle.
246106c3fb27SDimitry Andric static Instruction *foldShuffleOfUnaryOps(ShuffleVectorInst &Shuf,
246281ad6265SDimitry Andric                                           InstCombiner::BuilderTy &Builder) {
246306c3fb27SDimitry Andric   auto *S0 = dyn_cast<Instruction>(Shuf.getOperand(0));
246481ad6265SDimitry Andric   Value *X;
246506c3fb27SDimitry Andric   if (!S0 || !match(S0, m_CombineOr(m_FNeg(m_Value(X)), m_FAbs(m_Value(X)))))
246681ad6265SDimitry Andric     return nullptr;
246781ad6265SDimitry Andric 
246806c3fb27SDimitry Andric   bool IsFNeg = S0->getOpcode() == Instruction::FNeg;
246906c3fb27SDimitry Andric 
247006c3fb27SDimitry Andric   // Match 1-input (unary) shuffle.
247106c3fb27SDimitry Andric   // shuffle (fneg/fabs X), Mask --> fneg/fabs (shuffle X, Mask)
2472*0fca6ea1SDimitry Andric   if (S0->hasOneUse() && match(Shuf.getOperand(1), m_Poison())) {
247381ad6265SDimitry Andric     Value *NewShuf = Builder.CreateShuffleVector(X, Shuf.getShuffleMask());
247406c3fb27SDimitry Andric     if (IsFNeg)
247506c3fb27SDimitry Andric       return UnaryOperator::CreateFNegFMF(NewShuf, S0);
247606c3fb27SDimitry Andric 
247706c3fb27SDimitry Andric     Function *FAbs = Intrinsic::getDeclaration(Shuf.getModule(),
247806c3fb27SDimitry Andric                                                Intrinsic::fabs, Shuf.getType());
247906c3fb27SDimitry Andric     CallInst *NewF = CallInst::Create(FAbs, {NewShuf});
248006c3fb27SDimitry Andric     NewF->setFastMathFlags(S0->getFastMathFlags());
248106c3fb27SDimitry Andric     return NewF;
248281ad6265SDimitry Andric   }
248381ad6265SDimitry Andric 
248406c3fb27SDimitry Andric   // Match 2-input (binary) shuffle.
248506c3fb27SDimitry Andric   auto *S1 = dyn_cast<Instruction>(Shuf.getOperand(1));
248681ad6265SDimitry Andric   Value *Y;
248706c3fb27SDimitry Andric   if (!S1 || !match(S1, m_CombineOr(m_FNeg(m_Value(Y)), m_FAbs(m_Value(Y)))) ||
248806c3fb27SDimitry Andric       S0->getOpcode() != S1->getOpcode() ||
248906c3fb27SDimitry Andric       (!S0->hasOneUse() && !S1->hasOneUse()))
249081ad6265SDimitry Andric     return nullptr;
249181ad6265SDimitry Andric 
249206c3fb27SDimitry Andric   // shuf (fneg/fabs X), (fneg/fabs Y), Mask --> fneg/fabs (shuf X, Y, Mask)
249381ad6265SDimitry Andric   Value *NewShuf = Builder.CreateShuffleVector(X, Y, Shuf.getShuffleMask());
249406c3fb27SDimitry Andric   Instruction *NewF;
249506c3fb27SDimitry Andric   if (IsFNeg) {
249606c3fb27SDimitry Andric     NewF = UnaryOperator::CreateFNeg(NewShuf);
249706c3fb27SDimitry Andric   } else {
249806c3fb27SDimitry Andric     Function *FAbs = Intrinsic::getDeclaration(Shuf.getModule(),
249906c3fb27SDimitry Andric                                                Intrinsic::fabs, Shuf.getType());
250006c3fb27SDimitry Andric     NewF = CallInst::Create(FAbs, {NewShuf});
250181ad6265SDimitry Andric   }
250206c3fb27SDimitry Andric   NewF->copyIRFlags(S0);
250306c3fb27SDimitry Andric   NewF->andIRFlags(S1);
250406c3fb27SDimitry Andric   return NewF;
250581ad6265SDimitry Andric }
250681ad6265SDimitry Andric 
250781ad6265SDimitry Andric /// Canonicalize casts after shuffle.
250881ad6265SDimitry Andric static Instruction *foldCastShuffle(ShuffleVectorInst &Shuf,
250981ad6265SDimitry Andric                                     InstCombiner::BuilderTy &Builder) {
251081ad6265SDimitry Andric   // Do we have 2 matching cast operands?
251181ad6265SDimitry Andric   auto *Cast0 = dyn_cast<CastInst>(Shuf.getOperand(0));
251281ad6265SDimitry Andric   auto *Cast1 = dyn_cast<CastInst>(Shuf.getOperand(1));
251381ad6265SDimitry Andric   if (!Cast0 || !Cast1 || Cast0->getOpcode() != Cast1->getOpcode() ||
251481ad6265SDimitry Andric       Cast0->getSrcTy() != Cast1->getSrcTy())
251581ad6265SDimitry Andric     return nullptr;
251681ad6265SDimitry Andric 
251781ad6265SDimitry Andric   // TODO: Allow other opcodes? That would require easing the type restrictions
251881ad6265SDimitry Andric   //       below here.
251981ad6265SDimitry Andric   CastInst::CastOps CastOpcode = Cast0->getOpcode();
252081ad6265SDimitry Andric   switch (CastOpcode) {
252181ad6265SDimitry Andric   case Instruction::FPToSI:
252281ad6265SDimitry Andric   case Instruction::FPToUI:
252381ad6265SDimitry Andric   case Instruction::SIToFP:
252481ad6265SDimitry Andric   case Instruction::UIToFP:
252581ad6265SDimitry Andric     break;
252681ad6265SDimitry Andric   default:
252781ad6265SDimitry Andric     return nullptr;
252881ad6265SDimitry Andric   }
252981ad6265SDimitry Andric 
253081ad6265SDimitry Andric   VectorType *ShufTy = Shuf.getType();
253181ad6265SDimitry Andric   VectorType *ShufOpTy = cast<VectorType>(Shuf.getOperand(0)->getType());
253281ad6265SDimitry Andric   VectorType *CastSrcTy = cast<VectorType>(Cast0->getSrcTy());
253381ad6265SDimitry Andric 
253481ad6265SDimitry Andric   // TODO: Allow length-increasing shuffles?
253581ad6265SDimitry Andric   if (ShufTy->getElementCount().getKnownMinValue() >
253681ad6265SDimitry Andric       ShufOpTy->getElementCount().getKnownMinValue())
253781ad6265SDimitry Andric     return nullptr;
253881ad6265SDimitry Andric 
253981ad6265SDimitry Andric   // TODO: Allow element-size-decreasing casts (ex: fptosi float to i8)?
254081ad6265SDimitry Andric   assert(isa<FixedVectorType>(CastSrcTy) && isa<FixedVectorType>(ShufOpTy) &&
254181ad6265SDimitry Andric          "Expected fixed vector operands for casts and binary shuffle");
254281ad6265SDimitry Andric   if (CastSrcTy->getPrimitiveSizeInBits() > ShufOpTy->getPrimitiveSizeInBits())
254381ad6265SDimitry Andric     return nullptr;
254481ad6265SDimitry Andric 
254581ad6265SDimitry Andric   // At least one of the operands must have only one use (the shuffle).
254681ad6265SDimitry Andric   if (!Cast0->hasOneUse() && !Cast1->hasOneUse())
254781ad6265SDimitry Andric     return nullptr;
254881ad6265SDimitry Andric 
254981ad6265SDimitry Andric   // shuffle (cast X), (cast Y), Mask --> cast (shuffle X, Y, Mask)
255081ad6265SDimitry Andric   Value *X = Cast0->getOperand(0);
255181ad6265SDimitry Andric   Value *Y = Cast1->getOperand(0);
255281ad6265SDimitry Andric   Value *NewShuf = Builder.CreateShuffleVector(X, Y, Shuf.getShuffleMask());
255381ad6265SDimitry Andric   return CastInst::Create(CastOpcode, NewShuf, ShufTy);
255481ad6265SDimitry Andric }
255581ad6265SDimitry Andric 
2556fe6060f1SDimitry Andric /// Try to fold an extract subvector operation.
25570b57cec5SDimitry Andric static Instruction *foldIdentityExtractShuffle(ShuffleVectorInst &Shuf) {
25580b57cec5SDimitry Andric   Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
2559*0fca6ea1SDimitry Andric   if (!Shuf.isIdentityWithExtract() || !match(Op1, m_Poison()))
25600b57cec5SDimitry Andric     return nullptr;
25610b57cec5SDimitry Andric 
2562fe6060f1SDimitry Andric   // Check if we are extracting all bits of an inserted scalar:
2563fe6060f1SDimitry Andric   // extract-subvec (bitcast (inselt ?, X, 0) --> bitcast X to subvec type
2564fe6060f1SDimitry Andric   Value *X;
2565fe6060f1SDimitry Andric   if (match(Op0, m_BitCast(m_InsertElt(m_Value(), m_Value(X), m_Zero()))) &&
2566fe6060f1SDimitry Andric       X->getType()->getPrimitiveSizeInBits() ==
2567fe6060f1SDimitry Andric           Shuf.getType()->getPrimitiveSizeInBits())
2568fe6060f1SDimitry Andric     return new BitCastInst(X, Shuf.getType());
2569fe6060f1SDimitry Andric 
2570fe6060f1SDimitry Andric   // Try to combine 2 shuffles into 1 shuffle by concatenating a shuffle mask.
2571fe6060f1SDimitry Andric   Value *Y;
25725ffd83dbSDimitry Andric   ArrayRef<int> Mask;
25735ffd83dbSDimitry Andric   if (!match(Op0, m_Shuffle(m_Value(X), m_Value(Y), m_Mask(Mask))))
25740b57cec5SDimitry Andric     return nullptr;
25750b57cec5SDimitry Andric 
25760b57cec5SDimitry Andric   // Be conservative with shuffle transforms. If we can't kill the 1st shuffle,
25770b57cec5SDimitry Andric   // then combining may result in worse codegen.
25780b57cec5SDimitry Andric   if (!Op0->hasOneUse())
25790b57cec5SDimitry Andric     return nullptr;
25800b57cec5SDimitry Andric 
25810b57cec5SDimitry Andric   // We are extracting a subvector from a shuffle. Remove excess elements from
25820b57cec5SDimitry Andric   // the 1st shuffle mask to eliminate the extract.
25830b57cec5SDimitry Andric   //
25840b57cec5SDimitry Andric   // This transform is conservatively limited to identity extracts because we do
25850b57cec5SDimitry Andric   // not allow arbitrary shuffle mask creation as a target-independent transform
25860b57cec5SDimitry Andric   // (because we can't guarantee that will lower efficiently).
25870b57cec5SDimitry Andric   //
2588*0fca6ea1SDimitry Andric   // If the extracting shuffle has an poison mask element, it transfers to the
25890b57cec5SDimitry Andric   // new shuffle mask. Otherwise, copy the original mask element. Example:
2590*0fca6ea1SDimitry Andric   //   shuf (shuf X, Y, <C0, C1, C2, poison, C4>), poison, <0, poison, 2, 3> -->
2591*0fca6ea1SDimitry Andric   //   shuf X, Y, <C0, poison, C2, poison>
2592e8d8bef9SDimitry Andric   unsigned NumElts = cast<FixedVectorType>(Shuf.getType())->getNumElements();
25935ffd83dbSDimitry Andric   SmallVector<int, 16> NewMask(NumElts);
25945ffd83dbSDimitry Andric   assert(NumElts < Mask.size() &&
25950b57cec5SDimitry Andric          "Identity with extract must have less elements than its inputs");
25960b57cec5SDimitry Andric 
25970b57cec5SDimitry Andric   for (unsigned i = 0; i != NumElts; ++i) {
25985ffd83dbSDimitry Andric     int ExtractMaskElt = Shuf.getMaskValue(i);
25995ffd83dbSDimitry Andric     int MaskElt = Mask[i];
260006c3fb27SDimitry Andric     NewMask[i] = ExtractMaskElt == PoisonMaskElem ? ExtractMaskElt : MaskElt;
26010b57cec5SDimitry Andric   }
26025ffd83dbSDimitry Andric   return new ShuffleVectorInst(X, Y, NewMask);
26030b57cec5SDimitry Andric }
26040b57cec5SDimitry Andric 
2605480093f4SDimitry Andric /// Try to replace a shuffle with an insertelement or try to replace a shuffle
2606480093f4SDimitry Andric /// operand with the operand of an insertelement.
26075ffd83dbSDimitry Andric static Instruction *foldShuffleWithInsert(ShuffleVectorInst &Shuf,
2608e8d8bef9SDimitry Andric                                           InstCombinerImpl &IC) {
26090b57cec5SDimitry Andric   Value *V0 = Shuf.getOperand(0), *V1 = Shuf.getOperand(1);
26105ffd83dbSDimitry Andric   SmallVector<int, 16> Mask;
26115ffd83dbSDimitry Andric   Shuf.getShuffleMask(Mask);
26120b57cec5SDimitry Andric 
26130b57cec5SDimitry Andric   int NumElts = Mask.size();
2614349cc55cSDimitry Andric   int InpNumElts = cast<FixedVectorType>(V0->getType())->getNumElements();
26150b57cec5SDimitry Andric 
2616480093f4SDimitry Andric   // This is a specialization of a fold in SimplifyDemandedVectorElts. We may
2617480093f4SDimitry Andric   // not be able to handle it there if the insertelement has >1 use.
2618480093f4SDimitry Andric   // If the shuffle has an insertelement operand but does not choose the
2619480093f4SDimitry Andric   // inserted scalar element from that value, then we can replace that shuffle
2620480093f4SDimitry Andric   // operand with the source vector of the insertelement.
2621480093f4SDimitry Andric   Value *X;
2622480093f4SDimitry Andric   uint64_t IdxC;
26235ffd83dbSDimitry Andric   if (match(V0, m_InsertElt(m_Value(X), m_Value(), m_ConstantInt(IdxC)))) {
2624480093f4SDimitry Andric     // shuf (inselt X, ?, IdxC), ?, Mask --> shuf X, ?, Mask
2625e8d8bef9SDimitry Andric     if (!is_contained(Mask, (int)IdxC))
26265ffd83dbSDimitry Andric       return IC.replaceOperand(Shuf, 0, X);
2627480093f4SDimitry Andric   }
26285ffd83dbSDimitry Andric   if (match(V1, m_InsertElt(m_Value(X), m_Value(), m_ConstantInt(IdxC)))) {
2629480093f4SDimitry Andric     // Offset the index constant by the vector width because we are checking for
2630480093f4SDimitry Andric     // accesses to the 2nd vector input of the shuffle.
2631349cc55cSDimitry Andric     IdxC += InpNumElts;
2632480093f4SDimitry Andric     // shuf ?, (inselt X, ?, IdxC), Mask --> shuf ?, X, Mask
2633e8d8bef9SDimitry Andric     if (!is_contained(Mask, (int)IdxC))
26345ffd83dbSDimitry Andric       return IC.replaceOperand(Shuf, 1, X);
2635480093f4SDimitry Andric   }
2636349cc55cSDimitry Andric   // For the rest of the transform, the shuffle must not change vector sizes.
2637349cc55cSDimitry Andric   // TODO: This restriction could be removed if the insert has only one use
2638349cc55cSDimitry Andric   //       (because the transform would require a new length-changing shuffle).
2639349cc55cSDimitry Andric   if (NumElts != InpNumElts)
2640349cc55cSDimitry Andric     return nullptr;
2641480093f4SDimitry Andric 
26420b57cec5SDimitry Andric   // shuffle (insert ?, Scalar, IndexC), V1, Mask --> insert V1, Scalar, IndexC'
26430b57cec5SDimitry Andric   auto isShufflingScalarIntoOp1 = [&](Value *&Scalar, ConstantInt *&IndexC) {
26440b57cec5SDimitry Andric     // We need an insertelement with a constant index.
26455ffd83dbSDimitry Andric     if (!match(V0, m_InsertElt(m_Value(), m_Value(Scalar),
26460b57cec5SDimitry Andric                                m_ConstantInt(IndexC))))
26470b57cec5SDimitry Andric       return false;
26480b57cec5SDimitry Andric 
26490b57cec5SDimitry Andric     // Test the shuffle mask to see if it splices the inserted scalar into the
26500b57cec5SDimitry Andric     // operand 1 vector of the shuffle.
26510b57cec5SDimitry Andric     int NewInsIndex = -1;
26520b57cec5SDimitry Andric     for (int i = 0; i != NumElts; ++i) {
26530b57cec5SDimitry Andric       // Ignore undef mask elements.
26540b57cec5SDimitry Andric       if (Mask[i] == -1)
26550b57cec5SDimitry Andric         continue;
26560b57cec5SDimitry Andric 
26570b57cec5SDimitry Andric       // The shuffle takes elements of operand 1 without lane changes.
26580b57cec5SDimitry Andric       if (Mask[i] == NumElts + i)
26590b57cec5SDimitry Andric         continue;
26600b57cec5SDimitry Andric 
26610b57cec5SDimitry Andric       // The shuffle must choose the inserted scalar exactly once.
26620b57cec5SDimitry Andric       if (NewInsIndex != -1 || Mask[i] != IndexC->getSExtValue())
26630b57cec5SDimitry Andric         return false;
26640b57cec5SDimitry Andric 
26650b57cec5SDimitry Andric       // The shuffle is placing the inserted scalar into element i.
26660b57cec5SDimitry Andric       NewInsIndex = i;
26670b57cec5SDimitry Andric     }
26680b57cec5SDimitry Andric 
26690b57cec5SDimitry Andric     assert(NewInsIndex != -1 && "Did not fold shuffle with unused operand?");
26700b57cec5SDimitry Andric 
26710b57cec5SDimitry Andric     // Index is updated to the potentially translated insertion lane.
2672cb14a3feSDimitry Andric     IndexC = ConstantInt::get(IndexC->getIntegerType(), NewInsIndex);
26730b57cec5SDimitry Andric     return true;
26740b57cec5SDimitry Andric   };
26750b57cec5SDimitry Andric 
26760b57cec5SDimitry Andric   // If the shuffle is unnecessary, insert the scalar operand directly into
26770b57cec5SDimitry Andric   // operand 1 of the shuffle. Example:
26780b57cec5SDimitry Andric   // shuffle (insert ?, S, 1), V1, <1, 5, 6, 7> --> insert V1, S, 0
26790b57cec5SDimitry Andric   Value *Scalar;
26800b57cec5SDimitry Andric   ConstantInt *IndexC;
26810b57cec5SDimitry Andric   if (isShufflingScalarIntoOp1(Scalar, IndexC))
26820b57cec5SDimitry Andric     return InsertElementInst::Create(V1, Scalar, IndexC);
26830b57cec5SDimitry Andric 
26840b57cec5SDimitry Andric   // Try again after commuting shuffle. Example:
26850b57cec5SDimitry Andric   // shuffle V0, (insert ?, S, 0), <0, 1, 2, 4> -->
26860b57cec5SDimitry Andric   // shuffle (insert ?, S, 0), V0, <4, 5, 6, 0> --> insert V0, S, 3
26870b57cec5SDimitry Andric   std::swap(V0, V1);
26880b57cec5SDimitry Andric   ShuffleVectorInst::commuteShuffleMask(Mask, NumElts);
26890b57cec5SDimitry Andric   if (isShufflingScalarIntoOp1(Scalar, IndexC))
26900b57cec5SDimitry Andric     return InsertElementInst::Create(V1, Scalar, IndexC);
26910b57cec5SDimitry Andric 
26920b57cec5SDimitry Andric   return nullptr;
26930b57cec5SDimitry Andric }
26940b57cec5SDimitry Andric 
26950b57cec5SDimitry Andric static Instruction *foldIdentityPaddedShuffles(ShuffleVectorInst &Shuf) {
26960b57cec5SDimitry Andric   // Match the operands as identity with padding (also known as concatenation
26970b57cec5SDimitry Andric   // with undef) shuffles of the same source type. The backend is expected to
26980b57cec5SDimitry Andric   // recreate these concatenations from a shuffle of narrow operands.
26990b57cec5SDimitry Andric   auto *Shuffle0 = dyn_cast<ShuffleVectorInst>(Shuf.getOperand(0));
27000b57cec5SDimitry Andric   auto *Shuffle1 = dyn_cast<ShuffleVectorInst>(Shuf.getOperand(1));
27010b57cec5SDimitry Andric   if (!Shuffle0 || !Shuffle0->isIdentityWithPadding() ||
27020b57cec5SDimitry Andric       !Shuffle1 || !Shuffle1->isIdentityWithPadding())
27030b57cec5SDimitry Andric     return nullptr;
27040b57cec5SDimitry Andric 
27050b57cec5SDimitry Andric   // We limit this transform to power-of-2 types because we expect that the
27060b57cec5SDimitry Andric   // backend can convert the simplified IR patterns to identical nodes as the
27070b57cec5SDimitry Andric   // original IR.
27080b57cec5SDimitry Andric   // TODO: If we can verify the same behavior for arbitrary types, the
27090b57cec5SDimitry Andric   //       power-of-2 checks can be removed.
27100b57cec5SDimitry Andric   Value *X = Shuffle0->getOperand(0);
27110b57cec5SDimitry Andric   Value *Y = Shuffle1->getOperand(0);
27120b57cec5SDimitry Andric   if (X->getType() != Y->getType() ||
2713e8d8bef9SDimitry Andric       !isPowerOf2_32(cast<FixedVectorType>(Shuf.getType())->getNumElements()) ||
2714e8d8bef9SDimitry Andric       !isPowerOf2_32(
2715e8d8bef9SDimitry Andric           cast<FixedVectorType>(Shuffle0->getType())->getNumElements()) ||
2716e8d8bef9SDimitry Andric       !isPowerOf2_32(cast<FixedVectorType>(X->getType())->getNumElements()) ||
2717fe6060f1SDimitry Andric       match(X, m_Undef()) || match(Y, m_Undef()))
27180b57cec5SDimitry Andric     return nullptr;
2719fe6060f1SDimitry Andric   assert(match(Shuffle0->getOperand(1), m_Undef()) &&
2720fe6060f1SDimitry Andric          match(Shuffle1->getOperand(1), m_Undef()) &&
27210b57cec5SDimitry Andric          "Unexpected operand for identity shuffle");
27220b57cec5SDimitry Andric 
27230b57cec5SDimitry Andric   // This is a shuffle of 2 widening shuffles. We can shuffle the narrow source
27240b57cec5SDimitry Andric   // operands directly by adjusting the shuffle mask to account for the narrower
27250b57cec5SDimitry Andric   // types:
27260b57cec5SDimitry Andric   // shuf (widen X), (widen Y), Mask --> shuf X, Y, Mask'
2727e8d8bef9SDimitry Andric   int NarrowElts = cast<FixedVectorType>(X->getType())->getNumElements();
2728e8d8bef9SDimitry Andric   int WideElts = cast<FixedVectorType>(Shuffle0->getType())->getNumElements();
27290b57cec5SDimitry Andric   assert(WideElts > NarrowElts && "Unexpected types for identity with padding");
27300b57cec5SDimitry Andric 
27315ffd83dbSDimitry Andric   ArrayRef<int> Mask = Shuf.getShuffleMask();
27325ffd83dbSDimitry Andric   SmallVector<int, 16> NewMask(Mask.size(), -1);
27330b57cec5SDimitry Andric   for (int i = 0, e = Mask.size(); i != e; ++i) {
27340b57cec5SDimitry Andric     if (Mask[i] == -1)
27350b57cec5SDimitry Andric       continue;
27360b57cec5SDimitry Andric 
27370b57cec5SDimitry Andric     // If this shuffle is choosing an undef element from 1 of the sources, that
27380b57cec5SDimitry Andric     // element is undef.
27390b57cec5SDimitry Andric     if (Mask[i] < WideElts) {
27400b57cec5SDimitry Andric       if (Shuffle0->getMaskValue(Mask[i]) == -1)
27410b57cec5SDimitry Andric         continue;
27420b57cec5SDimitry Andric     } else {
27430b57cec5SDimitry Andric       if (Shuffle1->getMaskValue(Mask[i] - WideElts) == -1)
27440b57cec5SDimitry Andric         continue;
27450b57cec5SDimitry Andric     }
27460b57cec5SDimitry Andric 
27470b57cec5SDimitry Andric     // If this shuffle is choosing from the 1st narrow op, the mask element is
27480b57cec5SDimitry Andric     // the same. If this shuffle is choosing from the 2nd narrow op, the mask
27490b57cec5SDimitry Andric     // element is offset down to adjust for the narrow vector widths.
27500b57cec5SDimitry Andric     if (Mask[i] < WideElts) {
27510b57cec5SDimitry Andric       assert(Mask[i] < NarrowElts && "Unexpected shuffle mask");
27525ffd83dbSDimitry Andric       NewMask[i] = Mask[i];
27530b57cec5SDimitry Andric     } else {
27540b57cec5SDimitry Andric       assert(Mask[i] < (WideElts + NarrowElts) && "Unexpected shuffle mask");
27555ffd83dbSDimitry Andric       NewMask[i] = Mask[i] - (WideElts - NarrowElts);
27560b57cec5SDimitry Andric     }
27570b57cec5SDimitry Andric   }
27585ffd83dbSDimitry Andric   return new ShuffleVectorInst(X, Y, NewMask);
27590b57cec5SDimitry Andric }
27600b57cec5SDimitry Andric 
2761bdd1243dSDimitry Andric // Splatting the first element of the result of a BinOp, where any of the
2762bdd1243dSDimitry Andric // BinOp's operands are the result of a first element splat can be simplified to
2763bdd1243dSDimitry Andric // splatting the first element of the result of the BinOp
2764bdd1243dSDimitry Andric Instruction *InstCombinerImpl::simplifyBinOpSplats(ShuffleVectorInst &SVI) {
2765*0fca6ea1SDimitry Andric   if (!match(SVI.getOperand(1), m_Poison()) ||
276606c3fb27SDimitry Andric       !match(SVI.getShuffleMask(), m_ZeroMask()) ||
276706c3fb27SDimitry Andric       !SVI.getOperand(0)->hasOneUse())
2768bdd1243dSDimitry Andric     return nullptr;
2769bdd1243dSDimitry Andric 
2770bdd1243dSDimitry Andric   Value *Op0 = SVI.getOperand(0);
2771bdd1243dSDimitry Andric   Value *X, *Y;
2772*0fca6ea1SDimitry Andric   if (!match(Op0, m_BinOp(m_Shuffle(m_Value(X), m_Poison(), m_ZeroMask()),
2773bdd1243dSDimitry Andric                           m_Value(Y))) &&
2774bdd1243dSDimitry Andric       !match(Op0, m_BinOp(m_Value(X),
2775*0fca6ea1SDimitry Andric                           m_Shuffle(m_Value(Y), m_Poison(), m_ZeroMask()))))
2776bdd1243dSDimitry Andric     return nullptr;
2777bdd1243dSDimitry Andric   if (X->getType() != Y->getType())
2778bdd1243dSDimitry Andric     return nullptr;
2779bdd1243dSDimitry Andric 
2780bdd1243dSDimitry Andric   auto *BinOp = cast<BinaryOperator>(Op0);
2781bdd1243dSDimitry Andric   if (!isSafeToSpeculativelyExecute(BinOp))
2782bdd1243dSDimitry Andric     return nullptr;
2783bdd1243dSDimitry Andric 
2784bdd1243dSDimitry Andric   Value *NewBO = Builder.CreateBinOp(BinOp->getOpcode(), X, Y);
2785bdd1243dSDimitry Andric   if (auto NewBOI = dyn_cast<Instruction>(NewBO))
2786bdd1243dSDimitry Andric     NewBOI->copyIRFlags(BinOp);
2787bdd1243dSDimitry Andric 
2788bdd1243dSDimitry Andric   return new ShuffleVectorInst(NewBO, SVI.getShuffleMask());
2789bdd1243dSDimitry Andric }
2790bdd1243dSDimitry Andric 
2791e8d8bef9SDimitry Andric Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
27920b57cec5SDimitry Andric   Value *LHS = SVI.getOperand(0);
27930b57cec5SDimitry Andric   Value *RHS = SVI.getOperand(1);
27945ffd83dbSDimitry Andric   SimplifyQuery ShufQuery = SQ.getWithInstruction(&SVI);
279581ad6265SDimitry Andric   if (auto *V = simplifyShuffleVectorInst(LHS, RHS, SVI.getShuffleMask(),
27965ffd83dbSDimitry Andric                                           SVI.getType(), ShufQuery))
27970b57cec5SDimitry Andric     return replaceInstUsesWith(SVI, V);
27980b57cec5SDimitry Andric 
2799bdd1243dSDimitry Andric   if (Instruction *I = simplifyBinOpSplats(SVI))
2800bdd1243dSDimitry Andric     return I;
2801bdd1243dSDimitry Andric 
2802cb14a3feSDimitry Andric   // Canonicalize splat shuffle to use poison RHS. Handle this explicitly in
2803cb14a3feSDimitry Andric   // order to support scalable vectors.
2804cb14a3feSDimitry Andric   if (match(SVI.getShuffleMask(), m_ZeroMask()) && !isa<PoisonValue>(RHS))
2805cb14a3feSDimitry Andric     return replaceOperand(SVI, 1, PoisonValue::get(RHS->getType()));
2806cb14a3feSDimitry Andric 
2807e8d8bef9SDimitry Andric   if (isa<ScalableVectorType>(LHS->getType()))
2808e8d8bef9SDimitry Andric     return nullptr;
2809e8d8bef9SDimitry Andric 
2810e8d8bef9SDimitry Andric   unsigned VWidth = cast<FixedVectorType>(SVI.getType())->getNumElements();
2811e8d8bef9SDimitry Andric   unsigned LHSWidth = cast<FixedVectorType>(LHS->getType())->getNumElements();
2812fe6060f1SDimitry Andric 
2813fe6060f1SDimitry Andric   // shuffle (bitcast X), (bitcast Y), Mask --> bitcast (shuffle X, Y, Mask)
2814fe6060f1SDimitry Andric   //
2815fe6060f1SDimitry Andric   // if X and Y are of the same (vector) type, and the element size is not
2816fe6060f1SDimitry Andric   // changed by the bitcasts, we can distribute the bitcasts through the
2817fe6060f1SDimitry Andric   // shuffle, hopefully reducing the number of instructions. We make sure that
2818fe6060f1SDimitry Andric   // at least one bitcast only has one use, so we don't *increase* the number of
2819fe6060f1SDimitry Andric   // instructions here.
2820fe6060f1SDimitry Andric   Value *X, *Y;
2821fe6060f1SDimitry Andric   if (match(LHS, m_BitCast(m_Value(X))) && match(RHS, m_BitCast(m_Value(Y))) &&
2822fe6060f1SDimitry Andric       X->getType()->isVectorTy() && X->getType() == Y->getType() &&
2823fe6060f1SDimitry Andric       X->getType()->getScalarSizeInBits() ==
2824fe6060f1SDimitry Andric           SVI.getType()->getScalarSizeInBits() &&
2825fe6060f1SDimitry Andric       (LHS->hasOneUse() || RHS->hasOneUse())) {
2826fe6060f1SDimitry Andric     Value *V = Builder.CreateShuffleVector(X, Y, SVI.getShuffleMask(),
2827fe6060f1SDimitry Andric                                            SVI.getName() + ".uncasted");
2828fe6060f1SDimitry Andric     return new BitCastInst(V, SVI.getType());
2829fe6060f1SDimitry Andric   }
2830fe6060f1SDimitry Andric 
28315ffd83dbSDimitry Andric   ArrayRef<int> Mask = SVI.getShuffleMask();
28325ffd83dbSDimitry Andric 
28335ffd83dbSDimitry Andric   // Peek through a bitcasted shuffle operand by scaling the mask. If the
28345ffd83dbSDimitry Andric   // simulated shuffle can simplify, then this shuffle is unnecessary:
28355ffd83dbSDimitry Andric   // shuf (bitcast X), undef, Mask --> bitcast X'
28365ffd83dbSDimitry Andric   // TODO: This could be extended to allow length-changing shuffles.
28375ffd83dbSDimitry Andric   //       The transform might also be obsoleted if we allowed canonicalization
28385ffd83dbSDimitry Andric   //       of bitcasted shuffles.
28395ffd83dbSDimitry Andric   if (match(LHS, m_BitCast(m_Value(X))) && match(RHS, m_Undef()) &&
28405ffd83dbSDimitry Andric       X->getType()->isVectorTy() && VWidth == LHSWidth) {
28415ffd83dbSDimitry Andric     // Try to create a scaled mask constant.
2842e8d8bef9SDimitry Andric     auto *XType = cast<FixedVectorType>(X->getType());
28435ffd83dbSDimitry Andric     unsigned XNumElts = XType->getNumElements();
28445ffd83dbSDimitry Andric     SmallVector<int, 16> ScaledMask;
2845*0fca6ea1SDimitry Andric     if (scaleShuffleMaskElts(XNumElts, Mask, ScaledMask)) {
28465ffd83dbSDimitry Andric       // If the shuffled source vector simplifies, cast that value to this
28475ffd83dbSDimitry Andric       // shuffle's type.
284881ad6265SDimitry Andric       if (auto *V = simplifyShuffleVectorInst(X, UndefValue::get(XType),
28495ffd83dbSDimitry Andric                                               ScaledMask, XType, ShufQuery))
28505ffd83dbSDimitry Andric         return BitCastInst::Create(Instruction::BitCast, V, SVI.getType());
28515ffd83dbSDimitry Andric     }
28525ffd83dbSDimitry Andric   }
28535ffd83dbSDimitry Andric 
2854fe6060f1SDimitry Andric   // shuffle x, x, mask --> shuffle x, undef, mask'
2855480093f4SDimitry Andric   if (LHS == RHS) {
2856fe6060f1SDimitry Andric     assert(!match(RHS, m_Undef()) &&
2857fe6060f1SDimitry Andric            "Shuffle with 2 undef ops not simplified?");
2858349cc55cSDimitry Andric     return new ShuffleVectorInst(LHS, createUnaryMask(Mask, LHSWidth));
28590b57cec5SDimitry Andric   }
28600b57cec5SDimitry Andric 
2861480093f4SDimitry Andric   // shuffle undef, x, mask --> shuffle x, undef, mask'
2862fe6060f1SDimitry Andric   if (match(LHS, m_Undef())) {
2863480093f4SDimitry Andric     SVI.commute();
2864480093f4SDimitry Andric     return &SVI;
2865480093f4SDimitry Andric   }
2866480093f4SDimitry Andric 
28670b57cec5SDimitry Andric   if (Instruction *I = canonicalizeInsertSplat(SVI, Builder))
28680b57cec5SDimitry Andric     return I;
28690b57cec5SDimitry Andric 
28700eae32dcSDimitry Andric   if (Instruction *I = foldSelectShuffle(SVI))
28710b57cec5SDimitry Andric     return I;
28720b57cec5SDimitry Andric 
28735ffd83dbSDimitry Andric   if (Instruction *I = foldTruncShuffle(SVI, DL.isBigEndian()))
28745ffd83dbSDimitry Andric     return I;
28755ffd83dbSDimitry Andric 
28760b57cec5SDimitry Andric   if (Instruction *I = narrowVectorSelect(SVI, Builder))
28770b57cec5SDimitry Andric     return I;
28780b57cec5SDimitry Andric 
287906c3fb27SDimitry Andric   if (Instruction *I = foldShuffleOfUnaryOps(SVI, Builder))
288081ad6265SDimitry Andric     return I;
288181ad6265SDimitry Andric 
288281ad6265SDimitry Andric   if (Instruction *I = foldCastShuffle(SVI, Builder))
288381ad6265SDimitry Andric     return I;
288481ad6265SDimitry Andric 
2885cb14a3feSDimitry Andric   APInt PoisonElts(VWidth, 0);
2886349cc55cSDimitry Andric   APInt AllOnesEltMask(APInt::getAllOnes(VWidth));
2887cb14a3feSDimitry Andric   if (Value *V = SimplifyDemandedVectorElts(&SVI, AllOnesEltMask, PoisonElts)) {
28880b57cec5SDimitry Andric     if (V != &SVI)
28890b57cec5SDimitry Andric       return replaceInstUsesWith(SVI, V);
28900b57cec5SDimitry Andric     return &SVI;
28910b57cec5SDimitry Andric   }
28920b57cec5SDimitry Andric 
28930b57cec5SDimitry Andric   if (Instruction *I = foldIdentityExtractShuffle(SVI))
28940b57cec5SDimitry Andric     return I;
28950b57cec5SDimitry Andric 
28960b57cec5SDimitry Andric   // These transforms have the potential to lose undef knowledge, so they are
28970b57cec5SDimitry Andric   // intentionally placed after SimplifyDemandedVectorElts().
28985ffd83dbSDimitry Andric   if (Instruction *I = foldShuffleWithInsert(SVI, *this))
28990b57cec5SDimitry Andric     return I;
29000b57cec5SDimitry Andric   if (Instruction *I = foldIdentityPaddedShuffles(SVI))
29010b57cec5SDimitry Andric     return I;
29020b57cec5SDimitry Andric 
2903*0fca6ea1SDimitry Andric   if (match(RHS, m_Poison()) && canEvaluateShuffled(LHS, Mask)) {
290406c3fb27SDimitry Andric     Value *V = evaluateInDifferentElementOrder(LHS, Mask, Builder);
29050b57cec5SDimitry Andric     return replaceInstUsesWith(SVI, V);
29060b57cec5SDimitry Andric   }
29070b57cec5SDimitry Andric 
29080b57cec5SDimitry Andric   // SROA generates shuffle+bitcast when the extracted sub-vector is bitcast to
29090b57cec5SDimitry Andric   // a non-vector type. We can instead bitcast the original vector followed by
29100b57cec5SDimitry Andric   // an extract of the desired element:
29110b57cec5SDimitry Andric   //
29120b57cec5SDimitry Andric   //   %sroa = shufflevector <16 x i8> %in, <16 x i8> undef,
29130b57cec5SDimitry Andric   //                         <4 x i32> <i32 0, i32 1, i32 2, i32 3>
29140b57cec5SDimitry Andric   //   %1 = bitcast <4 x i8> %sroa to i32
29150b57cec5SDimitry Andric   // Becomes:
29160b57cec5SDimitry Andric   //   %bc = bitcast <16 x i8> %in to <4 x i32>
29170b57cec5SDimitry Andric   //   %ext = extractelement <4 x i32> %bc, i32 0
29180b57cec5SDimitry Andric   //
29190b57cec5SDimitry Andric   // If the shuffle is extracting a contiguous range of values from the input
29200b57cec5SDimitry Andric   // vector then each use which is a bitcast of the extracted size can be
29210b57cec5SDimitry Andric   // replaced. This will work if the vector types are compatible, and the begin
29220b57cec5SDimitry Andric   // index is aligned to a value in the casted vector type. If the begin index
29230b57cec5SDimitry Andric   // isn't aligned then we can shuffle the original vector (keeping the same
29240b57cec5SDimitry Andric   // vector type) before extracting.
29250b57cec5SDimitry Andric   //
29260b57cec5SDimitry Andric   // This code will bail out if the target type is fundamentally incompatible
29270b57cec5SDimitry Andric   // with vectors of the source type.
29280b57cec5SDimitry Andric   //
29290b57cec5SDimitry Andric   // Example of <16 x i8>, target type i32:
29300b57cec5SDimitry Andric   // Index range [4,8):         v-----------v Will work.
29310b57cec5SDimitry Andric   //                +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
29320b57cec5SDimitry Andric   //     <16 x i8>: |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |
29330b57cec5SDimitry Andric   //     <4 x i32>: |           |           |           |           |
29340b57cec5SDimitry Andric   //                +-----------+-----------+-----------+-----------+
29350b57cec5SDimitry Andric   // Index range [6,10):              ^-----------^ Needs an extra shuffle.
29360b57cec5SDimitry Andric   // Target type i40:           ^--------------^ Won't work, bail.
29370b57cec5SDimitry Andric   bool MadeChange = false;
29380b57cec5SDimitry Andric   if (isShuffleExtractingFromLHS(SVI, Mask)) {
29390b57cec5SDimitry Andric     Value *V = LHS;
29400b57cec5SDimitry Andric     unsigned MaskElems = Mask.size();
2941e8d8bef9SDimitry Andric     auto *SrcTy = cast<FixedVectorType>(V->getType());
2942bdd1243dSDimitry Andric     unsigned VecBitWidth = SrcTy->getPrimitiveSizeInBits().getFixedValue();
29430b57cec5SDimitry Andric     unsigned SrcElemBitWidth = DL.getTypeSizeInBits(SrcTy->getElementType());
29440b57cec5SDimitry Andric     assert(SrcElemBitWidth && "vector elements must have a bitwidth");
29450b57cec5SDimitry Andric     unsigned SrcNumElems = SrcTy->getNumElements();
29460b57cec5SDimitry Andric     SmallVector<BitCastInst *, 8> BCs;
29470b57cec5SDimitry Andric     DenseMap<Type *, Value *> NewBCs;
29480b57cec5SDimitry Andric     for (User *U : SVI.users())
29490b57cec5SDimitry Andric       if (BitCastInst *BC = dyn_cast<BitCastInst>(U))
29500b57cec5SDimitry Andric         if (!BC->use_empty())
29510b57cec5SDimitry Andric           // Only visit bitcasts that weren't previously handled.
29520b57cec5SDimitry Andric           BCs.push_back(BC);
29530b57cec5SDimitry Andric     for (BitCastInst *BC : BCs) {
29540b57cec5SDimitry Andric       unsigned BegIdx = Mask.front();
29550b57cec5SDimitry Andric       Type *TgtTy = BC->getDestTy();
29560b57cec5SDimitry Andric       unsigned TgtElemBitWidth = DL.getTypeSizeInBits(TgtTy);
29570b57cec5SDimitry Andric       if (!TgtElemBitWidth)
29580b57cec5SDimitry Andric         continue;
29590b57cec5SDimitry Andric       unsigned TgtNumElems = VecBitWidth / TgtElemBitWidth;
29600b57cec5SDimitry Andric       bool VecBitWidthsEqual = VecBitWidth == TgtNumElems * TgtElemBitWidth;
29610b57cec5SDimitry Andric       bool BegIsAligned = 0 == ((SrcElemBitWidth * BegIdx) % TgtElemBitWidth);
29620b57cec5SDimitry Andric       if (!VecBitWidthsEqual)
29630b57cec5SDimitry Andric         continue;
29640b57cec5SDimitry Andric       if (!VectorType::isValidElementType(TgtTy))
29650b57cec5SDimitry Andric         continue;
29665ffd83dbSDimitry Andric       auto *CastSrcTy = FixedVectorType::get(TgtTy, TgtNumElems);
29670b57cec5SDimitry Andric       if (!BegIsAligned) {
29680b57cec5SDimitry Andric         // Shuffle the input so [0,NumElements) contains the output, and
29690b57cec5SDimitry Andric         // [NumElems,SrcNumElems) is undef.
29705ffd83dbSDimitry Andric         SmallVector<int, 16> ShuffleMask(SrcNumElems, -1);
29710b57cec5SDimitry Andric         for (unsigned I = 0, E = MaskElems, Idx = BegIdx; I != E; ++Idx, ++I)
29725ffd83dbSDimitry Andric           ShuffleMask[I] = Idx;
2973e8d8bef9SDimitry Andric         V = Builder.CreateShuffleVector(V, ShuffleMask,
29740b57cec5SDimitry Andric                                         SVI.getName() + ".extract");
29750b57cec5SDimitry Andric         BegIdx = 0;
29760b57cec5SDimitry Andric       }
29770b57cec5SDimitry Andric       unsigned SrcElemsPerTgtElem = TgtElemBitWidth / SrcElemBitWidth;
29780b57cec5SDimitry Andric       assert(SrcElemsPerTgtElem);
29790b57cec5SDimitry Andric       BegIdx /= SrcElemsPerTgtElem;
298006c3fb27SDimitry Andric       bool BCAlreadyExists = NewBCs.contains(CastSrcTy);
29810b57cec5SDimitry Andric       auto *NewBC =
29820b57cec5SDimitry Andric           BCAlreadyExists
29830b57cec5SDimitry Andric               ? NewBCs[CastSrcTy]
29840b57cec5SDimitry Andric               : Builder.CreateBitCast(V, CastSrcTy, SVI.getName() + ".bc");
29850b57cec5SDimitry Andric       if (!BCAlreadyExists)
29860b57cec5SDimitry Andric         NewBCs[CastSrcTy] = NewBC;
298706c3fb27SDimitry Andric       auto *Ext = Builder.CreateExtractElement(NewBC, BegIdx,
298806c3fb27SDimitry Andric                                                SVI.getName() + ".extract");
29890b57cec5SDimitry Andric       // The shufflevector isn't being replaced: the bitcast that used it
29900b57cec5SDimitry Andric       // is. InstCombine will visit the newly-created instructions.
29910b57cec5SDimitry Andric       replaceInstUsesWith(*BC, Ext);
29920b57cec5SDimitry Andric       MadeChange = true;
29930b57cec5SDimitry Andric     }
29940b57cec5SDimitry Andric   }
29950b57cec5SDimitry Andric 
29960b57cec5SDimitry Andric   // If the LHS is a shufflevector itself, see if we can combine it with this
29970b57cec5SDimitry Andric   // one without producing an unusual shuffle.
29980b57cec5SDimitry Andric   // Cases that might be simplified:
29990b57cec5SDimitry Andric   // 1.
30000b57cec5SDimitry Andric   // x1=shuffle(v1,v2,mask1)
30010b57cec5SDimitry Andric   //  x=shuffle(x1,undef,mask)
30020b57cec5SDimitry Andric   //        ==>
30030b57cec5SDimitry Andric   //  x=shuffle(v1,undef,newMask)
30040b57cec5SDimitry Andric   // newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : -1
30050b57cec5SDimitry Andric   // 2.
30060b57cec5SDimitry Andric   // x1=shuffle(v1,undef,mask1)
30070b57cec5SDimitry Andric   //  x=shuffle(x1,x2,mask)
30080b57cec5SDimitry Andric   // where v1.size() == mask1.size()
30090b57cec5SDimitry Andric   //        ==>
30100b57cec5SDimitry Andric   //  x=shuffle(v1,x2,newMask)
30110b57cec5SDimitry Andric   // newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : mask[i]
30120b57cec5SDimitry Andric   // 3.
30130b57cec5SDimitry Andric   // x2=shuffle(v2,undef,mask2)
30140b57cec5SDimitry Andric   //  x=shuffle(x1,x2,mask)
30150b57cec5SDimitry Andric   // where v2.size() == mask2.size()
30160b57cec5SDimitry Andric   //        ==>
30170b57cec5SDimitry Andric   //  x=shuffle(x1,v2,newMask)
30180b57cec5SDimitry Andric   // newMask[i] = (mask[i] < x1.size())
30190b57cec5SDimitry Andric   //              ? mask[i] : mask2[mask[i]-x1.size()]+x1.size()
30200b57cec5SDimitry Andric   // 4.
30210b57cec5SDimitry Andric   // x1=shuffle(v1,undef,mask1)
30220b57cec5SDimitry Andric   // x2=shuffle(v2,undef,mask2)
30230b57cec5SDimitry Andric   //  x=shuffle(x1,x2,mask)
30240b57cec5SDimitry Andric   // where v1.size() == v2.size()
30250b57cec5SDimitry Andric   //        ==>
30260b57cec5SDimitry Andric   //  x=shuffle(v1,v2,newMask)
30270b57cec5SDimitry Andric   // newMask[i] = (mask[i] < x1.size())
30280b57cec5SDimitry Andric   //              ? mask1[mask[i]] : mask2[mask[i]-x1.size()]+v1.size()
30290b57cec5SDimitry Andric   //
30300b57cec5SDimitry Andric   // Here we are really conservative:
30310b57cec5SDimitry Andric   // we are absolutely afraid of producing a shuffle mask not in the input
30320b57cec5SDimitry Andric   // program, because the code gen may not be smart enough to turn a merged
30330b57cec5SDimitry Andric   // shuffle into two specific shuffles: it may produce worse code.  As such,
30340b57cec5SDimitry Andric   // we only merge two shuffles if the result is either a splat or one of the
30350b57cec5SDimitry Andric   // input shuffle masks.  In this case, merging the shuffles just removes
30360b57cec5SDimitry Andric   // one instruction, which we know is safe.  This is good for things like
30370b57cec5SDimitry Andric   // turning: (splat(splat)) -> splat, or
30380b57cec5SDimitry Andric   // merge(V[0..n], V[n+1..2n]) -> V[0..2n]
30390b57cec5SDimitry Andric   ShuffleVectorInst* LHSShuffle = dyn_cast<ShuffleVectorInst>(LHS);
30400b57cec5SDimitry Andric   ShuffleVectorInst* RHSShuffle = dyn_cast<ShuffleVectorInst>(RHS);
30410b57cec5SDimitry Andric   if (LHSShuffle)
3042cb14a3feSDimitry Andric     if (!match(LHSShuffle->getOperand(1), m_Poison()) &&
3043cb14a3feSDimitry Andric         !match(RHS, m_Poison()))
30440b57cec5SDimitry Andric       LHSShuffle = nullptr;
30450b57cec5SDimitry Andric   if (RHSShuffle)
3046cb14a3feSDimitry Andric     if (!match(RHSShuffle->getOperand(1), m_Poison()))
30470b57cec5SDimitry Andric       RHSShuffle = nullptr;
30480b57cec5SDimitry Andric   if (!LHSShuffle && !RHSShuffle)
30490b57cec5SDimitry Andric     return MadeChange ? &SVI : nullptr;
30500b57cec5SDimitry Andric 
30510b57cec5SDimitry Andric   Value* LHSOp0 = nullptr;
30520b57cec5SDimitry Andric   Value* LHSOp1 = nullptr;
30530b57cec5SDimitry Andric   Value* RHSOp0 = nullptr;
30540b57cec5SDimitry Andric   unsigned LHSOp0Width = 0;
30550b57cec5SDimitry Andric   unsigned RHSOp0Width = 0;
30560b57cec5SDimitry Andric   if (LHSShuffle) {
30570b57cec5SDimitry Andric     LHSOp0 = LHSShuffle->getOperand(0);
30580b57cec5SDimitry Andric     LHSOp1 = LHSShuffle->getOperand(1);
3059e8d8bef9SDimitry Andric     LHSOp0Width = cast<FixedVectorType>(LHSOp0->getType())->getNumElements();
30600b57cec5SDimitry Andric   }
30610b57cec5SDimitry Andric   if (RHSShuffle) {
30620b57cec5SDimitry Andric     RHSOp0 = RHSShuffle->getOperand(0);
3063e8d8bef9SDimitry Andric     RHSOp0Width = cast<FixedVectorType>(RHSOp0->getType())->getNumElements();
30640b57cec5SDimitry Andric   }
30650b57cec5SDimitry Andric   Value* newLHS = LHS;
30660b57cec5SDimitry Andric   Value* newRHS = RHS;
30670b57cec5SDimitry Andric   if (LHSShuffle) {
30680b57cec5SDimitry Andric     // case 1
3069cb14a3feSDimitry Andric     if (match(RHS, m_Poison())) {
30700b57cec5SDimitry Andric       newLHS = LHSOp0;
30710b57cec5SDimitry Andric       newRHS = LHSOp1;
30720b57cec5SDimitry Andric     }
30730b57cec5SDimitry Andric     // case 2 or 4
30740b57cec5SDimitry Andric     else if (LHSOp0Width == LHSWidth) {
30750b57cec5SDimitry Andric       newLHS = LHSOp0;
30760b57cec5SDimitry Andric     }
30770b57cec5SDimitry Andric   }
30780b57cec5SDimitry Andric   // case 3 or 4
30790b57cec5SDimitry Andric   if (RHSShuffle && RHSOp0Width == LHSWidth) {
30800b57cec5SDimitry Andric     newRHS = RHSOp0;
30810b57cec5SDimitry Andric   }
30820b57cec5SDimitry Andric   // case 4
30830b57cec5SDimitry Andric   if (LHSOp0 == RHSOp0) {
30840b57cec5SDimitry Andric     newLHS = LHSOp0;
30850b57cec5SDimitry Andric     newRHS = nullptr;
30860b57cec5SDimitry Andric   }
30870b57cec5SDimitry Andric 
30880b57cec5SDimitry Andric   if (newLHS == LHS && newRHS == RHS)
30890b57cec5SDimitry Andric     return MadeChange ? &SVI : nullptr;
30900b57cec5SDimitry Andric 
30915ffd83dbSDimitry Andric   ArrayRef<int> LHSMask;
30925ffd83dbSDimitry Andric   ArrayRef<int> RHSMask;
30930b57cec5SDimitry Andric   if (newLHS != LHS)
30940b57cec5SDimitry Andric     LHSMask = LHSShuffle->getShuffleMask();
30950b57cec5SDimitry Andric   if (RHSShuffle && newRHS != RHS)
30960b57cec5SDimitry Andric     RHSMask = RHSShuffle->getShuffleMask();
30970b57cec5SDimitry Andric 
30980b57cec5SDimitry Andric   unsigned newLHSWidth = (newLHS != LHS) ? LHSOp0Width : LHSWidth;
30990b57cec5SDimitry Andric   SmallVector<int, 16> newMask;
31000b57cec5SDimitry Andric   bool isSplat = true;
31010b57cec5SDimitry Andric   int SplatElt = -1;
31020b57cec5SDimitry Andric   // Create a new mask for the new ShuffleVectorInst so that the new
31030b57cec5SDimitry Andric   // ShuffleVectorInst is equivalent to the original one.
31040b57cec5SDimitry Andric   for (unsigned i = 0; i < VWidth; ++i) {
31050b57cec5SDimitry Andric     int eltMask;
31060b57cec5SDimitry Andric     if (Mask[i] < 0) {
310706c3fb27SDimitry Andric       // This element is a poison value.
31080b57cec5SDimitry Andric       eltMask = -1;
31090b57cec5SDimitry Andric     } else if (Mask[i] < (int)LHSWidth) {
31100b57cec5SDimitry Andric       // This element is from left hand side vector operand.
31110b57cec5SDimitry Andric       //
31120b57cec5SDimitry Andric       // If LHS is going to be replaced (case 1, 2, or 4), calculate the
31130b57cec5SDimitry Andric       // new mask value for the element.
31140b57cec5SDimitry Andric       if (newLHS != LHS) {
31150b57cec5SDimitry Andric         eltMask = LHSMask[Mask[i]];
311606c3fb27SDimitry Andric         // If the value selected is an poison value, explicitly specify it
31170b57cec5SDimitry Andric         // with a -1 mask value.
311806c3fb27SDimitry Andric         if (eltMask >= (int)LHSOp0Width && isa<PoisonValue>(LHSOp1))
31190b57cec5SDimitry Andric           eltMask = -1;
31200b57cec5SDimitry Andric       } else
31210b57cec5SDimitry Andric         eltMask = Mask[i];
31220b57cec5SDimitry Andric     } else {
31230b57cec5SDimitry Andric       // This element is from right hand side vector operand
31240b57cec5SDimitry Andric       //
312506c3fb27SDimitry Andric       // If the value selected is a poison value, explicitly specify it
31260b57cec5SDimitry Andric       // with a -1 mask value. (case 1)
312706c3fb27SDimitry Andric       if (match(RHS, m_Poison()))
31280b57cec5SDimitry Andric         eltMask = -1;
31290b57cec5SDimitry Andric       // If RHS is going to be replaced (case 3 or 4), calculate the
31300b57cec5SDimitry Andric       // new mask value for the element.
31310b57cec5SDimitry Andric       else if (newRHS != RHS) {
31320b57cec5SDimitry Andric         eltMask = RHSMask[Mask[i]-LHSWidth];
313306c3fb27SDimitry Andric         // If the value selected is an poison value, explicitly specify it
31340b57cec5SDimitry Andric         // with a -1 mask value.
31350b57cec5SDimitry Andric         if (eltMask >= (int)RHSOp0Width) {
313606c3fb27SDimitry Andric           assert(match(RHSShuffle->getOperand(1), m_Poison()) &&
3137fe6060f1SDimitry Andric                  "should have been check above");
31380b57cec5SDimitry Andric           eltMask = -1;
31390b57cec5SDimitry Andric         }
31400b57cec5SDimitry Andric       } else
31410b57cec5SDimitry Andric         eltMask = Mask[i]-LHSWidth;
31420b57cec5SDimitry Andric 
31430b57cec5SDimitry Andric       // If LHS's width is changed, shift the mask value accordingly.
31440b57cec5SDimitry Andric       // If newRHS == nullptr, i.e. LHSOp0 == RHSOp0, we want to remap any
31450b57cec5SDimitry Andric       // references from RHSOp0 to LHSOp0, so we don't need to shift the mask.
31460b57cec5SDimitry Andric       // If newRHS == newLHS, we want to remap any references from newRHS to
31470b57cec5SDimitry Andric       // newLHS so that we can properly identify splats that may occur due to
31480b57cec5SDimitry Andric       // obfuscation across the two vectors.
31490b57cec5SDimitry Andric       if (eltMask >= 0 && newRHS != nullptr && newLHS != newRHS)
31500b57cec5SDimitry Andric         eltMask += newLHSWidth;
31510b57cec5SDimitry Andric     }
31520b57cec5SDimitry Andric 
31530b57cec5SDimitry Andric     // Check if this could still be a splat.
31540b57cec5SDimitry Andric     if (eltMask >= 0) {
31550b57cec5SDimitry Andric       if (SplatElt >= 0 && SplatElt != eltMask)
31560b57cec5SDimitry Andric         isSplat = false;
31570b57cec5SDimitry Andric       SplatElt = eltMask;
31580b57cec5SDimitry Andric     }
31590b57cec5SDimitry Andric 
31600b57cec5SDimitry Andric     newMask.push_back(eltMask);
31610b57cec5SDimitry Andric   }
31620b57cec5SDimitry Andric 
31630b57cec5SDimitry Andric   // If the result mask is equal to one of the original shuffle masks,
31640b57cec5SDimitry Andric   // or is a splat, do the replacement.
31650b57cec5SDimitry Andric   if (isSplat || newMask == LHSMask || newMask == RHSMask || newMask == Mask) {
31660b57cec5SDimitry Andric     if (!newRHS)
316706c3fb27SDimitry Andric       newRHS = PoisonValue::get(newLHS->getType());
3168e8d8bef9SDimitry Andric     return new ShuffleVectorInst(newLHS, newRHS, newMask);
31690b57cec5SDimitry Andric   }
31700b57cec5SDimitry Andric 
31710b57cec5SDimitry Andric   return MadeChange ? &SVI : nullptr;
31720b57cec5SDimitry Andric }
3173