Lines matching defs:NarrowTy in llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp (each line is prefixed with its source line number)

49 /// Try to break down \p OrigTy into \p NarrowTy sized pieces.
51 /// Returns the number of \p NarrowTy elements needed to reconstruct \p OrigTy,
57 getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy) {
61 unsigned NarrowSize = NarrowTy.getSizeInBits();
69 if (NarrowTy.isVector()) {
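Note: the scalar path of this breakdown is pure size arithmetic. A minimal standalone sketch under that assumption (breakDownScalar is a hypothetical helper, not the LLVM function; the real one also handles the vector case guarded by isVector() above):

    #include <utility>

    // E.g. OrigTy = s88, NarrowTy = s64 -> {1 whole part, 1 leftover piece (s24)}.
    std::pair<unsigned, unsigned> breakDownScalar(unsigned OrigBits,
                                                  unsigned NarrowBits) {
      unsigned NumParts = OrigBits / NarrowBits;     // whole NarrowTy pieces
      unsigned LeftoverBits = OrigBits % NarrowBits; // width of the tail, if any
      unsigned NumLeftover = LeftoverBits ? 1 : 0;   // scalar tail is one piece
      return {NumParts, NumLeftover};
    }
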
251 LLT NarrowTy, Register SrcReg) {
253 LLT GCDTy = getGCDType(getGCDType(SrcTy, NarrowTy), DstTy);
258 LLT LegalizerHelper::buildLCMMergePieces(LLT DstTy, LLT NarrowTy, LLT GCDTy,
261 LLT LCMTy = getLCMType(DstTy, NarrowTy);
263 int NumParts = LCMTy.getSizeInBits() / NarrowTy.getSizeInBits();
264 int NumSubParts = NarrowTy.getSizeInBits() / GCDTy.getSizeInBits();
320 AllPadReg = MIRBuilder.buildUndef(NarrowTy).getReg(0);
322 AllPadReg = MIRBuilder.buildConstant(NarrowTy, 0).getReg(0);
338 Remerge[I] = MIRBuilder.buildMergeLikeInstr(NarrowTy, SubMerge).getReg(0);
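Note: the LCM/GCD type math above fixes the repacking shape. A worked instance with assumed types (not taken from a test): for DstTy = s96 and NarrowTy = s64, LCMTy = getLCMType(s96, s64) = s192, so NumParts = 192/64 = 3 NarrowTy pieces (line 263); with GCDTy = s32, NumSubParts = 64/32 = 2 GCD-sized pieces per NarrowTy piece (line 264). Pieces past the end of the source get one shared pad register (G_IMPLICIT_DEF, or a zero constant depending on the pad strategy, lines 320-322), and each group of 2 is re-merged to s64 as on line 338.
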
1415 LLT NarrowTy) {
1417 uint64_t NarrowSize = NarrowTy.getSizeInBits();
1433 LLT ImplicitTy = NarrowTy;
1448 DstRegs.push_back(MIRBuilder.buildUndef(NarrowTy).getReg(0));
1461 unsigned NarrowSize = NarrowTy.getSizeInBits();
1467 auto K = MIRBuilder.buildConstant(NarrowTy,
1484 Ty, NarrowTy, PartRegs, LeftoverTy, LeftoverRegs);
1492 return narrowScalarExt(MI, TypeIdx, NarrowTy);
1498 if (NarrowTy.getSizeInBits() * 2 != SizeOp1) {
1499 LLVM_DEBUG(dbgs() << "Can't narrow trunc to type " << NarrowTy << "\n");
1503 auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));
1515 if (Ty.getSizeInBits() % NarrowTy.getSizeInBits() != 0)
1518 auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1).getReg());
1522 MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy}, {Unmerge.getReg(i)})
1540 return narrowScalarAddSub(MI, TypeIdx, NarrowTy);
1543 return narrowScalarMul(MI, NarrowTy);
1545 return narrowScalarExtract(MI, TypeIdx, NarrowTy);
1547 return narrowScalarInsert(MI, TypeIdx, NarrowTy);
1556 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1563 return reduceLoadStoreWidth(LoadMI, TypeIdx, NarrowTy);
1571 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1601 unsigned HandledSize = NumParts * NarrowTy.getSizeInBits();
1607 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1614 return reduceLoadStoreWidth(StoreMI, 0, NarrowTy);
1617 return narrowScalarSelect(MI, TypeIdx, NarrowTy);
1630 return narrowScalarBasic(MI, TypeIdx, NarrowTy);
1635 return narrowScalarShift(MI, TypeIdx, NarrowTy);
1645 return narrowScalarCTLZ(MI, TypeIdx, NarrowTy);
1648 return narrowScalarCTTZ(MI, TypeIdx, NarrowTy);
1650 return narrowScalarCTPOP(MI, TypeIdx, NarrowTy);
1656 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1664 narrowScalarSrc(MI, NarrowTy, 1);
1672 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1688 extractParts(MI.getOperand(i).getReg(), NarrowTy, NumParts,
1694 DstRegs[i] = MRI.createGenericVirtualRegister(NarrowTy);
1713 narrowScalarSrc(MI, NarrowTy, OpIdx);
1723 LLT LeftoverTy; // Example: s88 -> s64 (NarrowTy) + s24 (leftover)
1725 if (!extractParts(LHS, SrcTy, NarrowTy, LeftoverTy, LHSPartRegs,
1731 if (!extractParts(MI.getOperand(3).getReg(), SrcTy, NarrowTy, Unused,
1743 auto Zero = MIRBuilder.buildConstant(NarrowTy, 0);
1748 auto Xor = MIRBuilder.buildXor(NarrowTy, LHS, RHS).getReg(0);
1759 LLT GCDTy = extractGCDType(WidenedXors, NarrowTy, LeftoverTy, Xor);
1760 buildLCMMergePieces(LeftoverTy, NarrowTy, GCDTy, WidenedXors,
1769 auto Or = MIRBuilder.buildOr(NarrowTy, Xors[0], Xors[1]);
1771 Or = MIRBuilder.buildOr(NarrowTy, Or, Xors[I]);
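Note: for equality predicates no carries are needed: each pair of NarrowTy parts is XORed and the XORs are OR-reduced, turning the wide G_ICMP into one narrow compare against zero. A hedged MIR sketch for an s128 eq-compare in s64 parts (register names are illustrative):

    %d0:_(s64) = G_XOR %lhs0, %rhs0    - per-part difference
    %d1:_(s64) = G_XOR %lhs1, %rhs1
    %or:_(s64) = G_OR %d0, %d1         - nonzero iff any part differs
    %zero:_(s64) = G_CONSTANT i64 0
    %eq:_(s1) = G_ICMP intpred(eq), %or(s64), %zero
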
1836 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1848 if (NarrowTy.getScalarSizeInBits() > SizeInBits) {
1853 auto TruncMIB = MIRBuilder.buildTrunc(NarrowTy, MO1);
1857 Register DstExt = MRI.createGenericVirtualRegister(NarrowTy);
1880 Register SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
1889 MIRBuilder.buildConstant(NarrowTy, NarrowTy.getScalarSizeInBits() - 1)
1896 if ((i + 1) * NarrowTy.getScalarSizeInBits() <= SizeInBits) {
1899 } else if (i * NarrowTy.getScalarSizeInBits() >= SizeInBits) {
1907 MIRBuilder.buildAShr(NarrowTy, PartialExtensionReg, AshrCstReg)
1914 TargetOpcode::G_SEXT_INREG, {NarrowTy},
1915 {SrcRegs[i], SizeInBits % NarrowTy.getScalarSizeInBits()})
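Note: the three cases in the loop above, spelled out for a hypothetical s96 = G_SEXT_INREG %x, 33 narrowed with NarrowTy = s32: part 0 (bits 0-31) lies entirely below bit 33, so it is copied through (the (i + 1) * 32 <= 33 test, line 1896); part 1 (bits 32-63) straddles the boundary, so it becomes G_SEXT_INREG %part1, 1, since 33 % 32 == 1 (line 1915); part 2 (bits 64-95) lies entirely above, so it is filled with G_ASHR of the partially extended part by 31 (NarrowSize - 1, line 1889), which replicates the sign bit.
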
1935 extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs,
1939 auto DstPart = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
1955 narrowScalarSrc(MI, NarrowTy, 2);
1963 return narrowScalarFPTOI(MI, TypeIdx, NarrowTy);
1968 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_FPEXT);
1973 return narrowScalarFLDEXP(MI, TypeIdx, NarrowTy);
1979 const APInt One(NarrowTy.getSizeInBits(), 1);
1980 auto VScaleBase = MIRBuilder.buildVScale(NarrowTy, One);
2019 void LegalizerHelper::narrowScalarSrc(MachineInstr &MI, LLT NarrowTy,
2022 auto ExtB = MIRBuilder.buildTrunc(NarrowTy, MO);
2035 void LegalizerHelper::narrowScalarDst(MachineInstr &MI, LLT NarrowTy,
2038 Register DstTrunc = MRI.createGenericVirtualRegister(NarrowTy);
4815 LLT NarrowTy = (NumElts == 1) ? EltTy : LLT::fixed_vector(NumElts, EltTy);
4818 getNarrowTypeBreakDown(Ty, NarrowTy, LeftoverTy);
4822 DstOps.push_back(NarrowTy);
4986 LLT NarrowTy) {
4992 if (TypeIdx != 1 || NarrowTy == DstTy)
4998 // legalization compatible with NarrowTy.
4999 assert(SrcTy.isVector() && NarrowTy.isVector() && "Expected vector types");
5000 assert((SrcTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
5002 if ((SrcTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0) ||
5003 (NarrowTy.getSizeInBits() % DstTy.getSizeInBits() != 0))
5008 // lowered to bit sequence extracts from register. Unpack SrcTy to NarrowTy
5009 // (register size) pieces first. Then unpack each of NarrowTy pieces to DstTy.
5013 // %5:_(NarrowTy), %6 = G_UNMERGE_VALUES %0:_(SrcTy) - reg sequence
5014 // %1:_(DstTy), %2 = G_UNMERGE_VALUES %5:_(NarrowTy) - sequence of bits in reg
5015 // %3:_(DstTy), %4 = G_UNMERGE_VALUES %6:_(NarrowTy)
5016 auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, SrcReg);
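Note: a concrete instance of the two-level unmerge above, with types chosen to satisfy the divisibility checks on lines 5002-5003 (illustrative, not from a test): SrcTy = <8 x s8> (one 64-bit register), NarrowTy = <4 x s8>, DstTy = <2 x s8>:

    %5:_(<4 x s8>), %6:_(<4 x s8>) = G_UNMERGE_VALUES %0:_(<8 x s8>) - reg sequence
    %1:_(<2 x s8>), %2:_(<2 x s8>) = G_UNMERGE_VALUES %5:_(<4 x s8>) - bits in reg
    %3:_(<2 x s8>), %4:_(<2 x s8>) = G_UNMERGE_VALUES %6:_(<4 x s8>)
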
5034 LLT NarrowTy) {
5038 // DstReg has to do more/fewer elements legalization compatible with NarrowTy.
5039 assert(DstTy.isVector() && NarrowTy.isVector() && "Expected vector types");
5040 assert((DstTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
5041 if (NarrowTy == SrcTy)
5050 assert((SrcTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
5051 if ((DstTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0) ||
5052 (NarrowTy.getNumElements() >= SrcTy.getNumElements()))
5058 // %9:_(NarrowTy) = G_BUILD_VECTOR %3:_(EltTy), %4
5059 // %10:_(NarrowTy) = G_BUILD_VECTOR %5:_(EltTy), %6
5060 // %11:_(NarrowTy) = G_BUILD_VECTOR %7:_(EltTy), %8
5061 // %2:_(DstTy) = G_CONCAT_VECTORS %9:_(NarrowTy), %10, %11
5072 unsigned NumNarrowTyElts = NarrowTy.getNumElements();
5078 MIRBuilder.buildMergeLikeInstr(NarrowTy, Pieces).getReg(0));
5087 if ((NarrowTy.getSizeInBits() % SrcTy.getSizeInBits() != 0) ||
5088 (DstTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0))
5093 // lowered to bit sequence packing into register. Merge SrcTy to NarrowTy
5094 // (register size) pieces first. Then merge each of NarrowTy pieces to DstTy.
5098 // %5:_(NarrowTy) = G_MERGE_VALUES %1:_(SrcTy), %2 - sequence of bits in reg
5099 // %6:_(NarrowTy) = G_MERGE_VALUES %3:_(SrcTy), %4
5100 // %0:_(DstTy) = G_MERGE_VALUES %5:_(NarrowTy), %6 - reg sequence
5102 unsigned NumParts = DstTy.getNumElements() / NarrowTy.getNumElements();
5104 unsigned NumElts = NarrowTy.getNumElements() / NumSrcElts;
5110 MIRBuilder.buildMergeLikeInstr(NarrowTy, Sources).getReg(0));
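Note: the mirror-image merge with concrete types (again illustrative): SrcTy = <2 x s8>, NarrowTy = <4 x s8>, DstTy = <8 x s8>, giving NumParts = 8/4 = 2 (line 5102) and NumElts = 4/2 = 2 source values per NarrowTy piece (line 5104). Since all types here are vectors, buildMergeLikeInstr should emit G_CONCAT_VECTORS:

    %5:_(<4 x s8>) = G_CONCAT_VECTORS %1:_(<2 x s8>), %2:_(<2 x s8>)
    %6:_(<4 x s8>) = G_CONCAT_VECTORS %3:_(<2 x s8>), %4:_(<2 x s8>)
    %0:_(<8 x s8>) = G_CONCAT_VECTORS %5:_(<4 x s8>), %6:_(<4 x s8>)
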
5154 // Build a sequence of NarrowTy pieces in VecParts for this operand.
5194 LLT NarrowTy) {
5220 std::tie(NumParts, NumLeftover) = getNarrowTypeBreakDown(ValTy, NarrowTy, LeftoverTy);
5222 if (extractParts(ValReg, ValTy, NarrowTy, LeftoverTy, NarrowRegs,
5270 unsigned Offset = isBigEndian ? TotalSize - NarrowTy.getSizeInBits() : 0;
5272 splitTypePieces(NarrowTy, NarrowRegs, NumParts, Offset);
5279 insertParts(ValReg, ValTy, NarrowTy, NarrowRegs,
5289 LLT NarrowTy) {
5292 unsigned NumElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
5428 return fewerElementsVectorUnmergeValues(MI, TypeIdx, NarrowTy);
5431 return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
5435 return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
5438 return fewerElementsVectorExtractInsertVectorElt(MI, TypeIdx, NarrowTy);
5441 return reduceLoadStoreWidth(cast<GLoadStore>(MI), TypeIdx, NarrowTy);
5445 return fewerElementsVectorReductions(MI, TypeIdx, NarrowTy);
5448 return fewerElementsVectorSeqReductions(MI, TypeIdx, NarrowTy);
5450 return fewerElementsVectorShuffle(MI, TypeIdx, NarrowTy);
5454 return fewerElementsBitcast(MI, TypeIdx, NarrowTy);
5464 LLT NarrowTy) {
5474 NarrowTy.getSizeInBits() / SrcTy.getScalarSizeInBits();
5486 MIRBuilder.buildBitcast(NarrowTy, SrcVRegs[i]).getReg(0));
5494 MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) {
5511 // We only support splitting a shuffle into 2, so adjust NarrowTy accordingly.
5513 NarrowTy =
5515 unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
5518 extractParts(Src1Reg, NarrowTy, 2, SplitSrc1Regs, MIRBuilder, MRI);
5519 extractParts(Src2Reg, NarrowTy, 2, SplitSrc2Regs, MIRBuilder, MRI);
5580 LLT EltTy = NarrowTy.getElementType();
5609 Output = MIRBuilder.buildBuildVector(NarrowTy, SVOps).getReg(0);
5612 Output = MIRBuilder.buildUndef(NarrowTy).getReg(0);
5617 ? MIRBuilder.buildUndef(NarrowTy).getReg(0)
5620 Output = MIRBuilder.buildShuffleVector(NarrowTy, Op0, Op1, Ops).getReg(0);
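Note: the split is always into exactly two halves (line 5513). Each half of the result becomes either a narrower G_SHUFFLE_VECTOR (when its mask elements touch at most two of the four half-inputs), a G_BUILD_VECTOR of per-element extracts (line 5609), or undef (line 5612). A hedged sketch for a <4 x s32> shuffle with mask <0,4,2,6>, where each output half happens to need only two half-inputs:

    %a_lo:_(<2 x s32>), %a_hi:_(<2 x s32>) = G_UNMERGE_VALUES %a:_(<4 x s32>)
    %b_lo:_(<2 x s32>), %b_hi:_(<2 x s32>) = G_UNMERGE_VALUES %b:_(<4 x s32>)
    %lo:_(<2 x s32>) = G_SHUFFLE_VECTOR %a_lo, %b_lo, shufflemask(0, 2)
    %hi:_(<2 x s32>) = G_SHUFFLE_VECTOR %a_hi, %b_hi, shufflemask(0, 2)
    %res:_(<4 x s32>) = G_CONCAT_VECTORS %lo:_(<2 x s32>), %hi:_(<2 x s32>)
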
5632 MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) {
5642 if (NarrowTy.isVector() &&
5643 (SrcTy.getNumElements() % NarrowTy.getNumElements() != 0))
5648 // If NarrowTy is a scalar then we're being asked to scalarize.
5650 NarrowTy.isVector() ? SrcTy.getNumElements() / NarrowTy.getNumElements()
5653 extractParts(SrcReg, NarrowTy, NumParts, SplitSrcs, MIRBuilder, MRI);
5654 if (NarrowTy.isScalar()) {
5655 if (DstTy != NarrowTy)
5666 .buildInstr(ScalarOpc, {NarrowTy},
5682 Acc = MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {Acc, SplitSrcs[Idx]})
5698 isPowerOf2_32(NarrowTy.getNumElements())) {
5699 return tryNarrowPow2Reduction(MI, SrcReg, SrcTy, NarrowTy, ScalarOpc);
5720 LLT NarrowTy) {
5723 if (!NarrowTy.isScalar() || TypeIdx != 2 || DstTy != ScalarTy ||
5724 DstTy != NarrowTy)
5736 extractParts(SrcReg, NarrowTy, NumParts, SplitSrcs, MIRBuilder, MRI);
5739 Acc = MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {Acc, SplitSrcs[i]})
5749 LLT SrcTy, LLT NarrowTy,
5752 // Split the sources into NarrowTy size pieces.
5753 extractParts(SrcReg, NarrowTy,
5754 SrcTy.getNumElements() / NarrowTy.getNumElements(), SplitSrcs,
5757 // one NarrowTy size value left.
5765 MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {LHS, RHS}).getReg(0);
5770 // Finally generate the requested NarrowTy based reduction.
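Note: tryNarrowPow2Reduction keeps halving: split the source into NarrowTy pieces, combine them pairwise with the scalar opcode applied element-wise on the narrow vectors (line 5765) until one piece remains, then emit the requested reduction on that piece. A sketch for G_VECREDUCE_ADD of <8 x s32> with NarrowTy = <2 x s32> (assumed types):

    %p0:_(<2 x s32>), %p1, %p2, %p3 = G_UNMERGE_VALUES %src:_(<8 x s32>)
    %s0:_(<2 x s32>) = G_ADD %p0, %p1    - 4 pieces -> 2
    %s1:_(<2 x s32>) = G_ADD %p2, %p3
    %s:_(<2 x s32>) = G_ADD %s0, %s1     - 2 pieces -> 1
    %r:_(s32) = G_VECREDUCE_ADD %s:_(<2 x s32>)
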
6399 LLT NarrowTy) {
6406 B.buildMul(NarrowTy, Src1Regs[DstIdx], Src2Regs[DstIdx]).getReg(0);
6417 B.buildMul(NarrowTy, Src1Regs[DstIdx - i], Src2Regs[i]);
6424 B.buildUMulH(NarrowTy, Src1Regs[DstIdx - 1 - i], Src2Regs[i]);
6436 B.buildUAddo(NarrowTy, LLT::scalar(1), Factors[0], Factors[1]);
6438 CarrySum = B.buildZExt(NarrowTy, Uaddo.getReg(1)).getReg(0);
6441 B.buildUAddo(NarrowTy, LLT::scalar(1), FactorSum, Factors[i]);
6443 MachineInstrBuilder Carry = B.buildZExt(NarrowTy, Uaddo.getReg(1));
6444 CarrySum = B.buildAdd(NarrowTy, CarrySum, Carry).getReg(0);
6448 FactorSum = B.buildAdd(NarrowTy, Factors[0], Factors[1]).getReg(0);
6450 FactorSum = B.buildAdd(NarrowTy, FactorSum, Factors[i]).getReg(0);
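Note: multiplyRegisters is a schoolbook limb multiply: each column sums low products (G_MUL, lines 6406/6417), the previous column's high products (G_UMULH, line 6424), and carries collected through G_UADDO/G_ZEXT (lines 6436-6444). A plain-C++ analogue with 32-bit limbs, a sketch of the arithmetic rather than the builder code (mulLimbs is hypothetical):

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    // Truncated (same-width) multiply of little-endian 32-bit limb arrays,
    // matching how G_MUL narrowing keeps only the low A.size() limbs.
    std::vector<uint32_t> mulLimbs(const std::vector<uint32_t> &A,
                                   const std::vector<uint32_t> &B) {
      std::size_t N = A.size();
      std::vector<uint32_t> Dst(N, 0);
      for (std::size_t i = 0; i < N; ++i) {
        uint64_t Carry = 0;
        for (std::size_t j = 0; i + j < N; ++j) {
          // The low 32 bits play the G_MUL role, the shifted-out high 32 bits
          // the G_UMULH role, and Carry the G_UADDO carry chain.
          uint64_t T = uint64_t(A[i]) * B[j] + Dst[i + j] + Carry;
          Dst[i + j] = uint32_t(T);
          Carry = T >> 32;
        }
      }
      return Dst;
    }
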
6461 LLT NarrowTy) {
6513 extractParts(Src1, RegTy, NarrowTy, LeftoverTy, Src1Regs, Src1Left,
6515 extractParts(Src2, RegTy, NarrowTy, DummyTy, Src2Regs, Src2Left, MIRBuilder,
6547 insertParts(MI.getOperand(0).getReg(), RegTy, NarrowTy,
6556 LegalizerHelper::narrowScalarMul(MachineInstr &MI, LLT NarrowTy) {
6564 unsigned NarrowSize = NarrowTy.getSizeInBits();
6574 extractParts(Src1, NarrowTy, NumParts, Src1Parts, MIRBuilder, MRI);
6575 extractParts(Src2, NarrowTy, NumParts, Src2Parts, MIRBuilder, MRI);
6576 multiplyRegisters(DstTmpRegs, Src1Parts, Src2Parts, NarrowTy);
6587 LLT NarrowTy) {
6600 NarrowTy.getScalarSizeInBits() < (IsSigned ? 17u : 16u))
6604 narrowScalarDst(MI, NarrowTy, 0,
6612 LLT NarrowTy) {
6616 uint64_t NarrowSize = NarrowTy.getSizeInBits();
6627 extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs,
6639 } else if (SrcStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
6680 LLT NarrowTy) {
6689 extractParts(MI.getOperand(1).getReg(), RegTy, NarrowTy, LeftoverTy, SrcRegs,
6694 uint64_t NarrowSize = NarrowTy.getSizeInBits();
6701 if (DstStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
6710 // The leftover reg is smaller than NarrowTy, so we need to extend it.
6711 SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
6743 Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
6763 LLT NarrowTy) {
6773 if (!extractParts(MI.getOperand(1).getReg(), DstTy, NarrowTy, LeftoverTy,
6778 if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, Unused,
6783 auto Inst = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
6795 insertParts(DstReg, DstTy, NarrowTy, DstRegs,
6804 LLT NarrowTy) {
6815 LLT GCDTy = extractGCDType(Parts, DstTy, NarrowTy, SrcReg);
6816 LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts, MI.getOpcode());
6825 LLT NarrowTy) {
6841 if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, LeftoverTy,
6846 if (!extractParts(MI.getOperand(3).getReg(), DstTy, NarrowTy, Unused,
6851 auto Select = MIRBuilder.buildSelect(NarrowTy,
6862 insertParts(DstReg, DstTy, NarrowTy, DstRegs,
6871 LLT NarrowTy) {
6876 unsigned NarrowSize = NarrowTy.getSizeInBits();
6882 auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
6884 auto C_0 = B.buildConstant(NarrowTy, 0);
6904 LLT NarrowTy) {
6909 unsigned NarrowSize = NarrowTy.getSizeInBits();
6915 auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
6917 auto C_0 = B.buildConstant(NarrowTy, 0);
6937 LLT NarrowTy) {
6942 unsigned NarrowSize = NarrowTy.getSizeInBits();
6945 auto UnmergeSrc = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));
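Note: all three bit-count narrowings split the source into two NarrowTy halves: CTLZ consults the high half first, CTTZ the low half, and CTPOP simply adds the two counts. A hedged sketch of the resulting logic for s128 -> two s64 halves (the real code also fixes up the result width with extends/truncs):

    %lo:_(s64), %hi:_(s64) = G_UNMERGE_VALUES %src:_(s128)
    ctlz:  %hi == 0 ? 64 + ctlz(%lo) : ctlz(%hi)
    cttz:  %lo == 0 ? 64 + cttz(%hi) : cttz(%lo)
    ctpop: ctpop(%lo) + ctpop(%hi)
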
6960 LLT NarrowTy) {
6968 unsigned ClampSize = NarrowTy.getScalarSizeInBits();
6976 auto Trunc = B.buildTrunc(NarrowTy, Clamp);