Lines Matching defs:NarrowTy

48 /// Try to break down \p OrigTy into \p NarrowTy sized pieces.
50 /// Returns the number of \p NarrowTy elements needed to reconstruct \p OrigTy,
56 getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy) {
60 unsigned NarrowSize = NarrowTy.getSizeInBits();
68 if (NarrowTy.isVector()) {
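
The breakdown above is pure size arithmetic: divide the original width by the narrow width to get whole NarrowTy parts, with the remainder becoming LeftoverTy. A minimal standalone sketch of that arithmetic, using plain unsigned bit widths instead of LLTs (breakDownBits is an illustrative name, not LLVM API):

#include <cassert>
#include <cstdio>
#include <utility>

// Returns {number of NarrowSize parts, leftover width in bits}.
std::pair<unsigned, unsigned> breakDownBits(unsigned OrigSize,
                                            unsigned NarrowSize) {
  assert(NarrowSize != 0 && "narrow type must be non-empty");
  return {OrigSize / NarrowSize, OrigSize % NarrowSize};
}

int main() {
  // s88 split by s64: one s64 part plus an s24 leftover piece.
  auto [Parts, Leftover] = breakDownBits(88, 64);
  std::printf("parts=%u leftover=s%u\n", Parts, Leftover); // parts=1 leftover=s24
}
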
251 LLT NarrowTy, Register SrcReg) {
253 LLT GCDTy = getGCDType(getGCDType(SrcTy, NarrowTy), DstTy);
258 LLT LegalizerHelper::buildLCMMergePieces(LLT DstTy, LLT NarrowTy, LLT GCDTy,
261 LLT LCMTy = getLCMType(DstTy, NarrowTy);
263 int NumParts = LCMTy.getSizeInBits() / NarrowTy.getSizeInBits();
264 int NumSubParts = NarrowTy.getSizeInBits() / GCDTy.getSizeInBits();
320 AllPadReg = MIRBuilder.buildUndef(NarrowTy).getReg(0);
322 AllPadReg = MIRBuilder.buildConstant(NarrowTy, 0).getReg(0);
338 Remerge[I] = MIRBuilder.buildMergeLikeInstr(NarrowTy, SubMerge).getReg(0);
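
Lines 261-264 size the remerge around the LCM of the destination and narrow widths, then subdivide each NarrowTy piece by the GCD. A standalone sketch of that counting with the <numeric> gcd/lcm helpers; the concrete widths are made up for illustration:

#include <cstdio>
#include <numeric> // std::gcd, std::lcm

int main() {
  unsigned DstSize = 96, NarrowSize = 64;
  unsigned GCDSize = std::gcd(DstSize, NarrowSize); // 32
  unsigned LCMSize = std::lcm(DstSize, NarrowSize); // 192
  unsigned NumParts = LCMSize / NarrowSize;         // 3 NarrowTy pieces
  unsigned NumSubParts = NarrowSize / GCDSize;      // 2 GCD pieces per NarrowTy
  std::printf("LCM=s%u parts=%u subparts=%u\n", LCMSize, NumParts, NumSubParts);
}
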
1222 LLT NarrowTy) {
1224 uint64_t NarrowSize = NarrowTy.getSizeInBits();
1240 LLT ImplicitTy = NarrowTy;
1255 DstRegs.push_back(MIRBuilder.buildUndef(NarrowTy).getReg(0));
1268 unsigned NarrowSize = NarrowTy.getSizeInBits();
1274 auto K = MIRBuilder.buildConstant(NarrowTy,
1291 Ty, NarrowTy, PartRegs, LeftoverTy, LeftoverRegs);
1299 return narrowScalarExt(MI, TypeIdx, NarrowTy);
1305 if (NarrowTy.getSizeInBits() * 2 != SizeOp1) {
1306 LLVM_DEBUG(dbgs() << "Can't narrow trunc to type " << NarrowTy << "\n");
1310 auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));
1322 if (Ty.getSizeInBits() % NarrowTy.getSizeInBits() != 0)
1325 auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1).getReg());
1329 MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy}, {Unmerge.getReg(i)})
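
Both paths above reduce to the same pattern: unmerge the wide value into NarrowTy pieces, then either keep the piece you need (the trunc path at line 1310) or apply the opcode to each piece (line 1329). A standalone sketch of the trunc case, with plain integers standing in for registers:

#include <cstdint>
#include <cstdio>

int main() {
  // Truncating s64 -> s32 when NarrowTy is s32: unmerge the source into two
  // halves and keep the low one (mirrors the G_UNMERGE_VALUES at line 1310).
  uint64_t Src = 0x1122334455667788ULL;
  uint32_t Lo = static_cast<uint32_t>(Src);       // piece 0 = trunc result
  uint32_t Hi = static_cast<uint32_t>(Src >> 32); // piece 1 is dropped
  std::printf("trunc=%#x dropped=%#x\n", Lo, Hi);
}
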
1347 return narrowScalarAddSub(MI, TypeIdx, NarrowTy);
1350 return narrowScalarMul(MI, NarrowTy);
1352 return narrowScalarExtract(MI, TypeIdx, NarrowTy);
1354 return narrowScalarInsert(MI, TypeIdx, NarrowTy);
1363 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1370 return reduceLoadStoreWidth(LoadMI, TypeIdx, NarrowTy);
1378 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1408 unsigned HandledSize = NumParts * NarrowTy.getSizeInBits();
1414 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1421 return reduceLoadStoreWidth(StoreMI, 0, NarrowTy);
1424 return narrowScalarSelect(MI, TypeIdx, NarrowTy);
1437 return narrowScalarBasic(MI, TypeIdx, NarrowTy);
1442 return narrowScalarShift(MI, TypeIdx, NarrowTy);
1452 return narrowScalarCTLZ(MI, TypeIdx, NarrowTy);
1455 return narrowScalarCTTZ(MI, TypeIdx, NarrowTy);
1457 return narrowScalarCTPOP(MI, TypeIdx, NarrowTy);
1463 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1471 narrowScalarSrc(MI, NarrowTy, 1);
1479 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1495 extractParts(MI.getOperand(i).getReg(), NarrowTy, NumParts,
1501 DstRegs[i] = MRI.createGenericVirtualRegister(NarrowTy);
1520 narrowScalarSrc(MI, NarrowTy, OpIdx);
1535 LLT LeftoverTy; // Example: s88 -> s64 (NarrowTy) + s24 (leftover)
1537 if (!extractParts(LHS, SrcTy, NarrowTy, LeftoverTy, LHSPartRegs,
1543 if (!extractParts(MI.getOperand(3).getReg(), SrcTy, NarrowTy, Unused,
1555 auto Zero = MIRBuilder.buildConstant(NarrowTy, 0);
1560 auto Xor = MIRBuilder.buildXor(NarrowTy, LHS, RHS).getReg(0);
1571 LLT GCDTy = extractGCDType(WidenedXors, NarrowTy, LeftoverTy, Xor);
1572 buildLCMMergePieces(LeftoverTy, NarrowTy, GCDTy, WidenedXors,
1581 auto Or = MIRBuilder.buildOr(NarrowTy, Xors[0], Xors[1]);
1583 Or = MIRBuilder.buildOr(NarrowTy, Or, Xors[I]);
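
The ICMP narrowing above implements wide equality as a reduction: XOR corresponding pieces (line 1560), OR the XORs together (lines 1581-1583), and compare the accumulated word against zero. A standalone sketch on two 64-bit parts (wideEq is an illustrative name):

#include <cstdint>
#include <cstdio>

// Equality of two 128-bit values held as two 64-bit parts: XOR each pair of
// parts, OR the results, and test the accumulated word against zero.
bool wideEq(const uint64_t L[2], const uint64_t R[2]) {
  uint64_t Acc = (L[0] ^ R[0]) | (L[1] ^ R[1]);
  return Acc == 0;
}

int main() {
  uint64_t A[2] = {0xdeadbeef, 0x1}, B[2] = {0xdeadbeef, 0x1};
  std::printf("%d\n", wideEq(A, B)); // 1
}
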
1608 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1620 if (NarrowTy.getScalarSizeInBits() > SizeInBits) {
1625 auto TruncMIB = MIRBuilder.buildTrunc(NarrowTy, MO1);
1629 Register DstExt = MRI.createGenericVirtualRegister(NarrowTy);
1652 Register SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
1661 MIRBuilder.buildConstant(NarrowTy, NarrowTy.getScalarSizeInBits() - 1)
1668 if ((i + 1) * NarrowTy.getScalarSizeInBits() <= SizeInBits) {
1671 } else if (i * NarrowTy.getScalarSizeInBits() >= SizeInBits) {
1679 MIRBuilder.buildAShr(NarrowTy, PartialExtensionReg, AshrCstReg)
1686 TargetOpcode::G_SEXT_INREG, {NarrowTy},
1687 {SrcRegs[i], SizeInBits % NarrowTy.getScalarSizeInBits()})
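
The G_SEXT_INREG split above classifies each piece by where it sits relative to the sign bit: pieces wholly below SizeInBits are copied (line 1668), pieces wholly above are filled from an arithmetic shift of the partially extended piece (lines 1661-1679), and the straddling piece gets a narrower G_SEXT_INREG (lines 1686-1687). A standalone sketch with 64-bit parts and SizeInBits = 80; sextInReg models the in-register extension:

#include <cstdint>
#include <cstdio>

// Sign-extend the low SizeInBits bits of a 64-bit part (G_SEXT_INREG analogue).
int64_t sextInReg(uint64_t V, unsigned SizeInBits) {
  unsigned Shift = 64 - SizeInBits;
  return (int64_t)(V << Shift) >> Shift; // arithmetic shift right
}

int main() {
  // A 128-bit value as two 64-bit parts; the sign bit sits at position 79,
  // i.e. SizeInBits = 80 straddles the second part.
  uint64_t Parts[2] = {0x0123456789abcdefULL, 0x000000000000ffffULL};
  // Part 0 lies wholly below bit 80: copied unchanged.
  // Part 1 straddles bit 80: sign-extend its low 80 % 64 = 16 bits.
  Parts[1] = (uint64_t)sextInReg(Parts[1], 80 % 64);
  // Parts wholly above bit 80 (none here) would be filled with Parts[1] >> 63.
  std::printf("%016llx %016llx\n", (unsigned long long)Parts[1],
              (unsigned long long)Parts[0]); // ffffffffffffffff 0123456789abcdef
}
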
1707 extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs,
1711 auto DstPart = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
1727 narrowScalarSrc(MI, NarrowTy, 2);
1733 return narrowScalarFPTOI(MI, TypeIdx, NarrowTy);
1738 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_FPEXT);
1743 return narrowScalarFLDEXP(MI, TypeIdx, NarrowTy);
1749 const APInt One(NarrowTy.getSizeInBits(), 1);
1750 auto VScaleBase = MIRBuilder.buildVScale(NarrowTy, One);
1789 void LegalizerHelper::narrowScalarSrc(MachineInstr &MI, LLT NarrowTy,
1792 auto ExtB = MIRBuilder.buildTrunc(NarrowTy, MO);
1805 void LegalizerHelper::narrowScalarDst(MachineInstr &MI, LLT NarrowTy,
1808 Register DstTrunc = MRI.createGenericVirtualRegister(NarrowTy);
4234 LLT NarrowTy = (NumElts == 1) ? EltTy : LLT::fixed_vector(NumElts, EltTy);
4237 getNarrowTypeBreakDown(Ty, NarrowTy, LeftoverTy);
4241 DstOps.push_back(NarrowTy);
4405 LLT NarrowTy) {
4411 if (TypeIdx != 1 || NarrowTy == DstTy)
4417 // legalization compatible with NarrowTy.
4418 assert(SrcTy.isVector() && NarrowTy.isVector() && "Expected vector types");
4419 assert((SrcTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
4421 if ((SrcTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0) ||
4422 (NarrowTy.getSizeInBits() % DstTy.getSizeInBits() != 0))
4427 // lowered to bit-sequence extracts from a register. Unpack SrcTy into NarrowTy
4428 // (register-sized) pieces first, then unpack each NarrowTy piece into DstTy.
4432 // %5:_(NarrowTy), %6 = G_UNMERGE_VALUES %0:_(SrcTy) - reg sequence
4433 // %1:_(DstTy), %2 = G_UNMERGE_VALUES %5:_(NarrowTy) - sequence of bits in reg
4434 // %3:_(DstTy), %4 = G_UNMERGE_VALUES %6:_(NarrowTy)
4435 auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, SrcReg);
4453 LLT NarrowTy) {
4457 // DstReg then needs a more/fewer-elements legalization compatible with NarrowTy.
4458 assert(DstTy.isVector() && NarrowTy.isVector() && "Expected vector types");
4459 assert((DstTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
4460 if (NarrowTy == SrcTy)
4469 assert((SrcTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
4470 if ((DstTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0) ||
4471 (NarrowTy.getNumElements() >= SrcTy.getNumElements()))
4477 // %9:_(NarrowTy) = G_BUILD_VECTOR %3:_(EltTy), %4
4478 // %10:_(NarrowTy) = G_BUILD_VECTOR %5:_(EltTy), %6
4479 // %11:_(NarrowTy) = G_BUILD_VECTOR %7:_(EltTy), %8
4480 // %2:_(DstTy) = G_CONCAT_VECTORS %9:_(NarrowTy), %10, %11
4491 unsigned NumNarrowTyElts = NarrowTy.getNumElements();
4497 MIRBuilder.buildMergeLikeInstr(NarrowTy, Pieces).getReg(0));
4506 if ((NarrowTy.getSizeInBits() % SrcTy.getSizeInBits() != 0) ||
4507 (DstTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0))
4512 // lowered to bit-sequence packing into a register. Merge SrcTy into NarrowTy
4513 // (register-sized) pieces first, then merge each NarrowTy piece into DstTy.
4517 // %5:_(NarrowTy) = G_MERGE_VALUES %1:_(SrcTy), %2 - sequence of bits in reg
4518 // %6:_(NarrowTy) = G_MERGE_VALUES %3:_(SrcTy), %4
4519 // %0:_(DstTy) = G_MERGE_VALUES %5:_(NarrowTy), %6 - reg sequence
4521 unsigned NumParts = DstTy.getNumElements() / NarrowTy.getNumElements();
4523 unsigned NumElts = NarrowTy.getNumElements() / NumSrcElts;
4529 MIRBuilder.buildMergeLikeInstr(NarrowTy, Sources).getReg(0));
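
The counts at lines 4521-4523 just size the two-stage merge: how many NarrowTy results the destination needs, and how many source operands feed each one. A sketch of that arithmetic under assumed types (DstTy = <8 x s16>, NarrowTy = <4 x s16>, SrcTy = <2 x s16>):

#include <cstdio>

int main() {
  unsigned DstElts = 8, NarrowElts = 4, NumSrcElts = 2;
  unsigned NumParts = DstElts / NarrowElts;   // 2 NarrowTy results
  unsigned NumElts = NarrowElts / NumSrcElts; // 2 source operands per result
  std::printf("parts=%u srcsPerPart=%u\n", NumParts, NumElts);
}
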
4573 // Build a sequence of NarrowTy pieces in VecParts for this operand.
4613 LLT NarrowTy) {
4639 std::tie(NumParts, NumLeftover) = getNarrowTypeBreakDown(ValTy, NarrowTy, LeftoverTy);
4641 if (extractParts(ValReg, ValTy, NarrowTy, LeftoverTy, NarrowRegs,
4689 unsigned Offset = isBigEndian ? TotalSize - NarrowTy.getSizeInBits() : 0;
4691 splitTypePieces(NarrowTy, NarrowRegs, NumParts, Offset);
4698 insertParts(ValReg, ValTy, NarrowTy, NarrowRegs,
4708 LLT NarrowTy) {
4711 unsigned NumElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
4840 return fewerElementsVectorUnmergeValues(MI, TypeIdx, NarrowTy);
4843 return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
4847 return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
4850 return fewerElementsVectorExtractInsertVectorElt(MI, TypeIdx, NarrowTy);
4853 return reduceLoadStoreWidth(cast<GLoadStore>(MI), TypeIdx, NarrowTy);
4857 return fewerElementsVectorReductions(MI, TypeIdx, NarrowTy);
4860 return fewerElementsVectorSeqReductions(MI, TypeIdx, NarrowTy);
4862 return fewerElementsVectorShuffle(MI, TypeIdx, NarrowTy);
4866 return fewerElementsBitcast(MI, TypeIdx, NarrowTy);
4876 LLT NarrowTy) {
4887 LLT::fixed_vector(NarrowTy.getSizeInBits() / SrcScalSize, SrcScalSize);
4898 MIRBuilder.buildBitcast(NarrowTy, SrcVRegs[i]).getReg(0));
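
Line 4887 sizes the intermediate piece for the bitcast split: a vector of source-typed scalars with exactly NarrowTy's bit width, so each unmerged piece bitcasts 1:1 (line 4898). A sketch of that arithmetic with assumed widths:

#include <cstdio>

int main() {
  unsigned NarrowBits = 64;  // e.g. NarrowTy = <2 x s32>
  unsigned SrcScalSize = 16; // e.g. source elements are s16
  unsigned PieceElts = NarrowBits / SrcScalSize; // unmerge into <4 x s16> pieces
  std::printf("src piece = <%u x s%u> (%u bits)\n", PieceElts, SrcScalSize,
              PieceElts * SrcScalSize);
}
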
4906 MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy) {
4923 // We only support splitting a shuffle into 2, so adjust NarrowTy accordingly.
4925 NarrowTy =
4927 unsigned NewElts = NarrowTy.getNumElements();
4930 extractParts(Src1Reg, NarrowTy, 2, SplitSrc1Regs, MIRBuilder, MRI);
4931 extractParts(Src2Reg, NarrowTy, 2, SplitSrc2Regs, MIRBuilder, MRI);
4992 LLT EltTy = NarrowTy.getElementType();
5021 Output = MIRBuilder.buildBuildVector(NarrowTy, SVOps).getReg(0);
5024 Output = MIRBuilder.buildUndef(NarrowTy).getReg(0);
5029 ? MIRBuilder.buildUndef(NarrowTy).getReg(0)
5032 Output = MIRBuilder.buildShuffleVector(NarrowTy, Op0, Op1, Ops).getReg(0);
5044 MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy) {
5054 if (NarrowTy.isVector() &&
5055 (SrcTy.getNumElements() % NarrowTy.getNumElements() != 0))
5060 // If NarrowTy is a scalar, we're being asked to scalarize.
5062 NarrowTy.isVector() ? SrcTy.getNumElements() / NarrowTy.getNumElements()
5065 extractParts(SrcReg, NarrowTy, NumParts, SplitSrcs, MIRBuilder, MRI);
5066 if (NarrowTy.isScalar()) {
5067 if (DstTy != NarrowTy)
5078 .buildInstr(ScalarOpc, {NarrowTy},
5094 Acc = MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {Acc, SplitSrcs[Idx]})
5110 isPowerOf2_32(NarrowTy.getNumElements())) {
5111 return tryNarrowPow2Reduction(MI, SrcReg, SrcTy, NarrowTy, ScalarOpc);
5132 LLT NarrowTy) {
5135 if (!NarrowTy.isScalar() || TypeIdx != 2 || DstTy != ScalarTy ||
5136 DstTy != NarrowTy)
5148 extractParts(SrcReg, NarrowTy, NumParts, SplitSrcs, MIRBuilder, MRI);
5151 Acc = MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {Acc, SplitSrcs[i]})
5161 LLT SrcTy, LLT NarrowTy,
5164 // Split the sources into NarrowTy-sized pieces.
5165 extractParts(SrcReg, NarrowTy,
5166 SrcTy.getNumElements() / NarrowTy.getNumElements(), SplitSrcs,
5169 // one NarrowTy-sized value left.
5177 MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {LHS, RHS}).getReg(0);
5182 // Finally, generate the requested NarrowTy-based reduction.
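
tryNarrowPow2Reduction combines pieces pairwise (line 5177) until a single NarrowTy value remains, and only then emits the requested reduction. A standalone sketch using integer addition as the combining opcode, assuming a power-of-two piece count as required by the isPowerOf2_32 check at line 5110:

#include <cstdio>
#include <vector>

// Pairwise tree reduction: halve the piece list each round until one remains.
int treeReduce(std::vector<int> Pieces) {
  while (Pieces.size() > 1) {
    std::vector<int> Next;
    for (size_t I = 0; I + 1 < Pieces.size(); I += 2)
      Next.push_back(Pieces[I] + Pieces[I + 1]); // one op per pair
    Pieces = std::move(Next);
  }
  return Pieces[0]; // the final NarrowTy-sized reduction runs on this value
}

int main() {
  std::printf("%d\n", treeReduce({1, 2, 3, 4, 5, 6, 7, 8})); // 36
}
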
5812 LLT NarrowTy) {
5819 B.buildMul(NarrowTy, Src1Regs[DstIdx], Src2Regs[DstIdx]).getReg(0);
5830 B.buildMul(NarrowTy, Src1Regs[DstIdx - i], Src2Regs[i]);
5837 B.buildUMulH(NarrowTy, Src1Regs[DstIdx - 1 - i], Src2Regs[i]);
5849 B.buildUAddo(NarrowTy, LLT::scalar(1), Factors[0], Factors[1]);
5851 CarrySum = B.buildZExt(NarrowTy, Uaddo.getReg(1)).getReg(0);
5854 B.buildUAddo(NarrowTy, LLT::scalar(1), FactorSum, Factors[i]);
5856 MachineInstrBuilder Carry = B.buildZExt(NarrowTy, Uaddo.getReg(1));
5857 CarrySum = B.buildAdd(NarrowTy, CarrySum, Carry).getReg(0);
5861 FactorSum = B.buildAdd(NarrowTy, Factors[0], Factors[1]).getReg(0);
5863 FactorSum = B.buildAdd(NarrowTy, FactorSum, Factors[i]).getReg(0);
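
multiplyRegisters is schoolbook long multiplication: G_MUL produces the low half of each partial product (line 5830), G_UMULH the high half that feeds the next digit (line 5837), and G_UADDO/G_ADD chains accumulate the carries (lines 5849-5863). A standalone sketch with 32-bit digits and 64-bit intermediates (mul64x64 is an illustrative name):

#include <cstdint>
#include <cstdio>

// Multiply two 64-bit values held as 32-bit parts; each partial product is
// split into a low half (G_MUL analogue) and a high half (G_UMULH analogue).
void mul64x64(const uint32_t A[2], const uint32_t B[2], uint32_t Out[4]) {
  uint64_t Carry = 0;
  for (unsigned D = 0; D < 4; ++D) { // result digit index
    uint64_t Sum = Carry;
    Carry = 0;
    for (unsigned I = 0; I < 2; ++I) {
      unsigned J = D - I;
      if (J < 2) {                     // partial product A[I]*B[J] lands here
        uint64_t P = (uint64_t)A[I] * B[J];
        Sum += (uint32_t)P;            // low half of the partial product
        Carry += P >> 32;              // high half feeds digit D+1
      }
    }
    Out[D] = (uint32_t)Sum;
    Carry += Sum >> 32;                // overflow from accumulating this digit
  }
}

int main() {
  uint32_t A[2] = {0xffffffff, 0xffffffff}; // 2^64 - 1
  uint32_t B[2] = {0xffffffff, 0xffffffff};
  uint32_t R[4];
  mul64x64(A, B, R);
  std::printf("%08x%08x%08x%08x\n", R[3], R[2], R[1], R[0]);
  // (2^64-1)^2 = fffffffffffffffe0000000000000001
}
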
5874 LLT NarrowTy) {
5926 extractParts(Src1, RegTy, NarrowTy, LeftoverTy, Src1Regs, Src1Left,
5928 extractParts(Src2, RegTy, NarrowTy, DummyTy, Src2Regs, Src2Left, MIRBuilder,
5960 insertParts(MI.getOperand(0).getReg(), RegTy, NarrowTy,
5969 LegalizerHelper::narrowScalarMul(MachineInstr &MI, LLT NarrowTy) {
5977 unsigned NarrowSize = NarrowTy.getSizeInBits();
5987 extractParts(Src1, NarrowTy, NumParts, Src1Parts, MIRBuilder, MRI);
5988 extractParts(Src2, NarrowTy, NumParts, Src2Parts, MIRBuilder, MRI);
5989 multiplyRegisters(DstTmpRegs, Src1Parts, Src2Parts, NarrowTy);
6000 LLT NarrowTy) {
6013 NarrowTy.getScalarSizeInBits() < (IsSigned ? 17u : 16u))
6017 narrowScalarDst(MI, NarrowTy, 0,
6025 LLT NarrowTy) {
6029 uint64_t NarrowSize = NarrowTy.getSizeInBits();
6040 extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs,
6052 } else if (SrcStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
6093 LLT NarrowTy) {
6102 extractParts(MI.getOperand(1).getReg(), RegTy, NarrowTy, LeftoverTy, SrcRegs,
6108 uint64_t NarrowSize = NarrowTy.getSizeInBits();
6115 if (DstStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
6124 // The leftover reg is smaller than NarrowTy, so we need to extend it.
6125 SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
6157 Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
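
narrowScalarInsert only rewrites the pieces the inserted range overlaps; non-overlapping pieces are reused unchanged (the fast path at line 6115), and the leftover piece is widened when needed (line 6125). A standalone sketch inserting a 16-bit field across a part boundary of a two-part register (insertBits is an illustrative name):

#include <cstdint>
#include <cstdio>

// Insert the low Width bits of Val into a two-part 128-bit register at bit
// Offset, touching only the parts the insert overlaps.
void insertBits(uint64_t Parts[2], uint64_t Val, unsigned Offset,
                unsigned Width) {
  for (unsigned P = 0; P < 2; ++P) {
    unsigned PartStart = P * 64, PartEnd = PartStart + 64;
    if (Offset >= PartEnd || Offset + Width <= PartStart)
      continue; // no overlap: keep this part unchanged
    unsigned Shift = Offset > PartStart ? Offset - PartStart : 0;
    unsigned Skip = PartStart > Offset ? PartStart - Offset : 0;
    unsigned N = Width - Skip;          // bits of Val landing in this part
    if (Shift + N > 64) N = 64 - Shift; // clip to this part
    uint64_t Mask = (N == 64 ? ~0ULL : ((1ULL << N) - 1)) << Shift;
    Parts[P] = (Parts[P] & ~Mask) | (((Val >> Skip) << Shift) & Mask);
  }
}

int main() {
  uint64_t R[2] = {0, 0};
  insertBits(R, 0xabcd, 56, 16); // straddles the part boundary at bit 64
  std::printf("%016llx %016llx\n", (unsigned long long)R[1],
              (unsigned long long)R[0]); // 00000000000000ab cd00000000000000
}
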
6177 LLT NarrowTy) {
6187 if (!extractParts(MI.getOperand(1).getReg(), DstTy, NarrowTy, LeftoverTy,
6192 if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, Unused,
6197 auto Inst = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
6209 insertParts(DstReg, DstTy, NarrowTy, DstRegs,
6218 LLT NarrowTy) {
6229 LLT GCDTy = extractGCDType(Parts, DstTy, NarrowTy, SrcReg);
6230 LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts, MI.getOpcode());
6239 LLT NarrowTy) {
6255 if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, LeftoverTy,
6260 if (!extractParts(MI.getOperand(3).getReg(), DstTy, NarrowTy, Unused,
6265 auto Select = MIRBuilder.buildSelect(NarrowTy,
6276 insertParts(DstReg, DstTy, NarrowTy, DstRegs,
6285 LLT NarrowTy) {
6290 unsigned NarrowSize = NarrowTy.getSizeInBits();
6296 auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
6298 auto C_0 = B.buildConstant(NarrowTy, 0);
6318 LLT NarrowTy) {
6323 unsigned NarrowSize = NarrowTy.getSizeInBits();
6329 auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
6331 auto C_0 = B.buildConstant(NarrowTy, 0);
6351 LLT NarrowTy) {
6356 unsigned NarrowSize = NarrowTy.getSizeInBits();
6359 auto UnmergeSrc = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));
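
The CTLZ/CTTZ/CTPOP narrowings above all unmerge into two halves: the zero-count ops compare one half against zero to decide which half to count (the constants at lines 6298 and 6331), while population count simply sums per-half counts (line 6359). A standalone sketch on a {Lo, Hi} pair of 64-bit halves, using GCC/Clang builtins:

#include <cstdint>
#include <cstdio>

unsigned ctlz128(uint64_t Lo, uint64_t Hi) {
  // If the high half is zero, count leading zeros in the low half and add the
  // narrow width; __builtin_clzll is undefined on 0, so guard Lo == 0 too.
  if (Hi == 0)
    return 64 + (Lo == 0 ? 64 : __builtin_clzll(Lo));
  return __builtin_clzll(Hi);
}

unsigned ctpop128(uint64_t Lo, uint64_t Hi) {
  // Population count distributes over the halves: ctpop(Lo) + ctpop(Hi).
  return __builtin_popcountll(Lo) + __builtin_popcountll(Hi);
}

int main() {
  std::printf("%u %u\n", ctlz128(0x1, 0x0), ctpop128(0xff, 0x3)); // 127 10
}
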
6374 LLT NarrowTy) {
6382 unsigned ClampSize = NarrowTy.getScalarSizeInBits();
6390 auto Trunc = B.buildTrunc(NarrowTy, Clamp);
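
The FPTOI narrowing keeps the conversion at the wide width, clamps the result into the narrow type's range (ClampSize at line 6382), and truncates (line 6390). A standalone sketch of the clamp-then-trunc step, assuming a signed 16-bit narrow type:

#include <algorithm>
#include <cstdint>
#include <cstdio>

// Saturate a wide signed result into the narrow range, then truncate.
int16_t clampTrunc(int64_t Wide) {
  const int64_t Min = INT16_MIN, Max = INT16_MAX; // ClampSize = 16
  return static_cast<int16_t>(std::clamp(Wide, Min, Max));
}

int main() {
  std::printf("%d %d %d\n", clampTrunc(100000), clampTrunc(-100000),
              clampTrunc(1234)); // 32767 -32768 1234
}
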