Lines matching defs:S32 (definitions and uses of the 32-bit scalar LLT in the AMDGPU GlobalISel legalizer; a short usage sketch follows the listing)

285 static const LLT S32 = LLT::scalar(32);
335 static std::initializer_list<LLT> AllScalarTypes = {S32, S64, S96, S128,
599 const LLT S32 = LLT::scalar(32);
606 B.buildExtractVectorElementConstant(S32, VectorReg, I).getReg(0);
689 S32, S64
693 S32, S64, S16
697 S32, S64, S16, V2S16
700 const LLT MinScalarFPTy = ST.has16BitInsts() ? S16 : S32;
703 getActionDefinitionsBuilder(G_BRCOND).legalFor({S1, S32});
708 .legalFor({S32, S64, V2S16, S16, V4S16, S1, S128, S256})
717 .clampMaxNumElements(0, S32, 16)
725 .legalFor({S64, S32, S16, V2S16})
730 .maxScalar(0, S32);
733 .legalFor({S32, S16, V2S16})
738 .maxScalar(0, S32);
743 .legalFor({S64, S32, S16, V2S16})
751 .legalFor({S32, S16, V2S16})
761 .legalFor({S32, S16, V2S16}) // Clamp modifier
769 .legalFor({S32, S16})
772 .maxScalar(0, S32)
776 .legalFor({S32, S16})
786 .legalFor({S32, S16}) // Clamp modifier
800 .legalFor({S32})
802 .clampScalar(0, S32, S32)
806 .legalFor({S32})
808 .minScalar(0, S32)
814 Mul.maxScalar(0, S32);
818 .legalFor({S32}) // Clamp modifier.
820 .minScalarOrElt(0, S32)
825 .minScalar(0, S32)
833 .minScalar(0, S32)
840 .customFor({S32, S64})
841 .clampScalar(0, S32, S64)
846 .legalFor({S32})
847 .maxScalar(0, S32);
862 .legalFor({S32, S1, S64, V2S32, S16, V2S16, V4S16})
863 .clampScalar(0, S32, S64)
871 .legalFor({{S32, S1}, {S32, S32}})
872 .clampScalar(0, S32, S32)
881 .legalFor({S1, S32, S64, S16, GlobalPtr,
884 .clampScalar(0, S32, S64)
888 .legalFor({S32, S64, S16})
897 .clampScalarOrElt(0, S32, MaxScalar)
899 .clampMaxNumElements(0, S32, 16);
906 .legalFor({{PrivatePtr, S32}});
923 .legalFor({S32, S64});
925 .customFor({S32, S64});
927 .customFor({S32, S64});
941 FPOpActions.clampMaxNumElementsStrict(0, S32, 2);
959 .clampScalar(0, S32, S64)
968 .clampScalar(0, ST.has16BitInsts() ? S16 : S32, S64);
972 .clampScalar(0, ST.has16BitInsts() ? S16 : S32, S64);
976 .clampScalar(0, ST.has16BitInsts() ? S16 : S32, S64);
987 .customFor({S32, S64})
991 .legalFor({S32, S64, S16})
996 .legalFor({{S32, S32}, {S64, S32}, {S16, S16}})
999 .clampScalar(1, S32, S32)
1003 .customFor({{S32, S32}, {S64, S32}, {S16, S16}, {S16, S32}})
1008 .customFor({S32, S64, S16})
1016 .legalFor({S32, S64})
1018 .clampScalar(0, S32, S64);
1021 .legalFor({S32, S64})
1023 .clampScalar(0, S32, S64);
1027 .legalFor({{S32, S32}, {S64, S32}})
1029 .clampScalar(0, S32, S64)
1030 .clampScalar(1, S32, S32)
1034 .customFor({{S32, S32}, {S64, S32}})
1036 .minScalar(0, S32)
1037 .clampScalar(1, S32, S32)
1042 .legalFor({{S32, S64}, {S16, S32}})
1047 .legalFor({{S64, S32}, {S32, S16}})
1048 .narrowScalarFor({{S64, S16}}, changeTo(0, S32))
1055 .legalFor({S32, S16})
1061 .legalFor({S32})
1068 .clampScalar(0, S32, S64);
1073 FMad.customFor({S32, S16});
1075 FMad.customFor({S32});
1083 FRem.customFor({S16, S32, S64});
1085 FRem.minScalar(0, S32)
1086 .customFor({S32, S64});
1102 .legalFor({{S64, S32}, {S32, S16}, {S64, S16},
1103 {S32, S1}, {S64, S1}, {S16, S1}})
1105 .clampScalar(0, S32, S64)
1110 .legalFor({{S32, S32}, {S64, S32}, {S16, S32}})
1112 .customFor({{S32, S64}, {S64, S64}});
1115 IToFP.clampScalar(1, S32, S64)
1116 .minScalar(0, S32)
1121 .legalFor({{S32, S32}, {S32, S64}, {S32, S16}})
1122 .customFor({{S64, S32}, {S64, S64}})
1123 .narrowScalarFor({{S64, S16}}, changeTo(0, S32));
1127 FPToI.minScalar(1, S32);
1129 FPToI.minScalar(0, S32)
1135 .customFor({S16, S32})
1147 .legalFor({S16, S32, S64})
1153 .legalFor({S32, S64})
1154 .clampScalar(0, S32, S64)
1159 .legalFor({S32})
1161 .clampScalar(0, S32, S64)
1172 .legalIf(all(sameSize(0, 1), typeInSet(1, {S64, S32})))
1189 {S1}, {S32, S64, GlobalPtr, LocalPtr, ConstantPtr, PrivatePtr, FlatPtr})
1191 {S32}, {S32, S64, GlobalPtr, LocalPtr, ConstantPtr, PrivatePtr, FlatPtr});
1198 .clampScalar(1, S32, S64)
1200 .legalIf(all(typeInSet(0, {S1, S32}), isPointer(1)));
1207 FCmpBuilder.legalForCartesianProduct({S32}, {S16, S32});
1211 .clampScalar(1, S32, S64)
1217 ExpOps.customFor({{S32}, {S16}});
1219 ExpOps.customFor({S32});
1220 ExpOps.clampScalar(0, MinScalarFPTy, S32)
1224 .clampScalar(0, MinScalarFPTy, S32)
1228 Log2Ops.customFor({S32});
1238 LogOps.customFor({S32, S16});
1239 LogOps.clampScalar(0, MinScalarFPTy, S32)
1244 .legalFor({{S32, S32}, {S32, S64}})
1245 .clampScalar(0, S32, S32)
1247 .clampScalar(1, S32, S64)
1271 .clampScalar(0, S32, S32)
1272 .clampScalar(1, S32, S64)
1279 .legalFor({{S32, S32}, {S32, S64}})
1281 .clampScalar(0, S32, S32)
1282 .clampScalar(1, S32, S64)
1288 .legalFor({{S32, S32}, {S32, S64}})
1289 .clampScalar(0, S32, S32)
1290 .clampScalar(1, S32, S64)
1298 .legalFor({S32, S64})
1299 .clampScalar(0, S32, S64)
1305 .legalFor({S16, S32, V2S16})
1310 .clampScalar(0, S16, S32)
1315 .legalFor({S32, S16, V2S16})
1323 .legalFor({S32, S16})
1332 .legalFor({S32})
1337 .maxScalar(0, S32)
1342 .legalFor({S32})
1343 .minScalar(0, S32)
1352 .legalForCartesianProduct(AddrSpaces32, {S32})
1368 .legalForCartesianProduct(AddrSpaces32, {S32})
1431 Actions.legalForTypesWithMemDesc({{S32, GlobalPtr, S32, GlobalAlign32},
1437 {S32, GlobalPtr, S8, GlobalAlign8},
1438 {S32, GlobalPtr, S16, GlobalAlign16},
1440 {S32, LocalPtr, S32, 32},
1443 {S32, LocalPtr, S8, 8},
1444 {S32, LocalPtr, S16, 16},
1445 {V2S16, LocalPtr, S32, 32},
1447 {S32, PrivatePtr, S32, 32},
1448 {S32, PrivatePtr, S8, 8},
1449 {S32, PrivatePtr, S16, 16},
1450 {V2S16, PrivatePtr, S32, 32},
1452 {S32, ConstantPtr, S32, GlobalAlign32},
1590 .minScalar(0, S32)
1591 .narrowScalarIf(isWideScalarExtLoadTruncStore(0), changeTo(0, S32))
1599 .legalForTypesWithMemDesc({{S32, GlobalPtr, S8, 8},
1600 {S32, GlobalPtr, S16, 2 * 8},
1601 {S32, LocalPtr, S8, 8},
1602 {S32, LocalPtr, S16, 16},
1603 {S32, PrivatePtr, S8, 8},
1604 {S32, PrivatePtr, S16, 16},
1605 {S32, ConstantPtr, S8, 8},
1606 {S32, ConstantPtr, S16, 2 * 8}})
1614 {{S32, FlatPtr, S8, 8}, {S32, FlatPtr, S16, 16}});
1624 ExtLoads.clampScalar(0, S32, S32)
1633 .legalFor({{S32, GlobalPtr}, {S32, LocalPtr},
1635 {S32, RegionPtr}, {S64, RegionPtr}});
1637 Atomics.legalFor({{S32, FlatPtr}, {S64, FlatPtr}});
1643 Atomic.legalFor({{S32, LocalPtr}, {S32, RegionPtr}});
1650 Atomic.legalFor({{S32, GlobalPtr}});
1652 Atomic.legalFor({{S32, FlatPtr}});
1659 {S32, GlobalPtr},
1692 .customFor({{S32, GlobalPtr}, {S64, GlobalPtr},
1693 {S32, FlatPtr}, {S64, FlatPtr}})
1694 .legalFor({{S32, LocalPtr}, {S64, LocalPtr},
1695 {S32, RegionPtr}, {S64, RegionPtr}});
1700 .legalForCartesianProduct({S32, S64, S16, V2S32, V2S16, V4S16, GlobalPtr,
1704 {S1, S32})
1709 .clampMaxNumElements(0, S32, 2)
1714 .legalIf(all(isPointer(0), typeInSet(1, {S1, S32})));
1719 .legalFor({{S32, S32}, {S64, S32}});
1738 Shifts.clampScalar(1, S32, S32);
1750 Shifts.clampScalar(1, S32, S32);
1752 Shifts.clampScalar(0, S32, S64);
1755 .minScalar(0, S32)
1807 .clampScalar(EltTypeIdx, S32, S64)
1808 .clampScalar(VecTypeIdx, S32, S64)
1809 .clampScalar(IdxTypeIdx, S32, S32)
1810 .clampMaxNumElements(VecTypeIdx, S32, 32)
1865 .legalForCartesianProduct(AllS32Vectors, {S32})
1881 .legalFor({V2S16, S32})
1885 BuildVector.minScalarOrElt(0, S32);
1888 .customFor({V2S16, S32})
1897 .clampMaxNumElements(0, S32, 32)
1938 .clampScalar(LitTyIdx, S32, S512)
1947 .clampScalar(BigTyIdx, S32, MaxScalar);
1956 changeTo(LitTyIdx, S32));
1984 .legalFor({{S32}, {S64}});
1993 SextInReg.lowerFor({{S32}, {S64}, {S16}});
1997 SextInReg.lowerFor({{S32}, {S64}});
2002 .clampScalar(0, S32, S64)
2011 .legalFor({{S32, S32}})
2039 .minScalar(0, S32)
2043 .legalFor({{S32, S32}, {S64, S32}})
2044 .clampScalar(1, S32, S32)
2045 .clampScalar(0, S32, S64)
2197 const LLT S32 = LLT::scalar(32);
2219 return B.buildUnmerge(S32, Dst).getReg(1);
2253 return B.buildLoad(S32, LoadAddr, *MMO).getReg(0);
2274 return B.buildLoad(S32, LoadAddr, *MMO).getReg(0);
2309 const LLT S32 = LLT::scalar(32);
2366 Register SrcAsInt = B.buildPtrToInt(S32, Src).getReg(0);
2404 auto PtrLo = B.buildPtrToInt(S32, Src);
2405 auto HighAddr = B.buildConstant(S32, AddrHiVal);
2497 LLT S32 = LLT::scalar(32);
2499 auto Const0 = B.buildConstant(S32, FractBits - 32);
2500 auto Const1 = B.buildConstant(S32, ExpBits);
2502 auto ExpPart = B.buildIntrinsic(Intrinsic::amdgcn_ubfe, {S32})
2507 return B.buildSub(S32, ExpPart, B.buildConstant(S32, 1023));
2514 const LLT S32 = LLT::scalar(32);
2521 auto Unmerge = B.buildUnmerge({S32, S32}, Src);
2531 const auto SignBitMask = B.buildConstant(S32, UINT32_C(1) << 31);
2532 auto SignBit = B.buildAnd(S32, Hi, SignBitMask);
2536 const auto Zero32 = B.buildConstant(S32, 0);
2544 auto FiftyOne = B.buildConstant(S32, FractBits - 1);
2563 const LLT S32 = LLT::scalar(32);
2567 auto Unmerge = B.buildUnmerge({S32, S32}, Src);
2568 auto ThirtyTwo = B.buildConstant(S32, 32);
2583 assert(MRI.getType(Dst) == S32);
2585 auto One = B.buildConstant(S32, 1);
2589 auto ThirtyOne = B.buildConstant(S32, 31);
2590 auto X = B.buildXor(S32, Unmerge.getReg(0), Unmerge.getReg(1));
2591 auto OppositeSign = B.buildAShr(S32, X, ThirtyOne);
2592 auto MaxShAmt = B.buildAdd(S32, ThirtyTwo, OppositeSign);
2593 auto LS = B.buildIntrinsic(Intrinsic::amdgcn_sffbh, {S32})
2595 auto LS2 = B.buildSub(S32, LS, One);
2596 ShAmt = B.buildUMin(S32, LS2, MaxShAmt);
2598 ShAmt = B.buildCTLZ(S32, Unmerge.getReg(1));
2600 auto Unmerge2 = B.buildUnmerge({S32, S32}, Norm);
2601 auto Adjust = B.buildUMin(S32, One, Unmerge2.getReg(0));
2602 auto Norm2 = B.buildOr(S32, Unmerge2.getReg(1), Adjust);
2603 auto FVal = Signed ? B.buildSITOFP(S32, Norm2) : B.buildUITOFP(S32, Norm2);
2604 auto Scale = B.buildSub(S32, ThirtyTwo, ShAmt);
2621 const LLT S32 = LLT::scalar(32);
2624 assert((SrcLT == S32 || SrcLT == S64) && MRI.getType(Dst) == S64);
2639 if (Signed && SrcLT == S32) {
2645 Sign = B.buildAShr(S32, Src, B.buildConstant(S32, 31));
2646 Trunc = B.buildFAbs(S32, Trunc, Flags);
2656 S32, llvm::bit_cast<float>(UINT32_C(/*2^-32*/ 0x2f800000)));
2658 S32, llvm::bit_cast<float>(UINT32_C(/*-2^32*/ 0xcf800000)));
2665 auto Hi = (Signed && SrcLT == S64) ? B.buildFPTOSI(S32, FloorMul)
2666 : B.buildFPTOUI(S32, FloorMul);
2667 auto Lo = B.buildFPTOUI(S32, Fma);
2669 if (Signed && SrcLT == S32) {
2900 LLT S32 = LLT::scalar(32);
2906 : MRI.createGenericVirtualRegister(S32);
2921 Register AddrHi = MRI.createGenericVirtualRegister(S32);
3000 LLT S32 = LLT::scalar(32);
3001 auto Sz = B.buildIntrinsic(Intrinsic::amdgcn_groupstaticsize, {S32});
3838 const LLT S32 = LLT::scalar(32);
3846 assert(MRI.getType(Src0) == S32);
3851 auto Merge = B.buildMergeLikeInstr(S32, {Src0, Src1});
3881 const LLT S32 = LLT::scalar(32);
3889 Zero32 = B.buildConstant(S32, 0).getReg(0);
3917 LocalAccum = B.buildZExt(S32, CarryIn[0]).getReg(0);
3923 CarryAccum = B.buildZExt(S32, CarryIn[0]).getReg(0);
3926 B.buildUAdde(S32, S1, CarryAccum, getZero32(), CarryIn[i])
3937 B.buildUAdde(S32, S1, CarryAccum, LocalAccum, CarryIn.back());
3972 auto Mul = B.buildMul(S32, Src0[j0], Src1[j1]);
3977 LocalAccum[0] = B.buildAdd(S32, LocalAccum[0], Mul).getReg(0);
3980 B.buildUAdde(S32, S1, LocalAccum[0], Mul, CarryIn.back())
4027 auto Unmerge = B.buildUnmerge(S32, Tmp);
4085 Lo = B.buildUAddo(S32, S1, Accum[2 * i - 1], SeparateOddOut[0]);
4087 Lo = B.buildAdd(S32, Accum[2 * i - 1], SeparateOddOut[0]);
4089 Lo = B.buildUAdde(S32, S1, Accum[2 * i - 1], SeparateOddOut[0],
4095 auto Hi = B.buildUAdde(S32, S1, Accum[2 * i], SeparateOddOut[1],
4150 LLT S32 = LLT::scalar(32);
4153 Src0Parts.push_back(MRI.createGenericVirtualRegister(S32));
4154 Src1Parts.push_back(MRI.createGenericVirtualRegister(S32));
4199 auto ShiftAmt = B.buildConstant(S32, 32u - NumBits);
4200 auto Extend = B.buildAnyExt(S32, {Src}).getReg(0u);
4201 auto Shift = B.buildShl(S32, Extend, ShiftAmt);
4202 auto Ctlz = B.buildInstr(AMDGPU::G_AMDGPU_FFBH_U32, {S32}, {Shift});
4271 const LLT S32 = LLT::scalar(32);
4280 auto ShiftAmt = B.buildConstant(S32, Shift);
4281 AndMaskSrc = B.buildLShr(S32, LiveIn, ShiftAmt).getReg(0);
4284 B.buildAnd(DstReg, AndMaskSrc, B.buildConstant(S32, Mask >> Shift));
4455 LLT S32 = LLT::scalar(32);
4460 if (DstTy == S32)
4474 const LLT S32 = LLT::scalar(32);
4480 auto FloatY = B.buildUITOFP(S32, Y);
4481 auto RcpIFlag = B.buildInstr(AMDGPU::G_AMDGPU_RCP_IFLAG, {S32}, {FloatY});
4482 auto Scale = B.buildFConstant(S32, llvm::bit_cast<float>(0x4f7ffffe));
4483 auto ScaledY = B.buildFMul(S32, RcpIFlag, Scale);
4484 auto Z = B.buildFPTOUI(S32, ScaledY);
4487 auto NegY = B.buildSub(S32, B.buildConstant(S32, 0), Y);
4488 auto NegYZ = B.buildMul(S32, NegY, Z);
4489 Z = B.buildAdd(S32, Z, B.buildUMulH(S32, Z, NegYZ));
4492 auto Q = B.buildUMulH(S32, X, Z);
4493 auto R = B.buildSub(S32, X, B.buildMul(S32, Q, Y));
4496 auto One = B.buildConstant(S32, 1);
4499 Q = B.buildSelect(S32, Cond, B.buildAdd(S32, Q, One), Q);
4500 R = B.buildSelect(S32, Cond, B.buildSub(S32, R, Y), R);
4505 B.buildSelect(DstDivReg, Cond, B.buildAdd(S32, Q, One), Q);
4508 B.buildSelect(DstRemReg, Cond, B.buildSub(S32, R, Y), R);
4526 const LLT S32 = LLT::scalar(32);
4527 auto Unmerge = B.buildUnmerge(S32, Val);
4529 auto CvtLo = B.buildUITOFP(S32, Unmerge.getReg(0));
4530 auto CvtHi = B.buildUITOFP(S32, Unmerge.getReg(1));
4533 S32, CvtHi, // 2**32
4534 B.buildFConstant(S32, llvm::bit_cast<float>(0x4f800000)), CvtLo);
4536 auto Rcp = B.buildInstr(AMDGPU::G_AMDGPU_RCP_IFLAG, {S32}, {Mad});
4538 S32, Rcp, B.buildFConstant(S32, llvm::bit_cast<float>(0x5f7ffffc)));
4542 S32, Mul1, B.buildFConstant(S32, llvm::bit_cast<float>(0x2f800000)));
4543 auto Trunc = B.buildIntrinsicTrunc(S32, Mul2);
4547 S32, Trunc, B.buildFConstant(S32, llvm::bit_cast<float>(0xcf800000)),
4550 auto ResultLo = B.buildFPTOUI(S32, Mad2);
4551 auto ResultHi = B.buildFPTOUI(S32, Trunc);
4561 const LLT S32 = LLT::scalar(32);
4576 auto UnmergeMulHi1 = B.buildUnmerge(S32, MulHi1);
4580 auto Add1_Lo = B.buildUAddo(S32, S1, RcpLo, MulHi1_Lo);
4581 auto Add1_Hi = B.buildUAdde(S32, S1, RcpHi, MulHi1_Hi, Add1_Lo.getReg(1));
4586 auto UnmergeMulHi2 = B.buildUnmerge(S32, MulHi2);
4590 auto Zero32 = B.buildConstant(S32, 0);
4591 auto Add2_Lo = B.buildUAddo(S32, S1, Add1_Lo, MulHi2_Lo);
4592 auto Add2_Hi = B.buildUAdde(S32, S1, Add1_Hi, MulHi2_Hi, Add2_Lo.getReg(1));
4595 auto UnmergeNumer = B.buildUnmerge(S32, Numer);
4601 auto UnmergeMul3 = B.buildUnmerge(S32, Mul3);
4604 auto Sub1_Lo = B.buildUSubo(S32, S1, NumerLo, Mul3_Lo);
4605 auto Sub1_Hi = B.buildUSube(S32, S1, NumerHi, Mul3_Hi, Sub1_Lo.getReg(1));
4606 auto Sub1_Mi = B.buildSub(S32, NumerHi, Mul3_Hi);
4609 auto UnmergeDenom = B.buildUnmerge(S32, Denom);
4614 auto C1 = B.buildSExt(S32, CmpHi);
4617 auto C2 = B.buildSExt(S32, CmpLo);
4620 auto C3 = B.buildSelect(S32, CmpEq, C2, C1);
4627 auto Sub2_Lo = B.buildUSubo(S32, S1, Sub1_Lo, DenomLo);
4628 auto Sub2_Mi = B.buildUSube(S32, S1, Sub1_Mi, DenomHi, Sub1_Lo.getReg(1));
4629 auto Sub2_Hi = B.buildUSube(S32, S1, Sub2_Mi, Zero32, Sub2_Lo.getReg(1));
4636 B.buildSExt(S32, B.buildICmp(CmpInst::ICMP_UGE, S1, Sub2_Hi, DenomHi));
4638 B.buildSExt(S32, B.buildICmp(CmpInst::ICMP_UGE, S1, Sub2_Lo, DenomLo));
4640 S32, B.buildICmp(CmpInst::ICMP_EQ, S1, Sub2_Hi, DenomHi), C5, C4);
4644 auto Sub3_Lo = B.buildUSubo(S32, S1, Sub2_Lo, DenomLo);
4646 auto Sub3_Mi = B.buildUSube(S32, S1, Sub2_Mi, DenomHi, Sub2_Lo.getReg(1));
4647 auto Sub3_Hi = B.buildUSube(S32, S1, Sub3_Mi, Zero32, Sub3_Lo.getReg(1));
4691 const LLT S32 = LLT::scalar(32);
4697 if (Ty == S32)
4712 const LLT S32 = LLT::scalar(32);
4715 if (Ty != S32 && Ty != S64)
4722 auto SignBitOffset = B.buildConstant(S32, Ty.getSizeInBits() - 1);
4755 if (Ty == S32)
4888 LLT S32 = LLT::scalar(32);
4890 auto LHSExt = B.buildFPExt(S32, LHS, Flags);
4891 auto RHSExt = B.buildFPExt(S32, RHS, Flags);
4893 auto RCP = B.buildIntrinsic(Intrinsic::amdgcn_rcp, {S32})
4897 auto QUOT = B.buildFMul(S32, LHSExt, RCP, Flags);
4951 LLT S32 = LLT::scalar(32);
4954 auto One = B.buildFConstant(S32, 1.0f);
4957 B.buildIntrinsic(Intrinsic::amdgcn_div_scale, {S32, S1})
4963 B.buildIntrinsic(Intrinsic::amdgcn_div_scale, {S32, S1})
4969 auto ApproxRcp = B.buildIntrinsic(Intrinsic::amdgcn_rcp, {S32})
4972 auto NegDivScale0 = B.buildFNeg(S32, DenominatorScaled, Flags);
4990 auto Fma0 = B.buildFMA(S32, NegDivScale0, ApproxRcp, One, Flags);
4991 auto Fma1 = B.buildFMA(S32, Fma0, ApproxRcp, ApproxRcp, Flags);
4992 auto Mul = B.buildFMul(S32, NumeratorScaled, Fma1, Flags);
4993 auto Fma2 = B.buildFMA(S32, NegDivScale0, Mul, NumeratorScaled, Flags);
4994 auto Fma3 = B.buildFMA(S32, Fma2, Fma1, Mul, Flags);
4995 auto Fma4 = B.buildFMA(S32, NegDivScale0, Fma3, NumeratorScaled, Flags);
5007 auto Fmas = B.buildIntrinsic(Intrinsic::amdgcn_div_fmas, {S32})
5072 LLT S32 = LLT::scalar(32);
5074 auto NumUnmerge = B.buildUnmerge(S32, LHS);
5075 auto DenUnmerge = B.buildUnmerge(S32, RHS);
5076 auto Scale0Unmerge = B.buildUnmerge(S32, DivScale0);
5077 auto Scale1Unmerge = B.buildUnmerge(S32, DivScale1);
5148 LLT S32 = LLT::scalar(32);
5151 auto Abs = B.buildFAbs(S32, RHS, Flags);
5154 auto C0 = B.buildFConstant(S32, 0x1p+96f);
5155 auto C1 = B.buildFConstant(S32, 0x1p-32f);
5156 auto C2 = B.buildFConstant(S32, 1.0f);
5159 auto Sel = B.buildSelect(S32, CmpRes, C1, C2, Flags);
5161 auto Mul0 = B.buildFMul(S32, RHS, Sel, Flags);
5163 auto RCP = B.buildIntrinsic(Intrinsic::amdgcn_rcp, {S32})
5167 auto Mul1 = B.buildFMul(S32, LHS, RCP, Flags);
5297 const LLT S32 = LLT::scalar(32);
5308 auto ZeroInt = B.buildConstant(S32, 0);
5312 auto ScaleUpFactor = B.buildConstant(S32, 256);
5313 auto ScaleUp = B.buildSelect(S32, Scaling, ScaleUpFactor, ZeroInt);
5340 auto ScaleDownFactor = B.buildConstant(S32, -128);
5341 auto ScaleDown = B.buildSelect(S32, Scaling, ScaleDownFactor, ZeroInt);
5478 Src0 = B.buildAnyExt(S32, Src0).getReg(0);
5486 Register LaneOpDst = createLaneOp(Src0, Src1, Src2, S32);
5495 LLT PartialResTy = S32;
5506 // Handle all other cases via S32 pieces;
5570 LLT S32 = LLT::scalar(32);
5573 auto Unmerge = B.buildUnmerge(S32, Pointer);
5577 auto AndMask = B.buildConstant(S32, 0x0000ffff);
5578 auto Masked = B.buildAnd(S32, HighHalf, AndMask);
5588 ShiftedStride = B.buildConstant(S32, ShiftedStrideVal);
5590 auto ExtStride = B.buildAnyExt(S32, Stride);
5591 auto ShiftConst = B.buildConstant(S32, 16);
5592 ShiftedStride = B.buildShl(S32, ExtStride, ShiftConst);
5594 NewHighHalf = B.buildOr(S32, Masked, ShiftedStride);
5673 const LLT S32 = LLT::scalar(32);
5700 BaseReg = B.buildConstant(S32, Overflow).getReg(0);
5702 auto OverflowVal = B.buildConstant(S32, Overflow);
5703 BaseReg = B.buildAdd(S32, BaseReg, OverflowVal).getReg(0);
5708 BaseReg = B.buildConstant(S32, 0).getReg(0);
5719 const LLT S32 = LLT::scalar(32);
5728 WideRegs.push_back(B.buildAnyExt(S32, Unmerge.getReg(I)).getReg(0));
5732 return B.buildBuildVector(LLT::fixed_vector(NumElts, S32), WideRegs)
5739 Reg = B.buildBitcast(S32, Reg).getReg(0);
5741 PackedRegs.resize(2, B.buildUndef(S32).getReg(0));
5742 return B.buildBuildVector(LLT::fixed_vector(2, S32), PackedRegs)
5753 return B.buildBitcast(LLT::fixed_vector(3, S32), Reg).getReg(0);
5758 Reg = B.buildBitcast(LLT::fixed_vector(2, S32), Reg).getReg(0);
5759 auto Unmerge = B.buildUnmerge(S32, Reg);
5762 PackedRegs.resize(4, B.buildUndef(S32).getReg(0));
5763 return B.buildBuildVector(LLT::fixed_vector(4, S32), PackedRegs)
5813 const LLT S32 = LLT::scalar(32);
5835 VIndex = B.buildConstant(S32, 0).getReg(0);
5920 const LLT S32 = LLT::scalar(32);
5946 VIndex = B.buildConstant(S32, 0).getReg(0);
6011 LLT LoadTy = LLT::fixed_vector(NumLoadDWords, S32);
6016 Register ExtDst = B.getMRI()->createGenericVirtualRegister(S32);
6024 LoadElts.push_back(B.getMRI()->createGenericVirtualRegister(S32));
6032 Register LoadDstReg = B.getMRI()->createGenericVirtualRegister(S32);
6044 auto Unmerge = B.buildUnmerge(S32, LoadDstReg);
6278 const LLT S32 = LLT::scalar(32);
6279 (void)S32;
6285 assert(B.getMRI()->getType(SrcOp.getReg()) == S32);
6332 const LLT S32 = LLT::scalar(32);
6557 // S32 vector to cover all data, plus TFE result element.
6560 // Register type to use for each loaded component. Will be S32 or V2S16.
6567 RegTy = S32;
6574 TFETy = LLT::fixed_vector(RoundedSize / 32 + 1, S32);
6575 RegTy = !IsTFE && EltSize == 16 ? V2S16 : S32;
6604 if (MRI->getType(Dst1Reg) != S32)
6611 if (Ty == S32) {
6655 // S32. Cast back to the expected type.
6932 const LLT S32 = LLT::scalar(32);
6985 auto packLanes = [&Ops, &S32, &V3S32, &B](Register Src) {
6986 auto Unmerge = B.buildUnmerge({S32, S32, S32}, Src);
7002 S32, B.buildMergeLikeInstr(V2S16, {UnmergeRayInvDir.getReg(0),
7006 S32, B.buildMergeLikeInstr(V2S16, {UnmergeRayInvDir.getReg(1),
7010 S32, B.buildMergeLikeInstr(V2S16, {UnmergeRayInvDir.getReg(2),
7020 auto Unmerge = B.buildUnmerge({S32, S32}, NodePtr);
7028 auto packLanes = [&Ops, &S32, &B](Register Src) {
7029 auto Unmerge = B.buildUnmerge({S32, S32, S32}, Src);
7039 Register R1 = MRI.createGenericVirtualRegister(S32);
7040 Register R2 = MRI.createGenericVirtualRegister(S32);
7041 Register R3 = MRI.createGenericVirtualRegister(S32);
7117 LLT S32 = LLT::scalar(32);
7119 auto TTMP8 = B.buildCopy(S32, Register(AMDGPU::TTMP8));
7120 auto LSB = B.buildConstant(S32, 25);
7121 auto Width = B.buildConstant(S32, 5);
7141 B.buildIntrinsic(Intrinsic::amdgcn_s_getreg, {S32},
7145 B.buildIntrinsic(Intrinsic::amdgcn_s_getreg, {S32},
7160 auto Unmerge = B.buildUnmerge({S32, S32}, MI.getOperand(0));
7462 LLT S32 = LLT::scalar(32);
7463 if (MRI.getType(Index) != S32)
7464 MI.getOperand(5).setReg(B.buildAnyExt(S32, Index).getReg(0));
7471 LLT S32 = LLT::scalar(32);
7472 if (MRI.getType(Index) != S32)
7473 MI.getOperand(7).setReg(B.buildAnyExt(S32, Index).getReg(0));
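The matches above fall into two recurring patterns: S32 used as the canonical legal scalar type in the getActionDefinitionsBuilder rule chains (legalFor, clampScalar, minScalar, and similar), and S32 used as the result type of MachineIRBuilder instructions inside the custom lowering helpers. The following is a minimal sketch of both patterns, not taken from the file; the opcode (G_ADD) and the helper names are illustrative only, and it assumes the standard GlobalISel headers and a context where a LegalizerInfo and MachineIRBuilder are available.

// Minimal usage sketch (illustrative, not from the file above).
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/TargetOpcodes.h"

using namespace llvm;

static const LLT S32 = LLT::scalar(32);
static const LLT S64 = LLT::scalar(64);

// Pattern 1: S32 as the canonical legal scalar in the rule definitions.
// G_ADD is a placeholder opcode; the real per-opcode rules are in the listing.
static void buildExampleRules(LegalizerInfo &LI) {
  LI.getActionDefinitionsBuilder(TargetOpcode::G_ADD)
      .legalFor({S32, S64})     // 32- and 64-bit scalars are natively legal
      .clampScalar(0, S32, S64) // widen narrower scalars, narrow wider ones
      .scalarize(0);            // split vector operands into scalar pieces
}

// Pattern 2: S32 as the result type of MachineIRBuilder instructions
// inside a custom lowering helper.
static Register emitLow32(MachineIRBuilder &B, Register Src64) {
  // Split a 64-bit value into two S32 halves and return the low half.
  auto Unmerge = B.buildUnmerge(S32, Src64);
  return Unmerge.getReg(0);
}

The first pattern corresponds to the rule-definition chains early in the listing (source lines roughly 689 through 2045); the second corresponds to the B.build* calls in the lowering helpers from source line 2197 onward.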