Lines Matching defs:S32 (each entry gives the original source line number, then the matching code)

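These matches appear to come from LLVM's AMDGPU GlobalISel legalizer (AMDGPULegalizerInfo.cpp). Most of them follow one of two patterns. The first is the declarative rule-building style in the legalizer constructor, where S32 is the LLT handle for a 32-bit scalar passed to LegalizeRuleSet methods such as legalFor and clampScalar. The fragment below is a minimal sketch of that pattern, not copied from the file; it assumes the standard GlobalISel API and a constructor body where getActionDefinitionsBuilder is available, and G_ADD is used only as a representative opcode.

    // Sketch only: how S32 is typically defined and then consumed by the
    // rule-building calls that dominate the matches below.
    const LLT S32 = LLT::scalar(32);
    const LLT S64 = LLT::scalar(64);

    getActionDefinitionsBuilder(TargetOpcode::G_ADD)
        .legalFor({S32})           // 32-bit scalar form is natively legal
        .clampScalar(0, S32, S64)  // widen narrower / narrow wider scalars
        .scalarize(0);             // split vector cases into scalar ops
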
285 static const LLT S32 = LLT::scalar(32);
338 S32, S64, S96, S128, S160, S192, S224, S256, S512, S1024};
600 const LLT S32 = LLT::scalar(32);
607 B.buildExtractVectorElementConstant(S32, VectorReg, I).getReg(0);
690 S32, S64
694 S32, S64, S16
698 S32, S64, S16, V2S16
701 const LLT MinScalarFPTy = ST.has16BitInsts() ? S16 : S32;
704 getActionDefinitionsBuilder(G_BRCOND).legalFor({S1, S32});
709 .legalFor({S32, S64, V2S16, S16, V4S16, S1, S128, S256})
718 .clampMaxNumElements(0, S32, 16)
726 .legalFor({S64, S32, S16, V2S16})
731 .maxScalar(0, S32);
734 .legalFor({S32, S16, V2S16})
739 .maxScalar(0, S32);
744 .legalFor({S64, S32, S16, V2S16})
752 .legalFor({S32, S16, V2S16})
762 .legalFor({S32, S16, V2S16}) // Clamp modifier
770 .legalFor({S32, S16})
773 .maxScalar(0, S32)
777 .legalFor({S32, S16})
787 .legalFor({S32, S16}) // Clamp modifier
801 .legalFor({S32})
803 .clampScalar(0, S32, S32)
807 .legalFor({S32})
809 .minScalar(0, S32)
815 Mul.maxScalar(0, S32);
819 .legalFor({S32}) // Clamp modifier.
821 .minScalarOrElt(0, S32)
826 .minScalar(0, S32)
834 .minScalar(0, S32)
841 .customFor({S32, S64})
842 .clampScalar(0, S32, S64)
847 .legalFor({S32})
848 .maxScalar(0, S32);
863 .legalFor({S32, S1, S64, V2S32, S16, V2S16, V4S16})
864 .clampScalar(0, S32, S64)
872 .legalFor({{S32, S1}, {S32, S32}})
873 .clampScalar(0, S32, S32)
882 .legalFor({S1, S32, S64, S16, GlobalPtr,
885 .clampScalar(0, S32, S64)
889 .legalFor({S32, S64, S16})
899 .clampScalarOrElt(0, S32, MaxScalar)
901 .clampMaxNumElements(0, S32, 16);
908 .legalFor({{PrivatePtr, S32}});
925 .legalFor({S32, S64});
927 .customFor({S32, S64});
929 .customFor({S32, S64});
943 FPOpActions.clampMaxNumElementsStrict(0, S32, 2);
961 .clampScalar(0, S32, S64)
970 .clampScalar(0, ST.has16BitInsts() ? S16 : S32, S64);
974 .clampScalar(0, ST.has16BitInsts() ? S16 : S32, S64);
978 .clampScalar(0, ST.has16BitInsts() ? S16 : S32, S64);
989 .customFor({S32, S64})
993 .legalFor({S32, S64, S16})
998 .legalFor({{S32, S32}, {S64, S32}, {S16, S16}})
1001 .clampScalar(1, S32, S32)
1005 .customFor({{S32, S32}, {S64, S32}, {S16, S16}, {S16, S32}})
1010 .customFor({S32, S64, S16})
1018 .legalFor({S32, S64})
1020 .clampScalar(0, S32, S64);
1023 .legalFor({S32, S64})
1025 .clampScalar(0, S32, S64);
1029 .legalFor({{S32, S32}, {S64, S32}})
1031 .clampScalar(0, S32, S64)
1032 .clampScalar(1, S32, S32)
1036 .customFor({{S32, S32}, {S64, S32}})
1038 .minScalar(0, S32)
1039 .clampScalar(1, S32, S32)
1046 {{S32, S64}, {S16, S32}, {V2S16, V2S32}, {V2S16, V2S64}});
1048 FPTruncActions.legalFor({{S32, S64}, {S16, S32}});
1052 .legalFor({{S64, S32}, {S32, S16}})
1053 .narrowScalarFor({{S64, S16}}, changeTo(0, S32))
1060 .legalFor({S32, S16})
1066 .legalFor({S32})
1073 .clampScalar(0, S32, S64);
1078 FMad.customFor({S32, S16});
1080 FMad.customFor({S32});
1088 FRem.customFor({S16, S32, S64});
1090 FRem.minScalar(0, S32)
1091 .customFor({S32, S64});
1107 .legalFor({{S64, S32}, {S32, S16}, {S64, S16},
1108 {S32, S1}, {S64, S1}, {S16, S1}})
1110 .clampScalar(0, S32, S64)
1115 .legalFor({{S32, S32}, {S64, S32}, {S16, S32}})
1117 .customFor({{S32, S64}, {S64, S64}});
1120 IToFP.clampScalar(1, S32, S64)
1121 .minScalar(0, S32)
1126 .legalFor({{S32, S32}, {S32, S64}, {S32, S16}})
1127 .customFor({{S64, S32}, {S64, S64}})
1128 .narrowScalarFor({{S64, S16}}, changeTo(0, S32));
1132 FPToI.minScalar(1, S32);
1134 FPToI.minScalar(0, S32)
1145 .legalFor({S16, S32})
1162 .legalFor({S16, S32, S64})
1168 .legalFor({S32, S64})
1169 .clampScalar(0, S32, S64)
1174 .legalFor({S32})
1176 .clampScalar(0, S32, S64)
1187 .legalIf(all(sameSize(0, 1), typeInSet(1, {S64, S32})))
1204 {S1}, {S32, S64, GlobalPtr, LocalPtr, ConstantPtr, PrivatePtr, FlatPtr})
1206 {S32}, {S32, S64, GlobalPtr, LocalPtr, ConstantPtr, PrivatePtr, FlatPtr});
1213 .clampScalar(1, S32, S64)
1215 .legalIf(all(typeInSet(0, {S1, S32}), isPointer(1)));
1222 FCmpBuilder.legalForCartesianProduct({S32}, {S16, S32});
1226 .clampScalar(1, S32, S64)
1232 ExpOps.customFor({{S32}, {S16}});
1234 ExpOps.customFor({S32});
1235 ExpOps.clampScalar(0, MinScalarFPTy, S32)
1239 .clampScalar(0, MinScalarFPTy, S32)
1243 Log2Ops.customFor({S32});
1253 LogOps.customFor({S32, S16});
1254 LogOps.clampScalar(0, MinScalarFPTy, S32)
1259 .legalFor({{S32, S32}, {S32, S64}})
1260 .clampScalar(0, S32, S32)
1262 .clampScalar(1, S32, S64)
1286 .clampScalar(0, S32, S32)
1287 .clampScalar(1, S32, S64)
1294 .legalFor({{S32, S32}, {S32, S64}})
1296 .clampScalar(0, S32, S32)
1297 .clampScalar(1, S32, S64)
1303 .legalFor({{S32, S32}, {S32, S64}})
1304 .clampScalar(0, S32, S32)
1305 .clampScalar(1, S32, S64)
1313 .legalFor({S32, S64})
1314 .clampScalar(0, S32, S64)
1320 .legalFor({S16, S32, V2S16})
1325 .clampScalar(0, S16, S32)
1330 .legalFor({S32, S16, V2S16})
1338 .legalFor({S32, S16})
1347 .legalFor({S32})
1352 .maxScalar(0, S32)
1357 .legalFor({S32})
1358 .minScalar(0, S32)
1367 .legalForCartesianProduct(AddrSpaces32, {S32})
1383 .legalForCartesianProduct(AddrSpaces32, {S32})
1446 Actions.legalForTypesWithMemDesc({{S32, GlobalPtr, S32, GlobalAlign32},
1452 {S32, GlobalPtr, S8, GlobalAlign8},
1453 {S32, GlobalPtr, S16, GlobalAlign16},
1455 {S32, LocalPtr, S32, 32},
1458 {S32, LocalPtr, S8, 8},
1459 {S32, LocalPtr, S16, 16},
1460 {V2S16, LocalPtr, S32, 32},
1462 {S32, PrivatePtr, S32, 32},
1463 {S32, PrivatePtr, S8, 8},
1464 {S32, PrivatePtr, S16, 16},
1465 {V2S16, PrivatePtr, S32, 32},
1467 {S32, ConstantPtr, S32, GlobalAlign32},
1605 .minScalar(0, S32)
1606 .narrowScalarIf(isWideScalarExtLoadTruncStore(0), changeTo(0, S32))
1614 .legalForTypesWithMemDesc({{S32, GlobalPtr, S8, 8},
1615 {S32, GlobalPtr, S16, 2 * 8},
1616 {S32, LocalPtr, S8, 8},
1617 {S32, LocalPtr, S16, 16},
1618 {S32, PrivatePtr, S8, 8},
1619 {S32, PrivatePtr, S16, 16},
1620 {S32, ConstantPtr, S8, 8},
1621 {S32, ConstantPtr, S16, 2 * 8}})
1629 {{S32, FlatPtr, S8, 8}, {S32, FlatPtr, S16, 16}});
1639 ExtLoads.clampScalar(0, S32, S32)
1648 .legalFor({{S32, GlobalPtr}, {S32, LocalPtr},
1650 {S32, RegionPtr}, {S64, RegionPtr}});
1652 Atomics.legalFor({{S32, FlatPtr}, {S64, FlatPtr}});
1658 Atomic.legalFor({{S32, LocalPtr}, {S32, RegionPtr}});
1665 Atomic.legalFor({{S32, GlobalPtr}});
1667 Atomic.legalFor({{S32, FlatPtr}});
1674 {S32, GlobalPtr},
1707 .customFor({{S32, GlobalPtr}, {S64, GlobalPtr},
1708 {S32, FlatPtr}, {S64, FlatPtr}})
1709 .legalFor({{S32, LocalPtr}, {S64, LocalPtr},
1710 {S32, RegionPtr}, {S64, RegionPtr}});
1715 .legalForCartesianProduct({S32, S64, S16, V2S32, V2S16, V4S16, GlobalPtr,
1719 {S1, S32})
1724 .clampMaxNumElements(0, S32, 2)
1729 .legalIf(all(isPointer(0), typeInSet(1, {S1, S32})));
1734 .legalFor({{S32, S32}, {S64, S32}});
1753 Shifts.clampScalar(1, S32, S32);
1765 Shifts.clampScalar(1, S32, S32);
1767 Shifts.clampScalar(0, S32, S64);
1770 .minScalar(0, S32)
1822 .clampScalar(EltTypeIdx, S32, S64)
1823 .clampScalar(VecTypeIdx, S32, S64)
1824 .clampScalar(IdxTypeIdx, S32, S32)
1825 .clampMaxNumElements(VecTypeIdx, S32, 32)
1880 .legalForCartesianProduct(AllS32Vectors, {S32})
1896 .legalFor({V2S16, S32})
1900 BuildVector.minScalarOrElt(0, S32);
1903 .customFor({V2S16, S32})
1912 .clampMaxNumElements(0, S32, 32)
1953 .clampScalar(LitTyIdx, S32, S512)
1962 .clampScalar(BigTyIdx, S32, MaxScalar);
1971 changeTo(LitTyIdx, S32));
1999 .legalFor({{S32}, {S64}});
2008 SextInReg.lowerFor({{S32}, {S64}, {S16}});
2012 SextInReg.lowerFor({{S32}, {S64}});
2017 .clampScalar(0, S32, S64)
2026 .legalFor({{S32, S32}})
2054 .minScalar(0, S32)
2058 .legalFor({{S32, S32}, {S64, S32}})
2059 .clampScalar(1, S32, S32)
2060 .clampScalar(0, S32, S64)
2210 const LLT S32 = LLT::scalar(32);
2232 return B.buildUnmerge(S32, Dst).getReg(1);
2266 return B.buildLoad(S32, LoadAddr, *MMO).getReg(0);
2287 return B.buildLoad(S32, LoadAddr, *MMO).getReg(0);
2322 const LLT S32 = LLT::scalar(32);
2381 Register SrcAsInt = B.buildPtrToInt(S32, Src).getReg(0);
2422 auto PtrLo = B.buildPtrToInt(S32, Src);
2423 auto HighAddr = B.buildConstant(S32, AddrHiVal);
2515 LLT S32 = LLT::scalar(32);
2517 auto Const0 = B.buildConstant(S32, FractBits - 32);
2518 auto Const1 = B.buildConstant(S32, ExpBits);
2520 auto ExpPart = B.buildIntrinsic(Intrinsic::amdgcn_ubfe, {S32})
2525 return B.buildSub(S32, ExpPart, B.buildConstant(S32, 1023));
2532 const LLT S32 = LLT::scalar(32);
2539 auto Unmerge = B.buildUnmerge({S32, S32}, Src);
2549 const auto SignBitMask = B.buildConstant(S32, UINT32_C(1) << 31);
2550 auto SignBit = B.buildAnd(S32, Hi, SignBitMask);
2554 const auto Zero32 = B.buildConstant(S32, 0);
2562 auto FiftyOne = B.buildConstant(S32, FractBits - 1);
2581 const LLT S32 = LLT::scalar(32);
2585 auto Unmerge = B.buildUnmerge({S32, S32}, Src);
2586 auto ThirtyTwo = B.buildConstant(S32, 32);
2601 assert(MRI.getType(Dst) == S32);
2603 auto One = B.buildConstant(S32, 1);
2607 auto ThirtyOne = B.buildConstant(S32, 31);
2608 auto X = B.buildXor(S32, Unmerge.getReg(0), Unmerge.getReg(1));
2609 auto OppositeSign = B.buildAShr(S32, X, ThirtyOne);
2610 auto MaxShAmt = B.buildAdd(S32, ThirtyTwo, OppositeSign);
2611 auto LS = B.buildIntrinsic(Intrinsic::amdgcn_sffbh, {S32})
2613 auto LS2 = B.buildSub(S32, LS, One);
2614 ShAmt = B.buildUMin(S32, LS2, MaxShAmt);
2616 ShAmt = B.buildCTLZ(S32, Unmerge.getReg(1));
2618 auto Unmerge2 = B.buildUnmerge({S32, S32}, Norm);
2619 auto Adjust = B.buildUMin(S32, One, Unmerge2.getReg(0));
2620 auto Norm2 = B.buildOr(S32, Unmerge2.getReg(1), Adjust);
2621 auto FVal = Signed ? B.buildSITOFP(S32, Norm2) : B.buildUITOFP(S32, Norm2);
2622 auto Scale = B.buildSub(S32, ThirtyTwo, ShAmt);
2639 const LLT S32 = LLT::scalar(32);
2642 assert((SrcLT == S32 || SrcLT == S64) && MRI.getType(Dst) == S64);
2657 if (Signed && SrcLT == S32) {
2663 Sign = B.buildAShr(S32, Src, B.buildConstant(S32, 31));
2664 Trunc = B.buildFAbs(S32, Trunc, Flags);
2674 S32, llvm::bit_cast<float>(UINT32_C(/*2^-32*/ 0x2f800000)));
2676 S32, llvm::bit_cast<float>(UINT32_C(/*-2^32*/ 0xcf800000)));
2683 auto Hi = (Signed && SrcLT == S64) ? B.buildFPTOSI(S32, FloorMul)
2684 : B.buildFPTOUI(S32, FloorMul);
2685 auto Lo = B.buildFPTOUI(S32, Fma);
2687 if (Signed && SrcLT == S32) {
2918 LLT S32 = LLT::scalar(32);
2924 : MRI.createGenericVirtualRegister(S32);
2939 Register AddrHi = MRI.createGenericVirtualRegister(S32);
3019 LLT S32 = LLT::scalar(32);
3020 auto Sz = B.buildIntrinsic(Intrinsic::amdgcn_groupstaticsize, {S32});
3857 const LLT S32 = LLT::scalar(32);
3865 assert(MRI.getType(Src0) == S32);
3870 auto Merge = B.buildMergeLikeInstr(S32, {Src0, Src1});
3900 const LLT S32 = LLT::scalar(32);
3908 Zero32 = B.buildConstant(S32, 0).getReg(0);
3936 LocalAccum = B.buildZExt(S32, CarryIn[0]).getReg(0);
3942 CarryAccum = B.buildZExt(S32, CarryIn[0]).getReg(0);
3945 B.buildUAdde(S32, S1, CarryAccum, getZero32(), CarryIn[i])
3956 B.buildUAdde(S32, S1, CarryAccum, LocalAccum, CarryIn.back());
3991 auto Mul = B.buildMul(S32, Src0[j0], Src1[j1]);
3996 LocalAccum[0] = B.buildAdd(S32, LocalAccum[0], Mul).getReg(0);
3999 B.buildUAdde(S32, S1, LocalAccum[0], Mul, CarryIn.back())
4046 auto Unmerge = B.buildUnmerge(S32, Tmp);
4104 Lo = B.buildUAddo(S32, S1, Accum[2 * i - 1], SeparateOddOut[0]);
4106 Lo = B.buildAdd(S32, Accum[2 * i - 1], SeparateOddOut[0]);
4108 Lo = B.buildUAdde(S32, S1, Accum[2 * i - 1], SeparateOddOut[0],
4114 auto Hi = B.buildUAdde(S32, S1, Accum[2 * i], SeparateOddOut[1],
4169 LLT S32 = LLT::scalar(32);
4172 Src0Parts.push_back(MRI.createGenericVirtualRegister(S32));
4173 Src1Parts.push_back(MRI.createGenericVirtualRegister(S32));
4218 auto ShiftAmt = B.buildConstant(S32, 32u - NumBits);
4219 auto Extend = B.buildAnyExt(S32, {Src}).getReg(0u);
4220 auto Shift = B.buildShl(S32, Extend, ShiftAmt);
4221 auto Ctlz = B.buildInstr(AMDGPU::G_AMDGPU_FFBH_U32, {S32}, {Shift});
4290 const LLT S32 = LLT::scalar(32);
4299 auto ShiftAmt = B.buildConstant(S32, Shift);
4300 AndMaskSrc = B.buildLShr(S32, LiveIn, ShiftAmt).getReg(0);
4303 B.buildAnd(DstReg, AndMaskSrc, B.buildConstant(S32, Mask >> Shift));
4474 LLT S32 = LLT::scalar(32);
4479 if (DstTy == S32)
4493 const LLT S32 = LLT::scalar(32);
4499 auto FloatY = B.buildUITOFP(S32, Y);
4500 auto RcpIFlag = B.buildInstr(AMDGPU::G_AMDGPU_RCP_IFLAG, {S32}, {FloatY});
4501 auto Scale = B.buildFConstant(S32, llvm::bit_cast<float>(0x4f7ffffe));
4502 auto ScaledY = B.buildFMul(S32, RcpIFlag, Scale);
4503 auto Z = B.buildFPTOUI(S32, ScaledY);
4506 auto NegY = B.buildSub(S32, B.buildConstant(S32, 0), Y);
4507 auto NegYZ = B.buildMul(S32, NegY, Z);
4508 Z = B.buildAdd(S32, Z, B.buildUMulH(S32, Z, NegYZ));
4511 auto Q = B.buildUMulH(S32, X, Z);
4512 auto R = B.buildSub(S32, X, B.buildMul(S32, Q, Y));
4515 auto One = B.buildConstant(S32, 1);
4518 Q = B.buildSelect(S32, Cond, B.buildAdd(S32, Q, One), Q);
4519 R = B.buildSelect(S32, Cond, B.buildSub(S32, R, Y), R);
4524 B.buildSelect(DstDivReg, Cond, B.buildAdd(S32, Q, One), Q);
4527 B.buildSelect(DstRemReg, Cond, B.buildSub(S32, R, Y), R);
4545 const LLT S32 = LLT::scalar(32);
4546 auto Unmerge = B.buildUnmerge(S32, Val);
4548 auto CvtLo = B.buildUITOFP(S32, Unmerge.getReg(0));
4549 auto CvtHi = B.buildUITOFP(S32, Unmerge.getReg(1));
4552 S32, CvtHi, // 2**32
4553 B.buildFConstant(S32, llvm::bit_cast<float>(0x4f800000)), CvtLo);
4555 auto Rcp = B.buildInstr(AMDGPU::G_AMDGPU_RCP_IFLAG, {S32}, {Mad});
4557 S32, Rcp, B.buildFConstant(S32, llvm::bit_cast<float>(0x5f7ffffc)));
4561 S32, Mul1, B.buildFConstant(S32, llvm::bit_cast<float>(0x2f800000)));
4562 auto Trunc = B.buildIntrinsicTrunc(S32, Mul2);
4566 S32, Trunc, B.buildFConstant(S32, llvm::bit_cast<float>(0xcf800000)),
4569 auto ResultLo = B.buildFPTOUI(S32, Mad2);
4570 auto ResultHi = B.buildFPTOUI(S32, Trunc);
4580 const LLT S32 = LLT::scalar(32);
4595 auto UnmergeMulHi1 = B.buildUnmerge(S32, MulHi1);
4599 auto Add1_Lo = B.buildUAddo(S32, S1, RcpLo, MulHi1_Lo);
4600 auto Add1_Hi = B.buildUAdde(S32, S1, RcpHi, MulHi1_Hi, Add1_Lo.getReg(1));
4605 auto UnmergeMulHi2 = B.buildUnmerge(S32, MulHi2);
4609 auto Zero32 = B.buildConstant(S32, 0);
4610 auto Add2_Lo = B.buildUAddo(S32, S1, Add1_Lo, MulHi2_Lo);
4611 auto Add2_Hi = B.buildUAdde(S32, S1, Add1_Hi, MulHi2_Hi, Add2_Lo.getReg(1));
4614 auto UnmergeNumer = B.buildUnmerge(S32, Numer);
4620 auto UnmergeMul3 = B.buildUnmerge(S32, Mul3);
4623 auto Sub1_Lo = B.buildUSubo(S32, S1, NumerLo, Mul3_Lo);
4624 auto Sub1_Hi = B.buildUSube(S32, S1, NumerHi, Mul3_Hi, Sub1_Lo.getReg(1));
4625 auto Sub1_Mi = B.buildSub(S32, NumerHi, Mul3_Hi);
4628 auto UnmergeDenom = B.buildUnmerge(S32, Denom);
4633 auto C1 = B.buildSExt(S32, CmpHi);
4636 auto C2 = B.buildSExt(S32, CmpLo);
4639 auto C3 = B.buildSelect(S32, CmpEq, C2, C1);
4646 auto Sub2_Lo = B.buildUSubo(S32, S1, Sub1_Lo, DenomLo);
4647 auto Sub2_Mi = B.buildUSube(S32, S1, Sub1_Mi, DenomHi, Sub1_Lo.getReg(1));
4648 auto Sub2_Hi = B.buildUSube(S32, S1, Sub2_Mi, Zero32, Sub2_Lo.getReg(1));
4655 B.buildSExt(S32, B.buildICmp(CmpInst::ICMP_UGE, S1, Sub2_Hi, DenomHi));
4657 B.buildSExt(S32, B.buildICmp(CmpInst::ICMP_UGE, S1, Sub2_Lo, DenomLo));
4659 S32, B.buildICmp(CmpInst::ICMP_EQ, S1, Sub2_Hi, DenomHi), C5, C4);
4663 auto Sub3_Lo = B.buildUSubo(S32, S1, Sub2_Lo, DenomLo);
4665 auto Sub3_Mi = B.buildUSube(S32, S1, Sub2_Mi, DenomHi, Sub2_Lo.getReg(1));
4666 auto Sub3_Hi = B.buildUSube(S32, S1, Sub3_Mi, Zero32, Sub3_Lo.getReg(1));
4710 const LLT S32 = LLT::scalar(32);
4716 if (Ty == S32)
4731 const LLT S32 = LLT::scalar(32);
4734 if (Ty != S32 && Ty != S64)
4741 auto SignBitOffset = B.buildConstant(S32, Ty.getSizeInBits() - 1);
4774 if (Ty == S32)
4907 LLT S32 = LLT::scalar(32);
4922 auto LHSExt = B.buildFPExt(S32, LHS, Flags);
4923 auto RHSExt = B.buildFPExt(S32, RHS, Flags);
4924 auto NegRHSExt = B.buildFNeg(S32, RHSExt);
4925 auto Rcp = B.buildIntrinsic(Intrinsic::amdgcn_rcp, {S32})
4928 auto Quot = B.buildFMul(S32, LHSExt, Rcp, Flags);
4931 Err = B.buildFMAD(S32, NegRHSExt, Quot, LHSExt, Flags);
4932 Quot = B.buildFMAD(S32, Err, Rcp, Quot, Flags);
4933 Err = B.buildFMAD(S32, NegRHSExt, Quot, LHSExt, Flags);
4935 Err = B.buildFMA(S32, NegRHSExt, Quot, LHSExt, Flags);
4936 Quot = B.buildFMA(S32, Err, Rcp, Quot, Flags);
4937 Err = B.buildFMA(S32, NegRHSExt, Quot, LHSExt, Flags);
4939 auto Tmp = B.buildFMul(S32, Err, Rcp, Flags);
4940 Tmp = B.buildAnd(S32, Tmp, B.buildConstant(S32, 0xff800000));
4941 Quot = B.buildFAdd(S32, Tmp, Quot, Flags);
4994 LLT S32 = LLT::scalar(32);
4997 auto One = B.buildFConstant(S32, 1.0f);
5000 B.buildIntrinsic(Intrinsic::amdgcn_div_scale, {S32, S1})
5006 B.buildIntrinsic(Intrinsic::amdgcn_div_scale, {S32, S1})
5012 auto ApproxRcp = B.buildIntrinsic(Intrinsic::amdgcn_rcp, {S32})
5015 auto NegDivScale0 = B.buildFNeg(S32, DenominatorScaled, Flags);
5033 auto Fma0 = B.buildFMA(S32, NegDivScale0, ApproxRcp, One, Flags);
5034 auto Fma1 = B.buildFMA(S32, Fma0, ApproxRcp, ApproxRcp, Flags);
5035 auto Mul = B.buildFMul(S32, NumeratorScaled, Fma1, Flags);
5036 auto Fma2 = B.buildFMA(S32, NegDivScale0, Mul, NumeratorScaled, Flags);
5037 auto Fma3 = B.buildFMA(S32, Fma2, Fma1, Mul, Flags);
5038 auto Fma4 = B.buildFMA(S32, NegDivScale0, Fma3, NumeratorScaled, Flags);
5050 auto Fmas = B.buildIntrinsic(Intrinsic::amdgcn_div_fmas, {S32})
5115 LLT S32 = LLT::scalar(32);
5117 auto NumUnmerge = B.buildUnmerge(S32, LHS);
5118 auto DenUnmerge = B.buildUnmerge(S32, RHS);
5119 auto Scale0Unmerge = B.buildUnmerge(S32, DivScale0);
5120 auto Scale1Unmerge = B.buildUnmerge(S32, DivScale1);
5191 LLT S32 = LLT::scalar(32);
5194 auto Abs = B.buildFAbs(S32, RHS, Flags);
5197 auto C0 = B.buildFConstant(S32, 0x1p+96f);
5198 auto C1 = B.buildFConstant(S32, 0x1p-32f);
5199 auto C2 = B.buildFConstant(S32, 1.0f);
5202 auto Sel = B.buildSelect(S32, CmpRes, C1, C2, Flags);
5204 auto Mul0 = B.buildFMul(S32, RHS, Sel, Flags);
5206 auto RCP = B.buildIntrinsic(Intrinsic::amdgcn_rcp, {S32})
5210 auto Mul1 = B.buildFMul(S32, LHS, RCP, Flags);
5340 const LLT S32 = LLT::scalar(32);
5351 auto ZeroInt = B.buildConstant(S32, 0);
5355 auto ScaleUpFactor = B.buildConstant(S32, 256);
5356 auto ScaleUp = B.buildSelect(S32, Scaling, ScaleUpFactor, ZeroInt);
5383 auto ScaleDownFactor = B.buildConstant(S32, -128);
5384 auto ScaleDown = B.buildSelect(S32, Scaling, ScaleDownFactor, ZeroInt);
5540 Src0 = B.buildAnyExt(S32, Src0).getReg(0);
5548 Register LaneOpDst = createLaneOp(Src0, Src1, Src2, S32);
5567 // Handle all other cases via S32/S64 pieces;
5629 LLT S32 = LLT::scalar(32);
5632 auto Unmerge = B.buildUnmerge(S32, Pointer);
5636 auto AndMask = B.buildConstant(S32, 0x0000ffff);
5637 auto Masked = B.buildAnd(S32, HighHalf, AndMask);
5647 ShiftedStride = B.buildConstant(S32, ShiftedStrideVal);
5649 auto ExtStride = B.buildAnyExt(S32, Stride);
5650 auto ShiftConst = B.buildConstant(S32, 16);
5651 ShiftedStride = B.buildShl(S32, ExtStride, ShiftConst);
5653 NewHighHalf = B.buildOr(S32, Masked, ShiftedStride);
5732 const LLT S32 = LLT::scalar(32);
5759 BaseReg = B.buildConstant(S32, Overflow).getReg(0);
5761 auto OverflowVal = B.buildConstant(S32, Overflow);
5762 BaseReg = B.buildAdd(S32, BaseReg, OverflowVal).getReg(0);
5767 BaseReg = B.buildConstant(S32, 0).getReg(0);
5778 const LLT S32 = LLT::scalar(32);
5787 WideRegs.push_back(B.buildAnyExt(S32, Unmerge.getReg(I)).getReg(0));
5791 return B.buildBuildVector(LLT::fixed_vector(NumElts, S32), WideRegs)
5798 Reg = B.buildBitcast(S32, Reg).getReg(0);
5800 PackedRegs.resize(2, B.buildUndef(S32).getReg(0));
5801 return B.buildBuildVector(LLT::fixed_vector(2, S32), PackedRegs)
5812 return B.buildBitcast(LLT::fixed_vector(3, S32), Reg).getReg(0);
5817 Reg = B.buildBitcast(LLT::fixed_vector(2, S32), Reg).getReg(0);
5818 auto Unmerge = B.buildUnmerge(S32, Reg);
5821 PackedRegs.resize(4, B.buildUndef(S32).getReg(0));
5822 return B.buildBuildVector(LLT::fixed_vector(4, S32), PackedRegs)
5879 const LLT S32 = LLT::scalar(32);
5903 VIndex = B.buildConstant(S32, 0).getReg(0);
5991 const LLT S32 = LLT::scalar(32);
6017 VIndex = B.buildConstant(S32, 0).getReg(0);
6094 LLT LoadTy = LLT::fixed_vector(NumLoadDWords, S32);
6099 Register ExtDst = B.getMRI()->createGenericVirtualRegister(S32);
6107 LoadElts.push_back(B.getMRI()->createGenericVirtualRegister(S32));
6115 Register LoadDstReg = B.getMRI()->createGenericVirtualRegister(S32);
6127 auto Unmerge = B.buildUnmerge(S32, LoadDstReg);
6361 const LLT S32 = LLT::scalar(32);
6362 (void)S32;
6368 assert(B.getMRI()->getType(SrcOp.getReg()) == S32);
6415 const LLT S32 = LLT::scalar(32);
6640 // S32 vector to cover all data, plus TFE result element.
6643 // Register type to use for each loaded component. Will be S32 or V2S16.
6650 RegTy = S32;
6657 TFETy = LLT::fixed_vector(RoundedSize / 32 + 1, S32);
6658 RegTy = !IsTFE && EltSize == 16 ? V2S16 : S32;
6687 if (MRI->getType(Dst1Reg) != S32)
6694 if (Ty == S32) {
6738 // S32. Cast back to the expected type.
7027 const LLT S32 = LLT::scalar(32);
7080 auto packLanes = [&Ops, &S32, &V3S32, &B](Register Src) {
7081 auto Unmerge = B.buildUnmerge({S32, S32, S32}, Src);
7097 S32, B.buildMergeLikeInstr(V2S16, {UnmergeRayInvDir.getReg(0),
7101 S32, B.buildMergeLikeInstr(V2S16, {UnmergeRayInvDir.getReg(1),
7105 S32, B.buildMergeLikeInstr(V2S16, {UnmergeRayInvDir.getReg(2),
7115 auto Unmerge = B.buildUnmerge({S32, S32}, NodePtr);
7123 auto packLanes = [&Ops, &S32, &B](Register Src) {
7124 auto Unmerge = B.buildUnmerge({S32, S32, S32}, Src);
7134 Register R1 = MRI.createGenericVirtualRegister(S32);
7135 Register R2 = MRI.createGenericVirtualRegister(S32);
7136 Register R3 = MRI.createGenericVirtualRegister(S32);
7191 LLT S32 = LLT::scalar(32);
7193 auto TTMP8 = B.buildCopy(S32, Register(AMDGPU::TTMP8));
7194 auto LSB = B.buildConstant(S32, 25);
7195 auto Width = B.buildConstant(S32, 5);
7215 B.buildIntrinsic(Intrinsic::amdgcn_s_getreg, {S32},
7219 B.buildIntrinsic(Intrinsic::amdgcn_s_getreg, {S32},
7234 auto Unmerge = B.buildUnmerge({S32, S32}, MI.getOperand(0));
7538 LLT S32 = LLT::scalar(32);
7539 if (MRI.getType(Index) != S32)
7540 MI.getOperand(5).setReg(B.buildAnyExt(S32, Index).getReg(0));
7547 LLT S32 = LLT::scalar(32);
7548 if (MRI.getType(Index) != S32)
7549 MI.getOperand(7).setReg(B.buildAnyExt(S32, Index).getReg(0));
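The remaining matches, roughly from source line 2210 onward, come from custom lowering helpers, where S32 serves as the piece type for splitting and reassembling 64-bit values through a MachineIRBuilder. The fragment below is a minimal sketch of that second pattern, assuming B is a MachineIRBuilder positioned at the insertion point and Src is an s64 virtual register (illustrative names, not taken from the file); it mirrors the shape of the matches around source lines 2539 and 2549 above.

    const LLT S32 = LLT::scalar(32);
    // Split the 64-bit source into two 32-bit halves (reg 0 = low, reg 1 = high),
    // operate on the pieces as S32 values, then recombine the results.
    auto Unmerge = B.buildUnmerge({S32, S32}, Src);
    Register Hi = Unmerge.getReg(1);
    auto SignBitMask = B.buildConstant(S32, UINT32_C(1) << 31);
    auto SignBit = B.buildAnd(S32, Hi, SignBitMask);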