Lines matching defs: S64

287 static const LLT S64 = LLT::scalar(64);
338 S32, S64, S96, S128, S160, S192, S224, S256, S512, S1024};
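
For reference, S64 here is GlobalISel's low-level type (LLT) for a plain 64-bit scalar; every match below is either a legalization rule built over that type or a MachineIRBuilder call that materializes a 64-bit value. A minimal sketch of the type definitions this listing assumes (the file defines many more, including the vector types such as V2S16 that appear alongside S64; the include path is assumed for a recent LLVM tree):

    #include "llvm/CodeGenTypes/LowLevelType.h"   // LLT; path assumed for a recent LLVM tree
    using namespace llvm;

    static const LLT S1    = LLT::scalar(1);
    static const LLT S16   = LLT::scalar(16);
    static const LLT S32   = LLT::scalar(32);
    static const LLT S64   = LLT::scalar(64);          // the type every match in this listing refers to
    static const LLT V2S16 = LLT::fixed_vector(2, 16); // 2 x 16-bit, seen next to S64 in several rules
    static const LLT V2S32 = LLT::fixed_vector(2, 32);
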
690 S32, S64
694 S32, S64, S16
698 S32, S64, S16, V2S16
709 .legalFor({S32, S64, V2S16, S16, V4S16, S1, S128, S256})
726 .legalFor({S64, S32, S16, V2S16})
744 .legalFor({S64, S32, S16, V2S16})
841 .customFor({S32, S64})
842 .clampScalar(0, S32, S64)
863 .legalFor({S32, S1, S64, V2S32, S16, V2S16, V4S16})
864 .clampScalar(0, S32, S64)
882 .legalFor({S1, S32, S64, S16, GlobalPtr,
885 .clampScalar(0, S32, S64)
889 .legalFor({S32, S64, S16})
890 .clampScalar(0, S16, S64);
915 getActionDefinitionsBuilder({G_GET_FPENV, G_SET_FPENV}).customFor({S64});
925 .legalFor({S32, S64});
927 .customFor({S32, S64});
929 .customFor({S32, S64});
953 .clampScalar(0, S16, S64)
957 .clampScalar(0, S16, S64)
961 .clampScalar(0, S32, S64)
970 .clampScalar(0, ST.has16BitInsts() ? S16 : S32, S64);
974 .clampScalar(0, ST.has16BitInsts() ? S16 : S32, S64);
978 .clampScalar(0, ST.has16BitInsts() ? S16 : S32, S64);
984 .clampScalar(0, S16, S64);
989 .customFor({S32, S64})
993 .legalFor({S32, S64, S16})
995 .clampScalar(0, S16, S64);
998 .legalFor({{S32, S32}, {S64, S32}, {S16, S16}})
1005 .customFor({{S32, S32}, {S64, S32}, {S16, S16}, {S16, S32}})
1010 .customFor({S32, S64, S16})
1017 .customFor({S64})
1018 .legalFor({S32, S64})
1020 .clampScalar(0, S32, S64);
1023 .legalFor({S32, S64})
1025 .clampScalar(0, S32, S64);
1029 .legalFor({{S32, S32}, {S64, S32}})
1031 .clampScalar(0, S32, S64)
1036 .customFor({{S32, S32}, {S64, S32}})
1046 {{S32, S64}, {S16, S32}, {V2S16, V2S32}, {V2S16, V2S64}});
1048 FPTruncActions.legalFor({{S32, S64}, {S16, S32}});
1052 .legalFor({{S64, S32}, {S32, S16}})
1053 .narrowScalarFor({{S64, S16}}, changeTo(0, S32))
1062 .lowerFor({S64, V2S16});
1068 .lowerFor({S64, S16, V2S16});
1073 .clampScalar(0, S32, S64);
1088 FRem.customFor({S16, S32, S64});
1091 .customFor({S32, S64});
1107 .legalFor({{S64, S32}, {S32, S16}, {S64, S16},
1108 {S32, S1}, {S64, S1}, {S16, S1}})
1110 .clampScalar(0, S32, S64)
1115 .legalFor({{S32, S32}, {S64, S32}, {S16, S32}})
1117 .customFor({{S32, S64}, {S64, S64}});
1120 IToFP.clampScalar(1, S32, S64)
1126 .legalFor({{S32, S32}, {S32, S64}, {S32, S16}})
1127 .customFor({{S64, S32}, {S64, S64}})
1128 .narrowScalarFor({{S64, S16}}, changeTo(0, S32));
1140 .clampScalar(0, S16, S64)
1155 .clampScalar(0, S16, S64)
1162 .legalFor({S16, S32, S64})
1163 .clampScalar(0, S16, S64)
1168 .legalFor({S32, S64})
1169 .clampScalar(0, S32, S64)
1175 .customFor({S64})
1176 .clampScalar(0, S32, S64)
1187 .legalIf(all(sameSize(0, 1), typeInSet(1, {S64, S32})))
1204 {S1}, {S32, S64, GlobalPtr, LocalPtr, ConstantPtr, PrivatePtr, FlatPtr})
1206 {S32}, {S32, S64, GlobalPtr, LocalPtr, ConstantPtr, PrivatePtr, FlatPtr});
1213 .clampScalar(1, S32, S64)
1226 .clampScalar(1, S32, S64)
1259 .legalFor({{S32, S32}, {S32, S64}})
1262 .clampScalar(1, S32, S64)
1287 .clampScalar(1, S32, S64)
1294 .legalFor({{S32, S32}, {S32, S64}})
1297 .clampScalar(1, S32, S64)
1303 .legalFor({{S32, S32}, {S32, S64}})
1305 .clampScalar(1, S32, S64)
1310 // S64 is only legal on SALU, and needs to be broken into 32-bit elements in RegBankSelect.
1313 .legalFor({S32, S64})
1314 .clampScalar(0, S32, S64)
1366 .legalForCartesianProduct(AddrSpaces64, {S64})
1382 .legalForCartesianProduct(AddrSpaces64, {S64})
1449 {S64, GlobalPtr, S64, GlobalAlign32},
1456 {S64, LocalPtr, S64, 32},
1470 {S64, ConstantPtr, S64, GlobalAlign32},
1649 {S64, GlobalPtr}, {S64, LocalPtr},
1650 {S32, RegionPtr}, {S64, RegionPtr}});
1652 Atomics.legalFor({{S32, FlatPtr}, {S64, FlatPtr}});
1660 Atomic.legalFor({{S64, LocalPtr}});
1675 {S64, GlobalPtr},
1676 {S64, FlatPtr}
1707 .customFor({{S32, GlobalPtr}, {S64, GlobalPtr},
1708 {S32, FlatPtr}, {S64, FlatPtr}})
1709 .legalFor({{S32, LocalPtr}, {S64, LocalPtr},
1710 {S32, RegionPtr}, {S64, RegionPtr}});
1715 .legalForCartesianProduct({S32, S64, S16, V2S32, V2S16, V4S16, GlobalPtr,
1720 .clampScalar(0, S16, S64)
1734 .legalFor({{S32, S32}, {S64, S32}});
1755 Shifts.clampScalar(0, S16, S64);
1767 Shifts.clampScalar(0, S32, S64);
1822 .clampScalar(EltTypeIdx, S32, S64)
1823 .clampScalar(VecTypeIdx, S32, S64)
1881 .legalForCartesianProduct(AllS64Vectors, {S64})
1996 // S64 is only legal on SALU, and needs to be broken into 32-bit elements in RegBankSelect.
1999 .legalFor({{S32}, {S64}});
2008 SextInReg.lowerFor({{S32}, {S64}, {S16}});
2012 SextInReg.lowerFor({{S32}, {S64}});
2017 .clampScalar(0, S32, S64)
2045 .legalFor({S64});
2047 getActionDefinitionsBuilder(G_READSTEADYCOUNTER).legalFor({S64});
2058 .legalFor({{S32, S32}, {S64, S32}})
2060 .clampScalar(0, S32, S64)
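
Everything matched up to this point (source lines ~690-2060) comes from the ruleset construction in the legalizer-info constructor. Below is a condensed sketch of how one such chained rule is attached to an opcode, assuming the standard LegalizeRuleSet builder API; G_ADD is used purely as a placeholder opcode, and this particular chain is illustrative rather than copied from any single line above:

    #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
    #include "llvm/CodeGen/TargetOpcodes.h"
    using namespace llvm;

    struct SketchLegalizerInfo : LegalizerInfo {
      SketchLegalizerInfo() {
        const LLT S32 = LLT::scalar(32);
        const LLT S64 = LLT::scalar(64);

        // Same shape as the ".legalFor({S32, S64}) ... .clampScalar(0, S32, S64)"
        // chains matched above: declare the legal scalar widths, then tell the
        // legalizer how to coerce everything else onto them.
        getActionDefinitionsBuilder(TargetOpcode::G_ADD) // placeholder opcode
            .legalFor({S32, S64})        // 32- and 64-bit scalars are accepted as-is
            .clampScalar(0, S32, S64)    // widen narrower / narrow wider scalars into range
            .scalarize(0)                // break vector operands into scalar pieces
            .widenScalarToNextPow2(0);   // round odd widths up to a power of two

        getLegacyLegalizerInfo().computeTables();
      }
    };
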
2211 const LLT S64 = LLT::scalar(64);
2229 Register Dst = MRI.createGenericVirtualRegister(S64);
2470 const LLT S64 = LLT::scalar(64);
2473 assert(MRI.getType(Src) == S64);
2479 auto Trunc = B.buildIntrinsicTrunc(S64, Src);
2481 const auto Zero = B.buildFConstant(S64, 0.0);
2482 const auto One = B.buildFConstant(S64, 1.0);
2486 auto Add = B.buildSelect(S64, And, One, Zero);
2533 const LLT S64 = LLT::scalar(64);
2536 assert(MRI.getType(Src) == S64);
2552 const auto FractMask = B.buildConstant(S64, (UINT64_C(1) << FractBits) - 1);
2557 auto SignBit64 = B.buildMergeLikeInstr(S64, {Zero32, SignBit});
2559 auto Shr = B.buildAShr(S64, FractMask, Exp);
2560 auto Not = B.buildNot(S64, Shr);
2561 auto Tmp0 = B.buildAnd(S64, Src, Not);
2567 auto Tmp1 = B.buildSelect(S64, ExpLt0, SignBit64, Tmp0);
2580 const LLT S64 = LLT::scalar(64);
2583 assert(MRI.getType(Src) == S64);
2588 if (MRI.getType(Dst) == S64) {
2589 auto CvtHi = Signed ? B.buildSITOFP(S64, Unmerge.getReg(1))
2590 : B.buildUITOFP(S64, Unmerge.getReg(1));
2592 auto CvtLo = B.buildUITOFP(S64, Unmerge.getReg(0));
2593 auto LdExp = B.buildFLdexp(S64, CvtHi, ThirtyTwo);
2617 auto Norm = B.buildShl(S64, Src, ShAmt);
2638 const LLT S64 = LLT::scalar(64);
2642 assert((SrcLT == S32 || SrcLT == S64) && MRI.getType(Dst) == S64);
2667 if (SrcLT == S64) {
2669 S64, llvm::bit_cast<double>(UINT64_C(/*2^-32*/ 0x3df0000000000000)));
2671 S64, llvm::bit_cast<double>(UINT64_C(/*-2^32*/ 0xc1f0000000000000)));
2683 auto Hi = (Signed && SrcLT == S64) ? B.buildFPTOSI(S32, FloorMul)
2689 Sign = B.buildMergeLikeInstr(S64, {Sign, Sign});
2691 B.buildSub(Dst, B.buildXor(S64, B.buildMergeLikeInstr(S64, {Lo, Hi}), Sign),
3901 const LLT S64 = LLT::scalar(64);
3913 Zero64 = B.buildConstant(S64, 0).getReg(0);
4015 Tmp = B.buildAnyExt(S64, LocalAccum[0]).getReg(0);
4018 Tmp = B.buildMergeLikeInstr(S64, LocalAccum).getReg(0);
4021 Tmp = B.buildZExt(S64, LocalAccum[0]).getReg(0);
4036 auto Mad = B.buildInstr(AMDGPU::G_AMDGPU_MAD_U64_U32, {S64, S1},
4475 LLT S64 = LLT::scalar(64);
4481 if (DstTy == S64)
4581 const LLT S64 = LLT::scalar(64);
4587 auto Rcp = B.buildMergeLikeInstr(S64, {RcpLo, RcpHi});
4589 auto Zero64 = B.buildConstant(S64, 0);
4590 auto NegDenom = B.buildSub(S64, Zero64, Denom);
4592 auto MulLo1 = B.buildMul(S64, NegDenom, Rcp);
4593 auto MulHi1 = B.buildUMulH(S64, Rcp, MulLo1);
4601 auto Add1 = B.buildMergeLikeInstr(S64, {Add1_Lo, Add1_Hi});
4603 auto MulLo2 = B.buildMul(S64, NegDenom, Add1);
4604 auto MulHi2 = B.buildUMulH(S64, Add1, MulLo2);
4612 auto Add2 = B.buildMergeLikeInstr(S64, {Add2_Lo, Add2_Hi});
4618 auto MulHi3 = B.buildUMulH(S64, Numer, Add2);
4619 auto Mul3 = B.buildMul(S64, Denom, MulHi3);
4626 auto Sub1 = B.buildMergeLikeInstr(S64, {Sub1_Lo, Sub1_Hi});
4649 auto Sub2 = B.buildMergeLikeInstr(S64, {Sub2_Lo, Sub2_Hi});
4651 auto One64 = B.buildConstant(S64, 1);
4652 auto Add3 = B.buildAdd(S64, MulHi3, One64);
4662 auto Add4 = B.buildAdd(S64, Add3, One64);
4667 auto Sub3 = B.buildMergeLikeInstr(S64, {Sub3_Lo, Sub3_Hi});
4674 S64, B.buildICmp(CmpInst::ICMP_NE, S1, C6, Zero32), Add4, Add3);
4681 S64, B.buildICmp(CmpInst::ICMP_NE, S1, C6, Zero32), Sub3, Sub2);
4709 const LLT S64 = LLT::scalar(64);
4718 else if (Ty == S64)
4730 const LLT S64 = LLT::scalar(64);
4734 if (Ty != S32 && Ty != S64)
5079 LLT S64 = LLT::scalar(64);
5082 auto One = B.buildFConstant(S64, 1.0);
5084 auto DivScale0 = B.buildIntrinsic(Intrinsic::amdgcn_div_scale, {S64, S1})
5090 auto NegDivScale0 = B.buildFNeg(S64, DivScale0.getReg(0), Flags);
5092 auto Rcp = B.buildIntrinsic(Intrinsic::amdgcn_rcp, {S64})
5096 auto Fma0 = B.buildFMA(S64, NegDivScale0, Rcp, One, Flags);
5097 auto Fma1 = B.buildFMA(S64, Rcp, Fma0, Rcp, Flags);
5098 auto Fma2 = B.buildFMA(S64, NegDivScale0, Fma1, One, Flags);
5100 auto DivScale1 = B.buildIntrinsic(Intrinsic::amdgcn_div_scale, {S64, S1})
5106 auto Fma3 = B.buildFMA(S64, Fma1, Fma2, Fma1, Flags);
5107 auto Mul = B.buildFMul(S64, DivScale1.getReg(0), Fma3, Flags);
5108 auto Fma4 = B.buildFMA(S64, NegDivScale0, Mul, DivScale1.getReg(0), Flags);
5131 auto Fmas = B.buildIntrinsic(Intrinsic::amdgcn_div_fmas, {S64})
5567 // Handle all other cases via S32/S64 pieces;
6926 const LLT S64 = LLT::scalar(64);
6958 Register Temp = B.buildLoad(S64, LoadAddr, *MMO).getReg(0);
7211 if (MRI.getType(Src) != S64)
7231 if (MRI.getType(Src) != S64)
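
From source line ~2211 onward the matches sit inside the custom-lowering helpers, where S64 is re-declared locally and fed to MachineIRBuilder calls. As one worked example of that style, here is a sketch of a 64-bit ceil lowering reconstructed from the buildIntrinsicTrunc / buildFConstant / buildSelect matches above plus the usual identity ceil(x) = trunc(x) + (x > 0 && x != trunc(x) ? 1.0 : 0.0); the function name and surrounding scaffolding are illustrative, not taken from the file:

    #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
    #include "llvm/IR/InstrTypes.h"   // CmpInst predicates
    using namespace llvm;

    // Sketch: lower a 64-bit ceil-style operation with the builder calls visible
    // in the listing. Dst and Src are assumed to already be s64 virtual registers.
    static void lowerFCeil64Sketch(MachineIRBuilder &B, Register Dst, Register Src) {
      const LLT S1 = LLT::scalar(1);
      const LLT S64 = LLT::scalar(64);

      auto Trunc = B.buildIntrinsicTrunc(S64, Src);   // trunc(x): drop the fraction
      auto Zero = B.buildFConstant(S64, 0.0);
      auto One = B.buildFConstant(S64, 1.0);

      // Add 1.0 only when x is positive and not already an integer.
      auto Gt0 = B.buildFCmp(CmpInst::FCMP_OGT, S1, Src, Zero);
      auto NeTrunc = B.buildFCmp(CmpInst::FCMP_ONE, S1, Src, Trunc);
      auto And = B.buildAnd(S1, Gt0, NeTrunc);
      auto Add = B.buildSelect(S64, And, One, Zero);

      B.buildFAdd(Dst, Trunc, Add);
    }
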