Lines Matching +full:abs +full:- +full:flat
1 //===-- AMDGPUISelDAGToDAG.cpp - A dag to dag inst selector for AMDGPU ----===//
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //==-----------------------------------------------------------------------===//
12 //===----------------------------------------------------------------------===//
39 #define DEBUG_TYPE "amdgpu-isel"
43 //===----------------------------------------------------------------------===//
45 //===----------------------------------------------------------------------===//
52 // Figure out if this is really an extract of the high 16-bits of a dword.
58 if (!Idx->isOne())
71 if (ShiftAmt->getZExtValue() == 16) {
81 // Look through operations that obscure just looking at the low 16-bits of the
101 INITIALIZE_PASS_BEGIN(AMDGPUDAGToDAGISelLegacy, "amdgpu-isel",
102 "AMDGPU DAG->DAG Pattern Instruction Selection", false,
111 INITIALIZE_PASS_END(AMDGPUDAGToDAGISelLegacy, "amdgpu-isel",
112 "AMDGPU DAG->DAG Pattern Instruction Selection", false,
115 /// This pass converts a legalized DAG into a AMDGPU-specific
130 Subtarget->checkSubtargetFeatures(MF.getFunction());
136 // XXX - only need to list legal operations.
180 // On gfx10, all 16-bit instructions preserve the high bits.
181 return Subtarget->getGeneration() <= AMDGPUSubtarget::GFX9;
186 return Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS;
190 return Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS;
192 // fcopysign, select and others may be lowered to 32-bit bit operations
202 for (auto &L : LI->getLoopsInPreorder()) {
203 assert(L->isLCSSAForm(DT));
220 assert(Subtarget->d16PreservesUnusedBits());
221 MVT VT = N->getValueType(0).getSimpleVT();
225 SDValue Lo = N->getOperand(0);
226 SDValue Hi = N->getOperand(1);
230 // build_vector lo, (load ptr) -> load_d16_hi ptr, lo
231 // build_vector lo, (zextload ptr from i8) -> load_d16_hi_u8 ptr, lo
232 // build_vector lo, (sextload ptr from i8) -> load_d16_hi_i8 ptr, lo
236 if (LdHi && Hi.hasOneUse() && !LdHi->isPredecessorOf(Lo.getNode())) {
237 SDVTList VTList = CurDAG->getVTList(VT, MVT::Other);
239 SDValue TiedIn = CurDAG->getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Lo);
241 LdHi->getChain(), LdHi->getBasePtr(), TiedIn
245 if (LdHi->getMemoryVT() == MVT::i8) {
246 LoadOp = LdHi->getExtensionType() == ISD::SEXTLOAD ?
249 assert(LdHi->getMemoryVT() == MVT::i16);
253 CurDAG->getMemIntrinsicNode(LoadOp, SDLoc(LdHi), VTList,
254 Ops, LdHi->getMemoryVT(),
255 LdHi->getMemOperand());
257 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), NewLoadHi);
258 CurDAG->ReplaceAllUsesOfValueWith(SDValue(LdHi, 1), NewLoadHi.getValue(1));
262 // build_vector (load ptr), hi -> load_d16_lo ptr, hi
263 // build_vector (zextload ptr from i8), hi -> load_d16_lo_u8 ptr, hi
264 // build_vector (sextload ptr from i8), hi -> load_d16_lo_i8 ptr, hi
268 if (!TiedIn || LdLo->isPredecessorOf(TiedIn.getNode()))
271 SDVTList VTList = CurDAG->getVTList(VT, MVT::Other);
273 if (LdLo->getMemoryVT() == MVT::i8) {
274 LoadOp = LdLo->getExtensionType() == ISD::SEXTLOAD ?
277 assert(LdLo->getMemoryVT() == MVT::i16);
280 TiedIn = CurDAG->getNode(ISD::BITCAST, SDLoc(N), VT, TiedIn);
283 LdLo->getChain(), LdLo->getBasePtr(), TiedIn
287 CurDAG->getMemIntrinsicNode(LoadOp, SDLoc(LdLo), VTList,
288 Ops, LdLo->getMemoryVT(),
289 LdLo->getMemOperand());
291 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), NewLoadLo);
292 CurDAG->ReplaceAllUsesOfValueWith(SDValue(LdLo, 1), NewLoadLo.getValue(1));
300 if (!Subtarget->d16PreservesUnusedBits())
303 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
306 while (Position != CurDAG->allnodes_begin()) {
307 SDNode *N = &*--Position;
308 if (N->use_empty())
311 switch (N->getOpcode()) {
322 CurDAG->RemoveDeadNodes();
324 CurDAG->dump(););
329 if (N->isUndef())
332 const SIInstrInfo *TII = Subtarget->getInstrInfo();
334 return TII->isInlineConstant(C->getAPIntValue());
337 return TII->isInlineConstant(C->getValueAPF());
348 if (!N->isMachineOpcode()) {
349 if (N->getOpcode() == ISD::CopyToReg) {
350 Register Reg = cast<RegisterSDNode>(N->getOperand(1))->getReg();
352 MachineRegisterInfo &MRI = CurDAG->getMachineFunction().getRegInfo();
357 = static_cast<const GCNSubtarget *>(Subtarget)->getRegisterInfo();
358 return TRI->getPhysRegBaseClass(Reg);
364 switch (N->getMachineOpcode()) {
367 Subtarget->getInstrInfo()->get(N->getMachineOpcode());
372 if (RegClass == -1)
375 return Subtarget->getRegisterInfo()->getRegClass(RegClass);
378 unsigned RCID = N->getConstantOperandVal(0);
380 Subtarget->getRegisterInfo()->getRegClass(RCID);
382 SDValue SubRegOp = N->getOperand(OpNo + 1);
383 unsigned SubRegIdx = SubRegOp->getAsZExtVal();
384 return Subtarget->getRegisterInfo()->getSubClassWithSubReg(SuperRC,
394 for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i)
395 Ops.push_back(N->getOperand(i));
398 return CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops);
405 assert(N->getOperand(0).getValueType() == MVT::Other && "Expected chain");
407 SDValue M0 = Lowering.copyToM0(*CurDAG, N->getOperand(0), SDLoc(N), Val);
412 unsigned AS = cast<MemSDNode>(N)->getAddressSpace();
414 if (Subtarget->ldsRequiresM0Init())
415 return glueCopyToM0(N, CurDAG->getTargetConstant(-1, SDLoc(N), MVT::i32));
417 MachineFunction &MF = CurDAG->getMachineFunction();
418 unsigned Value = MF.getInfo<SIMachineFunctionInfo>()->getGDSSize();
420 glueCopyToM0(N, CurDAG->getTargetConstant(Value, SDLoc(N), MVT::i32));
427 SDNode *Lo = CurDAG->getMachineNode(
429 CurDAG->getTargetConstant(Imm & 0xFFFFFFFF, DL, MVT::i32));
431 CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
432 CurDAG->getTargetConstant(Imm >> 32, DL, MVT::i32));
434 CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
435 SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
436 SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32)};
438 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, VT, Ops);
442 EVT VT = N->getValueType(0);
446 SDValue RegClass = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
449 CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT, N->getOperand(0),
461 bool IsGCN = CurDAG->getSubtarget().getTargetTriple().getArch() ==
463 RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
465 unsigned NOps = N->getNumOperands();
468 if (isa<RegisterSDNode>(N->getOperand(i))) {
474 RegSeqArgs[1 + (2 * i)] = N->getOperand(i);
475 RegSeqArgs[1 + (2 * i) + 1] = CurDAG->getTargetConstant(Sub, DL, MVT::i32);
479 assert(N->getOpcode() == ISD::SCALAR_TO_VECTOR && NOps < NumVectorElts);
480 MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
487 CurDAG->getTargetConstant(Sub, DL, MVT::i32);
493 CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(), RegSeqArgs);
497 unsigned int Opc = N->getOpcode();
498 if (N->isMachineOpcode()) {
499 N->setNodeId(-1);
521 if (N->getValueType(0) != MVT::i64)
529 if (N->getValueType(0) != MVT::i32)
550 EVT VT = N->getValueType(0);
565 SIRegisterInfo::getSGPRClassForBitWidth(NumVectorElts * 32)->getID();
572 if (N->getValueType(0) == MVT::i128) {
573 RC = CurDAG->getTargetConstant(AMDGPU::SGPR_128RegClassID, DL, MVT::i32);
574 SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32);
575 SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32);
576 } else if (N->getValueType(0) == MVT::i64) {
577 RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32);
578 SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
579 SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
583 const SDValue Ops[] = { RC, N->getOperand(0), SubReg0,
584 N->getOperand(1), SubReg1 };
585 ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
586 N->getValueType(0), Ops));
592 if (N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N))
597 Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue();
602 Imm = C->getZExtValue();
608 ReplaceNode(N, buildSMovImm64(DL, Imm, N->getValueType(0)));
621 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
625 ConstantSDNode *Width = dyn_cast<ConstantSDNode>(N->getOperand(2));
631 uint32_t OffsetVal = Offset->getZExtValue();
632 uint32_t WidthVal = Width->getZExtValue();
634 ReplaceNode(N, getBFE32(Signed, SDLoc(N), N->getOperand(0), OffsetVal,
660 if (N->getValueType(0) != MVT::i32)
677 if (N->getValueType(0) == MVT::i32) {
679 N = CurDAG->MorphNodeTo(N, N->getOpcode(), CurDAG->getVTList(NewVT),
680 { N->getOperand(0), N->getOperand(1) });
713 const BasicBlock *BB = FuncInfo->MBB->getBasicBlock();
714 const Instruction *Term = BB->getTerminator();
715 return Term->getMetadata("amdgpu.uniform") ||
716 Term->getMetadata("structurizecfg.uniform");
721 assert(N->getOpcode() == ISD::AND);
723 const APInt &RHS = N->getConstantOperandAPInt(1);
727 const APInt &LHSKnownZeros = CurDAG->computeKnownBits(N->getOperand(0)).Zero;
735 // As we split 64-bit `or` earlier, it's complicated pattern to match, i.e.
764 if (CurDAG->isBaseWithConstantOffset(Addr)) {
779 return "AMDGPU DAG->DAG Pattern Instruction Selection";
796 assert(L->isLCSSAForm(DT) && "Loop is not in LCSSA form!");
801 //===----------------------------------------------------------------------===//
803 //===----------------------------------------------------------------------===//
816 Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
817 Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
820 Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
821 Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
825 Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
828 Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
836 SDNode *Mov = CurDAG->getMachineNode(
838 CurDAG->getTargetConstant(Val, DL, MVT::i32));
845 SDValue LHS = N->getOperand(0);
846 SDValue RHS = N->getOperand(1);
848 unsigned Opcode = N->getOpcode();
854 SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
855 SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
857 SDNode *Lo0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
859 SDNode *Hi0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
862 SDNode *Lo1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
864 SDNode *Hi1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
867 SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue);
875 unsigned Opc = OpcMap[0][N->isDivergent()][IsAdd];
876 unsigned CarryOpc = OpcMap[1][N->isDivergent()][IsAdd];
881 AddLo = CurDAG->getMachineNode(Opc, DL, VTList, Args);
883 SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0), N->getOperand(2) };
884 AddLo = CurDAG->getMachineNode(CarryOpc, DL, VTList, Args);
891 SDNode *AddHi = CurDAG->getMachineNode(CarryOpc, DL, VTList, AddHiArgs);
894 CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
900 SDNode *RegSequence = CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL,
904 // Replace the carry-use
914 SDValue LHS = N->getOperand(0);
915 SDValue RHS = N->getOperand(1);
916 SDValue CI = N->getOperand(2);
918 if (N->isDivergent()) {
919 unsigned Opc = N->getOpcode() == ISD::UADDO_CARRY ? AMDGPU::V_ADDC_U32_e64
921 CurDAG->SelectNodeTo(
922 N, Opc, N->getVTList(),
924 CurDAG->getTargetConstant(0, {}, MVT::i1) /*clamp bit*/});
926 unsigned Opc = N->getOpcode() == ISD::UADDO_CARRY ? AMDGPU::S_ADD_CO_PSEUDO
928 CurDAG->SelectNodeTo(N, Opc, N->getVTList(), {LHS, RHS, CI});
936 bool IsAdd = N->getOpcode() == ISD::UADDO;
937 bool IsVALU = N->isDivergent();
939 for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end(); UI != E;
942 if ((IsAdd && (UI->getOpcode() != ISD::UADDO_CARRY)) ||
943 (!IsAdd && (UI->getOpcode() != ISD::USUBO_CARRY))) {
952 CurDAG->SelectNodeTo(
953 N, Opc, N->getVTList(),
954 {N->getOperand(0), N->getOperand(1),
955 CurDAG->getTargetConstant(0, {}, MVT::i1) /*clamp bit*/});
957 unsigned Opc = N->getOpcode() == ISD::UADDO ? AMDGPU::S_UADDO_PSEUDO
960 CurDAG->SelectNodeTo(N, Opc, N->getVTList(),
961 {N->getOperand(0), N->getOperand(1)});
970 SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[6], Ops[7]);
971 SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
972 SelectVOP3Mods(N->getOperand(3), Ops[5], Ops[4]);
973 Ops[8] = N->getOperand(0);
974 Ops[9] = N->getOperand(4);
978 bool UseFMAC = Subtarget->hasDLInsts() &&
979 cast<ConstantSDNode>(Ops[0])->isZero() &&
980 cast<ConstantSDNode>(Ops[2])->isZero() &&
981 cast<ConstantSDNode>(Ops[4])->isZero();
983 CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), Ops);
991 SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[4], Ops[5]);
992 SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
993 Ops[6] = N->getOperand(0);
994 Ops[7] = N->getOperand(3);
996 CurDAG->SelectNodeTo(N, AMDGPU::V_MUL_F32_e64, N->getVTList(), Ops);
1003 EVT VT = N->getValueType(0);
1013 SelectVOP3BMods0(N->getOperand(0), Ops[1], Ops[0], Ops[6], Ops[7]);
1014 SelectVOP3BMods(N->getOperand(1), Ops[3], Ops[2]);
1015 SelectVOP3BMods(N->getOperand(2), Ops[5], Ops[4]);
1016 CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
1023 bool Signed = N->getOpcode() == AMDGPUISD::MAD_I64_I32;
1025 if (Subtarget->hasMADIntraFwdBug())
1031 SDValue Clamp = CurDAG->getTargetConstant(0, SL, MVT::i1);
1032 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
1034 CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
1041 bool Signed = N->getOpcode() == ISD::SMUL_LOHI;
1043 if (Subtarget->hasMADIntraFwdBug())
1049 SDValue Zero = CurDAG->getTargetConstant(0, SL, MVT::i64);
1050 SDValue Clamp = CurDAG->getTargetConstant(0, SL, MVT::i1);
1051 SDValue Ops[] = {N->getOperand(0), N->getOperand(1), Zero, Clamp};
1052 SDNode *Mad = CurDAG->getMachineNode(Opc, SL, N->getVTList(), Ops);
1054 SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32);
1055 SDNode *Lo = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, SL,
1060 SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32);
1061 SDNode *Hi = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, SL,
1065 CurDAG->RemoveDeadNode(N);
1072 if (!Base || Subtarget->hasUsableDSOffset() ||
1073 Subtarget->unsafeDSOffsetFoldingEnabled())
1078 return CurDAG->SignBitIsZero(Base);
1084 if (CurDAG->isBaseWithConstantOffset(Addr)) {
1088 if (isDSOffsetLegal(N0, C1->getSExtValue())) {
1091 Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
1095 // sub C, x -> add (sub 0, x), C
1097 int64_t ByteOffset = C->getSExtValue();
1099 SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
1101 // XXX - This is kind of hacky. Create a dummy sub node so we can check
1104 SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
1112 // FIXME: Select to VOP3 version for with-carry.
1114 if (Subtarget->hasAddNoCarry()) {
1117 CurDAG->getTargetConstant(0, {}, MVT::i1)); // clamp bit
1121 CurDAG->getMachineNode(SubOp, DL, MVT::i32, Opnds);
1124 Offset = CurDAG->getTargetConstant(ByteOffset, DL, MVT::i16);
1137 if (isDSOffsetLegal(SDValue(), CAddr->getZExtValue())) {
1138 SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
1139 MachineSDNode *MovZero = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
1142 Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
1149 Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i16);
1161 if (!Base || Subtarget->hasUsableDSOffset() ||
1162 Subtarget->unsafeDSOffsetFoldingEnabled())
1167 return CurDAG->SignBitIsZero(Base);
1173 Addr->getFlags().hasNoUnsignedWrap()) ||
1174 Addr->getOpcode() == ISD::OR;
1177 // Check that the base address of flat scratch load/store in the form of `base +
1186 if (Subtarget->hasSignedScratchOffsets())
1198 if (ImmOp->getSExtValue() < 0 && ImmOp->getSExtValue() > -0x40000000)
1202 return CurDAG->SignBitIsZero(LHS);
1205 // Check address value in SGPR/VGPR are legal for flat scratch in the form
1213 if (Subtarget->hasSignedScratchOffsets())
1218 return CurDAG->SignBitIsZero(RHS) && CurDAG->SignBitIsZero(LHS);
1221 // Check address value in SGPR/VGPR are legal for flat scratch in the form
1237 (RHSImm->getSExtValue() < 0 && RHSImm->getSExtValue() > -0x40000000)))
1242 return CurDAG->SignBitIsZero(RHS) && CurDAG->SignBitIsZero(LHS);
1245 // TODO: If offset is too big, put low 16-bit into offset.
1263 if (CurDAG->isBaseWithConstantOffset(Addr)) {
1267 unsigned OffsetValue0 = C1->getZExtValue();
1273 Offset0 = CurDAG->getTargetConstant(OffsetValue0 / Size, DL, MVT::i8);
1274 Offset1 = CurDAG->getTargetConstant(OffsetValue1 / Size, DL, MVT::i8);
1278 // sub C, x -> add (sub 0, x), C
1281 unsigned OffsetValue0 = C->getZExtValue();
1286 SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
1288 // XXX - This is kind of hacky. Create a dummy sub node so we can check
1292 CurDAG->getNode(ISD::SUB, DL, MVT::i32, Zero, Addr.getOperand(1));
1299 if (Subtarget->hasAddNoCarry()) {
1302 CurDAG->getTargetConstant(0, {}, MVT::i1)); // clamp bit
1305 MachineSDNode *MachineSub = CurDAG->getMachineNode(
1309 Offset0 = CurDAG->getTargetConstant(OffsetValue0 / Size, DL, MVT::i8);
1310 Offset1 = CurDAG->getTargetConstant(OffsetValue1 / Size, DL, MVT::i8);
1316 unsigned OffsetValue0 = CAddr->getZExtValue();
1320 SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
1322 CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32, DL, MVT::i32, Zero);
1324 Offset0 = CurDAG->getTargetConstant(OffsetValue0 / Size, DL, MVT::i8);
1325 Offset1 = CurDAG->getTargetConstant(OffsetValue1 / Size, DL, MVT::i8);
1333 Offset0 = CurDAG->getTargetConstant(0, DL, MVT::i8);
1334 Offset1 = CurDAG->getTargetConstant(1, DL, MVT::i8);
1342 // Subtarget prefers to use flat instruction
1344 if (Subtarget->useFlatForGlobal())
1349 Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1);
1350 Offen = CurDAG->getTargetConstant(0, DL, MVT::i1);
1351 Addr64 = CurDAG->getTargetConstant(0, DL, MVT::i1);
1352 SOffset = Subtarget->hasRestrictedSOffset()
1353 ? CurDAG->getRegister(AMDGPU::SGPR_NULL, MVT::i32)
1354 : CurDAG->getTargetConstant(0, DL, MVT::i32);
1358 if (CurDAG->isBaseWithConstantOffset(Addr)) {
1360 if (isUInt<32>(C1->getZExtValue()))
1367 // (add N2, N3) -> addr64, or
1368 // (add (add N2, N3), C1) -> addr64
1371 Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
1373 if (N2->isDivergent()) {
1374 if (N3->isDivergent()) {
1389 Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
1390 } else if (N0->isDivergent()) {
1395 Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
1397 // N0 -> offset, or
1398 // (N0 + C1) -> offset
1399 VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
1405 Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
1409 const SIInstrInfo *TII = Subtarget->getInstrInfo();
1410 if (TII->isLegalMUBUFImmOffset(C1->getZExtValue())) {
1412 Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32);
1417 Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
1419 SDValue(CurDAG->getMachineNode(
1421 CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32)),
1433 if (!Subtarget->hasAddr64())
1440 if (C->getSExtValue()) {
1458 FI ? CurDAG->getTargetFrameIndex(FI->getIndex(), FI->getValueType(0)) : N;
1464 return std::pair(TFI, CurDAG->getTargetConstant(0, DL, MVT::i32));
1473 MachineFunction &MF = CurDAG->getMachineFunction();
1476 Rsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);
1479 int64_t Imm = CAddr->getSExtValue();
1486 CurDAG->getTargetConstant(Imm & ~MaxOffset, DL, MVT::i32);
1487 MachineSDNode *MovHighBits = CurDAG->getMachineNode(
1491 SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
1492 ImmOffset = CurDAG->getTargetConstant(Imm & MaxOffset, DL, MVT::i32);
1497 if (CurDAG->isBaseWithConstantOffset(Addr)) {
1513 // check. For out-of-bounds MUBUF loads, a 0 is returned.
1518 const SIInstrInfo *TII = Subtarget->getInstrInfo();
1519 if (TII->isLegalMUBUFImmOffset(C1) &&
1520 (!Subtarget->privateMemoryResourceIsRangeChecked() ||
1521 CurDAG->SignBitIsZero(N0))) {
1523 ImmOffset = CurDAG->getTargetConstant(C1, DL, MVT::i32);
1530 ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
1537 auto Reg = cast<RegisterSDNode>(Val.getOperand(1))->getReg();
1550 static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo());
1551 const SIInstrInfo *TII = Subtarget->getInstrInfo();
1552 MachineFunction &MF = CurDAG->getMachineFunction();
1558 SRsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);
1560 Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
1568 if (!CAddr || !TII->isLegalMUBUFImmOffset(CAddr->getZExtValue()))
1575 TII->isLegalMUBUFImmOffset(CAddr->getZExtValue())) {
1577 SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
1582 SRsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);
1584 Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i32);
1592 const SIInstrInfo *TII = Subtarget->getInstrInfo();
1597 if (!cast<ConstantSDNode>(Offen)->getSExtValue() &&
1598 !cast<ConstantSDNode>(Idxen)->getSExtValue() &&
1599 !cast<ConstantSDNode>(Addr64)->getSExtValue()) {
1600 uint64_t Rsrc = TII->getDefaultRsrcDataFormat() |
1615 if (Subtarget->hasRestrictedSOffset() && isNullConstant(ByteOffsetNode)) {
1616 SOffset = CurDAG->getRegister(AMDGPU::SGPR_NULL, MVT::i32);
1631 for (SDValue V : N->op_values())
1643 unsigned AS = findMemSDNode(N)->getAddressSpace();
1646 Subtarget->hasFlatSegmentOffsetBug() &&
1647 FlatVariant == SIInstrFlags::FLAT &&
1650 if (Subtarget->hasFlatInstOffsets() && !CanHaveFlatSegmentOffsetBug) {
1655 int64_t COffsetVal = cast<ConstantSDNode>(N1)->getSExtValue();
1657 const SIInstrInfo *TII = Subtarget->getInstrInfo();
1658 if (TII->isLegalFLATOffset(COffsetVal, AS, FlatVariant)) {
1665 // For a FLAT instruction the hardware decides whether to access
1676 TII->splitFlatOffset(COffsetVal, AS, FlatVariant);
1680 SDValue Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
1687 if (Subtarget->hasAddNoCarry()) {
1691 Addr = SDValue(CurDAG->getMachineNode(AddOp, DL, MVT::i32, Opnds), 0);
1695 SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
1696 SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
1698 SDNode *N0Lo = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
1700 SDNode *N0Hi = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
1706 SDVTList VTs = CurDAG->getVTList(MVT::i32, MVT::i1);
1709 CurDAG->getMachineNode(AMDGPU::V_ADD_CO_U32_e64, DL, VTs,
1712 SDNode *Addc = CurDAG->getMachineNode(
1717 CurDAG->getTargetConstant(AMDGPU::VReg_64RegClassID, DL, MVT::i32),
1720 Addr = SDValue(CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL,
1729 Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i32);
1736 return SelectFlatOffsetImpl(N, Addr, VAddr, Offset, SIInstrFlags::FLAT);
1761 // Match (64-bit SGPR base) + (zext vgpr offset) + sext(imm offset)
1774 int64_t COffsetVal = cast<ConstantSDNode>(RHS)->getSExtValue();
1775 const SIInstrInfo *TII = Subtarget->getInstrInfo();
1777 if (TII->isLegalFLATOffset(COffsetVal, AMDGPUAS::GLOBAL_ADDRESS,
1781 } else if (!LHS->isDivergent()) {
1784 // saddr + large_offset -> saddr +
1788 std::tie(SplitImmOffset, RemainderOffset) = TII->splitFlatOffset(
1792 SDNode *VMov = CurDAG->getMachineNode(
1794 CurDAG->getTargetConstant(RemainderOffset, SDLoc(), MVT::i32));
1797 Offset = CurDAG->getTargetConstant(SplitImmOffset, SDLoc(), MVT::i32);
1808 !TII->isInlineConstant(APInt(32, COffsetVal & 0xffffffff)) +
1809 !TII->isInlineConstant(APInt(32, COffsetVal >> 32));
1810 if (Subtarget->getConstantBusLimit(AMDGPU::V_ADD_U32_e64) > NumLiterals)
1820 if (!LHS->isDivergent()) {
1828 if (!SAddr && !RHS->isDivergent()) {
1837 Offset = CurDAG->getTargetConstant(ImmOffset, SDLoc(), MVT::i32);
1842 if (Addr->isDivergent() || Addr.getOpcode() == ISD::UNDEF ||
1846 // It's cheaper to materialize a single 32-bit zero for vaddr than the two
1847 // moves required to copy a 64-bit SGPR to VGPR.
1850 CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32, SDLoc(Addr), MVT::i32,
1851 CurDAG->getTargetConstant(0, SDLoc(), MVT::i32));
1853 Offset = CurDAG->getTargetConstant(ImmOffset, SDLoc(), MVT::i32);
1859 SAddr = CurDAG->getTargetFrameIndex(FI->getIndex(), FI->getValueType(0));
1865 SDValue TFI = CurDAG->getTargetFrameIndex(FI->getIndex(),
1866 FI->getValueType(0));
1867 SAddr = SDValue(CurDAG->getMachineNode(AMDGPU::S_ADD_I32, SDLoc(SAddr),
1875 // Match (32-bit SGPR base) + sext(imm offset)
1879 if (Addr->isDivergent())
1886 if (CurDAG->isBaseWithConstantOffset(Addr) && isFlatScratchBaseLegal(Addr)) {
1887 COffsetVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
1895 const SIInstrInfo *TII = Subtarget->getInstrInfo();
1897 if (!TII->isLegalFLATOffset(COffsetVal, AMDGPUAS::PRIVATE_ADDRESS,
1900 std::tie(SplitImmOffset, RemainderOffset) = TII->splitFlatOffset(
1908 : CurDAG->getTargetConstant(RemainderOffset, DL, MVT::i32);
1909 SAddr = SDValue(CurDAG->getMachineNode(AMDGPU::S_ADD_I32, DL, MVT::i32,
1914 Offset = CurDAG->getTargetConstant(COffsetVal, DL, MVT::i32);
1919 // Check whether the flat scratch SVS swizzle bug affects this access.
1922 if (!Subtarget->hasFlatScratchSVSSwizzleBug())
1928 KnownBits VKnown = CurDAG->computeKnownBits(VAddr);
1931 CurDAG->computeKnownBits(SAddr),
1946 int64_t COffsetVal = cast<ConstantSDNode>(RHS)->getSExtValue();
1947 const SIInstrInfo *TII = Subtarget->getInstrInfo();
1949 if (TII->isLegalFLATOffset(COffsetVal, AMDGPUAS::PRIVATE_ADDRESS, true)) {
1952 } else if (!LHS->isDivergent() && COffsetVal > 0) {
1954 // saddr + large_offset -> saddr + (vaddr = large_offset & ~MaxOffset) +
1958 = TII->splitFlatOffset(COffsetVal, AMDGPUAS::PRIVATE_ADDRESS, true);
1961 SDNode *VMov = CurDAG->getMachineNode(
1963 CurDAG->getTargetConstant(RemainderOffset, SDLoc(), MVT::i32));
1970 Offset = CurDAG->getTargetConstant(SplitImmOffset, SDLoc(), MVT::i32);
1982 if (!LHS->isDivergent() && RHS->isDivergent()) {
1985 } else if (!RHS->isDivergent() && LHS->isDivergent()) {
2003 Offset = CurDAG->getTargetConstant(ImmOffset, SDLoc(), MVT::i32);
2016 KnownBits SKnown = CurDAG->computeKnownBits(*SOffset);
2025 // not null) offset. If Imm32Only is true, match only 32-bit immediate
2060 int64_t ByteOffset = IsBuffer ? C->getZExtValue() : C->getSExtValue();
2064 *Offset = CurDAG->getTargetConstant(*EncodedOffset, SL, MVT::i32);
2074 *Offset = CurDAG->getTargetConstant(*EncodedOffset, SL, MVT::i32);
2082 SDValue C32Bit = CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32);
2084 CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32, C32Bit), 0);
2095 // Zero-extend a 32-bit address.
2098 const MachineFunction &MF = CurDAG->getMachineFunction();
2100 unsigned AddrHiVal = Info->get32BitAddressHighBits();
2101 SDValue AddrHi = CurDAG->getTargetConstant(AddrHiVal, SL, MVT::i32);
2104 CurDAG->getTargetConstant(AMDGPU::SReg_64_XEXECRegClassID, SL, MVT::i32),
2106 CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32),
2107 SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32, AddrHi),
2109 CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32),
2112 return SDValue(CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, SL, MVT::i64,
2118 // true, match only 32-bit immediate offsets available on CI.
2133 ImmOff = C->getSExtValue();
2139 // A 32-bit (address + offset) should not cause unsigned 32-bit integer
2142 !Addr->getFlags().hasNoUnsignedWrap())
2147 if (CurDAG->isBaseWithConstantOffset(Addr) || Addr.getOpcode() == ISD::ADD) {
2179 *Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
2193 assert(Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS);
2216 assert(Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS);
2223 // Match the (soffset + offset) pair as a 32-bit register base and
2236 if (CurDAG->isBaseWithConstantOffset(Index)) {
2245 if (C1->getSExtValue() <= 0 || CurDAG->SignBitIsZero(N0) ||
2246 (Index->getOpcode() == ISD::OR && C1->getSExtValue() >= 0)) {
2248 Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32);
2257 Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
2264 if (Val->isDivergent()) {
2266 SDValue Off = CurDAG->getTargetConstant(Offset, DL, MVT::i32);
2267 SDValue W = CurDAG->getTargetConstant(Width, DL, MVT::i32);
2269 return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, Off, W);
2276 SDValue PackedConst = CurDAG->getTargetConstant(PackedVal, DL, MVT::i32);
2278 return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, PackedConst);
2282 // "(a << b) srl c)" ---> "BFE_U32 a, (c-b), (32-c)
2283 // "(a << b) sra c)" ---> "BFE_I32 a, (c-b), (32-c)
2286 const SDValue &Shl = N->getOperand(0);
2287 ConstantSDNode *B = dyn_cast<ConstantSDNode>(Shl->getOperand(1));
2288 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
2291 uint32_t BVal = B->getZExtValue();
2292 uint32_t CVal = C->getZExtValue();
2295 bool Signed = N->getOpcode() == ISD::SRA;
2296 ReplaceNode(N, getBFE32(Signed, SDLoc(N), Shl.getOperand(0), CVal - BVal,
2297 32 - CVal));
2305 switch (N->getOpcode()) {
2307 if (N->getOperand(0).getOpcode() == ISD::SRL) {
2308 // "(a srl b) & mask" ---> "BFE_U32 a, b, popcount(mask)"
2310 const SDValue &Srl = N->getOperand(0);
2312 ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
2315 uint32_t ShiftVal = Shift->getZExtValue();
2316 uint32_t MaskVal = Mask->getZExtValue();
2328 if (N->getOperand(0).getOpcode() == ISD::AND) {
2329 // "(a & mask) srl b)" ---> "BFE_U32 a, b, popcount(mask >> b)"
2331 const SDValue &And = N->getOperand(0);
2332 ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(N->getOperand(1));
2333 ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(And->getOperand(1));
2336 uint32_t ShiftVal = Shift->getZExtValue();
2337 uint32_t MaskVal = Mask->getZExtValue() >> ShiftVal;
2346 } else if (N->getOperand(0).getOpcode() == ISD::SHL) {
2352 if (N->getOperand(0).getOpcode() == ISD::SHL) {
2359 // sext_inreg (srl x, 16), i8 -> bfe_i32 x, 16, 8
2360 SDValue Src = N->getOperand(0);
2368 unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
2370 Amt->getZExtValue(), Width));
2379 assert(N->getOpcode() == ISD::BRCOND);
2380 if (!N->hasOneUse())
2383 SDValue Cond = N->getOperand(1);
2397 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
2398 return (CC == ISD::SETEQ || CC == ISD::SETNE) && ST->hasScalarCompareEq64();
2405 assert(VCMP->getOpcode() == AMDGPUISD::SETCC);
2415 auto VCMP_CC = cast<CondCodeSDNode>(VCMP.getOperand(2))->get();
2420 if (ISD::isExtOpcode(Cond->getOpcode())) // Skip extension.
2432 SDValue Cond = N->getOperand(1);
2435 CurDAG->SelectNodeTo(N, AMDGPU::SI_BR_UNDEF, MVT::Other,
2436 N->getOperand(2), N->getOperand(0));
2441 const SIRegisterInfo *TRI = ST->getRegisterInfo();
2448 Cond->getOperand(0)->getOpcode() == AMDGPUISD::SETCC) {
2449 SDValue VCMP = Cond->getOperand(0);
2450 auto CC = cast<CondCodeSDNode>(Cond->getOperand(2))->get();
2452 isNullConstant(Cond->getOperand(1)) &&
2453 // We may encounter ballot.i64 in wave32 mode on -O0.
2454 VCMP.getValueType().getSizeInBits() == ST->getWavefrontSize()) {
2466 UseSCCBr = !BallotCond->isDivergent();
2484 Register CondReg = UseSCCBr ? AMDGPU::SCC : TRI->getVCC();
2501 Cond = SDValue(CurDAG->getMachineNode(ST->isWave32() ? AMDGPU::S_AND_B32
2504 CurDAG->getRegister(ST->isWave32() ? AMDGPU::EXEC_LO
2511 SDValue VCC = CurDAG->getCopyToReg(N->getOperand(0), SL, CondReg, Cond);
2512 CurDAG->SelectNodeTo(N, BrOp, MVT::Other,
2513 N->getOperand(2), // Basic Block
2518 if (Subtarget->hasSALUFloatInsts() && N->getValueType(0) == MVT::f32 &&
2519 !N->isDivergent()) {
2520 SDValue Src = N->getOperand(0);
2523 CurDAG->SelectNodeTo(N, AMDGPU::S_CVT_HI_F32_F16, N->getVTList(),
2539 SDValue Chain = N->getOperand(0);
2540 SDValue Ptr = N->getOperand(2);
2542 MachineMemOperand *MMO = M->getMemOperand();
2543 bool IsGDS = M->getAddressSpace() == AMDGPUAS::REGION_ADDRESS;
2546 if (CurDAG->isBaseWithConstantOffset(Ptr)) {
2550 const APInt &OffsetVal = PtrOffset->getAsAPIntVal();
2553 Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i32);
2559 Offset = CurDAG->getTargetConstant(0, SDLoc(), MVT::i32);
2564 CurDAG->getTargetConstant(IsGDS, SDLoc(), MVT::i32),
2566 N->getOperand(N->getNumOperands() - 1) // New glue
2569 SDNode *Selected = CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
2570 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Selected), {MMO});
2577 SDValue Ops[] = {N->getOperand(2), N->getOperand(3), N->getOperand(4),
2578 N->getOperand(5), N->getOperand(0)};
2581 MachineMemOperand *MMO = M->getMemOperand();
2582 SDNode *Selected = CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
2583 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Selected), {MMO});
2606 if (!Subtarget->hasGWS() ||
2608 !Subtarget->hasGWSSemaReleaseAll())) {
2615 const bool HasVSrc = N->getNumOperands() == 4;
2616 assert(HasVSrc || N->getNumOperands() == 3);
2619 SDValue BaseOffset = N->getOperand(HasVSrc ? 3 : 2);
2622 MachineMemOperand *MMO = M->getMemOperand();
2633 // default -1 only set the low 16-bits, we could leave it as-is and add 1 to
2635 glueCopyToM0(N, CurDAG->getTargetConstant(0, SL, MVT::i32));
2636 ImmOffset = ConstOffset->getZExtValue();
2638 if (CurDAG->isBaseWithConstantOffset(BaseOffset)) {
2647 = CurDAG->getMachineNode(AMDGPU::V_READFIRSTLANE_B32, SL, MVT::i32,
2651 = CurDAG->getMachineNode(AMDGPU::S_LSHL_B32, SL, MVT::i32,
2653 CurDAG->getTargetConstant(16, SL, MVT::i32));
2657 SDValue Chain = N->getOperand(0);
2658 SDValue OffsetField = CurDAG->getTargetConstant(ImmOffset, SL, MVT::i32);
2663 Ops.push_back(N->getOperand(2));
2667 SDNode *Selected = CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
2668 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Selected), {MMO});
2672 if (Subtarget->getLDSBankCount() != 16) {
2699 SDValue ToM0 = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL, AMDGPU::M0,
2700 N->getOperand(5), SDValue());
2702 SDVTList VTs = CurDAG->getVTList(MVT::f32, MVT::Other);
2705 CurDAG->getMachineNode(AMDGPU::V_INTERP_MOV_F32, DL, VTs, {
2706 CurDAG->getTargetConstant(2, DL, MVT::i32), // P0
2707 N->getOperand(3), // Attr
2708 N->getOperand(2), // Attrchan
2713 CurDAG->getMachineNode(AMDGPU::V_INTERP_P1LV_F16, DL, MVT::f32, {
2714 CurDAG->getTargetConstant(0, DL, MVT::i32), // $src0_modifiers
2715 N->getOperand(1), // Src0
2716 N->getOperand(3), // Attr
2717 N->getOperand(2), // Attrchan
2718 CurDAG->getTargetConstant(0, DL, MVT::i32), // $src2_modifiers
2719 SDValue(InterpMov, 0), // Src2 - holds two f16 values selected by high
2720 N->getOperand(4), // high
2721 CurDAG->getTargetConstant(0, DL, MVT::i1), // $clamp
2722 CurDAG->getTargetConstant(0, DL, MVT::i32), // $omod
2726 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), SDValue(InterpP1LV, 0));
2730 unsigned IntrID = N->getConstantOperandVal(1);
2734 if (N->getValueType(0) != MVT::i32)
2748 unsigned IntrID = N->getConstantOperandVal(0);
2750 SDNode *ConvGlueNode = N->getGluedNode();
2753 assert(ConvGlueNode->getOpcode() == ISD::CONVERGENCECTRL_GLUE);
2754 ConvGlueNode = ConvGlueNode->getOperand(0).getNode();
2756 CurDAG->getMachineNode(TargetOpcode::CONVERGENCECTRL_GLUE, {},
2784 SDValue Src = N->getOperand(1);
2785 CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), {Src});
2789 SmallVector<SDValue, 4> NewOps(N->op_begin(), N->op_end());
2791 CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), NewOps);
2796 unsigned IntrID = N->getConstantOperandVal(1);
2815 CurDAG->getTargetConstant(Subtarget->getWavefrontSizeLog2(), SDLoc(N), MVT::i32);
2816 CurDAG->SelectNodeTo(N, AMDGPU::S_LSHR_B32, N->getVTList(),
2817 {N->getOperand(0), Log2WaveSize});
2821 SDValue SrcVal = N->getOperand(1);
2828 Register SP = TLI->getStackPointerRegisterToSaveRestore();
2834 SDValue Log2WaveSize = CurDAG->getTargetConstant(
2835 Subtarget->getWavefrontSizeLog2(), SL, MVT::i32);
2837 if (N->isDivergent()) {
2838 SrcVal = SDValue(CurDAG->getMachineNode(AMDGPU::V_READFIRSTLANE_B32, SL,
2843 CopyVal = SDValue(CurDAG->getMachineNode(AMDGPU::S_LSHL_B32, SL, MVT::i32,
2848 SDValue CopyToSP = CurDAG->getCopyToReg(N->getOperand(0), SL, SP, CopyVal);
2849 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), CopyToSP);
2863 // Fold fsub [+-]0 into fneg. This may not have folded depending on the
2866 if (LHS && LHS->isZero()) {
2873 Mods |= SISrcMods::ABS;
2885 SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2897 SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2910 SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2934 SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2955 Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
2956 Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);
2965 Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
2966 Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);
2976 Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
2977 Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);
2994 (!IsDOT || !Subtarget->hasDOTOpSelHazard())) {
3021 Lo = CurDAG->getTargetExtractSubreg(
3027 Hi = CurDAG->getTargetExtractSubreg(
3046 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, SL,
3048 auto RC = Lo->isDivergent() ? AMDGPU::VReg_64RegClassID
3051 CurDAG->getTargetConstant(RC, SL, MVT::i32),
3052 Lo, CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32),
3053 Undef, CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32) };
3055 Src = SDValue(CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, SL,
3058 SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
3063 uint64_t Lit = cast<ConstantFPSDNode>(Lo)->getValueAPF()
3065 if (AMDGPU::isInlinableLiteral32(Lit, Subtarget->hasInv2PiInlineImm())) {
3066 Src = CurDAG->getTargetConstant(Lit, SDLoc(In), MVT::i64);
3067 SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
3075 // Packed instructions do not have abs modifiers.
3078 SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
3091 assert(C->getAPIntValue().getBitWidth() == 1 && "expected i1 value");
3094 unsigned SrcSign = C->getZExtValue();
3098 Src = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
3105 assert(C->getAPIntValue().getBitWidth() == 1 && "expected i1 value");
3108 unsigned SrcVal = C->getZExtValue();
3112 Src = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
3139 Ops.push_back(CurDAG->getTargetConstant(DstRegClass, DL, MVT::i32));
3142 Ops.push_back(CurDAG->getTargetConstant(
3145 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, DstTy, Ops);
3155 // Pack 16-bit elements in pairs into 32-bit register. If both elements are
3156 // unpacked from 32-bit source use it, otherwise pack them using v_perm.
3163 SDValue PackLoLo = CurDAG->getTargetConstant(0x05040100, DL, MVT::i32);
3165 CurDAG->getMachineNode(AMDGPU::V_PERM_B32_e64, DL, MVT::i32,
3190 // Check if all elements also have abs modifier
3195 NegAbsElts.push_back(El->getOperand(0));
3201 // Neg and Abs
3207 // Abs
3218 for (unsigned i = 0; i < BV->getNumOperands(); ++i) {
3220 dyn_cast<BuildVectorSDNode>(stripBitcast(BV->getOperand(i)))) {
3221 for (unsigned i = 0; i < F16Pair->getNumOperands(); ++i) {
3222 SDValue ElF16 = stripBitcast(F16Pair->getOperand(i));
3239 checkWMMAElementsModifiersF16(BV, [&](SDValue Element) -> bool {
3247 if (BV->getNumOperands() * 2 == EltsF16.size()) {
3257 for (unsigned i = 0; i < BV->getNumOperands(); ++i) {
3258 SDValue ElV2f16 = stripBitcast(BV->getOperand(i));
3259 // Based on first element decide which mod we match, neg or abs
3266 if (BV->getNumOperands() == EltsV2F16.size()) {
3273 SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
3286 checkWMMAElementsModifiersF16(BV, [&](SDValue ElF16) -> bool {
3287 // Based on first element decide which mod we match, neg or abs
3297 if (BV->getNumOperands() * 2 == EltsF16.size())
3306 for (unsigned i = 0; i < BV->getNumOperands(); ++i) {
3307 SDValue ElV2f16 = stripBitcast(BV->getOperand(i));
3308 // Based on first element decide which mod we match, neg or abs
3311 if (ElV2f16->getOpcode() != ModOpcode)
3313 EltsV2F16.push_back(ElV2f16->getOperand(0));
3317 if (BV->getNumOperands() == EltsV2F16.size())
3322 SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
3333 assert(BV->getNumOperands() > 0);
3334 // Based on first element decide which mod we match, neg or abs
3335 SDValue ElF32 = stripBitcast(BV->getOperand(0));
3338 for (unsigned i = 0; i < BV->getNumOperands(); ++i) {
3339 SDValue ElF32 = stripBitcast(BV->getOperand(i));
3346 if (BV->getNumOperands() == EltsF32.size())
3351 SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
3358 if (SDValue Splat = BV->getSplatValue(&UndefElements))
3361 unsigned Imm = C->getAPIntValue().getSExtValue();
3362 Src = CurDAG->getTargetConstant(Imm, SDLoc(In), MVT::i32);
3366 unsigned Imm = C->getValueAPF().bitcastToAPInt().getSExtValue();
3367 Src = CurDAG->getTargetConstant(Imm, SDLoc(In), MVT::i32);
3377 if (SDValue Splat32 = SplatSrc32BV->getSplatValue()) {
3380 if (SDValue Splat = SplatSrc16BV->getSplatValue()) {
3381 const SIInstrInfo *TII = Subtarget->getInstrInfo();
3384 RawValue = C->getValueAPF().bitcastToAPInt();
3386 RawValue = C->getAPIntValue();
3395 if (TII->isInlineConstant(FloatVal)) {
3396 Src = CurDAG->getTargetConstant(RawValue.value(), SDLoc(In),
3401 if (TII->isInlineConstant(RawValue.value())) {
3402 Src = CurDAG->getTargetConstant(RawValue.value(), SDLoc(In),
3407 llvm_unreachable("unknown 16-bit type");
3424 ShiftAmt->getZExtValue() % 8 == 0) {
3425 Key = ShiftAmt->getZExtValue() / 8;
3430 IndexKey = CurDAG->getTargetConstant(Key, SDLoc(In), MVT::i32);
3443 ShiftAmt->getZExtValue() == 16) {
3449 IndexKey = CurDAG->getTargetConstant(Key, SDLoc(In), MVT::i32);
3457 SrcMods = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
3479 // Be careful about folding modifiers if we already have an abs. fneg is
3481 if ((Mods & SISrcMods::ABS) == 0) {
3488 if ((ModsTmp & SISrcMods::ABS) != 0)
3489 Mods |= SISrcMods::ABS;
3501 // TODO: Should we try to look for neg/abs here?
3515 SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
3523 SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
3529 return CurDAG->getUNDEF(MVT::i32);
3533 return CurDAG->getConstant(C->getZExtValue() << 16, SL, MVT::i32);
3538 return CurDAG->getConstant(
3539 C->getValueAPF().bitcastToAPInt().getZExtValue() << 16, SL, MVT::i32);
3550 assert(CurDAG->getTarget().getTargetTriple().getArch() == Triple::amdgcn);
3553 static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo());
3555 static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
3559 for (SDNode::use_iterator U = N->use_begin(), E = SDNode::use_end();
3566 if (!RC || SIRI->isSGPRClass(RC))
3572 if (User->isMachineOpcode()) {
3573 unsigned Opc = User->getMachineOpcode();
3574 const MCInstrDesc &Desc = SII->get(Opc);
3578 if (SII->findCommutedOpIndices(Desc, OpIdx, CommuteIdx1)) {
3579 unsigned CommutedOpNo = CommuteIdx1 - Desc.getNumDefs();
3601 const MachineMemOperand *MMO = Ld->getMemOperand();
3602 if (N->isDivergent() && !AMDGPUInstrInfo::isUniformMMO(MMO))
3605 return MMO->getSize().hasValue() &&
3606 Ld->getAlign() >=
3607 Align(std::min(MMO->getSize().getValue().getKnownMinValue(),
3609 ((Ld->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
3610 Ld->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT) ||
3611 (Subtarget->getScalarizeGlobalBehavior() &&
3612 Ld->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS &&
3613 Ld->isSimple() &&
3615 ->isMemOpHasNoClobberedMemOperand(N)));
3626 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_begin();
3627 while (Position != CurDAG->allnodes_end()) {
3640 CurDAG->RemoveDeadNodes();