Lines Matching +full:cold +full:- +full:temp
1 //===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
12 //===----------------------------------------------------------------------===//
43 computeRegisterProperties(Subtarget->getRegisterInfo());
118 if (Subtarget->hasCARRY())
121 if (Subtarget->hasBORROW())
125 if (!Subtarget->hasBFE())
130 if (!Subtarget->hasBFE())
134 if (!Subtarget->hasBFE())
151 // We don't have 64-bit shifts. Thus we need either SHX i64 or SHX_PARTS i32
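A minimal sketch of how a 64-bit shift decomposes into 32-bit parts (plain C++, assuming a shift amount below 64; not the SelectionDAG SHL_PARTS expansion itself):

#include <cstdint>

// Shift-left a 64-bit value held as {lo, hi} 32-bit halves, SHL_PARTS-style.
void shl64_parts(uint32_t lo, uint32_t hi, unsigned amt,
                 uint32_t &outLo, uint32_t &outHi) {
  if (amt == 0) {
    outLo = lo; outHi = hi;
  } else if (amt < 32) {
    outLo = lo << amt;
    outHi = (hi << amt) | (lo >> (32 - amt));
  } else {            // 32 <= amt < 64: the low half moves entirely into the high half
    outLo = 0;
    outHi = lo << (amt - 32);
  }
}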
156 if (!Subtarget->hasFMA())
162 if (!Subtarget->hasBFI())
166 if (!Subtarget->hasBCNT(32))
169 if (!Subtarget->hasBCNT(64))
172 if (Subtarget->hasFFBH())
175 if (Subtarget->hasFFBL())
180 if (Subtarget->hasBFE())
206 if (std::next(I) == I->getParent()->end())
208 return std::next(I)->getOpcode() == R600::RETURN;
214 MachineFunction *MF = BB->getParent();
215 MachineRegisterInfo &MRI = MF->getRegInfo();
217 const R600InstrInfo *TII = Subtarget->getInstrInfo();
223 if (TII->isLDSRetInstr(MI.getOpcode())) {
224 int DstIdx = TII->getOperandIdx(MI.getOpcode(), R600::OpName::dst);
225 assert(DstIdx != -1);
233 NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
234 TII->get(R600::getLDSNoRetOp(MI.getOpcode())));
243 MachineInstr *NewMI = TII->buildDefaultInstruction(
246 TII->addFlag(*NewMI, 0, MO_FLAG_ABS);
251 MachineInstr *NewMI = TII->buildDefaultInstruction(
254 TII->addFlag(*NewMI, 0, MO_FLAG_NEG);
262 TII->addFlag(*defInstr, 0, MO_FLAG_MASK);
267 TII->buildMovImm(*BB, I, MI.getOperand(0).getReg(), MI.getOperand(1)
269 ->getValueAPF()
275 TII->buildMovImm(*BB, I, MI.getOperand(0).getReg(),
281 auto MIB = TII->buildDefaultInstruction(
283 int Idx = TII->getOperandIdx(*MIB, R600::OpName::literal);
286 MIB->getOperand(Idx).ChangeToGA(MO.getGlobal(), MO.getOffset(),
292 MachineInstr *NewMI = TII->buildDefaultInstruction(
294 TII->setImmOperand(*NewMI, R600::OpName::src0_sel,
302 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))
309 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))
317 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::JUMP))
323 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::PRED_X),
328 TII->addFlag(*NewMI, 0, MO_FLAG_PUSH);
329 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::JUMP_COND))
337 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::PRED_X),
342 TII->addFlag(*NewMI, 0, MO_FLAG_PUSH);
343 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::JUMP_COND))
355 EndBlock = BB->end(); NextExportInst != EndBlock;
357 if (NextExportInst->getOpcode() == R600::EG_ExportSwz ||
358 NextExportInst->getOpcode() == R600::R600_ExportSwz) {
359 unsigned CurrentInstExportType = NextExportInst->getOperand(1)
371 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))
392 //===----------------------------------------------------------------------===//
394 //===----------------------------------------------------------------------===//
415 Result.getNode()->getNumValues() == 2) &&
582 switch (N->getOpcode()) {
587 if (N->getValueType(0) == MVT::i1) {
588 Results.push_back(lowerFP_TO_UINT(N->getOperand(0), DAG));
596 if (N->getValueType(0) == MVT::i1) {
597 Results.push_back(lowerFP_TO_SINT(N->getOperand(0), DAG));
672 if (GSD->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS)
676 const GlobalValue *GV = GSD->getGlobal();
684 // On hw >= R700, COS/SIN input must be between -1. and 1.
685 // Thus we lower them to TRIG ( FRACT ( x / 2Pi + 0.5) - 0.5)
690 // TODO: Should this propagate fast-math-flags?
709 DAG.getConstantFP(-0.5, DL, MVT::f32)));
712 // On R600 hw, COS/SIN input must be between -Pi and Pi.
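Taken together with the FRACT-based reduction described a few lines up, the arithmetic is roughly the following, written as plain scalar math (a sketch, not the DAG nodes this function builds):

#include <cmath>

// FRACT(x / 2Pi + 0.5) - 0.5 lands in [-0.5, 0.5), which R700-class TRIG
// takes directly; R600-class parts additionally need the operand rescaled
// to radians (multiply by 2Pi) so it falls in [-Pi, Pi).
float reduceTrigInput(float x) {
  const float InvTwoPi = 0.15915494f;   // 1 / (2*Pi)
  float t = x * InvTwoPi + 0.5f;        // x / 2Pi + 0.5
  float fract = t - std::floor(t);      // FRACT: fractional part, in [0, 1)
  return fract - 0.5f;                  // result in [-0.5, 0.5)
}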
758 Op, DAG.getConstantFP(-1.0f, DL, MVT::f32),
769 // We shouldn't be using an offset wider than 16-bits for implicit parameters.
779 return Cst->isZero();
781 return CstFP->isZero();
787 return CFP->isExactlyValue(1.0);
794 return CFP->getValueAPF().isZero();
808 SDValue Temp;
826 // select_cc f32, f32, -1, 0, cc_supported
828 // select_cc i32, i32, -1, 0, cc_supported
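These patterns correspond to what a hardware SET*-style compare can produce directly, an all-ones or all-zero result chosen by the condition; roughly, in plain C++ (illustrative only):

// SET*-style result: -1 (all bits set) when the comparison holds, 0 otherwise.
int setgt_i32(int a, int b)     { return a > b ? -1 : 0; }
int setgt_f32(float a, float b) { return a > b ? -1 : 0; }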
833 ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
866 ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
886 ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
901 Temp = True;
903 False = Temp;
923 HWTrue = DAG.getConstant(-1, DL, CompareVT);
949 unsigned SrcAS = ASC->getSrcAddressSpace();
950 unsigned DestAS = ASC->getDestAddressSpace();
958 /// LLVM generates byte-addressed pointers. For indirect addressing, we need to
960 /// 16 bytes, (4 x 32bit sub-register), but we need to take into account the
961 /// \p StackWidth, which tells us how many of the 4 sub-registers will be used
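A minimal sketch of the described address arithmetic, assuming each register index covers StackWidth sub-registers of 4 bytes each (illustrative, not the DAG code, which performs the equivalent right shift by a power of two):

// Byte address -> register index: each index step spans StackWidth
// sub-registers of 4 bytes each, so divide by 4 * StackWidth.
unsigned byteAddrToRegIndex(unsigned byteAddr, unsigned stackWidth) {
  return byteAddr / (4 * stackWidth);
}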
1018 assert(Store->isTruncatingStore()
1019 || Store->getValue().getValueType() == MVT::i8);
1020 assert(Store->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS);
1023 if (Store->getMemoryVT() == MVT::i8) {
1024 assert(Store->getAlign() >= 1);
1026 } else if (Store->getMemoryVT() == MVT::i16) {
1027 assert(Store->getAlign() >= 2);
1033 SDValue OldChain = Store->getChain();
1036 SDValue Chain = VectorTrunc ? OldChain->getOperand(0) : OldChain;
1037 SDValue BasePtr = Store->getBasePtr();
1038 SDValue Offset = Store->getOffset();
1039 EVT MemVT = Store->getMemoryVT();
1067 // it also handles sub i32 non-truncating stores (like i1)
1069 Store->getValue());
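The sub-dword store path amounts to a read-modify-write on the containing 32-bit word; a rough sketch of that merge (illustrative C++, assuming little-endian byte numbering within the dword):

#include <cstdint>

// Merge an 8-bit value into its containing dword: shift the value and a mask
// to the byte's bit position, clear those bits, then OR in the new bits.
uint32_t mergeByteIntoDword(uint32_t dword, uint32_t byteAddr, uint8_t value) {
  unsigned shift = (byteAddr & 3) * 8;              // bit offset within the dword
  uint32_t mask = 0xFFu << shift;                   // bits the byte occupies
  return (dword & ~mask) | (uint32_t(value) << shift);
}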
1106 unsigned AS = StoreNode->getAddressSpace();
1108 SDValue Chain = StoreNode->getChain();
1109 SDValue Ptr = StoreNode->getBasePtr();
1110 SDValue Value = StoreNode->getValue();
1113 EVT MemVT = StoreNode->getMemoryVT();
1118 const bool TruncatingStore = StoreNode->isTruncatingStore();
1129 NewChain, DL, Value, Ptr, StoreNode->getPointerInfo(), MemVT,
1130 StoreNode->getAlign(), StoreNode->getMemOperand()->getFlags(),
1131 StoreNode->getAAInfo());
1138 Align Alignment = StoreNode->getAlign();
1141 StoreNode->getMemOperand()->getFlags(),
1159 assert(StoreNode->getAlign() >= 2);
1175 // XXX: If we add a 64-bit ZW register class, then we could use a 2 x i32
1186 Op->getVTList(), Args, MemVT,
1187 StoreNode->getMemOperand());
1189 if (Ptr->getOpcode() != AMDGPUISD::DWORDADDR && VT.bitsGE(MVT::i32)) {
1193 if (StoreNode->isIndexed()) {
1196 Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
1213 return DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
1257 return -1;
1265 ISD::LoadExtType ExtType = Load->getExtensionType();
1266 EVT MemVT = Load->getMemoryVT();
1267 assert(Load->getAlign() >= MemVT.getStoreSize());
1269 SDValue BasePtr = Load->getBasePtr();
1270 SDValue Chain = Load->getChain();
1271 SDValue Offset = Load->getOffset();
1319 unsigned AS = LoadNode->getAddressSpace();
1320 EVT MemVT = LoadNode->getMemoryVT();
1321 ISD::LoadExtType ExtType = LoadNode->getExtensionType();
1330 SDValue Chain = LoadNode->getChain();
1331 SDValue Ptr = LoadNode->getBasePtr();
1333 if ((LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
1334 LoadNode->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) &&
1342 int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
1343 if (ConstantBlock > -1 &&
1344 ((LoadNode->getExtensionType() == ISD::NON_EXTLOAD) ||
1345 (LoadNode->getExtensionType() == ISD::ZEXTLOAD))) {
1347 if (isa<Constant>(LoadNode->getMemOperand()->getValue()) ||
1349 return constBufferLoad(LoadNode, LoadNode->getAddressSpace(), DAG);
1352       // A non-constant ptr can't be folded, so keep it as a v4f32 load
1356 DAG.getConstant(LoadNode->getAddressSpace() -
1379 if (LoadNode->getExtensionType() == ISD::SEXTLOAD) {
1382 ISD::EXTLOAD, DL, VT, Chain, Ptr, LoadNode->getPointerInfo(), MemVT,
1383 LoadNode->getAlign(), LoadNode->getMemOperand()->getFlags());
1391 if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
1400 return DAG.getLoad(MVT::i32, DL, Chain, Ptr, LoadNode->getMemOperand());
1417 const R600FrameLowering *TFL = Subtarget->getFrameLowering();
1421 unsigned FrameIndex = FIN->getIndex();
1424 TFL->getFrameIndexReference(MF, FrameIndex, IgnoredFrameReg);
1425 return DAG.getConstant(Offset.getFixed() * 4 * TFL->getStackWidth(MF),
1436 case CallingConv::Cold:
1453 /// separate calling conventions for kernel and non-kernel functions.
1504 // XXX - I think PartOffset should give you this, but it seems to give the
1579 if (C->isZero()) {
1582 } else if (C->isExactlyValue(1.0)) {
1645 // Old -> New swizzle values
1650 unsigned Idx = Swz[i]->getAsZExtVal();
1658 unsigned Idx = Swz[i]->getAsZExtVal();
1669 EVT VT = LoadNode->getValueType(0);
1670 SDValue Chain = LoadNode->getChain();
1671 SDValue Ptr = LoadNode->getBasePtr();
1675 if (LoadNode->getMemoryVT().getScalarType() != MVT::i32 || !ISD::isNON_EXTLoad(LoadNode))
1678 if (LoadNode->getAlign() < Align(4))
1712 //===----------------------------------------------------------------------===//
1714 //===----------------------------------------------------------------------===//
1721 switch (N->getOpcode()) {
1722 // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
1724 SDValue Arg = N->getOperand(0);
1726 return DAG.getNode(ISD::UINT_TO_FP, DL, N->getValueType(0),
1732 // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) ->
1733 // (i32 select_cc f32, f32, -1, 0 cc)
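The fold is sound because converting the negated 1.0/0.0 select to an integer yields exactly -1 or 0; a plain C++ check of that arithmetic (not the combine itself):

// For either comparison outcome c:
//   (int)-(c ? 1.0f : 0.0f) == (c ? -1 : 0)
// since (int)-1.0f == -1 and (int)-0.0f == 0.
bool checkFold(bool c) {
  int viaFloat = (int)-(c ? 1.0f : 0.0f);
  int direct   = c ? -1 : 0;
  return viaFloat == direct;   // holds for both values of c
}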
1738 SDValue FNeg = N->getOperand(0);
1751 return DAG.getNode(ISD::SELECT_CC, DL, N->getValueType(0),
1754 DAG.getConstant(-1, DL, MVT::i32), // True
1762 SDValue InVec = N->getOperand(0);
1763 SDValue InVal = N->getOperand(1);
1764 SDValue EltNo = N->getOperand(2);
1779 unsigned Elt = EltNo->getAsZExtVal();
1786 Ops.append(InVec.getNode()->op_begin(),
1787 InVec.getNode()->op_end());
1814 SDValue Arg = N->getOperand(0);
1816 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1817 unsigned Element = Const->getZExtValue();
1818 return Arg->getOperand(Element);
1825 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1826 unsigned Element = Const->getZExtValue();
1827 return DAG.getNode(ISD::BITCAST, DL, N->getVTList(),
1828 Arg->getOperand(0).getOperand(Element));
1839 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
1842 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
1844 SDValue LHS = N->getOperand(0);
1849 SDValue RHS = N->getOperand(1);
1850 SDValue True = N->getOperand(2);
1851 SDValue False = N->getOperand(3);
1852 ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get();
1864 ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
1881 SDValue Arg = N->getOperand(1);
1886 N->getOperand(0), // Chain
1888 N->getOperand(2), // ArrayBase
1889 N->getOperand(3), // Type
1890 N->getOperand(4), // SWZ_X
1891 N->getOperand(5), // SWZ_Y
1892 N->getOperand(6), // SWZ_Z
1893 N->getOperand(7) // SWZ_W
1895 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG, DL);
1896 return DAG.getNode(AMDGPUISD::R600_EXPORT, DL, N->getVTList(), NewArgs);
1899 SDValue Arg = N->getOperand(1);
1904 N->getOperand(0),
1905 N->getOperand(1),
1906 N->getOperand(2),
1907 N->getOperand(3),
1908 N->getOperand(4),
1909 N->getOperand(5),
1910 N->getOperand(6),
1911 N->getOperand(7),
1912 N->getOperand(8),
1913 N->getOperand(9),
1914 N->getOperand(10),
1915 N->getOperand(11),
1916 N->getOperand(12),
1917 N->getOperand(13),
1918 N->getOperand(14),
1919 N->getOperand(15),
1920 N->getOperand(16),
1921 N->getOperand(17),
1922 N->getOperand(18),
1924 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG, DL);
1925 return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, N->getVTList(), NewArgs);
1930 SDValue Ptr = LoadNode->getBasePtr();
1931 if (LoadNode->getAddressSpace() == AMDGPUAS::PARAM_I_ADDRESS &&
1947 const R600InstrInfo *TII = Subtarget->getInstrInfo();
1965 unsigned Opcode = ParentNode->getMachineOpcode();
1966 bool HasDst = TII->getOperandIdx(Opcode, R600::OpName::dst) > -1;
1972 if (ParentNode->getValueType(0).isVector())
1977 TII->getOperandIdx(Opcode, R600::OpName::src0),
1978 TII->getOperandIdx(Opcode, R600::OpName::src1),
1979 TII->getOperandIdx(Opcode, R600::OpName::src2),
1980 TII->getOperandIdx(Opcode, R600::OpName::src0_X),
1981 TII->getOperandIdx(Opcode, R600::OpName::src0_Y),
1982 TII->getOperandIdx(Opcode, R600::OpName::src0_Z),
1983 TII->getOperandIdx(Opcode, R600::OpName::src0_W),
1984 TII->getOperandIdx(Opcode, R600::OpName::src1_X),
1985 TII->getOperandIdx(Opcode, R600::OpName::src1_Y),
1986 TII->getOperandIdx(Opcode, R600::OpName::src1_Z),
1987 TII->getOperandIdx(Opcode, R600::OpName::src1_W)
1991 int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx);
1995 OtherSrcIdx--;
1996 OtherSelIdx--;
1999 dyn_cast<RegisterSDNode>(ParentNode->getOperand(OtherSrcIdx))) {
2000 if (Reg->getReg() == R600::ALU_CONST) {
2001 Consts.push_back(ParentNode->getConstantOperandVal(OtherSelIdx));
2007 Consts.push_back(Cst->getZExtValue());
2008 if (!TII->fitsConstReadLimitations(Consts)) {
2018 if (Imm->getAsZExtVal())
2030 float FloatValue = FPC->getValueAPF().convertToFloat();
2038 ImmValue = FPC->getValueAPF().bitcastToAPInt().getZExtValue();
2058 if (C->getZExtValue())
2073 const R600InstrInfo *TII = Subtarget->getInstrInfo();
2074 if (!Node->isMachineOpcode())
2077 unsigned Opcode = Node->getMachineOpcode();
2080 std::vector<SDValue> Ops(Node->op_begin(), Node->op_end());
2084 TII->getOperandIdx(Opcode, R600::OpName::src0_X),
2085 TII->getOperandIdx(Opcode, R600::OpName::src0_Y),
2086 TII->getOperandIdx(Opcode, R600::OpName::src0_Z),
2087 TII->getOperandIdx(Opcode, R600::OpName::src0_W),
2088 TII->getOperandIdx(Opcode, R600::OpName::src1_X),
2089 TII->getOperandIdx(Opcode, R600::OpName::src1_Y),
2090 TII->getOperandIdx(Opcode, R600::OpName::src1_Z),
2091 TII->getOperandIdx(Opcode, R600::OpName::src1_W)
2094 TII->getOperandIdx(Opcode, R600::OpName::src0_neg_X),
2095 TII->getOperandIdx(Opcode, R600::OpName::src0_neg_Y),
2096 TII->getOperandIdx(Opcode, R600::OpName::src0_neg_Z),
2097 TII->getOperandIdx(Opcode, R600::OpName::src0_neg_W),
2098 TII->getOperandIdx(Opcode, R600::OpName::src1_neg_X),
2099 TII->getOperandIdx(Opcode, R600::OpName::src1_neg_Y),
2100 TII->getOperandIdx(Opcode, R600::OpName::src1_neg_Z),
2101 TII->getOperandIdx(Opcode, R600::OpName::src1_neg_W)
2104 TII->getOperandIdx(Opcode, R600::OpName::src0_abs_X),
2105 TII->getOperandIdx(Opcode, R600::OpName::src0_abs_Y),
2106 TII->getOperandIdx(Opcode, R600::OpName::src0_abs_Z),
2107 TII->getOperandIdx(Opcode, R600::OpName::src0_abs_W),
2108 TII->getOperandIdx(Opcode, R600::OpName::src1_abs_X),
2109 TII->getOperandIdx(Opcode, R600::OpName::src1_abs_Y),
2110 TII->getOperandIdx(Opcode, R600::OpName::src1_abs_Z),
2111 TII->getOperandIdx(Opcode, R600::OpName::src1_abs_W)
2116 SDValue &Src = Ops[OperandIdx[i] - 1];
2117 SDValue &Neg = Ops[NegIdx[i] - 1];
2118 SDValue &Abs = Ops[AbsIdx[i] - 1];
2119 bool HasDst = TII->getOperandIdx(Opcode, R600::OpName::dst) > -1;
2120 int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
2122 SelIdx--;
2123 SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
2125 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2128 for (unsigned i = 1, e = Node->getNumOperands(); i < e; i += 2) {
2131 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2134 if (!TII->hasInstrModifiers(Opcode))
2137 TII->getOperandIdx(Opcode, R600::OpName::src0),
2138 TII->getOperandIdx(Opcode, R600::OpName::src1),
2139 TII->getOperandIdx(Opcode, R600::OpName::src2)
2142 TII->getOperandIdx(Opcode, R600::OpName::src0_neg),
2143 TII->getOperandIdx(Opcode, R600::OpName::src1_neg),
2144 TII->getOperandIdx(Opcode, R600::OpName::src2_neg)
2147 TII->getOperandIdx(Opcode, R600::OpName::src0_abs),
2148 TII->getOperandIdx(Opcode, R600::OpName::src1_abs),
2149 -1
2154 SDValue &Src = Ops[OperandIdx[i] - 1];
2155 SDValue &Neg = Ops[NegIdx[i] - 1];
2157 SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
2158 bool HasDst = TII->getOperandIdx(Opcode, R600::OpName::dst) > -1;
2159 int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
2160 int ImmIdx = TII->getOperandIdx(Opcode, R600::OpName::literal);
2162 SelIdx--;
2163 ImmIdx--;
2165 SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
2168 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2177 switch (RMW->getOperation()) {