Lines matching defs:CI (the CombineInfo parameter/local named CI throughout SILoadStoreOptimizer)
125 bool hasSameBaseAddress(const CombineInfo &CI) {
126 if (NumAddresses != CI.NumAddresses)
129 const MachineInstr &MI = *CI.I;
212 static bool dmasksCanBeCombined(const CombineInfo &CI,
215 static bool offsetsCanBeCombined(CombineInfo &CI, const GCNSubtarget &STI,
217 static bool widthsFit(const GCNSubtarget &STI, const CombineInfo &CI,
219 unsigned getNewOpcode(const CombineInfo &CI, const CombineInfo &Paired);
220 static std::pair<unsigned, unsigned> getSubRegIdxs(const CombineInfo &CI,
223 getTargetRegisterClass(const CombineInfo &CI,
227 CombineInfo *checkAndPrepareMerge(CombineInfo &CI, CombineInfo &Paired);
229 void copyToDestRegs(CombineInfo &CI, CombineInfo &Paired,
232 Register copyFromSrcRegs(CombineInfo &CI, CombineInfo &Paired,
239 mergeRead2Pair(CombineInfo &CI, CombineInfo &Paired,
245 mergeWrite2Pair(CombineInfo &CI, CombineInfo &Paired,
248 mergeImagePair(CombineInfo &CI, CombineInfo &Paired,
251 mergeSMemLoadImmPair(CombineInfo &CI, CombineInfo &Paired,
254 mergeBufferLoadPair(CombineInfo &CI, CombineInfo &Paired,
257 mergeBufferStorePair(CombineInfo &CI, CombineInfo &Paired,
260 mergeTBufferLoadPair(CombineInfo &CI, CombineInfo &Paired,
263 mergeTBufferStorePair(CombineInfo &CI, CombineInfo &Paired,
266 mergeFlatLoadPair(CombineInfo &CI, CombineInfo &Paired,
269 mergeFlatStorePair(CombineInfo &CI, CombineInfo &Paired,
281 bool promoteConstantOffsetToImm(MachineInstr &CI,
284 void addInstToMergeableList(const CombineInfo &CI,
292 static MachineMemOperand *combineKnownAdjacentMMOs(const CombineInfo &CI,
295 static InstClassEnum getCommonInstClass(const CombineInfo &CI,
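For orientation, below is a minimal sketch of the CombineInfo fields that the matches in this listing dereference (CI.I, CI.Offset, CI.Width, and so on). The field names come from the uses shown here; the types and comments are assumptions, not the declaration in the source file.

    // Sketch only: reconstructed from the CI.* uses in this listing; the real
    // declaration lives in SILoadStoreOptimizer and may differ.
    struct CombineInfoSketch {
      MachineBasicBlock::iterator I; // the load/store under consideration
      InstClassEnum InstClass;       // DS_READ, BUFFER_LOAD, MIMG, FLAT_STORE, ...
      unsigned EltSize;              // element size in bytes used to scale DS offsets
      unsigned Offset;               // immediate offset (rescaled while combining)
      unsigned Width;                // number of dwords/components accessed
      unsigned Format;               // buffer format, TBUFFER classes only
      unsigned BaseOff;              // base offset materialized when offsets overflow
      bool UseST64;                  // use the read2st64/write2st64 DS forms
      unsigned DMask;                // image dmask, MIMG class only
      unsigned CPol;                 // cache-policy bits; must match to merge
      bool IsAGPR;                   // data operand lives in an AGPR register class
      unsigned Order;                // program order, used when picking Where
      unsigned NumAddresses;         // address operands compared by hasSameBaseAddress
    };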
645 SILoadStoreOptimizer::getCommonInstClass(const CombineInfo &CI,
647 assert(CI.InstClass == Paired.InstClass);
649 if ((CI.InstClass == FLAT_LOAD || CI.InstClass == FLAT_STORE) &&
650 SIInstrInfo::isFLATGlobal(*CI.I) && SIInstrInfo::isFLATGlobal(*Paired.I))
651 return (CI.InstClass == FLAT_STORE) ? GLOBAL_STORE : GLOBAL_LOAD;
653 return CI.InstClass;
897 // Given that \p CI and \p Paired are adjacent memory operations produce a new
900 SILoadStoreOptimizer::combineKnownAdjacentMMOs(const CombineInfo &CI,
902 const MachineMemOperand *MMOa = *CI.I->memoperands_begin();
909 if (Paired < CI)
917 MachineFunction *MF = CI.I->getMF();
921 bool SILoadStoreOptimizer::dmasksCanBeCombined(const CombineInfo &CI,
924 assert(CI.InstClass == MIMG);
927 const auto *TFEOp = TII.getNamedOperand(*CI.I, AMDGPU::OpName::tfe);
928 const auto *LWEOp = TII.getNamedOperand(*CI.I, AMDGPU::OpName::lwe);
939 int Idx = AMDGPU::getNamedOperandIdx(CI.I->getOpcode(), op);
943 CI.I->getOperand(Idx).getImm() != Paired.I->getOperand(Idx).getImm())
948 unsigned MaxMask = std::max(CI.DMask, Paired.DMask);
949 unsigned MinMask = std::min(CI.DMask, Paired.DMask);
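The listing cuts off right after the max/min of the two dmasks. As a hedged illustration of the kind of compatibility rule this sets up (an assumption, not copied from the file): the two image dmasks must not overlap, and the smaller mask must sit entirely below the larger mask's lowest set bit so the merged dmask stays a contiguous, valid per-component mask. dmasksLookCombinable is a made-up helper name.

    // Hedged sketch of a dmask-compatibility check (C++20 for std::countr_zero).
    #include <algorithm>
    #include <bit>

    static bool dmasksLookCombinable(unsigned DMask0, unsigned DMask1) {
      unsigned MaxMask = std::max(DMask0, DMask1);
      unsigned MinMask = std::min(DMask0, DMask1);
      if (MinMask == 0)                 // nothing to merge with an empty mask
        return false;
      unsigned AllowedBitsForMin = std::countr_zero(MaxMask);
      return (1u << AllowedBitsForMin) > MinMask;
    }

    // Example: DMask0 = 0x3 and DMask1 = 0xC do not overlap and 0x3 sits below
    // bit 2 (the lowest set bit of 0xC), so the pair would merge into dmask 0xF.
    // DMask0 = 0x3 and DMask1 = 0x6 overlap and are rejected.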
996 bool SILoadStoreOptimizer::offsetsCanBeCombined(CombineInfo &CI,
1000 assert(CI.InstClass != MIMG);
1004 if (CI.Offset == Paired.Offset)
1008 if ((CI.Offset % CI.EltSize != 0) || (Paired.Offset % CI.EltSize != 0))
1011 if (CI.InstClass == TBUFFER_LOAD || CI.InstClass == TBUFFER_STORE) {
1014 llvm::AMDGPU::getGcnBufferFormatInfo(CI.Format, STI);
1031 if (getBufferFormatWithCompCount(CI.Format, CI.Width + Paired.Width, STI) == 0)
1035 uint32_t EltOffset0 = CI.Offset / CI.EltSize;
1036 uint32_t EltOffset1 = Paired.Offset / CI.EltSize;
1037 CI.UseST64 = false;
1038 CI.BaseOff = 0;
1041 if ((CI.InstClass != DS_READ) && (CI.InstClass != DS_WRITE)) {
1042 if (EltOffset0 + CI.Width != EltOffset1 &&
1045 if (CI.CPol != Paired.CPol)
1047 if (CI.InstClass == S_LOAD_IMM || CI.InstClass == S_BUFFER_LOAD_IMM ||
1048 CI.InstClass == S_BUFFER_LOAD_SGPR_IMM) {
1054 if (CI.Width != Paired.Width &&
1055 (CI.Width < Paired.Width) == (CI.Offset < Paired.Offset))
1066 CI.Offset = EltOffset0 / 64;
1068 CI.UseST64 = true;
1076 CI.Offset = EltOffset0;
1096 CI.BaseOff = BaseOff * CI.EltSize;
1097 CI.Offset = (EltOffset0 - BaseOff) / 64;
1099 CI.UseST64 = true;
1110 CI.BaseOff = BaseOff * CI.EltSize;
1111 CI.Offset = EltOffset0 - BaseOff;
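A short worked example of the offset rewriting visible in this block (the numbers are illustrative, not from the source): DS offsets are expressed in EltSize units, and when both scaled offsets are multiples of 64 the pass switches to the st64 forms, dividing by 64 so larger distances still fit the 8-bit offset fields; when even that fails, a BaseOff is materialized and subtracted first.

    // Worked example for the EltOffset / UseST64 arithmetic above; plain C++.
    #include <cassert>

    int main() {
      unsigned EltSize = 4;                     // e.g. a pair of ds_read_b32
      unsigned Offset0 = 4096, Offset1 = 8192;  // byte offsets of CI and Paired
      unsigned EltOffset0 = Offset0 / EltSize;  // 1024: too large for 8 bits
      unsigned EltOffset1 = Offset1 / EltSize;  // 2048: too large for 8 bits
      // Both element offsets are multiples of 64, so the st64 encoding applies:
      // CI.Offset becomes EltOffset0 / 64 and UseST64 is set.
      assert(EltOffset0 % 64 == 0 && EltOffset1 % 64 == 0);
      assert(EltOffset0 / 64 == 16 && EltOffset1 / 64 == 32); // both fit 8 bits
      return 0;
    }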
1121 const CombineInfo &CI,
1123 const unsigned Width = (CI.Width + Paired.Width);
1124 switch (CI.InstClass) {
1163 /// This function assumes that CI comes before Paired in a basic block. Return
1166 SILoadStoreOptimizer::checkAndPrepareMerge(CombineInfo &CI,
1168 // If another instruction has already been merged into CI, it may now be a
1170 if (CI.InstClass == UNKNOWN || Paired.InstClass == UNKNOWN)
1172 assert(CI.InstClass == Paired.InstClass);
1174 if (getInstSubclass(CI.I->getOpcode(), *TII) !=
1180 if (CI.InstClass == MIMG) {
1181 if (!dmasksCanBeCombined(CI, *TII, Paired))
1184 if (!widthsFit(*STM, CI, Paired) || !offsetsCanBeCombined(CI, *STM, Paired))
1191 if (CI.I->mayLoad()) {
1192 // Try to hoist Paired up to CI.
1194 for (MachineBasicBlock::iterator MBBI = Paired.I; --MBBI != CI.I;) {
1198 Where = &CI;
1200 // Try to sink CI down to Paired.
1201 addDefsUsesToList(*CI.I, RegDefs, RegUses);
1202 for (MachineBasicBlock::iterator MBBI = CI.I; ++MBBI != Paired.I;) {
1203 if (!canSwapInstructions(RegDefs, RegUses, *CI.I, *MBBI))
1213 if (CI.InstClass == DS_READ || CI.InstClass == DS_WRITE)
1214 offsetsCanBeCombined(CI, *STM, Paired, true);
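The hoist/sink scan above decides where the merged instruction can live: for loads the pass tries to hoist Paired up to CI and keep CI's position (Where = &CI); otherwise it tries to sink CI down to Paired. A hedged, self-contained sketch of that placement test follows; canPlaceMergedAt and the ConflictsWithMoved flag are stand-ins for canSwapInstructions over the RegDefs/RegUses sets, not names from the pass.

    // Sketch: the merge is only legal if every instruction strictly between the
    // pair can be swapped with the one being moved across it.
    #include <cstddef>
    #include <vector>

    struct InstSketch {
      bool ConflictsWithMoved; // stand-in for a def/use clash with the moved op
    };

    static bool canPlaceMergedAt(const std::vector<InstSketch> &Block,
                                 std::size_t CIIdx, std::size_t PairedIdx) {
      for (std::size_t I = CIIdx + 1; I < PairedIdx; ++I)
        if (Block[I].ConflictsWithMoved)
          return false; // neither hoisting Paired nor sinking CI is safe
      return true;
    }

    int main() {
      std::vector<InstSketch> BB = {{false}, {false}, {true}, {false}};
      // CI at index 0, Paired at index 3; index 2 clashes, so no merge here.
      return canPlaceMergedAt(BB, 0, 3) ? 1 : 0; // expect 0: merge is blocked
    }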
1218 // Copy the merged load result from DestReg to the original dest regs of CI and
1221 CombineInfo &CI, CombineInfo &Paired,
1224 MachineBasicBlock *MBB = CI.I->getParent();
1225 DebugLoc DL = CI.I->getDebugLoc();
1227 auto [SubRegIdx0, SubRegIdx1] = getSubRegIdxs(CI, Paired);
1231 auto *Dest0 = TII->getNamedOperand(*CI.I, OpName);
1249 // original source regs of CI and Paired into it.
1251 SILoadStoreOptimizer::copyFromSrcRegs(CombineInfo &CI, CombineInfo &Paired,
1254 MachineBasicBlock *MBB = CI.I->getParent();
1255 DebugLoc DL = CI.I->getDebugLoc();
1257 auto [SubRegIdx0, SubRegIdx1] = getSubRegIdxs(CI, Paired);
1260 const TargetRegisterClass *SuperRC = getTargetRegisterClass(CI, Paired);
1263 const auto *Src0 = TII->getNamedOperand(*CI.I, OpName);
1290 SILoadStoreOptimizer::mergeRead2Pair(CombineInfo &CI, CombineInfo &Paired,
1292 MachineBasicBlock *MBB = CI.I->getParent();
1296 const auto *AddrReg = TII->getNamedOperand(*CI.I, AMDGPU::OpName::addr);
1298 unsigned NewOffset0 = std::min(CI.Offset, Paired.Offset);
1299 unsigned NewOffset1 = std::max(CI.Offset, Paired.Offset);
1301 CI.UseST64 ? read2ST64Opcode(CI.EltSize) : read2Opcode(CI.EltSize);
1308 const TargetRegisterClass *SuperRC = getTargetRegisterClass(CI, Paired);
1311 DebugLoc DL = CI.I->getDebugLoc();
1316 if (CI.BaseOff) {
1319 .addImm(CI.BaseOff);
1337 .cloneMergedMemRefs({&*CI.I, &*Paired.I});
1339 copyToDestRegs(CI, Paired, InsertBefore, AMDGPU::OpName::vdst, DestReg);
1341 CI.I->eraseFromParent();
1365 CombineInfo &CI, CombineInfo &Paired,
1367 MachineBasicBlock *MBB = CI.I->getParent();
1372 TII->getNamedOperand(*CI.I, AMDGPU::OpName::addr);
1374 TII->getNamedOperand(*CI.I, AMDGPU::OpName::data0);
1378 unsigned NewOffset0 = CI.Offset;
1381 CI.UseST64 ? write2ST64Opcode(CI.EltSize) : write2Opcode(CI.EltSize);
1393 DebugLoc DL = CI.I->getDebugLoc();
1398 if (CI.BaseOff) {
1401 .addImm(CI.BaseOff);
1421 .cloneMergedMemRefs({&*CI.I, &*Paired.I});
1423 CI.I->eraseFromParent();
1431 SILoadStoreOptimizer::mergeImagePair(CombineInfo &CI, CombineInfo &Paired,
1433 MachineBasicBlock *MBB = CI.I->getParent();
1434 DebugLoc DL = CI.I->getDebugLoc();
1435 const unsigned Opcode = getNewOpcode(CI, Paired);
1437 const TargetRegisterClass *SuperRC = getTargetRegisterClass(CI, Paired);
1440 unsigned MergedDMask = CI.DMask | Paired.DMask;
1442 AMDGPU::getNamedOperandIdx(CI.I->getOpcode(), AMDGPU::OpName::dmask);
1445 for (unsigned I = 1, E = (*CI.I).getNumOperands(); I != E; ++I) {
1449 MIB.add((*CI.I).getOperand(I));
1455 assert(CI.I->hasOneMemOperand() && Paired.I->hasOneMemOperand());
1457 MachineInstr *New = MIB.addMemOperand(combineKnownAdjacentMMOs(CI, Paired));
1459 copyToDestRegs(CI, Paired, InsertBefore, AMDGPU::OpName::vdata, DestReg);
1461 CI.I->eraseFromParent();
1467 CombineInfo &CI, CombineInfo &Paired,
1469 MachineBasicBlock *MBB = CI.I->getParent();
1470 DebugLoc DL = CI.I->getDebugLoc();
1471 const unsigned Opcode = getNewOpcode(CI, Paired);
1473 const TargetRegisterClass *SuperRC = getTargetRegisterClass(CI, Paired);
1476 unsigned MergedOffset = std::min(CI.Offset, Paired.Offset);
1481 assert(CI.I->hasOneMemOperand() && Paired.I->hasOneMemOperand());
1485 .add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::sbase));
1486 if (CI.InstClass == S_BUFFER_LOAD_SGPR_IMM)
1487 New.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::soffset));
1489 New.addImm(CI.CPol).addMemOperand(combineKnownAdjacentMMOs(CI, Paired));
1491 copyToDestRegs(CI, Paired, InsertBefore, AMDGPU::OpName::sdst, DestReg);
1493 CI.I->eraseFromParent();
1499 CombineInfo &CI, CombineInfo &Paired,
1501 MachineBasicBlock *MBB = CI.I->getParent();
1502 DebugLoc DL = CI.I->getDebugLoc();
1504 const unsigned Opcode = getNewOpcode(CI, Paired);
1506 const TargetRegisterClass *SuperRC = getTargetRegisterClass(CI, Paired);
1510 unsigned MergedOffset = std::min(CI.Offset, Paired.Offset);
1517 MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::vaddr));
1522 assert(CI.I->hasOneMemOperand() && Paired.I->hasOneMemOperand());
1525 MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::srsrc))
1526 .add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::soffset))
1528 .addImm(CI.CPol) // cpol
1530 .addMemOperand(combineKnownAdjacentMMOs(CI, Paired));
1532 copyToDestRegs(CI, Paired, InsertBefore, AMDGPU::OpName::vdata, DestReg);
1534 CI.I->eraseFromParent();
1540 CombineInfo &CI, CombineInfo &Paired,
1542 MachineBasicBlock *MBB = CI.I->getParent();
1543 DebugLoc DL = CI.I->getDebugLoc();
1545 const unsigned Opcode = getNewOpcode(CI, Paired);
1547 const TargetRegisterClass *SuperRC = getTargetRegisterClass(CI, Paired);
1551 unsigned MergedOffset = std::min(CI.Offset, Paired.Offset);
1558 MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::vaddr));
1561 getBufferFormatWithCompCount(CI.Format, CI.Width + Paired.Width, *STM);
1566 assert(CI.I->hasOneMemOperand() && Paired.I->hasOneMemOperand());
1569 MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::srsrc))
1570 .add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::soffset))
1573 .addImm(CI.CPol) // cpol
1575 .addMemOperand(combineKnownAdjacentMMOs(CI, Paired));
1577 copyToDestRegs(CI, Paired, InsertBefore, AMDGPU::OpName::vdata, DestReg);
1579 CI.I->eraseFromParent();
1585 CombineInfo &CI, CombineInfo &Paired,
1587 MachineBasicBlock *MBB = CI.I->getParent();
1588 DebugLoc DL = CI.I->getDebugLoc();
1590 const unsigned Opcode = getNewOpcode(CI, Paired);
1593 copyFromSrcRegs(CI, Paired, InsertBefore, AMDGPU::OpName::vdata);
1601 MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::vaddr));
1604 getBufferFormatWithCompCount(CI.Format, CI.Width + Paired.Width, *STM);
1609 assert(CI.I->hasOneMemOperand() && Paired.I->hasOneMemOperand());
1612 MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::srsrc))
1613 .add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::soffset))
1614 .addImm(std::min(CI.Offset, Paired.Offset)) // offset
1616 .addImm(CI.CPol) // cpol
1618 .addMemOperand(combineKnownAdjacentMMOs(CI, Paired));
1620 CI.I->eraseFromParent();
1626 CombineInfo &CI, CombineInfo &Paired,
1628 MachineBasicBlock *MBB = CI.I->getParent();
1629 DebugLoc DL = CI.I->getDebugLoc();
1631 const unsigned Opcode = getNewOpcode(CI, Paired);
1633 const TargetRegisterClass *SuperRC = getTargetRegisterClass(CI, Paired);
1638 if (auto *SAddr = TII->getNamedOperand(*CI.I, AMDGPU::OpName::saddr))
1642 MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::vaddr))
1643 .addImm(std::min(CI.Offset, Paired.Offset))
1644 .addImm(CI.CPol)
1645 .addMemOperand(combineKnownAdjacentMMOs(CI, Paired));
1647 copyToDestRegs(CI, Paired, InsertBefore, AMDGPU::OpName::vdst, DestReg);
1649 CI.I->eraseFromParent();
1655 CombineInfo &CI, CombineInfo &Paired,
1657 MachineBasicBlock *MBB = CI.I->getParent();
1658 DebugLoc DL = CI.I->getDebugLoc();
1660 const unsigned Opcode = getNewOpcode(CI, Paired);
1663 copyFromSrcRegs(CI, Paired, InsertBefore, AMDGPU::OpName::vdata);
1666 .add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::vaddr))
1669 if (auto *SAddr = TII->getNamedOperand(*CI.I, AMDGPU::OpName::saddr))
1673 MIB.addImm(std::min(CI.Offset, Paired.Offset))
1674 .addImm(CI.CPol)
1675 .addMemOperand(combineKnownAdjacentMMOs(CI, Paired));
1677 CI.I->eraseFromParent();
1682 unsigned SILoadStoreOptimizer::getNewOpcode(const CombineInfo &CI,
1684 const unsigned Width = CI.Width + Paired.Width;
1686 switch (getCommonInstClass(CI, Paired)) {
1688 assert(CI.InstClass == BUFFER_LOAD || CI.InstClass == BUFFER_STORE);
1690 return AMDGPU::getMUBUFOpcode(AMDGPU::getMUBUFBaseOpcode(CI.I->getOpcode()),
1694 return AMDGPU::getMTBUFOpcode(AMDGPU::getMTBUFBaseOpcode(CI.I->getOpcode()),
1728 const MachineMemOperand *MMO = *CI.I->memoperands_begin();
1815 assert(((unsigned)llvm::popcount(CI.DMask | Paired.DMask) == Width) &&
1817 return AMDGPU::getMaskedMIMGOp(CI.I->getOpcode(), Width);
1822 SILoadStoreOptimizer::getSubRegIdxs(const CombineInfo &CI,
1824 assert((CI.InstClass != MIMG ||
1825 ((unsigned)llvm::popcount(CI.DMask | Paired.DMask) ==
1826 CI.Width + Paired.Width)) &&
1840 assert(CI.Width >= 1 && CI.Width <= 4);
1843 if (Paired < CI) {
1845 Idx0 = Idxs[Paired.Width][CI.Width - 1];
1847 Idx0 = Idxs[0][CI.Width - 1];
1848 Idx1 = Idxs[CI.Width][Paired.Width - 1];
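The Idxs lookup above picks which sub-registers of the merged super-register hold CI's and Paired's original values; copyToDestRegs and copyFromSrcRegs then use these indices to split a merged load or assemble a merged store. The table itself is not among the matches, so the helper below is only an assumption that reproduces the row/column scheme implied by the indexing (row = first element of the half, column = width - 1); subRegSketch is a hypothetical name.

    #include <cassert>
    #include <string>

    static std::string subRegSketch(unsigned FirstElt, unsigned Width) {
      std::string Name = "sub" + std::to_string(FirstElt);
      for (unsigned I = 1; I < Width; ++I)
        Name += "_sub" + std::to_string(FirstElt + I);
      return Name;
    }

    int main() {
      // Two 32-bit accesses with CI first: CI gets sub0, Paired gets sub1.
      assert(subRegSketch(0, 1) == "sub0");
      assert(subRegSketch(1, 1) == "sub1");
      // A 2-wide CI followed by a 1-wide Paired: sub0_sub1 and sub2.
      assert(subRegSketch(0, 2) == "sub0_sub1");
      assert(subRegSketch(2, 1) == "sub2");
      return 0;
    }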
1855 SILoadStoreOptimizer::getTargetRegisterClass(const CombineInfo &CI,
1857 if (CI.InstClass == S_BUFFER_LOAD_IMM ||
1858 CI.InstClass == S_BUFFER_LOAD_SGPR_IMM || CI.InstClass == S_LOAD_IMM) {
1859 switch (CI.Width + Paired.Width) {
1875 unsigned BitWidth = 32 * (CI.Width + Paired.Width);
1876 return TRI->isAGPRClass(getDataRegClass(*CI.I))
1882 CombineInfo &CI, CombineInfo &Paired,
1884 MachineBasicBlock *MBB = CI.I->getParent();
1885 DebugLoc DL = CI.I->getDebugLoc();
1887 const unsigned Opcode = getNewOpcode(CI, Paired);
1890 copyFromSrcRegs(CI, Paired, InsertBefore, AMDGPU::OpName::vdata);
1898 MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::vaddr));
1904 assert(CI.I->hasOneMemOperand() && Paired.I->hasOneMemOperand());
1907 MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::srsrc))
1908 .add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::soffset))
1909 .addImm(std::min(CI.Offset, Paired.Offset)) // offset
1910 .addImm(CI.CPol) // cpol
1912 .addMemOperand(combineKnownAdjacentMMOs(CI, Paired));
1914 CI.I->eraseFromParent();
2231 void SILoadStoreOptimizer::addInstToMergeableList(const CombineInfo &CI,
2234 if (AddrList.front().InstClass == CI.InstClass &&
2235 AddrList.front().IsAGPR == CI.IsAGPR &&
2236 AddrList.front().hasSameBaseAddress(CI)) {
2237 AddrList.emplace_back(CI);
2243 MergeableInsts.emplace_back(1, CI);
2284 CombineInfo CI;
2285 CI.setMI(MI, *this);
2286 CI.Order = Order++;
2288 if (!CI.hasMergeableAddress(*MRI))
2291 if (CI.InstClass == DS_WRITE && CI.IsAGPR) {
2303 addInstToMergeableList(CI, MergeableInsts);
2388 CombineInfo &CI = *First;
2391 CombineInfo *Where = checkAndPrepareMerge(CI, Paired);
2399 LLVM_DEBUG(dbgs() << "Merging: " << *CI.I << " with: " << *Paired.I);
2402 switch (CI.InstClass) {
2407 NewMI = mergeRead2Pair(CI, Paired, Where->I);
2410 NewMI = mergeWrite2Pair(CI, Paired, Where->I);
2415 NewMI = mergeSMemLoadImmPair(CI, Paired, Where->I);
2416 OptimizeListAgain |= CI.Width + Paired.Width < 8;
2419 NewMI = mergeBufferLoadPair(CI, Paired, Where->I);
2420 OptimizeListAgain |= CI.Width + Paired.Width < 4;
2423 NewMI = mergeBufferStorePair(CI, Paired, Where->I);
2424 OptimizeListAgain |= CI.Width + Paired.Width < 4;
2427 NewMI = mergeImagePair(CI, Paired, Where->I);
2428 OptimizeListAgain |= CI.Width + Paired.Width < 4;
2431 NewMI = mergeTBufferLoadPair(CI, Paired, Where->I);
2432 OptimizeListAgain |= CI.Width + Paired.Width < 4;
2435 NewMI = mergeTBufferStorePair(CI, Paired, Where->I);
2436 OptimizeListAgain |= CI.Width + Paired.Width < 4;
2441 NewMI = mergeFlatLoadPair(CI, Paired, Where->I);
2442 OptimizeListAgain |= CI.Width + Paired.Width < 4;
2447 NewMI = mergeFlatStorePair(CI, Paired, Where->I);
2448 OptimizeListAgain |= CI.Width + Paired.Width < 4;
2451 CI.setMI(NewMI, *this);
2452 CI.Order = Where->Order;
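Finally, the OptimizeListAgain lines above drive re-merging: after a successful pair merge, CI is rebound to the new instruction (setMI, Order = Where->Order) and the list is processed again as long as the combined width stays below the widest encodable access (the "< 8" threshold for the scalar-load classes, "< 4" for the rest). A toy, self-contained sketch of that fixed-point loop, with plain integers standing in for instructions:

    // Toy model of the re-merge loop; the widths and pairing rule are
    // stand-ins, not the pass's data structures.
    #include <cassert>
    #include <cstddef>
    #include <vector>

    int main() {
      std::vector<unsigned> Widths = {1, 1, 1, 1}; // four adjacent dword loads
      const unsigned MaxWidth = 4;                 // cap for the VMEM classes
      bool OptimizeAgain = true;
      while (OptimizeAgain) {
        OptimizeAgain = false;
        for (std::size_t I = 0; I + 1 < Widths.size(); ++I) {
          if (Widths[I] + Widths[I + 1] <= MaxWidth) {
            Widths[I] += Widths[I + 1];            // merge the pair in place
            Widths.erase(Widths.begin() + I + 1);
            OptimizeAgain |= Widths[I] < MaxWidth; // mirrors Width+Width < 4
            break;                                 // rescan, like the pass
          }
        }
      }
      assert(Widths.size() == 1 && Widths[0] == 4); // ends as one x4 access
      return 0;
    }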