Lines Matching defs:Paired

214 const CombineInfo &Paired);
216 CombineInfo &Paired, bool Modify = false);
218 const CombineInfo &Paired);
219 unsigned getNewOpcode(const CombineInfo &CI, const CombineInfo &Paired);
221 const CombineInfo &Paired);
224 const CombineInfo &Paired) const;
227 CombineInfo *checkAndPrepareMerge(CombineInfo &CI, CombineInfo &Paired);
229 void copyToDestRegs(CombineInfo &CI, CombineInfo &Paired,
232 Register copyFromSrcRegs(CombineInfo &CI, CombineInfo &Paired,
239 mergeRead2Pair(CombineInfo &CI, CombineInfo &Paired,
245 mergeWrite2Pair(CombineInfo &CI, CombineInfo &Paired,
248 mergeImagePair(CombineInfo &CI, CombineInfo &Paired,
251 mergeSMemLoadImmPair(CombineInfo &CI, CombineInfo &Paired,
254 mergeBufferLoadPair(CombineInfo &CI, CombineInfo &Paired,
257 mergeBufferStorePair(CombineInfo &CI, CombineInfo &Paired,
260 mergeTBufferLoadPair(CombineInfo &CI, CombineInfo &Paired,
263 mergeTBufferStorePair(CombineInfo &CI, CombineInfo &Paired,
266 mergeFlatLoadPair(CombineInfo &CI, CombineInfo &Paired,
269 mergeFlatStorePair(CombineInfo &CI, CombineInfo &Paired,
293 const CombineInfo &Paired);
296 const CombineInfo &Paired);
646 const CombineInfo &Paired) {
647 assert(CI.InstClass == Paired.InstClass);
650 SIInstrInfo::isFLATGlobal(*CI.I) && SIInstrInfo::isFLATGlobal(*Paired.I))
897 // Given that \p CI and \p Paired are adjacent memory operations produce a new
901 const CombineInfo &Paired) {
903 const MachineMemOperand *MMOb = *Paired.I->memoperands_begin();
909 if (Paired < CI)
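
Only the Paired-matching lines of combineKnownAdjacentMMOs survive the filter above. A minimal sketch of the whole routine around them, assuming the merged operand takes the leading access's pointer info and the summed size; the MMOa definition and the final getMachineMemOperand call are reconstructions, and the MMO size API has changed across LLVM releases (newer ones return a LocationSize), so take the arithmetic as schematic:

    const MachineMemOperand *MMOa = *CI.I->memoperands_begin();   // assumed, ~line 902
    const MachineMemOperand *MMOb = *Paired.I->memoperands_begin();
    unsigned Size = MMOa->getSize() + MMOb->getSize();            // combined access width
    // Keep the pointer info of whichever access comes first in memory.
    if (Paired < CI)
      std::swap(MMOa, MMOb);
    MachineFunction *MF = CI.I->getMF();
    return MF->getMachineMemOperand(MMOa, MMOa->getPointerInfo(), Size);
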
923 const CombineInfo &Paired) {
940 if (AMDGPU::getNamedOperandIdx(Paired.I->getOpcode(), op) != Idx)
943 CI.I->getOperand(Idx).getImm() != Paired.I->getOperand(Idx).getImm())
948 unsigned MaxMask = std::max(CI.DMask, Paired.DMask);
949 unsigned MinMask = std::min(CI.DMask, Paired.DMask);
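
The DMask comparison at 948-949 only reads cleanly with its follow-up test, which contains no Paired and so is dropped by the filter. A hedged reconstruction of that disjointness check (the countr_zero step is an assumption about how overlap is ruled out):

    unsigned MaxMask = std::max(CI.DMask, Paired.DMask);
    unsigned MinMask = std::min(CI.DMask, Paired.DMask);
    if (!MaxMask || !MinMask)
      return false;
    // Assumed follow-up: every bit of the smaller mask must sit below the
    // larger mask's lowest set bit, so the two image ops touch disjoint,
    // contiguously packable channels.
    unsigned AllowedBitsForMin = llvm::countr_zero(MaxMask);
    if ((1u << AllowedBitsForMin) <= MinMask)
      return false;

Under that reading, dmasks 0x1 and 0xC can combine (0x1 < 1 << 2), while 0x3 and 0x6 cannot (0x3 >= 1 << 1).
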
998 CombineInfo &Paired,
1004 if (CI.Offset == Paired.Offset)
1008 if ((CI.Offset % CI.EltSize != 0) || (Paired.Offset % CI.EltSize != 0))
1018 llvm::AMDGPU::getGcnBufferFormatInfo(Paired.Format, STI);
1031 if (getBufferFormatWithCompCount(CI.Format, CI.Width + Paired.Width, STI) == 0)
1036 uint32_t EltOffset1 = Paired.Offset / CI.EltSize;
1043 EltOffset1 + Paired.Width != EltOffset0)
1045 if (CI.CPol != Paired.CPol)
1054 if (CI.Width != Paired.Width &&
1055 (CI.Width < Paired.Width) == (CI.Offset < Paired.Offset))
1067 Paired.Offset = EltOffset1 / 64;
1077 Paired.Offset = EltOffset1;
1098 Paired.Offset = (EltOffset1 - BaseOff) / 64;
1112 Paired.Offset = EltOffset1 - BaseOff;
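
A worked pass through this offset arithmetic, with the 8-bit offset0/offset1 field width assumed from the ds_read2/ds_write2 encoding:

    EltSize = 4, byte offsets 8 and 12:
        EltOffset0 = 8/4 = 2, EltOffset1 = 12/4 = 3; both fit in 8 bits,
        so the plain form is used and Paired.Offset = 3 (line 1077).
    EltSize = 4, byte offsets 0 and 16384:
        EltOffset1 = 4096 overflows 8 bits, but both element offsets are
        multiples of 64, so the ST64 form applies and
        Paired.Offset = 4096/64 = 64 (line 1067).

When neither form fits directly, lines 1098 and 1112 retry the same two encodings after subtracting a common BaseOff that gets folded into the base address.
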
1122 const CombineInfo &Paired) {
1123 const unsigned Width = (CI.Width + Paired.Width);
1163 /// This function assumes that CI comes before Paired in a basic block. Return
1167 CombineInfo &Paired) {
1170 if (CI.InstClass == UNKNOWN || Paired.InstClass == UNKNOWN)
1172 assert(CI.InstClass == Paired.InstClass);
1175 getInstSubclass(Paired.I->getOpcode(), *TII))
1181 if (!dmasksCanBeCombined(CI, *TII, Paired))
1184 if (!widthsFit(*STM, CI, Paired) || !offsetsCanBeCombined(CI, *STM, Paired))
1192 // Try to hoist Paired up to CI.
1193 addDefsUsesToList(*Paired.I, RegDefs, RegUses);
1194 for (MachineBasicBlock::iterator MBBI = Paired.I; --MBBI != CI.I;) {
1195 if (!canSwapInstructions(RegDefs, RegUses, *Paired.I, *MBBI))
1200 // Try to sink CI down to Paired.
1202 for (MachineBasicBlock::iterator MBBI = CI.I; ++MBBI != Paired.I;) {
1206 Where = &Paired;
1214 offsetsCanBeCombined(CI, *STM, Paired, true);
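
The two scan loops at 1194 and 1202 lose their bodies and surrounding control flow to the filter. A sketch of the presumed shape (the mayLoad() split is an assumption; the hoist/sink comments and the Where assignments come from lines 1192, 1200, and 1206):

    CombineInfo *Where;
    if (CI.I->mayLoad()) {
      // Try to hoist Paired up to CI.
      addDefsUsesToList(*Paired.I, RegDefs, RegUses);
      for (MachineBasicBlock::iterator MBBI = Paired.I; --MBBI != CI.I;)
        if (!canSwapInstructions(RegDefs, RegUses, *Paired.I, *MBBI))
          return nullptr;
      Where = &CI;
    } else {
      // Try to sink CI down to Paired.
      addDefsUsesToList(*CI.I, RegDefs, RegUses);
      for (MachineBasicBlock::iterator MBBI = CI.I; ++MBBI != Paired.I;)
        if (!canSwapInstructions(RegDefs, RegUses, *CI.I, *MBBI))
          return nullptr;
      Where = &Paired;
    }
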
1219 // Paired.
1221 CombineInfo &CI, CombineInfo &Paired,
1227 auto [SubRegIdx0, SubRegIdx1] = getSubRegIdxs(CI, Paired);
1232 auto *Dest1 = TII->getNamedOperand(*Paired.I, OpName);
1249 // original source regs of CI and Paired into it.
1251 SILoadStoreOptimizer::copyFromSrcRegs(CombineInfo &CI, CombineInfo &Paired,
1257 auto [SubRegIdx0, SubRegIdx1] = getSubRegIdxs(CI, Paired);
1260 const TargetRegisterClass *SuperRC = getTargetRegisterClass(CI, Paired);
1264 const auto *Src1 = TII->getNamedOperand(*Paired.I, OpName);
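
From the lines shown, copyFromSrcRegs (1251) looks like the standard REG_SEQUENCE pattern: pack the two original store sources into one super-register and hand that to the merged store. A sketch under that assumption (Src0, the BuildMI call, and the DebugLoc plumbing are reconstructions):

    auto [SubRegIdx0, SubRegIdx1] = getSubRegIdxs(CI, Paired);         // 1257
    const TargetRegisterClass *SuperRC = getTargetRegisterClass(CI, Paired);
    Register SrcReg = MRI->createVirtualRegister(SuperRC);
    const auto *Src0 = TII->getNamedOperand(*CI.I, OpName);            // assumed
    const auto *Src1 = TII->getNamedOperand(*Paired.I, OpName);        // 1264
    // Pack both original store sources into the wide source register.
    BuildMI(*CI.I->getParent(), InsertBefore, CI.I->getDebugLoc(),
            TII->get(AMDGPU::REG_SEQUENCE), SrcReg)
        .add(*Src0).addImm(SubRegIdx0)
        .add(*Src1).addImm(SubRegIdx1);
    return SrcReg;
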
1290 SILoadStoreOptimizer::mergeRead2Pair(CombineInfo &CI, CombineInfo &Paired,
1298 unsigned NewOffset0 = std::min(CI.Offset, Paired.Offset);
1299 unsigned NewOffset1 = std::max(CI.Offset, Paired.Offset);
1308 const TargetRegisterClass *SuperRC = getTargetRegisterClass(CI, Paired);
1337 .cloneMergedMemRefs({&*CI.I, &*Paired.I});
1339 copyToDestRegs(CI, Paired, InsertBefore, AMDGPU::OpName::vdst, DestReg);
1342 Paired.I->eraseFromParent();
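
Concretely (schematic, details invented): two ds_read_b32 of one base at byte offsets 0 and 4 become a single ds_read2_b32 with offset0 = 0 and offset1 = 1, ordered by the min/max at 1298-1299; the 64-bit result lands in a register of the super-class from 1308, and copyToDestRegs at 1339 peels sub0/sub1 back into the two original destinations before the dead originals are erased.
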
1365 CombineInfo &CI, CombineInfo &Paired,
1376 TII->getNamedOperand(*Paired.I, AMDGPU::OpName::data0);
1379 unsigned NewOffset1 = Paired.Offset;
1421 .cloneMergedMemRefs({&*CI.I, &*Paired.I});
1424 Paired.I->eraseFromParent();
1431 SILoadStoreOptimizer::mergeImagePair(CombineInfo &CI, CombineInfo &Paired,
1435 const unsigned Opcode = getNewOpcode(CI, Paired);
1437 const TargetRegisterClass *SuperRC = getTargetRegisterClass(CI, Paired);
1440 unsigned MergedDMask = CI.DMask | Paired.DMask;
1455 assert(CI.I->hasOneMemOperand() && Paired.I->hasOneMemOperand());
1457 MachineInstr *New = MIB.addMemOperand(combineKnownAdjacentMMOs(CI, Paired));
1459 copyToDestRegs(CI, Paired, InsertBefore, AMDGPU::OpName::vdata, DestReg);
1462 Paired.I->eraseFromParent();
1467 CombineInfo &CI, CombineInfo &Paired,
1471 const unsigned Opcode = getNewOpcode(CI, Paired);
1473 const TargetRegisterClass *SuperRC = getTargetRegisterClass(CI, Paired);
1476 unsigned MergedOffset = std::min(CI.Offset, Paired.Offset);
1481 assert(CI.I->hasOneMemOperand() && Paired.I->hasOneMemOperand());
1489 New.addImm(CI.CPol).addMemOperand(combineKnownAdjacentMMOs(CI, Paired));
1491 copyToDestRegs(CI, Paired, InsertBefore, AMDGPU::OpName::sdst, DestReg);
1494 Paired.I->eraseFromParent();
1499 CombineInfo &CI, CombineInfo &Paired,
1504 const unsigned Opcode = getNewOpcode(CI, Paired);
1506 const TargetRegisterClass *SuperRC = getTargetRegisterClass(CI, Paired);
1510 unsigned MergedOffset = std::min(CI.Offset, Paired.Offset);
1522 assert(CI.I->hasOneMemOperand() && Paired.I->hasOneMemOperand());
1530 .addMemOperand(combineKnownAdjacentMMOs(CI, Paired));
1532 copyToDestRegs(CI, Paired, InsertBefore, AMDGPU::OpName::vdata, DestReg);
1535 Paired.I->eraseFromParent();
1540 CombineInfo &CI, CombineInfo &Paired,
1545 const unsigned Opcode = getNewOpcode(CI, Paired);
1547 const TargetRegisterClass *SuperRC = getTargetRegisterClass(CI, Paired);
1551 unsigned MergedOffset = std::min(CI.Offset, Paired.Offset);
1561 getBufferFormatWithCompCount(CI.Format, CI.Width + Paired.Width, *STM);
1566 assert(CI.I->hasOneMemOperand() && Paired.I->hasOneMemOperand());
1575 .addMemOperand(combineKnownAdjacentMMOs(CI, Paired));
1577 copyToDestRegs(CI, Paired, InsertBefore, AMDGPU::OpName::vdata, DestReg);
1580 Paired.I->eraseFromParent();
1585 CombineInfo &CI, CombineInfo &Paired,
1590 const unsigned Opcode = getNewOpcode(CI, Paired);
1593 copyFromSrcRegs(CI, Paired, InsertBefore, AMDGPU::OpName::vdata);
1604 getBufferFormatWithCompCount(CI.Format, CI.Width + Paired.Width, *STM);
1609 assert(CI.I->hasOneMemOperand() && Paired.I->hasOneMemOperand());
1614 .addImm(std::min(CI.Offset, Paired.Offset)) // offset
1618 .addMemOperand(combineKnownAdjacentMMOs(CI, Paired));
1621 Paired.I->eraseFromParent();
1626 CombineInfo &CI, CombineInfo &Paired,
1631 const unsigned Opcode = getNewOpcode(CI, Paired);
1633 const TargetRegisterClass *SuperRC = getTargetRegisterClass(CI, Paired);
1643 .addImm(std::min(CI.Offset, Paired.Offset))
1645 .addMemOperand(combineKnownAdjacentMMOs(CI, Paired));
1647 copyToDestRegs(CI, Paired, InsertBefore, AMDGPU::OpName::vdst, DestReg);
1650 Paired.I->eraseFromParent();
1655 CombineInfo &CI, CombineInfo &Paired,
1660 const unsigned Opcode = getNewOpcode(CI, Paired);
1663 copyFromSrcRegs(CI, Paired, InsertBefore, AMDGPU::OpName::vdata);
1673 MIB.addImm(std::min(CI.Offset, Paired.Offset))
1675 .addMemOperand(combineKnownAdjacentMMOs(CI, Paired));
1678 Paired.I->eraseFromParent();
1683 const CombineInfo &Paired) {
1684 const unsigned Width = CI.Width + Paired.Width;
1686 switch (getCommonInstClass(CI, Paired)) {
1815 assert(((unsigned)llvm::popcount(CI.DMask | Paired.DMask) == Width) &&
1823 const CombineInfo &Paired) {
1825 ((unsigned)llvm::popcount(CI.DMask | Paired.DMask) ==
1826 CI.Width + Paired.Width)) &&
1841 assert(Paired.Width >= 1 && Paired.Width <= 4);
1843 if (Paired < CI) {
1844 Idx1 = Idxs[0][Paired.Width - 1];
1845 Idx0 = Idxs[Paired.Width][CI.Width - 1];
1848 Idx1 = Idxs[CI.Width][Paired.Width - 1];
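
The Idxs table consumed at 1843-1848 contains no Paired match and so is omitted above. Its presumed layout, with Idxs[Start][Width - 1] naming the subregister that covers Width dwords starting Start dwords into the merged value (a reconstruction; only the indexing pattern is visible in the listing):

    static const unsigned Idxs[5][4] = {
        {AMDGPU::sub0, AMDGPU::sub0_sub1, AMDGPU::sub0_sub1_sub2, AMDGPU::sub0_sub1_sub2_sub3},
        {AMDGPU::sub1, AMDGPU::sub1_sub2, AMDGPU::sub1_sub2_sub3, AMDGPU::sub1_sub2_sub3_sub4},
        {AMDGPU::sub2, AMDGPU::sub2_sub3, AMDGPU::sub2_sub3_sub4, AMDGPU::sub2_sub3_sub4_sub5},
        {AMDGPU::sub3, AMDGPU::sub3_sub4, AMDGPU::sub3_sub4_sub5, AMDGPU::sub3_sub4_sub5_sub6},
        {AMDGPU::sub4, AMDGPU::sub4_sub5, AMDGPU::sub4_sub5_sub6, AMDGPU::sub4_sub5_sub6_sub7},
    };

The swap branch at 1843-1845 then just decides which of the two accesses claims the low-numbered subregisters.
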
1856 const CombineInfo &Paired) const {
1859 switch (CI.Width + Paired.Width) {
1875 unsigned BitWidth = 32 * (CI.Width + Paired.Width);
1882 CombineInfo &CI, CombineInfo &Paired,
1887 const unsigned Opcode = getNewOpcode(CI, Paired);
1890 copyFromSrcRegs(CI, Paired, InsertBefore, AMDGPU::OpName::vdata);
1904 assert(CI.I->hasOneMemOperand() && Paired.I->hasOneMemOperand());
1909 .addImm(std::min(CI.Offset, Paired.Offset)) // offset
1912 .addMemOperand(combineKnownAdjacentMMOs(CI, Paired));
1915 Paired.I->eraseFromParent();
2310 // is found, it is stored in the Paired field. If no instructions are found, then
2389 CombineInfo &Paired = *Second;
2391 CombineInfo *Where = checkAndPrepareMerge(CI, Paired);
2399 LLVM_DEBUG(dbgs() << "Merging: " << *CI.I << " with: " << *Paired.I);
2407 NewMI = mergeRead2Pair(CI, Paired, Where->I);
2410 NewMI = mergeWrite2Pair(CI, Paired, Where->I);
2415 NewMI = mergeSMemLoadImmPair(CI, Paired, Where->I);
2416 OptimizeListAgain |= CI.Width + Paired.Width < 8;
2419 NewMI = mergeBufferLoadPair(CI, Paired, Where->I);
2420 OptimizeListAgain |= CI.Width + Paired.Width < 4;
2423 NewMI = mergeBufferStorePair(CI, Paired, Where->I);
2424 OptimizeListAgain |= CI.Width + Paired.Width < 4;
2427 NewMI = mergeImagePair(CI, Paired, Where->I);
2428 OptimizeListAgain |= CI.Width + Paired.Width < 4;
2431 NewMI = mergeTBufferLoadPair(CI, Paired, Where->I);
2432 OptimizeListAgain |= CI.Width + Paired.Width < 4;
2435 NewMI = mergeTBufferStorePair(CI, Paired, Where->I);
2436 OptimizeListAgain |= CI.Width + Paired.Width < 4;
2441 NewMI = mergeFlatLoadPair(CI, Paired, Where->I);
2442 OptimizeListAgain |= CI.Width + Paired.Width < 4;
2447 NewMI = mergeFlatStorePair(CI, Paired, Where->I);
2448 OptimizeListAgain |= CI.Width + Paired.Width < 4;
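
The OptimizeListAgain updates above encode the re-merge ceilings: SMEM pairs keep re-entering the work list until they reach 8 dwords, while the buffer, image, tbuffer, and flat cases stop at 4. For example, two 2-dword buffer loads merge into a final 4-dword load (2 + 2 < 4 fails), whereas the same widths as SMEM loads (2 + 2 < 8 holds) leave the merged load eligible for one more round toward the 8-dword form.
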