Lines matching defs:Paired (AMDGPU SILoadStoreOptimizer)

215 const CombineInfo &Paired);
217 CombineInfo &Paired, bool Modify = false);
219 const CombineInfo &Paired);
220 unsigned getNewOpcode(const CombineInfo &CI, const CombineInfo &Paired);
222 const CombineInfo &Paired);
225 const CombineInfo &Paired) const;
228 CombineInfo *checkAndPrepareMerge(CombineInfo &CI, CombineInfo &Paired);
230 void copyToDestRegs(CombineInfo &CI, CombineInfo &Paired,
233 Register copyFromSrcRegs(CombineInfo &CI, CombineInfo &Paired,
240 mergeRead2Pair(CombineInfo &CI, CombineInfo &Paired,
246 mergeWrite2Pair(CombineInfo &CI, CombineInfo &Paired,
249 mergeImagePair(CombineInfo &CI, CombineInfo &Paired,
252 mergeSMemLoadImmPair(CombineInfo &CI, CombineInfo &Paired,
255 mergeBufferLoadPair(CombineInfo &CI, CombineInfo &Paired,
258 mergeBufferStorePair(CombineInfo &CI, CombineInfo &Paired,
261 mergeTBufferLoadPair(CombineInfo &CI, CombineInfo &Paired,
264 mergeTBufferStorePair(CombineInfo &CI, CombineInfo &Paired,
267 mergeFlatLoadPair(CombineInfo &CI, CombineInfo &Paired,
270 mergeFlatStorePair(CombineInfo &CI, CombineInfo &Paired,
294 const CombineInfo &Paired);
297 const CombineInfo &Paired);
675 const CombineInfo &Paired) {
676 assert(CI.InstClass == Paired.InstClass);
679 SIInstrInfo::isFLATGlobal(*CI.I) && SIInstrInfo::isFLATGlobal(*Paired.I))
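
Note: the matches at 675-679 are getCommonInstClass, which reclassifies a FLAT pair as a GLOBAL pair when both instructions provably address global memory, so global-only merged encodings become available. A minimal standalone sketch of that reclassification (the plain enum and parameter names here are mine, not the pass's types):

enum InstClass { FLAT_LOAD, FLAT_STORE, GLOBAL_LOAD, GLOBAL_STORE };

// Sketch: a FLAT load/store pair whose two sides are both global
// accesses is treated as the corresponding GLOBAL class.
InstClass getCommonClass(InstClass Cls, bool BothSidesGlobal) {
  if ((Cls == FLAT_LOAD || Cls == FLAT_STORE) && BothSidesGlobal)
    return Cls == FLAT_LOAD ? GLOBAL_LOAD : GLOBAL_STORE;
  return Cls;
}
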
934 // Given that \p CI and \p Paired are adjacent memory operations produce a new
938 const CombineInfo &Paired) {
940 const MachineMemOperand *MMOb = *Paired.I->memoperands_begin();
946 if (Paired < CI)
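
Note: 934-946 are combineKnownAdjacentMMOs; the swap at 946 ensures the merged memory operand inherits the leading (lower-offset) operation's pointer info. A standalone sketch with plain structs in place of MachineMemOperand (MemOp and combineKnownAdjacent are my names; the real code sums the two operands' sizes the same way):

#include <algorithm>
#include <cassert>
#include <cstdint>

// Stand-in for MachineMemOperand: an offset and a size in bytes.
struct MemOp {
  int64_t Offset;
  uint64_t Size;
};

// Combine two known-adjacent memory operands: the result starts at the
// lower offset and covers both accesses. Byte-adjacency is asserted here
// for simplicity; the pass has already established it by this point.
MemOp combineKnownAdjacent(MemOp A, MemOp B) {
  if (B.Offset < A.Offset)   // mirrors the `if (Paired < CI)` swap at 946
    std::swap(A, B);
  assert(A.Offset + (int64_t)A.Size == B.Offset && "operands must be adjacent");
  return {A.Offset, A.Size + B.Size};
}
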
960 const CombineInfo &Paired) {
977 if (AMDGPU::getNamedOperandIdx(Paired.I->getOpcode(), op) != Idx)
980 CI.I->getOperand(Idx).getImm() != Paired.I->getOperand(Idx).getImm())
985 unsigned MaxMask = std::max(CI.DMask, Paired.DMask);
986 unsigned MinMask = std::min(CI.DMask, Paired.DMask);
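
Note: 960-986 are dmasksCanBeCombined: every other named operand must match between the two image ops (977-980), and as I read these matches the two dmasks may merge only when the smaller mask sits entirely below the larger mask's lowest set bit, i.e. the channel ranges are disjoint and each stays contiguous. A standalone C++20 sketch (the function name is mine):

#include <algorithm>
#include <bit>
#include <cstdio>

bool dmasksCanCombine(unsigned DMaskA, unsigned DMaskB) {
  unsigned MaxMask = std::max(DMaskA, DMaskB);
  unsigned MinMask = std::min(DMaskA, DMaskB);
  if (MaxMask == 0)
    return false;                  // guard: countr_zero(0) would be 32
  unsigned AllowedBitsForMin = std::countr_zero(MaxMask);
  return (1u << AllowedBitsForMin) > MinMask;
}

int main() {
  std::printf("%d\n", dmasksCanCombine(0b1100, 0b0011)); // 1: disjoint, combinable
  std::printf("%d\n", dmasksCanCombine(0b0110, 0b0011)); // 0: bit 1 overlaps
}
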
1035 CombineInfo &Paired,
1041 if (CI.Offset == Paired.Offset)
1045 if ((CI.Offset % CI.EltSize != 0) || (Paired.Offset % CI.EltSize != 0))
1055 llvm::AMDGPU::getGcnBufferFormatInfo(Paired.Format, STI);
1068 if (getBufferFormatWithCompCount(CI.Format, CI.Width + Paired.Width, STI) == 0)
1073 uint32_t EltOffset1 = Paired.Offset / CI.EltSize;
1080 EltOffset1 + Paired.Width != EltOffset0)
1082 if (CI.CPol != Paired.CPol)
1091 if (CI.Width != Paired.Width &&
1092 (CI.Width < Paired.Width) == (CI.Offset < Paired.Offset))
1104 Paired.Offset = EltOffset1 / 64;
1114 Paired.Offset = EltOffset1;
1135 Paired.Offset = (EltOffset1 - BaseOff) / 64;
1149 Paired.Offset = EltOffset1 - BaseOff;
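
Note: 1035-1149 are offsetsCanBeCombined for DS read2/write2: the two offsets must differ (1041), be element-aligned (1045), and both element offsets must fit the instruction's 8-bit offset fields, either directly (1114) or divided by 64 for the stride-64 forms (1104); failing that, the pass tries re-basing against a new BaseOff (1135, 1149). A standalone sketch that omits the re-base fallback (the struct and names are mine):

#include <cstdint>

struct Read2Offsets { uint32_t Offset0, Offset1; bool UseST64; };

bool offsetsCanCombine(uint32_t Off0, uint32_t Off1, uint32_t EltSize,
                       Read2Offsets &Out) {
  if (Off0 == Off1)                      // same address: nothing to merge
    return false;
  if (Off0 % EltSize || Off1 % EltSize)  // must be element-aligned
    return false;
  uint32_t Elt0 = Off0 / EltSize, Elt1 = Off1 / EltSize;
  if (Elt0 < 256 && Elt1 < 256) {        // both fit an 8-bit offset field
    Out = {Elt0, Elt1, false};
    return true;
  }
  if (Elt0 % 64 == 0 && Elt1 % 64 == 0 && Elt0 / 64 < 256 && Elt1 / 64 < 256) {
    Out = {Elt0 / 64, Elt1 / 64, true};  // stride-64 (_ST64) encoding
    return true;
  }
  return false;  // the real pass would still try the BaseOff re-base path
}
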
1159 const CombineInfo &Paired) {
1160 const unsigned Width = (CI.Width + Paired.Width);
1200 /// This function assumes that CI comes before Paired in a basic block. Return
1204 CombineInfo &Paired) {
1207 if (CI.InstClass == UNKNOWN || Paired.InstClass == UNKNOWN)
1209 assert(CI.InstClass == Paired.InstClass);
1212 getInstSubclass(Paired.I->getOpcode(), *TII))
1218 if (!dmasksCanBeCombined(CI, *TII, Paired))
1221 if (!widthsFit(*STM, CI, Paired) || !offsetsCanBeCombined(CI, *STM, Paired))
1229 // Try to hoist Paired up to CI.
1230 addDefsUsesToList(*Paired.I, RegDefs, RegUses);
1231 for (MachineBasicBlock::iterator MBBI = Paired.I; --MBBI != CI.I;) {
1232 if (!canSwapInstructions(RegDefs, RegUses, *Paired.I, *MBBI))
1237 // Try to sink CI down to Paired.
1239 for (MachineBasicBlock::iterator MBBI = CI.I; ++MBBI != Paired.I;) {
1243 Where = &Paired;
1251 offsetsCanBeCombined(CI, *STM, Paired, true);
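
Note: 1229-1243 are the legality scans in checkAndPrepareMerge: first try to hoist Paired up to CI by checking that it can swap past every instruction strictly between them; if that fails, try to sink CI down to Paired instead, in which case Where points at Paired (1243). A generic sketch of the backward scan, with canSwapInstructions as a caller-supplied predicate standing in for the pass's register/memory hazard check:

#include <cstddef>
#include <vector>

// The second instruction (at SecondIdx) can be hoisted next to the first
// (at FirstIdx) only if it can swap past each instruction strictly
// between the two.
template <typename Inst, typename CanSwap>
bool canHoistSecondToFirst(const std::vector<Inst> &Block, size_t FirstIdx,
                           size_t SecondIdx, CanSwap canSwapInstructions) {
  for (size_t I = SecondIdx; I-- > FirstIdx + 1;)  // SecondIdx-1 .. FirstIdx+1
    if (!canSwapInstructions(Block[SecondIdx], Block[I]))
      return false;
  return true;
}
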
1256 // Paired.
1258 CombineInfo &CI, CombineInfo &Paired,
1264 auto [SubRegIdx0, SubRegIdx1] = getSubRegIdxs(CI, Paired);
1269 auto *Dest1 = TII->getNamedOperand(*Paired.I, OpName);
1286 // original source regs of CI and Paired into it.
1288 SILoadStoreOptimizer::copyFromSrcRegs(CombineInfo &CI, CombineInfo &Paired,
1294 auto [SubRegIdx0, SubRegIdx1] = getSubRegIdxs(CI, Paired);
1297 const TargetRegisterClass *SuperRC = getTargetRegisterClass(CI, Paired);
1301 const auto *Src1 = TII->getNamedOperand(*Paired.I, OpName);
1327 SILoadStoreOptimizer::mergeRead2Pair(CombineInfo &CI, CombineInfo &Paired,
1335 unsigned NewOffset0 = std::min(CI.Offset, Paired.Offset);
1336 unsigned NewOffset1 = std::max(CI.Offset, Paired.Offset);
1345 const TargetRegisterClass *SuperRC = getTargetRegisterClass(CI, Paired);
1374 .cloneMergedMemRefs({&*CI.I, &*Paired.I});
1376 copyToDestRegs(CI, Paired, InsertBefore, AMDGPU::OpName::vdst, DestReg);
1379 Paired.I->eraseFromParent();
1402 CombineInfo &CI, CombineInfo &Paired,
1413 TII->getNamedOperand(*Paired.I, AMDGPU::OpName::data0);
1416 unsigned NewOffset1 = Paired.Offset;
1458 .cloneMergedMemRefs({&*CI.I, &*Paired.I});
1461 Paired.I->eraseFromParent();
1468 SILoadStoreOptimizer::mergeImagePair(CombineInfo &CI, CombineInfo &Paired,
1472 const unsigned Opcode = getNewOpcode(CI, Paired);
1474 const TargetRegisterClass *SuperRC = getTargetRegisterClass(CI, Paired);
1477 unsigned MergedDMask = CI.DMask | Paired.DMask;
1492 assert(CI.I->hasOneMemOperand() && Paired.I->hasOneMemOperand());
1494 MachineInstr *New = MIB.addMemOperand(combineKnownAdjacentMMOs(CI, Paired));
1496 copyToDestRegs(CI, Paired, InsertBefore, AMDGPU::OpName::vdata, DestReg);
1499 Paired.I->eraseFromParent();
1504 CombineInfo &CI, CombineInfo &Paired,
1508 const unsigned Opcode = getNewOpcode(CI, Paired);
1510 const TargetRegisterClass *SuperRC = getTargetRegisterClass(CI, Paired);
1513 unsigned MergedOffset = std::min(CI.Offset, Paired.Offset);
1518 assert(CI.I->hasOneMemOperand() && Paired.I->hasOneMemOperand());
1526 New.addImm(CI.CPol).addMemOperand(combineKnownAdjacentMMOs(CI, Paired));
1528 copyToDestRegs(CI, Paired, InsertBefore, AMDGPU::OpName::sdst, DestReg);
1531 Paired.I->eraseFromParent();
1536 CombineInfo &CI, CombineInfo &Paired,
1541 const unsigned Opcode = getNewOpcode(CI, Paired);
1543 const TargetRegisterClass *SuperRC = getTargetRegisterClass(CI, Paired);
1547 unsigned MergedOffset = std::min(CI.Offset, Paired.Offset);
1559 assert(CI.I->hasOneMemOperand() && Paired.I->hasOneMemOperand());
1567 .addMemOperand(combineKnownAdjacentMMOs(CI, Paired));
1569 copyToDestRegs(CI, Paired, InsertBefore, AMDGPU::OpName::vdata, DestReg);
1572 Paired.I->eraseFromParent();
1577 CombineInfo &CI, CombineInfo &Paired,
1582 const unsigned Opcode = getNewOpcode(CI, Paired);
1584 const TargetRegisterClass *SuperRC = getTargetRegisterClass(CI, Paired);
1588 unsigned MergedOffset = std::min(CI.Offset, Paired.Offset);
1598 getBufferFormatWithCompCount(CI.Format, CI.Width + Paired.Width, *STM);
1603 assert(CI.I->hasOneMemOperand() && Paired.I->hasOneMemOperand());
1612 .addMemOperand(combineKnownAdjacentMMOs(CI, Paired));
1614 copyToDestRegs(CI, Paired, InsertBefore, AMDGPU::OpName::vdata, DestReg);
1617 Paired.I->eraseFromParent();
1622 CombineInfo &CI, CombineInfo &Paired,
1627 const unsigned Opcode = getNewOpcode(CI, Paired);
1630 copyFromSrcRegs(CI, Paired, InsertBefore, AMDGPU::OpName::vdata);
1641 getBufferFormatWithCompCount(CI.Format, CI.Width + Paired.Width, *STM);
1646 assert(CI.I->hasOneMemOperand() && Paired.I->hasOneMemOperand());
1651 .addImm(std::min(CI.Offset, Paired.Offset)) // offset
1655 .addMemOperand(combineKnownAdjacentMMOs(CI, Paired));
1658 Paired.I->eraseFromParent();
1663 CombineInfo &CI, CombineInfo &Paired,
1668 const unsigned Opcode = getNewOpcode(CI, Paired);
1670 const TargetRegisterClass *SuperRC = getTargetRegisterClass(CI, Paired);
1680 .addImm(std::min(CI.Offset, Paired.Offset))
1682 .addMemOperand(combineKnownAdjacentMMOs(CI, Paired));
1684 copyToDestRegs(CI, Paired, InsertBefore, AMDGPU::OpName::vdst, DestReg);
1687 Paired.I->eraseFromParent();
1692 CombineInfo &CI, CombineInfo &Paired,
1697 const unsigned Opcode = getNewOpcode(CI, Paired);
1700 copyFromSrcRegs(CI, Paired, InsertBefore, AMDGPU::OpName::vdata);
1710 MIB.addImm(std::min(CI.Offset, Paired.Offset))
1712 .addMemOperand(combineKnownAdjacentMMOs(CI, Paired));
1715 Paired.I->eraseFromParent();
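
Note: every merge*Pair body from 1327 through 1715 follows one shape: pick the wider opcode via getNewOpcode, take the lower of the two offsets, emit a single instruction at the chosen insertion point with the combined memory operands, wire values through copyToDestRegs (loads) or copyFromSrcRegs (stores), then erase both originals. A minimal sketch of just the shared offset/width arithmetic (the struct and names are mine):

#include <algorithm>
#include <cstdint>
#include <string>

struct MemInst { std::string Opcode; uint32_t Offset; unsigned Width; };

// Shared skeleton: the merged access uses the wider opcode, the lower of
// the two offsets, and the summed dword width; sub-register copies are
// handled by the caller before the two originals are erased.
MemInst mergePair(const MemInst &CI, const MemInst &Paired,
                  const std::string &WiderOpcode) {
  return {WiderOpcode,
          std::min(CI.Offset, Paired.Offset),   // MergedOffset
          CI.Width + Paired.Width};             // combined dword count
}
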
1728 const CombineInfo &Paired) {
1729 const unsigned Width = CI.Width + Paired.Width;
1731 switch (getCommonInstClass(CI, Paired)) {
1877 assert(((unsigned)llvm::popcount(CI.DMask | Paired.DMask) == Width) &&
1885 const CombineInfo &Paired) {
1887 ((unsigned)llvm::popcount(CI.DMask | Paired.DMask) ==
1888 CI.Width + Paired.Width)) &&
1903 assert(Paired.Width >= 1 && Paired.Width <= 4);
1905 if (Paired < CI) {
1906 Idx1 = Idxs[0][Paired.Width - 1];
1907 Idx0 = Idxs[Paired.Width][CI.Width - 1];
1910 Idx1 = Idxs[CI.Width][Paired.Width - 1];
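
Note: 1877-1910 are getSubRegIdxs: the access with the lower offset owns dwords [0, W0) of the merged register and the other owns [W0, W0 + W1), with the `Paired < CI` test at 1905 deciding which operand gets which. A standalone sketch that models AMDGPU sub-register indices as strings (subRange is my name):

#include <string>

// subRange(0, 2) -> "sub0_sub1"; subRange(2, 1) -> "sub2".
std::string subRange(unsigned First, unsigned Width) {
  std::string S;
  for (unsigned I = First; I != First + Width; ++I)
    S += (S.empty() ? "sub" : "_sub") + std::to_string(I);
  return S;
}
// First access (lower offset): subRange(0, W0); second: subRange(W0, W1).
// A `Paired < CI` swap picks which original instruction takes which range.
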
1918 const CombineInfo &Paired) const {
1921 switch (CI.Width + Paired.Width) {
1937 unsigned BitWidth = 32 * (CI.Width + Paired.Width);
1944 CombineInfo &CI, CombineInfo &Paired,
1949 const unsigned Opcode = getNewOpcode(CI, Paired);
1952 copyFromSrcRegs(CI, Paired, InsertBefore, AMDGPU::OpName::vdata);
1966 assert(CI.I->hasOneMemOperand() && Paired.I->hasOneMemOperand());
1971 .addImm(std::min(CI.Offset, Paired.Offset)) // offset
1974 .addMemOperand(combineKnownAdjacentMMOs(CI, Paired));
1977 Paired.I->eraseFromParent();
2379 // is found, it is stored in the Paired field. If no instructions are found, then
2458 CombineInfo &Paired = *Second;
2460 CombineInfo *Where = checkAndPrepareMerge(CI, Paired);
2468 LLVM_DEBUG(dbgs() << "Merging: " << *CI.I << " with: " << *Paired.I);
2476 NewMI = mergeRead2Pair(CI, Paired, Where->I);
2479 NewMI = mergeWrite2Pair(CI, Paired, Where->I);
2484 NewMI = mergeSMemLoadImmPair(CI, Paired, Where->I);
2485 OptimizeListAgain |= CI.Width + Paired.Width < 8;
2488 NewMI = mergeBufferLoadPair(CI, Paired, Where->I);
2489 OptimizeListAgain |= CI.Width + Paired.Width < 4;
2492 NewMI = mergeBufferStorePair(CI, Paired, Where->I);
2493 OptimizeListAgain |= CI.Width + Paired.Width < 4;
2496 NewMI = mergeImagePair(CI, Paired, Where->I);
2497 OptimizeListAgain |= CI.Width + Paired.Width < 4;
2500 NewMI = mergeTBufferLoadPair(CI, Paired, Where->I);
2501 OptimizeListAgain |= CI.Width + Paired.Width < 4;
2504 NewMI = mergeTBufferStorePair(CI, Paired, Where->I);
2505 OptimizeListAgain |= CI.Width + Paired.Width < 4;
2510 NewMI = mergeFlatLoadPair(CI, Paired, Where->I);
2511 OptimizeListAgain |= CI.Width + Paired.Width < 4;
2516 NewMI = mergeFlatStorePair(CI, Paired, Where->I);
2517 OptimizeListAgain |= CI.Width + Paired.Width < 4;
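
Note: the dispatch at 2458-2517 re-queues a merged instruction whenever its combined width is still below the widest encoding, 8 dwords for SMEM loads (2485) versus 4 for the buffer, image, and flat cases, so a later round can merge it again. A one-function sketch of that trigger (the name is mine):

// A merged access narrower than the widest available encoding goes back
// on the work list for another merging round.
bool shouldOptimizeAgain(unsigned MergedWidth, bool IsSMemLoad) {
  unsigned MaxWidth = IsSMemLoad ? 8 : 4;
  return MergedWidth < MaxWidth;
}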