Lines Matching defs:Wait
1 //===- SIInsertWaitcnts.cpp - Insert Wait Instructions --------------------===//
198 unsigned &getCounterRef(AMDGPU::Waitcnt &Wait, InstCounterType T) {
201 return Wait.LoadCnt;
203 return Wait.ExpCnt;
205 return Wait.DsCnt;
207 return Wait.StoreCnt;
209 return Wait.SampleCnt;
211 return Wait.BvhCnt;
213 return Wait.KmCnt;
219 void addWait(AMDGPU::Waitcnt &Wait, InstCounterType T, unsigned Count) {
220 unsigned &WC = getCounterRef(Wait, T);
224 void setNoWait(AMDGPU::Waitcnt &Wait, InstCounterType T) {
225 getCounterRef(Wait, T) = ~0u;
228 unsigned getWait(AMDGPU::Waitcnt &Wait, InstCounterType T) {
229 return getCounterRef(Wait, T);
310 void simplifyWaitcnt(AMDGPU::Waitcnt &Wait) const;
312 void determineWait(InstCounterType T, int RegNo, AMDGPU::Waitcnt &Wait) const;
313 void applyWaitcnt(const AMDGPU::Waitcnt &Wait);
428 // Wait cnt scores for every sgpr, only DS_CNT (corresponding to LGKMcnt
472 // delete instructions if the incoming Wait value indicates they are not
478 MachineInstr &OldWaitcntInstr, AMDGPU::Waitcnt &Wait,
485 // Wait, returning true if any new instructions were created.
488 AMDGPU::Waitcnt Wait) = 0;
519 MachineInstr &OldWaitcntInstr, AMDGPU::Waitcnt &Wait,
524 AMDGPU::Waitcnt Wait) override;
555 MachineInstr &OldWaitcntInstr, AMDGPU::Waitcnt &Wait,
560 AMDGPU::Waitcnt Wait) override;
722 bool generateWaitcnt(AMDGPU::Waitcnt Wait,
1038 void WaitcntBrackets::simplifyWaitcnt(AMDGPU::Waitcnt &Wait) const {
1039 simplifyWaitcnt(LOAD_CNT, Wait.LoadCnt);
1040 simplifyWaitcnt(EXP_CNT, Wait.ExpCnt);
1041 simplifyWaitcnt(DS_CNT, Wait.DsCnt);
1042 simplifyWaitcnt(STORE_CNT, Wait.StoreCnt);
1043 simplifyWaitcnt(SAMPLE_CNT, Wait.SampleCnt);
1044 simplifyWaitcnt(BVH_CNT, Wait.BvhCnt);
1045 simplifyWaitcnt(KM_CNT, Wait.KmCnt);
1058 AMDGPU::Waitcnt &Wait) const {
1071 addWait(Wait, T, 0);
1076 addWait(Wait, T, 0);
1081 addWait(Wait, T, NeededWait);
1086 void WaitcntBrackets::applyWaitcnt(const AMDGPU::Waitcnt &Wait) {
1087 applyWaitcnt(LOAD_CNT, Wait.LoadCnt);
1088 applyWaitcnt(EXP_CNT, Wait.ExpCnt);
1089 applyWaitcnt(DS_CNT, Wait.DsCnt);
1090 applyWaitcnt(STORE_CNT, Wait.StoreCnt);
1091 applyWaitcnt(SAMPLE_CNT, Wait.SampleCnt);
1092 applyWaitcnt(BVH_CNT, Wait.BvhCnt);
1093 applyWaitcnt(KM_CNT, Wait.KmCnt);
1182 /// from \p Wait that were added by previous passes. Currently this pass
1187 AMDGPU::Waitcnt &Wait, MachineBasicBlock::instr_iterator It) const {
1210 Wait = Wait.combined(OldWait);
1213 if (WaitcntInstr || (!Wait.hasWaitExceptStoreCnt() && TrySimplify)) {
1226 Wait.StoreCnt = std::min(Wait.StoreCnt, OldVSCnt);
1228 if (WaitcntVsCntInstr || (!Wait.hasWaitStoreCnt() && TrySimplify)) {
1238 AMDGPU::encodeWaitcnt(IV, Wait));
1241 ScoreBrackets.applyWaitcnt(LOAD_CNT, Wait.LoadCnt);
1242 ScoreBrackets.applyWaitcnt(EXP_CNT, Wait.ExpCnt);
1243 ScoreBrackets.applyWaitcnt(DS_CNT, Wait.DsCnt);
1244 Wait.LoadCnt = ~0u;
1245 Wait.ExpCnt = ~0u;
1246 Wait.DsCnt = ~0u;
1259 AMDGPU::OpName::simm16, Wait.StoreCnt);
1262 ScoreBrackets.applyWaitcnt(STORE_CNT, Wait.StoreCnt);
1263 Wait.StoreCnt = ~0u;
1278 /// required counters in \p Wait
1281 AMDGPU::Waitcnt Wait) {
1290 if (Wait.hasWaitExceptStoreCnt()) {
1291 unsigned Enc = AMDGPU::encodeWaitcnt(IV, Wait);
1301 if (Wait.hasWaitStoreCnt()) {
1307 .addImm(Wait.StoreCnt);
1329 /// follow \p OldWaitcntInstr and apply any extra waits from \p Wait that
1334 AMDGPU::Waitcnt &Wait, MachineBasicBlock::instr_iterator It) const {
1367 Wait = Wait.combined(OldWait);
1375 Wait = Wait.combined(OldWait);
1384 addWait(Wait, CT.value(), OldCnt);
1405 if (Wait.LoadCnt != ~0u && Wait.DsCnt != ~0u) {
1406 unsigned NewEnc = AMDGPU::encodeLoadcntDscnt(IV, Wait);
1410 ScoreBrackets.applyWaitcnt(LOAD_CNT, Wait.LoadCnt);
1411 ScoreBrackets.applyWaitcnt(DS_CNT, Wait.DsCnt);
1412 Wait.LoadCnt = ~0u;
1413 Wait.DsCnt = ~0u;
1430 if (Wait.StoreCnt != ~0u && Wait.DsCnt != ~0u) {
1431 unsigned NewEnc = AMDGPU::encodeStorecntDscnt(IV, Wait);
1435 ScoreBrackets.applyWaitcnt(STORE_CNT, Wait.StoreCnt);
1436 ScoreBrackets.applyWaitcnt(DS_CNT, Wait.DsCnt);
1437 Wait.StoreCnt = ~0u;
1438 Wait.DsCnt = ~0u;
1459 if (Wait.DsCnt != ~0u) {
1468 if (Wait.LoadCnt != ~0u) {
1471 } else if (Wait.StoreCnt != ~0u) {
1490 unsigned NewCnt = getWait(Wait, CT);
1497 setNoWait(Wait, CT);
1515 /// Generate S_WAIT_*CNT instructions for any required counters in \p Wait
1518 AMDGPU::Waitcnt Wait) {
1526 if (Wait.DsCnt != ~0u) {
1529 if (Wait.LoadCnt != ~0u) {
1530 unsigned Enc = AMDGPU::encodeLoadcntDscnt(IV, Wait);
1535 Wait.LoadCnt = ~0u;
1536 Wait.DsCnt = ~0u;
1537 } else if (Wait.StoreCnt != ~0u) {
1538 unsigned Enc = AMDGPU::encodeStorecntDscnt(IV, Wait);
1544 Wait.StoreCnt = ~0u;
1545 Wait.DsCnt = ~0u;
1561 unsigned Count = getWait(Wait, CT);
1621 AMDGPU::Waitcnt Wait;
1632 Wait.LoadCnt = 0;
1642 Wait = Wait.combined(WCG->getAllZeroWaitcnt(/*IncludeVSCnt=*/false));
1663 Wait.LoadCnt = 0;
1678 Wait.ExpCnt = 0;
1686 Wait = AMDGPU::Waitcnt();
1697 ScoreBrackets.determineWait(SmemAccessCounter, RegNo, Wait);
1707 ScoreBrackets.determineWait(SmemAccessCounter, RegNo, Wait);
1728 addWait(Wait, SmemAccessCounter, 0);
1753 ScoreBrackets.determineWait(LOAD_CNT, RegNo + I + 1, Wait);
1758 ScoreBrackets.determineWait(LOAD_CNT, RegNo, Wait);
1760 ScoreBrackets.determineWait(EXP_CNT, RegNo, Wait);
1787 ScoreBrackets.determineWait(LOAD_CNT, RegNo, Wait);
1788 ScoreBrackets.determineWait(SAMPLE_CNT, RegNo, Wait);
1789 ScoreBrackets.determineWait(BVH_CNT, RegNo, Wait);
1793 ScoreBrackets.determineWait(EXP_CNT, RegNo, Wait);
1795 ScoreBrackets.determineWait(DS_CNT, RegNo, Wait);
1797 ScoreBrackets.determineWait(SmemAccessCounter, RegNo, Wait);
1810 Wait = Wait.combined(WCG->getAllZeroWaitcnt(/*IncludeVSCnt=*/true));
1818 Wait.DsCnt = 0;
1823 ScoreBrackets.simplifyWaitcnt(Wait);
1826 Wait = WCG->getAllZeroWaitcnt(/*IncludeVSCnt=*/false);
1829 Wait.LoadCnt = 0;
1831 Wait.ExpCnt = 0;
1833 Wait.DsCnt = 0;
1835 Wait.SampleCnt = 0;
1837 Wait.BvhCnt = 0;
1839 Wait.KmCnt = 0;
1843 Wait.LoadCnt = 0;
1845 Wait.SampleCnt = 0;
1847 Wait.BvhCnt = 0;
1850 return generateWaitcnt(Wait, MI.getIterator(), *MI.getParent(), ScoreBrackets,
1854 bool SIInsertWaitcnts::generateWaitcnt(AMDGPU::Waitcnt Wait,
1865 WCG->applyPreexistingWaitcnt(ScoreBrackets, *OldWaitcntInstr, Wait, It);
1869 ScoreBrackets.applyWaitcnt(Wait);
1872 if (Wait.ExpCnt != ~0u && It != Block.instr_end() &&
1876 if (Wait.ExpCnt < WaitExp->getImm()) {
1877 WaitExp->setImm(Wait.ExpCnt);
1880 Wait.ExpCnt = ~0u;
1886 if (WCG->createNewWaitcnt(Block, It, Wait))
2250 AMDGPU::Waitcnt Wait = WCG->getAllZeroWaitcnt(
2252 ScoreBrackets.simplifyWaitcnt(Wait);
2253 Modified |= generateWaitcnt(Wait, std::next(Inst.getIterator()), Block,
2281 AMDGPU::Waitcnt Wait;
2285 Wait.LoadCnt = 0;
2287 Wait.SampleCnt = 0;
2289 Wait.BvhCnt = 0;
2293 Modified |= generateWaitcnt(Wait, Block.instr_end(), Block, ScoreBrackets,
2461 // Wait for any outstanding memory operations that the input registers may