# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
# RUN: llc -mtriple=amdgcn -mcpu=gfx90a -run-pass si-insert-waitcnts %s -o - | FileCheck -check-prefix=GFX9 %s
# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -run-pass si-insert-waitcnts %s -o - | FileCheck -check-prefix=GFX12 %s

# Every function in this file is run through the si-insert-waitcnts pass twice,
# once for gfx90a and once for gfx1200, and the resulting MIR is compared with
# the autogenerated expectations embedded in each body. Do not edit those
# expectation lines by hand; regenerate them with the script named on the
# first line.

# There shall be no S_WAITCNT between two stores.
#
# The first store reads $vgpr64 and carries implicit-def / implicit operands on
# the whole $vgpr64_vgpr65 tuple; the second store reads $vgpr65. For both
# targets the expected output keeps the two stores back to back, with waits
# only at function entry.

---
name: spill_vgpr_tuple

body: |
  bb.0:
    liveins: $vgpr0_vgpr1, $sgpr76_sgpr77_sgpr78_sgpr79

    ; GFX9-LABEL: name: spill_vgpr_tuple
    ; GFX9: liveins: $vgpr0_vgpr1, $sgpr76_sgpr77_sgpr78_sgpr79
    ; GFX9-NEXT: {{ $}}
    ; GFX9-NEXT: S_WAITCNT 0
    ; GFX9-NEXT: $vgpr64_vgpr65 = V_MOV_B64_e32 $vgpr0_vgpr1, implicit $exec
    ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr64, $sgpr76_sgpr77_sgpr78_sgpr79, 0, 672, 0, 0, implicit $exec, implicit-def $vgpr64_vgpr65, implicit $vgpr64_vgpr65
    ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr65, $sgpr76_sgpr77_sgpr78_sgpr79, 0, 676, 0, 0, implicit $exec, implicit $vgpr64_vgpr65
    ; GFX9-NEXT: S_ENDPGM 0
    ;
    ; GFX12-LABEL: name: spill_vgpr_tuple
    ; GFX12: liveins: $vgpr0_vgpr1, $sgpr76_sgpr77_sgpr78_sgpr79
    ; GFX12-NEXT: {{ $}}
    ; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0
    ; GFX12-NEXT: S_WAIT_EXPCNT 0
    ; GFX12-NEXT: S_WAIT_SAMPLECNT 0
    ; GFX12-NEXT: S_WAIT_BVHCNT 0
    ; GFX12-NEXT: S_WAIT_KMCNT 0
    ; GFX12-NEXT: $vgpr64_vgpr65 = V_MOV_B64_e32 $vgpr0_vgpr1, implicit $exec
    ; GFX12-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr64, $sgpr76_sgpr77_sgpr78_sgpr79, 0, 672, 0, 0, implicit $exec, implicit-def $vgpr64_vgpr65, implicit $vgpr64_vgpr65
    ; GFX12-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr65, $sgpr76_sgpr77_sgpr78_sgpr79, 0, 676, 0, 0, implicit $exec, implicit $vgpr64_vgpr65
    ; GFX12-NEXT: S_ENDPGM 0
    $vgpr64_vgpr65 = V_MOV_B64_e32 $vgpr0_vgpr1, implicit $exec
    BUFFER_STORE_DWORD_OFFSET killed $vgpr64, $sgpr76_sgpr77_sgpr78_sgpr79, 0, 672, 0, 0, implicit $exec, implicit-def $vgpr64_vgpr65, implicit $vgpr64_vgpr65
    BUFFER_STORE_DWORD_OFFSET $vgpr65, $sgpr76_sgpr77_sgpr78_sgpr79, 0, 676, 0, 0, implicit $exec, implicit $vgpr64_vgpr65
    S_ENDPGM 0
...

# Make sure that while ignoring implicit operands we will not ignore implicit $vcc on VALU
#
# $vcc_lo is written by a scalar load and is then consumed only through the
# implicit $vcc operand of V_ADDC_U32_e32. The expected output has a wait
# between the two instructions (S_WAITCNT 49279 for gfx90a, S_WAIT_KMCNT 0 for
# gfx1200).
#
# NOTE(review): the gfx1200 expectations print the V_ADDC_U32_e32 operands as
# $vcc_lo although the input below spells them $vcc. Confirm this still matches
# the llc output the next time the assertions are regenerated.

---
name: load_vcc_wait

body: |
  bb.0:
    liveins: $vgpr0, $sgpr10_sgpr11

    ; GFX9-LABEL: name: load_vcc_wait
    ; GFX9: liveins: $vgpr0, $sgpr10_sgpr11
    ; GFX9-NEXT: {{ $}}
    ; GFX9-NEXT: S_WAITCNT 0
    ; GFX9-NEXT: $vcc_lo = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
    ; GFX9-NEXT: S_WAITCNT 49279
    ; GFX9-NEXT: $vgpr1 = V_ADDC_U32_e32 0, $vgpr0, implicit-def $vcc, implicit $vcc, implicit $exec
    ; GFX9-NEXT: S_ENDPGM 0
    ;
    ; GFX12-LABEL: name: load_vcc_wait
    ; GFX12: liveins: $vgpr0, $sgpr10_sgpr11
    ; GFX12-NEXT: {{ $}}
    ; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0
    ; GFX12-NEXT: S_WAIT_EXPCNT 0
    ; GFX12-NEXT: S_WAIT_SAMPLECNT 0
    ; GFX12-NEXT: S_WAIT_BVHCNT 0
    ; GFX12-NEXT: S_WAIT_KMCNT 0
    ; GFX12-NEXT: $vcc_lo = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
    ; GFX12-NEXT: S_WAIT_KMCNT 0
    ; GFX12-NEXT: $vgpr1 = V_ADDC_U32_e32 0, $vgpr0, implicit-def $vcc_lo, implicit $vcc_lo, implicit $exec
    ; GFX12-NEXT: S_ENDPGM 0
    $vcc_lo = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
    $vgpr1 = V_ADDC_U32_e32 0, $vgpr0, implicit-def $vcc, implicit $vcc, implicit $exec
    S_ENDPGM 0
...

# Make sure that while ignoring implicit operands we will not ignore implicit $flat_scr on FLAT
#
# $flat_scr_lo is written by a scalar load and FLAT_LOAD_DWORD reads $flat_scr
# only as an implicit operand. The expected output has a wait between the two
# instructions (S_WAITCNT 49279 for gfx90a, S_WAIT_KMCNT 0 for gfx1200).

---
name: load_flat_scr_lo_flat_load_wait

body: |
  bb.0:
    liveins: $sgpr10_sgpr11, $vgpr0_vgpr1

    ; GFX9-LABEL: name: load_flat_scr_lo_flat_load_wait
    ; GFX9: liveins: $sgpr10_sgpr11, $vgpr0_vgpr1
    ; GFX9-NEXT: {{ $}}
    ; GFX9-NEXT: S_WAITCNT 0
    ; GFX9-NEXT: $flat_scr_lo = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
    ; GFX9-NEXT: S_WAITCNT 49279
    ; GFX9-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
    ; GFX9-NEXT: S_ENDPGM 0
    ;
    ; GFX12-LABEL: name: load_flat_scr_lo_flat_load_wait
    ; GFX12: liveins: $sgpr10_sgpr11, $vgpr0_vgpr1
    ; GFX12-NEXT: {{ $}}
    ; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0
    ; GFX12-NEXT: S_WAIT_EXPCNT 0
    ; GFX12-NEXT: S_WAIT_SAMPLECNT 0
    ; GFX12-NEXT: S_WAIT_BVHCNT 0
    ; GFX12-NEXT: S_WAIT_KMCNT 0
    ; GFX12-NEXT: $flat_scr_lo = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
    ; GFX12-NEXT: S_WAIT_KMCNT 0
    ; GFX12-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
    ; GFX12-NEXT: S_ENDPGM 0
    $flat_scr_lo = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
    $vgpr2 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
    S_ENDPGM 0
...

# A scalar load writes half of $flat_scr and a scratch store then reads
# $flat_scr only as an implicit operand. The expected output has a wait between
# the two instructions (S_WAITCNT 49279 for gfx90a, S_WAIT_KMCNT 0 for gfx1200).
# Note that, despite the "lo" in the function name, the register written by the
# load in this test is $flat_scr_hi.

---
name: load_flat_scr_lo_scratch_store_wait

body: |
  bb.0:
    liveins: $sgpr10_sgpr11, $vgpr0, $sgpr32

    ; GFX9-LABEL: name: load_flat_scr_lo_scratch_store_wait
    ; GFX9: liveins: $sgpr10_sgpr11, $vgpr0, $sgpr32
    ; GFX9-NEXT: {{ $}}
    ; GFX9-NEXT: S_WAITCNT 0
    ; GFX9-NEXT: $flat_scr_hi = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
    ; GFX9-NEXT: S_WAITCNT 49279
    ; GFX9-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr
    ; GFX9-NEXT: S_ENDPGM 0
    ;
    ; GFX12-LABEL: name: load_flat_scr_lo_scratch_store_wait
    ; GFX12: liveins: $sgpr10_sgpr11, $vgpr0, $sgpr32
    ; GFX12-NEXT: {{ $}}
    ; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0
    ; GFX12-NEXT: S_WAIT_EXPCNT 0
    ; GFX12-NEXT: S_WAIT_SAMPLECNT 0
    ; GFX12-NEXT: S_WAIT_BVHCNT 0
    ; GFX12-NEXT: S_WAIT_KMCNT 0
    ; GFX12-NEXT: $flat_scr_hi = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
    ; GFX12-NEXT: S_WAIT_KMCNT 0
    ; GFX12-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr
    ; GFX12-NEXT: S_ENDPGM 0
    $flat_scr_hi = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
    SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr
    S_ENDPGM 0
...

# Check that implicit spill defs do not force wait to zero on the first store
#
# Four loads fill $vgpr0 through $vgpr3 (the first and last also carry an
# implicit-def of the whole $vgpr0_vgpr1_vgpr2_vgpr3 tuple) and four stores then
# consume the registers in the same order. In the expected output the wait in
# front of the first store is non-zero for both targets: gfx90a has
# S_WAITCNT 3955 in front of every store, while gfx1200 has S_WAIT_LOADCNT
# counting down 3, 2, 1, 0.
#
# NOTE(review): the gfx90a value staying at 3955 instead of counting down is
# presumably because the stores issued in between count against the same
# counter as the loads on that target - confirm before "fixing" it by hand.

---
name: spill_load_store

body: |
  bb.0:
    liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32

    ; GFX9-LABEL: name: spill_load_store
    ; GFX9: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32
    ; GFX9-NEXT: {{ $}}
    ; GFX9-NEXT: S_WAITCNT 0
    ; GFX9-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3
    ; GFX9-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec
    ; GFX9-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec
    ; GFX9-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3
    ; GFX9-NEXT: S_WAITCNT 3955
    ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3
    ; GFX9-NEXT: S_WAITCNT 3955
    ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec
    ; GFX9-NEXT: S_WAITCNT 3955
    ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec
    ; GFX9-NEXT: S_WAITCNT 3955
    ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3
    ; GFX9-NEXT: S_ENDPGM 0
    ;
    ; GFX12-LABEL: name: spill_load_store
    ; GFX12: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32
    ; GFX12-NEXT: {{ $}}
    ; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0
    ; GFX12-NEXT: S_WAIT_EXPCNT 0
    ; GFX12-NEXT: S_WAIT_SAMPLECNT 0
    ; GFX12-NEXT: S_WAIT_BVHCNT 0
    ; GFX12-NEXT: S_WAIT_KMCNT 0
    ; GFX12-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3
    ; GFX12-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec
    ; GFX12-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec
    ; GFX12-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3
    ; GFX12-NEXT: S_WAIT_LOADCNT 3
    ; GFX12-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3
    ; GFX12-NEXT: S_WAIT_LOADCNT 2
    ; GFX12-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec
    ; GFX12-NEXT: S_WAIT_LOADCNT 1
    ; GFX12-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec
    ; GFX12-NEXT: S_WAIT_LOADCNT 0
    ; GFX12-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3
    ; GFX12-NEXT: S_ENDPGM 0
    $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3
    $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec
    $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec
    $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3
    BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3
    BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec
    BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec
    BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3
    S_ENDPGM 0
...

# Make sure we have wait to mitigate WAW on gfx12
#
# Both scratch loads write $vgpr2, and the second one also takes $vgpr2 as an
# input operand. The gfx1200 expectations have S_WAIT_LOADCNT 0 between the two
# loads; the gfx90a expectations have no wait between them.

---
name: scratch_load_waw
body: |
  bb.0.entry:
    liveins: $vgpr0, $sgpr0

    ; GFX9-LABEL: name: scratch_load_waw
    ; GFX9: liveins: $vgpr0, $sgpr0
    ; GFX9-NEXT: {{ $}}
    ; GFX9-NEXT: S_WAITCNT 0
    ; GFX9-NEXT: $vgpr2 = SCRATCH_LOAD_DWORD $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
    ; GFX9-NEXT: $vgpr2 = SCRATCH_LOAD_SHORT_D16_HI_SADDR $sgpr0, 0, 0, $vgpr2, implicit $exec, implicit $flat_scr
    ; GFX9-NEXT: S_ENDPGM 0
    ;
    ; GFX12-LABEL: name: scratch_load_waw
    ; GFX12: liveins: $vgpr0, $sgpr0
    ; GFX12-NEXT: {{ $}}
    ; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0
    ; GFX12-NEXT: S_WAIT_EXPCNT 0
    ; GFX12-NEXT: S_WAIT_SAMPLECNT 0
    ; GFX12-NEXT: S_WAIT_BVHCNT 0
    ; GFX12-NEXT: S_WAIT_KMCNT 0
    ; GFX12-NEXT: $vgpr2 = SCRATCH_LOAD_DWORD $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
    ; GFX12-NEXT: S_WAIT_LOADCNT 0
    ; GFX12-NEXT: $vgpr2 = SCRATCH_LOAD_SHORT_D16_HI_SADDR $sgpr0, 0, 0, $vgpr2, implicit $exec, implicit $flat_scr
    ; GFX12-NEXT: S_ENDPGM 0
    $vgpr2 = SCRATCH_LOAD_DWORD $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
    $vgpr2 = SCRATCH_LOAD_SHORT_D16_HI_SADDR $sgpr0, 0, 0, $vgpr2, implicit $exec, implicit $flat_scr
    S_ENDPGM 0
...