# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -verify-machineinstrs -start-before=amdgpu-insert-delay-alu %s -o - | FileCheck %s

# The second add reads v0 straight after the first add writes it, so the
# expected delay is VALU_DEP_1.
---
name: valu_dep_1
body: |
  bb.0:
  ; CHECK-LABEL: {{^}}valu_dep_1:
  ; CHECK: %bb.0:
  ; CHECK-NEXT: v_add_nc_u32_e32 v0, v0, v0
  ; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1)
  ; CHECK-NEXT: v_add_nc_u32_e32 v0, v0, v0
    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
...

# One unrelated VALU instruction (on v1) sits between the write of v0 and its
# use, so the expected delay is VALU_DEP_2.
---
name: valu_dep_2
body: |
  bb.0:
  ; CHECK-LABEL: {{^}}valu_dep_2:
  ; CHECK: %bb.0:
  ; CHECK-NEXT: v_add_nc_u32_e32 v0, v0, v0
  ; CHECK-NEXT: v_add_nc_u32_e32 v1, v1, v1
  ; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_2)
  ; CHECK-NEXT: v_add_nc_u32_e32 v0, v0, v0
    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
    $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
...

# Two unrelated VALU instructions (on v1 and v2) sit between the write of v0
# and its use, so the expected delay is VALU_DEP_3.
---
name: valu_dep_3
body: |
  bb.0:
  ; CHECK-LABEL: {{^}}valu_dep_3:
  ; CHECK: %bb.0:
  ; CHECK-NEXT: v_add_nc_u32_e32 v0, v0, v0
  ; CHECK-NEXT: v_add_nc_u32_e32 v1, v1, v1
  ; CHECK-NEXT: v_add_nc_u32_e32 v2, v2, v2
  ; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_3)
  ; CHECK-NEXT: v_add_nc_u32_e32 v0, v0, v0
    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
    $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
    $vgpr2 = V_ADD_U32_e32 $vgpr2, $vgpr2, implicit $exec
    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
...

# Three unrelated VALU instructions (on v1, v2 and v3) sit between the write of
# v0 and its use, so the expected delay is VALU_DEP_4.
---
name: valu_dep_4
body: |
  bb.0:
  ; CHECK-LABEL: {{^}}valu_dep_4:
  ; CHECK: %bb.0:
  ; CHECK-NEXT: v_add_nc_u32_e32 v0, v0, v0
  ; CHECK-NEXT: v_add_nc_u32_e32 v1, v1, v1
  ; CHECK-NEXT: v_add_nc_u32_e32 v2, v2, v2
  ; CHECK-NEXT: v_add_nc_u32_e32 v3, v3, v3
  ; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_4)
  ; CHECK-NEXT: v_add_nc_u32_e32 v0, v0, v0
    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
    $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
    $vgpr2 = V_ADD_U32_e32 $vgpr2, $vgpr2, implicit $exec
    $vgpr3 = V_ADD_U32_e32 $vgpr3, $vgpr3, implicit $exec
    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
...

# There's no encoding for VALU_DEP_5. A normal VALU instruction will have
# completed already, so no s_delay_alu is expected when four unrelated VALU
# instructions separate the write of v0 from its use.
---
name: valu_dep_5
body: |
  bb.0:
  ; CHECK-LABEL: {{^}}valu_dep_5:
  ; CHECK: %bb.0:
  ; CHECK-NEXT: v_add_nc_u32_e32 v0, v0, v0
  ; CHECK-NEXT: v_add_nc_u32_e32 v1, v1, v1
  ; CHECK-NEXT: v_add_nc_u32_e32 v2, v2, v2
  ; CHECK-NEXT: v_add_nc_u32_e32 v3, v3, v3
  ; CHECK-NEXT: v_add_nc_u32_e32 v4, v4, v4
  ; CHECK-NEXT: v_add_nc_u32_e32 v0, v0, v0
    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
    $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
    $vgpr2 = V_ADD_U32_e32 $vgpr2, $vgpr2, implicit $exec
    $vgpr3 = V_ADD_U32_e32 $vgpr3, $vgpr3, implicit $exec
    $vgpr4 = V_ADD_U32_e32 $vgpr4, $vgpr4, implicit $exec
    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
...

# The add reads v0 straight after V_EXP_F32 writes it, so the expected delay is
# TRANS32_DEP_1.
---
name: trans32_dep_1
body: |
  bb.0:
  ; CHECK-LABEL: {{^}}trans32_dep_1:
  ; CHECK: %bb.0:
  ; CHECK-NEXT: v_exp_f32_e32 v0, v0
  ; CHECK-NEXT: s_delay_alu instid0(TRANS32_DEP_1)
  ; CHECK-NEXT: v_add_nc_u32_e32 v0, v0, v0
    $vgpr0 = V_EXP_F32_e32 $vgpr0, implicit $exec, implicit $mode
    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
...

# One unrelated V_EXP_F32 (on v1) sits between the V_EXP_F32 that writes v0 and
# the add that reads it, so the expected delay is TRANS32_DEP_2.
---
name: trans32_dep_2
body: |
  bb.0:
  ; CHECK-LABEL: {{^}}trans32_dep_2:
  ; CHECK: %bb.0:
  ; CHECK-NEXT: v_exp_f32_e32 v0, v0
  ; CHECK-NEXT: v_exp_f32_e32 v1, v1
  ; CHECK-NEXT: s_delay_alu instid0(TRANS32_DEP_2)
  ; CHECK-NEXT: v_add_nc_u32_e32 v0, v0, v0
    $vgpr0 = V_EXP_F32_e32 $vgpr0, implicit $exec, implicit $mode
    $vgpr1 = V_EXP_F32_e32 $vgpr1, implicit $exec, implicit $mode
    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
...

# Two unrelated V_EXP_F32 instructions (on v1 and v2) sit between the write of
# v0 and its use, so the expected delay is TRANS32_DEP_3.
---
name: trans32_dep_3
body: |
  bb.0:
  ; CHECK-LABEL: {{^}}trans32_dep_3:
  ; CHECK: %bb.0:
  ; CHECK-NEXT: v_exp_f32_e32 v0, v0
  ; CHECK-NEXT: v_exp_f32_e32 v1, v1
  ; CHECK-NEXT: v_exp_f32_e32 v2, v2
  ; CHECK-NEXT: s_delay_alu instid0(TRANS32_DEP_3)
  ; CHECK-NEXT: v_add_nc_u32_e32 v0, v0, v0
    $vgpr0 = V_EXP_F32_e32 $vgpr0, implicit $exec, implicit $mode
    $vgpr1 = V_EXP_F32_e32 $vgpr1, implicit $exec, implicit $mode
    $vgpr2 = V_EXP_F32_e32 $vgpr2, implicit $exec, implicit $mode
    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
...

# There's no encoding for TRANS32_DEP_4. A normal TRANS instruction will have
# completed already, so no s_delay_alu is expected when three unrelated
# V_EXP_F32 instructions separate the write of v0 from its use.
---
name: trans32_dep_4
body: |
  bb.0:
  ; CHECK-LABEL: {{^}}trans32_dep_4:
  ; CHECK: %bb.0:
  ; CHECK-NEXT: v_exp_f32_e32 v0, v0
  ; CHECK-NEXT: v_exp_f32_e32 v1, v1
  ; CHECK-NEXT: v_exp_f32_e32 v2, v2
  ; CHECK-NEXT: v_exp_f32_e32 v3, v3
  ; CHECK-NEXT: v_add_nc_u32_e32 v0, v0, v0
    $vgpr0 = V_EXP_F32_e32 $vgpr0, implicit $exec, implicit $mode
    $vgpr1 = V_EXP_F32_e32 $vgpr1, implicit $exec, implicit $mode
    $vgpr2 = V_EXP_F32_e32 $vgpr2, implicit $exec, implicit $mode
    $vgpr3 = V_EXP_F32_e32 $vgpr3, implicit $exec, implicit $mode
    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
...

# The add reads s0 straight after S_MOV_B32 writes it, so the expected delay is
# SALU_CYCLE_1.
---
name: salu_cycle_1
body: |
  bb.0:
  ; CHECK-LABEL: {{^}}salu_cycle_1:
  ; CHECK: %bb.0:
  ; CHECK-NEXT: s_mov_b32 s0, 0
  ; CHECK-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
  ; CHECK-NEXT: v_add_nc_u32_e32 v0, s0, v0
    $sgpr0 = S_MOV_B32 0
    $vgpr0 = V_ADD_U32_e32 $sgpr0, $vgpr0, implicit $exec
...

# There's no need for SALU_CYCLE_2 here because the s_mov will have completed
# already.
---
name: salu_cycle_2
body: |
  bb.0:
  ; CHECK-LABEL: {{^}}salu_cycle_2:
  ; CHECK: %bb.0:
  ; CHECK-NEXT: s_mov_b32 s0, 0
  ; CHECK-NEXT: v_add_nc_u32_e32 v1, v1, v1
  ; CHECK-NEXT: v_add_nc_u32_e32 v0, s0, v0
    $sgpr0 = S_MOV_B32 0
    $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
    $vgpr0 = V_ADD_U32_e32 $sgpr0, $vgpr0, implicit $exec
...

# The final add reads v0 from a V_EXP_F32 and v1 from a VALU add; both
# dependencies are expected in a single s_delay_alu (instid0 and instid1).
---
name: valu_dep_1_same_trans32_dep_1
body: |
  bb.0:
  ; CHECK-LABEL: {{^}}valu_dep_1_same_trans32_dep_1:
  ; CHECK: %bb.0:
  ; CHECK-NEXT: v_exp_f32_e32 v0, v0
  ; CHECK-NEXT: v_add_nc_u32_e32 v1, v1, v1
  ; CHECK-NEXT: s_delay_alu instid0(TRANS32_DEP_1) | instid1(VALU_DEP_1)
  ; CHECK-NEXT: v_add_nc_u32_e32 v0, v0, v1
    $vgpr0 = V_EXP_F32_e32 $vgpr0, implicit $exec, implicit $mode
    $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
...

# There's no need to encode the VALU dependency because it will complete before
# the TRANS.
---
name: trans32_dep_1_only
body: |
  bb.0:
  ; CHECK-LABEL: {{^}}trans32_dep_1_only:
  ; CHECK: %bb.0:
  ; CHECK-NEXT: v_add_nc_u32_e32 v0, v0, v0
  ; CHECK-NEXT: v_exp_f32_e32 v1, v1
  ; CHECK-NEXT: s_delay_alu instid0(TRANS32_DEP_1)
  ; CHECK-NEXT: v_add_nc_u32_e32 v0, v0, v1
    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
    $vgpr1 = V_EXP_F32_e32 $vgpr1, implicit $exec, implicit $mode
    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
...

# The final add reads v0 from a VALU add and s0 from an S_MOV_B32; both
# dependencies are expected in a single s_delay_alu (instid0 and instid1).
---
name: valu_dep_1_same_salu_cycle_1
body: |
  bb.0:
  ; CHECK-LABEL: {{^}}valu_dep_1_same_salu_cycle_1:
  ; CHECK: %bb.0:
  ; CHECK-NEXT: v_add_nc_u32_e32 v0, v0, v0
  ; CHECK-NEXT: s_mov_b32 s0, 0
  ; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
  ; CHECK-NEXT: v_add_nc_u32_e32 v0, s0, v0
    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
    $sgpr0 = S_MOV_B32 0
    $vgpr0 = V_ADD_U32_e32 $sgpr0, $vgpr0, implicit $exec
...

# Three adds chained through v0: a single s_delay_alu is expected to carry the
# delay for the second add (instid0) and, via instskip(NEXT), for the third add
# (instid1).
---
name: valu_dep_1_next_valu_dep_1
body: |
  bb.0:
  ; CHECK-LABEL: {{^}}valu_dep_1_next_valu_dep_1:
  ; CHECK: %bb.0:
  ; CHECK-NEXT: v_add_nc_u32_e32 v0, v0, v0
  ; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
  ; CHECK-NEXT: v_add_nc_u32_e32 v0, v0, v0
  ; CHECK-NEXT: v_add_nc_u32_e32 v0, v0, v0
    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
...

# Two interleaved chains (v0 and v1), each with its producer two VALU
# instructions back: a single s_delay_alu is expected to carry VALU_DEP_2 for
# both consumers.
---
name: valu_dep_2_next_valu_dep_2
body: |
  bb.0:
  ; CHECK-LABEL: {{^}}valu_dep_2_next_valu_dep_2:
  ; CHECK: %bb.0:
  ; CHECK-NEXT: v_add_nc_u32_e32 v0, v0, v0
  ; CHECK-NEXT: v_add_nc_u32_e32 v1, v1, v1
  ; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
  ; CHECK-NEXT: v_add_nc_u32_e32 v0, v0, v0
  ; CHECK-NEXT: v_add_nc_u32_e32 v1, v1, v1
    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
    $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
    $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
...

# There's no need to encode a dependency for the second mul, because the
# dependency for the first mul has already guaranteed that the add has
# completed.
---
name: valu_dep_1_no_next_1
body: |
  bb.0:
  ; CHECK-LABEL: {{^}}valu_dep_1_no_next_1:
  ; CHECK: %bb.0:
  ; CHECK-NEXT: v_add_f32_e32 v0, v0, v0
  ; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1)
  ; CHECK-NEXT: v_mul_f32_e32 v1, v0, v0
  ; CHECK-NEXT: v_mul_f32_e32 v2, v0, v0
    $vgpr0 = V_ADD_F32_e32 $vgpr0, $vgpr0, implicit $exec, implicit $mode
    $vgpr1 = V_MUL_F32_e32 $vgpr0, $vgpr0, implicit $exec, implicit $mode
    $vgpr2 = V_MUL_F32_e32 $vgpr0, $vgpr0, implicit $exec, implicit $mode
...

# There's no need to encode a dependency for the second add, because the
# dependency for the second mul has already guaranteed that a later VALU has
# completed.
---
name: valu_dep_1_no_next_2
body: |
  bb.0:
  ; CHECK-LABEL: {{^}}valu_dep_1_no_next_2:
  ; CHECK: %bb.0:
  ; CHECK-NEXT: v_add_f32_e32 v0, v0, v0
  ; CHECK-NEXT: v_mul_f32_e32 v1, v1, v1
  ; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1)
  ; CHECK-NEXT: v_mul_f32_e32 v1, v1, v1
  ; CHECK-NEXT: v_add_f32_e32 v0, v0, v0
    $vgpr0 = V_ADD_F32_e32 $vgpr0, $vgpr0, implicit $exec, implicit $mode
    $vgpr1 = V_MUL_F32_e32 $vgpr1, $vgpr1, implicit $exec, implicit $mode
    $vgpr1 = V_MUL_F32_e32 $vgpr1, $vgpr1, implicit $exec, implicit $mode
    $vgpr0 = V_ADD_F32_e32 $vgpr0, $vgpr0, implicit $exec, implicit $mode
...

# There are no wait states between an add/sub/cmp generating carry and an
# add/sub/cndmask that consumes it, so no need to encode a dependency.
# In this case the compare writes vcc as an implicit def (e32 encoding) and the
# cndmask reads vcc; no s_delay_alu is expected between them.

---
name: implicit_cmp_cndmask
body: |
  bb.0:
  ; CHECK-LABEL: {{^}}implicit_cmp_cndmask:
  ; CHECK: %bb.0:
  ; CHECK-NEXT: v_cmp_eq_i32_e32 vcc, v0, v1
  ; CHECK-NEXT: v_cndmask_b32_e64 v2, v3, v4, vcc
    implicit $vcc = V_CMP_EQ_I32_e32 $vgpr0, $vgpr1, implicit $exec
    $vgpr2 = V_CNDMASK_B32_e64 0, $vgpr3, 0, $vgpr4, $vcc, implicit $exec
...

# TODO: There should be no s_delay_alu here.
---
name: explicit_cmp_cndmask
body: |
  bb.0:
  ; CHECK-LABEL: {{^}}explicit_cmp_cndmask:
  ; CHECK: %bb.0:
  ; CHECK-NEXT: v_cmp_eq_i32_e64 s[0:1], v0, v1
  ; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1)
  ; CHECK-NEXT: v_cndmask_b32_e64 v2, v3, v4, s[0:1]
    $sgpr0_sgpr1 = V_CMP_EQ_I32_e64 $vgpr0, $vgpr1, implicit $exec
    $vgpr2 = V_CNDMASK_B32_e64 0, $vgpr3, 0, $vgpr4, $sgpr0_sgpr1, implicit $exec
...

# Carry passed between two V_ADDC_U32 through implicit vcc: no s_delay_alu is
# expected.
---
name: implicit_addc_addc
body: |
  bb.0:
  ; CHECK-LABEL: {{^}}implicit_addc_addc:
  ; CHECK: %bb.0:
  ; CHECK-NEXT: v_add_co_ci_u32_e32 v0, vcc, v0, v0, vcc
  ; CHECK-NEXT: v_add_co_ci_u32_e32 v1, vcc, v1, v1, vcc
    $vgpr0 = V_ADDC_U32_e32 $vgpr0, $vgpr0, implicit-def $vcc, implicit $vcc, implicit $exec
    $vgpr1 = V_ADDC_U32_e32 $vgpr1, $vgpr1, implicit-def $vcc, implicit $vcc, implicit $exec
...

# Carry written to vcc as an explicit def of V_ADD_CO_U32_e64 and read as an
# implicit use by V_ADDC_U32: no s_delay_alu is expected.
---
name: explicit_addc_addc
body: |
  bb.0:
  ; CHECK-LABEL: {{^}}explicit_addc_addc:
  ; CHECK: %bb.0:
  ; CHECK-NEXT: v_add_co_u32 v0, vcc, v0, v0
  ; CHECK-NEXT: v_add_co_ci_u32_e32 v1, vcc, v1, v1, vcc
    $vgpr0,$vcc = V_ADD_CO_U32_e64 $vgpr0, $vgpr0, 0, implicit $exec
    $vgpr1 = V_ADDC_U32_e32 $vgpr1, $vgpr1, implicit-def $vcc, implicit $vcc, implicit $exec
...

# The two unrelated adds between the write of v0 and its use are wrapped in a
# BUNDLE; each is still expected to count, giving VALU_DEP_3.
---
name: valu_dep_3_bundle
body: |
  bb.0:
  ; CHECK-LABEL: {{^}}valu_dep_3_bundle:
  ; CHECK: %bb.0:
  ; CHECK-NEXT: v_add_nc_u32_e32 v0, v0, v0
  ; CHECK-NEXT: v_add_nc_u32_e32 v1, v1, v1
  ; CHECK-NEXT: v_add_nc_u32_e32 v2, v2, v2
  ; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_3)
  ; CHECK-NEXT: v_add_nc_u32_e32 v0, v0, v0
    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
    BUNDLE {
      $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
      $vgpr2 = V_ADD_U32_e32 $vgpr2, $vgpr2, implicit $exec
    }
    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
...

# v0 may have been written by the add in bb.1 immediately before control
# reaches bb.2, so the add in bb.2 is expected to get VALU_DEP_1.
---
name: if
body: |
  bb.0:
  ; CHECK-LABEL: {{^}}if:
  ; CHECK: %bb.0:
  ; CHECK-NEXT: s_cbranch_vccz .LBB23_2
  ; CHECK-NEXT: %bb.1:
  ; CHECK-NEXT: v_add_nc_u32_e32 v0, v0, v0
  ; CHECK-NEXT: .LBB23_2:
  ; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1)
  ; CHECK-NEXT: v_add_nc_u32_e32 v0, v0, v0
    S_CBRANCH_VCCZ %bb.2, implicit $vcc
  bb.1:
    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
  bb.2:
    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
...

# v0 is written only on the branch-taken path (bb.2); the other path (bb.1)
# contains just a branch. The add in the join block bb.3 is still expected to
# get VALU_DEP_1.
---
name: else
body: |
  bb.0:
  ; CHECK-LABEL: {{^}}else:
  ; CHECK: %bb.0:
  ; CHECK-NEXT: s_cbranch_vccz .LBB24_2
  ; CHECK-NEXT: %bb.1:
  ; CHECK-NEXT: s_branch .LBB24_3
  ; CHECK-NEXT: .LBB24_2:
  ; CHECK-NEXT: v_add_nc_u32_e32 v0, v0, v0
  ; CHECK-NEXT: .LBB24_3:
  ; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1)
  ; CHECK-NEXT: v_add_nc_u32_e32 v0, v0, v0
    S_CBRANCH_VCCZ %bb.2, implicit $vcc
  bb.1:
    S_BRANCH %bb.3
  bb.2:
    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
  bb.3:
    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
...

# Both paths into bb.3 end with a VALU write of v0 (one add in bb.1, two in
# bb.2), and the add in bb.3 is expected to get VALU_DEP_1.
---
name: if_else
body: |
  bb.0:
  ; CHECK-LABEL: {{^}}if_else:
  ; CHECK: %bb.0:
  ; CHECK-NEXT: s_cbranch_vccz .LBB25_2
  ; CHECK-NEXT: %bb.1:
  ; CHECK-NEXT: v_add_nc_u32_e32 v0, v0, v0
  ; CHECK-NEXT: s_branch .LBB25_3
  ; CHECK-NEXT: .LBB25_2:
  ; CHECK-NEXT: v_add_nc_u32_e32 v0, v0, v0
  ; CHECK-NEXT: v_add_nc_u32_e32 v0, v1, v1
  ; CHECK-NEXT: .LBB25_3:
  ; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1)
  ; CHECK-NEXT: v_add_nc_u32_e32 v0, v0, v0
    S_CBRANCH_VCCZ %bb.2, implicit $vcc
  bb.1:
    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
    S_BRANCH %bb.3
  bb.2:
    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
    $vgpr0 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
  bb.3:
    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
...

# Dependency from outside the loop.
---
name: loop_1
body: |
  bb.0:
  ; CHECK-LABEL: {{^}}loop_1:
  ; CHECK: %bb.0:
  ; CHECK-NEXT: v_add_nc_u32_e32 v0, v0, v0
  ; CHECK-NEXT: .LBB26_1:
  ; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1)
  ; CHECK-NEXT: v_add_nc_u32_e32 v1, v0, v0
  ; CHECK-NEXT: s_cbranch_vccz .LBB26_1
    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
  bb.1:
    $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
    S_CBRANCH_VCCZ %bb.1, implicit $vcc
  bb.2:
...

# Dependency from inside the loop: the add in bb.1 both writes and reads v0,
# and bb.1 branches back to itself, so the value written in one iteration is
# read at the top of the next.
---
name: loop_2
body: |
  bb.0:
  ; CHECK-LABEL: {{^}}loop_2:
  ; CHECK: %bb.0:
  ; CHECK-NEXT: .LBB27_1:
  ; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1)
  ; CHECK-NEXT: v_add_nc_u32_e32 v0, v0, v0
  ; CHECK-NEXT: s_cbranch_vccz .LBB27_1
  bb.1:
    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
    S_CBRANCH_VCCZ %bb.1, implicit $vcc
  bb.2:
...

# No VALU delay across s_sendmsg_rtn because it waits for all outstanding VALU
# to complete. The only delay expected below is the SALU_CYCLE_1 placed ahead
# of the s_add_u32 that reads s0; the final add on v0 gets none.
---
name: sendmsg_rtn
body: |
  bb.0:
  ; CHECK-LABEL: {{^}}sendmsg_rtn:
  ; CHECK: %bb.0:
  ; CHECK-NEXT: v_mov_b32_e32 v0, 0
  ; CHECK-NEXT: s_sendmsg_rtn_b32 s0, sendmsg(MSG_RTN_GET_DOORBELL)
  ; CHECK-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
  ; CHECK-NEXT: s_add_u32 s0, s0, s0
  ; CHECK-NEXT: v_add_nc_u32_e32 v0, v0, v0
    $vgpr0 = V_MOV_B32_e32 0, implicit $exec
    $sgpr0 = S_SENDMSG_RTN_B32 128
    $sgpr0 = S_ADD_U32 $sgpr0, $sgpr0, implicit-def $scc
    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
...

# No VALU delay before or across FLAT because it waits for all outstanding VALU
# to complete.
---
name: flat_load
body: |
  bb.0:
  ; CHECK-LABEL: {{^}}flat_load:
  ; CHECK: %bb.0:
  ; CHECK-NEXT: v_mov_b32_e32 v0, 0
  ; CHECK-NEXT: v_mov_b32_e32 v1, 0
  ; CHECK-NEXT: v_mov_b32_e32 v2, 0
  ; CHECK-NEXT: flat_load_b32 v0, v[0:1]
  ; CHECK-NEXT: v_add_nc_u32_e32 v0, v2, v2
  ; The load reads v[0:1] right after they are written, and the final add
  ; reads v2 across the load; neither is expected to get an s_delay_alu.
    $vgpr0 = V_MOV_B32_e32 0, implicit $exec
    $vgpr1 = V_MOV_B32_e32 0, implicit $exec
    $vgpr2 = V_MOV_B32_e32 0, implicit $exec
    $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
    $vgpr0 = V_ADD_U32_e32 $vgpr2, $vgpr2, implicit $exec
...

# No VALU delay across an s_waitcnt_depctr that waits for all outstanding VALU
# to complete.
---
name: waitcnt_depctr
body: |
  bb.0:
  ; CHECK-LABEL: {{^}}waitcnt_depctr:
  ; CHECK: %bb.0:
  ; CHECK-NEXT: v_mov_b32_e32 v0, 0
  ; CHECK-NEXT: s_waitcnt_depctr 0xfff
  ; CHECK-NEXT: v_add_nc_u32_e32 v0, v0, v0
    $vgpr0 = V_MOV_B32_e32 0, implicit $exec
    S_WAITCNT_DEPCTR 4095
    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
...

# Check that no delays are emitted for writelane instructions, even though each
# one reads the v0 written by the one before it.
---
name: writelane1
body: |
  bb.0:
  ; CHECK-LABEL: {{^}}writelane1:
  ; CHECK: %bb.0:
  ; CHECK-NEXT: v_writelane_b32 v0, s0, 0
  ; CHECK-NEXT: v_writelane_b32 v0, s0, 1
  ; CHECK-NEXT: v_writelane_b32 v0, s0, 2
  ; CHECK-NEXT: v_writelane_b32 v0, s0, 3
    $vgpr0 = V_WRITELANE_B32 $sgpr0, 0, $vgpr0
    $vgpr0 = V_WRITELANE_B32 $sgpr0, 1, $vgpr0
    $vgpr0 = V_WRITELANE_B32 $sgpr0, 2, $vgpr0
    $vgpr0 = V_WRITELANE_B32 $sgpr0, 3, $vgpr0
...

# Check if a VALU delay is added after writelane.
---
name: writelane2
body: |
  bb.0:
  ; CHECK-LABEL: {{^}}writelane2:
  ; CHECK: %bb.0:
  ; CHECK-NEXT: v_writelane_b32 v0, s0, 3
  ; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1)
  ; CHECK-NEXT: v_add_nc_u32_e32 v0, v0, v0
  ; The add reads v0, which the preceding v_writelane_b32 wrote, so the
  ; expected delay is VALU_DEP_1.
    $vgpr0 = V_WRITELANE_B32 $sgpr0, 3, $vgpr0
    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
...