# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -verify-machineinstrs -start-after=amdgpu-insert-delay-alu %s -o - | FileCheck %s

# Each function below contains a hand-written S_DELAY_ALU whose immediate is
# spelled as a mnemonic of the form ".id0_<dep>[_skip_<skip>_id1_<dep>]".
# The llc invocation above starts the pipeline after amdgpu-insert-delay-alu,
# and the expected-output lines pin how every mnemonic is printed as an
# s_delay_alu operand in the emitted assembly.

# Single field: id0 = VALU_DEP_1, placed after one VALU add.
---
name: valu_dep_1
body: |
  bb.0:
    ; CHECK-LABEL: {{^}}valu_dep_1:
    ; CHECK: %bb.0:
    ; CHECK-NEXT: v_add_nc_u32_e32 v0, v0, v0
    ; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1)
    ; CHECK-NEXT: v_add_nc_u32_e32 v0, v0, v0
    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
    S_DELAY_ALU .id0_VALU_DEP_1
    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
...

# Single field: id0 = VALU_DEP_2, placed after two VALU adds.
---
name: valu_dep_2
body: |
  bb.0:
    ; CHECK-LABEL: {{^}}valu_dep_2:
    ; CHECK: %bb.0:
    ; CHECK-NEXT: v_add_nc_u32_e32 v0, v0, v0
    ; CHECK-NEXT: v_add_nc_u32_e32 v1, v1, v1
    ; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_2)
    ; CHECK-NEXT: v_add_nc_u32_e32 v0, v0, v0
    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
    $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
    S_DELAY_ALU .id0_VALU_DEP_2
    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
...

# Single field: id0 = VALU_DEP_3, placed after three VALU adds.
---
name: valu_dep_3
body: |
  bb.0:
    ; CHECK-LABEL: {{^}}valu_dep_3:
    ; CHECK: %bb.0:
    ; CHECK-NEXT: v_add_nc_u32_e32 v0, v0, v0
    ; CHECK-NEXT: v_add_nc_u32_e32 v1, v1, v1
    ; CHECK-NEXT: v_add_nc_u32_e32 v2, v2, v2
    ; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_3)
    ; CHECK-NEXT: v_add_nc_u32_e32 v0, v0, v0
    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
    $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
    $vgpr2 = V_ADD_U32_e32 $vgpr2, $vgpr2, implicit $exec
    S_DELAY_ALU .id0_VALU_DEP_3
    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
...
# Single field: id0 = VALU_DEP_4, placed after four VALU adds.
---
name: valu_dep_4
body: |
  bb.0:
    ; CHECK-LABEL: {{^}}valu_dep_4:
    ; CHECK: %bb.0:
    ; CHECK-NEXT: v_add_nc_u32_e32 v0, v0, v0
    ; CHECK-NEXT: v_add_nc_u32_e32 v1, v1, v1
    ; CHECK-NEXT: v_add_nc_u32_e32 v2, v2, v2
    ; CHECK-NEXT: v_add_nc_u32_e32 v3, v3, v3
    ; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_4)
    ; CHECK-NEXT: v_add_nc_u32_e32 v0, v0, v0
    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
    $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
    $vgpr2 = V_ADD_U32_e32 $vgpr2, $vgpr2, implicit $exec
    $vgpr3 = V_ADD_U32_e32 $vgpr3, $vgpr3, implicit $exec
    S_DELAY_ALU .id0_VALU_DEP_4
    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
...

# Single field: id0 = TRANS32_DEP_1, placed after one V_EXP_F32.
---
name: trans32_dep_1
body: |
  bb.0:
    ; CHECK-LABEL: {{^}}trans32_dep_1:
    ; CHECK: %bb.0:
    ; CHECK-NEXT: v_exp_f32_e32 v0, v0
    ; CHECK-NEXT: s_delay_alu instid0(TRANS32_DEP_1)
    ; CHECK-NEXT: v_add_nc_u32_e32 v0, v0, v0
    $vgpr0 = V_EXP_F32_e32 $vgpr0, implicit $exec, implicit $mode
    S_DELAY_ALU .id0_TRANS32_DEP_1
    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
...

# Single field: id0 = TRANS32_DEP_2, placed after two V_EXP_F32.
---
name: trans32_dep_2
body: |
  bb.0:
    ; CHECK-LABEL: {{^}}trans32_dep_2:
    ; CHECK: %bb.0:
    ; CHECK-NEXT: v_exp_f32_e32 v0, v0
    ; CHECK-NEXT: v_exp_f32_e32 v1, v1
    ; CHECK-NEXT: s_delay_alu instid0(TRANS32_DEP_2)
    ; CHECK-NEXT: v_add_nc_u32_e32 v0, v0, v0
    $vgpr0 = V_EXP_F32_e32 $vgpr0, implicit $exec, implicit $mode
    $vgpr1 = V_EXP_F32_e32 $vgpr1, implicit $exec, implicit $mode
    S_DELAY_ALU .id0_TRANS32_DEP_2
    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
...
# Single field: id0 = TRANS32_DEP_3, placed after three V_EXP_F32.
---
name: trans32_dep_3
body: |
  bb.0:
    ; CHECK-LABEL: {{^}}trans32_dep_3:
    ; CHECK: %bb.0:
    ; CHECK-NEXT: v_exp_f32_e32 v0, v0
    ; CHECK-NEXT: v_exp_f32_e32 v1, v1
    ; CHECK-NEXT: v_exp_f32_e32 v2, v2
    ; CHECK-NEXT: s_delay_alu instid0(TRANS32_DEP_3)
    ; CHECK-NEXT: v_add_nc_u32_e32 v0, v0, v0
    $vgpr0 = V_EXP_F32_e32 $vgpr0, implicit $exec, implicit $mode
    $vgpr1 = V_EXP_F32_e32 $vgpr1, implicit $exec, implicit $mode
    $vgpr2 = V_EXP_F32_e32 $vgpr2, implicit $exec, implicit $mode
    S_DELAY_ALU .id0_TRANS32_DEP_3
    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
...

# Single field: id0 = SALU_CYCLE_1, placed between an S_MOV_B32 and the VALU
# add that reads the SGPR it wrote.
---
name: salu_cycle_1
body: |
  bb.0:
    ; CHECK-LABEL: {{^}}salu_cycle_1:
    ; CHECK: %bb.0:
    ; CHECK-NEXT: s_mov_b32 s0, 0
    ; CHECK-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
    ; CHECK-NEXT: v_add_nc_u32_e32 v0, s0, v0
    $sgpr0 = S_MOV_B32 0
    S_DELAY_ALU .id0_SALU_CYCLE_1
    $vgpr0 = V_ADD_U32_e32 $sgpr0, $vgpr0, implicit $exec
...

# Two fields with skip = SAME: id0 = TRANS32_DEP_1, id1 = VALU_DEP_1.
# The expected assembly carries both instid fields and no instskip term.
---
name: valu_dep_1_same_trans32_dep_1
body: |
  bb.0:
    ; CHECK-LABEL: {{^}}valu_dep_1_same_trans32_dep_1:
    ; CHECK: %bb.0:
    ; CHECK-NEXT: v_exp_f32_e32 v0, v0
    ; CHECK-NEXT: v_add_nc_u32_e32 v1, v1, v1
    ; CHECK-NEXT: s_delay_alu instid0(TRANS32_DEP_1) | instid1(VALU_DEP_1)
    ; CHECK-NEXT: v_add_nc_u32_e32 v0, v0, v1
    $vgpr0 = V_EXP_F32_e32 $vgpr0, implicit $exec, implicit $mode
    $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
    S_DELAY_ALU .id0_TRANS32_DEP_1_skip_SAME_id1_VALU_DEP_1
    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
...
# Two fields with skip = SAME: id0 = VALU_DEP_1, id1 = SALU_CYCLE_1.
# The expected assembly carries both instid fields and no instskip term.
---
name: valu_dep_1_same_salu_cycle_1
body: |
  bb.0:
    ; CHECK-LABEL: {{^}}valu_dep_1_same_salu_cycle_1:
    ; CHECK: %bb.0:
    ; CHECK-NEXT: v_add_nc_u32_e32 v0, v0, v0
    ; CHECK-NEXT: s_mov_b32 s0, 0
    ; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
    ; CHECK-NEXT: v_add_nc_u32_e32 v0, s0, v0
    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
    $sgpr0 = S_MOV_B32 0
    S_DELAY_ALU .id0_VALU_DEP_1_skip_SAME_id1_SALU_CYCLE_1
    $vgpr0 = V_ADD_U32_e32 $sgpr0, $vgpr0, implicit $exec
...

# Two fields with skip = NEXT: id0 = VALU_DEP_1, id1 = VALU_DEP_1.
# The expected assembly prints the skip as instskip(NEXT).
---
name: valu_dep_1_next_valu_dep_1
body: |
  bb.0:
    ; CHECK-LABEL: {{^}}valu_dep_1_next_valu_dep_1:
    ; CHECK: %bb.0:
    ; CHECK-NEXT: v_add_nc_u32_e32 v0, v0, v0
    ; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
    ; CHECK-NEXT: v_add_nc_u32_e32 v0, v0, v0
    ; CHECK-NEXT: v_add_nc_u32_e32 v0, v0, v0
    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
    S_DELAY_ALU .id0_VALU_DEP_1_skip_NEXT_id1_VALU_DEP_1
    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
...

# Two fields with skip = NEXT: id0 = VALU_DEP_2, id1 = VALU_DEP_2.
# The expected assembly prints the skip as instskip(NEXT).
---
name: valu_dep_2_next_valu_dep_2
body: |
  bb.0:
    ; CHECK-LABEL: {{^}}valu_dep_2_next_valu_dep_2:
    ; CHECK: %bb.0:
    ; CHECK-NEXT: v_add_nc_u32_e32 v0, v0, v0
    ; CHECK-NEXT: v_add_nc_u32_e32 v1, v1, v1
    ; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
    ; CHECK-NEXT: v_add_nc_u32_e32 v0, v0, v0
    ; CHECK-NEXT: v_add_nc_u32_e32 v1, v1, v1
    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
    $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
    S_DELAY_ALU .id0_VALU_DEP_2_skip_NEXT_id1_VALU_DEP_2
    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
    $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
...
# Two fields with skip = SKIP_1: id0 = VALU_DEP_2, id1 = VALU_DEP_2.
# The expected assembly prints the skip as instskip(SKIP_1).
# The label pattern ends in ':' so it matches only this function's own
# symbol, not a longer symbol that merely starts with the same name.
---
name: valu_dep_2_skip_valu_dep_2
body: |
  bb.0:
    ; CHECK-LABEL: {{^}}valu_dep_2_skip_valu_dep_2:
    ; CHECK: %bb.0:
    ; CHECK-NEXT: v_add_nc_u32_e32 v0, v0, v0
    ; CHECK-NEXT: v_add_nc_u32_e32 v2, v1, v1
    ; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
    ; CHECK-NEXT: v_add_nc_u32_e32 v1, v0, v1
    ; CHECK-NEXT: v_add_nc_u32_e32 v4, v3, v3
    ; CHECK-NEXT: v_add_nc_u32_e32 v1, v1, v1
    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
    $vgpr2 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
    S_DELAY_ALU .id0_VALU_DEP_2_skip_SKIP_1_id1_VALU_DEP_2
    $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
    $vgpr4 = V_ADD_U32_e32 $vgpr3, $vgpr3, implicit $exec
    $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
...