; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -amdgpu-opt-vgpr-liverange=true -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s

; Every function below is compiled by the llc invocation above, i.e. for
; gfx1010 with the -amdgpu-opt-vgpr-liverange option switched on, and the
; resulting assembly is compared against the autogenerated expectations that
; follow each `define` line. Regenerate those expectations with the script
; named on the first line rather than editing them by hand.

; A plain if-else diamond: both arms compute a value from %v (multiply by 2.0
; in %if, by 3.0 in %else) and the results are merged by the phi in %end.
; %v itself is not referenced after the diamond.
define amdgpu_ps float @else1(i32 %z, float %v) #0 {
; SI-LABEL: else1:
; SI:       ; %bb.0: ; %main_body
; SI-NEXT:    v_cmp_gt_i32_e32 vcc_lo, 6, v0
; SI-NEXT:    ; implicit-def: $vgpr0
; SI-NEXT:    s_and_saveexec_b32 s0, vcc_lo
; SI-NEXT:    s_xor_b32 s0, exec_lo, s0
; SI-NEXT:    s_cbranch_execnz .LBB0_3
; SI-NEXT:  ; %bb.1: ; %Flow
; SI-NEXT:    s_andn2_saveexec_b32 s0, s0
; SI-NEXT:    s_cbranch_execnz .LBB0_4
; SI-NEXT:  .LBB0_2: ; %end
; SI-NEXT:    s_or_b32 exec_lo, exec_lo, s0
; SI-NEXT:    s_branch .LBB0_5
; SI-NEXT:  .LBB0_3: ; %else
; SI-NEXT:    v_mul_f32_e32 v0, 0x40400000, v1
; SI-NEXT:    ; implicit-def: $vgpr1
; SI-NEXT:    s_andn2_saveexec_b32 s0, s0
; SI-NEXT:    s_cbranch_execz .LBB0_2
; SI-NEXT:  .LBB0_4: ; %if
; SI-NEXT:    v_add_f32_e32 v0, v1, v1
; SI-NEXT:    s_or_b32 exec_lo, exec_lo, s0
; SI-NEXT:    s_branch .LBB0_5
; SI-NEXT:  .LBB0_5:
main_body:
  %cc = icmp sgt i32 %z, 5
  br i1 %cc, label %if, label %else

if:
  %v.if = fmul float %v, 2.0
  br label %end

else:
  %v.else = fmul float %v, 3.0
  br label %end

end:
  %r = phi float [ %v.if, %if ], [ %v.else, %else ]
  ret float %r
}


; Same diamond as @else1, except that %v is still needed after the if-else:
; the %r0 phi in %end takes %v itself on the edge coming from %else.
define amdgpu_ps float @else2(i32 %z, float %v) #0 {
; SI-LABEL: else2:
; SI:       ; %bb.0: ; %main_body
; SI-NEXT:    v_cmp_gt_i32_e32 vcc_lo, 6, v0
; SI-NEXT:    ; implicit-def: $vgpr0
; SI-NEXT:    s_and_saveexec_b32 s0, vcc_lo
; SI-NEXT:    s_xor_b32 s0, exec_lo, s0
; SI-NEXT:  ; %bb.1: ; %else
; SI-NEXT:    v_mul_f32_e32 v0, 0x40400000, v1
; SI-NEXT:  ; %bb.2: ; %Flow
; SI-NEXT:    s_andn2_saveexec_b32 s0, s0
; SI-NEXT:  ; %bb.3: ; %if
; SI-NEXT:    v_add_f32_e32 v1, v1, v1
; SI-NEXT:    v_mov_b32_e32 v0, v1
; SI-NEXT:  ; %bb.4: ; %end
; SI-NEXT:    s_or_b32 exec_lo, exec_lo, s0
; SI-NEXT:    v_add_f32_e32 v0, v1, v0
; SI-NEXT:    ; return to shader part epilog
main_body:
  %cc = icmp sgt i32 %z, 5
  br i1 %cc, label %if, label %else

if:
  %v.if = fmul float %v, 2.0
  br label %end

else:
  %v.else = fmul float %v, 3.0
  br label %end

end:
  ; %r0 is the phi that keeps %v in use past the diamond (its %else operand).
  %r0 = phi float [ %v.if, %if ], [ %v, %else ]
  %r1 = phi float [ %v.if, %if ], [ %v.else, %else ]
  %r2 = fadd float %r0, %r1
  ret float %r2
}

; An if-else inside a loop; %x can be optimized, but %v cannot be.
; %x is a loop-carried phi that is replaced on every iteration (via %x.endif
; and %xinc), whereas %v is a function argument that both arms read on every
; iteration.
define amdgpu_ps float @else3(i32 %z, float %v, i32 inreg %bound, i32 %x0) #0 {
; SI-LABEL: else3:
; SI:       ; %bb.0: ; %entry
; SI-NEXT:    s_mov_b32 s1, 0
; SI-NEXT:    v_cmp_gt_i32_e32 vcc_lo, 6, v0
; SI-NEXT:    s_branch .LBB2_2
; SI-NEXT:  .LBB2_1: ; %if.end
; SI-NEXT:    ; in Loop: Header=BB2_2 Depth=1
; SI-NEXT:    s_or_b32 exec_lo, exec_lo, s2
; SI-NEXT:    v_add_nc_u32_e32 v2, 1, v3
; SI-NEXT:    s_add_i32 s1, s1, 1
; SI-NEXT:    s_cmp_lt_i32 s1, s0
; SI-NEXT:    s_cbranch_scc0 .LBB2_6
; SI-NEXT:  .LBB2_2: ; %for.body
; SI-NEXT:    ; =>This Inner Loop Header: Depth=1
; SI-NEXT:    ; implicit-def: $vgpr3
; SI-NEXT:    ; implicit-def: $vgpr0
; SI-NEXT:    s_and_saveexec_b32 s2, vcc_lo
; SI-NEXT:    s_xor_b32 s2, exec_lo, s2
; SI-NEXT:  ; %bb.3: ; %else
; SI-NEXT:    ; in Loop: Header=BB2_2 Depth=1
; SI-NEXT:    v_mul_f32_e32 v0, v1, v2
; SI-NEXT:    v_lshl_add_u32 v3, v2, 1, v2
; SI-NEXT:    ; implicit-def: $vgpr2
; SI-NEXT:  ; %bb.4: ; %Flow
; SI-NEXT:    ; in Loop: Header=BB2_2 Depth=1
; SI-NEXT:    s_andn2_saveexec_b32 s2, s2
; SI-NEXT:    s_cbranch_execz .LBB2_1
; SI-NEXT:  ; %bb.5: ; %if
; SI-NEXT:    ; in Loop: Header=BB2_2 Depth=1
; SI-NEXT:    v_mul_f32_e32 v0, s1, v1
; SI-NEXT:    v_add_nc_u32_e32 v3, 1, v2
; SI-NEXT:    s_branch .LBB2_1
; SI-NEXT:  .LBB2_6: ; %for.end
; SI-NEXT:    v_add_f32_e32 v0, v3, v0
; SI-NEXT:    ; return to shader part epilog
entry:
; Disabled guard on %bound, kept for reference; with it commented out the loop
; body is entered unconditionally and always runs at least once.
;  %break = icmp sgt i32 %bound, 0
;  br i1 %break, label %for.body, label %for.end
  br label %for.body

for.body:
  ; %i counts iterations starting from 0; %x is seeded from %x0 and carried
  ; around the back edge from %if.end.
  %i = phi i32 [ 0, %entry ], [ %inc, %if.end ]
  %x = phi i32 [ %x0, %entry ], [ %xinc, %if.end ]
  %cc = icmp sgt i32 %z, 5
  br i1 %cc, label %if, label %else

if:
  %i.tmp = bitcast i32 %i to float
  %v.if = fmul float %v, %i.tmp
  %x.if = add i32 %x, 1
  br label %if.end

else:
  %x.tmp = bitcast i32 %x to float
  %v.else = fmul float %v, %x.tmp
  %x.else = mul i32 %x, 3
  br label %if.end

if.end:
  %v.endif = phi float [ %v.if, %if ], [ %v.else, %else ]
  %x.endif = phi i32 [ %x.if, %if ], [ %x.else, %else ]

  %xinc = add i32 %x.endif, 1
  %inc = add i32 %i, 1
  %cond = icmp slt i32 %inc, %bound
  br i1 %cond, label %for.body, label %for.end

for.end:
  ; Both merged values from the final iteration feed the returned result.
  %x_float = bitcast i32 %x.endif to float
  %r = fadd float %x_float, %v.endif
  ret float %r
}

; A loop inside an if-else. The IR contains no explicit loop: each arm makes an
; indirect call through one of the pointer arguments, and the expected output
; below shows a loop around each call (the `Inner Loop Header` blocks).
; %v is only passed to the calls and is not referenced in %end.
; %bound is not referenced in the body.
define amdgpu_ps float @loop(i32 %z, float %v, i32 inreg %bound, ptr %extern_func, ptr %extern_func2) #0 {
; SI-LABEL: loop:
; SI:       ; %bb.0: ; %main_body
; SI-NEXT:    s_mov_b32 s12, SCRATCH_RSRC_DWORD0
; SI-NEXT:    s_mov_b32 s13, SCRATCH_RSRC_DWORD1
; SI-NEXT:    s_mov_b32 s14, -1
; SI-NEXT:    v_mov_b32_e32 v6, v0
; SI-NEXT:    v_mov_b32_e32 v0, v1
; SI-NEXT:    s_mov_b32 s15, 0x31c16000
; SI-NEXT:    s_add_u32 s12, s12, s1
; SI-NEXT:    s_addc_u32 s13, s13, 0
; SI-NEXT:    s_mov_b32 s32, 0
; SI-NEXT:    v_cmp_gt_i32_e32 vcc_lo, 6, v6
; SI-NEXT:    ; implicit-def: $vgpr1
; SI-NEXT:    s_and_saveexec_b32 s0, vcc_lo
; SI-NEXT:    s_xor_b32 s6, exec_lo, s0
; SI-NEXT:    s_cbranch_execz .LBB3_4
; SI-NEXT:  ; %bb.1: ; %else
; SI-NEXT:    s_mov_b32 s7, exec_lo
; SI-NEXT:  .LBB3_2: ; =>This Inner Loop Header: Depth=1
; SI-NEXT:    v_readfirstlane_b32 s4, v4
; SI-NEXT:    v_readfirstlane_b32 s5, v5
; SI-NEXT:    v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[4:5]
; SI-NEXT:    s_and_saveexec_b32 s8, vcc_lo
; SI-NEXT:    s_mov_b64 s[0:1], s[12:13]
; SI-NEXT:    s_mov_b64 s[2:3], s[14:15]
; SI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
; SI-NEXT:    v_mov_b32_e32 v1, v0
; SI-NEXT:    ; implicit-def: $vgpr4_vgpr5
; SI-NEXT:    ; implicit-def: $vgpr0
; SI-NEXT:    s_xor_b32 exec_lo, exec_lo, s8
; SI-NEXT:    s_cbranch_execnz .LBB3_2
; SI-NEXT:  ; %bb.3:
; SI-NEXT:    s_mov_b32 exec_lo, s7
; SI-NEXT:    ; implicit-def: $vgpr0
; SI-NEXT:    ; implicit-def: $vgpr2
; SI-NEXT:  .LBB3_4: ; %Flow
; SI-NEXT:    s_andn2_saveexec_b32 s6, s6
; SI-NEXT:    s_cbranch_execz .LBB3_8
; SI-NEXT:  ; %bb.5: ; %if
; SI-NEXT:    s_mov_b32 s7, exec_lo
; SI-NEXT:  .LBB3_6: ; =>This Inner Loop Header: Depth=1
; SI-NEXT:    v_readfirstlane_b32 s4, v2
; SI-NEXT:    v_readfirstlane_b32 s5, v3
; SI-NEXT:    v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[2:3]
; SI-NEXT:    s_and_saveexec_b32 s8, vcc_lo
; SI-NEXT:    s_mov_b64 s[0:1], s[12:13]
; SI-NEXT:    s_mov_b64 s[2:3], s[14:15]
; SI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
; SI-NEXT:    v_mov_b32_e32 v1, v0
; SI-NEXT:    ; implicit-def: $vgpr2_vgpr3
; SI-NEXT:    ; implicit-def: $vgpr0
; SI-NEXT:    s_xor_b32 exec_lo, exec_lo, s8
; SI-NEXT:    s_cbranch_execnz .LBB3_6
; SI-NEXT:  ; %bb.7:
; SI-NEXT:    s_mov_b32 exec_lo, s7
; SI-NEXT:  .LBB3_8: ; %end
; SI-NEXT:    s_or_b32 exec_lo, exec_lo, s6
; SI-NEXT:    v_mov_b32_e32 v0, v1
; SI-NEXT:    ; return to shader part epilog
main_body:
  %cc = icmp sgt i32 %z, 5
  br i1 %cc, label %if, label %else

if:
  %v.if = call amdgpu_gfx float %extern_func(float %v)
  br label %end

else:
  %v.else = call amdgpu_gfx float %extern_func2(float %v)
  br label %end

end:
  %r = phi float [ %v.if, %if ], [ %v.else, %else ]
  ret float %r
}

; Same shape as @loop (an indirect call in each arm of the if-else), but the
; variable is still in use after the if-else: %v is added to the merged call
; result in %end. %bound is not referenced in the body.
define amdgpu_ps float @loop_with_use(i32 %z, float %v, i32 inreg %bound, ptr %extern_func, ptr %extern_func2) #0 {
; SI-LABEL: loop_with_use:
; SI:       ; %bb.0: ; %main_body
; SI-NEXT:    s_mov_b32 s12, SCRATCH_RSRC_DWORD0
; SI-NEXT:    s_mov_b32 s13, SCRATCH_RSRC_DWORD1
; SI-NEXT:    s_mov_b32 s14, -1
; SI-NEXT:    v_mov_b32_e32 v40, v1
; SI-NEXT:    s_mov_b32 s15, 0x31c16000
; SI-NEXT:    s_add_u32 s12, s12, s1
; SI-NEXT:    s_addc_u32 s13, s13, 0
; SI-NEXT:    s_mov_b32 s32, 0
; SI-NEXT:    v_cmp_gt_i32_e32 vcc_lo, 6, v0
; SI-NEXT:    ; implicit-def: $vgpr0
; SI-NEXT:    s_and_saveexec_b32 s0, vcc_lo
; SI-NEXT:    s_xor_b32 s6, exec_lo, s0
; SI-NEXT:    s_cbranch_execz .LBB4_4
; SI-NEXT:  ; %bb.1: ; %else
; SI-NEXT:    s_mov_b32 s7, exec_lo
; SI-NEXT:  .LBB4_2: ; =>This Inner Loop Header: Depth=1
; SI-NEXT:    v_readfirstlane_b32 s4, v4
; SI-NEXT:    v_readfirstlane_b32 s5, v5
; SI-NEXT:    v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[4:5]
; SI-NEXT:    s_and_saveexec_b32 s8, vcc_lo
; SI-NEXT:    v_mov_b32_e32 v0, v40
; SI-NEXT:    s_mov_b64 s[0:1], s[12:13]
; SI-NEXT:    s_mov_b64 s[2:3], s[14:15]
; SI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
; SI-NEXT:    ; implicit-def: $vgpr4_vgpr5
; SI-NEXT:    s_xor_b32 exec_lo, exec_lo, s8
; SI-NEXT:    s_cbranch_execnz .LBB4_2
; SI-NEXT:  ; %bb.3:
; SI-NEXT:    s_mov_b32 exec_lo, s7
; SI-NEXT:    ; implicit-def: $vgpr2
; SI-NEXT:  .LBB4_4: ; %Flow
; SI-NEXT:    s_andn2_saveexec_b32 s6, s6
; SI-NEXT:    s_cbranch_execz .LBB4_8
; SI-NEXT:  ; %bb.5: ; %if
; SI-NEXT:    s_mov_b32 s7, exec_lo
; SI-NEXT:  .LBB4_6: ; =>This Inner Loop Header: Depth=1
; SI-NEXT:    v_readfirstlane_b32 s4, v2
; SI-NEXT:    v_readfirstlane_b32 s5, v3
; SI-NEXT:    v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[2:3]
; SI-NEXT:    s_and_saveexec_b32 s8, vcc_lo
; SI-NEXT:    v_mov_b32_e32 v0, v40
; SI-NEXT:    s_mov_b64 s[0:1], s[12:13]
; SI-NEXT:    s_mov_b64 s[2:3], s[14:15]
; SI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
; SI-NEXT:    ; implicit-def: $vgpr2_vgpr3
; SI-NEXT:    s_xor_b32 exec_lo, exec_lo, s8
; SI-NEXT:    s_cbranch_execnz .LBB4_6
; SI-NEXT:  ; %bb.7:
; SI-NEXT:    s_mov_b32 exec_lo, s7
; SI-NEXT:  .LBB4_8: ; %end
; SI-NEXT:    s_or_b32 exec_lo, exec_lo, s6
; SI-NEXT:    v_add_f32_e32 v0, v0, v40
; SI-NEXT:    ; return to shader part epilog
main_body:
  %cc = icmp sgt i32 %z, 5
  br i1 %cc, label %if, label %else

if:
  %v.if = call amdgpu_gfx float %extern_func(float %v)
  br label %end

else:
  %v.else = call amdgpu_gfx float %extern_func2(float %v)
  br label %end

end:
  %r = phi float [ %v.if, %if ], [ %v.else, %else ]
  ; This fadd is the use of %v after the if-else that the test is about.
  %r2 = fadd float %r, %v
  ret float %r2
}

; Attribute group referenced as #0 by each function above.
attributes #0 = { nounwind }