; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn -mcpu=tahiti -verify-machineinstrs -simplifycfg-require-and-preserve-domtree=1 < %s | FileCheck -check-prefixes=GCN,WAVE64,SI %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize64 -verify-machineinstrs -simplifycfg-require-and-preserve-domtree=1 < %s | FileCheck -check-prefixes=GCN,WAVE64,GFX10-WAVE64 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs -simplifycfg-require-and-preserve-domtree=1 < %s | FileCheck -check-prefixes=GCN,GFX10-WAVE32 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -verify-machineinstrs -simplifycfg-require-and-preserve-domtree=1 < %s | FileCheck -check-prefixes=GCN,GFX11 %s

; Kill with a constant-true condition: every run line expects no kill code at
; all, just s_endpgm.
define amdgpu_ps void @test_kill_depth_0_imm_pos() #0 {
; GCN-LABEL: test_kill_depth_0_imm_pos:
; GCN: ; %bb.0:
; GCN-NEXT: s_endpgm
  call void @llvm.amdgcn.kill(i1 true)
  ret void
}

; Kill with a constant-false condition: the expected code clears exec with an
; andn2 of exec against itself, then branches on scc0 to an exit block that
; zeroes exec and issues the final "done" export before s_endpgm.
define amdgpu_ps void @test_kill_depth_0_imm_neg() #0 {
; WAVE64-LABEL: test_kill_depth_0_imm_neg:
; WAVE64: ; %bb.0:
; WAVE64-NEXT: s_andn2_b64 exec, exec, exec
; WAVE64-NEXT: s_cbranch_scc0 .LBB1_1
; WAVE64-NEXT: s_endpgm
; WAVE64-NEXT: .LBB1_1:
; WAVE64-NEXT: s_mov_b64 exec, 0
; WAVE64-NEXT: exp null off, off, off, off done vm
; WAVE64-NEXT: s_endpgm
;
; GFX10-WAVE32-LABEL: test_kill_depth_0_imm_neg:
; GFX10-WAVE32: ; %bb.0:
; GFX10-WAVE32-NEXT: s_andn2_b32 exec_lo, exec_lo, exec_lo
; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB1_1
; GFX10-WAVE32-NEXT: s_endpgm
; GFX10-WAVE32-NEXT: .LBB1_1:
; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0
; GFX10-WAVE32-NEXT: exp null off, off, off, off done vm
; GFX10-WAVE32-NEXT: s_endpgm
;
; GFX11-LABEL: test_kill_depth_0_imm_neg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_and_not1_b64 exec, exec, exec
; GFX11-NEXT: s_cbranch_scc0 .LBB1_1
; GFX11-NEXT: s_endpgm
; GFX11-NEXT: .LBB1_1:
; GFX11-NEXT: s_mov_b64 exec, 0
; GFX11-NEXT: exp mrt0 off, off, off, off done
; GFX11-NEXT: s_endpgm
  call void @llvm.amdgcn.kill(i1 false)
  ret void
}

; Two back-to-back constant-false kills. Each kill currently gets its own
; andn2 + scc0 branch; both branches target the same exit block.
; FIXME: Ideally only one early-exit would be emitted
define amdgpu_ps void @test_kill_depth_0_imm_neg_x2() #0 {
; WAVE64-LABEL: test_kill_depth_0_imm_neg_x2:
; WAVE64: ; %bb.0:
; WAVE64-NEXT: s_mov_b64 s[0:1], exec
; WAVE64-NEXT: s_andn2_b64 s[0:1], s[0:1], exec
; WAVE64-NEXT: s_cbranch_scc0 .LBB2_2
; WAVE64-NEXT: ; %bb.1:
; WAVE64-NEXT: s_mov_b64 exec, 0
; WAVE64-NEXT: s_andn2_b64 s[0:1], s[0:1], exec
; WAVE64-NEXT: s_cbranch_scc0 .LBB2_2
; WAVE64-NEXT: s_endpgm
; WAVE64-NEXT: .LBB2_2:
; WAVE64-NEXT: s_mov_b64 exec, 0
; WAVE64-NEXT: exp null off, off, off, off done vm
; WAVE64-NEXT: s_endpgm
;
; GFX10-WAVE32-LABEL: test_kill_depth_0_imm_neg_x2:
; GFX10-WAVE32: ; %bb.0:
; GFX10-WAVE32-NEXT: s_mov_b32 s0, exec_lo
; GFX10-WAVE32-NEXT: s_andn2_b32 s0, s0, exec_lo
; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB2_2
; GFX10-WAVE32-NEXT: ; %bb.1:
; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0
; GFX10-WAVE32-NEXT: s_andn2_b32 s0, s0, exec_lo
; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB2_2
; GFX10-WAVE32-NEXT: s_endpgm
; GFX10-WAVE32-NEXT: .LBB2_2:
; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0
; GFX10-WAVE32-NEXT: exp null off, off, off, off done vm
; GFX10-WAVE32-NEXT: s_endpgm
;
; GFX11-LABEL: test_kill_depth_0_imm_neg_x2:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_mov_b64 s[0:1], exec
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: s_and_not1_b64 s[0:1], s[0:1], exec
; GFX11-NEXT: s_cbranch_scc0 .LBB2_2
; GFX11-NEXT: ; %bb.1:
; GFX11-NEXT: s_mov_b64 exec, 0
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: s_and_not1_b64 s[0:1], s[0:1], exec
; GFX11-NEXT: s_cbranch_scc0 .LBB2_2
; GFX11-NEXT: s_endpgm
; GFX11-NEXT: .LBB2_2:
; GFX11-NEXT: s_mov_b64 exec, 0
; GFX11-NEXT: exp mrt0 off, off, off, off done
; GFX11-NEXT: s_endpgm
  call void @llvm.amdgcn.kill(i1 false)
  call void @llvm.amdgcn.kill(i1 false)
  ret void
}

; Kill on a per-lane condition (%x < 0.0): the expected code feeds the
; inverted compare (v_cmp_ngt) into the andn2 on exec.
define amdgpu_ps void @test_kill_depth_var(float %x) #0 {
; WAVE64-LABEL: test_kill_depth_var:
; WAVE64: ; %bb.0:
; WAVE64-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v0
; WAVE64-NEXT: s_andn2_b64 exec, exec, vcc
; WAVE64-NEXT: s_cbranch_scc0 .LBB3_1
; WAVE64-NEXT: s_endpgm
; WAVE64-NEXT: .LBB3_1:
; WAVE64-NEXT: s_mov_b64 exec, 0
; WAVE64-NEXT: exp null off, off, off, off done vm
; WAVE64-NEXT: s_endpgm
;
; GFX10-WAVE32-LABEL: test_kill_depth_var:
; GFX10-WAVE32: ; %bb.0:
; GFX10-WAVE32-NEXT: v_cmp_ngt_f32_e32 vcc_lo, 0, v0
; GFX10-WAVE32-NEXT: s_andn2_b32 exec_lo, exec_lo, vcc_lo
; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB3_1
; GFX10-WAVE32-NEXT: s_endpgm
; GFX10-WAVE32-NEXT: .LBB3_1:
; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0
; GFX10-WAVE32-NEXT: exp null off, off, off, off done vm
; GFX10-WAVE32-NEXT: s_endpgm
;
; GFX11-LABEL: test_kill_depth_var:
; GFX11: ; %bb.0:
; GFX11-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v0
; GFX11-NEXT: s_and_not1_b64 exec, exec, vcc
; GFX11-NEXT: s_cbranch_scc0 .LBB3_1
; GFX11-NEXT: s_endpgm
; GFX11-NEXT: .LBB3_1:
; GFX11-NEXT: s_mov_b64 exec, 0
; GFX11-NEXT: exp mrt0 off, off, off, off done
; GFX11-NEXT: s_endpgm
  %cmp = fcmp olt float %x, 0.0
  call void @llvm.amdgcn.kill(i1 %cmp)
  ret void
}

; The same condition value feeds two kills; the expected code currently
; repeats the compare and the early-exit branch for the second kill.
; FIXME: Ideally only one early-exit would be emitted
define amdgpu_ps void @test_kill_depth_var_x2_same(float %x) #0 {
; SI-LABEL: test_kill_depth_var_x2_same:
; SI: ; %bb.0:
; SI-NEXT: s_mov_b64 s[0:1], exec
; SI-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v0
; SI-NEXT: s_andn2_b64 s[0:1], s[0:1], vcc
; SI-NEXT: s_cbranch_scc0 .LBB4_2
; SI-NEXT: ; %bb.1:
; SI-NEXT: s_andn2_b64 exec, exec, vcc
; SI-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v0
; SI-NEXT: s_andn2_b64 s[0:1], s[0:1], vcc
; SI-NEXT: s_cbranch_scc0 .LBB4_2
; SI-NEXT: s_endpgm
; SI-NEXT: .LBB4_2:
; SI-NEXT: s_mov_b64 exec, 0
; SI-NEXT: exp null off, off, off, off done vm
; SI-NEXT: s_endpgm
;
; GFX10-WAVE64-LABEL: test_kill_depth_var_x2_same:
; GFX10-WAVE64: ; %bb.0:
; GFX10-WAVE64-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v0
; GFX10-WAVE64-NEXT: s_mov_b64 s[0:1], exec
; GFX10-WAVE64-NEXT: s_andn2_b64 s[0:1], s[0:1], vcc
; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB4_2
; GFX10-WAVE64-NEXT: ; %bb.1:
; GFX10-WAVE64-NEXT: s_andn2_b64 exec, exec, vcc
; GFX10-WAVE64-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v0
; GFX10-WAVE64-NEXT: s_andn2_b64 s[0:1], s[0:1], vcc
; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB4_2
; GFX10-WAVE64-NEXT: s_endpgm
; GFX10-WAVE64-NEXT: .LBB4_2:
; GFX10-WAVE64-NEXT: s_mov_b64 exec, 0
; GFX10-WAVE64-NEXT: exp null off, off, off, off done vm
; GFX10-WAVE64-NEXT: s_endpgm
;
; GFX10-WAVE32-LABEL: test_kill_depth_var_x2_same:
; GFX10-WAVE32: ; %bb.0:
; GFX10-WAVE32-NEXT: v_cmp_ngt_f32_e32 vcc_lo, 0, v0
; GFX10-WAVE32-NEXT: s_mov_b32 s0, exec_lo
; GFX10-WAVE32-NEXT: s_andn2_b32 s0, s0, vcc_lo
; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB4_2
; GFX10-WAVE32-NEXT: ; %bb.1:
; GFX10-WAVE32-NEXT: s_andn2_b32 exec_lo, exec_lo, vcc_lo
; GFX10-WAVE32-NEXT: v_cmp_ngt_f32_e32 vcc_lo, 0, v0
; GFX10-WAVE32-NEXT: s_andn2_b32 s0, s0, vcc_lo
; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB4_2
; GFX10-WAVE32-NEXT: s_endpgm
; GFX10-WAVE32-NEXT: .LBB4_2:
; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0
; GFX10-WAVE32-NEXT: exp null off, off, off, off done vm
; GFX10-WAVE32-NEXT: s_endpgm
;
; GFX11-LABEL: test_kill_depth_var_x2_same:
; GFX11: ; %bb.0:
; GFX11-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v0
; GFX11-NEXT: s_mov_b64 s[0:1], exec
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: s_and_not1_b64 s[0:1], s[0:1], vcc
; GFX11-NEXT: s_cbranch_scc0 .LBB4_2
; GFX11-NEXT: ; %bb.1:
; GFX11-NEXT: s_and_not1_b64 exec, exec, vcc
; GFX11-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v0
; GFX11-NEXT: s_and_not1_b64 s[0:1], s[0:1], vcc
; GFX11-NEXT: s_cbranch_scc0 .LBB4_2
; GFX11-NEXT: s_endpgm
; GFX11-NEXT: .LBB4_2:
; GFX11-NEXT: s_mov_b64 exec, 0
; GFX11-NEXT: exp mrt0 off, off, off, off done
; GFX11-NEXT: s_endpgm
  %cmp = fcmp olt float %x, 0.0
  call void @llvm.amdgcn.kill(i1 %cmp)
  call void @llvm.amdgcn.kill(i1 %cmp)
  ret void
}

; Two kills on independent per-lane conditions (%x in v0, %y in v1).
; FIXME: Ideally only one early-exit would be emitted
define amdgpu_ps void @test_kill_depth_var_x2(float %x, float %y) #0 {
; SI-LABEL: test_kill_depth_var_x2:
; SI: ; %bb.0:
; SI-NEXT: s_mov_b64 s[0:1], exec
; SI-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v0
; SI-NEXT: s_andn2_b64 s[0:1], s[0:1], vcc
; SI-NEXT: s_cbranch_scc0 .LBB5_2
; SI-NEXT: ; %bb.1:
; SI-NEXT: s_andn2_b64 exec, exec, vcc
; SI-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v1
; SI-NEXT: s_andn2_b64 s[0:1], s[0:1], vcc
; SI-NEXT: s_cbranch_scc0 .LBB5_2
; SI-NEXT: s_endpgm
; SI-NEXT: .LBB5_2:
; SI-NEXT: s_mov_b64 exec, 0
; SI-NEXT: exp null off, off, off, off done vm
; SI-NEXT: s_endpgm
;
; GFX10-WAVE64-LABEL: test_kill_depth_var_x2:
; GFX10-WAVE64: ; %bb.0:
; GFX10-WAVE64-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v0
; GFX10-WAVE64-NEXT: s_mov_b64 s[0:1], exec
; GFX10-WAVE64-NEXT: s_andn2_b64 s[0:1], s[0:1], vcc
; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB5_2
; GFX10-WAVE64-NEXT: ; %bb.1:
; GFX10-WAVE64-NEXT: s_andn2_b64 exec, exec, vcc
; GFX10-WAVE64-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v1
; GFX10-WAVE64-NEXT: s_andn2_b64 s[0:1], s[0:1], vcc
; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB5_2
; GFX10-WAVE64-NEXT: s_endpgm
; GFX10-WAVE64-NEXT: .LBB5_2:
; GFX10-WAVE64-NEXT: s_mov_b64 exec, 0
; GFX10-WAVE64-NEXT: exp null off, off, off, off done vm
; GFX10-WAVE64-NEXT: s_endpgm
;
; GFX10-WAVE32-LABEL: test_kill_depth_var_x2:
; GFX10-WAVE32: ; %bb.0:
; GFX10-WAVE32-NEXT: v_cmp_ngt_f32_e32 vcc_lo, 0, v0
; GFX10-WAVE32-NEXT: s_mov_b32 s0, exec_lo
; GFX10-WAVE32-NEXT: s_andn2_b32 s0, s0, vcc_lo
; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB5_2
; GFX10-WAVE32-NEXT: ; %bb.1:
; GFX10-WAVE32-NEXT: s_andn2_b32 exec_lo, exec_lo, vcc_lo
; GFX10-WAVE32-NEXT: v_cmp_ngt_f32_e32 vcc_lo, 0, v1
; GFX10-WAVE32-NEXT: s_andn2_b32 s0, s0, vcc_lo
; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB5_2
; GFX10-WAVE32-NEXT: s_endpgm
; GFX10-WAVE32-NEXT: .LBB5_2:
; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0
; GFX10-WAVE32-NEXT: exp null off, off, off, off done vm
; GFX10-WAVE32-NEXT: s_endpgm
;
; GFX11-LABEL: test_kill_depth_var_x2:
; GFX11: ; %bb.0:
; GFX11-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v0
; GFX11-NEXT: s_mov_b64 s[0:1], exec
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: s_and_not1_b64 s[0:1], s[0:1], vcc
; GFX11-NEXT: s_cbranch_scc0 .LBB5_2
; GFX11-NEXT: ; %bb.1:
; GFX11-NEXT: s_and_not1_b64 exec, exec, vcc
; GFX11-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v1
; GFX11-NEXT: s_and_not1_b64 s[0:1], s[0:1], vcc
; GFX11-NEXT: s_cbranch_scc0 .LBB5_2
; GFX11-NEXT: s_endpgm
; GFX11-NEXT: .LBB5_2:
; GFX11-NEXT: s_mov_b64 exec, 0
; GFX11-NEXT: exp mrt0 off, off, off, off done
; GFX11-NEXT: s_endpgm
  %cmp.x = fcmp olt float %x, 0.0
  call void @llvm.amdgcn.kill(i1 %cmp.x)
  %cmp.y = fcmp olt float %y, 0.0
  call void @llvm.amdgcn.kill(i1 %cmp.y)
  ret void
}

; Like test_kill_depth_var_x2, but the second condition is computed from an
; inline-asm result in v7, so an asm block sits between the two kills.
define amdgpu_ps void @test_kill_depth_var_x2_instructions(float %x) #0 {
; SI-LABEL: test_kill_depth_var_x2_instructions:
; SI: ; %bb.0:
; SI-NEXT: s_mov_b64 s[0:1], exec
; SI-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v0
; SI-NEXT: s_andn2_b64 s[0:1], s[0:1], vcc
; SI-NEXT: s_cbranch_scc0 .LBB6_2
; SI-NEXT: ; %bb.1:
; SI-NEXT: s_andn2_b64 exec, exec, vcc
; SI-NEXT: ;;#ASMSTART
; SI-NEXT: v_mov_b32_e64 v7, -1
; SI-NEXT: ;;#ASMEND
; SI-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v7
; SI-NEXT: s_andn2_b64 s[0:1], s[0:1], vcc
; SI-NEXT: s_cbranch_scc0 .LBB6_2
; SI-NEXT: s_endpgm
; SI-NEXT: .LBB6_2:
; SI-NEXT: s_mov_b64 exec, 0
; SI-NEXT: exp null off, off, off, off done vm
; SI-NEXT: s_endpgm
;
; GFX10-WAVE64-LABEL: test_kill_depth_var_x2_instructions:
; GFX10-WAVE64: ; %bb.0:
; GFX10-WAVE64-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v0
; GFX10-WAVE64-NEXT: s_mov_b64 s[0:1], exec
; GFX10-WAVE64-NEXT: s_andn2_b64 s[0:1], s[0:1], vcc
; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB6_2
; GFX10-WAVE64-NEXT: ; %bb.1:
; GFX10-WAVE64-NEXT: s_andn2_b64 exec, exec, vcc
; GFX10-WAVE64-NEXT: ;;#ASMSTART
; GFX10-WAVE64-NEXT: v_mov_b32_e64 v7, -1
; GFX10-WAVE64-NEXT: ;;#ASMEND
; GFX10-WAVE64-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v7
; GFX10-WAVE64-NEXT: s_andn2_b64 s[0:1], s[0:1], vcc
; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB6_2
; GFX10-WAVE64-NEXT: s_endpgm
; GFX10-WAVE64-NEXT: .LBB6_2:
; GFX10-WAVE64-NEXT: s_mov_b64 exec, 0
; GFX10-WAVE64-NEXT: exp null off, off, off, off done vm
; GFX10-WAVE64-NEXT: s_endpgm
;
; GFX10-WAVE32-LABEL: test_kill_depth_var_x2_instructions:
; GFX10-WAVE32: ; %bb.0:
; GFX10-WAVE32-NEXT: v_cmp_ngt_f32_e32 vcc_lo, 0, v0
; GFX10-WAVE32-NEXT: s_mov_b32 s0, exec_lo
; GFX10-WAVE32-NEXT: s_andn2_b32 s0, s0, vcc_lo
; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB6_2
; GFX10-WAVE32-NEXT: ; %bb.1:
; GFX10-WAVE32-NEXT: s_andn2_b32 exec_lo, exec_lo, vcc_lo
; GFX10-WAVE32-NEXT: ;;#ASMSTART
; GFX10-WAVE32-NEXT: v_mov_b32_e64 v7, -1
; GFX10-WAVE32-NEXT: ;;#ASMEND
; GFX10-WAVE32-NEXT: v_cmp_ngt_f32_e32 vcc_lo, 0, v7
; GFX10-WAVE32-NEXT: s_andn2_b32 s0, s0, vcc_lo
; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB6_2
; GFX10-WAVE32-NEXT: s_endpgm
; GFX10-WAVE32-NEXT: .LBB6_2:
; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0
; GFX10-WAVE32-NEXT: exp null off, off, off, off done vm
; GFX10-WAVE32-NEXT: s_endpgm
;
; GFX11-LABEL: test_kill_depth_var_x2_instructions:
; GFX11: ; %bb.0:
; GFX11-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v0
; GFX11-NEXT: s_mov_b64 s[0:1], exec
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: s_and_not1_b64 s[0:1], s[0:1], vcc
; GFX11-NEXT: s_cbranch_scc0 .LBB6_2
; GFX11-NEXT: ; %bb.1:
; GFX11-NEXT: s_and_not1_b64 exec, exec, vcc
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: v_mov_b32_e64 v7, -1
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v7
; GFX11-NEXT: s_and_not1_b64 s[0:1], s[0:1], vcc
; GFX11-NEXT: s_cbranch_scc0 .LBB6_2
; GFX11-NEXT: s_endpgm
; GFX11-NEXT: .LBB6_2:
; GFX11-NEXT: s_mov_b64 exec, 0
; GFX11-NEXT: exp mrt0 off, off, off, off done
; GFX11-NEXT: s_endpgm
  %cmp.x = fcmp olt float %x, 0.0
  call void @llvm.amdgcn.kill(i1 %cmp.x)
  %y = call float asm sideeffect "v_mov_b32_e64 v7, -1", "={v7}"()
  %cmp.y = fcmp olt float %y, 0.0
  call void @llvm.amdgcn.kill(i1 %cmp.y)
  ret void
}

; Kill inside a conditionally executed block (%bb) that is entered through a
; branch on the SGPR argument; the function returns 1.0 on both paths.
; FIXME: why does the skip depend on the asm length in the same block?
define amdgpu_ps float @test_kill_control_flow(i32 inreg %arg) #0 {
; SI-LABEL: test_kill_control_flow:
; SI: ; %bb.0: ; %entry
; SI-NEXT: s_cmp_lg_u32 s0, 0
; SI-NEXT: s_cbranch_scc0 .LBB7_2
; SI-NEXT: ; %bb.1: ; %exit
; SI-NEXT: v_mov_b32_e32 v0, 1.0
; SI-NEXT: s_branch .LBB7_5
; SI-NEXT: .LBB7_2: ; %bb
; SI-NEXT: s_mov_b64 s[2:3], exec
; SI-NEXT: ;;#ASMSTART
; SI-NEXT: v_mov_b32_e64 v7, -1
; SI-NEXT: v_nop_e64
; SI-NEXT: v_nop_e64
; SI-NEXT: v_nop_e64
; SI-NEXT: v_nop_e64
; SI-NEXT: v_nop_e64
; SI-NEXT: v_nop_e64
; SI-NEXT: v_nop_e64
; SI-NEXT: v_nop_e64
; SI-NEXT: v_nop_e64
; SI-NEXT: v_nop_e64
; SI-NEXT: ;;#ASMEND
; SI-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v7
; SI-NEXT: s_andn2_b64 s[2:3], s[2:3], vcc
; SI-NEXT: s_cbranch_scc0 .LBB7_4
; SI-NEXT: ; %bb.3: ; %bb
; SI-NEXT: s_andn2_b64 exec, exec, vcc
; SI-NEXT: v_mov_b32_e32 v0, 1.0
; SI-NEXT: s_branch .LBB7_5
; SI-NEXT: .LBB7_4:
; SI-NEXT: s_mov_b64 exec, 0
; SI-NEXT: exp null off, off, off, off done vm
; SI-NEXT: s_endpgm
; SI-NEXT: .LBB7_5:
;
; GFX10-WAVE64-LABEL: test_kill_control_flow:
; GFX10-WAVE64: ; %bb.0: ; %entry
; GFX10-WAVE64-NEXT: s_cmp_lg_u32 s0, 0
; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB7_2
; GFX10-WAVE64-NEXT: ; %bb.1: ; %exit
; GFX10-WAVE64-NEXT: v_mov_b32_e32 v0, 1.0
; GFX10-WAVE64-NEXT: s_branch .LBB7_5
; GFX10-WAVE64-NEXT: .LBB7_2: ; %bb
; GFX10-WAVE64-NEXT: ;;#ASMSTART
; GFX10-WAVE64-NEXT: v_mov_b32_e64 v7, -1
; GFX10-WAVE64-NEXT: v_nop_e64
; GFX10-WAVE64-NEXT: v_nop_e64
; GFX10-WAVE64-NEXT: v_nop_e64
; GFX10-WAVE64-NEXT: v_nop_e64
; GFX10-WAVE64-NEXT: v_nop_e64
; GFX10-WAVE64-NEXT: v_nop_e64
; GFX10-WAVE64-NEXT: v_nop_e64
; GFX10-WAVE64-NEXT: v_nop_e64
; GFX10-WAVE64-NEXT: v_nop_e64
; GFX10-WAVE64-NEXT: v_nop_e64
; GFX10-WAVE64-NEXT: ;;#ASMEND
; GFX10-WAVE64-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v7
; GFX10-WAVE64-NEXT: s_mov_b64 s[2:3], exec
; GFX10-WAVE64-NEXT: s_andn2_b64 s[2:3], s[2:3], vcc
; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB7_4
; GFX10-WAVE64-NEXT: ; %bb.3: ; %bb
; GFX10-WAVE64-NEXT: s_andn2_b64 exec, exec, vcc
; GFX10-WAVE64-NEXT: v_mov_b32_e32 v0, 1.0
; GFX10-WAVE64-NEXT: s_branch .LBB7_5
; GFX10-WAVE64-NEXT: .LBB7_4:
; GFX10-WAVE64-NEXT: s_mov_b64 exec, 0
; GFX10-WAVE64-NEXT: exp null off, off, off, off done vm
; GFX10-WAVE64-NEXT: s_endpgm
; GFX10-WAVE64-NEXT: .LBB7_5:
;
; GFX10-WAVE32-LABEL: test_kill_control_flow:
; GFX10-WAVE32: ; %bb.0: ; %entry
; GFX10-WAVE32-NEXT: s_cmp_lg_u32 s0, 0
; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB7_2
; GFX10-WAVE32-NEXT: ; %bb.1: ; %exit
; GFX10-WAVE32-NEXT: v_mov_b32_e32 v0, 1.0
; GFX10-WAVE32-NEXT: s_branch .LBB7_5
; GFX10-WAVE32-NEXT: .LBB7_2: ; %bb
; GFX10-WAVE32-NEXT: ;;#ASMSTART
; GFX10-WAVE32-NEXT: v_mov_b32_e64 v7, -1
; GFX10-WAVE32-NEXT: v_nop_e64
; GFX10-WAVE32-NEXT: v_nop_e64
; GFX10-WAVE32-NEXT: v_nop_e64
; GFX10-WAVE32-NEXT: v_nop_e64
; GFX10-WAVE32-NEXT: v_nop_e64
; GFX10-WAVE32-NEXT: v_nop_e64
; GFX10-WAVE32-NEXT: v_nop_e64
; GFX10-WAVE32-NEXT: v_nop_e64
; GFX10-WAVE32-NEXT: v_nop_e64
; GFX10-WAVE32-NEXT: v_nop_e64
; GFX10-WAVE32-NEXT: ;;#ASMEND
; GFX10-WAVE32-NEXT: v_cmp_ngt_f32_e32 vcc_lo, 0, v7
; GFX10-WAVE32-NEXT: s_mov_b32 s1, exec_lo
; GFX10-WAVE32-NEXT: s_andn2_b32 s1, s1, vcc_lo
; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB7_4
; GFX10-WAVE32-NEXT: ; %bb.3: ; %bb
; GFX10-WAVE32-NEXT: s_andn2_b32 exec_lo, exec_lo, vcc_lo
; GFX10-WAVE32-NEXT: v_mov_b32_e32 v0, 1.0
; GFX10-WAVE32-NEXT: s_branch .LBB7_5
; GFX10-WAVE32-NEXT: .LBB7_4:
; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0
; GFX10-WAVE32-NEXT: exp null off, off, off, off done vm
; GFX10-WAVE32-NEXT: s_endpgm
; GFX10-WAVE32-NEXT: .LBB7_5:
;
; GFX11-LABEL: test_kill_control_flow:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_cmp_lg_u32 s0, 0
; GFX11-NEXT: s_cbranch_scc0 .LBB7_2
; GFX11-NEXT: ; %bb.1: ; %exit
; GFX11-NEXT: v_mov_b32_e32 v0, 1.0
; GFX11-NEXT: s_branch .LBB7_5
; GFX11-NEXT: .LBB7_2: ; %bb
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: v_mov_b32_e64 v7, -1
; GFX11-NEXT: v_nop_e64
; GFX11-NEXT: v_nop_e64
; GFX11-NEXT: v_nop_e64
; GFX11-NEXT: v_nop_e64
; GFX11-NEXT: v_nop_e64
; GFX11-NEXT: v_nop_e64
; GFX11-NEXT: v_nop_e64
; GFX11-NEXT: v_nop_e64
; GFX11-NEXT: v_nop_e64
; GFX11-NEXT: v_nop_e64
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v7
; GFX11-NEXT: s_mov_b64 s[2:3], exec
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: s_and_not1_b64 s[2:3], s[2:3], vcc
; GFX11-NEXT: s_cbranch_scc0 .LBB7_4
; GFX11-NEXT: ; %bb.3: ; %bb
; GFX11-NEXT: s_and_not1_b64 exec, exec, vcc
; GFX11-NEXT: v_mov_b32_e32 v0, 1.0
; GFX11-NEXT: s_branch .LBB7_5
; GFX11-NEXT: .LBB7_4:
; GFX11-NEXT: s_mov_b64 exec, 0
; GFX11-NEXT: exp mrt0 off, off, off, off done
; GFX11-NEXT: s_endpgm
; GFX11-NEXT: .LBB7_5:
entry:
  %cmp = icmp eq i32 %arg, 0
  br i1 %cmp, label %bb, label %exit

bb:
  %var = call float asm sideeffect "v_mov_b32_e64 v7, -1
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64", "={v7}"()
  %cmp.var = fcmp olt float %var, 0.0
  ; TODO: We could do an early-exit here (the branch above is uniform!)
  call void @llvm.amdgcn.kill(i1 %cmp.var)
  br label %exit

exit:
  ret float 1.0
}

; Variant of test_kill_control_flow in which one value (%live.across) is
; stored after the kill and another (%live.out) reaches the phi in %exit, so
; code remains in %bb after the kill.
define amdgpu_ps void @test_kill_control_flow_remainder(i32 inreg %arg) #0 {
; SI-LABEL: test_kill_control_flow_remainder:
; SI: ; %bb.0: ; %entry
; SI-NEXT: s_cmp_lg_u32 s0, 0
; SI-NEXT: v_mov_b32_e32 v9, 0
; SI-NEXT: s_cbranch_scc1 .LBB8_3
; SI-NEXT: ; %bb.1: ; %bb
; SI-NEXT: s_mov_b64 s[2:3], exec
; SI-NEXT: ;;#ASMSTART
; SI-NEXT: v_mov_b32_e64 v7, -1
; SI-NEXT: v_nop_e64
; SI-NEXT: v_nop_e64
; SI-NEXT: v_nop_e64
; SI-NEXT: v_nop_e64
; SI-NEXT: v_nop_e64
; SI-NEXT: v_nop_e64
; SI-NEXT: v_nop_e64
; SI-NEXT: v_nop_e64
; SI-NEXT: v_nop_e64
; SI-NEXT: v_nop_e64
; SI-NEXT: v_nop_e64
; SI-NEXT: ;;#ASMEND
; SI-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v7
; SI-NEXT: s_andn2_b64 s[2:3], s[2:3], vcc
; SI-NEXT: ;;#ASMSTART
; SI-NEXT: v_mov_b32_e64 v8, -1
; SI-NEXT: ;;#ASMEND
; SI-NEXT: s_cbranch_scc0 .LBB8_4
; SI-NEXT: ; %bb.2: ; %bb
; SI-NEXT: s_andn2_b64 exec, exec, vcc
; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: buffer_store_dword v8, off, s[0:3], 0
; SI-NEXT: s_waitcnt vmcnt(0)
; SI-NEXT: ;;#ASMSTART
; SI-NEXT: v_mov_b32_e64 v9, -2
; SI-NEXT: ;;#ASMEND
; SI-NEXT: .LBB8_3: ; %exit
; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: buffer_store_dword v9, off, s[0:3], 0
; SI-NEXT: s_endpgm
; SI-NEXT: .LBB8_4:
; SI-NEXT: s_mov_b64 exec, 0
; SI-NEXT: exp null off, off, off, off done vm
; SI-NEXT: s_endpgm
;
; GFX10-WAVE64-LABEL: test_kill_control_flow_remainder:
; GFX10-WAVE64: ; %bb.0: ; %entry
; GFX10-WAVE64-NEXT: v_mov_b32_e32 v9, 0
; GFX10-WAVE64-NEXT: s_cmp_lg_u32 s0, 0
; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB8_2
; GFX10-WAVE64-NEXT: ; %bb.1: ; %exit
; GFX10-WAVE64-NEXT: global_store_dword v[0:1], v9, off
; GFX10-WAVE64-NEXT: s_endpgm
; GFX10-WAVE64-NEXT: .LBB8_2: ; %bb
; GFX10-WAVE64-NEXT: ;;#ASMSTART
; GFX10-WAVE64-NEXT: v_mov_b32_e64 v7, -1
; GFX10-WAVE64-NEXT: v_nop_e64
; GFX10-WAVE64-NEXT: v_nop_e64
; GFX10-WAVE64-NEXT: v_nop_e64
; GFX10-WAVE64-NEXT: v_nop_e64
; GFX10-WAVE64-NEXT: v_nop_e64
; GFX10-WAVE64-NEXT: v_nop_e64
; GFX10-WAVE64-NEXT: v_nop_e64
; GFX10-WAVE64-NEXT: v_nop_e64
; GFX10-WAVE64-NEXT: v_nop_e64
; GFX10-WAVE64-NEXT: v_nop_e64
; GFX10-WAVE64-NEXT: v_nop_e64
; GFX10-WAVE64-NEXT: ;;#ASMEND
; GFX10-WAVE64-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v7
; GFX10-WAVE64-NEXT: s_mov_b64 s[2:3], exec
; GFX10-WAVE64-NEXT: ;;#ASMSTART
; GFX10-WAVE64-NEXT: v_mov_b32_e64 v8, -1
; GFX10-WAVE64-NEXT: ;;#ASMEND
; GFX10-WAVE64-NEXT: s_andn2_b64 s[2:3], s[2:3], vcc
; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB8_4
; GFX10-WAVE64-NEXT: ; %bb.3: ; %bb
; GFX10-WAVE64-NEXT: s_andn2_b64 exec, exec, vcc
; GFX10-WAVE64-NEXT: global_store_dword v[0:1], v8, off
; GFX10-WAVE64-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WAVE64-NEXT: ;;#ASMSTART
; GFX10-WAVE64-NEXT: v_mov_b32_e64 v9, -2
; GFX10-WAVE64-NEXT: ;;#ASMEND
; GFX10-WAVE64-NEXT: global_store_dword v[0:1], v9, off
; GFX10-WAVE64-NEXT: s_endpgm
; GFX10-WAVE64-NEXT: .LBB8_4:
; GFX10-WAVE64-NEXT: s_mov_b64 exec, 0
; GFX10-WAVE64-NEXT: exp null off, off, off, off done vm
; GFX10-WAVE64-NEXT: s_endpgm
;
; GFX10-WAVE32-LABEL: test_kill_control_flow_remainder:
; GFX10-WAVE32: ; %bb.0: ; %entry
; GFX10-WAVE32-NEXT: v_mov_b32_e32 v9, 0
; GFX10-WAVE32-NEXT: s_cmp_lg_u32 s0, 0
; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB8_2
; GFX10-WAVE32-NEXT: ; %bb.1: ; %exit
; GFX10-WAVE32-NEXT: global_store_dword v[0:1], v9, off
; GFX10-WAVE32-NEXT: s_endpgm
; GFX10-WAVE32-NEXT: .LBB8_2: ; %bb
; GFX10-WAVE32-NEXT: ;;#ASMSTART
; GFX10-WAVE32-NEXT: v_mov_b32_e64 v7, -1
; GFX10-WAVE32-NEXT: v_nop_e64
; GFX10-WAVE32-NEXT: v_nop_e64
; GFX10-WAVE32-NEXT: v_nop_e64
; GFX10-WAVE32-NEXT: v_nop_e64
; GFX10-WAVE32-NEXT: v_nop_e64
; GFX10-WAVE32-NEXT: v_nop_e64
; GFX10-WAVE32-NEXT: v_nop_e64
; GFX10-WAVE32-NEXT: v_nop_e64
; GFX10-WAVE32-NEXT: v_nop_e64
; GFX10-WAVE32-NEXT: v_nop_e64
; GFX10-WAVE32-NEXT: v_nop_e64
; GFX10-WAVE32-NEXT: ;;#ASMEND
; GFX10-WAVE32-NEXT: v_cmp_ngt_f32_e32 vcc_lo, 0, v7
; GFX10-WAVE32-NEXT: s_mov_b32 s1, exec_lo
; GFX10-WAVE32-NEXT: ;;#ASMSTART
; GFX10-WAVE32-NEXT: v_mov_b32_e64 v8, -1
; GFX10-WAVE32-NEXT: ;;#ASMEND
; GFX10-WAVE32-NEXT: s_andn2_b32 s1, s1, vcc_lo
; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB8_4
; GFX10-WAVE32-NEXT: ; %bb.3: ; %bb
; GFX10-WAVE32-NEXT: s_andn2_b32 exec_lo, exec_lo, vcc_lo
; GFX10-WAVE32-NEXT: global_store_dword v[0:1], v8, off
; GFX10-WAVE32-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WAVE32-NEXT: ;;#ASMSTART
; GFX10-WAVE32-NEXT: v_mov_b32_e64 v9, -2
; GFX10-WAVE32-NEXT: ;;#ASMEND
; GFX10-WAVE32-NEXT: global_store_dword v[0:1], v9, off
; GFX10-WAVE32-NEXT: s_endpgm
; GFX10-WAVE32-NEXT: .LBB8_4:
; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0
; GFX10-WAVE32-NEXT: exp null off, off, off, off done vm
; GFX10-WAVE32-NEXT: s_endpgm
;
; GFX11-LABEL: test_kill_control_flow_remainder:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: v_mov_b32_e32 v9, 0
; GFX11-NEXT: s_cmp_lg_u32 s0, 0
; GFX11-NEXT: s_cbranch_scc0 .LBB8_2
; GFX11-NEXT: ; %bb.1: ; %exit
; GFX11-NEXT: global_store_b32 v[0:1], v9, off
; GFX11-NEXT: s_endpgm
; GFX11-NEXT: .LBB8_2: ; %bb
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: v_mov_b32_e64 v7, -1
; GFX11-NEXT: v_nop_e64
; GFX11-NEXT: v_nop_e64
; GFX11-NEXT: v_nop_e64
; GFX11-NEXT: v_nop_e64
; GFX11-NEXT: v_nop_e64
; GFX11-NEXT: v_nop_e64
; GFX11-NEXT: v_nop_e64
; GFX11-NEXT: v_nop_e64
; GFX11-NEXT: v_nop_e64
; GFX11-NEXT: v_nop_e64
; GFX11-NEXT: v_nop_e64
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v7
; GFX11-NEXT: s_mov_b64 s[2:3], exec
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: v_mov_b32_e64 v8, -1
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: s_and_not1_b64 s[2:3], s[2:3], vcc
; GFX11-NEXT: s_cbranch_scc0 .LBB8_4
; GFX11-NEXT: ; %bb.3: ; %bb
; GFX11-NEXT: s_and_not1_b64 exec, exec, vcc
; GFX11-NEXT: global_store_b32 v[0:1], v8, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: v_mov_b32_e64 v9, -2
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: global_store_b32 v[0:1], v9, off
; GFX11-NEXT: s_endpgm
; GFX11-NEXT: .LBB8_4:
; GFX11-NEXT: s_mov_b64 exec, 0
; GFX11-NEXT: exp mrt0 off, off, off, off done
; GFX11-NEXT: s_endpgm
entry:
  %cmp = icmp eq i32 %arg, 0
  br i1 %cmp, label %bb, label %exit

bb:
  %var = call float asm sideeffect "v_mov_b32_e64 v7, -1
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64", "={v7}"()
  %live.across = call float asm sideeffect "v_mov_b32_e64 v8, -1", "={v8}"()
  %cmp.var = fcmp olt float %var, 0.0
  ; TODO: We could do an early-exit here (the branch above is uniform!)
  call void @llvm.amdgcn.kill(i1 %cmp.var)
  store volatile float %live.across, ptr addrspace(1) undef
  %live.out = call float asm sideeffect "v_mov_b32_e64 v9, -2", "={v9}"()
  br label %exit

exit:
  %phi = phi float [ 0.0, %entry ], [ %live.out, %bb ]
  store float %phi, ptr addrspace(1) undef
  ret void
}

; The kill sits in the entry block, ahead of the branch; the function then
; returns a float selected by a phi over %bb and %entry.
define amdgpu_ps float @test_kill_control_flow_return(i32 inreg %arg) #0 {
; SI-LABEL: test_kill_control_flow_return:
; SI: ; %bb.0: ; %entry
; SI-NEXT: s_cmp_eq_u32 s0, 1
; SI-NEXT: s_cselect_b64 s[4:5], -1, 0
; SI-NEXT: s_mov_b64 s[2:3], exec
; SI-NEXT: s_andn2_b64 s[4:5], exec, s[4:5]
; SI-NEXT: s_andn2_b64 s[2:3], s[2:3], s[4:5]
; SI-NEXT: s_cbranch_scc0 .LBB9_4
; SI-NEXT: ; %bb.1: ; %entry
; SI-NEXT: s_and_b64 exec, exec, s[2:3]
; SI-NEXT: s_cmp_lg_u32 s0, 0
; SI-NEXT: v_mov_b32_e32 v0, 0
; SI-NEXT: s_cbranch_scc0 .LBB9_3
; SI-NEXT: ; %bb.2: ; %exit
; SI-NEXT: s_branch .LBB9_5
; SI-NEXT: .LBB9_3: ; %bb
; SI-NEXT: ;;#ASMSTART
; SI-NEXT: v_mov_b32_e64 v7, -1
; SI-NEXT: v_nop_e64
; SI-NEXT: v_nop_e64
; SI-NEXT: v_nop_e64
; SI-NEXT: v_nop_e64
; SI-NEXT: v_nop_e64
; SI-NEXT: v_nop_e64
; SI-NEXT: v_nop_e64
; SI-NEXT: v_nop_e64
; SI-NEXT: v_nop_e64
; SI-NEXT: v_nop_e64
; SI-NEXT: ;;#ASMEND
; SI-NEXT: v_mov_b32_e32 v0, v7
; SI-NEXT: s_branch .LBB9_5
; SI-NEXT: .LBB9_4:
; SI-NEXT: s_mov_b64 exec, 0
; SI-NEXT: exp null off, off, off, off done vm
; SI-NEXT: s_endpgm
; SI-NEXT: .LBB9_5:
;
; GFX10-WAVE64-LABEL: test_kill_control_flow_return:
; GFX10-WAVE64: ; %bb.0: ; %entry
; GFX10-WAVE64-NEXT: s_cmp_eq_u32 s0, 1
; GFX10-WAVE64-NEXT: s_mov_b64 s[2:3], exec
; GFX10-WAVE64-NEXT: s_cselect_b64 s[4:5], -1, 0
; GFX10-WAVE64-NEXT: s_andn2_b64 s[4:5], exec, s[4:5]
; GFX10-WAVE64-NEXT: s_andn2_b64 s[2:3], s[2:3], s[4:5]
; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB9_4
; GFX10-WAVE64-NEXT: ; %bb.1: ; %entry
; GFX10-WAVE64-NEXT: s_and_b64 exec, exec, s[2:3]
; GFX10-WAVE64-NEXT: v_mov_b32_e32 v0, 0
; GFX10-WAVE64-NEXT: s_cmp_lg_u32 s0, 0
; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB9_3
; GFX10-WAVE64-NEXT: ; %bb.2: ; %exit
; GFX10-WAVE64-NEXT: s_branch .LBB9_5
; GFX10-WAVE64-NEXT: .LBB9_3: ; %bb
; GFX10-WAVE64-NEXT: ;;#ASMSTART
; GFX10-WAVE64-NEXT: v_mov_b32_e64 v7, -1
; GFX10-WAVE64-NEXT: v_nop_e64
; GFX10-WAVE64-NEXT: v_nop_e64
; GFX10-WAVE64-NEXT: v_nop_e64
; GFX10-WAVE64-NEXT: v_nop_e64
; GFX10-WAVE64-NEXT: v_nop_e64
; GFX10-WAVE64-NEXT: v_nop_e64
; GFX10-WAVE64-NEXT: v_nop_e64
; GFX10-WAVE64-NEXT: v_nop_e64
; GFX10-WAVE64-NEXT: v_nop_e64
; GFX10-WAVE64-NEXT: v_nop_e64
; GFX10-WAVE64-NEXT: ;;#ASMEND
; GFX10-WAVE64-NEXT: v_mov_b32_e32 v0, v7
; GFX10-WAVE64-NEXT: s_branch .LBB9_5
; GFX10-WAVE64-NEXT: .LBB9_4:
; GFX10-WAVE64-NEXT: s_mov_b64 exec, 0
; GFX10-WAVE64-NEXT: exp null off, off, off, off done vm
; GFX10-WAVE64-NEXT: s_endpgm
; GFX10-WAVE64-NEXT: .LBB9_5:
;
; GFX10-WAVE32-LABEL: test_kill_control_flow_return:
; GFX10-WAVE32: ; %bb.0: ; %entry
; GFX10-WAVE32-NEXT: s_cmp_eq_u32 s0, 1
; GFX10-WAVE32-NEXT: s_mov_b32 s1, exec_lo
; GFX10-WAVE32-NEXT: s_cselect_b32 s2, -1, 0
; GFX10-WAVE32-NEXT: s_andn2_b32 s2, exec_lo, s2
; GFX10-WAVE32-NEXT: s_andn2_b32 s1, s1, s2
; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB9_4
; GFX10-WAVE32-NEXT: ; %bb.1: ; %entry
; GFX10-WAVE32-NEXT: s_and_b32 exec_lo, exec_lo, s1
; GFX10-WAVE32-NEXT: v_mov_b32_e32 v0, 0
; GFX10-WAVE32-NEXT: s_cmp_lg_u32 s0, 0
; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB9_3
; GFX10-WAVE32-NEXT: ; %bb.2: ; %exit
; GFX10-WAVE32-NEXT: s_branch .LBB9_5
; GFX10-WAVE32-NEXT: .LBB9_3: ; %bb
; GFX10-WAVE32-NEXT: ;;#ASMSTART
; GFX10-WAVE32-NEXT: v_mov_b32_e64 v7, -1
; GFX10-WAVE32-NEXT: v_nop_e64
; GFX10-WAVE32-NEXT: v_nop_e64
; GFX10-WAVE32-NEXT: v_nop_e64
; GFX10-WAVE32-NEXT: v_nop_e64
; GFX10-WAVE32-NEXT: v_nop_e64
; GFX10-WAVE32-NEXT: v_nop_e64
; GFX10-WAVE32-NEXT: v_nop_e64
; GFX10-WAVE32-NEXT: v_nop_e64
; GFX10-WAVE32-NEXT: v_nop_e64
; GFX10-WAVE32-NEXT: v_nop_e64
; GFX10-WAVE32-NEXT: ;;#ASMEND
; GFX10-WAVE32-NEXT: v_mov_b32_e32 v0, v7
; GFX10-WAVE32-NEXT: s_branch .LBB9_5
; GFX10-WAVE32-NEXT: .LBB9_4:
; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0
; GFX10-WAVE32-NEXT: exp null off, off, off, off done vm
; GFX10-WAVE32-NEXT: s_endpgm
; GFX10-WAVE32-NEXT: .LBB9_5:
;
; GFX11-LABEL: test_kill_control_flow_return:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_cmp_eq_u32 s0, 1
; GFX11-NEXT: s_mov_b64 s[2:3], exec
; GFX11-NEXT: s_cselect_b64 s[4:5], -1, 0
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX11-NEXT: s_and_not1_b64 s[4:5], exec, s[4:5]
; GFX11-NEXT: s_and_not1_b64 s[2:3], s[2:3], s[4:5]
; GFX11-NEXT: s_cbranch_scc0 .LBB9_4
; GFX11-NEXT: ; %bb.1: ; %entry
; GFX11-NEXT: s_and_b64 exec, exec, s[2:3]
; GFX11-NEXT: v_mov_b32_e32 v0, 0
; GFX11-NEXT: s_cmp_lg_u32 s0, 0
; GFX11-NEXT: s_cbranch_scc0 .LBB9_3
; GFX11-NEXT: ; %bb.2: ; %exit
; GFX11-NEXT: s_branch .LBB9_5
; GFX11-NEXT: .LBB9_3: ; %bb
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: v_mov_b32_e64 v7, -1
; GFX11-NEXT: v_nop_e64
; GFX11-NEXT: v_nop_e64
; GFX11-NEXT: v_nop_e64
; GFX11-NEXT: v_nop_e64
; GFX11-NEXT: v_nop_e64
; GFX11-NEXT: v_nop_e64
; GFX11-NEXT: v_nop_e64
; GFX11-NEXT: v_nop_e64
; GFX11-NEXT: v_nop_e64
; GFX11-NEXT: v_nop_e64
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: v_mov_b32_e32 v0, v7
; GFX11-NEXT: s_branch .LBB9_5
; GFX11-NEXT: .LBB9_4:
; GFX11-NEXT: s_mov_b64 exec, 0
; GFX11-NEXT: exp mrt0 off, off, off, off done
; GFX11-NEXT: s_endpgm
; GFX11-NEXT: .LBB9_5:
entry:
  %kill = icmp eq i32 %arg, 1
  %cmp = icmp eq i32 %arg, 0
  call void @llvm.amdgcn.kill(i1 %kill)
  br i1 %cmp, label %bb, label %exit

bb:
  %var = call float asm sideeffect "v_mov_b32_e64 v7, -1
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64", "={v7}"()
  br label %exit

exit:
  %ret = phi float [ %var, %bb ], [ 0.0, %entry ]
  ret float %ret
}

define amdgpu_ps void @test_kill_divergent_loop(i32 %arg) #0 {
; SI-LABEL: test_kill_divergent_loop:
; SI: ; %bb.0: ; %entry
; SI-NEXT: s_mov_b64 s[0:1], exec
; SI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; SI-NEXT: s_and_saveexec_b64 s[2:3], vcc
; SI-NEXT: s_xor_b64 s[4:5], exec, s[2:3]
; SI-NEXT: s_cbranch_execz .LBB10_4
; SI-NEXT: ; %bb.1: ; %bb.preheader
; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: .LBB10_2: ; %bb
; SI-NEXT: ; =>This Inner Loop Header: Depth=1
; SI-NEXT: ;;#ASMSTART
; SI-NEXT: v_mov_b32_e64 v7, -1
; SI-NEXT: v_nop_e64
; SI-NEXT: v_nop_e64
; SI-NEXT: v_nop_e64
; SI-NEXT: v_nop_e64
; SI-NEXT: v_nop_e64
; SI-NEXT: v_nop_e64
; SI-NEXT: v_nop_e64
; SI-NEXT: v_nop_e64
; SI-NEXT: v_nop_e64
; SI-NEXT: v_nop_e64
; SI-NEXT: ;;#ASMEND
; SI-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v7
; SI-NEXT: s_andn2_b64 s[0:1], s[0:1], vcc
; SI-NEXT: s_cbranch_scc0 .LBB10_5
; SI-NEXT: ; %bb.3: ; %bb
; SI-NEXT: ; in Loop: Header=BB10_2 Depth=1
; SI-NEXT: s_andn2_b64 exec, exec, vcc
; SI-NEXT: buffer_load_dword v0, off, s[0:3], 0 glc
; SI-NEXT: s_waitcnt vmcnt(0)
; SI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; SI-NEXT: s_cbranch_vccnz .LBB10_2
; SI-NEXT: .LBB10_4: ; %Flow1
; SI-NEXT: s_or_b64 exec, exec, s[4:5]
; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: v_mov_b32_e32 v0, 8
; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT: s_waitcnt vmcnt(0)
; SI-NEXT: s_endpgm
; SI-NEXT: .LBB10_5:
; SI-NEXT: s_mov_b64 exec, 0
; SI-NEXT: exp null off, off, off, off done vm
; SI-NEXT: s_endpgm
;
; GFX10-WAVE64-LABEL: test_kill_divergent_loop:
; GFX10-WAVE64: ; %bb.0: ; %entry
; GFX10-WAVE64-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX10-WAVE64-NEXT: s_mov_b64 s[0:1], exec
; GFX10-WAVE64-NEXT: s_and_saveexec_b64 s[2:3], vcc
; GFX10-WAVE64-NEXT: s_xor_b64 s[2:3], exec, s[2:3]
; GFX10-WAVE64-NEXT: s_cbranch_execz .LBB10_3
; GFX10-WAVE64-NEXT: .LBB10_1: ; %bb
; GFX10-WAVE64-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX10-WAVE64-NEXT: ;;#ASMSTART
; GFX10-WAVE64-NEXT: v_mov_b32_e64 v7, -1
; GFX10-WAVE64-NEXT: v_nop_e64
; GFX10-WAVE64-NEXT: v_nop_e64
; GFX10-WAVE64-NEXT: v_nop_e64
; GFX10-WAVE64-NEXT: v_nop_e64
; GFX10-WAVE64-NEXT: v_nop_e64
; GFX10-WAVE64-NEXT: v_nop_e64
; GFX10-WAVE64-NEXT: v_nop_e64
; GFX10-WAVE64-NEXT: v_nop_e64
; GFX10-WAVE64-NEXT: v_nop_e64
; GFX10-WAVE64-NEXT: v_nop_e64
; GFX10-WAVE64-NEXT: ;;#ASMEND
; GFX10-WAVE64-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v7
; GFX10-WAVE64-NEXT: s_andn2_b64 s[0:1], s[0:1], vcc
; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB10_4
; GFX10-WAVE64-NEXT: ; %bb.2: ; %bb
; GFX10-WAVE64-NEXT: ; in Loop: Header=BB10_1 Depth=1
; GFX10-WAVE64-NEXT: s_andn2_b64 exec, exec, vcc
; GFX10-WAVE64-NEXT: global_load_dword v0, v[0:1], off glc dlc
; GFX10-WAVE64-NEXT: s_waitcnt vmcnt(0)
; GFX10-WAVE64-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX10-WAVE64-NEXT: s_cbranch_vccnz .LBB10_1
; GFX10-WAVE64-NEXT: .LBB10_3: ; %Flow1
; GFX10-WAVE64-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX10-WAVE64-NEXT: v_mov_b32_e32 v0, 8
; GFX10-WAVE64-NEXT: global_store_dword v[0:1], v0, off
; GFX10-WAVE64-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WAVE64-NEXT: s_endpgm
; GFX10-WAVE64-NEXT: .LBB10_4:
; GFX10-WAVE64-NEXT: s_mov_b64 exec, 0
; GFX10-WAVE64-NEXT: exp null off, off, off, off done vm
; GFX10-WAVE64-NEXT: s_endpgm
;
; GFX10-WAVE32-LABEL: test_kill_divergent_loop:
;
GFX10-WAVE32: ; %bb.0: ; %entry 1026; GFX10-WAVE32-NEXT: s_mov_b32 s0, exec_lo 1027; GFX10-WAVE32-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 1028; GFX10-WAVE32-NEXT: s_and_saveexec_b32 s1, vcc_lo 1029; GFX10-WAVE32-NEXT: s_xor_b32 s1, exec_lo, s1 1030; GFX10-WAVE32-NEXT: s_cbranch_execz .LBB10_3 1031; GFX10-WAVE32-NEXT: .LBB10_1: ; %bb 1032; GFX10-WAVE32-NEXT: ; =>This Inner Loop Header: Depth=1 1033; GFX10-WAVE32-NEXT: ;;#ASMSTART 1034; GFX10-WAVE32-NEXT: v_mov_b32_e64 v7, -1 1035; GFX10-WAVE32-NEXT: v_nop_e64 1036; GFX10-WAVE32-NEXT: v_nop_e64 1037; GFX10-WAVE32-NEXT: v_nop_e64 1038; GFX10-WAVE32-NEXT: v_nop_e64 1039; GFX10-WAVE32-NEXT: v_nop_e64 1040; GFX10-WAVE32-NEXT: v_nop_e64 1041; GFX10-WAVE32-NEXT: v_nop_e64 1042; GFX10-WAVE32-NEXT: v_nop_e64 1043; GFX10-WAVE32-NEXT: v_nop_e64 1044; GFX10-WAVE32-NEXT: v_nop_e64 1045; GFX10-WAVE32-NEXT: ;;#ASMEND 1046; GFX10-WAVE32-NEXT: v_cmp_ngt_f32_e32 vcc_lo, 0, v7 1047; GFX10-WAVE32-NEXT: s_andn2_b32 s0, s0, vcc_lo 1048; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB10_4 1049; GFX10-WAVE32-NEXT: ; %bb.2: ; %bb 1050; GFX10-WAVE32-NEXT: ; in Loop: Header=BB10_1 Depth=1 1051; GFX10-WAVE32-NEXT: s_andn2_b32 exec_lo, exec_lo, vcc_lo 1052; GFX10-WAVE32-NEXT: global_load_dword v0, v[0:1], off glc dlc 1053; GFX10-WAVE32-NEXT: s_waitcnt vmcnt(0) 1054; GFX10-WAVE32-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 1055; GFX10-WAVE32-NEXT: s_cbranch_vccnz .LBB10_1 1056; GFX10-WAVE32-NEXT: .LBB10_3: ; %Flow1 1057; GFX10-WAVE32-NEXT: s_or_b32 exec_lo, exec_lo, s1 1058; GFX10-WAVE32-NEXT: v_mov_b32_e32 v0, 8 1059; GFX10-WAVE32-NEXT: global_store_dword v[0:1], v0, off 1060; GFX10-WAVE32-NEXT: s_waitcnt_vscnt null, 0x0 1061; GFX10-WAVE32-NEXT: s_endpgm 1062; GFX10-WAVE32-NEXT: .LBB10_4: 1063; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0 1064; GFX10-WAVE32-NEXT: exp null off, off, off, off done vm 1065; GFX10-WAVE32-NEXT: s_endpgm 1066; 1067; GFX11-LABEL: test_kill_divergent_loop: 1068; GFX11: ; %bb.0: ; %entry 1069; GFX11-NEXT: s_mov_b64 s[0:1], exec 1070; 
GFX11-NEXT: s_mov_b64 s[2:3], exec 1071; GFX11-NEXT: v_cmpx_eq_u32_e32 0, v0 1072; GFX11-NEXT: s_xor_b64 s[2:3], exec, s[2:3] 1073; GFX11-NEXT: s_cbranch_execz .LBB10_3 1074; GFX11-NEXT: .LBB10_1: ; %bb 1075; GFX11-NEXT: ; =>This Inner Loop Header: Depth=1 1076; GFX11-NEXT: ;;#ASMSTART 1077; GFX11-NEXT: v_mov_b32_e64 v7, -1 1078; GFX11-NEXT: v_nop_e64 1079; GFX11-NEXT: v_nop_e64 1080; GFX11-NEXT: v_nop_e64 1081; GFX11-NEXT: v_nop_e64 1082; GFX11-NEXT: v_nop_e64 1083; GFX11-NEXT: v_nop_e64 1084; GFX11-NEXT: v_nop_e64 1085; GFX11-NEXT: v_nop_e64 1086; GFX11-NEXT: v_nop_e64 1087; GFX11-NEXT: v_nop_e64 1088; GFX11-NEXT: ;;#ASMEND 1089; GFX11-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v7 1090; GFX11-NEXT: s_and_not1_b64 s[0:1], s[0:1], vcc 1091; GFX11-NEXT: s_cbranch_scc0 .LBB10_4 1092; GFX11-NEXT: ; %bb.2: ; %bb 1093; GFX11-NEXT: ; in Loop: Header=BB10_1 Depth=1 1094; GFX11-NEXT: s_and_not1_b64 exec, exec, vcc 1095; GFX11-NEXT: global_load_b32 v0, v[0:1], off glc dlc 1096; GFX11-NEXT: s_waitcnt vmcnt(0) 1097; GFX11-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 1098; GFX11-NEXT: s_cbranch_vccnz .LBB10_1 1099; GFX11-NEXT: .LBB10_3: ; %Flow1 1100; GFX11-NEXT: s_or_b64 exec, exec, s[2:3] 1101; GFX11-NEXT: v_mov_b32_e32 v0, 8 1102; GFX11-NEXT: global_store_b32 v[0:1], v0, off dlc 1103; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 1104; GFX11-NEXT: s_endpgm 1105; GFX11-NEXT: .LBB10_4: 1106; GFX11-NEXT: s_mov_b64 exec, 0 1107; GFX11-NEXT: exp mrt0 off, off, off, off done 1108; GFX11-NEXT: s_endpgm 1109entry: 1110 %cmp = icmp eq i32 %arg, 0 1111 br i1 %cmp, label %bb, label %exit 1112 1113bb: 1114 %var = call float asm sideeffect "v_mov_b32_e64 v7, -1 1115 v_nop_e64 1116 v_nop_e64 1117 v_nop_e64 1118 v_nop_e64 1119 v_nop_e64 1120 v_nop_e64 1121 v_nop_e64 1122 v_nop_e64 1123 v_nop_e64 1124 v_nop_e64", "={v7}"() 1125 %cmp.var = fcmp olt float %var, 0.0 1126 call void @llvm.amdgcn.kill(i1 %cmp.var) 1127 %vgpr = load volatile i32, ptr addrspace(1) undef 1128 %loop.cond = icmp eq i32 %vgpr, 0 1129 br i1 
%loop.cond, label %bb, label %exit 1130 1131exit: 1132 store volatile i32 8, ptr addrspace(1) undef 1133 ret void 1134} 1135 1136; bug 28550 1137define amdgpu_ps void @phi_use_def_before_kill(float inreg %x) #0 { 1138; SI-LABEL: phi_use_def_before_kill: 1139; SI: ; %bb.0: ; %bb 1140; SI-NEXT: v_add_f32_e64 v1, s0, 1.0 1141; SI-NEXT: v_cmp_lt_f32_e32 vcc, 0, v1 1142; SI-NEXT: v_cndmask_b32_e64 v0, 0, -1.0, vcc 1143; SI-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v1 1144; SI-NEXT: s_andn2_b64 exec, exec, vcc 1145; SI-NEXT: s_cbranch_scc0 .LBB11_6 1146; SI-NEXT: ; %bb.1: ; %bb 1147; SI-NEXT: s_andn2_b64 exec, exec, vcc 1148; SI-NEXT: s_cbranch_scc0 .LBB11_3 1149; SI-NEXT: ; %bb.2: ; %bb8 1150; SI-NEXT: s_mov_b32 s3, 0xf000 1151; SI-NEXT: s_mov_b32 s2, -1 1152; SI-NEXT: v_mov_b32_e32 v0, 8 1153; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0 1154; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) 1155; SI-NEXT: v_mov_b32_e32 v0, 4.0 1156; SI-NEXT: .LBB11_3: ; %phibb 1157; SI-NEXT: v_cmp_eq_f32_e32 vcc, 0, v0 1158; SI-NEXT: s_cbranch_vccz .LBB11_5 1159; SI-NEXT: ; %bb.4: ; %bb10 1160; SI-NEXT: s_mov_b32 s3, 0xf000 1161; SI-NEXT: s_mov_b32 s2, -1 1162; SI-NEXT: v_mov_b32_e32 v0, 9 1163; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0 1164; SI-NEXT: s_waitcnt vmcnt(0) 1165; SI-NEXT: .LBB11_5: ; %end 1166; SI-NEXT: s_endpgm 1167; SI-NEXT: .LBB11_6: 1168; SI-NEXT: s_mov_b64 exec, 0 1169; SI-NEXT: exp null off, off, off, off done vm 1170; SI-NEXT: s_endpgm 1171; 1172; GFX10-WAVE64-LABEL: phi_use_def_before_kill: 1173; GFX10-WAVE64: ; %bb.0: ; %bb 1174; GFX10-WAVE64-NEXT: v_add_f32_e64 v1, s0, 1.0 1175; GFX10-WAVE64-NEXT: v_cmp_lt_f32_e32 vcc, 0, v1 1176; GFX10-WAVE64-NEXT: v_cndmask_b32_e64 v0, 0, -1.0, vcc 1177; GFX10-WAVE64-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v1 1178; GFX10-WAVE64-NEXT: s_andn2_b64 exec, exec, vcc 1179; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB11_6 1180; GFX10-WAVE64-NEXT: ; %bb.1: ; %bb 1181; GFX10-WAVE64-NEXT: s_andn2_b64 exec, exec, vcc 1182; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB11_3 
1183; GFX10-WAVE64-NEXT: ; %bb.2: ; %bb8 1184; GFX10-WAVE64-NEXT: v_mov_b32_e32 v1, 8 1185; GFX10-WAVE64-NEXT: v_mov_b32_e32 v0, 4.0 1186; GFX10-WAVE64-NEXT: global_store_dword v[0:1], v1, off 1187; GFX10-WAVE64-NEXT: s_waitcnt_vscnt null, 0x0 1188; GFX10-WAVE64-NEXT: .LBB11_3: ; %phibb 1189; GFX10-WAVE64-NEXT: v_cmp_eq_f32_e32 vcc, 0, v0 1190; GFX10-WAVE64-NEXT: s_cbranch_vccz .LBB11_5 1191; GFX10-WAVE64-NEXT: ; %bb.4: ; %bb10 1192; GFX10-WAVE64-NEXT: v_mov_b32_e32 v0, 9 1193; GFX10-WAVE64-NEXT: global_store_dword v[0:1], v0, off 1194; GFX10-WAVE64-NEXT: s_waitcnt_vscnt null, 0x0 1195; GFX10-WAVE64-NEXT: .LBB11_5: ; %end 1196; GFX10-WAVE64-NEXT: s_endpgm 1197; GFX10-WAVE64-NEXT: .LBB11_6: 1198; GFX10-WAVE64-NEXT: s_mov_b64 exec, 0 1199; GFX10-WAVE64-NEXT: exp null off, off, off, off done vm 1200; GFX10-WAVE64-NEXT: s_endpgm 1201; 1202; GFX10-WAVE32-LABEL: phi_use_def_before_kill: 1203; GFX10-WAVE32: ; %bb.0: ; %bb 1204; GFX10-WAVE32-NEXT: v_add_f32_e64 v1, s0, 1.0 1205; GFX10-WAVE32-NEXT: v_cmp_lt_f32_e32 vcc_lo, 0, v1 1206; GFX10-WAVE32-NEXT: v_cndmask_b32_e64 v0, 0, -1.0, vcc_lo 1207; GFX10-WAVE32-NEXT: v_cmp_nlt_f32_e32 vcc_lo, 0, v1 1208; GFX10-WAVE32-NEXT: s_andn2_b32 exec_lo, exec_lo, vcc_lo 1209; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB11_6 1210; GFX10-WAVE32-NEXT: ; %bb.1: ; %bb 1211; GFX10-WAVE32-NEXT: s_andn2_b32 exec_lo, exec_lo, vcc_lo 1212; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB11_3 1213; GFX10-WAVE32-NEXT: ; %bb.2: ; %bb8 1214; GFX10-WAVE32-NEXT: v_mov_b32_e32 v1, 8 1215; GFX10-WAVE32-NEXT: v_mov_b32_e32 v0, 4.0 1216; GFX10-WAVE32-NEXT: global_store_dword v[0:1], v1, off 1217; GFX10-WAVE32-NEXT: s_waitcnt_vscnt null, 0x0 1218; GFX10-WAVE32-NEXT: .LBB11_3: ; %phibb 1219; GFX10-WAVE32-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v0 1220; GFX10-WAVE32-NEXT: s_cbranch_vccz .LBB11_5 1221; GFX10-WAVE32-NEXT: ; %bb.4: ; %bb10 1222; GFX10-WAVE32-NEXT: v_mov_b32_e32 v0, 9 1223; GFX10-WAVE32-NEXT: global_store_dword v[0:1], v0, off 1224; GFX10-WAVE32-NEXT: s_waitcnt_vscnt 
null, 0x0 1225; GFX10-WAVE32-NEXT: .LBB11_5: ; %end 1226; GFX10-WAVE32-NEXT: s_endpgm 1227; GFX10-WAVE32-NEXT: .LBB11_6: 1228; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0 1229; GFX10-WAVE32-NEXT: exp null off, off, off, off done vm 1230; GFX10-WAVE32-NEXT: s_endpgm 1231; 1232; GFX11-LABEL: phi_use_def_before_kill: 1233; GFX11: ; %bb.0: ; %bb 1234; GFX11-NEXT: v_add_f32_e64 v1, s0, 1.0 1235; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1236; GFX11-NEXT: v_cmp_lt_f32_e32 vcc, 0, v1 1237; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, -1.0, vcc 1238; GFX11-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v1 1239; GFX11-NEXT: s_and_not1_b64 exec, exec, vcc 1240; GFX11-NEXT: s_cbranch_scc0 .LBB11_6 1241; GFX11-NEXT: ; %bb.1: ; %bb 1242; GFX11-NEXT: s_and_not1_b64 exec, exec, vcc 1243; GFX11-NEXT: s_cbranch_scc0 .LBB11_3 1244; GFX11-NEXT: ; %bb.2: ; %bb8 1245; GFX11-NEXT: v_mov_b32_e32 v1, 8 1246; GFX11-NEXT: v_mov_b32_e32 v0, 4.0 1247; GFX11-NEXT: global_store_b32 v[0:1], v1, off dlc 1248; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 1249; GFX11-NEXT: .LBB11_3: ; %phibb 1250; GFX11-NEXT: v_cmp_eq_f32_e32 vcc, 0, v0 1251; GFX11-NEXT: s_cbranch_vccz .LBB11_5 1252; GFX11-NEXT: ; %bb.4: ; %bb10 1253; GFX11-NEXT: v_mov_b32_e32 v0, 9 1254; GFX11-NEXT: global_store_b32 v[0:1], v0, off dlc 1255; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 1256; GFX11-NEXT: .LBB11_5: ; %end 1257; GFX11-NEXT: s_endpgm 1258; GFX11-NEXT: .LBB11_6: 1259; GFX11-NEXT: s_mov_b64 exec, 0 1260; GFX11-NEXT: exp mrt0 off, off, off, off done 1261; GFX11-NEXT: s_endpgm 1262bb: 1263 %tmp = fadd float %x, 1.000000e+00 1264 %tmp1 = fcmp olt float 0.000000e+00, %tmp 1265 %tmp2 = select i1 %tmp1, float -1.000000e+00, float 0.000000e+00 1266 %cmp.tmp2 = fcmp olt float %tmp2, 0.0 1267 call void @llvm.amdgcn.kill(i1 %cmp.tmp2) 1268 br i1 undef, label %phibb, label %bb8 1269 1270phibb: 1271 %tmp5 = phi float [ %tmp2, %bb ], [ 4.0, %bb8 ] 1272 %tmp6 = fcmp oeq float %tmp5, 0.000000e+00 1273 br i1 %tmp6, label %bb10, label %end 1274 1275bb8: 1276 store volatile 
i32 8, ptr addrspace(1) undef 1277 br label %phibb 1278 1279bb10: 1280 store volatile i32 9, ptr addrspace(1) undef 1281 br label %end 1282 1283end: 1284 ret void 1285} 1286 1287define amdgpu_ps void @no_skip_no_successors(float inreg %arg, float inreg %arg1) #0 { 1288; SI-LABEL: no_skip_no_successors: 1289; SI: ; %bb.0: ; %bb 1290; SI-NEXT: v_cmp_nge_f32_e64 s[4:5], s1, 0 1291; SI-NEXT: s_and_b64 vcc, exec, s[4:5] 1292; SI-NEXT: s_cbranch_vccz .LBB12_3 1293; SI-NEXT: ; %bb.1: ; %bb6 1294; SI-NEXT: s_mov_b64 s[2:3], exec 1295; SI-NEXT: s_andn2_b64 s[2:3], s[2:3], exec 1296; SI-NEXT: s_cbranch_scc0 .LBB12_5 1297; SI-NEXT: ; %bb.2: ; %bb6 1298; SI-NEXT: s_mov_b64 exec, 0 1299; SI-NEXT: .LBB12_3: ; %bb3 1300; SI-NEXT: v_mov_b32_e32 v0, 0x3e7ae148 1301; SI-NEXT: v_cmp_nge_f32_e32 vcc, s0, v0 1302; SI-NEXT: s_and_b64 vcc, exec, vcc 1303; SI-NEXT: ; %bb.4: ; %bb5 1304; SI-NEXT: .LBB12_5: 1305; SI-NEXT: s_mov_b64 exec, 0 1306; SI-NEXT: exp null off, off, off, off done vm 1307; SI-NEXT: s_endpgm 1308; 1309; GFX10-WAVE64-LABEL: no_skip_no_successors: 1310; GFX10-WAVE64: ; %bb.0: ; %bb 1311; GFX10-WAVE64-NEXT: v_cmp_nge_f32_e64 s[4:5], s1, 0 1312; GFX10-WAVE64-NEXT: s_and_b64 vcc, exec, s[4:5] 1313; GFX10-WAVE64-NEXT: s_cbranch_vccz .LBB12_3 1314; GFX10-WAVE64-NEXT: ; %bb.1: ; %bb6 1315; GFX10-WAVE64-NEXT: s_mov_b64 s[2:3], exec 1316; GFX10-WAVE64-NEXT: s_andn2_b64 s[2:3], s[2:3], exec 1317; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB12_5 1318; GFX10-WAVE64-NEXT: ; %bb.2: ; %bb6 1319; GFX10-WAVE64-NEXT: s_mov_b64 exec, 0 1320; GFX10-WAVE64-NEXT: .LBB12_3: ; %bb3 1321; GFX10-WAVE64-NEXT: v_cmp_nle_f32_e64 s[0:1], 0x3e7ae148, s0 1322; GFX10-WAVE64-NEXT: s_and_b64 vcc, exec, s[0:1] 1323; GFX10-WAVE64-NEXT: ; %bb.4: ; %bb5 1324; GFX10-WAVE64-NEXT: .LBB12_5: 1325; GFX10-WAVE64-NEXT: s_mov_b64 exec, 0 1326; GFX10-WAVE64-NEXT: exp null off, off, off, off done vm 1327; GFX10-WAVE64-NEXT: s_endpgm 1328; 1329; GFX10-WAVE32-LABEL: no_skip_no_successors: 1330; GFX10-WAVE32: ; %bb.0: ; %bb 
1331; GFX10-WAVE32-NEXT: v_cmp_nge_f32_e64 s1, s1, 0 1332; GFX10-WAVE32-NEXT: s_and_b32 vcc_lo, exec_lo, s1 1333; GFX10-WAVE32-NEXT: s_cbranch_vccz .LBB12_3 1334; GFX10-WAVE32-NEXT: ; %bb.1: ; %bb6 1335; GFX10-WAVE32-NEXT: s_mov_b32 s2, exec_lo 1336; GFX10-WAVE32-NEXT: s_andn2_b32 s2, s2, exec_lo 1337; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB12_5 1338; GFX10-WAVE32-NEXT: ; %bb.2: ; %bb6 1339; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0 1340; GFX10-WAVE32-NEXT: .LBB12_3: ; %bb3 1341; GFX10-WAVE32-NEXT: v_cmp_nle_f32_e64 s0, 0x3e7ae148, s0 1342; GFX10-WAVE32-NEXT: s_and_b32 vcc_lo, exec_lo, s0 1343; GFX10-WAVE32-NEXT: ; %bb.4: ; %bb5 1344; GFX10-WAVE32-NEXT: .LBB12_5: 1345; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0 1346; GFX10-WAVE32-NEXT: exp null off, off, off, off done vm 1347; GFX10-WAVE32-NEXT: s_endpgm 1348; 1349; GFX11-LABEL: no_skip_no_successors: 1350; GFX11: ; %bb.0: ; %bb 1351; GFX11-NEXT: v_cmp_nge_f32_e64 s[4:5], s1, 0 1352; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1353; GFX11-NEXT: s_and_b64 vcc, exec, s[4:5] 1354; GFX11-NEXT: s_cbranch_vccz .LBB12_3 1355; GFX11-NEXT: ; %bb.1: ; %bb6 1356; GFX11-NEXT: s_mov_b64 s[2:3], exec 1357; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 1358; GFX11-NEXT: s_and_not1_b64 s[2:3], s[2:3], exec 1359; GFX11-NEXT: s_cbranch_scc0 .LBB12_5 1360; GFX11-NEXT: ; %bb.2: ; %bb6 1361; GFX11-NEXT: s_mov_b64 exec, 0 1362; GFX11-NEXT: .LBB12_3: ; %bb3 1363; GFX11-NEXT: v_cmp_nle_f32_e64 s[0:1], 0x3e7ae148, s0 1364; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1365; GFX11-NEXT: s_and_b64 vcc, exec, s[0:1] 1366; GFX11-NEXT: ; %bb.4: ; %bb5 1367; GFX11-NEXT: .LBB12_5: 1368; GFX11-NEXT: s_mov_b64 exec, 0 1369; GFX11-NEXT: exp mrt0 off, off, off, off done 1370; GFX11-NEXT: s_endpgm 1371bb: 1372 %tmp = fcmp ult float %arg1, 0.000000e+00 1373 br i1 %tmp, label %bb6, label %bb3 1374 1375bb3: ; preds = %bb 1376 %tmp2 = fcmp ult float %arg, 0x3FCF5C2900000000 1377 br i1 %tmp2, label %bb5, label %bb4 1378 1379bb4: ; preds = %bb3 1380 br i1 true, 
label %bb5, label %bb7 1381 1382bb5: ; preds = %bb4, %bb3 1383 unreachable 1384 1385bb6: ; preds = %bb 1386 call void @llvm.amdgcn.kill(i1 false) 1387 unreachable 1388 1389bb7: ; preds = %bb4 1390 ret void 1391} 1392 1393define amdgpu_ps void @if_after_kill_block(float %arg, float %arg1, float %arg2, float %arg3) #0 { 1394; SI-LABEL: if_after_kill_block: 1395; SI: ; %bb.0: ; %bb 1396; SI-NEXT: s_mov_b64 s[0:1], exec 1397; SI-NEXT: s_wqm_b64 exec, exec 1398; SI-NEXT: v_cmp_nle_f32_e32 vcc, 0, v1 1399; SI-NEXT: s_and_saveexec_b64 s[2:3], vcc 1400; SI-NEXT: s_xor_b64 s[2:3], exec, s[2:3] 1401; SI-NEXT: s_cbranch_execz .LBB13_3 1402; SI-NEXT: ; %bb.1: ; %bb3 1403; SI-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v0 1404; SI-NEXT: s_andn2_b64 s[0:1], s[0:1], vcc 1405; SI-NEXT: s_cbranch_scc0 .LBB13_6 1406; SI-NEXT: ; %bb.2: ; %bb3 1407; SI-NEXT: s_andn2_b64 exec, exec, vcc 1408; SI-NEXT: .LBB13_3: ; %bb4 1409; SI-NEXT: s_or_b64 exec, exec, s[2:3] 1410; SI-NEXT: image_sample_c v0, v[2:3], s[0:7], s[0:3] dmask:0x10 1411; SI-NEXT: s_waitcnt vmcnt(0) 1412; SI-NEXT: v_cmp_neq_f32_e32 vcc, 0, v0 1413; SI-NEXT: s_and_saveexec_b64 s[0:1], vcc 1414; SI-NEXT: s_cbranch_execz .LBB13_5 1415; SI-NEXT: ; %bb.4: ; %bb8 1416; SI-NEXT: s_mov_b32 s3, 0xf000 1417; SI-NEXT: s_mov_b32 s2, -1 1418; SI-NEXT: v_mov_b32_e32 v0, 9 1419; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0 1420; SI-NEXT: s_waitcnt vmcnt(0) 1421; SI-NEXT: .LBB13_5: ; %UnifiedReturnBlock 1422; SI-NEXT: s_endpgm 1423; SI-NEXT: .LBB13_6: 1424; SI-NEXT: s_mov_b64 exec, 0 1425; SI-NEXT: exp null off, off, off, off done vm 1426; SI-NEXT: s_endpgm 1427; 1428; GFX10-WAVE64-LABEL: if_after_kill_block: 1429; GFX10-WAVE64: ; %bb.0: ; %bb 1430; GFX10-WAVE64-NEXT: s_mov_b64 s[0:1], exec 1431; GFX10-WAVE64-NEXT: s_wqm_b64 exec, exec 1432; GFX10-WAVE64-NEXT: v_cmp_nle_f32_e32 vcc, 0, v1 1433; GFX10-WAVE64-NEXT: s_and_saveexec_b64 s[2:3], vcc 1434; GFX10-WAVE64-NEXT: s_xor_b64 s[2:3], exec, s[2:3] 1435; GFX10-WAVE64-NEXT: s_cbranch_execz .LBB13_3 
1436; GFX10-WAVE64-NEXT: ; %bb.1: ; %bb3 1437; GFX10-WAVE64-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v0 1438; GFX10-WAVE64-NEXT: s_andn2_b64 s[0:1], s[0:1], vcc 1439; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB13_6 1440; GFX10-WAVE64-NEXT: ; %bb.2: ; %bb3 1441; GFX10-WAVE64-NEXT: s_andn2_b64 exec, exec, vcc 1442; GFX10-WAVE64-NEXT: .LBB13_3: ; %bb4 1443; GFX10-WAVE64-NEXT: s_or_b64 exec, exec, s[2:3] 1444; GFX10-WAVE64-NEXT: image_sample_c v0, v[2:3], s[0:7], s[0:3] dmask:0x10 dim:SQ_RSRC_IMG_1D 1445; GFX10-WAVE64-NEXT: s_waitcnt vmcnt(0) 1446; GFX10-WAVE64-NEXT: v_cmp_neq_f32_e32 vcc, 0, v0 1447; GFX10-WAVE64-NEXT: s_and_saveexec_b64 s[0:1], vcc 1448; GFX10-WAVE64-NEXT: s_cbranch_execz .LBB13_5 1449; GFX10-WAVE64-NEXT: ; %bb.4: ; %bb8 1450; GFX10-WAVE64-NEXT: v_mov_b32_e32 v0, 9 1451; GFX10-WAVE64-NEXT: global_store_dword v[0:1], v0, off 1452; GFX10-WAVE64-NEXT: s_waitcnt_vscnt null, 0x0 1453; GFX10-WAVE64-NEXT: .LBB13_5: ; %UnifiedReturnBlock 1454; GFX10-WAVE64-NEXT: s_endpgm 1455; GFX10-WAVE64-NEXT: .LBB13_6: 1456; GFX10-WAVE64-NEXT: s_mov_b64 exec, 0 1457; GFX10-WAVE64-NEXT: exp null off, off, off, off done vm 1458; GFX10-WAVE64-NEXT: s_endpgm 1459; 1460; GFX10-WAVE32-LABEL: if_after_kill_block: 1461; GFX10-WAVE32: ; %bb.0: ; %bb 1462; GFX10-WAVE32-NEXT: s_mov_b32 s0, exec_lo 1463; GFX10-WAVE32-NEXT: s_wqm_b32 exec_lo, exec_lo 1464; GFX10-WAVE32-NEXT: v_cmp_nle_f32_e32 vcc_lo, 0, v1 1465; GFX10-WAVE32-NEXT: s_and_saveexec_b32 s1, vcc_lo 1466; GFX10-WAVE32-NEXT: s_xor_b32 s1, exec_lo, s1 1467; GFX10-WAVE32-NEXT: s_cbranch_execz .LBB13_3 1468; GFX10-WAVE32-NEXT: ; %bb.1: ; %bb3 1469; GFX10-WAVE32-NEXT: v_cmp_ngt_f32_e32 vcc_lo, 0, v0 1470; GFX10-WAVE32-NEXT: s_andn2_b32 s0, s0, vcc_lo 1471; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB13_6 1472; GFX10-WAVE32-NEXT: ; %bb.2: ; %bb3 1473; GFX10-WAVE32-NEXT: s_andn2_b32 exec_lo, exec_lo, vcc_lo 1474; GFX10-WAVE32-NEXT: .LBB13_3: ; %bb4 1475; GFX10-WAVE32-NEXT: s_or_b32 exec_lo, exec_lo, s1 1476; GFX10-WAVE32-NEXT: image_sample_c v0, 
v[2:3], s[0:7], s[0:3] dmask:0x10 dim:SQ_RSRC_IMG_1D 1477; GFX10-WAVE32-NEXT: s_waitcnt vmcnt(0) 1478; GFX10-WAVE32-NEXT: v_cmp_neq_f32_e32 vcc_lo, 0, v0 1479; GFX10-WAVE32-NEXT: s_and_saveexec_b32 s0, vcc_lo 1480; GFX10-WAVE32-NEXT: s_cbranch_execz .LBB13_5 1481; GFX10-WAVE32-NEXT: ; %bb.4: ; %bb8 1482; GFX10-WAVE32-NEXT: v_mov_b32_e32 v0, 9 1483; GFX10-WAVE32-NEXT: global_store_dword v[0:1], v0, off 1484; GFX10-WAVE32-NEXT: s_waitcnt_vscnt null, 0x0 1485; GFX10-WAVE32-NEXT: .LBB13_5: ; %UnifiedReturnBlock 1486; GFX10-WAVE32-NEXT: s_endpgm 1487; GFX10-WAVE32-NEXT: .LBB13_6: 1488; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0 1489; GFX10-WAVE32-NEXT: exp null off, off, off, off done vm 1490; GFX10-WAVE32-NEXT: s_endpgm 1491; 1492; GFX11-LABEL: if_after_kill_block: 1493; GFX11: ; %bb.0: ; %bb 1494; GFX11-NEXT: s_mov_b64 s[0:1], exec 1495; GFX11-NEXT: s_wqm_b64 exec, exec 1496; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 1497; GFX11-NEXT: s_mov_b64 s[2:3], exec 1498; GFX11-NEXT: v_cmpx_nle_f32_e32 0, v1 1499; GFX11-NEXT: s_xor_b64 s[2:3], exec, s[2:3] 1500; GFX11-NEXT: s_cbranch_execz .LBB13_3 1501; GFX11-NEXT: ; %bb.1: ; %bb3 1502; GFX11-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v0 1503; GFX11-NEXT: s_and_not1_b64 s[0:1], s[0:1], vcc 1504; GFX11-NEXT: s_cbranch_scc0 .LBB13_6 1505; GFX11-NEXT: ; %bb.2: ; %bb3 1506; GFX11-NEXT: s_and_not1_b64 exec, exec, vcc 1507; GFX11-NEXT: .LBB13_3: ; %bb4 1508; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 1509; GFX11-NEXT: s_or_b64 exec, exec, s[2:3] 1510; GFX11-NEXT: image_sample_c v0, v[2:3], s[0:7], s[0:3] dmask:0x10 dim:SQ_RSRC_IMG_1D 1511; GFX11-NEXT: s_mov_b64 s[0:1], exec 1512; GFX11-NEXT: s_waitcnt vmcnt(0) 1513; GFX11-NEXT: v_cmpx_neq_f32_e32 0, v0 1514; GFX11-NEXT: s_cbranch_execz .LBB13_5 1515; GFX11-NEXT: ; %bb.4: ; %bb8 1516; GFX11-NEXT: v_mov_b32_e32 v0, 9 1517; GFX11-NEXT: global_store_b32 v[0:1], v0, off dlc 1518; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 1519; GFX11-NEXT: .LBB13_5: ; %UnifiedReturnBlock 1520; GFX11-NEXT: s_endpgm 
1521; GFX11-NEXT: .LBB13_6: 1522; GFX11-NEXT: s_mov_b64 exec, 0 1523; GFX11-NEXT: exp mrt0 off, off, off, off done 1524; GFX11-NEXT: s_endpgm 1525bb: 1526 %tmp = fcmp ult float %arg1, 0.000000e+00 1527 br i1 %tmp, label %bb3, label %bb4 1528 1529bb3: ; preds = %bb 1530 %cmp.arg = fcmp olt float %arg, 0.0 1531 call void @llvm.amdgcn.kill(i1 %cmp.arg) 1532 br label %bb4 1533 1534bb4: ; preds = %bb3, %bb 1535 %tmp5 = call <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32(i32 16, float %arg2, float %arg3, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0) 1536 %tmp6 = extractelement <4 x float> %tmp5, i32 0 1537 %tmp7 = fcmp une float %tmp6, 0.000000e+00 1538 br i1 %tmp7, label %bb8, label %bb9 1539 1540bb8: ; preds = %bb4 1541 store volatile i32 9, ptr addrspace(1) undef 1542 ret void 1543 1544bb9: ; preds = %bb4 1545 ret void 1546} 1547 1548define amdgpu_ps void @cbranch_kill(i32 inreg %0, float %val0, float %val1) { 1549; SI-LABEL: cbranch_kill: 1550; SI: ; %bb.0: ; %.entry 1551; SI-NEXT: s_mov_b64 s[0:1], exec 1552; SI-NEXT: v_mov_b32_e32 v4, 0 1553; SI-NEXT: v_mov_b32_e32 v2, v1 1554; SI-NEXT: v_mov_b32_e32 v3, v1 1555; SI-NEXT: image_sample_l v1, v[1:4], s[0:7], s[0:3] dmask:0x1 da 1556; SI-NEXT: s_waitcnt vmcnt(0) 1557; SI-NEXT: v_cmp_ge_f32_e32 vcc, 0, v1 1558; SI-NEXT: s_and_saveexec_b64 s[2:3], vcc 1559; SI-NEXT: s_xor_b64 s[2:3], exec, s[2:3] 1560; SI-NEXT: s_cbranch_execz .LBB14_3 1561; SI-NEXT: ; %bb.1: ; %kill 1562; SI-NEXT: s_andn2_b64 s[0:1], s[0:1], exec 1563; SI-NEXT: ; implicit-def: $vgpr0 1564; SI-NEXT: ; implicit-def: $vgpr1 1565; SI-NEXT: s_cbranch_scc0 .LBB14_6 1566; SI-NEXT: ; %bb.2: ; %kill 1567; SI-NEXT: s_mov_b64 exec, 0 1568; SI-NEXT: .LBB14_3: ; %Flow 1569; SI-NEXT: s_or_saveexec_b64 s[0:1], s[2:3] 1570; SI-NEXT: ; implicit-def: $vgpr2 1571; SI-NEXT: s_xor_b64 exec, exec, s[0:1] 1572; SI-NEXT: ; %bb.4: ; %live 1573; SI-NEXT: v_mul_f32_e32 v2, v0, v1 1574; SI-NEXT: ; %bb.5: ; %export 1575; SI-NEXT: s_or_b64 exec, exec, s[0:1] 
1576; SI-NEXT: exp mrt0 v2, v2, v2, v2 done vm 1577; SI-NEXT: s_endpgm 1578; SI-NEXT: .LBB14_6: 1579; SI-NEXT: s_mov_b64 exec, 0 1580; SI-NEXT: exp null off, off, off, off done vm 1581; SI-NEXT: s_endpgm 1582; 1583; GFX10-WAVE64-LABEL: cbranch_kill: 1584; GFX10-WAVE64: ; %bb.0: ; %.entry 1585; GFX10-WAVE64-NEXT: v_mov_b32_e32 v2, 0 1586; GFX10-WAVE64-NEXT: s_mov_b64 s[0:1], exec 1587; GFX10-WAVE64-NEXT: image_sample_l v1, [v1, v1, v1, v2], s[0:7], s[0:3] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY 1588; GFX10-WAVE64-NEXT: s_waitcnt vmcnt(0) 1589; GFX10-WAVE64-NEXT: v_cmp_ge_f32_e32 vcc, 0, v1 1590; GFX10-WAVE64-NEXT: s_and_saveexec_b64 s[2:3], vcc 1591; GFX10-WAVE64-NEXT: s_xor_b64 s[2:3], exec, s[2:3] 1592; GFX10-WAVE64-NEXT: s_cbranch_execz .LBB14_3 1593; GFX10-WAVE64-NEXT: ; %bb.1: ; %kill 1594; GFX10-WAVE64-NEXT: s_andn2_b64 s[0:1], s[0:1], exec 1595; GFX10-WAVE64-NEXT: ; implicit-def: $vgpr0 1596; GFX10-WAVE64-NEXT: ; implicit-def: $vgpr1 1597; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB14_6 1598; GFX10-WAVE64-NEXT: ; %bb.2: ; %kill 1599; GFX10-WAVE64-NEXT: s_mov_b64 exec, 0 1600; GFX10-WAVE64-NEXT: .LBB14_3: ; %Flow 1601; GFX10-WAVE64-NEXT: s_or_saveexec_b64 s[0:1], s[2:3] 1602; GFX10-WAVE64-NEXT: ; implicit-def: $vgpr2 1603; GFX10-WAVE64-NEXT: s_xor_b64 exec, exec, s[0:1] 1604; GFX10-WAVE64-NEXT: ; %bb.4: ; %live 1605; GFX10-WAVE64-NEXT: v_mul_f32_e32 v2, v0, v1 1606; GFX10-WAVE64-NEXT: ; %bb.5: ; %export 1607; GFX10-WAVE64-NEXT: s_or_b64 exec, exec, s[0:1] 1608; GFX10-WAVE64-NEXT: exp mrt0 v2, v2, v2, v2 done vm 1609; GFX10-WAVE64-NEXT: s_endpgm 1610; GFX10-WAVE64-NEXT: .LBB14_6: 1611; GFX10-WAVE64-NEXT: s_mov_b64 exec, 0 1612; GFX10-WAVE64-NEXT: exp null off, off, off, off done vm 1613; GFX10-WAVE64-NEXT: s_endpgm 1614; 1615; GFX10-WAVE32-LABEL: cbranch_kill: 1616; GFX10-WAVE32: ; %bb.0: ; %.entry 1617; GFX10-WAVE32-NEXT: v_mov_b32_e32 v2, 0 1618; GFX10-WAVE32-NEXT: s_mov_b32 s0, exec_lo 1619; GFX10-WAVE32-NEXT: image_sample_l v1, [v1, v1, v1, v2], s[0:7], s[0:3] 
dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY 1620; GFX10-WAVE32-NEXT: s_waitcnt vmcnt(0) 1621; GFX10-WAVE32-NEXT: v_cmp_ge_f32_e32 vcc_lo, 0, v1 1622; GFX10-WAVE32-NEXT: s_and_saveexec_b32 s1, vcc_lo 1623; GFX10-WAVE32-NEXT: s_xor_b32 s1, exec_lo, s1 1624; GFX10-WAVE32-NEXT: s_cbranch_execz .LBB14_3 1625; GFX10-WAVE32-NEXT: ; %bb.1: ; %kill 1626; GFX10-WAVE32-NEXT: s_andn2_b32 s0, s0, exec_lo 1627; GFX10-WAVE32-NEXT: ; implicit-def: $vgpr0 1628; GFX10-WAVE32-NEXT: ; implicit-def: $vgpr1 1629; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB14_6 1630; GFX10-WAVE32-NEXT: ; %bb.2: ; %kill 1631; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0 1632; GFX10-WAVE32-NEXT: .LBB14_3: ; %Flow 1633; GFX10-WAVE32-NEXT: s_or_saveexec_b32 s0, s1 1634; GFX10-WAVE32-NEXT: ; implicit-def: $vgpr2 1635; GFX10-WAVE32-NEXT: s_xor_b32 exec_lo, exec_lo, s0 1636; GFX10-WAVE32-NEXT: ; %bb.4: ; %live 1637; GFX10-WAVE32-NEXT: v_mul_f32_e32 v2, v0, v1 1638; GFX10-WAVE32-NEXT: ; %bb.5: ; %export 1639; GFX10-WAVE32-NEXT: s_or_b32 exec_lo, exec_lo, s0 1640; GFX10-WAVE32-NEXT: exp mrt0 v2, v2, v2, v2 done vm 1641; GFX10-WAVE32-NEXT: s_endpgm 1642; GFX10-WAVE32-NEXT: .LBB14_6: 1643; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0 1644; GFX10-WAVE32-NEXT: exp null off, off, off, off done vm 1645; GFX10-WAVE32-NEXT: s_endpgm 1646; 1647; GFX11-LABEL: cbranch_kill: 1648; GFX11: ; %bb.0: ; %.entry 1649; GFX11-NEXT: v_mov_b32_e32 v2, 0 1650; GFX11-NEXT: s_mov_b64 s[0:1], exec 1651; GFX11-NEXT: image_sample_l v1, [v1, v1, v1, v2], s[0:7], s[0:3] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY 1652; GFX11-NEXT: s_mov_b64 s[2:3], exec 1653; GFX11-NEXT: s_waitcnt vmcnt(0) 1654; GFX11-NEXT: v_cmpx_ge_f32_e32 0, v1 1655; GFX11-NEXT: s_xor_b64 s[2:3], exec, s[2:3] 1656; GFX11-NEXT: s_cbranch_execz .LBB14_3 1657; GFX11-NEXT: ; %bb.1: ; %kill 1658; GFX11-NEXT: s_and_not1_b64 s[0:1], s[0:1], exec 1659; GFX11-NEXT: ; implicit-def: $vgpr0 1660; GFX11-NEXT: ; implicit-def: $vgpr1 1661; GFX11-NEXT: s_cbranch_scc0 .LBB14_6 1662; GFX11-NEXT: ; %bb.2: ; %kill 1663; 
GFX11-NEXT: s_mov_b64 exec, 0
; GFX11-NEXT: .LBB14_3: ; %Flow
; GFX11-NEXT: s_or_saveexec_b64 s[0:1], s[2:3]
; GFX11-NEXT: ; implicit-def: $vgpr2
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: s_xor_b64 exec, exec, s[0:1]
; GFX11-NEXT: ; %bb.4: ; %live
; GFX11-NEXT: v_mul_f32_e32 v2, v0, v1
; GFX11-NEXT: ; %bb.5: ; %export
; GFX11-NEXT: s_or_b64 exec, exec, s[0:1]
; GFX11-NEXT: exp mrt0 v2, v2, v2, v2 done
; GFX11-NEXT: s_endpgm
; GFX11-NEXT: .LBB14_6:
; GFX11-NEXT: s_mov_b64 exec, 0
; GFX11-NEXT: exp mrt0 off, off, off, off done
; GFX11-NEXT: s_endpgm
.entry:
  %sample = call float @llvm.amdgcn.image.sample.l.2darray.f32.f32(i32 1, float %val1, float %val1, float %val1, float 0.000000e+00, <8 x i32> undef, <4 x i32> undef, i1 false, i32 0, i32 0)
  %cond0 = fcmp ugt float %sample, 0.000000e+00
  br i1 %cond0, label %live, label %kill

kill:
  call void @llvm.amdgcn.kill(i1 false)
  br label %export

live:
  %scale = fmul reassoc nnan nsz arcp contract float %val0, %sample
  br label %export

export:
  %proxy = phi float [ undef, %kill ], [ %scale, %live ]
  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %proxy, float %proxy, float %proxy, float %proxy, i1 true, i1 true) #3
  ret void
}


; complex_loop: pixel shader containing a counted loop (%hdr -> %latch) that is
; entered only when %cmpa > 0 (signed compare). On iterations where
; %ctr > %cmpb (unsigned compare) control passes through %kill, which calls
; llvm.amdgcn.kill with a constant-false condition and then rejoins the loop at
; %latch. The loop repeats while %ctr.next < %cmpc (signed compare). Afterwards
; the final counter value, or -1 when the loop was never entered, is bitcast to
; float and exported.
;
; In every expected-output variant below, a scalar register (pair) is
; initialised from exec in %.lr.ph and has the current exec bits cleared in
; %kill; the s_cbranch_scc0 that follows targets a separate block that zeroes
; exec, issues a final export and ends the program.
define amdgpu_ps void @complex_loop(i32 inreg %cmpa, i32 %cmpb, i32 %cmpc) {
; SI-LABEL: complex_loop:
; SI: ; %bb.0: ; %.entry
; SI-NEXT: s_cmp_lt_i32 s0, 1
; SI-NEXT: s_cbranch_scc1 .LBB15_7
; SI-NEXT: ; %bb.1: ; %.lr.ph
; SI-NEXT: s_mov_b64 s[2:3], exec
; SI-NEXT: s_mov_b32 s6, 0
; SI-NEXT: s_mov_b64 s[0:1], 0
; SI-NEXT: s_branch .LBB15_3
; SI-NEXT: .LBB15_2: ; %latch
; SI-NEXT: ; in Loop: Header=BB15_3 Depth=1
; SI-NEXT: s_or_b64 exec, exec, s[4:5]
; SI-NEXT: s_add_i32 s6, s6, 1
; SI-NEXT: v_cmp_ge_i32_e32 vcc, s6, v1
; SI-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
; SI-NEXT: v_mov_b32_e32 v2, s6
; SI-NEXT: s_andn2_b64 exec, exec, s[0:1]
; SI-NEXT: s_cbranch_execz .LBB15_6
; SI-NEXT: .LBB15_3: ; %hdr
; SI-NEXT: ; =>This Inner Loop Header: Depth=1
; SI-NEXT: v_cmp_gt_u32_e32 vcc, s6, v0
; SI-NEXT: s_and_saveexec_b64 s[4:5], vcc
; SI-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
; SI-NEXT: s_cbranch_execz .LBB15_2
; SI-NEXT: ; %bb.4: ; %kill
; SI-NEXT: ; in Loop: Header=BB15_3 Depth=1
; SI-NEXT: s_andn2_b64 s[2:3], s[2:3], exec
; SI-NEXT: s_cbranch_scc0 .LBB15_8
; SI-NEXT: ; %bb.5: ; %kill
; SI-NEXT: ; in Loop: Header=BB15_3 Depth=1
; SI-NEXT: s_mov_b64 exec, 0
; SI-NEXT: s_branch .LBB15_2
; SI-NEXT: .LBB15_6: ; %Flow
; SI-NEXT: s_or_b64 exec, exec, s[0:1]
; SI-NEXT: exp mrt0 v2, v2, v0, v0 done vm
; SI-NEXT: s_endpgm
; SI-NEXT: .LBB15_7:
; SI-NEXT: v_mov_b32_e32 v2, -1
; SI-NEXT: exp mrt0 v2, v2, v0, v0 done vm
; SI-NEXT: s_endpgm
; SI-NEXT: .LBB15_8:
; SI-NEXT: s_mov_b64 exec, 0
; SI-NEXT: exp null off, off, off, off done vm
; SI-NEXT: s_endpgm
;
; GFX10-WAVE64-LABEL: complex_loop:
; GFX10-WAVE64: ; %bb.0: ; %.entry
; GFX10-WAVE64-NEXT: s_cmp_lt_i32 s0, 1
; GFX10-WAVE64-NEXT: s_cbranch_scc1 .LBB15_7
; GFX10-WAVE64-NEXT: ; %bb.1: ; %.lr.ph
; GFX10-WAVE64-NEXT: s_mov_b64 s[2:3], exec
; GFX10-WAVE64-NEXT: s_mov_b32 s6, 0
; GFX10-WAVE64-NEXT: s_mov_b64 s[0:1], 0
; GFX10-WAVE64-NEXT: s_branch .LBB15_3
; GFX10-WAVE64-NEXT: .LBB15_2: ; %latch
; GFX10-WAVE64-NEXT: ; in Loop: Header=BB15_3 Depth=1
; GFX10-WAVE64-NEXT: s_or_b64 exec, exec, s[4:5]
; GFX10-WAVE64-NEXT: s_add_i32 s6, s6, 1
; GFX10-WAVE64-NEXT: v_cmp_ge_i32_e32 vcc, s6, v1
; GFX10-WAVE64-NEXT: v_mov_b32_e32 v2, s6
; GFX10-WAVE64-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
; GFX10-WAVE64-NEXT: s_andn2_b64 exec, exec, s[0:1]
; GFX10-WAVE64-NEXT: s_cbranch_execz .LBB15_6
; GFX10-WAVE64-NEXT: .LBB15_3: ; %hdr
; GFX10-WAVE64-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX10-WAVE64-NEXT: v_cmp_gt_u32_e32 vcc, s6, v0
; GFX10-WAVE64-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GFX10-WAVE64-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
; GFX10-WAVE64-NEXT: s_cbranch_execz .LBB15_2
; GFX10-WAVE64-NEXT: ; %bb.4: ; %kill
; GFX10-WAVE64-NEXT: ; in Loop: Header=BB15_3 Depth=1
; GFX10-WAVE64-NEXT: s_andn2_b64 s[2:3], s[2:3], exec
; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB15_8
; GFX10-WAVE64-NEXT: ; %bb.5: ; %kill
; GFX10-WAVE64-NEXT: ; in Loop: Header=BB15_3 Depth=1
; GFX10-WAVE64-NEXT: s_mov_b64 exec, 0
; GFX10-WAVE64-NEXT: s_branch .LBB15_2
; GFX10-WAVE64-NEXT: .LBB15_6: ; %Flow
; GFX10-WAVE64-NEXT: s_or_b64 exec, exec, s[0:1]
; GFX10-WAVE64-NEXT: exp mrt0 v2, v2, v0, v0 done vm
; GFX10-WAVE64-NEXT: s_endpgm
; GFX10-WAVE64-NEXT: .LBB15_7:
; GFX10-WAVE64-NEXT: v_mov_b32_e32 v2, -1
; GFX10-WAVE64-NEXT: exp mrt0 v2, v2, v0, v0 done vm
; GFX10-WAVE64-NEXT: s_endpgm
; GFX10-WAVE64-NEXT: .LBB15_8:
; GFX10-WAVE64-NEXT: s_mov_b64 exec, 0
; GFX10-WAVE64-NEXT: exp null off, off, off, off done vm
; GFX10-WAVE64-NEXT: s_endpgm
;
; GFX10-WAVE32-LABEL: complex_loop:
; GFX10-WAVE32: ; %bb.0: ; %.entry
; GFX10-WAVE32-NEXT: s_cmp_lt_i32 s0, 1
; GFX10-WAVE32-NEXT: s_cbranch_scc1 .LBB15_7
; GFX10-WAVE32-NEXT: ; %bb.1: ; %.lr.ph
; GFX10-WAVE32-NEXT: s_mov_b32 s1, exec_lo
; GFX10-WAVE32-NEXT: s_mov_b32 s0, 0
; GFX10-WAVE32-NEXT: s_mov_b32 s2, 0
; GFX10-WAVE32-NEXT: s_branch .LBB15_3
; GFX10-WAVE32-NEXT: .LBB15_2: ; %latch
; GFX10-WAVE32-NEXT: ; in Loop: Header=BB15_3 Depth=1
; GFX10-WAVE32-NEXT: s_or_b32 exec_lo, exec_lo, s3
; GFX10-WAVE32-NEXT: s_add_i32 s2, s2, 1
; GFX10-WAVE32-NEXT: v_cmp_ge_i32_e32 vcc_lo, s2, v1
; GFX10-WAVE32-NEXT: v_mov_b32_e32 v2, s2
; GFX10-WAVE32-NEXT: s_or_b32 s0, vcc_lo, s0
; GFX10-WAVE32-NEXT: s_andn2_b32 exec_lo, exec_lo, s0
; GFX10-WAVE32-NEXT: s_cbranch_execz .LBB15_6
; GFX10-WAVE32-NEXT: .LBB15_3: ; %hdr
; GFX10-WAVE32-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX10-WAVE32-NEXT: v_cmp_gt_u32_e32 vcc_lo, s2, v0
; GFX10-WAVE32-NEXT: s_and_saveexec_b32 s3, vcc_lo
; GFX10-WAVE32-NEXT: s_xor_b32 s3, exec_lo, s3
; GFX10-WAVE32-NEXT: s_cbranch_execz .LBB15_2
; GFX10-WAVE32-NEXT: ; %bb.4: ; %kill
; GFX10-WAVE32-NEXT: ; in Loop: Header=BB15_3 Depth=1
; GFX10-WAVE32-NEXT: s_andn2_b32 s1, s1, exec_lo
; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB15_8
; GFX10-WAVE32-NEXT: ; %bb.5: ; %kill
; GFX10-WAVE32-NEXT: ; in Loop: Header=BB15_3 Depth=1
; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0
; GFX10-WAVE32-NEXT: s_branch .LBB15_2
; GFX10-WAVE32-NEXT: .LBB15_6: ; %Flow
; GFX10-WAVE32-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX10-WAVE32-NEXT: exp mrt0 v2, v2, v0, v0 done vm
; GFX10-WAVE32-NEXT: s_endpgm
; GFX10-WAVE32-NEXT: .LBB15_7:
; GFX10-WAVE32-NEXT: v_mov_b32_e32 v2, -1
; GFX10-WAVE32-NEXT: exp mrt0 v2, v2, v0, v0 done vm
; GFX10-WAVE32-NEXT: s_endpgm
; GFX10-WAVE32-NEXT: .LBB15_8:
; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0
; GFX10-WAVE32-NEXT: exp null off, off, off, off done vm
; GFX10-WAVE32-NEXT: s_endpgm
;
; GFX11-LABEL: complex_loop:
; GFX11: ; %bb.0: ; %.entry
; GFX11-NEXT: s_cmp_lt_i32 s0, 1
; GFX11-NEXT: s_cbranch_scc1 .LBB15_7
; GFX11-NEXT: ; %bb.1: ; %.lr.ph
; GFX11-NEXT: s_mov_b64 s[2:3], exec
; GFX11-NEXT: s_mov_b32 s6, 0
; GFX11-NEXT: s_mov_b64 s[0:1], 0
; GFX11-NEXT: s_branch .LBB15_3
; GFX11-NEXT: .LBB15_2: ; %latch
; GFX11-NEXT: ; in Loop: Header=BB15_3 Depth=1
; GFX11-NEXT: s_or_b64 exec, exec, s[4:5]
; GFX11-NEXT: s_add_i32 s6, s6, 1
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
; GFX11-NEXT: v_cmp_ge_i32_e32 vcc, s6, v1
; GFX11-NEXT: v_mov_b32_e32 v2, s6
; GFX11-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
; GFX11-NEXT: s_and_not1_b64 exec, exec, s[0:1]
; GFX11-NEXT: s_cbranch_execz .LBB15_6
; GFX11-NEXT: .LBB15_3: ; %hdr
; GFX11-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX11-NEXT: s_mov_b64 s[4:5], exec
; GFX11-NEXT: v_cmpx_gt_u32_e64 s6, v0
; GFX11-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
; GFX11-NEXT: s_cbranch_execz .LBB15_2
; GFX11-NEXT: ; %bb.4: ; %kill
; GFX11-NEXT: ; in Loop: Header=BB15_3 Depth=1
; GFX11-NEXT: s_and_not1_b64 s[2:3], s[2:3], exec
; GFX11-NEXT: s_cbranch_scc0 .LBB15_8
; GFX11-NEXT: ; %bb.5: ; %kill
; GFX11-NEXT: ; in Loop: Header=BB15_3 Depth=1
; GFX11-NEXT: s_mov_b64 exec, 0
; GFX11-NEXT: s_branch .LBB15_2
; GFX11-NEXT: .LBB15_6: ; %Flow
; GFX11-NEXT: s_or_b64 exec, exec, s[0:1]
; GFX11-NEXT: exp mrt0 v2, v2, v0, v0 done
; GFX11-NEXT: s_endpgm
; GFX11-NEXT: .LBB15_7:
; GFX11-NEXT: v_mov_b32_e32 v2, -1
; GFX11-NEXT: exp mrt0 v2, v2, v0, v0 done
; GFX11-NEXT: s_endpgm
; GFX11-NEXT: .LBB15_8:
; GFX11-NEXT: s_mov_b64 exec, 0
; GFX11-NEXT: exp mrt0 off, off, off, off done
; GFX11-NEXT: s_endpgm
.entry:
  ; Bypass the loop entirely unless %cmpa is positive.
  %flaga = icmp sgt i32 %cmpa, 0
  br i1 %flaga, label %.lr.ph, label %._crit_edge

.lr.ph:
  br label %hdr

hdr:
  ; %ctr starts at 0 and takes %ctr.next on each back edge from %latch.
  %ctr = phi i32 [ 0, %.lr.ph ], [ %ctr.next, %latch ]
  %flagb = icmp ugt i32 %ctr, %cmpb
  br i1 %flagb, label %kill, label %latch

kill:
  ; Kill with a constant-false condition, then continue to the latch.
  call void @llvm.amdgcn.kill(i1 false)
  br label %latch

latch:
  %ctr.next = add nuw nsw i32 %ctr, 1
  %flagc = icmp slt i32 %ctr.next, %cmpc
  br i1 %flagc, label %hdr, label %._crit_edge

._crit_edge:
  ; -1 identifies the path that came straight from %.entry.
  %tmp = phi i32 [ -1, %.entry ], [ %ctr.next, %latch ]
  %out = bitcast i32 %tmp to float
  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %out, float %out, float undef, float undef, i1 true, i1 true)
  ret void
}

define void
@skip_mode_switch(i32 %arg) {
; skip_mode_switch: function defined without the amdgpu_ps calling convention.
; When %arg == 0 it runs a block (%bb.0) whose only instruction besides the
; branch is a call to llvm.amdgcn.s.setreg(i32 2049, i32 3). Every
; expected-output variant below keeps an s_cbranch_execz that jumps over the
; resulting s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 2), 3 instruction.
;
; WAVE64-LABEL: skip_mode_switch:
; WAVE64: ; %bb.0: ; %entry
; WAVE64-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; WAVE64-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; WAVE64-NEXT: s_and_saveexec_b64 s[4:5], vcc
; WAVE64-NEXT: s_cbranch_execz .LBB16_2
; WAVE64-NEXT: ; %bb.1: ; %bb.0
; WAVE64-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 2), 3
; WAVE64-NEXT: .LBB16_2: ; %bb.1
; WAVE64-NEXT: s_or_b64 exec, exec, s[4:5]
; WAVE64-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-WAVE32-LABEL: skip_mode_switch:
; GFX10-WAVE32: ; %bb.0: ; %entry
; GFX10-WAVE32-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-WAVE32-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX10-WAVE32-NEXT: s_and_saveexec_b32 s4, vcc_lo
; GFX10-WAVE32-NEXT: s_cbranch_execz .LBB16_2
; GFX10-WAVE32-NEXT: ; %bb.1: ; %bb.0
; GFX10-WAVE32-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 2), 3
; GFX10-WAVE32-NEXT: .LBB16_2: ; %bb.1
; GFX10-WAVE32-NEXT: s_or_b32 exec_lo, exec_lo, s4
; GFX10-WAVE32-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: skip_mode_switch:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b64 s[0:1], exec
; GFX11-NEXT: v_cmpx_eq_u32_e32 0, v0
; GFX11-NEXT: s_cbranch_execz .LBB16_2
; GFX11-NEXT: ; %bb.1: ; %bb.0
; GFX11-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 2), 3
; GFX11-NEXT: .LBB16_2: ; %bb.1
; GFX11-NEXT: s_or_b64 exec, exec, s[0:1]
; GFX11-NEXT: s_setpc_b64 s[30:31]
entry:
  %cmp = icmp eq i32 %arg, 0
  br i1 %cmp, label %bb.0, label %bb.1

bb.0:
  ; The setreg call is the only non-terminator instruction in this block.
  call void @llvm.amdgcn.s.setreg(i32 2049, i32 3)
  br label %bb.1

bb.1:
  ret void
}

; Intrinsic declarations.
declare void @llvm.amdgcn.exp.f32(i32 immarg, i32 immarg, float, float, float, float, i1 immarg, i1 immarg) #3
declare float @llvm.amdgcn.image.sample.l.2darray.f32.f32(i32 immarg, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare void @llvm.amdgcn.kill(i1) #0

declare void @llvm.amdgcn.s.setreg(i32 immarg, i32)

; Attribute groups referenced through the #N suffixes on definitions and
; declarations.
attributes #0 = { nounwind }
attributes #1 = { nounwind readonly }
attributes #2 = { nounwind readnone speculatable }
attributes #3 = { inaccessiblememonly nounwind writeonly }