; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck %s

; Make sure the branch targets are correct after lowering llvm.amdgcn.if
;
; The next two functions contain the same three blocks (%entry, %if.true,
; %endif) and differ only in the order in which %if.true and %endif appear
; in the IR. The expected assembly for both is identical apart from the
; label numbers (.LBB0_2 vs. .LBB1_2).

define i32 @divergent_if_swap_brtarget_order0(i32 %value) {
; CHECK-LABEL: divergent_if_swap_brtarget_order0:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
; CHECK-NEXT: ; implicit-def: $vgpr0
; CHECK-NEXT: s_and_saveexec_b64 s[4:5], vcc
; CHECK-NEXT: s_cbranch_execz .LBB0_2
; CHECK-NEXT: ; %bb.1: ; %if.true
; CHECK-NEXT: global_load_dword v0, v[0:1], off glc
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: .LBB0_2: ; %endif
; CHECK-NEXT: s_or_b64 exec, exec, s[4:5]
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
  ; Branch on whether the i32 argument is non-zero.
  %c = icmp ne i32 %value, 0
  br i1 %c, label %if.true, label %endif

; Variant 0: %if.true is listed before %endif.
if.true:
  ; Volatile load from an undef address; its result feeds the phi in %endif.
  %val = load volatile i32, ptr addrspace(1) undef
  br label %endif

endif:
  ; The value is undef when %if.true was skipped.
  %v = phi i32 [ %val, %if.true ], [ undef, %entry ]
  ret i32 %v
}

define i32 @divergent_if_swap_brtarget_order1(i32 %value) {
; CHECK-LABEL: divergent_if_swap_brtarget_order1:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
; CHECK-NEXT: ; implicit-def: $vgpr0
; CHECK-NEXT: s_and_saveexec_b64 s[4:5], vcc
; CHECK-NEXT: s_cbranch_execz .LBB1_2
; CHECK-NEXT: ; %bb.1: ; %if.true
; CHECK-NEXT: global_load_dword v0, v[0:1], off glc
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: .LBB1_2: ; %endif
; CHECK-NEXT: s_or_b64 exec, exec, s[4:5]
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
  ; Same condition and branch as in the order0 variant.
  %c = icmp ne i32 %value, 0
  br i1 %c, label %if.true, label %endif

; Variant 1: %endif is listed before %if.true; the blocks themselves are
; the same as in the order0 variant.
endif:
  %v = phi i32 [ %val, %if.true ], [ undef, %entry ]
  ret i32 %v

if.true:
  %val = load volatile i32, ptr addrspace(1) undef
  br label %endif
}

; Make sure an 'and' with 1 is inserted for the condition of llvm.amdgcn.if.
; Here the i1 branch condition is a trunc of the i32 argument; the expected
; assembly masks v0 with 1 (v_and_b32) before comparing it against 0.
define i32 @divergent_if_nonboolean_condition0(i32 %value) {
; CHECK-LABEL: divergent_if_nonboolean_condition0:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: v_and_b32_e32 v0, 1, v0
; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
; CHECK-NEXT: ; implicit-def: $vgpr0
; CHECK-NEXT: s_and_saveexec_b64 s[4:5], vcc
; CHECK-NEXT: s_cbranch_execz .LBB2_2
; CHECK-NEXT: ; %bb.1: ; %if.true
; CHECK-NEXT: global_load_dword v0, v[0:1], off glc
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: .LBB2_2: ; %endif
; CHECK-NEXT: s_or_b64 exec, exec, s[4:5]
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
  ; The condition is the low bit of %value rather than a compare result.
  %c = trunc i32 %value to i1
  br i1 %c, label %if.true, label %endif

if.true:
  %val = load volatile i32, ptr addrspace(1) undef
  br label %endif

endif:
  %v = phi i32 [ %val, %if.true ], [ undef, %entry ]
  ret i32 %v
}

; Make sure an 'and' with 1 is inserted for the condition of llvm.amdgcn.if.
; Same as the previous function, except that the truncated i32 is loaded
; from %ptr instead of being passed in as an argument.
define i32 @divergent_if_nonboolean_condition1(ptr addrspace(1) %ptr) {
; CHECK-LABEL: divergent_if_nonboolean_condition1:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: global_load_dword v0, v[0:1], off
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: v_and_b32_e32 v0, 1, v0
; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
; CHECK-NEXT: ; implicit-def: $vgpr0
; CHECK-NEXT: s_and_saveexec_b64 s[4:5], vcc
; CHECK-NEXT: s_cbranch_execz .LBB3_2
; CHECK-NEXT: ; %bb.1: ; %if.true
; CHECK-NEXT: global_load_dword v0, v[0:1], off glc
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: .LBB3_2: ; %endif
; CHECK-NEXT: s_or_b64 exec, exec, s[4:5]
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
  ; Non-volatile load of the value whose low bit becomes the condition.
  %value = load i32, ptr addrspace(1) %ptr
  %c = trunc i32 %value to i1
  br i1 %c, label %if.true, label %endif

if.true:
  %val = load volatile i32, ptr addrspace(1) undef
  br label %endif

endif:
  %v = phi i32 [ %val, %if.true ], [ undef, %entry ]
  ret i32 %v
}

; External globals in addrspace(4) read by @constrained_if_register_class:
; an i32 and a pointer.
@external_constant = external addrspace(4) constant i32, align 4
@const.ptr = external addrspace(4) constant ptr, align 4

; Make sure this case compiles. G_ICMP was mis-mapped due to having
; the result register class constrained by llvm.amdgcn.if lowering.
define void @constrained_if_register_class() {
; CHECK-LABEL: constrained_if_register_class:
; CHECK: ; %bb.0: ; %bb
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_getpc_b64 s[4:5]
; CHECK-NEXT: s_add_u32 s4, s4, external_constant@gotpcrel32@lo+4
; CHECK-NEXT: s_addc_u32 s5, s5, external_constant@gotpcrel32@hi+12
; CHECK-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_load_dword s4, s[4:5], 0x0
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_cmp_lg_u32 s4, 0
; CHECK-NEXT: s_cbranch_scc0 .LBB4_2
; CHECK-NEXT: .LBB4_1: ; %bb12
; CHECK-NEXT: s_setpc_b64 s[30:31]
; CHECK-NEXT: .LBB4_2: ; %bb2
; CHECK-NEXT: s_getpc_b64 s[4:5]
; CHECK-NEXT: s_add_u32 s4, s4, const.ptr@gotpcrel32@lo+4
; CHECK-NEXT: s_addc_u32 s5, s5, const.ptr@gotpcrel32@hi+12
; CHECK-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
; CHECK-NEXT: v_mov_b32_e32 v0, 0
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: global_load_dword v0, v0, s[4:5]
; CHECK-NEXT: s_mov_b32 s4, -1
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: v_cmp_gt_f32_e32 vcc, 1.0, v0
; CHECK-NEXT: s_cbranch_vccnz .LBB4_4
; CHECK-NEXT: ; %bb.3: ; %bb7
; CHECK-NEXT: s_mov_b32 s4, 0
; CHECK-NEXT: .LBB4_4: ; %bb8
; CHECK-NEXT: s_cmp_lg_u32 s4, 0
; CHECK-NEXT: s_cbranch_scc1 .LBB4_1
; CHECK-NEXT: ; %bb.5: ; %bb11
; CHECK-NEXT: v_mov_b32_e32 v0, 4.0
; CHECK-NEXT: buffer_store_dword v0, v0, s[0:3], 0 offen
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
bb:
  ; Return immediately (via %bb12) when the external i32 is non-zero.
  %tmp = load i32, ptr addrspace(4) @external_constant
  %tmp1 = icmp ne i32 %tmp, 0
  br i1 %tmp1, label %bb12, label %bb2

bb2:
  ; Load a pointer from @const.ptr, then a float through that pointer.
  %ptr = load ptr, ptr addrspace(4) @const.ptr
  %tmp4 = load float, ptr %ptr, align 4
  %tmp5 = fcmp olt float %tmp4, 1.0
  ; 'or' with false leaves the value of %tmp5 unchanged.
  %tmp6 = or i1 %tmp5, false
  br i1 %tmp6, label %bb8, label %bb7

bb7:
  br label %bb8

bb8:
  ; %tmp9 is 0 when arriving from %bb7 and -1 when arriving from %bb2.
  %tmp9 = phi i32 [ 0, %bb7 ], [ -1, %bb2 ]
  %tmp10 = icmp eq i32 %tmp9, 0
  br i1 %tmp10, label %bb11, label %bb12

bb11:
  ; Store to an undef addrspace(5) address.
  store float 4.0, ptr addrspace(5) undef, align 4
  br label %bb12

bb12:
  ret void
}

; A kernel containing a loop with two exits to %bb9: one from the loop
; header %bb1 (taken when %lsr.iv.next is not negative) and one from %bb4
; (taken when %tmp is not less than the volatile-loaded value).
; In the expected assembly a mask is accumulated in s[0:1], cleared from
; exec with s_andn2_b64, and the loop is left through s_cbranch_execz.
define amdgpu_kernel void @break_loop(i32 %arg) {
; CHECK-LABEL: break_loop:
; CHECK: ; %bb.0: ; %bb
; CHECK-NEXT: s_load_dword s0, s[8:9], 0x0
; CHECK-NEXT: ; implicit-def: $sgpr2_sgpr3
; CHECK-NEXT: ; implicit-def: $vgpr1
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: v_subrev_u32_e32 v0, s0, v0
; CHECK-NEXT: s_mov_b64 s[0:1], 0
; CHECK-NEXT: s_branch .LBB5_3
; CHECK-NEXT: .LBB5_1: ; %bb4
; CHECK-NEXT: ; in Loop: Header=BB5_3 Depth=1
; CHECK-NEXT: global_load_dword v2, v[0:1], off glc
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: s_andn2_b64 s[2:3], s[2:3], exec
; CHECK-NEXT: v_cmp_ge_i32_e32 vcc, v0, v2
; CHECK-NEXT: s_and_b64 s[4:5], exec, vcc
; CHECK-NEXT: s_or_b64 s[2:3], s[2:3], s[4:5]
; CHECK-NEXT: .LBB5_2: ; %Flow
; CHECK-NEXT: ; in Loop: Header=BB5_3 Depth=1
; CHECK-NEXT: s_and_b64 s[4:5], exec, s[2:3]
; CHECK-NEXT: s_or_b64 s[0:1], s[4:5], s[0:1]
; CHECK-NEXT: s_andn2_b64 exec, exec, s[0:1]
; CHECK-NEXT: s_cbranch_execz .LBB5_5
; CHECK-NEXT: .LBB5_3: ; %bb1
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT: v_add_u32_e32 v1, 1, v1
; CHECK-NEXT: s_andn2_b64 s[2:3], s[2:3], exec
; CHECK-NEXT: s_and_b64 s[4:5], exec, -1
; CHECK-NEXT: v_cmp_le_i32_e32 vcc, 0, v1
; CHECK-NEXT: s_or_b64 s[2:3], s[2:3], s[4:5]
; CHECK-NEXT: s_cbranch_vccz .LBB5_1
; CHECK-NEXT: ; %bb.4: ; in Loop: Header=BB5_3 Depth=1
; CHECK-NEXT: ; implicit-def: $vgpr1
; CHECK-NEXT: s_branch .LBB5_2
; CHECK-NEXT: .LBB5_5: ; %bb9
; CHECK-NEXT: s_endpgm
bb:
  ; %tmp = workitem id x minus the kernel argument; compared in %bb4.
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %tmp = sub i32 %id, %arg
  br label %bb1

bb1:
  ; Loop header. %lsr.iv is undef on entry from %bb and is incremented by
  ; one on every iteration.
  %lsr.iv = phi i32 [ undef, %bb ], [ %lsr.iv.next, %bb4 ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %bb9

bb4:
  ; Second exit condition: continue looping only while %tmp < %load.
  %load = load volatile i32, ptr addrspace(1) undef, align 4
  %cmp1 = icmp slt i32 %tmp, %load
  br i1 %cmp1, label %bb1, label %bb9

bb9:
  ret void
}

; Intrinsic called by @break_loop to obtain %id.
declare i32 @llvm.amdgcn.workitem.id.x()