; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI %s
; RUN: llc < %s -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck --check-prefix=FLAT %s

define amdgpu_kernel void @break_inserted_outside_of_loop(ptr addrspace(1) %out, i32 %a) {
; SI-LABEL: break_inserted_outside_of_loop:
; SI: ; %bb.0: ; %main_body
; SI-NEXT: s_load_dword s0, s[4:5], 0xb
; SI-NEXT: v_mbcnt_lo_u32_b32_e64 v0, -1, 0
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: v_and_b32_e32 v0, s0, v0
; SI-NEXT: v_and_b32_e32 v0, 1, v0
; SI-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
; SI-NEXT: s_mov_b64 s[0:1], 0
; SI-NEXT: .LBB0_1: ; %ENDIF
; SI-NEXT: ; =>This Inner Loop Header: Depth=1
; SI-NEXT: s_and_b64 s[2:3], exec, vcc
; SI-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1]
; SI-NEXT: s_andn2_b64 exec, exec, s[0:1]
; SI-NEXT: s_cbranch_execnz .LBB0_1
; SI-NEXT: ; %bb.2: ; %ENDLOOP
; SI-NEXT: s_or_b64 exec, exec, s[0:1]
; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: v_mov_b32_e32 v0, 0
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT: s_endpgm
;
; FLAT-LABEL: break_inserted_outside_of_loop:
; FLAT: ; %bb.0: ; %main_body
; FLAT-NEXT: s_load_dword s0, s[4:5], 0x2c
; FLAT-NEXT: v_mbcnt_lo_u32_b32 v0, -1, 0
; FLAT-NEXT: s_waitcnt lgkmcnt(0)
; FLAT-NEXT: v_and_b32_e32 v0, s0, v0
; FLAT-NEXT: v_and_b32_e32 v0, 1, v0
; FLAT-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
; FLAT-NEXT: s_mov_b64 s[0:1], 0
; FLAT-NEXT: .LBB0_1: ; %ENDIF
; FLAT-NEXT: ; =>This Inner Loop Header: Depth=1
; FLAT-NEXT: s_and_b64 s[2:3], exec, vcc
; FLAT-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1]
; FLAT-NEXT: s_andn2_b64 exec, exec, s[0:1]
; FLAT-NEXT: s_cbranch_execnz .LBB0_1
; FLAT-NEXT: ; %bb.2: ; %ENDLOOP
; FLAT-NEXT: s_or_b64 exec, exec, s[0:1]
; FLAT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; FLAT-NEXT: s_mov_b32 s3, 0xf000
; FLAT-NEXT: s_mov_b32 s2, -1
; FLAT-NEXT: v_mov_b32_e32 v0, 0
; FLAT-NEXT: s_waitcnt lgkmcnt(0)
; FLAT-NEXT: buffer_store_dword v0, off, s[0:3], 0
; FLAT-NEXT: s_endpgm
main_body:
  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %0 = and i32 %a, %tid
  %1 = trunc i32 %0 to i1
  br label %ENDIF

ENDLOOP:
  store i32 0, ptr addrspace(1) %out
  ret void

ENDIF:
  br i1 %1, label %ENDLOOP, label %ENDIF
}

define amdgpu_kernel void @phi_cond_outside_loop(i32 %b) {
; SI-LABEL: phi_cond_outside_loop:
; SI: ; %bb.0: ; %entry
; SI-NEXT: v_mbcnt_lo_u32_b32_e64 v0, -1, 0
; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
; SI-NEXT: s_mov_b64 s[0:1], 0
; SI-NEXT: s_mov_b64 s[2:3], 0
; SI-NEXT: s_and_saveexec_b64 s[6:7], vcc
; SI-NEXT: s_cbranch_execz .LBB1_2
; SI-NEXT: ; %bb.1: ; %else
; SI-NEXT: s_load_dword s2, s[4:5], 0x9
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_cmp_eq_u32 s2, 0
; SI-NEXT: s_cselect_b64 s[2:3], -1, 0
; SI-NEXT: s_and_b64 s[2:3], s[2:3], exec
; SI-NEXT: .LBB1_2: ; %endif
; SI-NEXT: s_or_b64 exec, exec, s[6:7]
; SI-NEXT: .LBB1_3: ; %loop
; SI-NEXT: ; =>This Inner Loop Header: Depth=1
; SI-NEXT: s_and_b64 s[4:5], exec, s[2:3]
; SI-NEXT: s_or_b64 s[0:1], s[4:5], s[0:1]
; SI-NEXT: s_andn2_b64 exec, exec, s[0:1]
; SI-NEXT: s_cbranch_execnz .LBB1_3
; SI-NEXT: ; %bb.4: ; %exit
; SI-NEXT: s_endpgm
;
; FLAT-LABEL: phi_cond_outside_loop:
; FLAT: ; %bb.0: ; %entry
; FLAT-NEXT: v_mbcnt_lo_u32_b32 v0, -1, 0
; FLAT-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
; FLAT-NEXT: s_mov_b64 s[0:1], 0
; FLAT-NEXT: s_mov_b64 s[2:3], 0
; FLAT-NEXT: s_and_saveexec_b64 s[6:7], vcc
; FLAT-NEXT: s_cbranch_execz .LBB1_2
; FLAT-NEXT: ; %bb.1: ; %else
; FLAT-NEXT: s_load_dword s2, s[4:5], 0x24
; FLAT-NEXT: s_waitcnt lgkmcnt(0)
; FLAT-NEXT: s_cmp_eq_u32 s2, 0
; FLAT-NEXT: s_cselect_b64 s[2:3], -1, 0
; FLAT-NEXT: s_and_b64 s[2:3], s[2:3], exec
; FLAT-NEXT: .LBB1_2: ; %endif
; FLAT-NEXT: s_or_b64 exec, exec, s[6:7]
; FLAT-NEXT: .LBB1_3: ; %loop
; FLAT-NEXT: ; =>This Inner Loop Header: Depth=1
; FLAT-NEXT: s_and_b64 s[4:5], exec, s[2:3]
; FLAT-NEXT: s_or_b64 s[0:1], s[4:5], s[0:1]
; FLAT-NEXT: s_andn2_b64 exec, exec, s[0:1]
; FLAT-NEXT: s_cbranch_execnz .LBB1_3
; FLAT-NEXT: ; %bb.4: ; %exit
; FLAT-NEXT: s_endpgm
entry:
  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %0 = icmp eq i32 %tid , 0
  br i1 %0, label %if, label %else

if:
  br label %endif

else:
  %1 = icmp eq i32 %b, 0
  br label %endif

endif:
  %2 = phi i1 [0, %if], [%1, %else]
  br label %loop

loop:
  br i1 %2, label %exit, label %loop

exit:
  ret void
}

define amdgpu_kernel void @switch_unreachable(ptr addrspace(1) %g, ptr addrspace(3) %l, i32 %x) nounwind {
; SI-LABEL: switch_unreachable:
; SI: ; %bb.0: ; %centry
;
; FLAT-LABEL: switch_unreachable:
; FLAT: ; %bb.0: ; %centry
centry:
  switch i32 %x, label %sw.default [
    i32 0, label %sw.bb
    i32 60, label %sw.bb
  ]

sw.bb:
  unreachable

sw.default:
  unreachable

sw.epilog:
  ret void
}

declare float @llvm.fabs.f32(float) nounwind readnone

define amdgpu_kernel void @loop_land_info_assert(i32 %c0, i32 %c1, i32 %c2, i32 %c3, i32 %x, i32 %y, i1 %arg) nounwind {
; SI-LABEL: loop_land_info_assert:
; SI: ; %bb.0: ; %entry
; SI-NEXT: s_load_dword s0, s[4:5], 0xa
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_cmp_lt_i32 s0, 4
; SI-NEXT: s_cbranch_scc1 .LBB3_4
; SI-NEXT: ; %bb.1: ; %for.cond.preheader
; SI-NEXT: s_load_dword s0, s[4:5], 0xc
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_cmpk_lt_i32 s0, 0x3e8
; SI-NEXT: s_cbranch_scc0 .LBB3_4
; SI-NEXT: ; %bb.2: ; %for.body
; SI-NEXT: s_and_b64 vcc, exec, 0
; SI-NEXT: .LBB3_3: ; %self.loop
; SI-NEXT: ; =>This Inner Loop Header: Depth=1
; SI-NEXT: s_mov_b64 vcc, vcc
; SI-NEXT: s_cbranch_vccz .LBB3_3
; SI-NEXT: .LBB3_4: ; %DummyReturnBlock
; SI-NEXT: s_endpgm
;
; FLAT-LABEL: loop_land_info_assert:
; FLAT: ; %bb.0: ; %entry
; FLAT-NEXT: s_load_dword s0, s[4:5], 0x28
; FLAT-NEXT: s_waitcnt lgkmcnt(0)
; FLAT-NEXT: s_cmp_lt_i32 s0, 4
; FLAT-NEXT: s_cbranch_scc1 .LBB3_4
; FLAT-NEXT: ; %bb.1: ; %for.cond.preheader
; FLAT-NEXT: s_load_dword s0, s[4:5], 0x30
; FLAT-NEXT: s_waitcnt lgkmcnt(0)
; FLAT-NEXT: s_cmpk_lt_i32 s0, 0x3e8
; FLAT-NEXT: s_cbranch_scc0 .LBB3_4
; FLAT-NEXT: ; %bb.2: ; %for.body
; FLAT-NEXT: s_and_b64 vcc, exec, 0
; FLAT-NEXT: .LBB3_3: ; %self.loop
; FLAT-NEXT: ; =>This Inner Loop Header: Depth=1
; FLAT-NEXT: s_mov_b64 vcc, vcc
; FLAT-NEXT: s_cbranch_vccz .LBB3_3
; FLAT-NEXT: .LBB3_4: ; %DummyReturnBlock
; FLAT-NEXT: s_endpgm
entry:
  %cmp = icmp sgt i32 %c0, 0
  br label %while.cond.outer

while.cond.outer:
  %tmp = load float, ptr addrspace(1) undef
  br label %while.cond

while.cond:
  %cmp1 = icmp slt i32 %c1, 4
  br i1 %cmp1, label %convex.exit, label %for.cond

convex.exit:
  %or = or i1 %cmp, %cmp1
  br i1 %or, label %return, label %if.end

if.end:
  %tmp3 = call float @llvm.fabs.f32(float %tmp) nounwind readnone
  %cmp2 = fcmp olt float %tmp3, 0x3E80000000000000
  br i1 %cmp2, label %if.else, label %while.cond.outer

if.else:
  store volatile i32 3, ptr addrspace(1) undef, align 4
  br label %while.cond

for.cond:
  %cmp3 = icmp slt i32 %c3, 1000
  br i1 %cmp3, label %for.body, label %return

for.body:
  br i1 %cmp3, label %self.loop, label %if.end.2

if.end.2:
  %or.cond2 = or i1 %cmp3, %arg
  br i1 %or.cond2, label %return, label %for.cond

self.loop:
  br label %self.loop

return:
  ret void
}

declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #0

attributes #0 = { nounwind readnone }