; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn -verify-machineinstrs -simplifycfg-require-and-preserve-domtree=1 < %s | FileCheck -enable-var-scope %s

; Although it is modeled without any control flow in order to get better code
; out of the structurizer, @llvm.amdgcn.kill actually ends the calling thread
; when its condition operand is false (every call in this file passes
; `i1 false`). When it is called in a provably infinite loop, the program
; still has to exit successfully and export something, even though the LLVM IR
; gives no block to jump to. The backend therefore inserts a null export right
; before the s_endpgm of the early-exit block to avoid GPU hangs; the presence
; of that export is what this test verifies.

; Void pixel shader: %0 < 10.0 takes the %end path, which performs a regular
; f32 export; otherwise control enters %loop, which kills unconditionally and
; never leaves. The expected output ends in a separate early-exit block that
; zeroes exec, issues a null export and ends the program.
;
; FIXME: Immediate value 0x41200000 (the bit pattern of 10.0f) should be
; folded into the v_cmp instruction instead of being materialized in s2.
define amdgpu_ps void @return_void(float %0) #0 {
; CHECK-LABEL: return_void:
; CHECK:       ; %bb.0: ; %main_body
; CHECK-NEXT:    s_mov_b64 s[0:1], exec
; CHECK-NEXT:    s_mov_b32 s2, 0x41200000
; CHECK-NEXT:    v_cmp_ngt_f32_e32 vcc, s2, v0
; CHECK-NEXT:    s_and_saveexec_b64 s[2:3], vcc
; CHECK-NEXT:    s_xor_b64 s[2:3], exec, s[2:3]
; CHECK-NEXT:    s_cbranch_execz .LBB0_3
; CHECK-NEXT:  .LBB0_1: ; %loop
; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    s_andn2_b64 s[0:1], s[0:1], exec
; CHECK-NEXT:    s_cbranch_scc0 .LBB0_6
; CHECK-NEXT:  ; %bb.2: ; %loop
; CHECK-NEXT:    ; in Loop: Header=BB0_1 Depth=1
; CHECK-NEXT:    s_mov_b64 exec, 0
; CHECK-NEXT:    s_mov_b64 vcc, 0
; CHECK-NEXT:    s_branch .LBB0_1
; CHECK-NEXT:  .LBB0_3: ; %Flow1
; CHECK-NEXT:    s_andn2_saveexec_b64 s[0:1], s[2:3]
; CHECK-NEXT:    s_cbranch_execz .LBB0_5
; CHECK-NEXT:  ; %bb.4: ; %end
; CHECK-NEXT:    v_mov_b32_e32 v0, 1.0
; CHECK-NEXT:    v_mov_b32_e32 v1, 0
; CHECK-NEXT:    exp mrt0 v1, v1, v1, v0 done vm
; CHECK-NEXT:  .LBB0_5: ; %UnifiedReturnBlock
; CHECK-NEXT:    s_endpgm
; CHECK-NEXT:  .LBB0_6:
; CHECK-NEXT:    s_mov_b64 exec, 0
; CHECK-NEXT:    exp null off, off, off, off done vm
; CHECK-NEXT:    s_endpgm
main_body:
  %cmp = fcmp olt float %0, 1.000000e+01
  br i1 %cmp, label %end, label %loop

loop:
  ; Unconditional kill inside a loop that has no exit edge.
  call void @llvm.amdgcn.kill(i1 false) #0
  br label %loop

end:
  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float 0., float 0., float 0., float 1., i1 true, i1 true) #0
  ret void
}

; Same control flow as @return_void, but the %end path uses a compressed
; (v2i16) export. The early-exit block must still use a null export.
define amdgpu_ps void @return_void_compr(float %0) #0 {
; CHECK-LABEL: return_void_compr:
; CHECK:       ; %bb.0: ; %main_body
; CHECK-NEXT:    s_mov_b64 s[0:1], exec
; CHECK-NEXT:    s_mov_b32 s2, 0x41200000
; CHECK-NEXT:    v_cmp_ngt_f32_e32 vcc, s2, v0
; CHECK-NEXT:    s_and_saveexec_b64 s[2:3], vcc
; CHECK-NEXT:    s_xor_b64 s[2:3], exec, s[2:3]
; CHECK-NEXT:    s_cbranch_execz .LBB1_3
; CHECK-NEXT:  .LBB1_1: ; %loop
; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    s_andn2_b64 s[0:1], s[0:1], exec
; CHECK-NEXT:    s_cbranch_scc0 .LBB1_6
; CHECK-NEXT:  ; %bb.2: ; %loop
; CHECK-NEXT:    ; in Loop: Header=BB1_1 Depth=1
; CHECK-NEXT:    s_mov_b64 exec, 0
; CHECK-NEXT:    s_mov_b64 vcc, 0
; CHECK-NEXT:    s_branch .LBB1_1
; CHECK-NEXT:  .LBB1_3: ; %Flow1
; CHECK-NEXT:    s_andn2_saveexec_b64 s[0:1], s[2:3]
; CHECK-NEXT:    s_cbranch_execz .LBB1_5
; CHECK-NEXT:  ; %bb.4: ; %end
; CHECK-NEXT:    v_mov_b32_e32 v0, 0
; CHECK-NEXT:    exp mrt0 v0, off, v0, off done compr vm
; CHECK-NEXT:  .LBB1_5: ; %UnifiedReturnBlock
; CHECK-NEXT:    s_endpgm
; CHECK-NEXT:  .LBB1_6:
; CHECK-NEXT:    s_mov_b64 exec, 0
; CHECK-NEXT:    exp null off, off, off, off done vm
; CHECK-NEXT:    s_endpgm
main_body:
  %cmp = fcmp olt float %0, 1.000000e+01
  br i1 %cmp, label %end, label %loop

loop:
  call void @llvm.amdgcn.kill(i1 false) #0
  br label %loop

end:
  call void @llvm.amdgcn.exp.compr.v2i16(i32 0, i32 5, <2 x i16> < i16 0, i16 0 >, <2 x i16> < i16 0, i16 0 >, i1 true, i1 true) #0
  ret void
}

; Test the case where there is only a kill in an infinite loop: the function
; has no return and no export of its own in the IR.
define amdgpu_ps void @only_kill() #0 {
; CHECK-LABEL: only_kill:
; CHECK:       ; %bb.0: ; %main_body
; CHECK-NEXT:    s_mov_b64 s[0:1], exec
; CHECK-NEXT:  .LBB2_1: ; %loop
; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    s_andn2_b64 s[0:1], s[0:1], exec
; CHECK-NEXT:    s_cbranch_scc0 .LBB2_4
; CHECK-NEXT:  ; %bb.2: ; %loop
; CHECK-NEXT:    ; in Loop: Header=BB2_1 Depth=1
; CHECK-NEXT:    s_mov_b64 exec, 0
; CHECK-NEXT:    s_mov_b64 vcc, exec
; CHECK-NEXT:    s_cbranch_execnz .LBB2_1
; CHECK-NEXT:  ; %bb.3: ; %DummyReturnBlock
; CHECK-NEXT:    s_endpgm
; CHECK-NEXT:  .LBB2_4:
; CHECK-NEXT:    s_mov_b64 exec, 0
; CHECK-NEXT:    exp null off, off, off, off done vm
; CHECK-NEXT:    s_endpgm
main_body:
  br label %loop

loop:
  call void @llvm.amdgcn.kill(i1 false) #0
  br label %loop
}

; Check that the epilog is the final block: for a shader that returns a value,
; the early-exit block (null export + s_endpgm) is expected before the
; trailing return label rather than after it.
define amdgpu_ps float @return_nonvoid(float %0) #0 {
; CHECK-LABEL: return_nonvoid:
; CHECK:       ; %bb.0: ; %main_body
; CHECK-NEXT:    s_mov_b64 s[0:1], exec
; CHECK-NEXT:    s_mov_b32 s2, 0x41200000
; CHECK-NEXT:    v_cmp_ngt_f32_e32 vcc, s2, v0
; CHECK-NEXT:    s_and_saveexec_b64 s[2:3], vcc
; CHECK-NEXT:    s_xor_b64 s[2:3], exec, s[2:3]
; CHECK-NEXT:    s_cbranch_execz .LBB3_3
; CHECK-NEXT:  .LBB3_1: ; %loop
; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    s_andn2_b64 s[0:1], s[0:1], exec
; CHECK-NEXT:    s_cbranch_scc0 .LBB3_4
; CHECK-NEXT:  ; %bb.2: ; %loop
; CHECK-NEXT:    ; in Loop: Header=BB3_1 Depth=1
; CHECK-NEXT:    s_mov_b64 exec, 0
; CHECK-NEXT:    s_mov_b64 vcc, exec
; CHECK-NEXT:    s_cbranch_execnz .LBB3_1
; CHECK-NEXT:  .LBB3_3: ; %Flow1
; CHECK-NEXT:    s_or_b64 exec, exec, s[2:3]
; CHECK-NEXT:    v_mov_b32_e32 v0, 0
; CHECK-NEXT:    s_branch .LBB3_5
; CHECK-NEXT:  .LBB3_4:
; CHECK-NEXT:    s_mov_b64 exec, 0
; CHECK-NEXT:    exp null off, off, off, off done vm
; CHECK-NEXT:    s_endpgm
; CHECK-NEXT:  .LBB3_5:
main_body:
  %cmp = fcmp olt float %0, 1.000000e+01
  br i1 %cmp, label %end, label %loop

loop:
  call void @llvm.amdgcn.kill(i1 false) #0
  br label %loop

end:
  ret float 0.
}

; Intrinsics used by the tests above.
declare void @llvm.amdgcn.kill(i1) #0
declare void @llvm.amdgcn.exp.f32(i32 immarg, i32 immarg, float, float, float, float, i1 immarg, i1 immarg) #0
declare void @llvm.amdgcn.exp.compr.v2i16(i32 immarg, i32 immarg, <2 x i16>, <2 x i16>, i1 immarg, i1 immarg) #0

; Single attribute group shared by the definitions, declarations and call sites.
attributes #0 = { nounwind }