xref: /llvm-project/llvm/test/CodeGen/AMDGPU/kill-infinite-loop.ll (revision b1bcb7ca460fcd317bbc8309e14c8761bf8394e0)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=amdgcn -verify-machineinstrs -simplifycfg-require-and-preserve-domtree=1 < %s | FileCheck -enable-var-scope %s
3
4; Although it's modeled without any control flow in order to get better code
5; out of the structurizer, @llvm.amdgcn.kill actually ends the thread that calls
6; it with "false". If it's called in a provably infinite loop, we still
7; need to successfully exit and export something, even if we can't know where
8; to jump to in the LLVM IR. Therefore we insert a null export ourselves in
9; this case right before the s_endpgm to avoid GPU hangs, which is what this
10; test verifies.
11
12; FIXME: Immediate value 0x41200000 should be folded into the v_cmp instruction.
; amdgpu_ps function returning void. Inputs with %0 < 10.0 branch to %end and
; perform a regular export; every other input (including NaN, because the
; compare is ordered) enters %loop, which calls kill(false) and branches back
; to itself with no exit edge. The expected output below gives the killed path
; its own block (.LBB0_6) that issues a null export before s_endpgm.
13define amdgpu_ps void @return_void(float %0) #0 {
14; CHECK-LABEL: return_void:
15; CHECK:       ; %bb.0: ; %main_body
16; CHECK-NEXT:    s_mov_b64 s[0:1], exec
17; CHECK-NEXT:    s_mov_b32 s2, 0x41200000
18; CHECK-NEXT:    v_cmp_ngt_f32_e32 vcc, s2, v0
19; CHECK-NEXT:    s_and_saveexec_b64 s[2:3], vcc
20; CHECK-NEXT:    s_xor_b64 s[2:3], exec, s[2:3]
21; CHECK-NEXT:    s_cbranch_execz .LBB0_3
22; CHECK-NEXT:  .LBB0_1: ; %loop
23; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
24; CHECK-NEXT:    s_andn2_b64 s[0:1], s[0:1], exec
25; CHECK-NEXT:    s_cbranch_scc0 .LBB0_6
26; CHECK-NEXT:  ; %bb.2: ; %loop
27; CHECK-NEXT:    ; in Loop: Header=BB0_1 Depth=1
28; CHECK-NEXT:    s_mov_b64 exec, 0
29; CHECK-NEXT:    s_mov_b64 vcc, 0
30; CHECK-NEXT:    s_branch .LBB0_1
31; CHECK-NEXT:  .LBB0_3: ; %Flow1
32; CHECK-NEXT:    s_andn2_saveexec_b64 s[0:1], s[2:3]
33; CHECK-NEXT:    s_cbranch_execz .LBB0_5
34; CHECK-NEXT:  ; %bb.4: ; %end
35; CHECK-NEXT:    v_mov_b32_e32 v0, 1.0
36; CHECK-NEXT:    v_mov_b32_e32 v1, 0
37; CHECK-NEXT:    exp mrt0 v1, v1, v1, v0 done vm
38; CHECK-NEXT:  .LBB0_5: ; %UnifiedReturnBlock
39; CHECK-NEXT:    s_endpgm
40; CHECK-NEXT:  .LBB0_6:
41; CHECK-NEXT:    s_mov_b64 exec, 0
42; CHECK-NEXT:    exp null off, off, off, off done vm
43; CHECK-NEXT:    s_endpgm
44main_body:
  ; 1.000000e+01 (10.0f) is the 0x41200000 immediate in the expected output.
45  %cmp = fcmp olt float %0, 1.000000e+01
46  br i1 %cmp, label %end, label %loop
47
48loop:
  ; Constant-false kill inside a self-loop: the only successor of %loop is
  ; %loop itself, so the IR has no path from here to a return.
49  call void @llvm.amdgcn.kill(i1 false) #3
50  br label %loop
51
52end:
  ; Export (0, 0, 0, 1) to target 0 with enable mask 15; the two trailing i1
  ; flags show up as "done vm" on the expected exp instruction.
53  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float 0., float 0., float 0., float 1., i1 true, i1 true) #3
54  ret void
55}
56
; Same control flow as @return_void, but %end uses the compressed export
; intrinsic. The killed path is still expected to end in a block (.LBB1_6)
; that issues a null export before s_endpgm.
57define amdgpu_ps void @return_void_compr(float %0) #0 {
58; CHECK-LABEL: return_void_compr:
59; CHECK:       ; %bb.0: ; %main_body
60; CHECK-NEXT:    s_mov_b64 s[0:1], exec
61; CHECK-NEXT:    s_mov_b32 s2, 0x41200000
62; CHECK-NEXT:    v_cmp_ngt_f32_e32 vcc, s2, v0
63; CHECK-NEXT:    s_and_saveexec_b64 s[2:3], vcc
64; CHECK-NEXT:    s_xor_b64 s[2:3], exec, s[2:3]
65; CHECK-NEXT:    s_cbranch_execz .LBB1_3
66; CHECK-NEXT:  .LBB1_1: ; %loop
67; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
68; CHECK-NEXT:    s_andn2_b64 s[0:1], s[0:1], exec
69; CHECK-NEXT:    s_cbranch_scc0 .LBB1_6
70; CHECK-NEXT:  ; %bb.2: ; %loop
71; CHECK-NEXT:    ; in Loop: Header=BB1_1 Depth=1
72; CHECK-NEXT:    s_mov_b64 exec, 0
73; CHECK-NEXT:    s_mov_b64 vcc, 0
74; CHECK-NEXT:    s_branch .LBB1_1
75; CHECK-NEXT:  .LBB1_3: ; %Flow1
76; CHECK-NEXT:    s_andn2_saveexec_b64 s[0:1], s[2:3]
77; CHECK-NEXT:    s_cbranch_execz .LBB1_5
78; CHECK-NEXT:  ; %bb.4: ; %end
79; CHECK-NEXT:    v_mov_b32_e32 v0, 0
80; CHECK-NEXT:    exp mrt0 v0, off, v0, off done compr vm
81; CHECK-NEXT:  .LBB1_5: ; %UnifiedReturnBlock
82; CHECK-NEXT:    s_endpgm
83; CHECK-NEXT:  .LBB1_6:
84; CHECK-NEXT:    s_mov_b64 exec, 0
85; CHECK-NEXT:    exp null off, off, off, off done vm
86; CHECK-NEXT:    s_endpgm
87main_body:
  ; Ordered compare: values >= 10.0 and NaN fall through to %loop.
88  %cmp = fcmp olt float %0, 1.000000e+01
89  br i1 %cmp, label %end, label %loop
90
91loop:
  ; Constant-false kill in a loop whose only successor is itself.
92  call void @llvm.amdgcn.kill(i1 false) #3
93  br label %loop
94
95end:
  ; Compressed export of two all-zero <2 x i16> vectors to target 0 with
  ; enable mask 5; the expected exp instruction carries "done compr vm".
96  call void @llvm.amdgcn.exp.compr.v2i16(i32 0, i32 5, <2 x i16> < i16 0, i16 0 >, <2 x i16> < i16 0, i16 0 >, i1 true, i1 true) #3
97  ret void
98}
99
100; Test the case where there's only a kill in an infinite loop.
; amdgpu_ps function with no parameters and no ret instruction: %main_body
; jumps straight into %loop, which calls kill(false) and branches back to
; itself. The expected output still contains two s_endpgm instructions: one in
; a %DummyReturnBlock and one after the null export in .LBB2_4.
101define amdgpu_ps void @only_kill() #0 {
102; CHECK-LABEL: only_kill:
103; CHECK:       ; %bb.0: ; %main_body
104; CHECK-NEXT:    s_mov_b64 s[0:1], exec
105; CHECK-NEXT:  .LBB2_1: ; %loop
106; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
107; CHECK-NEXT:    s_andn2_b64 s[0:1], s[0:1], exec
108; CHECK-NEXT:    s_cbranch_scc0 .LBB2_4
109; CHECK-NEXT:  ; %bb.2: ; %loop
110; CHECK-NEXT:    ; in Loop: Header=BB2_1 Depth=1
111; CHECK-NEXT:    s_mov_b64 exec, 0
112; CHECK-NEXT:    s_mov_b64 vcc, exec
113; CHECK-NEXT:    s_cbranch_execnz .LBB2_1
114; CHECK-NEXT:  ; %bb.3: ; %DummyReturnBlock
115; CHECK-NEXT:    s_endpgm
116; CHECK-NEXT:  .LBB2_4:
117; CHECK-NEXT:    s_mov_b64 exec, 0
118; CHECK-NEXT:    exp null off, off, off, off done vm
119; CHECK-NEXT:    s_endpgm
120main_body:
  ; Unconditional entry into the loop; there is no other path in this function.
121  br label %loop
122
123loop:
  ; Constant-false kill; the back edge below is the block's only successor.
124  call void @llvm.amdgcn.kill(i1 false) #3
125  br label %loop
126}
127
128; Check that the epilog is the final block
; amdgpu_ps function returning float. Inputs with %0 < 10.0 return 0.0 from
; %end; every other input enters the kill(false) self-loop. In the expected
; output the null-export block (.LBB3_4) is followed by a trailing .LBB3_5
; label, which is the target of the s_branch on the normal return path.
129define amdgpu_ps float @return_nonvoid(float %0) #0 {
130; CHECK-LABEL: return_nonvoid:
131; CHECK:       ; %bb.0: ; %main_body
132; CHECK-NEXT:    s_mov_b64 s[0:1], exec
133; CHECK-NEXT:    s_mov_b32 s2, 0x41200000
134; CHECK-NEXT:    v_cmp_ngt_f32_e32 vcc, s2, v0
135; CHECK-NEXT:    s_and_saveexec_b64 s[2:3], vcc
136; CHECK-NEXT:    s_xor_b64 s[2:3], exec, s[2:3]
137; CHECK-NEXT:    s_cbranch_execz .LBB3_3
138; CHECK-NEXT:  .LBB3_1: ; %loop
139; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
140; CHECK-NEXT:    s_andn2_b64 s[0:1], s[0:1], exec
141; CHECK-NEXT:    s_cbranch_scc0 .LBB3_4
142; CHECK-NEXT:  ; %bb.2: ; %loop
143; CHECK-NEXT:    ; in Loop: Header=BB3_1 Depth=1
144; CHECK-NEXT:    s_mov_b64 exec, 0
145; CHECK-NEXT:    s_mov_b64 vcc, exec
146; CHECK-NEXT:    s_cbranch_execnz .LBB3_1
147; CHECK-NEXT:  .LBB3_3: ; %Flow1
148; CHECK-NEXT:    s_or_b64 exec, exec, s[2:3]
149; CHECK-NEXT:    v_mov_b32_e32 v0, 0
150; CHECK-NEXT:    s_branch .LBB3_5
151; CHECK-NEXT:  .LBB3_4:
152; CHECK-NEXT:    s_mov_b64 exec, 0
153; CHECK-NEXT:    exp null off, off, off, off done vm
154; CHECK-NEXT:    s_endpgm
155; CHECK-NEXT:  .LBB3_5:
156main_body:
  ; Ordered compare against 10.0 (0x41200000 in the expected output).
157  %cmp = fcmp olt float %0, 1.000000e+01
158  br i1 %cmp, label %end, label %loop
159
160loop:
  ; Constant-false kill in a loop whose only successor is itself.
161  call void @llvm.amdgcn.kill(i1 false) #3
162  br label %loop
163
164end:
  ; The returned 0.0 appears as "v_mov_b32_e32 v0, 0" in the expected output.
165  ret float 0.
166}
167
; Intrinsics called by the test functions in this file; each is declared with
; attribute group #0.
168declare void @llvm.amdgcn.kill(i1) #0
169declare void @llvm.amdgcn.exp.f32(i32 immarg, i32 immarg, float, float, float, float, i1 immarg, i1 immarg) #0
170declare void @llvm.amdgcn.exp.compr.v2i16(i32 immarg, i32 immarg, <2 x i16>, <2 x i16>, i1 immarg, i1 immarg) #0
171
; Attribute group shared by every define and declare in this file.
; NOTE(review): the intrinsic call sites reference attribute group #3, but no
; definition of #3 is visible in this file -- confirm whether that is intended.
172attributes #0 = { nounwind }
173