xref: /llvm-project/llvm/test/CodeGen/AMDGPU/transform-block-with-return-to-epilog.ll (revision 3cf539fb046457a444e93cefc87cca10cbd3b807)
1; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs -stop-after=si-pre-emit-peephole -o - %s | FileCheck -check-prefix=GCN %s
3; If the block containing the SI_RETURN_TO_EPILOG is not the last block, insert an empty block at the end and
4; insert an unconditional jump there.
; Single-block case: the entry block returns %a directly. The checks below show
; one machine block (bb.0.entry) whose last checked instruction is
; SI_RETURN_TO_EPILOG, with no branch in front of it.
5define amdgpu_ps float @simple_test_return_to_epilog(float %a) #0 {
6  ; GCN-LABEL: name: simple_test_return_to_epilog
7  ; GCN: bb.0.entry:
8  ; GCN-NEXT:   liveins: $vgpr0
9  ; GCN-NEXT: {{  $}}
10  ; GCN-NEXT:   SI_RETURN_TO_EPILOG killed $vgpr0
11entry:
12  ret float %a
13}
14
; Two-way branch on (%a > 0): %if returns %b, while %else performs a volatile
; store and ends in unreachable. The checks below expect bb.1.if to end in an
; S_BRANCH to an empty trailing block (bb.3), and bb.2.else to carry an empty
; successor list.
15define amdgpu_ps float @test_return_to_epilog_into_end_block(i32 inreg %a, float %b) #0 {
16  ; GCN-LABEL: name: test_return_to_epilog_into_end_block
17  ; GCN: bb.0.entry:
18  ; GCN-NEXT:   successors: %bb.1(0x80000000), %bb.2(0x00000000)
19  ; GCN-NEXT:   liveins: $sgpr2, $vgpr0
20  ; GCN-NEXT: {{  $}}
21  ; GCN-NEXT:   S_CMP_LT_I32 killed renamable $sgpr2, 1, implicit-def $scc
22  ; GCN-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit killed $scc
23  ; GCN-NEXT: {{  $}}
24  ; GCN-NEXT: bb.1.if:
25  ; GCN-NEXT:   successors: %bb.3(0x80000000)
26  ; GCN-NEXT:   liveins: $vgpr0
27  ; GCN-NEXT: {{  $}}
28  ; GCN-NEXT:   S_BRANCH %bb.3
29  ; GCN-NEXT: {{  $}}
30  ; GCN-NEXT: bb.2.else:
31  ; GCN-NEXT:   successors:
32  ; GCN-NEXT: {{  $}}
33  ; GCN-NEXT:   renamable $vgpr0 = V_MOV_B32_e32 0, implicit $exec
34  ; GCN-NEXT:   GLOBAL_STORE_DWORD undef renamable $vgpr0_vgpr1, killed renamable $vgpr0, 0, 0, implicit $exec :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1)
35  ; GCN-NEXT:   S_WAITCNT 3952
36  ; GCN-NEXT: {{  $}}
37  ; GCN-NEXT: bb.3:
38entry:
39  %cc = icmp sgt i32 %a, 0
40  br i1 %cc, label %if, label %else
41if:                                               ; preds = %entry
42  ret float %b
43else:                                             ; preds = %entry
44  store volatile i32 0, ptr addrspace(1) undef
45  unreachable
46}
47
; Two returning blocks (%if returns %c, %else.if returns %d) plus an %else block
; that performs a volatile store and ends in unreachable. The checks below expect
; both returning blocks (bb.1.if and bb.3.else.if) to S_BRANCH to one shared
; empty trailing block (bb.5); bb.3.else.if first moves $vgpr1 into $vgpr0.
48define amdgpu_ps float @test_unify_return_to_epilog_into_end_block(i32 inreg %a, i32 inreg %b, float %c, float %d) #0 {
49  ; GCN-LABEL: name: test_unify_return_to_epilog_into_end_block
50  ; GCN: bb.0.entry:
51  ; GCN-NEXT:   successors: %bb.1(0x50000000), %bb.2(0x30000000)
52  ; GCN-NEXT:   liveins: $sgpr2, $sgpr3, $vgpr0, $vgpr1
53  ; GCN-NEXT: {{  $}}
54  ; GCN-NEXT:   S_CMP_LT_I32 killed renamable $sgpr2, 1, implicit-def $scc
55  ; GCN-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit killed $scc
56  ; GCN-NEXT: {{  $}}
57  ; GCN-NEXT: bb.1.if:
58  ; GCN-NEXT:   successors: %bb.5(0x80000000)
59  ; GCN-NEXT:   liveins: $vgpr0
60  ; GCN-NEXT: {{  $}}
61  ; GCN-NEXT:   S_BRANCH %bb.5
62  ; GCN-NEXT: {{  $}}
63  ; GCN-NEXT: bb.2.else.if.cond:
64  ; GCN-NEXT:   successors: %bb.3(0x80000000), %bb.4(0x00000000)
65  ; GCN-NEXT:   liveins: $sgpr3, $vgpr1
66  ; GCN-NEXT: {{  $}}
67  ; GCN-NEXT:   S_CMP_LT_I32 killed renamable $sgpr3, 1, implicit-def $scc
68  ; GCN-NEXT:   S_CBRANCH_SCC1 %bb.4, implicit killed $scc
69  ; GCN-NEXT: {{  $}}
70  ; GCN-NEXT: bb.3.else.if:
71  ; GCN-NEXT:   successors: %bb.5(0x80000000)
72  ; GCN-NEXT:   liveins: $vgpr1
73  ; GCN-NEXT: {{  $}}
74  ; GCN-NEXT:   $vgpr0 = V_MOV_B32_e32 killed $vgpr1, implicit $exec, implicit $exec
75  ; GCN-NEXT:   S_BRANCH %bb.5
76  ; GCN-NEXT: {{  $}}
77  ; GCN-NEXT: bb.4.else:
78  ; GCN-NEXT:   successors:
79  ; GCN-NEXT: {{  $}}
80  ; GCN-NEXT:   renamable $vgpr0 = V_MOV_B32_e32 0, implicit $exec
81  ; GCN-NEXT:   GLOBAL_STORE_DWORD undef renamable $vgpr0_vgpr1, killed renamable $vgpr0, 0, 0, implicit $exec :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1)
82  ; GCN-NEXT:   S_WAITCNT 3952
83  ; GCN-NEXT: {{  $}}
84  ; GCN-NEXT: bb.5:
85entry:
86  %cc = icmp sgt i32 %a, 0
87  br i1 %cc, label %if, label %else.if.cond
88if:                                               ; preds = %entry
89  ret float %c
90else.if.cond:                                     ; preds = %entry
91  %cc1 = icmp sgt i32 %b, 0
92  br i1 %cc1, label %if, label %else
93else.if:                                          ; preds = %else.if.cond
94  ret float %d
95else:                                             ; preds = %else.if.cond
96  store volatile i32 0, ptr addrspace(1) undef
97  unreachable
98}
99
; Kill intrinsic combined with a loop. The entry block is unnamed (it shows up as
; "bb.0 (%ir-block.0)" in the checks below), %flow branches back to itself, and
; no branch in this function targets %kill1. The checks expect the block holding
; EXP_DONE / S_ENDPGM (bb.8) to sit before the empty trailing block (bb.9), which
; bb.2.end and bb.7.kill0 reach through S_BRANCH.
100define amdgpu_ps { <4 x float> } @test_return_to_epilog_with_optimized_kill(float %val) #0 {
101  ; GCN-LABEL: name: test_return_to_epilog_with_optimized_kill
102  ; GCN: bb.0 (%ir-block.0):
103  ; GCN-NEXT:   successors: %bb.3(0x40000000), %bb.1(0x40000000)
104  ; GCN-NEXT:   liveins: $vgpr0
105  ; GCN-NEXT: {{  $}}
106  ; GCN-NEXT:   renamable $vgpr1 = nofpexcept V_RCP_F32_e32 $vgpr0, implicit $mode, implicit $exec
107  ; GCN-NEXT:   $sgpr0_sgpr1 = S_MOV_B64 $exec
108  ; GCN-NEXT:   nofpexcept V_CMP_NGT_F32_e32 0, killed $vgpr1, implicit-def $vcc, implicit $mode, implicit $exec
109  ; GCN-NEXT:   $sgpr2_sgpr3 = S_AND_SAVEEXEC_B64 killed $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
110  ; GCN-NEXT:   renamable $sgpr2_sgpr3 = S_XOR_B64 $exec, killed renamable $sgpr2_sgpr3, implicit-def dead $scc
111  ; GCN-NEXT:   S_CBRANCH_EXECNZ %bb.3, implicit $exec
112  ; GCN-NEXT: {{  $}}
113  ; GCN-NEXT: bb.1.Flow1:
114  ; GCN-NEXT:   successors: %bb.6(0x40000000), %bb.2(0x40000000)
115  ; GCN-NEXT:   liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
116  ; GCN-NEXT: {{  $}}
117  ; GCN-NEXT:   $sgpr2_sgpr3 = S_ANDN2_SAVEEXEC_B64 killed $sgpr2_sgpr3, implicit-def $exec, implicit-def $scc, implicit $exec
118  ; GCN-NEXT:   S_CBRANCH_EXECNZ %bb.6, implicit $exec
119  ; GCN-NEXT: {{  $}}
120  ; GCN-NEXT: bb.2.end:
121  ; GCN-NEXT:   successors: %bb.9(0x80000000)
122  ; GCN-NEXT:   liveins: $sgpr2_sgpr3
123  ; GCN-NEXT: {{  $}}
124  ; GCN-NEXT:   $exec = S_OR_B64 $exec, killed renamable $sgpr2_sgpr3, implicit-def $scc
125  ; GCN-NEXT:   S_BRANCH %bb.9
126  ; GCN-NEXT: {{  $}}
127  ; GCN-NEXT: bb.3.flow.preheader:
128  ; GCN-NEXT:   successors: %bb.4(0x80000000)
129  ; GCN-NEXT:   liveins: $vgpr0, $sgpr0_sgpr1, $sgpr2_sgpr3
130  ; GCN-NEXT: {{  $}}
131  ; GCN-NEXT:   nofpexcept V_CMP_NGT_F32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $mode, implicit $exec
132  ; GCN-NEXT:   renamable $sgpr4_sgpr5 = S_MOV_B64 0
133  ; GCN-NEXT: {{  $}}
134  ; GCN-NEXT: bb.4.flow:
135  ; GCN-NEXT:   successors: %bb.5(0x04000000), %bb.4(0x7c000000)
136  ; GCN-NEXT:   liveins: $vcc, $sgpr0_sgpr1, $sgpr2_sgpr3, $sgpr4_sgpr5
137  ; GCN-NEXT: {{  $}}
138  ; GCN-NEXT:   renamable $sgpr6_sgpr7 = S_AND_B64 $exec, renamable $vcc, implicit-def $scc
139  ; GCN-NEXT:   renamable $sgpr4_sgpr5 = S_OR_B64 killed renamable $sgpr6_sgpr7, killed renamable $sgpr4_sgpr5, implicit-def $scc
140  ; GCN-NEXT:   $exec = S_ANDN2_B64 $exec, renamable $sgpr4_sgpr5, implicit-def $scc
141  ; GCN-NEXT:   S_CBRANCH_EXECNZ %bb.4, implicit $exec
142  ; GCN-NEXT: {{  $}}
143  ; GCN-NEXT: bb.5.Flow:
144  ; GCN-NEXT:   successors: %bb.6(0x40000000), %bb.2(0x40000000)
145  ; GCN-NEXT:   liveins: $sgpr0_sgpr1, $sgpr2_sgpr3, $sgpr4_sgpr5
146  ; GCN-NEXT: {{  $}}
147  ; GCN-NEXT:   $exec = S_OR_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def $scc
148  ; GCN-NEXT:   $sgpr2_sgpr3 = S_ANDN2_SAVEEXEC_B64 killed $sgpr2_sgpr3, implicit-def $exec, implicit-def $scc, implicit $exec
149  ; GCN-NEXT:   S_CBRANCH_EXECZ %bb.2, implicit $exec
150  ; GCN-NEXT: {{  $}}
151  ; GCN-NEXT: bb.6.kill0:
152  ; GCN-NEXT:   successors: %bb.7(0x40000000), %bb.8(0x40000000)
153  ; GCN-NEXT:   liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
154  ; GCN-NEXT: {{  $}}
155  ; GCN-NEXT:   dead renamable $sgpr0_sgpr1 = S_ANDN2_B64 killed renamable $sgpr0_sgpr1, $exec, implicit-def $scc
156  ; GCN-NEXT:   S_CBRANCH_SCC0 %bb.8, implicit $scc
157  ; GCN-NEXT: {{  $}}
158  ; GCN-NEXT: bb.7.kill0:
159  ; GCN-NEXT:   successors: %bb.9(0x80000000)
160  ; GCN-NEXT:   liveins: $sgpr2_sgpr3, $scc
161  ; GCN-NEXT: {{  $}}
162  ; GCN-NEXT:   $exec = S_MOV_B64 0
163  ; GCN-NEXT:   $exec = S_OR_B64 $exec, killed renamable $sgpr2_sgpr3, implicit-def $scc
164  ; GCN-NEXT:   S_BRANCH %bb.9
165  ; GCN-NEXT: {{  $}}
166  ; GCN-NEXT: bb.8:
167  ; GCN-NEXT:   $exec = S_MOV_B64 0
168  ; GCN-NEXT:   EXP_DONE 9, undef $vgpr0, undef $vgpr0, undef $vgpr0, undef $vgpr0, 1, 0, 0, implicit $exec
169  ; GCN-NEXT:   S_ENDPGM 0
170  ; GCN-NEXT: {{  $}}
171  ; GCN-NEXT: bb.9:
172  %.i0 = fdiv reassoc nnan nsz arcp contract afn float 1.000000e+00, %val
173  %cmp0 = fcmp olt float %.i0, 0.000000e+00
174  br i1 %cmp0, label %kill0, label %flow
175
176kill0:                                            ; preds = %0
177  call void @llvm.amdgcn.kill(i1 false)
178  br label %end
179
180flow:                                             ; preds = %flow, %0
181  %cmp1 = fcmp olt float %val, 0.000000e+00
182  br i1 %cmp1, label %flow, label %end
183
184kill1:                                            ; No predecessors!
185  call void @llvm.amdgcn.kill(i1 false)
186  br label %end
187
188end:                                              ; preds = %kill0, %kill1, %flow
189  ret { <4 x float> } undef
190}
191
; Intrinsic called from the %kill0 and %kill1 blocks of
; @test_return_to_epilog_with_optimized_kill.
192declare void @llvm.amdgcn.kill(i1) #0
193
; Attribute group referenced as #0 by the four functions above and by the
; intrinsic declaration.
194attributes #0 = { nounwind }
195