xref: /llvm-project/llvm/test/CodeGen/AMDGPU/vgpr-liverange.ll (revision 8632e8bd64d6f02e571777390274c262d5c85167)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -amdgpu-opt-vgpr-liverange=true -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
3
4; a normal if-else
; Shape under test: %v is read only inside the two arms (%if and %else); %end
; merges the two products with a phi and has no other use of %v.
5define amdgpu_ps float @else1(i32 %z, float %v) #0 {
6; SI-LABEL: else1:
7; SI:       ; %bb.0: ; %main_body
8; SI-NEXT:    v_cmp_gt_i32_e32 vcc_lo, 6, v0
9; SI-NEXT:    ; implicit-def: $vgpr0
10; SI-NEXT:    s_and_saveexec_b32 s0, vcc_lo
11; SI-NEXT:    s_xor_b32 s0, exec_lo, s0
12; SI-NEXT:    s_cbranch_execnz .LBB0_3
13; SI-NEXT:  ; %bb.1: ; %Flow
14; SI-NEXT:    s_andn2_saveexec_b32 s0, s0
15; SI-NEXT:    s_cbranch_execnz .LBB0_4
16; SI-NEXT:  .LBB0_2: ; %end
17; SI-NEXT:    s_or_b32 exec_lo, exec_lo, s0
18; SI-NEXT:    s_branch .LBB0_5
19; SI-NEXT:  .LBB0_3: ; %else
20; SI-NEXT:    v_mul_f32_e32 v0, 0x40400000, v1
21; SI-NEXT:    ; implicit-def: $vgpr1
22; SI-NEXT:    s_andn2_saveexec_b32 s0, s0
23; SI-NEXT:    s_cbranch_execz .LBB0_2
24; SI-NEXT:  .LBB0_4: ; %if
25; SI-NEXT:    v_add_f32_e32 v0, v1, v1
26; SI-NEXT:    s_or_b32 exec_lo, exec_lo, s0
27; SI-NEXT:    s_branch .LBB0_5
28; SI-NEXT:  .LBB0_5:
; The SI lines above expect an `implicit-def: $vgpr1` marker in %else, placed
; right after the multiply that reads v1.
29main_body:
30  %cc = icmp sgt i32 %z, 5
31  br i1 %cc, label %if, label %else
32
33if:
; %v * 2.0: the SI lines expect this as v_add_f32 v0, v1, v1.
34  %v.if = fmul float %v, 2.0
35  br label %end
36
37else:
; %v * 3.0: the SI lines expect the literal 0x40400000 (3.0 as an IEEE-754
; single) as the multiplier.
38  %v.else = fmul float %v, 3.0
39  br label %end
40
41end:
; Only the per-arm results reach this block; %v itself is not an incoming value.
42  %r = phi float [ %v.if, %if ], [ %v.else, %else ]
43  ret float %r
44}
45
46
47; %v is still in use after the if-else
; Shape under test: same two arms as a plain if/else, but %v is also an
; incoming value of the %r0 phi in %end (on the %else edge), so it is read
; after the arms as well as inside them.
48define amdgpu_ps float @else2(i32 %z, float %v) #0 {
49; SI-LABEL: else2:
50; SI:       ; %bb.0: ; %main_body
51; SI-NEXT:    v_cmp_gt_i32_e32 vcc_lo, 6, v0
52; SI-NEXT:    ; implicit-def: $vgpr0
53; SI-NEXT:    s_and_saveexec_b32 s0, vcc_lo
54; SI-NEXT:    s_xor_b32 s0, exec_lo, s0
55; SI-NEXT:  ; %bb.1: ; %else
56; SI-NEXT:    v_mul_f32_e32 v0, 0x40400000, v1
57; SI-NEXT:  ; %bb.2: ; %Flow
58; SI-NEXT:    s_andn2_saveexec_b32 s0, s0
59; SI-NEXT:  ; %bb.3: ; %if
60; SI-NEXT:    v_add_f32_e32 v1, v1, v1
61; SI-NEXT:    v_mov_b32_e32 v0, v1
62; SI-NEXT:  ; %bb.4: ; %end
63; SI-NEXT:    s_or_b32 exec_lo, exec_lo, s0
64; SI-NEXT:    v_add_f32_e32 v0, v1, v0
65; SI-NEXT:    ; return to shader part epilog
; The SI lines above contain no `implicit-def: $vgpr1` marker; v1 is read again
; by the final v_add_f32 in %end.
66main_body:
67  %cc = icmp sgt i32 %z, 5
68  br i1 %cc, label %if, label %else
69
70if:
71  %v.if = fmul float %v, 2.0
72  br label %end
73
74else:
75  %v.else = fmul float %v, 3.0
76  br label %end
77
78end:
; %r0 takes the unmodified %v on the %else edge; this is the use of %v that
; lies outside both arms.
79  %r0 = phi float [ %v.if, %if ], [ %v, %else ]
80  %r1 = phi float [ %v.if, %if ], [ %v.else, %else ]
81  %r2 = fadd float %r0, %r1
82  ret float %r2
83}
84
85; if-else inside loop, %x can be optimized, but %v cannot be.
; Shape under test: the if/else sits inside a loop. %x is a loop-carried phi
; that each arm reads and replaces (%x.if / %x.else); %v is a function argument
; that both arms read on every iteration.
86define amdgpu_ps float @else3(i32 %z, float %v, i32 inreg %bound, i32 %x0) #0 {
87; SI-LABEL: else3:
88; SI:       ; %bb.0: ; %entry
89; SI-NEXT:    s_mov_b32 s1, 0
90; SI-NEXT:    v_cmp_gt_i32_e32 vcc_lo, 6, v0
91; SI-NEXT:    s_branch .LBB2_2
92; SI-NEXT:  .LBB2_1: ; %if.end
93; SI-NEXT:    ; in Loop: Header=BB2_2 Depth=1
94; SI-NEXT:    s_or_b32 exec_lo, exec_lo, s2
95; SI-NEXT:    v_add_nc_u32_e32 v2, 1, v3
96; SI-NEXT:    s_add_i32 s1, s1, 1
97; SI-NEXT:    s_cmp_lt_i32 s1, s0
98; SI-NEXT:    s_cbranch_scc0 .LBB2_6
99; SI-NEXT:  .LBB2_2: ; %for.body
100; SI-NEXT:    ; =>This Inner Loop Header: Depth=1
101; SI-NEXT:    ; implicit-def: $vgpr3
102; SI-NEXT:    ; implicit-def: $vgpr0
103; SI-NEXT:    s_and_saveexec_b32 s2, vcc_lo
104; SI-NEXT:    s_xor_b32 s2, exec_lo, s2
105; SI-NEXT:  ; %bb.3: ; %else
106; SI-NEXT:    ; in Loop: Header=BB2_2 Depth=1
107; SI-NEXT:    v_mul_f32_e32 v0, v1, v2
108; SI-NEXT:    v_lshl_add_u32 v3, v2, 1, v2
109; SI-NEXT:    ; implicit-def: $vgpr2
110; SI-NEXT:  ; %bb.4: ; %Flow
111; SI-NEXT:    ; in Loop: Header=BB2_2 Depth=1
112; SI-NEXT:    s_andn2_saveexec_b32 s2, s2
113; SI-NEXT:    s_cbranch_execz .LBB2_1
114; SI-NEXT:  ; %bb.5: ; %if
115; SI-NEXT:    ; in Loop: Header=BB2_2 Depth=1
116; SI-NEXT:    v_mul_f32_e32 v0, s1, v1
117; SI-NEXT:    v_add_nc_u32_e32 v3, 1, v2
118; SI-NEXT:    s_branch .LBB2_1
119; SI-NEXT:  .LBB2_6: ; %for.end
120; SI-NEXT:    v_add_f32_e32 v0, v3, v0
121; SI-NEXT:    ; return to shader part epilog
; The SI lines above expect `implicit-def: $vgpr2` in %else after the last read
; of v2, and no such marker for v1.
122entry:
; The guard on %bound below is left commented out, so %for.body is always
; entered at least once.
123;  %break = icmp sgt i32 %bound, 0
124;  br i1 %break, label %for.body, label %for.end
125  br label %for.body
126
127for.body:
128  %i = phi i32 [ 0, %entry ], [ %inc, %if.end ]
129  %x = phi i32 [ %x0, %entry ], [ %xinc, %if.end ]
; %cc depends only on the argument %z, so it does not change between iterations.
130  %cc = icmp sgt i32 %z, 5
131  br i1 %cc, label %if, label %else
132
133if:
134  %i.tmp = bitcast i32 %i to float
135  %v.if = fmul float %v, %i.tmp
136  %x.if = add i32 %x, 1
137  br label %if.end
138
139else:
140  %x.tmp = bitcast i32 %x to float
141  %v.else = fmul float %v, %x.tmp
; %x * 3: the SI lines expect this as v_lshl_add_u32 v3, v2, 1, v2.
142  %x.else = mul i32 %x, 3
143  br label %if.end
144
145if.end:
146  %v.endif = phi float [ %v.if, %if ], [ %v.else, %else ]
147  %x.endif = phi i32 [ %x.if, %if ], [ %x.else, %else ]
148
; %xinc feeds the %x phi on the back edge; %inc is the trip counter compared
; against %bound.
149  %xinc = add i32 %x.endif, 1
150  %inc = add i32 %i, 1
151  %cond = icmp slt i32 %inc, %bound
152  br i1 %cond, label %for.body, label %for.end
153
154for.end:
; The result combines the merged values from the final iteration.
155  %x_float = bitcast i32 %x.endif to float
156  %r = fadd float %x_float, %v.endif
157  ret float %r
158}
159
160; a loop inside an if-else
; Shape under test: each arm makes one indirect amdgpu_gfx call that takes %v,
; and %v has no use in %end. The IR itself contains no loop; the loops appear
; in the SI lines, one around each s_swappc_b64, built from
; v_readfirstlane_b32 on the callee pointer (v[4:5] in %else, v[2:3] in %if),
; a v_cmp_eq_u64 against it, and exec masking. %bound is not referenced by the
; body.
161define amdgpu_ps float @loop(i32 %z, float %v, i32 inreg %bound, ptr %extern_func, ptr %extern_func2) #0 {
162; SI-LABEL: loop:
163; SI:       ; %bb.0: ; %main_body
164; SI-NEXT:    s_mov_b32 s12, SCRATCH_RSRC_DWORD0
165; SI-NEXT:    s_mov_b32 s13, SCRATCH_RSRC_DWORD1
166; SI-NEXT:    s_mov_b32 s14, -1
167; SI-NEXT:    v_mov_b32_e32 v6, v0
168; SI-NEXT:    v_mov_b32_e32 v0, v1
169; SI-NEXT:    s_mov_b32 s15, 0x31c16000
170; SI-NEXT:    s_add_u32 s12, s12, s1
171; SI-NEXT:    s_addc_u32 s13, s13, 0
172; SI-NEXT:    s_mov_b32 s32, 0
173; SI-NEXT:    v_cmp_gt_i32_e32 vcc_lo, 6, v6
174; SI-NEXT:    ; implicit-def: $vgpr1
175; SI-NEXT:    s_and_saveexec_b32 s0, vcc_lo
176; SI-NEXT:    s_xor_b32 s6, exec_lo, s0
177; SI-NEXT:    s_cbranch_execz .LBB3_4
178; SI-NEXT:  ; %bb.1: ; %else
179; SI-NEXT:    s_mov_b32 s7, exec_lo
180; SI-NEXT:  .LBB3_2: ; =>This Inner Loop Header: Depth=1
181; SI-NEXT:    v_readfirstlane_b32 s4, v4
182; SI-NEXT:    v_readfirstlane_b32 s5, v5
183; SI-NEXT:    v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[4:5]
184; SI-NEXT:    s_and_saveexec_b32 s8, vcc_lo
185; SI-NEXT:    s_mov_b64 s[0:1], s[12:13]
186; SI-NEXT:    s_mov_b64 s[2:3], s[14:15]
187; SI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
188; SI-NEXT:    v_mov_b32_e32 v1, v0
189; SI-NEXT:    ; implicit-def: $vgpr4_vgpr5
190; SI-NEXT:    ; implicit-def: $vgpr0
191; SI-NEXT:    s_xor_b32 exec_lo, exec_lo, s8
192; SI-NEXT:    s_cbranch_execnz .LBB3_2
193; SI-NEXT:  ; %bb.3:
194; SI-NEXT:    s_mov_b32 exec_lo, s7
195; SI-NEXT:    ; implicit-def: $vgpr0
196; SI-NEXT:    ; implicit-def: $vgpr2
197; SI-NEXT:  .LBB3_4: ; %Flow
198; SI-NEXT:    s_andn2_saveexec_b32 s6, s6
199; SI-NEXT:    s_cbranch_execz .LBB3_8
200; SI-NEXT:  ; %bb.5: ; %if
201; SI-NEXT:    s_mov_b32 s7, exec_lo
202; SI-NEXT:  .LBB3_6: ; =>This Inner Loop Header: Depth=1
203; SI-NEXT:    v_readfirstlane_b32 s4, v2
204; SI-NEXT:    v_readfirstlane_b32 s5, v3
205; SI-NEXT:    v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[2:3]
206; SI-NEXT:    s_and_saveexec_b32 s8, vcc_lo
207; SI-NEXT:    s_mov_b64 s[0:1], s[12:13]
208; SI-NEXT:    s_mov_b64 s[2:3], s[14:15]
209; SI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
210; SI-NEXT:    v_mov_b32_e32 v1, v0
211; SI-NEXT:    ; implicit-def: $vgpr2_vgpr3
212; SI-NEXT:    ; implicit-def: $vgpr0
213; SI-NEXT:    s_xor_b32 exec_lo, exec_lo, s8
214; SI-NEXT:    s_cbranch_execnz .LBB3_6
215; SI-NEXT:  ; %bb.7:
216; SI-NEXT:    s_mov_b32 exec_lo, s7
217; SI-NEXT:  .LBB3_8: ; %end
218; SI-NEXT:    s_or_b32 exec_lo, exec_lo, s6
219; SI-NEXT:    v_mov_b32_e32 v0, v1
220; SI-NEXT:    ; return to shader part epilog
; The SI lines above expect `implicit-def: $vgpr0` inside each call loop, after
; the call result has been copied from v0 to v1.
221main_body:
222  %cc = icmp sgt i32 %z, 5
223  br i1 %cc, label %if, label %else
224
225if:
; Indirect call through %extern_func; %v is the only argument.
226  %v.if = call amdgpu_gfx float %extern_func(float %v)
227  br label %end
228
229else:
; Indirect call through %extern_func2; %v is the only argument.
230  %v.else = call amdgpu_gfx float %extern_func2(float %v)
231  br label %end
232
233end:
; Only the call results are merged here; %v is not read in this block.
234  %r = phi float [ %v.if, %if ], [ %v.else, %else ]
235  ret float %r
236}
237
238; a loop inside an if-else, but the variable is still in use after the if-else
; Shape under test: each arm makes one indirect amdgpu_gfx call that takes %v,
; and %v is additionally an operand of the fadd in %end. The IR itself contains
; no loop; the loops appear in the SI lines around each s_swappc_b64. The SI
; lines expect v1 to be copied into v40 before the branch, v40 to be copied
; into v0 ahead of each call, and v40 to be read by the final v_add_f32.
; %bound is not referenced by the body.
239define amdgpu_ps float @loop_with_use(i32 %z, float %v, i32 inreg %bound, ptr %extern_func, ptr %extern_func2) #0 {
240; SI-LABEL: loop_with_use:
241; SI:       ; %bb.0: ; %main_body
242; SI-NEXT:    s_mov_b32 s12, SCRATCH_RSRC_DWORD0
243; SI-NEXT:    s_mov_b32 s13, SCRATCH_RSRC_DWORD1
244; SI-NEXT:    s_mov_b32 s14, -1
245; SI-NEXT:    v_mov_b32_e32 v40, v1
246; SI-NEXT:    s_mov_b32 s15, 0x31c16000
247; SI-NEXT:    s_add_u32 s12, s12, s1
248; SI-NEXT:    s_addc_u32 s13, s13, 0
249; SI-NEXT:    s_mov_b32 s32, 0
250; SI-NEXT:    v_cmp_gt_i32_e32 vcc_lo, 6, v0
251; SI-NEXT:    ; implicit-def: $vgpr0
252; SI-NEXT:    s_and_saveexec_b32 s0, vcc_lo
253; SI-NEXT:    s_xor_b32 s6, exec_lo, s0
254; SI-NEXT:    s_cbranch_execz .LBB4_4
255; SI-NEXT:  ; %bb.1: ; %else
256; SI-NEXT:    s_mov_b32 s7, exec_lo
257; SI-NEXT:  .LBB4_2: ; =>This Inner Loop Header: Depth=1
258; SI-NEXT:    v_readfirstlane_b32 s4, v4
259; SI-NEXT:    v_readfirstlane_b32 s5, v5
260; SI-NEXT:    v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[4:5]
261; SI-NEXT:    s_and_saveexec_b32 s8, vcc_lo
262; SI-NEXT:    v_mov_b32_e32 v0, v40
263; SI-NEXT:    s_mov_b64 s[0:1], s[12:13]
264; SI-NEXT:    s_mov_b64 s[2:3], s[14:15]
265; SI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
266; SI-NEXT:    ; implicit-def: $vgpr4_vgpr5
267; SI-NEXT:    s_xor_b32 exec_lo, exec_lo, s8
268; SI-NEXT:    s_cbranch_execnz .LBB4_2
269; SI-NEXT:  ; %bb.3:
270; SI-NEXT:    s_mov_b32 exec_lo, s7
271; SI-NEXT:    ; implicit-def: $vgpr2
272; SI-NEXT:  .LBB4_4: ; %Flow
273; SI-NEXT:    s_andn2_saveexec_b32 s6, s6
274; SI-NEXT:    s_cbranch_execz .LBB4_8
275; SI-NEXT:  ; %bb.5: ; %if
276; SI-NEXT:    s_mov_b32 s7, exec_lo
277; SI-NEXT:  .LBB4_6: ; =>This Inner Loop Header: Depth=1
278; SI-NEXT:    v_readfirstlane_b32 s4, v2
279; SI-NEXT:    v_readfirstlane_b32 s5, v3
280; SI-NEXT:    v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[2:3]
281; SI-NEXT:    s_and_saveexec_b32 s8, vcc_lo
282; SI-NEXT:    v_mov_b32_e32 v0, v40
283; SI-NEXT:    s_mov_b64 s[0:1], s[12:13]
284; SI-NEXT:    s_mov_b64 s[2:3], s[14:15]
285; SI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
286; SI-NEXT:    ; implicit-def: $vgpr2_vgpr3
287; SI-NEXT:    s_xor_b32 exec_lo, exec_lo, s8
288; SI-NEXT:    s_cbranch_execnz .LBB4_6
289; SI-NEXT:  ; %bb.7:
290; SI-NEXT:    s_mov_b32 exec_lo, s7
291; SI-NEXT:  .LBB4_8: ; %end
292; SI-NEXT:    s_or_b32 exec_lo, exec_lo, s6
293; SI-NEXT:    v_add_f32_e32 v0, v0, v40
294; SI-NEXT:    ; return to shader part epilog
295main_body:
296  %cc = icmp sgt i32 %z, 5
297  br i1 %cc, label %if, label %else
298
299if:
; Indirect call through %extern_func; %v is the only argument.
300  %v.if = call amdgpu_gfx float %extern_func(float %v)
301  br label %end
302
303else:
; Indirect call through %extern_func2; %v is the only argument.
304  %v.else = call amdgpu_gfx float %extern_func2(float %v)
305  br label %end
306
307end:
308  %r = phi float [ %v.if, %if ], [ %v.else, %else ]
; This fadd is the use of %v that lies after both arms.
309  %r2 = fadd float %r, %v
310  ret float %r2
311}
312
313attributes #0 = { nounwind }
314