xref: /llvm-project/llvm/test/CodeGen/AMDGPU/required-export-priority.ll (revision 1bf385f10291101163a346c8f075d56e1578351b)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2; RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -amdgpu-enable-vopd=0 < %s | FileCheck -check-prefix=GCN %s
3
; Pixel shader (amdgpu_ps) issuing two exports to target 0 with an enable
; mask of 0; only the second export sets the done flag.
; The assertions expect `s_setprio 2` at function entry and, after the last
; export, `s_setprio 0` followed by two `s_nop 0` before `s_endpgm`.
4define amdgpu_ps void @test_export_zeroes_f32() #0 {
5; GCN-LABEL: test_export_zeroes_f32:
6; GCN:       ; %bb.0:
7; GCN-NEXT:    s_setprio 2
8; GCN-NEXT:    v_mov_b32_e32 v0, 0
9; GCN-NEXT:    exp mrt0 off, off, off, off
10; GCN-NEXT:    exp mrt0 off, off, off, off done
11; GCN-NEXT:    s_setprio 0
12; GCN-NEXT:    s_nop 0
13; GCN-NEXT:    s_nop 0
14; GCN-NEXT:    s_endpgm
; First export: enable mask 0, done = false.
15  call void @llvm.amdgcn.exp.f32(i32 0, i32 0, float 0.0, float 0.0, float 0.0, float 0.0, i1 false, i1 false)
; Second export: identical operands, done = true.
16  call void @llvm.amdgcn.exp.f32(i32 0, i32 0, float 0.0, float 0.0, float 0.0, float 0.0, i1 true, i1 false)
17  ret void
18}
19
; Pixel shader with a single done export whose enable mask is 1, so only the
; first source operand appears in the expected `exp` instruction.
; The assertions expect `s_setprio 2` at entry and the
; `s_setprio 0` / `s_nop 0` / `s_nop 0` sequence after the export.
20define amdgpu_ps void @test_export_en_src0_f32() #0 {
21; GCN-LABEL: test_export_en_src0_f32:
22; GCN:       ; %bb.0:
23; GCN-NEXT:    s_setprio 2
24; GCN-NEXT:    v_mov_b32_e32 v0, 4.0
25; GCN-NEXT:    v_mov_b32_e32 v1, 0.5
26; GCN-NEXT:    v_mov_b32_e32 v2, 2.0
27; GCN-NEXT:    v_mov_b32_e32 v3, 1.0
28; GCN-NEXT:    exp mrt0 v3, off, off, off done
29; GCN-NEXT:    s_setprio 0
30; GCN-NEXT:    s_nop 0
31; GCN-NEXT:    s_nop 0
32; GCN-NEXT:    s_endpgm
; Target 0, enable mask 1, done = true.
33  call void @llvm.amdgcn.exp.f32(i32 0, i32 1, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
34  ret void
35}
36
; Geometry shader (amdgpu_gs) variant: one done export with enable mask 2,
; so only the second source operand is exported.
; The assertions expect the same priority handling as for the pixel-shader
; tests: `s_setprio 2` at entry, then `s_setprio 0` and two `s_nop 0` after
; the export.
37define amdgpu_gs void @test_export_gs() #0 {
38; GCN-LABEL: test_export_gs:
39; GCN:       ; %bb.0:
40; GCN-NEXT:    s_setprio 2
41; GCN-NEXT:    v_mov_b32_e32 v0, 4.0
42; GCN-NEXT:    v_mov_b32_e32 v1, 0.5
43; GCN-NEXT:    v_mov_b32_e32 v2, 2.0
44; GCN-NEXT:    v_mov_b32_e32 v3, 1.0
45; GCN-NEXT:    exp mrt0 off, v2, off, off done
46; GCN-NEXT:    s_setprio 0
47; GCN-NEXT:    s_nop 0
48; GCN-NEXT:    s_nop 0
49; GCN-NEXT:    s_endpgm
; Target 0, enable mask 2, done = true.
50  call void @llvm.amdgcn.exp.f32(i32 0, i32 2, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
51  ret void
52}
53
; Hull shader (amdgpu_hs) variant of the single done export with enable
; mask 2.
; The assertions expect `s_setprio 2` at entry, then `s_setprio 0` and two
; `s_nop 0` after the export, before `s_endpgm`.
54define amdgpu_hs void @test_export_hs() #0 {
55; GCN-LABEL: test_export_hs:
56; GCN:       ; %bb.0:
57; GCN-NEXT:    s_setprio 2
58; GCN-NEXT:    v_mov_b32_e32 v0, 4.0
59; GCN-NEXT:    v_mov_b32_e32 v1, 0.5
60; GCN-NEXT:    v_mov_b32_e32 v2, 2.0
61; GCN-NEXT:    v_mov_b32_e32 v3, 1.0
62; GCN-NEXT:    exp mrt0 off, v2, off, off done
63; GCN-NEXT:    s_setprio 0
64; GCN-NEXT:    s_nop 0
65; GCN-NEXT:    s_nop 0
66; GCN-NEXT:    s_endpgm
; Target 0, enable mask 2, done = true.
67  call void @llvm.amdgcn.exp.f32(i32 0, i32 2, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
68  ret void
69}
70
; Callable function (amdgpu_gfx) containing a done export with enable mask 2.
; Unlike the shader entry points, the assertions expect no `s_setprio` at
; function entry. After the export they expect `s_setprio 0`,
; `s_waitcnt_expcnt null, 0x0`, two `s_nop 0`, and then `s_setprio 2` again
; before the return through `s_setpc_b64`.
; This function is also the callee used by test_export_in_callee and
; test_export_in_callee_prio.
71define amdgpu_gfx void @test_export_gfx(float %v) #0 {
72; GCN-LABEL: test_export_gfx:
73; GCN:       ; %bb.0:
74; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
75; GCN-NEXT:    v_mov_b32_e32 v1, 4.0
76; GCN-NEXT:    v_mov_b32_e32 v2, 0.5
77; GCN-NEXT:    v_mov_b32_e32 v3, 2.0
78; GCN-NEXT:    exp mrt0 off, v3, off, off done
79; GCN-NEXT:    s_setprio 0
80; GCN-NEXT:    s_waitcnt_expcnt null, 0x0
81; GCN-NEXT:    s_nop 0
82; GCN-NEXT:    s_nop 0
83; GCN-NEXT:    s_setprio 2
84; GCN-NEXT:    s_waitcnt expcnt(0)
85; GCN-NEXT:    s_setpc_b64 s[30:31]
; %v is passed as the first source operand, which enable mask 2 leaves
; disabled in the expected `exp` instruction.
86  call void @llvm.amdgcn.exp.f32(i32 0, i32 2, float %v, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
87  ret void
88}
89
; Compute shader (amdgpu_cs) containing the same done export as the
; graphics-shader tests.
; The assertions expect no `s_setprio` and no trailing `s_nop` here: the
; export is followed directly by `s_endpgm`.
90define amdgpu_cs void @test_export_cs() #0 {
91; GCN-LABEL: test_export_cs:
92; GCN:       ; %bb.0:
93; GCN-NEXT:    v_mov_b32_e32 v0, 4.0
94; GCN-NEXT:    v_mov_b32_e32 v1, 0.5
95; GCN-NEXT:    v_mov_b32_e32 v2, 2.0
96; GCN-NEXT:    v_mov_b32_e32 v3, 1.0
97; GCN-NEXT:    exp mrt0 off, v2, off, off done
98; GCN-NEXT:    s_endpgm
; Target 0, enable mask 2, done = true.
99  call void @llvm.amdgcn.exp.f32(i32 0, i32 2, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
100  ret void
101}
102
; Compute kernel (amdgpu_kernel) containing the same done export as the
; graphics-shader tests.
; As with test_export_cs, the assertions expect no `s_setprio` and no
; trailing `s_nop`: the export is followed directly by `s_endpgm`.
103define amdgpu_kernel void @test_export_kernel() #0 {
104; GCN-LABEL: test_export_kernel:
105; GCN:       ; %bb.0:
106; GCN-NEXT:    v_mov_b32_e32 v0, 4.0
107; GCN-NEXT:    v_mov_b32_e32 v1, 0.5
108; GCN-NEXT:    v_mov_b32_e32 v2, 2.0
109; GCN-NEXT:    v_mov_b32_e32 v3, 1.0
110; GCN-NEXT:    exp mrt0 off, v2, off, off done
111; GCN-NEXT:    s_endpgm
; Target 0, enable mask 2, done = true.
112  call void @llvm.amdgcn.exp.f32(i32 0, i32 2, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
113  ret void
114}
115
; Callable function (amdgpu_gfx) with an empty body and no export.
; The assertions expect only the entry wait and the return; no `s_setprio`
; is expected.
116define amdgpu_gfx void @test_no_export_gfx(float %v) #0 {
117; GCN-LABEL: test_no_export_gfx:
118; GCN:       ; %bb.0:
119; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
120; GCN-NEXT:    s_setpc_b64 s[30:31]
121  ret void
122}
123
; Pixel shader with an empty body: no export and no call.
; The assertions expect a lone `s_endpgm`, i.e. no `s_setprio` at entry.
124define amdgpu_ps void @test_no_export_ps(float %v) #0 {
125; GCN-LABEL: test_no_export_ps:
126; GCN:       ; %bb.0:
127; GCN-NEXT:    s_endpgm
128  ret void
129}
130
; Pixel shader whose export sits in a conditionally executed block (%exp),
; reached only when %flag is non-zero. The export has done = false.
; The assertions expect `s_setprio 2` once at function entry. Inside the
; %exp block, after the export, they expect `s_setprio 0`,
; `s_waitcnt_expcnt null, 0x0`, two `s_nop 0`, and `s_setprio 2` again
; before control reaches %end.
131define amdgpu_ps void @test_if_export_f32(i32 %flag, float %x, float %y, float %z, float %w) #0 {
132; GCN-LABEL: test_if_export_f32:
133; GCN:       ; %bb.0:
134; GCN-NEXT:    s_setprio 2
135; GCN-NEXT:    s_mov_b32 s0, exec_lo
136; GCN-NEXT:    v_cmpx_ne_u32_e32 0, v0
137; GCN-NEXT:    s_cbranch_execz .LBB9_2
138; GCN-NEXT:  ; %bb.1: ; %exp
139; GCN-NEXT:    exp mrt0 v1, v2, v3, v4
140; GCN-NEXT:    s_setprio 0
141; GCN-NEXT:    s_waitcnt_expcnt null, 0x0
142; GCN-NEXT:    s_nop 0
143; GCN-NEXT:    s_nop 0
144; GCN-NEXT:    s_setprio 2
145; GCN-NEXT:  .LBB9_2: ; %end
146; GCN-NEXT:    s_endpgm
; Skip the export when %flag == 0.
147  %cc = icmp eq i32 %flag, 0
148  br i1 %cc, label %end, label %exp
149
150exp:
; Target 0, enable mask 15 (all four sources), done = false, vm = false.
151  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float %w, i1 false, i1 false)
152  br label %end
153
154end:
155  ret void
156}
157
; Conditional-export pixel shader where the export's last i1 argument (vm)
; is true and done is false.
; The expected `exp` instruction carries no extra suffix for this flag, and
; the expected priority sequence inside %exp is `s_setprio 0`,
; `s_waitcnt_expcnt null, 0x0`, two `s_nop 0`, then `s_setprio 2`.
158define amdgpu_ps void @test_if_export_vm_f32(i32 %flag, float %x, float %y, float %z, float %w) #0 {
159; GCN-LABEL: test_if_export_vm_f32:
160; GCN:       ; %bb.0:
161; GCN-NEXT:    s_setprio 2
162; GCN-NEXT:    s_mov_b32 s0, exec_lo
163; GCN-NEXT:    v_cmpx_ne_u32_e32 0, v0
164; GCN-NEXT:    s_cbranch_execz .LBB10_2
165; GCN-NEXT:  ; %bb.1: ; %exp
166; GCN-NEXT:    exp mrt0 v1, v2, v3, v4
167; GCN-NEXT:    s_setprio 0
168; GCN-NEXT:    s_waitcnt_expcnt null, 0x0
169; GCN-NEXT:    s_nop 0
170; GCN-NEXT:    s_nop 0
171; GCN-NEXT:    s_setprio 2
172; GCN-NEXT:  .LBB10_2: ; %end
173; GCN-NEXT:    s_endpgm
; Skip the export when %flag == 0.
174  %cc = icmp eq i32 %flag, 0
175  br i1 %cc, label %end, label %exp
176
177exp:
; Target 0, enable mask 15, done = false, vm = true.
178  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float %w, i1 false, i1 true)
179  br label %end
180
181end:
182  ret void
183}
184
; Conditional-export pixel shader where the export has done = true.
; Even with done set, the export is not the last instruction before
; `s_endpgm` in its block, so the assertions expect the drop-and-restore
; form: `s_setprio 0`, `s_waitcnt_expcnt null, 0x0`, two `s_nop 0`, then
; `s_setprio 2` before %end.
185define amdgpu_ps void @test_if_export_done_f32(i32 %flag, float %x, float %y, float %z, float %w) #0 {
186; GCN-LABEL: test_if_export_done_f32:
187; GCN:       ; %bb.0:
188; GCN-NEXT:    s_setprio 2
189; GCN-NEXT:    s_mov_b32 s0, exec_lo
190; GCN-NEXT:    v_cmpx_ne_u32_e32 0, v0
191; GCN-NEXT:    s_cbranch_execz .LBB11_2
192; GCN-NEXT:  ; %bb.1: ; %exp
193; GCN-NEXT:    exp mrt0 v1, v2, v3, v4 done
194; GCN-NEXT:    s_setprio 0
195; GCN-NEXT:    s_waitcnt_expcnt null, 0x0
196; GCN-NEXT:    s_nop 0
197; GCN-NEXT:    s_nop 0
198; GCN-NEXT:    s_setprio 2
199; GCN-NEXT:  .LBB11_2: ; %end
200; GCN-NEXT:    s_endpgm
; Skip the export when %flag == 0.
201  %cc = icmp eq i32 %flag, 0
202  br i1 %cc, label %end, label %exp
203
204exp:
; Target 0, enable mask 15, done = true, vm = false.
205  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float %w, i1 true, i1 false)
206  br label %end
207
208end:
209  ret void
210}
211
; Conditional-export pixel shader where both trailing i1 arguments (done
; and vm) are true.
; The expected `exp` instruction ends in `done` only, and the expected
; priority sequence inside %exp is `s_setprio 0`,
; `s_waitcnt_expcnt null, 0x0`, two `s_nop 0`, then `s_setprio 2`.
212define amdgpu_ps void @test_if_export_vm_done_f32(i32 %flag, float %x, float %y, float %z, float %w) #0 {
213; GCN-LABEL: test_if_export_vm_done_f32:
214; GCN:       ; %bb.0:
215; GCN-NEXT:    s_setprio 2
216; GCN-NEXT:    s_mov_b32 s0, exec_lo
217; GCN-NEXT:    v_cmpx_ne_u32_e32 0, v0
218; GCN-NEXT:    s_cbranch_execz .LBB12_2
219; GCN-NEXT:  ; %bb.1: ; %exp
220; GCN-NEXT:    exp mrt0 v1, v2, v3, v4 done
221; GCN-NEXT:    s_setprio 0
222; GCN-NEXT:    s_waitcnt_expcnt null, 0x0
223; GCN-NEXT:    s_nop 0
224; GCN-NEXT:    s_nop 0
225; GCN-NEXT:    s_setprio 2
226; GCN-NEXT:  .LBB12_2: ; %end
227; GCN-NEXT:    s_endpgm
; Skip the export when %flag == 0.
228  %cc = icmp eq i32 %flag, 0
229  br i1 %cc, label %end, label %exp
230
231exp:
; Target 0, enable mask 15, done = true, vm = true.
232  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float %w, i1 true, i1 true)
233  br label %end
234
235end:
236  ret void
237}
238
; Pixel shader whose IR issues two exports to targets 32 and 33, then a
; buffer load, then a done export to target 12 that uses the loaded value.
; In the expected output the target-12 export (printed as `pos0`) comes
; first, after `s_waitcnt vmcnt(0)`, and the target-32/33 exports follow
; it; those targets are printed as `invalid_target_32` / `invalid_target_33`.
; The three exports are adjacent, so the assertions expect a single
; `s_setprio 0` plus two `s_nop 0` after the last one, before `s_endpgm`.
239define amdgpu_ps void @test_export_pos_before_param_across_load(i32 %idx) #0 {
240; GCN-LABEL: test_export_pos_before_param_across_load:
241; GCN:       ; %bb.0:
242; GCN-NEXT:    s_setprio 2
243; GCN-NEXT:    buffer_load_b32 v0, v0, s[0:3], 0 offen
244; GCN-NEXT:    v_mov_b32_e32 v1, 0
245; GCN-NEXT:    v_mov_b32_e32 v2, 1.0
246; GCN-NEXT:    v_mov_b32_e32 v3, 0.5
247; GCN-NEXT:    s_waitcnt vmcnt(0)
248; GCN-NEXT:    exp pos0 v1, v1, v1, v0 done
249; GCN-NEXT:    exp invalid_target_32 v2, v2, v2, v2
250; GCN-NEXT:    exp invalid_target_33 v2, v2, v2, v3
251; GCN-NEXT:    s_setprio 0
252; GCN-NEXT:    s_nop 0
253; GCN-NEXT:    s_nop 0
254; GCN-NEXT:    s_endpgm
; Exports to targets 32 and 33, both with done = false.
255  call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float 1.0, float 1.0, float 1.0, float 1.0, i1 false, i1 false)
256  call void @llvm.amdgcn.exp.f32(i32 33, i32 15, float 1.0, float 1.0, float 1.0, float 0.5, i1 false, i1 false)
; Buffer load indexed by %idx; the resource descriptor operand is undef.
257  %load = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) undef, i32 %idx, i32 0, i32 0)
; Export to target 12 with done = true; its last source is the loaded value.
258  call void @llvm.amdgcn.exp.f32(i32 12, i32 15, float 0.0, float 0.0, float 0.0, float %load, i1 true, i1 false)
259  ret void
260}
261
; Pixel shader with a done export to target 12 placed between a private
; (addrspace(5)) store and a load, followed by two exports to targets 32
; and 33 that use the loaded value.
; The exports end up in two groups separated by `s_waitcnt vmcnt(0)`, so
; the assertions expect two priority sequences:
;   - after `exp pos0 ... done`: `s_setprio 0`, `s_waitcnt_expcnt null, 0x0`,
;     two `s_nop 0`, then `s_setprio 2`;
;   - after the last export: `s_setprio 0` and two `s_nop 0` before
;     `s_endpgm`.
262define amdgpu_ps void @test_export_across_store_load(i32 %idx, float %v) #0 {
263; GCN-LABEL: test_export_across_store_load:
264; GCN:       ; %bb.0:
265; GCN-NEXT:    s_setprio 2
266; GCN-NEXT:    v_mov_b32_e32 v2, 0
267; GCN-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
268; GCN-NEXT:    s_delay_alu instid0(VALU_DEP_2)
269; GCN-NEXT:    v_cndmask_b32_e32 v0, 16, v2, vcc_lo
270; GCN-NEXT:    v_mov_b32_e32 v2, 0
271; GCN-NEXT:    scratch_store_b32 v0, v1, off
272; GCN-NEXT:    scratch_load_b32 v0, off, off
273; GCN-NEXT:    v_mov_b32_e32 v1, 1.0
274; GCN-NEXT:    exp pos0 v2, v2, v2, v1 done
275; GCN-NEXT:    s_setprio 0
276; GCN-NEXT:    s_waitcnt_expcnt null, 0x0
277; GCN-NEXT:    s_nop 0
278; GCN-NEXT:    s_nop 0
279; GCN-NEXT:    s_setprio 2
280; GCN-NEXT:    s_waitcnt vmcnt(0)
281; GCN-NEXT:    exp invalid_target_32 v0, v2, v1, v2
282; GCN-NEXT:    exp invalid_target_33 v0, v2, v1, v2
283; GCN-NEXT:    s_setprio 0
284; GCN-NEXT:    s_nop 0
285; GCN-NEXT:    s_nop 0
286; GCN-NEXT:    s_endpgm
; Two private stack slots; %v is stored to %data0 when %idx == 1 and to
; %data1 otherwise.
287  %data0 = alloca <4 x float>, align 8, addrspace(5)
288  %data1 = alloca <4 x float>, align 8, addrspace(5)
289  %cmp = icmp eq i32 %idx, 1
290  %data = select i1 %cmp, ptr addrspace(5) %data0, ptr addrspace(5) %data1
291  store float %v, ptr addrspace(5) %data, align 8
; Export to target 12 with done = true, issued between the store and the load.
292  call void @llvm.amdgcn.exp.f32(i32 12, i32 15, float 0.0, float 0.0, float 0.0, float 1.0, i1 true, i1 false)
; The load always reads %data0, regardless of which slot was written.
293  %load0 = load float, ptr addrspace(5) %data0, align 8
; Exports to targets 32 and 33 that use the loaded value, both with done = false.
294  call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %load0, float 0.0, float 1.0, float 0.0, i1 false, i1 false)
295  call void @llvm.amdgcn.exp.f32(i32 33, i32 15, float %load0, float 0.0, float 1.0, float 0.0, i1 false, i1 false)
296  ret void
297}
298
; Pixel shader with no export of its own; it calls @test_export_gfx, which
; contains one.
; The assertions expect `s_setprio 2` at entry of this caller and no other
; `s_setprio` in its body.
299define amdgpu_ps void @test_export_in_callee(float %v) #0 {
300; GCN-LABEL: test_export_in_callee:
301; GCN:       ; %bb.0:
302; GCN-NEXT:    s_setprio 2
303; GCN-NEXT:    s_getpc_b64 s[0:1]
304; GCN-NEXT:    s_add_u32 s0, s0, test_export_gfx@gotpcrel32@lo+4
305; GCN-NEXT:    s_addc_u32 s1, s1, test_export_gfx@gotpcrel32@hi+12
306; GCN-NEXT:    v_add_f32_e32 v0, 1.0, v0
307; GCN-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
308; GCN-NEXT:    s_mov_b32 s32, 0
309; GCN-NEXT:    s_waitcnt lgkmcnt(0)
310; GCN-NEXT:    s_swappc_b64 s[30:31], s[0:1]
311; GCN-NEXT:    s_endpgm
312  %x = fadd float %v, 1.0
; The export happens inside the callee.
313  call void @test_export_gfx(float %x)
314  ret void
315}
316
; Like test_export_in_callee, but the IR explicitly requests priority 0
; through llvm.amdgcn.s.setprio before calling @test_export_gfx.
; The assertions expect `s_setprio 2` at entry and a second `s_setprio 2`
; later in the body; no `s_setprio 0` appears in the expected output.
; NOTE(review): presumably the second `s_setprio 2` is the intrinsic's
; priority 0 raised to 2 -- confirm against the hazard-recognizer source.
317define amdgpu_ps void @test_export_in_callee_prio(float %v) #0 {
318; GCN-LABEL: test_export_in_callee_prio:
319; GCN:       ; %bb.0:
320; GCN-NEXT:    s_setprio 2
321; GCN-NEXT:    s_mov_b32 s32, 0
322; GCN-NEXT:    v_add_f32_e32 v0, 1.0, v0
323; GCN-NEXT:    s_setprio 2
324; GCN-NEXT:    s_getpc_b64 s[0:1]
325; GCN-NEXT:    s_add_u32 s0, s0, test_export_gfx@gotpcrel32@lo+4
326; GCN-NEXT:    s_addc_u32 s1, s1, test_export_gfx@gotpcrel32@hi+12
327; GCN-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
328; GCN-NEXT:    s_waitcnt lgkmcnt(0)
329; GCN-NEXT:    s_swappc_b64 s[30:31], s[0:1]
330; GCN-NEXT:    s_endpgm
331  %x = fadd float %v, 1.0
; Explicit request for priority 0 ahead of the call.
332  call void @llvm.amdgcn.s.setprio(i16 0)
; The export happens inside the callee.
333  call void @test_export_gfx(float %x)
334  ret void
335}
336
; Declarations of the intrinsics referenced by the test functions.
337declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #1
; NOTE(review): no call to @llvm.amdgcn.exp.i32 is visible in this file;
; confirm whether this declaration is still needed.
338declare void @llvm.amdgcn.exp.i32(i32, i32, i32, i32, i32, i32, i1, i1) #1
339declare float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8), i32, i32, i32) #2
340declare void @llvm.amdgcn.s.setprio(i16)
341
; Attribute groups: #0 is attached to every test function, #1 to the export
; intrinsics, and #2 to the buffer-load intrinsic.
342attributes #0 = { nounwind }
343attributes #1 = { nounwind inaccessiblememonly }
344attributes #2 = { nounwind readnone }
345