xref: /llvm-project/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.writelane.ll (revision 5cae88164e5247d01f6a814cf610fa667c9aa9a6)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx700 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX7 %s
3; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX8 %s
4; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s
5; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s
6
; Data, lane select and vdst.in are all `inreg` arguments. The checks expect
; vdst.in to be copied into v0 first; GFX7/GFX8 also move the lane select into
; m0, while the GFX10 prefix uses the lane-select SGPR directly.
7define amdgpu_ps float @test_writelane_s_s_s(i32 inreg %data, i32 inreg %lane, i32 inreg %vdst.in) #0 {
8; GFX7-LABEL: test_writelane_s_s_s:
9; GFX7:       ; %bb.0:
10; GFX7-NEXT:    v_mov_b32_e32 v0, s4
11; GFX7-NEXT:    s_mov_b32 m0, s3
12; GFX7-NEXT:    v_writelane_b32 v0, s2, m0
13; GFX7-NEXT:    ; return to shader part epilog
14;
15; GFX8-LABEL: test_writelane_s_s_s:
16; GFX8:       ; %bb.0:
17; GFX8-NEXT:    v_mov_b32_e32 v0, s4
18; GFX8-NEXT:    s_mov_b32 m0, s3
19; GFX8-NEXT:    v_writelane_b32 v0, s2, m0
20; GFX8-NEXT:    ; return to shader part epilog
21;
22; GFX10-LABEL: test_writelane_s_s_s:
23; GFX10:       ; %bb.0:
24; GFX10-NEXT:    v_mov_b32_e32 v0, s4
25; GFX10-NEXT:    v_writelane_b32 v0, s2, s3
26; GFX10-NEXT:    ; return to shader part epilog
27  %writelane = call i32 @llvm.amdgcn.writelane(i32 %data, i32 %lane, i32 %vdst.in)
28  %writelane.cast = bitcast i32 %writelane to float
29  ret float %writelane.cast
30}
31
; Data and lane select are `inreg` arguments; vdst.in is the constant 42. The
; checks expect 42 to be materialised into v0 with v_mov_b32 before the
; writelane. GFX7/GFX8 also move the lane select into m0.
32define amdgpu_ps float @test_writelane_s_s_imm(i32 inreg %data, i32 inreg %lane) #0 {
33; GFX7-LABEL: test_writelane_s_s_imm:
34; GFX7:       ; %bb.0:
35; GFX7-NEXT:    v_mov_b32_e32 v0, 42
36; GFX7-NEXT:    s_mov_b32 m0, s3
37; GFX7-NEXT:    v_writelane_b32 v0, s2, m0
38; GFX7-NEXT:    ; return to shader part epilog
39;
40; GFX8-LABEL: test_writelane_s_s_imm:
41; GFX8:       ; %bb.0:
42; GFX8-NEXT:    v_mov_b32_e32 v0, 42
43; GFX8-NEXT:    s_mov_b32 m0, s3
44; GFX8-NEXT:    v_writelane_b32 v0, s2, m0
45; GFX8-NEXT:    ; return to shader part epilog
46;
47; GFX10-LABEL: test_writelane_s_s_imm:
48; GFX10:       ; %bb.0:
49; GFX10-NEXT:    v_mov_b32_e32 v0, 42
50; GFX10-NEXT:    v_writelane_b32 v0, s2, s3
51; GFX10-NEXT:    ; return to shader part epilog
52  %writelane = call i32 @llvm.amdgcn.writelane(i32 %data, i32 %lane, i32 42)
53  %writelane.cast = bitcast i32 %writelane to float
54  ret float %writelane.cast
55}
56
57; Data is the literal 999, which is not an inline immediate: every prefix expects it materialised in an SGPR with s_movk_i32 (0x3e7).
58define amdgpu_ps float @test_writelane_k_s_v(i32 inreg %lane, i32 %vdst.in) #0 {
59; GFX7-LABEL: test_writelane_k_s_v:
60; GFX7:       ; %bb.0:
61; GFX7-NEXT:    s_movk_i32 s0, 0x3e7
62; GFX7-NEXT:    s_mov_b32 m0, s2
63; GFX7-NEXT:    v_writelane_b32 v0, s0, m0
64; GFX7-NEXT:    ; return to shader part epilog
65;
66; GFX8-LABEL: test_writelane_k_s_v:
67; GFX8:       ; %bb.0:
68; GFX8-NEXT:    s_movk_i32 s0, 0x3e7
69; GFX8-NEXT:    s_mov_b32 m0, s2
70; GFX8-NEXT:    v_writelane_b32 v0, s0, m0
71; GFX8-NEXT:    ; return to shader part epilog
72;
73; GFX10-LABEL: test_writelane_k_s_v:
74; GFX10:       ; %bb.0:
75; GFX10-NEXT:    s_movk_i32 s0, 0x3e7
76; GFX10-NEXT:    v_writelane_b32 v0, s0, s2
77; GFX10-NEXT:    ; return to shader part epilog
78  %writelane = call i32 @llvm.amdgcn.writelane(i32 999, i32 %lane, i32 %vdst.in)
79  %writelane.cast = bitcast i32 %writelane to float
80  ret float %writelane.cast
81}
82
83; Data is the inline immediate 42: every prefix expects it encoded directly in v_writelane_b32, with the lane select taken from s2 and no m0 copy.
84define amdgpu_ps float @test_writelane_imm_s_v(i32 inreg %lane, i32 %vdst.in) #0 {
85; GFX7-LABEL: test_writelane_imm_s_v:
86; GFX7:       ; %bb.0:
87; GFX7-NEXT:    v_writelane_b32 v0, 42, s2
88; GFX7-NEXT:    ; return to shader part epilog
89;
90; GFX8-LABEL: test_writelane_imm_s_v:
91; GFX8:       ; %bb.0:
92; GFX8-NEXT:    v_writelane_b32 v0, 42, s2
93; GFX8-NEXT:    ; return to shader part epilog
94;
95; GFX10-LABEL: test_writelane_imm_s_v:
96; GFX10:       ; %bb.0:
97; GFX10-NEXT:    v_writelane_b32 v0, 42, s2
98; GFX10-NEXT:    ; return to shader part epilog
99  %writelane = call i32 @llvm.amdgcn.writelane(i32 42, i32 %lane, i32 %vdst.in)
100  %writelane.cast = bitcast i32 %writelane to float
101  ret float %writelane.cast
102}
103
104; Data is the bit pattern of 1/(2*pi), a subtarget-dependent inline immediate: GFX7 expects it materialised in an SGPR (0x3e22f983), while GFX8 and GFX10 expect the inline constant 0.15915494.
105define amdgpu_ps float @test_writelane_imminv2pi_s_v(i32 inreg %lane, i32 %vdst.in) #0 {
106; GFX7-LABEL: test_writelane_imminv2pi_s_v:
107; GFX7:       ; %bb.0:
108; GFX7-NEXT:    s_mov_b32 s0, 0x3e22f983
109; GFX7-NEXT:    s_mov_b32 m0, s2
110; GFX7-NEXT:    v_writelane_b32 v0, s0, m0
111; GFX7-NEXT:    ; return to shader part epilog
112;
113; GFX8-LABEL: test_writelane_imminv2pi_s_v:
114; GFX8:       ; %bb.0:
115; GFX8-NEXT:    v_writelane_b32 v0, 0.15915494, s2
116; GFX8-NEXT:    ; return to shader part epilog
117;
118; GFX10-LABEL: test_writelane_imminv2pi_s_v:
119; GFX10:       ; %bb.0:
120; GFX10-NEXT:    v_writelane_b32 v0, 0.15915494, s2
121; GFX10-NEXT:    ; return to shader part epilog
122  %writelane = call i32 @llvm.amdgcn.writelane(i32 bitcast (float 0x3FC45F3060000000 to i32), i32 %lane, i32 %vdst.in)
123  %writelane.cast = bitcast i32 %writelane to float
124  ret float %writelane.cast
125}
126
127
128; Lane select is the inline immediate 23: every prefix expects it encoded directly as the lane operand of v_writelane_b32.
129define amdgpu_ps float @test_writelane_s_imm_v(i32 inreg %data, i32 %vdst.in) #0 {
130; GFX7-LABEL: test_writelane_s_imm_v:
131; GFX7:       ; %bb.0:
132; GFX7-NEXT:    v_writelane_b32 v0, s2, 23
133; GFX7-NEXT:    ; return to shader part epilog
134;
135; GFX8-LABEL: test_writelane_s_imm_v:
136; GFX8:       ; %bb.0:
137; GFX8-NEXT:    v_writelane_b32 v0, s2, 23
138; GFX8-NEXT:    ; return to shader part epilog
139;
140; GFX10-LABEL: test_writelane_s_imm_v:
141; GFX10:       ; %bb.0:
142; GFX10-NEXT:    v_writelane_b32 v0, s2, 23
143; GFX10-NEXT:    ; return to shader part epilog
144  %writelane = call i32 @llvm.amdgcn.writelane(i32 %data, i32 23, i32 %vdst.in)
145  %writelane.cast = bitcast i32 %writelane to float
146  ret float %writelane.cast
147}
148
149; Lane index (67) is larger than the wavesize. GFX7/GFX8 expect the lane operand folded to the immediate 3 (= 67 mod 64); the GFX10 prefix expects 0x43 materialised in an SGPR with s_movk_i32.
150define amdgpu_ps float @test_writelane_s_k0_v(i32 inreg %data, i32 %vdst.in) #0 {
151; GFX7-LABEL: test_writelane_s_k0_v:
152; GFX7:       ; %bb.0:
153; GFX7-NEXT:    v_writelane_b32 v0, s2, 3
154; GFX7-NEXT:    ; return to shader part epilog
155;
156; GFX8-LABEL: test_writelane_s_k0_v:
157; GFX8:       ; %bb.0:
158; GFX8-NEXT:    v_writelane_b32 v0, s2, 3
159; GFX8-NEXT:    ; return to shader part epilog
160;
161; GFX10-LABEL: test_writelane_s_k0_v:
162; GFX10:       ; %bb.0:
163; GFX10-NEXT:    s_movk_i32 s0, 0x43
164; GFX10-NEXT:    v_writelane_b32 v0, s2, s0
165; GFX10-NEXT:    ; return to shader part epilog
166  %writelane = call i32 @llvm.amdgcn.writelane(i32 %data, i32 67, i32 %vdst.in)
167  %writelane.cast = bitcast i32 %writelane to float
168  ret float %writelane.cast
169}
170
171; Lane index (32) is out of range for wave32 (valid lanes are 0-31). Every prefix expects 32 encoded unchanged as the lane operand.
172define amdgpu_ps float @test_writelane_s_k1_v(i32 inreg %data, i32 %vdst.in) #0 {
173; GFX7-LABEL: test_writelane_s_k1_v:
174; GFX7:       ; %bb.0:
175; GFX7-NEXT:    v_writelane_b32 v0, s2, 32
176; GFX7-NEXT:    ; return to shader part epilog
177;
178; GFX8-LABEL: test_writelane_s_k1_v:
179; GFX8:       ; %bb.0:
180; GFX8-NEXT:    v_writelane_b32 v0, s2, 32
181; GFX8-NEXT:    ; return to shader part epilog
182;
183; GFX10-LABEL: test_writelane_s_k1_v:
184; GFX10:       ; %bb.0:
185; GFX10-NEXT:    v_writelane_b32 v0, s2, 32
186; GFX10-NEXT:    ; return to shader part epilog
187  %writelane = call i32 @llvm.amdgcn.writelane(i32 %data, i32 32, i32 %vdst.in)
188  %writelane.cast = bitcast i32 %writelane to float
189  ret float %writelane.cast
190}
191
; Data, lane select and vdst.in are all VGPR (non-`inreg`) arguments. The
; checks expect data and lane select each to be copied to an SGPR with
; v_readfirstlane_b32 before the writelane, and the result then copied from v2
; into v0. GFX7/GFX8 also move the lane select into m0.
192define amdgpu_ps float @test_writelane_v_v_v(i32 %data, i32 %lane, i32 %vdst.in) #0 {
193; GFX7-LABEL: test_writelane_v_v_v:
194; GFX7:       ; %bb.0:
195; GFX7-NEXT:    v_readfirstlane_b32 s1, v1
196; GFX7-NEXT:    v_readfirstlane_b32 s0, v0
197; GFX7-NEXT:    s_mov_b32 m0, s1
198; GFX7-NEXT:    v_writelane_b32 v2, s0, m0
199; GFX7-NEXT:    v_mov_b32_e32 v0, v2
200; GFX7-NEXT:    ; return to shader part epilog
201;
202; GFX8-LABEL: test_writelane_v_v_v:
203; GFX8:       ; %bb.0:
204; GFX8-NEXT:    v_readfirstlane_b32 s1, v1
205; GFX8-NEXT:    v_readfirstlane_b32 s0, v0
206; GFX8-NEXT:    s_mov_b32 m0, s1
207; GFX8-NEXT:    v_writelane_b32 v2, s0, m0
208; GFX8-NEXT:    v_mov_b32_e32 v0, v2
209; GFX8-NEXT:    ; return to shader part epilog
210;
211; GFX10-LABEL: test_writelane_v_v_v:
212; GFX10:       ; %bb.0:
213; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
214; GFX10-NEXT:    v_readfirstlane_b32 s1, v1
215; GFX10-NEXT:    v_writelane_b32 v2, s0, s1
216; GFX10-NEXT:    v_mov_b32_e32 v0, v2
217; GFX10-NEXT:    ; return to shader part epilog
218  %writelane = call i32 @llvm.amdgcn.writelane(i32 %data, i32 %lane, i32 %vdst.in)
219  %writelane.cast = bitcast i32 %writelane to float
220  ret float %writelane.cast
221}
222
; Data is a VGPR argument and the lane select is an `inreg` argument. The
; checks expect only the data to go through v_readfirstlane_b32, and the result
; to be copied from v1 into v0. GFX7/GFX8 also move the lane select into m0.
223define amdgpu_ps float @test_writelane_v_s_v(i32 %data, i32 inreg %lane, i32 %vdst.in) #0 {
224; GFX7-LABEL: test_writelane_v_s_v:
225; GFX7:       ; %bb.0:
226; GFX7-NEXT:    v_readfirstlane_b32 s0, v0
227; GFX7-NEXT:    s_mov_b32 m0, s2
228; GFX7-NEXT:    v_writelane_b32 v1, s0, m0
229; GFX7-NEXT:    v_mov_b32_e32 v0, v1
230; GFX7-NEXT:    ; return to shader part epilog
231;
232; GFX8-LABEL: test_writelane_v_s_v:
233; GFX8:       ; %bb.0:
234; GFX8-NEXT:    v_readfirstlane_b32 s0, v0
235; GFX8-NEXT:    s_mov_b32 m0, s2
236; GFX8-NEXT:    v_writelane_b32 v1, s0, m0
237; GFX8-NEXT:    v_mov_b32_e32 v0, v1
238; GFX8-NEXT:    ; return to shader part epilog
239;
240; GFX10-LABEL: test_writelane_v_s_v:
241; GFX10:       ; %bb.0:
242; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
243; GFX10-NEXT:    v_writelane_b32 v1, s0, s2
244; GFX10-NEXT:    v_mov_b32_e32 v0, v1
245; GFX10-NEXT:    ; return to shader part epilog
246  %writelane = call i32 @llvm.amdgcn.writelane(i32 %data, i32 %lane, i32 %vdst.in)
247  %writelane.cast = bitcast i32 %writelane to float
248  ret float %writelane.cast
249}
250
; Data is produced by inline asm that defines m0 ("={m0}"); lane select is an
; `inreg` argument. GFX7/GFX8 expect m0 to be copied to s0 and the lane select
; then moved into m0; the GFX10 prefix expects m0 used directly as the data
; operand.
251; FIXME: This could theoretically use m0 directly as the data source,
252; and another SGPR as the lane selector and avoid register swap.
253define amdgpu_ps float @test_writelane_m0_s_v(i32 inreg %lane, i32 %vdst.in) #0 {
254; GFX7-LABEL: test_writelane_m0_s_v:
255; GFX7:       ; %bb.0:
256; GFX7-NEXT:    ;;#ASMSTART
257; GFX7-NEXT:    s_mov_b32 m0, -1
258; GFX7-NEXT:    ;;#ASMEND
259; GFX7-NEXT:    s_mov_b32 s0, m0
260; GFX7-NEXT:    s_mov_b32 m0, s2
261; GFX7-NEXT:    v_writelane_b32 v0, s0, m0
262; GFX7-NEXT:    ; return to shader part epilog
263;
264; GFX8-LABEL: test_writelane_m0_s_v:
265; GFX8:       ; %bb.0:
266; GFX8-NEXT:    ;;#ASMSTART
267; GFX8-NEXT:    s_mov_b32 m0, -1
268; GFX8-NEXT:    ;;#ASMEND
269; GFX8-NEXT:    s_mov_b32 s0, m0
270; GFX8-NEXT:    s_mov_b32 m0, s2
271; GFX8-NEXT:    v_writelane_b32 v0, s0, m0
272; GFX8-NEXT:    ; return to shader part epilog
273;
274; GFX10-LABEL: test_writelane_m0_s_v:
275; GFX10:       ; %bb.0:
276; GFX10-NEXT:    ;;#ASMSTART
277; GFX10-NEXT:    s_mov_b32 m0, -1
278; GFX10-NEXT:    ;;#ASMEND
279; GFX10-NEXT:    v_writelane_b32 v0, m0, s2
280; GFX10-NEXT:    ; return to shader part epilog
281  %m0 = call i32 asm "s_mov_b32 m0, -1", "={m0}"()
282  %writelane = call i32 @llvm.amdgcn.writelane(i32 %m0, i32 %lane, i32 %vdst.in)
283  %writelane.cast = bitcast i32 %writelane to float
284  ret float %writelane.cast
285}
286
; Lane select is produced by inline asm that defines m0 ("={m0}"); data is an
; `inreg` argument. Every prefix expects m0 used directly as the lane operand
; with no extra copies.
287define amdgpu_ps float @test_writelane_s_m0_v(i32 inreg %data, i32 %vdst.in) #0 {
288; GFX7-LABEL: test_writelane_s_m0_v:
289; GFX7:       ; %bb.0:
290; GFX7-NEXT:    ;;#ASMSTART
291; GFX7-NEXT:    s_mov_b32 m0, -1
292; GFX7-NEXT:    ;;#ASMEND
293; GFX7-NEXT:    v_writelane_b32 v0, s2, m0
294; GFX7-NEXT:    ; return to shader part epilog
295;
296; GFX8-LABEL: test_writelane_s_m0_v:
297; GFX8:       ; %bb.0:
298; GFX8-NEXT:    ;;#ASMSTART
299; GFX8-NEXT:    s_mov_b32 m0, -1
300; GFX8-NEXT:    ;;#ASMEND
301; GFX8-NEXT:    v_writelane_b32 v0, s2, m0
302; GFX8-NEXT:    ; return to shader part epilog
303;
304; GFX10-LABEL: test_writelane_s_m0_v:
305; GFX10:       ; %bb.0:
306; GFX10-NEXT:    ;;#ASMSTART
307; GFX10-NEXT:    s_mov_b32 m0, -1
308; GFX10-NEXT:    ;;#ASMEND
309; GFX10-NEXT:    v_writelane_b32 v0, s2, m0
310; GFX10-NEXT:    ; return to shader part epilog
311  %m0 = call i32 asm "s_mov_b32 m0, -1", "={m0}"()
312  %writelane = call i32 @llvm.amdgcn.writelane(i32 %data, i32 %m0, i32 %vdst.in)
313  %writelane.cast = bitcast i32 %writelane to float
314  ret float %writelane.cast
315}
316
; Intrinsic under test. The tests in this file pass its operands in the order
; (data, lane select, vdst.in).
317declare i32 @llvm.amdgcn.writelane(i32, i32, i32) #1
; NOTE(review): llvm.amdgcn.workitem.id.x (and attribute group #2) is not
; referenced by any function in this file.
318declare i32 @llvm.amdgcn.workitem.id.x() #2
319
; #0 is applied to every test function, #1 to the writelane declaration and #2
; to the workitem.id.x declaration.
320attributes #0 = { nounwind }
321attributes #1 = { convergent nounwind readnone willreturn }
322attributes #2 = { nounwind readnone speculatable willreturn }
322attributes #2 = { nounwind readnone speculatable willreturn }
323