xref: /llvm-project/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane16.var.ll (revision 6548b6354d1d990e1c98736f5e7c3de876bedc8e)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -global-isel=0 -amdgpu-load-store-vectorizer=0 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12-SDAG %s
3; RUN: llc -global-isel=1 -amdgpu-load-store-vectorizer=0 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12-GISEL %s
4
; Intrinsics called by the kernels below. The two permlane*.var intrinsics
; take three i32 operands followed by two i1 flags and return an i32.
; NOTE(review): workitem.id.y is declared but no call to it appears in the
; portion of the file visible here - confirm whether it is still needed.
5declare i32 @llvm.amdgcn.permlane16.var(i32, i32, i32, i1, i1)
6declare i32 @llvm.amdgcn.permlanex16.var(i32, i32, i32, i1, i1)
7declare i32 @llvm.amdgcn.workitem.id.x()
8declare i32 @llvm.amdgcn.workitem.id.y()
9
; permlane16.var where the first two operands are both the kernel argument
; %src0 and the third operand is the kernel argument %src1; both i1 flags are
; false. Both llc configurations are expected to copy the two scalar arguments
; into VGPRs and emit v_permlane16_var_b32 without an op_sel modifier, then
; store the result to %out.
10define amdgpu_kernel void @v_permlane16var_b32_vv(ptr addrspace(1) %out, i32 %src0, i32 %src1) {
11; GFX12-SDAG-LABEL: v_permlane16var_b32_vv:
12; GFX12-SDAG:       ; %bb.0:
13; GFX12-SDAG-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
14; GFX12-SDAG-NEXT:    v_mov_b32_e32 v2, 0
15; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
16; GFX12-SDAG-NEXT:    v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
17; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
18; GFX12-SDAG-NEXT:    v_permlane16_var_b32 v0, v0, v1
19; GFX12-SDAG-NEXT:    global_store_b32 v2, v0, s[0:1]
20; GFX12-SDAG-NEXT:    s_endpgm
21;
22; GFX12-GISEL-LABEL: v_permlane16var_b32_vv:
23; GFX12-GISEL:       ; %bb.0:
24; GFX12-GISEL-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
25; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
26; GFX12-GISEL-NEXT:    v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
27; GFX12-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
28; GFX12-GISEL-NEXT:    v_permlane16_var_b32 v0, v0, v1
29; GFX12-GISEL-NEXT:    v_mov_b32_e32 v1, 0
30; GFX12-GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
31; GFX12-GISEL-NEXT:    s_endpgm
32  %v = call i32 @llvm.amdgcn.permlane16.var(i32 %src0, i32 %src0, i32 %src1, i1 false, i1 false)
33  store i32 %v, ptr addrspace(1) %out
34  ret void
35}
36
; permlane16.var with the constant 1 as the third operand; the first two
; operands are the kernel argument %src0 and both i1 flags are false. The
; expected code moves the constant into a VGPR (v0) before the permlane
; instead of using it directly as an instruction operand.
37define amdgpu_kernel void @v_permlane16var_b32_vi(ptr addrspace(1) %out, i32 %src0) {
38; GFX12-SDAG-LABEL: v_permlane16var_b32_vi:
39; GFX12-SDAG:       ; %bb.0:
40; GFX12-SDAG-NEXT:    s_load_b96 s[0:2], s[4:5], 0x24
41; GFX12-SDAG-NEXT:    v_mov_b32_e32 v0, 1
42; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
43; GFX12-SDAG-NEXT:    v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s2
44; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
45; GFX12-SDAG-NEXT:    v_permlane16_var_b32 v1, v1, v0
46; GFX12-SDAG-NEXT:    global_store_b32 v2, v1, s[0:1]
47; GFX12-SDAG-NEXT:    s_endpgm
48;
49; GFX12-GISEL-LABEL: v_permlane16var_b32_vi:
50; GFX12-GISEL:       ; %bb.0:
51; GFX12-GISEL-NEXT:    s_load_b96 s[0:2], s[4:5], 0x24
52; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
53; GFX12-GISEL-NEXT:    v_dual_mov_b32 v0, 1 :: v_dual_mov_b32 v1, s2
54; GFX12-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
55; GFX12-GISEL-NEXT:    v_permlane16_var_b32 v1, v1, v0
56; GFX12-GISEL-NEXT:    v_mov_b32_e32 v0, 0
57; GFX12-GISEL-NEXT:    global_store_b32 v0, v1, s[0:1]
58; GFX12-GISEL-NEXT:    s_endpgm
59  %v = call i32 @llvm.amdgcn.permlane16.var(i32 %src0, i32 %src0, i32 1, i1 false, i1 false)
60  store i32 %v, ptr addrspace(1) %out
61  ret void
62}
63
; Same shape as the _vi test but with the larger constant 49617 (0xc1d1 in
; the expected output) as the third operand. The expected code materializes
; the constant in VGPR v0 and passes that register to v_permlane16_var_b32.
64define amdgpu_kernel void @v_permlane16var_b32_vl(ptr addrspace(1) %out, i32 %src0) {
65; GFX12-SDAG-LABEL: v_permlane16var_b32_vl:
66; GFX12-SDAG:       ; %bb.0:
67; GFX12-SDAG-NEXT:    s_load_b96 s[0:2], s[4:5], 0x24
68; GFX12-SDAG-NEXT:    v_mov_b32_e32 v0, 0xc1d1
69; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
70; GFX12-SDAG-NEXT:    v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s2
71; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
72; GFX12-SDAG-NEXT:    v_permlane16_var_b32 v1, v1, v0
73; GFX12-SDAG-NEXT:    global_store_b32 v2, v1, s[0:1]
74; GFX12-SDAG-NEXT:    s_endpgm
75;
76; GFX12-GISEL-LABEL: v_permlane16var_b32_vl:
77; GFX12-GISEL:       ; %bb.0:
78; GFX12-GISEL-NEXT:    s_load_b96 s[0:2], s[4:5], 0x24
79; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
80; GFX12-GISEL-NEXT:    v_dual_mov_b32 v0, 0xc1d1 :: v_dual_mov_b32 v1, s2
81; GFX12-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
82; GFX12-GISEL-NEXT:    v_permlane16_var_b32 v1, v1, v0
83; GFX12-GISEL-NEXT:    v_mov_b32_e32 v0, 0
84; GFX12-GISEL-NEXT:    global_store_b32 v0, v1, s[0:1]
85; GFX12-GISEL-NEXT:    s_endpgm
86  %v = call i32 @llvm.amdgcn.permlane16.var(i32 %src0, i32 %src0, i32 49617, i1 false, i1 false)
87  store i32 %v, ptr addrspace(1) %out
88  ret void
89}
90
; permlane16.var whose third operand is the result of workitem.id.x; the
; first two operands are the kernel argument %src0 and both i1 flags are
; false. The expected code masks v0 with 0x3ff and uses it as the last
; operand of v_permlane16_var_b32.
91define amdgpu_kernel void @v_permlane16var_b32_vvv(ptr addrspace(1) %out, i32 %src0) {
92; GFX12-SDAG-LABEL: v_permlane16var_b32_vvv:
93; GFX12-SDAG:       ; %bb.0:
94; GFX12-SDAG-NEXT:    s_load_b96 s[0:2], s[4:5], 0x24
95; GFX12-SDAG-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
96; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
97; GFX12-SDAG-NEXT:    v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s2
98; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
99; GFX12-SDAG-NEXT:    v_permlane16_var_b32 v1, v1, v0
100; GFX12-SDAG-NEXT:    global_store_b32 v2, v1, s[0:1]
101; GFX12-SDAG-NEXT:    s_endpgm
102;
103; GFX12-GISEL-LABEL: v_permlane16var_b32_vvv:
104; GFX12-GISEL:       ; %bb.0:
105; GFX12-GISEL-NEXT:    s_load_b96 s[0:2], s[4:5], 0x24
106; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
107; GFX12-GISEL-NEXT:    v_dual_mov_b32 v1, s2 :: v_dual_and_b32 v0, 0x3ff, v0
108; GFX12-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
109; GFX12-GISEL-NEXT:    v_permlane16_var_b32 v1, v1, v0
110; GFX12-GISEL-NEXT:    v_mov_b32_e32 v0, 0
111; GFX12-GISEL-NEXT:    global_store_b32 v0, v1, s[0:1]
112; GFX12-GISEL-NEXT:    s_endpgm
113  %tidx = call i32 @llvm.amdgcn.workitem.id.x()
114  %v = call i32 @llvm.amdgcn.permlane16.var(i32 %src0, i32 %src0, i32 %tidx, i1 false, i1 false)
115  store i32 %v, ptr addrspace(1) %out
116  ret void
117}
118
; Variant of the _vv test with the first i1 flag (fourth operand) set to true
; and the second left false. The expected v_permlane16_var_b32 carries
; op_sel:[1,0]; everything else matches the _vv test.
119define amdgpu_kernel void @v_permlane16var_b32_vv_fi(ptr addrspace(1) %out, i32 %src0, i32 %src1) {
120; GFX12-SDAG-LABEL: v_permlane16var_b32_vv_fi:
121; GFX12-SDAG:       ; %bb.0:
122; GFX12-SDAG-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
123; GFX12-SDAG-NEXT:    v_mov_b32_e32 v2, 0
124; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
125; GFX12-SDAG-NEXT:    v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
126; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
127; GFX12-SDAG-NEXT:    v_permlane16_var_b32 v0, v0, v1 op_sel:[1,0]
128; GFX12-SDAG-NEXT:    global_store_b32 v2, v0, s[0:1]
129; GFX12-SDAG-NEXT:    s_endpgm
130;
131; GFX12-GISEL-LABEL: v_permlane16var_b32_vv_fi:
132; GFX12-GISEL:       ; %bb.0:
133; GFX12-GISEL-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
134; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
135; GFX12-GISEL-NEXT:    v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
136; GFX12-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
137; GFX12-GISEL-NEXT:    v_permlane16_var_b32 v0, v0, v1 op_sel:[1,0]
138; GFX12-GISEL-NEXT:    v_mov_b32_e32 v1, 0
139; GFX12-GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
140; GFX12-GISEL-NEXT:    s_endpgm
141  %v = call i32 @llvm.amdgcn.permlane16.var(i32 %src0, i32 %src0, i32 %src1, i1 true, i1 false)
142  store i32 %v, ptr addrspace(1) %out
143  ret void
144}
145
; Variant of the _vv test with the second i1 flag (fifth operand) set to true
; and the first left false. The expected v_permlane16_var_b32 carries
; op_sel:[0,1].
146define amdgpu_kernel void @v_permlane16var_b32_vv_bc(ptr addrspace(1) %out, i32 %src0, i32 %src1) {
147; GFX12-SDAG-LABEL: v_permlane16var_b32_vv_bc:
148; GFX12-SDAG:       ; %bb.0:
149; GFX12-SDAG-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
150; GFX12-SDAG-NEXT:    v_mov_b32_e32 v2, 0
151; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
152; GFX12-SDAG-NEXT:    v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
153; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
154; GFX12-SDAG-NEXT:    v_permlane16_var_b32 v0, v0, v1 op_sel:[0,1]
155; GFX12-SDAG-NEXT:    global_store_b32 v2, v0, s[0:1]
156; GFX12-SDAG-NEXT:    s_endpgm
157;
158; GFX12-GISEL-LABEL: v_permlane16var_b32_vv_bc:
159; GFX12-GISEL:       ; %bb.0:
160; GFX12-GISEL-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
161; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
162; GFX12-GISEL-NEXT:    v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
163; GFX12-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
164; GFX12-GISEL-NEXT:    v_permlane16_var_b32 v0, v0, v1 op_sel:[0,1]
165; GFX12-GISEL-NEXT:    v_mov_b32_e32 v1, 0
166; GFX12-GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
167; GFX12-GISEL-NEXT:    s_endpgm
168  %v = call i32 @llvm.amdgcn.permlane16.var(i32 %src0, i32 %src0, i32 %src1, i1 false, i1 true)
169  store i32 %v, ptr addrspace(1) %out
170  ret void
171}
172
; Variant of the _vv test with both i1 flags set to true. The expected
; v_permlane16_var_b32 carries op_sel:[1,1].
173define amdgpu_kernel void @v_permlane16var_b32_vv_fi_bc(ptr addrspace(1) %out, i32 %src0, i32 %src1) {
174; GFX12-SDAG-LABEL: v_permlane16var_b32_vv_fi_bc:
175; GFX12-SDAG:       ; %bb.0:
176; GFX12-SDAG-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
177; GFX12-SDAG-NEXT:    v_mov_b32_e32 v2, 0
178; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
179; GFX12-SDAG-NEXT:    v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
180; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
181; GFX12-SDAG-NEXT:    v_permlane16_var_b32 v0, v0, v1 op_sel:[1,1]
182; GFX12-SDAG-NEXT:    global_store_b32 v2, v0, s[0:1]
183; GFX12-SDAG-NEXT:    s_endpgm
184;
185; GFX12-GISEL-LABEL: v_permlane16var_b32_vv_fi_bc:
186; GFX12-GISEL:       ; %bb.0:
187; GFX12-GISEL-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
188; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
189; GFX12-GISEL-NEXT:    v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
190; GFX12-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
191; GFX12-GISEL-NEXT:    v_permlane16_var_b32 v0, v0, v1 op_sel:[1,1]
192; GFX12-GISEL-NEXT:    v_mov_b32_e32 v1, 0
193; GFX12-GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
194; GFX12-GISEL-NEXT:    s_endpgm
195  %v = call i32 @llvm.amdgcn.permlane16.var(i32 %src0, i32 %src0, i32 %src1, i1 true, i1 true)
196  store i32 %v, ptr addrspace(1) %out
197  ret void
198}
199
; permlanex16.var where the first two operands are both the kernel argument
; %src0 and the third operand is the kernel argument %src1; both i1 flags are
; false. Both llc configurations are expected to copy the two scalar arguments
; into VGPRs and emit v_permlanex16_var_b32 without an op_sel modifier.
200define amdgpu_kernel void @v_permlanex16var_b32_vv(ptr addrspace(1) %out, i32 %src0, i32 %src1) {
201; GFX12-SDAG-LABEL: v_permlanex16var_b32_vv:
202; GFX12-SDAG:       ; %bb.0:
203; GFX12-SDAG-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
204; GFX12-SDAG-NEXT:    v_mov_b32_e32 v2, 0
205; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
206; GFX12-SDAG-NEXT:    v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
207; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
208; GFX12-SDAG-NEXT:    v_permlanex16_var_b32 v0, v0, v1
209; GFX12-SDAG-NEXT:    global_store_b32 v2, v0, s[0:1]
210; GFX12-SDAG-NEXT:    s_endpgm
211;
212; GFX12-GISEL-LABEL: v_permlanex16var_b32_vv:
213; GFX12-GISEL:       ; %bb.0:
214; GFX12-GISEL-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
215; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
216; GFX12-GISEL-NEXT:    v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
217; GFX12-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
218; GFX12-GISEL-NEXT:    v_permlanex16_var_b32 v0, v0, v1
219; GFX12-GISEL-NEXT:    v_mov_b32_e32 v1, 0
220; GFX12-GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
221; GFX12-GISEL-NEXT:    s_endpgm
222  %v = call i32 @llvm.amdgcn.permlanex16.var(i32 %src0, i32 %src0, i32 %src1, i1 false, i1 false)
223  store i32 %v, ptr addrspace(1) %out
224  ret void
225}
226
; permlanex16.var with the constant 1 as the third operand; the first two
; operands are the kernel argument %src0 and both i1 flags are false. The
; expected code moves the constant into VGPR v0 before the permlane.
227define amdgpu_kernel void @v_permlanex16var_b32_vi(ptr addrspace(1) %out, i32 %src0) {
228; GFX12-SDAG-LABEL: v_permlanex16var_b32_vi:
229; GFX12-SDAG:       ; %bb.0:
230; GFX12-SDAG-NEXT:    s_load_b96 s[0:2], s[4:5], 0x24
231; GFX12-SDAG-NEXT:    v_mov_b32_e32 v0, 1
232; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
233; GFX12-SDAG-NEXT:    v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s2
234; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
235; GFX12-SDAG-NEXT:    v_permlanex16_var_b32 v1, v1, v0
236; GFX12-SDAG-NEXT:    global_store_b32 v2, v1, s[0:1]
237; GFX12-SDAG-NEXT:    s_endpgm
238;
239; GFX12-GISEL-LABEL: v_permlanex16var_b32_vi:
240; GFX12-GISEL:       ; %bb.0:
241; GFX12-GISEL-NEXT:    s_load_b96 s[0:2], s[4:5], 0x24
242; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
243; GFX12-GISEL-NEXT:    v_dual_mov_b32 v0, 1 :: v_dual_mov_b32 v1, s2
244; GFX12-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
245; GFX12-GISEL-NEXT:    v_permlanex16_var_b32 v1, v1, v0
246; GFX12-GISEL-NEXT:    v_mov_b32_e32 v0, 0
247; GFX12-GISEL-NEXT:    global_store_b32 v0, v1, s[0:1]
248; GFX12-GISEL-NEXT:    s_endpgm
249  %v = call i32 @llvm.amdgcn.permlanex16.var(i32 %src0, i32 %src0, i32 1, i1 false, i1 false)
250  store i32 %v, ptr addrspace(1) %out
251  ret void
252}
253
; Same shape as the permlanex16 _vi test but with the larger constant 49617
; (0xc1d1 in the expected output) as the third operand. The expected code
; materializes the constant in VGPR v0 and passes that register to
; v_permlanex16_var_b32.
254define amdgpu_kernel void @v_permlanex16var_b32_vl(ptr addrspace(1) %out, i32 %src0) {
255; GFX12-SDAG-LABEL: v_permlanex16var_b32_vl:
256; GFX12-SDAG:       ; %bb.0:
257; GFX12-SDAG-NEXT:    s_load_b96 s[0:2], s[4:5], 0x24
258; GFX12-SDAG-NEXT:    v_mov_b32_e32 v0, 0xc1d1
259; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
260; GFX12-SDAG-NEXT:    v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s2
261; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
262; GFX12-SDAG-NEXT:    v_permlanex16_var_b32 v1, v1, v0
263; GFX12-SDAG-NEXT:    global_store_b32 v2, v1, s[0:1]
264; GFX12-SDAG-NEXT:    s_endpgm
265;
266; GFX12-GISEL-LABEL: v_permlanex16var_b32_vl:
267; GFX12-GISEL:       ; %bb.0:
268; GFX12-GISEL-NEXT:    s_load_b96 s[0:2], s[4:5], 0x24
269; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
270; GFX12-GISEL-NEXT:    v_dual_mov_b32 v0, 0xc1d1 :: v_dual_mov_b32 v1, s2
271; GFX12-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
272; GFX12-GISEL-NEXT:    v_permlanex16_var_b32 v1, v1, v0
273; GFX12-GISEL-NEXT:    v_mov_b32_e32 v0, 0
274; GFX12-GISEL-NEXT:    global_store_b32 v0, v1, s[0:1]
275; GFX12-GISEL-NEXT:    s_endpgm
276  %v = call i32 @llvm.amdgcn.permlanex16.var(i32 %src0, i32 %src0, i32 49617, i1 false, i1 false)
277  store i32 %v, ptr addrspace(1) %out
278  ret void
279}
280
; permlanex16.var whose third operand is the result of workitem.id.x; the
; first two operands are the kernel argument %src0 and both i1 flags are
; false. The expected code masks v0 with 0x3ff and uses it as the last
; operand of v_permlanex16_var_b32.
281define amdgpu_kernel void @v_permlanex16var_b32_vvv(ptr addrspace(1) %out, i32 %src0) {
282; GFX12-SDAG-LABEL: v_permlanex16var_b32_vvv:
283; GFX12-SDAG:       ; %bb.0:
284; GFX12-SDAG-NEXT:    s_load_b96 s[0:2], s[4:5], 0x24
285; GFX12-SDAG-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
286; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
287; GFX12-SDAG-NEXT:    v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s2
288; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
289; GFX12-SDAG-NEXT:    v_permlanex16_var_b32 v1, v1, v0
290; GFX12-SDAG-NEXT:    global_store_b32 v2, v1, s[0:1]
291; GFX12-SDAG-NEXT:    s_endpgm
292;
293; GFX12-GISEL-LABEL: v_permlanex16var_b32_vvv:
294; GFX12-GISEL:       ; %bb.0:
295; GFX12-GISEL-NEXT:    s_load_b96 s[0:2], s[4:5], 0x24
296; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
297; GFX12-GISEL-NEXT:    v_dual_mov_b32 v1, s2 :: v_dual_and_b32 v0, 0x3ff, v0
298; GFX12-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
299; GFX12-GISEL-NEXT:    v_permlanex16_var_b32 v1, v1, v0
300; GFX12-GISEL-NEXT:    v_mov_b32_e32 v0, 0
301; GFX12-GISEL-NEXT:    global_store_b32 v0, v1, s[0:1]
302; GFX12-GISEL-NEXT:    s_endpgm
303  %tidx = call i32 @llvm.amdgcn.workitem.id.x()
304  %v = call i32 @llvm.amdgcn.permlanex16.var(i32 %src0, i32 %src0, i32 %tidx, i1 false, i1 false)
305  store i32 %v, ptr addrspace(1) %out
306  ret void
307}
308
; Variant of the permlanex16 _vv test with the first i1 flag (fourth operand)
; set to true and the second left false. The expected v_permlanex16_var_b32
; carries op_sel:[1,0].
309define amdgpu_kernel void @v_permlanex16var_b32_vv_fi(ptr addrspace(1) %out, i32 %src0, i32 %src1) {
310; GFX12-SDAG-LABEL: v_permlanex16var_b32_vv_fi:
311; GFX12-SDAG:       ; %bb.0:
312; GFX12-SDAG-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
313; GFX12-SDAG-NEXT:    v_mov_b32_e32 v2, 0
314; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
315; GFX12-SDAG-NEXT:    v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
316; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
317; GFX12-SDAG-NEXT:    v_permlanex16_var_b32 v0, v0, v1 op_sel:[1,0]
318; GFX12-SDAG-NEXT:    global_store_b32 v2, v0, s[0:1]
319; GFX12-SDAG-NEXT:    s_endpgm
320;
321; GFX12-GISEL-LABEL: v_permlanex16var_b32_vv_fi:
322; GFX12-GISEL:       ; %bb.0:
323; GFX12-GISEL-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
324; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
325; GFX12-GISEL-NEXT:    v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
326; GFX12-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
327; GFX12-GISEL-NEXT:    v_permlanex16_var_b32 v0, v0, v1 op_sel:[1,0]
328; GFX12-GISEL-NEXT:    v_mov_b32_e32 v1, 0
329; GFX12-GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
330; GFX12-GISEL-NEXT:    s_endpgm
331  %v = call i32 @llvm.amdgcn.permlanex16.var(i32 %src0, i32 %src0, i32 %src1, i1 true, i1 false)
332  store i32 %v, ptr addrspace(1) %out
333  ret void
334}
335
; Variant of the permlanex16 _vv test with the second i1 flag (fifth operand)
; set to true and the first left false. The expected v_permlanex16_var_b32
; carries op_sel:[0,1].
336define amdgpu_kernel void @v_permlanex16var_b32_vv_bc(ptr addrspace(1) %out, i32 %src0, i32 %src1) {
337; GFX12-SDAG-LABEL: v_permlanex16var_b32_vv_bc:
338; GFX12-SDAG:       ; %bb.0:
339; GFX12-SDAG-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
340; GFX12-SDAG-NEXT:    v_mov_b32_e32 v2, 0
341; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
342; GFX12-SDAG-NEXT:    v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
343; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
344; GFX12-SDAG-NEXT:    v_permlanex16_var_b32 v0, v0, v1 op_sel:[0,1]
345; GFX12-SDAG-NEXT:    global_store_b32 v2, v0, s[0:1]
346; GFX12-SDAG-NEXT:    s_endpgm
347;
348; GFX12-GISEL-LABEL: v_permlanex16var_b32_vv_bc:
349; GFX12-GISEL:       ; %bb.0:
350; GFX12-GISEL-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
351; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
352; GFX12-GISEL-NEXT:    v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
353; GFX12-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
354; GFX12-GISEL-NEXT:    v_permlanex16_var_b32 v0, v0, v1 op_sel:[0,1]
355; GFX12-GISEL-NEXT:    v_mov_b32_e32 v1, 0
356; GFX12-GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
357; GFX12-GISEL-NEXT:    s_endpgm
358  %v = call i32 @llvm.amdgcn.permlanex16.var(i32 %src0, i32 %src0, i32 %src1, i1 false, i1 true)
359  store i32 %v, ptr addrspace(1) %out
360  ret void
361}
362
; Variant of the permlanex16 _vv test with both i1 flags set to true. The
; expected v_permlanex16_var_b32 carries op_sel:[1,1].
363define amdgpu_kernel void @v_permlanex16var_b32_vv_fi_bc(ptr addrspace(1) %out, i32 %src0, i32 %src1) {
364; GFX12-SDAG-LABEL: v_permlanex16var_b32_vv_fi_bc:
365; GFX12-SDAG:       ; %bb.0:
366; GFX12-SDAG-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
367; GFX12-SDAG-NEXT:    v_mov_b32_e32 v2, 0
368; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
369; GFX12-SDAG-NEXT:    v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
370; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
371; GFX12-SDAG-NEXT:    v_permlanex16_var_b32 v0, v0, v1 op_sel:[1,1]
372; GFX12-SDAG-NEXT:    global_store_b32 v2, v0, s[0:1]
373; GFX12-SDAG-NEXT:    s_endpgm
374;
375; GFX12-GISEL-LABEL: v_permlanex16var_b32_vv_fi_bc:
376; GFX12-GISEL:       ; %bb.0:
377; GFX12-GISEL-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
378; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
379; GFX12-GISEL-NEXT:    v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
380; GFX12-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
381; GFX12-GISEL-NEXT:    v_permlanex16_var_b32 v0, v0, v1 op_sel:[1,1]
382; GFX12-GISEL-NEXT:    v_mov_b32_e32 v1, 0
383; GFX12-GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
384; GFX12-GISEL-NEXT:    s_endpgm
385  %v = call i32 @llvm.amdgcn.permlanex16.var(i32 %src0, i32 %src0, i32 %src1, i1 true, i1 true)
386  store i32 %v, ptr addrspace(1) %out
387  ret void
388}
389
; permlane16.var where the first two operands are both the result of
; workitem.id.x and the third is the kernel argument %src1; both i1 flags are
; false. %src0 is not used, and the expected code loads %src1 (offset 0x30)
; and %out (offset 0x24) with two separate scalar loads. The masked v0 is
; expected to serve as both destination and first source of the permlane.
390define amdgpu_kernel void @v_permlane16var_b32_tid_tid(ptr addrspace(1) %out, i32 %src0, i32 %src1) {
391; GFX12-SDAG-LABEL: v_permlane16var_b32_tid_tid:
392; GFX12-SDAG:       ; %bb.0:
393; GFX12-SDAG-NEXT:    s_clause 0x1
394; GFX12-SDAG-NEXT:    s_load_b32 s2, s[4:5], 0x30
395; GFX12-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
396; GFX12-SDAG-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
397; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
398; GFX12-SDAG-NEXT:    v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s2
399; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
400; GFX12-SDAG-NEXT:    v_permlane16_var_b32 v0, v0, v1
401; GFX12-SDAG-NEXT:    global_store_b32 v2, v0, s[0:1]
402; GFX12-SDAG-NEXT:    s_endpgm
403;
404; GFX12-GISEL-LABEL: v_permlane16var_b32_tid_tid:
405; GFX12-GISEL:       ; %bb.0:
406; GFX12-GISEL-NEXT:    s_clause 0x1
407; GFX12-GISEL-NEXT:    s_load_b32 s2, s[4:5], 0x30
408; GFX12-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
409; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
410; GFX12-GISEL-NEXT:    v_dual_mov_b32 v1, s2 :: v_dual_and_b32 v0, 0x3ff, v0
411; GFX12-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
412; GFX12-GISEL-NEXT:    v_permlane16_var_b32 v0, v0, v1
413; GFX12-GISEL-NEXT:    v_mov_b32_e32 v1, 0
414; GFX12-GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
415; GFX12-GISEL-NEXT:    s_endpgm
416  %tidx = call i32 @llvm.amdgcn.workitem.id.x()
417  %v = call i32 @llvm.amdgcn.permlane16.var(i32 %tidx, i32 %tidx, i32 %src1, i1 false, i1 false)
418  store i32 %v, ptr addrspace(1) %out
419  ret void
420}
421
; permlane16.var whose first operand is a frozen poison value, second operand
; is the result of workitem.id.x and third is the kernel argument %src1; both
; i1 flags are false. No extra register is expected to be set up for the
; first operand: the masked v0 is both destination and first source of
; v_permlane16_var_b32.
422define amdgpu_kernel void @v_permlane16var_b32_undef_tid(ptr addrspace(1) %out, i32 %src0, i32 %src1) {
423; GFX12-SDAG-LABEL: v_permlane16var_b32_undef_tid:
424; GFX12-SDAG:       ; %bb.0:
425; GFX12-SDAG-NEXT:    s_clause 0x1
426; GFX12-SDAG-NEXT:    s_load_b32 s2, s[4:5], 0x30
427; GFX12-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
428; GFX12-SDAG-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
429; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
430; GFX12-SDAG-NEXT:    v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s2
431; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
432; GFX12-SDAG-NEXT:    v_permlane16_var_b32 v0, v0, v1
433; GFX12-SDAG-NEXT:    global_store_b32 v2, v0, s[0:1]
434; GFX12-SDAG-NEXT:    s_endpgm
435;
436; GFX12-GISEL-LABEL: v_permlane16var_b32_undef_tid:
437; GFX12-GISEL:       ; %bb.0:
438; GFX12-GISEL-NEXT:    s_clause 0x1
439; GFX12-GISEL-NEXT:    s_load_b32 s2, s[4:5], 0x30
440; GFX12-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
441; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
442; GFX12-GISEL-NEXT:    v_dual_mov_b32 v1, s2 :: v_dual_and_b32 v0, 0x3ff, v0
443; GFX12-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
444; GFX12-GISEL-NEXT:    v_permlane16_var_b32 v0, v0, v1
445; GFX12-GISEL-NEXT:    v_mov_b32_e32 v1, 0
446; GFX12-GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
447; GFX12-GISEL-NEXT:    s_endpgm
448  %tidx = call i32 @llvm.amdgcn.workitem.id.x()
449  %undef = freeze i32 poison
450  %v = call i32 @llvm.amdgcn.permlane16.var(i32 %undef, i32 %tidx, i32 %src1, i1 false, i1 false)
451  store i32 %v, ptr addrspace(1) %out
452  ret void
453}
454
; permlane16.var whose first operand is the constant 12345 (0x3039 in the
; expected output), second operand is the result of workitem.id.x and third
; is the kernel argument %src1; both i1 flags are false. The constant is
; expected to be moved into v1, which is also the destination register of
; v_permlane16_var_b32, while the masked v0 is the first source.
455define amdgpu_kernel void @v_permlane16var_b32_i_tid(ptr addrspace(1) %out, i32 %src0, i32 %src1) {
456; GFX12-SDAG-LABEL: v_permlane16var_b32_i_tid:
457; GFX12-SDAG:       ; %bb.0:
458; GFX12-SDAG-NEXT:    s_clause 0x1
459; GFX12-SDAG-NEXT:    s_load_b32 s2, s[4:5], 0x30
460; GFX12-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
461; GFX12-SDAG-NEXT:    v_dual_mov_b32 v3, 0 :: v_dual_and_b32 v0, 0x3ff, v0
462; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
463; GFX12-SDAG-NEXT:    v_dual_mov_b32 v1, 0x3039 :: v_dual_mov_b32 v2, s2
464; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
465; GFX12-SDAG-NEXT:    v_permlane16_var_b32 v1, v0, v2
466; GFX12-SDAG-NEXT:    global_store_b32 v3, v1, s[0:1]
467; GFX12-SDAG-NEXT:    s_endpgm
468;
469; GFX12-GISEL-LABEL: v_permlane16var_b32_i_tid:
470; GFX12-GISEL:       ; %bb.0:
471; GFX12-GISEL-NEXT:    s_clause 0x1
472; GFX12-GISEL-NEXT:    s_load_b32 s2, s[4:5], 0x30
473; GFX12-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
474; GFX12-GISEL-NEXT:    v_mov_b32_e32 v1, 0x3039
475; GFX12-GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
476; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
477; GFX12-GISEL-NEXT:    v_mov_b32_e32 v2, s2
478; GFX12-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
479; GFX12-GISEL-NEXT:    v_permlane16_var_b32 v1, v0, v2
480; GFX12-GISEL-NEXT:    v_mov_b32_e32 v0, 0
481; GFX12-GISEL-NEXT:    global_store_b32 v0, v1, s[0:1]
482; GFX12-GISEL-NEXT:    s_endpgm
483  %tidx = call i32 @llvm.amdgcn.workitem.id.x()
484  %v = call i32 @llvm.amdgcn.permlane16.var(i32 12345, i32 %tidx, i32 %src1, i1 false, i1 false)
485  store i32 %v, ptr addrspace(1) %out
486  ret void
487}
488
; permlane16.var with a frozen poison first operand, workitem.id.x as second
; operand, the kernel argument %src1 as third, and only the first i1 flag
; (fourth operand) set to true. The expected v_permlane16_var_b32 carries
; op_sel:[1,0] and uses the masked v0 as both destination and first source.
; NOTE(review): the name says "i_tid", but unlike the plain _i_tid test the
; first operand here is %undef rather than an immediate - confirm this is
; intended.
489define amdgpu_kernel void @v_permlane16var_b32_i_tid_fi(ptr addrspace(1) %out, i32 %src0, i32 %src1) {
490; GFX12-SDAG-LABEL: v_permlane16var_b32_i_tid_fi:
491; GFX12-SDAG:       ; %bb.0:
492; GFX12-SDAG-NEXT:    s_clause 0x1
493; GFX12-SDAG-NEXT:    s_load_b32 s2, s[4:5], 0x30
494; GFX12-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
495; GFX12-SDAG-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
496; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
497; GFX12-SDAG-NEXT:    v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s2
498; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
499; GFX12-SDAG-NEXT:    v_permlane16_var_b32 v0, v0, v1 op_sel:[1,0]
500; GFX12-SDAG-NEXT:    global_store_b32 v2, v0, s[0:1]
501; GFX12-SDAG-NEXT:    s_endpgm
502;
503; GFX12-GISEL-LABEL: v_permlane16var_b32_i_tid_fi:
504; GFX12-GISEL:       ; %bb.0:
505; GFX12-GISEL-NEXT:    s_clause 0x1
506; GFX12-GISEL-NEXT:    s_load_b32 s2, s[4:5], 0x30
507; GFX12-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
508; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
509; GFX12-GISEL-NEXT:    v_dual_mov_b32 v1, s2 :: v_dual_and_b32 v0, 0x3ff, v0
510; GFX12-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
511; GFX12-GISEL-NEXT:    v_permlane16_var_b32 v0, v0, v1 op_sel:[1,0]
512; GFX12-GISEL-NEXT:    v_mov_b32_e32 v1, 0
513; GFX12-GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
514; GFX12-GISEL-NEXT:    s_endpgm
515  %tidx = call i32 @llvm.amdgcn.workitem.id.x()
516  %undef = freeze i32 poison
517  %v = call i32 @llvm.amdgcn.permlane16.var(i32 %undef, i32 %tidx, i32 %src1, i1 true, i1 false)
518  store i32 %v, ptr addrspace(1) %out
519  ret void
520}
521
; permlane16.var with a frozen poison first operand, workitem.id.x as second
; operand, the kernel argument %src1 as third, and only the second i1 flag
; (fifth operand) set to true. The expected v_permlane16_var_b32 carries
; op_sel:[0,1] and uses the masked v0 as both destination and first source.
; NOTE(review): the name says "i_tid", but unlike the plain _i_tid test the
; first operand here is %undef rather than an immediate - confirm this is
; intended.
522define amdgpu_kernel void @v_permlane16var_b32_i_tid_bc(ptr addrspace(1) %out, i32 %src0, i32 %src1) {
523; GFX12-SDAG-LABEL: v_permlane16var_b32_i_tid_bc:
524; GFX12-SDAG:       ; %bb.0:
525; GFX12-SDAG-NEXT:    s_clause 0x1
526; GFX12-SDAG-NEXT:    s_load_b32 s2, s[4:5], 0x30
527; GFX12-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
528; GFX12-SDAG-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
529; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
530; GFX12-SDAG-NEXT:    v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s2
531; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
532; GFX12-SDAG-NEXT:    v_permlane16_var_b32 v0, v0, v1 op_sel:[0,1]
533; GFX12-SDAG-NEXT:    global_store_b32 v2, v0, s[0:1]
534; GFX12-SDAG-NEXT:    s_endpgm
535;
536; GFX12-GISEL-LABEL: v_permlane16var_b32_i_tid_bc:
537; GFX12-GISEL:       ; %bb.0:
538; GFX12-GISEL-NEXT:    s_clause 0x1
539; GFX12-GISEL-NEXT:    s_load_b32 s2, s[4:5], 0x30
540; GFX12-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
541; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
542; GFX12-GISEL-NEXT:    v_dual_mov_b32 v1, s2 :: v_dual_and_b32 v0, 0x3ff, v0
543; GFX12-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
544; GFX12-GISEL-NEXT:    v_permlane16_var_b32 v0, v0, v1 op_sel:[0,1]
545; GFX12-GISEL-NEXT:    v_mov_b32_e32 v1, 0
546; GFX12-GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
547; GFX12-GISEL-NEXT:    s_endpgm
548  %tidx = call i32 @llvm.amdgcn.workitem.id.x()
549  %undef = freeze i32 poison
550  %v = call i32 @llvm.amdgcn.permlane16.var(i32 %undef, i32 %tidx, i32 %src1, i1 false, i1 true)
551  store i32 %v, ptr addrspace(1) %out
552  ret void
553}
554
; permlane16.var with a frozen poison first operand, workitem.id.x as second
; operand, the kernel argument %src1 as third, and both i1 flags set to true.
; The expected v_permlane16_var_b32 carries op_sel:[1,1] and uses the masked
; v0 as both destination and first source.
; NOTE(review): the name says "i_tid", but unlike the plain _i_tid test the
; first operand here is %undef rather than an immediate - confirm this is
; intended.
555define amdgpu_kernel void @v_permlane16var_b32_i_tid_fi_bc(ptr addrspace(1) %out, i32 %src0, i32 %src1) {
556; GFX12-SDAG-LABEL: v_permlane16var_b32_i_tid_fi_bc:
557; GFX12-SDAG:       ; %bb.0:
558; GFX12-SDAG-NEXT:    s_clause 0x1
559; GFX12-SDAG-NEXT:    s_load_b32 s2, s[4:5], 0x30
560; GFX12-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
561; GFX12-SDAG-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
562; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
563; GFX12-SDAG-NEXT:    v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s2
564; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
565; GFX12-SDAG-NEXT:    v_permlane16_var_b32 v0, v0, v1 op_sel:[1,1]
566; GFX12-SDAG-NEXT:    global_store_b32 v2, v0, s[0:1]
567; GFX12-SDAG-NEXT:    s_endpgm
568;
569; GFX12-GISEL-LABEL: v_permlane16var_b32_i_tid_fi_bc:
570; GFX12-GISEL:       ; %bb.0:
571; GFX12-GISEL-NEXT:    s_clause 0x1
572; GFX12-GISEL-NEXT:    s_load_b32 s2, s[4:5], 0x30
573; GFX12-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
574; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
575; GFX12-GISEL-NEXT:    v_dual_mov_b32 v1, s2 :: v_dual_and_b32 v0, 0x3ff, v0
576; GFX12-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
577; GFX12-GISEL-NEXT:    v_permlane16_var_b32 v0, v0, v1 op_sel:[1,1]
578; GFX12-GISEL-NEXT:    v_mov_b32_e32 v1, 0
579; GFX12-GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
580; GFX12-GISEL-NEXT:    s_endpgm
581  %tidx = call i32 @llvm.amdgcn.workitem.id.x()
582  %undef = freeze i32 poison
583  %v = call i32 @llvm.amdgcn.permlane16.var(i32 %undef, i32 %tidx, i32 %src1, i1 true, i1 true)
584  store i32 %v, ptr addrspace(1) %out
585  ret void
586}
587
; permlanex16.var where the first two operands are both the result of
; workitem.id.x and the third is the kernel argument %src1; both i1 flags are
; false. %src0 is not used, and the expected code loads %src1 (offset 0x30)
; and %out (offset 0x24) with two separate scalar loads. The masked v0 is
; expected to serve as both destination and first source of the permlane.
588define amdgpu_kernel void @v_permlanex16var_b32_tid_tid(ptr addrspace(1) %out, i32 %src0, i32 %src1) {
589; GFX12-SDAG-LABEL: v_permlanex16var_b32_tid_tid:
590; GFX12-SDAG:       ; %bb.0:
591; GFX12-SDAG-NEXT:    s_clause 0x1
592; GFX12-SDAG-NEXT:    s_load_b32 s2, s[4:5], 0x30
593; GFX12-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
594; GFX12-SDAG-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
595; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
596; GFX12-SDAG-NEXT:    v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s2
597; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
598; GFX12-SDAG-NEXT:    v_permlanex16_var_b32 v0, v0, v1
599; GFX12-SDAG-NEXT:    global_store_b32 v2, v0, s[0:1]
600; GFX12-SDAG-NEXT:    s_endpgm
601;
602; GFX12-GISEL-LABEL: v_permlanex16var_b32_tid_tid:
603; GFX12-GISEL:       ; %bb.0:
604; GFX12-GISEL-NEXT:    s_clause 0x1
605; GFX12-GISEL-NEXT:    s_load_b32 s2, s[4:5], 0x30
606; GFX12-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
607; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
608; GFX12-GISEL-NEXT:    v_dual_mov_b32 v1, s2 :: v_dual_and_b32 v0, 0x3ff, v0
609; GFX12-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
610; GFX12-GISEL-NEXT:    v_permlanex16_var_b32 v0, v0, v1
611; GFX12-GISEL-NEXT:    v_mov_b32_e32 v1, 0
612; GFX12-GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
613; GFX12-GISEL-NEXT:    s_endpgm
614  %tidx = call i32 @llvm.amdgcn.workitem.id.x()
615  %v = call i32 @llvm.amdgcn.permlanex16.var(i32 %tidx, i32 %tidx, i32 %src1, i1 false, i1 false)
616  store i32 %v, ptr addrspace(1) %out
617  ret void
618}
619
; permlanex16.var whose first operand is a frozen poison value, second
; operand is the result of workitem.id.x and third is the kernel argument
; %src1; both i1 flags are false. No extra register is expected to be set up
; for the first operand: the masked v0 is both destination and first source
; of v_permlanex16_var_b32.
620define amdgpu_kernel void @v_permlanex16var_b32_undef_tid(ptr addrspace(1) %out, i32 %src0, i32 %src1) {
621; GFX12-SDAG-LABEL: v_permlanex16var_b32_undef_tid:
622; GFX12-SDAG:       ; %bb.0:
623; GFX12-SDAG-NEXT:    s_clause 0x1
624; GFX12-SDAG-NEXT:    s_load_b32 s2, s[4:5], 0x30
625; GFX12-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
626; GFX12-SDAG-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
627; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
628; GFX12-SDAG-NEXT:    v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s2
629; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
630; GFX12-SDAG-NEXT:    v_permlanex16_var_b32 v0, v0, v1
631; GFX12-SDAG-NEXT:    global_store_b32 v2, v0, s[0:1]
632; GFX12-SDAG-NEXT:    s_endpgm
633;
634; GFX12-GISEL-LABEL: v_permlanex16var_b32_undef_tid:
635; GFX12-GISEL:       ; %bb.0:
636; GFX12-GISEL-NEXT:    s_clause 0x1
637; GFX12-GISEL-NEXT:    s_load_b32 s2, s[4:5], 0x30
638; GFX12-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
639; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
640; GFX12-GISEL-NEXT:    v_dual_mov_b32 v1, s2 :: v_dual_and_b32 v0, 0x3ff, v0
641; GFX12-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
642; GFX12-GISEL-NEXT:    v_permlanex16_var_b32 v0, v0, v1
643; GFX12-GISEL-NEXT:    v_mov_b32_e32 v1, 0
644; GFX12-GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
645; GFX12-GISEL-NEXT:    s_endpgm
646  %tidx = call i32 @llvm.amdgcn.workitem.id.x()
647  %undef = freeze i32 poison
648  %v = call i32 @llvm.amdgcn.permlanex16.var(i32 %undef, i32 %tidx, i32 %src1, i1 false, i1 false)
649  store i32 %v, ptr addrspace(1) %out
650  ret void
651}
652
; permlanex16.var whose first operand is the constant 12345 (0x3039 in the
; expected output), second operand is the result of workitem.id.x and third
; is the kernel argument %src1; both i1 flags are false. The constant is
; expected to be moved into v1, which is also the destination register of
; v_permlanex16_var_b32, while the masked v0 is the first source.
653define amdgpu_kernel void @v_permlanex16var_b32_i_tid(ptr addrspace(1) %out, i32 %src0, i32 %src1) {
654; GFX12-SDAG-LABEL: v_permlanex16var_b32_i_tid:
655; GFX12-SDAG:       ; %bb.0:
656; GFX12-SDAG-NEXT:    s_clause 0x1
657; GFX12-SDAG-NEXT:    s_load_b32 s2, s[4:5], 0x30
658; GFX12-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
659; GFX12-SDAG-NEXT:    v_dual_mov_b32 v3, 0 :: v_dual_and_b32 v0, 0x3ff, v0
660; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
661; GFX12-SDAG-NEXT:    v_dual_mov_b32 v1, 0x3039 :: v_dual_mov_b32 v2, s2
662; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
663; GFX12-SDAG-NEXT:    v_permlanex16_var_b32 v1, v0, v2
664; GFX12-SDAG-NEXT:    global_store_b32 v3, v1, s[0:1]
665; GFX12-SDAG-NEXT:    s_endpgm
666;
667; GFX12-GISEL-LABEL: v_permlanex16var_b32_i_tid:
668; GFX12-GISEL:       ; %bb.0:
669; GFX12-GISEL-NEXT:    s_clause 0x1
670; GFX12-GISEL-NEXT:    s_load_b32 s2, s[4:5], 0x30
671; GFX12-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
672; GFX12-GISEL-NEXT:    v_mov_b32_e32 v1, 0x3039
673; GFX12-GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
674; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
675; GFX12-GISEL-NEXT:    v_mov_b32_e32 v2, s2
676; GFX12-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
677; GFX12-GISEL-NEXT:    v_permlanex16_var_b32 v1, v0, v2
678; GFX12-GISEL-NEXT:    v_mov_b32_e32 v0, 0
679; GFX12-GISEL-NEXT:    global_store_b32 v0, v1, s[0:1]
680; GFX12-GISEL-NEXT:    s_endpgm
681  %tidx = call i32 @llvm.amdgcn.workitem.id.x()
682  %v = call i32 @llvm.amdgcn.permlanex16.var(i32 12345, i32 %tidx, i32 %src1, i1 false, i1 false)
683  store i32 %v, ptr addrspace(1) %out
684  ret void
685}
686
; Calls llvm.amdgcn.permlanex16.var with the fourth operand (first i1 flag)
; set to true and the fifth set to false, then stores the result to %out.
; NOTE(review): the `_fi` suffix suggests the fourth operand is the "fi"
; flag -- confirm against the intrinsic definition.
;
; Despite `_i_` in the name, the first operand here is a frozen poison value,
; not an immediate.
;
; Expected ISA (both selectors): v_permlanex16_var_b32 is printed with
; `op_sel:[1,0]`, and the destination reuses v0 with no constant
; materialized for the first operand.
;
; %src0 is not referenced in the body.
; NOTE(review): presumably %src0 is kept so %src1 stays at the 0x30 offset
; seen in the s_load_b32 checks -- confirm before removing it.
;
; The GFX12-* lines are autogenerated (see the NOTE on the first line of the
; file); regenerate them with utils/update_llc_test_checks.py rather than
; editing by hand.
687define amdgpu_kernel void @v_permlanex16var_b32_i_tid_fi(ptr addrspace(1) %out, i32 %src0, i32 %src1) {
688; GFX12-SDAG-LABEL: v_permlanex16var_b32_i_tid_fi:
689; GFX12-SDAG:       ; %bb.0:
690; GFX12-SDAG-NEXT:    s_clause 0x1
691; GFX12-SDAG-NEXT:    s_load_b32 s2, s[4:5], 0x30
692; GFX12-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
693; GFX12-SDAG-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
694; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
695; GFX12-SDAG-NEXT:    v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s2
696; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
697; GFX12-SDAG-NEXT:    v_permlanex16_var_b32 v0, v0, v1 op_sel:[1,0]
698; GFX12-SDAG-NEXT:    global_store_b32 v2, v0, s[0:1]
699; GFX12-SDAG-NEXT:    s_endpgm
700;
701; GFX12-GISEL-LABEL: v_permlanex16var_b32_i_tid_fi:
702; GFX12-GISEL:       ; %bb.0:
703; GFX12-GISEL-NEXT:    s_clause 0x1
704; GFX12-GISEL-NEXT:    s_load_b32 s2, s[4:5], 0x30
705; GFX12-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
706; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
707; GFX12-GISEL-NEXT:    v_dual_mov_b32 v1, s2 :: v_dual_and_b32 v0, 0x3ff, v0
708; GFX12-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
709; GFX12-GISEL-NEXT:    v_permlanex16_var_b32 v0, v0, v1 op_sel:[1,0]
710; GFX12-GISEL-NEXT:    v_mov_b32_e32 v1, 0
711; GFX12-GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
712; GFX12-GISEL-NEXT:    s_endpgm
713  %tidx = call i32 @llvm.amdgcn.workitem.id.x()
714  %undef = freeze i32 poison
  ; Fourth operand true / fifth false corresponds to op_sel:[1,0] above.
715  %v = call i32 @llvm.amdgcn.permlanex16.var(i32 %undef, i32 %tidx, i32 %src1, i1 true, i1 false)
716  store i32 %v, ptr addrspace(1) %out
717  ret void
718}
719
; Calls llvm.amdgcn.permlanex16.var with the fourth operand (first i1 flag)
; set to false and the fifth set to true, then stores the result to %out.
; NOTE(review): the `_bc` suffix suggests the fifth operand is the "bc"
; flag -- confirm against the intrinsic definition.
;
; Despite `_i_` in the name, the first operand here is a frozen poison value,
; not an immediate.
;
; Expected ISA (both selectors): v_permlanex16_var_b32 is printed with
; `op_sel:[0,1]`, and the destination reuses v0 with no constant
; materialized for the first operand.
;
; %src0 is not referenced in the body.
; NOTE(review): presumably %src0 is kept so %src1 stays at the 0x30 offset
; seen in the s_load_b32 checks -- confirm before removing it.
;
; The GFX12-* lines are autogenerated (see the NOTE on the first line of the
; file); regenerate them with utils/update_llc_test_checks.py rather than
; editing by hand.
720define amdgpu_kernel void @v_permlanex16var_b32_i_tid_bc(ptr addrspace(1) %out, i32 %src0, i32 %src1) {
721; GFX12-SDAG-LABEL: v_permlanex16var_b32_i_tid_bc:
722; GFX12-SDAG:       ; %bb.0:
723; GFX12-SDAG-NEXT:    s_clause 0x1
724; GFX12-SDAG-NEXT:    s_load_b32 s2, s[4:5], 0x30
725; GFX12-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
726; GFX12-SDAG-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
727; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
728; GFX12-SDAG-NEXT:    v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s2
729; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
730; GFX12-SDAG-NEXT:    v_permlanex16_var_b32 v0, v0, v1 op_sel:[0,1]
731; GFX12-SDAG-NEXT:    global_store_b32 v2, v0, s[0:1]
732; GFX12-SDAG-NEXT:    s_endpgm
733;
734; GFX12-GISEL-LABEL: v_permlanex16var_b32_i_tid_bc:
735; GFX12-GISEL:       ; %bb.0:
736; GFX12-GISEL-NEXT:    s_clause 0x1
737; GFX12-GISEL-NEXT:    s_load_b32 s2, s[4:5], 0x30
738; GFX12-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
739; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
740; GFX12-GISEL-NEXT:    v_dual_mov_b32 v1, s2 :: v_dual_and_b32 v0, 0x3ff, v0
741; GFX12-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
742; GFX12-GISEL-NEXT:    v_permlanex16_var_b32 v0, v0, v1 op_sel:[0,1]
743; GFX12-GISEL-NEXT:    v_mov_b32_e32 v1, 0
744; GFX12-GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
745; GFX12-GISEL-NEXT:    s_endpgm
746  %tidx = call i32 @llvm.amdgcn.workitem.id.x()
747  %undef = freeze i32 poison
  ; Fourth operand false / fifth true corresponds to op_sel:[0,1] above.
748  %v = call i32 @llvm.amdgcn.permlanex16.var(i32 %undef, i32 %tidx, i32 %src1, i1 false, i1 true)
749  store i32 %v, ptr addrspace(1) %out
750  ret void
751}
752
; Calls llvm.amdgcn.permlanex16.var with both trailing i1 flags (fourth and
; fifth operands) set to true, then stores the result to %out.
; NOTE(review): the `_fi_bc` suffix suggests these are the "fi" and "bc"
; flags respectively -- confirm against the intrinsic definition.
;
; Despite `_i_` in the name, the first operand here is a frozen poison value,
; not an immediate.
;
; Expected ISA (both selectors): v_permlanex16_var_b32 is printed with
; `op_sel:[1,1]`, and the destination reuses v0 with no constant
; materialized for the first operand.
;
; %src0 is not referenced in the body.
; NOTE(review): presumably %src0 is kept so %src1 stays at the 0x30 offset
; seen in the s_load_b32 checks -- confirm before removing it.
;
; The GFX12-* lines are autogenerated (see the NOTE on the first line of the
; file); regenerate them with utils/update_llc_test_checks.py rather than
; editing by hand.
753define amdgpu_kernel void @v_permlanex16var_b32_i_tid_fi_bc(ptr addrspace(1) %out, i32 %src0, i32 %src1) {
754; GFX12-SDAG-LABEL: v_permlanex16var_b32_i_tid_fi_bc:
755; GFX12-SDAG:       ; %bb.0:
756; GFX12-SDAG-NEXT:    s_clause 0x1
757; GFX12-SDAG-NEXT:    s_load_b32 s2, s[4:5], 0x30
758; GFX12-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
759; GFX12-SDAG-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
760; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
761; GFX12-SDAG-NEXT:    v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s2
762; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
763; GFX12-SDAG-NEXT:    v_permlanex16_var_b32 v0, v0, v1 op_sel:[1,1]
764; GFX12-SDAG-NEXT:    global_store_b32 v2, v0, s[0:1]
765; GFX12-SDAG-NEXT:    s_endpgm
766;
767; GFX12-GISEL-LABEL: v_permlanex16var_b32_i_tid_fi_bc:
768; GFX12-GISEL:       ; %bb.0:
769; GFX12-GISEL-NEXT:    s_clause 0x1
770; GFX12-GISEL-NEXT:    s_load_b32 s2, s[4:5], 0x30
771; GFX12-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
772; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
773; GFX12-GISEL-NEXT:    v_dual_mov_b32 v1, s2 :: v_dual_and_b32 v0, 0x3ff, v0
774; GFX12-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
775; GFX12-GISEL-NEXT:    v_permlanex16_var_b32 v0, v0, v1 op_sel:[1,1]
776; GFX12-GISEL-NEXT:    v_mov_b32_e32 v1, 0
777; GFX12-GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
778; GFX12-GISEL-NEXT:    s_endpgm
779  %tidx = call i32 @llvm.amdgcn.workitem.id.x()
780  %undef = freeze i32 poison
  ; Both flags true corresponds to op_sel:[1,1] above.
781  %v = call i32 @llvm.amdgcn.permlanex16.var(i32 %undef, i32 %tidx, i32 %src1, i1 true, i1 true)
782  store i32 %v, ptr addrspace(1) %out
783  ret void
784}
785