xref: /llvm-project/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane.ptr.ll (revision 35f7b60aa6105753859bcccaf4a793aaf16b4acd)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -global-isel=0 -amdgpu-load-store-vectorizer=0 -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10-SDAG %s
; RUN: llc -global-isel=0 -amdgpu-load-store-vectorizer=0 -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11-SDAG %s
; RUN: llc -global-isel=0 -amdgpu-load-store-vectorizer=0 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12-SDAG %s
5*35f7b60aSVikram Hegde
; Codegen test: llvm.amdgcn.permlane16 on a plain `ptr` (default address
; space). %src0 is passed as both of the first two operands and both trailing
; i1 flags are false. The checks below expect %src1/%src2 to be copied into
; SGPRs with v_readfirstlane_b32, one v_permlane16_b32 per 32-bit half of the
; pointer (v3, then v2), and a single 64-bit global store of the result.
define void @v_permlane16_p0(ptr addrspace(1) %out, ptr %src0, i32 %src1, i32 %src2) {
; GFX10-SDAG-LABEL: v_permlane16_p0:
; GFX10-SDAG:       ; %bb.0:
; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-SDAG-NEXT:    v_readfirstlane_b32 s4, v4
; GFX10-SDAG-NEXT:    v_readfirstlane_b32 s5, v5
; GFX10-SDAG-NEXT:    v_permlane16_b32 v3, v3, s4, s5
; GFX10-SDAG-NEXT:    v_permlane16_b32 v2, v2, s4, s5
; GFX10-SDAG-NEXT:    global_store_dwordx2 v[0:1], v[2:3], off
; GFX10-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: v_permlane16_p0:
; GFX11-SDAG:       ; %bb.0:
; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-NEXT:    v_readfirstlane_b32 s0, v4
; GFX11-SDAG-NEXT:    v_readfirstlane_b32 s1, v5
; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-SDAG-NEXT:    v_permlane16_b32 v3, v3, s0, s1
; GFX11-SDAG-NEXT:    v_permlane16_b32 v2, v2, s0, s1
; GFX11-SDAG-NEXT:    global_store_b64 v[0:1], v[2:3], off
; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-SDAG-LABEL: v_permlane16_p0:
; GFX12-SDAG:       ; %bb.0:
; GFX12-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-SDAG-NEXT:    s_wait_expcnt 0x0
; GFX12-SDAG-NEXT:    s_wait_samplecnt 0x0
; GFX12-SDAG-NEXT:    s_wait_bvhcnt 0x0
; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
; GFX12-SDAG-NEXT:    v_readfirstlane_b32 s0, v4
; GFX12-SDAG-NEXT:    v_readfirstlane_b32 s1, v5
; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-SDAG-NEXT:    v_permlane16_b32 v3, v3, s0, s1
; GFX12-SDAG-NEXT:    v_permlane16_b32 v2, v2, s0, s1
; GFX12-SDAG-NEXT:    global_store_b64 v[0:1], v[2:3], off
; GFX12-SDAG-NEXT:    s_setpc_b64 s[30:31]
  %v = call ptr @llvm.amdgcn.permlane16.p0(ptr %src0, ptr %src0, i32 %src1, i32 %src2, i1 false, i1 false)
  store ptr %v, ptr addrspace(1) %out
  ret void
}
46*35f7b60aSVikram Hegde
; Codegen test: llvm.amdgcn.permlanex16 on a plain `ptr` (default address
; space). %src0 is passed as both of the first two operands and both trailing
; i1 flags are false. The checks below expect %src1/%src2 to be copied into
; SGPRs with v_readfirstlane_b32, one v_permlanex16_b32 per 32-bit half of the
; pointer (v3, then v2), and a single 64-bit global store of the result.
define void @v_permlanex16_p0(ptr addrspace(1) %out, ptr %src0, i32 %src1, i32 %src2) {
; GFX10-SDAG-LABEL: v_permlanex16_p0:
; GFX10-SDAG:       ; %bb.0:
; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-SDAG-NEXT:    v_readfirstlane_b32 s4, v4
; GFX10-SDAG-NEXT:    v_readfirstlane_b32 s5, v5
; GFX10-SDAG-NEXT:    v_permlanex16_b32 v3, v3, s4, s5
; GFX10-SDAG-NEXT:    v_permlanex16_b32 v2, v2, s4, s5
; GFX10-SDAG-NEXT:    global_store_dwordx2 v[0:1], v[2:3], off
; GFX10-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: v_permlanex16_p0:
; GFX11-SDAG:       ; %bb.0:
; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-NEXT:    v_readfirstlane_b32 s0, v4
; GFX11-SDAG-NEXT:    v_readfirstlane_b32 s1, v5
; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-SDAG-NEXT:    v_permlanex16_b32 v3, v3, s0, s1
; GFX11-SDAG-NEXT:    v_permlanex16_b32 v2, v2, s0, s1
; GFX11-SDAG-NEXT:    global_store_b64 v[0:1], v[2:3], off
; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-SDAG-LABEL: v_permlanex16_p0:
; GFX12-SDAG:       ; %bb.0:
; GFX12-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-SDAG-NEXT:    s_wait_expcnt 0x0
; GFX12-SDAG-NEXT:    s_wait_samplecnt 0x0
; GFX12-SDAG-NEXT:    s_wait_bvhcnt 0x0
; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
; GFX12-SDAG-NEXT:    v_readfirstlane_b32 s0, v4
; GFX12-SDAG-NEXT:    v_readfirstlane_b32 s1, v5
; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-SDAG-NEXT:    v_permlanex16_b32 v3, v3, s0, s1
; GFX12-SDAG-NEXT:    v_permlanex16_b32 v2, v2, s0, s1
; GFX12-SDAG-NEXT:    global_store_b64 v[0:1], v[2:3], off
; GFX12-SDAG-NEXT:    s_setpc_b64 s[30:31]
  %v = call ptr @llvm.amdgcn.permlanex16.p0(ptr %src0, ptr %src0, i32 %src1, i32 %src2, i1 false, i1 false)
  store ptr %v, ptr addrspace(1) %out
  ret void
}
87*35f7b60aSVikram Hegde
; Codegen test: llvm.amdgcn.permlane16 on a <3 x ptr> vector (default address
; space). %src0 is passed as both of the first two operands and both trailing
; i1 flags are false. The checks below expect %src1/%src2 to be copied into
; SGPRs with v_readfirstlane_b32, six v_permlane16_b32 ops (v7 down to v2), and
; the result written with two global stores: 64 bits at offset 16 and 128 bits
; at offset 0 (grouped under s_clause 0x1 in the gfx1100/gfx1200 output).
define void @v_permlane16_v3p0(ptr addrspace(1) %out, <3 x ptr> %src0, i32 %src1, i32 %src2) {
; GFX10-SDAG-LABEL: v_permlane16_v3p0:
; GFX10-SDAG:       ; %bb.0:
; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-SDAG-NEXT:    v_readfirstlane_b32 s4, v8
; GFX10-SDAG-NEXT:    v_readfirstlane_b32 s5, v9
; GFX10-SDAG-NEXT:    v_permlane16_b32 v7, v7, s4, s5
; GFX10-SDAG-NEXT:    v_permlane16_b32 v6, v6, s4, s5
; GFX10-SDAG-NEXT:    v_permlane16_b32 v5, v5, s4, s5
; GFX10-SDAG-NEXT:    v_permlane16_b32 v4, v4, s4, s5
; GFX10-SDAG-NEXT:    v_permlane16_b32 v3, v3, s4, s5
; GFX10-SDAG-NEXT:    v_permlane16_b32 v2, v2, s4, s5
; GFX10-SDAG-NEXT:    global_store_dwordx2 v[0:1], v[6:7], off offset:16
; GFX10-SDAG-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
; GFX10-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: v_permlane16_v3p0:
; GFX11-SDAG:       ; %bb.0:
; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-NEXT:    v_readfirstlane_b32 s0, v8
; GFX11-SDAG-NEXT:    v_readfirstlane_b32 s1, v9
; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-SDAG-NEXT:    v_permlane16_b32 v7, v7, s0, s1
; GFX11-SDAG-NEXT:    v_permlane16_b32 v6, v6, s0, s1
; GFX11-SDAG-NEXT:    v_permlane16_b32 v5, v5, s0, s1
; GFX11-SDAG-NEXT:    v_permlane16_b32 v4, v4, s0, s1
; GFX11-SDAG-NEXT:    v_permlane16_b32 v3, v3, s0, s1
; GFX11-SDAG-NEXT:    v_permlane16_b32 v2, v2, s0, s1
; GFX11-SDAG-NEXT:    s_clause 0x1
; GFX11-SDAG-NEXT:    global_store_b64 v[0:1], v[6:7], off offset:16
; GFX11-SDAG-NEXT:    global_store_b128 v[0:1], v[2:5], off
; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-SDAG-LABEL: v_permlane16_v3p0:
; GFX12-SDAG:       ; %bb.0:
; GFX12-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-SDAG-NEXT:    s_wait_expcnt 0x0
; GFX12-SDAG-NEXT:    s_wait_samplecnt 0x0
; GFX12-SDAG-NEXT:    s_wait_bvhcnt 0x0
; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
; GFX12-SDAG-NEXT:    v_readfirstlane_b32 s0, v8
; GFX12-SDAG-NEXT:    v_readfirstlane_b32 s1, v9
; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-SDAG-NEXT:    v_permlane16_b32 v7, v7, s0, s1
; GFX12-SDAG-NEXT:    v_permlane16_b32 v6, v6, s0, s1
; GFX12-SDAG-NEXT:    v_permlane16_b32 v5, v5, s0, s1
; GFX12-SDAG-NEXT:    v_permlane16_b32 v4, v4, s0, s1
; GFX12-SDAG-NEXT:    v_permlane16_b32 v3, v3, s0, s1
; GFX12-SDAG-NEXT:    v_permlane16_b32 v2, v2, s0, s1
; GFX12-SDAG-NEXT:    s_clause 0x1
; GFX12-SDAG-NEXT:    global_store_b64 v[0:1], v[6:7], off offset:16
; GFX12-SDAG-NEXT:    global_store_b128 v[0:1], v[2:5], off
; GFX12-SDAG-NEXT:    s_setpc_b64 s[30:31]
  %v = call <3 x ptr> @llvm.amdgcn.permlane16.v3p0(<3 x ptr> %src0, <3 x ptr> %src0, i32 %src1, i32 %src2, i1 false, i1 false)
  store <3 x ptr> %v, ptr addrspace(1) %out
  ret void
}
145*35f7b60aSVikram Hegde
; Codegen test: llvm.amdgcn.permlanex16 on a <3 x ptr> vector (default address
; space). %src0 is passed as both of the first two operands and both trailing
; i1 flags are false. The checks below expect %src1/%src2 to be copied into
; SGPRs with v_readfirstlane_b32, six v_permlanex16_b32 ops (v7 down to v2),
; and the result written with two global stores: 64 bits at offset 16 and 128
; bits at offset 0 (grouped under s_clause 0x1 in the gfx1100/gfx1200 output).
define void @v_permlanex16_v3p0(ptr addrspace(1) %out, <3 x ptr> %src0, i32 %src1, i32 %src2) {
; GFX10-SDAG-LABEL: v_permlanex16_v3p0:
; GFX10-SDAG:       ; %bb.0:
; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-SDAG-NEXT:    v_readfirstlane_b32 s4, v8
; GFX10-SDAG-NEXT:    v_readfirstlane_b32 s5, v9
; GFX10-SDAG-NEXT:    v_permlanex16_b32 v7, v7, s4, s5
; GFX10-SDAG-NEXT:    v_permlanex16_b32 v6, v6, s4, s5
; GFX10-SDAG-NEXT:    v_permlanex16_b32 v5, v5, s4, s5
; GFX10-SDAG-NEXT:    v_permlanex16_b32 v4, v4, s4, s5
; GFX10-SDAG-NEXT:    v_permlanex16_b32 v3, v3, s4, s5
; GFX10-SDAG-NEXT:    v_permlanex16_b32 v2, v2, s4, s5
; GFX10-SDAG-NEXT:    global_store_dwordx2 v[0:1], v[6:7], off offset:16
; GFX10-SDAG-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
; GFX10-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: v_permlanex16_v3p0:
; GFX11-SDAG:       ; %bb.0:
; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-NEXT:    v_readfirstlane_b32 s0, v8
; GFX11-SDAG-NEXT:    v_readfirstlane_b32 s1, v9
; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-SDAG-NEXT:    v_permlanex16_b32 v7, v7, s0, s1
; GFX11-SDAG-NEXT:    v_permlanex16_b32 v6, v6, s0, s1
; GFX11-SDAG-NEXT:    v_permlanex16_b32 v5, v5, s0, s1
; GFX11-SDAG-NEXT:    v_permlanex16_b32 v4, v4, s0, s1
; GFX11-SDAG-NEXT:    v_permlanex16_b32 v3, v3, s0, s1
; GFX11-SDAG-NEXT:    v_permlanex16_b32 v2, v2, s0, s1
; GFX11-SDAG-NEXT:    s_clause 0x1
; GFX11-SDAG-NEXT:    global_store_b64 v[0:1], v[6:7], off offset:16
; GFX11-SDAG-NEXT:    global_store_b128 v[0:1], v[2:5], off
; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-SDAG-LABEL: v_permlanex16_v3p0:
; GFX12-SDAG:       ; %bb.0:
; GFX12-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-SDAG-NEXT:    s_wait_expcnt 0x0
; GFX12-SDAG-NEXT:    s_wait_samplecnt 0x0
; GFX12-SDAG-NEXT:    s_wait_bvhcnt 0x0
; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
; GFX12-SDAG-NEXT:    v_readfirstlane_b32 s0, v8
; GFX12-SDAG-NEXT:    v_readfirstlane_b32 s1, v9
; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-SDAG-NEXT:    v_permlanex16_b32 v7, v7, s0, s1
; GFX12-SDAG-NEXT:    v_permlanex16_b32 v6, v6, s0, s1
; GFX12-SDAG-NEXT:    v_permlanex16_b32 v5, v5, s0, s1
; GFX12-SDAG-NEXT:    v_permlanex16_b32 v4, v4, s0, s1
; GFX12-SDAG-NEXT:    v_permlanex16_b32 v3, v3, s0, s1
; GFX12-SDAG-NEXT:    v_permlanex16_b32 v2, v2, s0, s1
; GFX12-SDAG-NEXT:    s_clause 0x1
; GFX12-SDAG-NEXT:    global_store_b64 v[0:1], v[6:7], off offset:16
; GFX12-SDAG-NEXT:    global_store_b128 v[0:1], v[2:5], off
; GFX12-SDAG-NEXT:    s_setpc_b64 s[30:31]
  %v = call <3 x ptr> @llvm.amdgcn.permlanex16.v3p0(<3 x ptr> %src0, <3 x ptr> %src0, i32 %src1, i32 %src2, i1 false, i1 false)
  store <3 x ptr> %v, ptr addrspace(1) %out
  ret void
}
203*35f7b60aSVikram Hegde
; Codegen test: llvm.amdgcn.permlane16 on a `ptr addrspace(3)` value. %src0 is
; passed as both of the first two operands and both trailing i1 flags are
; false. The checks below expect %src1/%src2 to be copied into SGPRs with
; v_readfirstlane_b32, a single v_permlane16_b32 on v2, and one 32-bit global
; store of the result.
define void @v_permlane16_p3(ptr addrspace(1) %out, ptr addrspace(3) %src0, i32 %src1, i32 %src2) {
; GFX10-SDAG-LABEL: v_permlane16_p3:
; GFX10-SDAG:       ; %bb.0:
; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-SDAG-NEXT:    v_readfirstlane_b32 s4, v3
; GFX10-SDAG-NEXT:    v_readfirstlane_b32 s5, v4
; GFX10-SDAG-NEXT:    v_permlane16_b32 v2, v2, s4, s5
; GFX10-SDAG-NEXT:    global_store_dword v[0:1], v2, off
; GFX10-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: v_permlane16_p3:
; GFX11-SDAG:       ; %bb.0:
; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-NEXT:    v_readfirstlane_b32 s0, v3
; GFX11-SDAG-NEXT:    v_readfirstlane_b32 s1, v4
; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-SDAG-NEXT:    v_permlane16_b32 v2, v2, s0, s1
; GFX11-SDAG-NEXT:    global_store_b32 v[0:1], v2, off
; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-SDAG-LABEL: v_permlane16_p3:
; GFX12-SDAG:       ; %bb.0:
; GFX12-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-SDAG-NEXT:    s_wait_expcnt 0x0
; GFX12-SDAG-NEXT:    s_wait_samplecnt 0x0
; GFX12-SDAG-NEXT:    s_wait_bvhcnt 0x0
; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
; GFX12-SDAG-NEXT:    v_readfirstlane_b32 s0, v3
; GFX12-SDAG-NEXT:    v_readfirstlane_b32 s1, v4
; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-SDAG-NEXT:    v_permlane16_b32 v2, v2, s0, s1
; GFX12-SDAG-NEXT:    global_store_b32 v[0:1], v2, off
; GFX12-SDAG-NEXT:    s_setpc_b64 s[30:31]
  %v = call ptr addrspace(3) @llvm.amdgcn.permlane16.p3(ptr addrspace(3) %src0, ptr addrspace(3) %src0, i32 %src1, i32 %src2, i1 false, i1 false)
  store ptr addrspace(3) %v, ptr addrspace(1) %out
  ret void
}
241*35f7b60aSVikram Hegde
; Codegen test: llvm.amdgcn.permlanex16 on a `ptr addrspace(3)` value. %src0 is
; passed as both of the first two operands and both trailing i1 flags are
; false. The checks below expect %src1/%src2 to be copied into SGPRs with
; v_readfirstlane_b32, a single v_permlanex16_b32 on v2, and one 32-bit global
; store of the result.
define void @v_permlanex16_p3(ptr addrspace(1) %out, ptr addrspace(3) %src0, i32 %src1, i32 %src2) {
; GFX10-SDAG-LABEL: v_permlanex16_p3:
; GFX10-SDAG:       ; %bb.0:
; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-SDAG-NEXT:    v_readfirstlane_b32 s4, v3
; GFX10-SDAG-NEXT:    v_readfirstlane_b32 s5, v4
; GFX10-SDAG-NEXT:    v_permlanex16_b32 v2, v2, s4, s5
; GFX10-SDAG-NEXT:    global_store_dword v[0:1], v2, off
; GFX10-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: v_permlanex16_p3:
; GFX11-SDAG:       ; %bb.0:
; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-NEXT:    v_readfirstlane_b32 s0, v3
; GFX11-SDAG-NEXT:    v_readfirstlane_b32 s1, v4
; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-SDAG-NEXT:    v_permlanex16_b32 v2, v2, s0, s1
; GFX11-SDAG-NEXT:    global_store_b32 v[0:1], v2, off
; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-SDAG-LABEL: v_permlanex16_p3:
; GFX12-SDAG:       ; %bb.0:
; GFX12-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-SDAG-NEXT:    s_wait_expcnt 0x0
; GFX12-SDAG-NEXT:    s_wait_samplecnt 0x0
; GFX12-SDAG-NEXT:    s_wait_bvhcnt 0x0
; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
; GFX12-SDAG-NEXT:    v_readfirstlane_b32 s0, v3
; GFX12-SDAG-NEXT:    v_readfirstlane_b32 s1, v4
; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-SDAG-NEXT:    v_permlanex16_b32 v2, v2, s0, s1
; GFX12-SDAG-NEXT:    global_store_b32 v[0:1], v2, off
; GFX12-SDAG-NEXT:    s_setpc_b64 s[30:31]
  %v = call ptr addrspace(3) @llvm.amdgcn.permlanex16.p3(ptr addrspace(3) %src0, ptr addrspace(3) %src0, i32 %src1, i32 %src2, i1 false, i1 false)
  store ptr addrspace(3) %v, ptr addrspace(1) %out
  ret void
}
279*35f7b60aSVikram Hegde
; Codegen test: llvm.amdgcn.permlane16 on a <3 x ptr addrspace(3)> vector.
; %src0 is passed as both of the first two operands and both trailing i1 flags
; are false. The checks below expect %src1/%src2 to be copied into SGPRs with
; v_readfirstlane_b32, three v_permlane16_b32 ops (v4, v3, v2), and a single
; 96-bit global store of the result.
define void @v_permlane16_v3p3(ptr addrspace(1) %out, <3 x ptr addrspace(3)> %src0, i32 %src1, i32 %src2) {
; GFX10-SDAG-LABEL: v_permlane16_v3p3:
; GFX10-SDAG:       ; %bb.0:
; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-SDAG-NEXT:    v_readfirstlane_b32 s4, v5
; GFX10-SDAG-NEXT:    v_readfirstlane_b32 s5, v6
; GFX10-SDAG-NEXT:    v_permlane16_b32 v4, v4, s4, s5
; GFX10-SDAG-NEXT:    v_permlane16_b32 v3, v3, s4, s5
; GFX10-SDAG-NEXT:    v_permlane16_b32 v2, v2, s4, s5
; GFX10-SDAG-NEXT:    global_store_dwordx3 v[0:1], v[2:4], off
; GFX10-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: v_permlane16_v3p3:
; GFX11-SDAG:       ; %bb.0:
; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-NEXT:    v_readfirstlane_b32 s0, v5
; GFX11-SDAG-NEXT:    v_readfirstlane_b32 s1, v6
; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-SDAG-NEXT:    v_permlane16_b32 v4, v4, s0, s1
; GFX11-SDAG-NEXT:    v_permlane16_b32 v3, v3, s0, s1
; GFX11-SDAG-NEXT:    v_permlane16_b32 v2, v2, s0, s1
; GFX11-SDAG-NEXT:    global_store_b96 v[0:1], v[2:4], off
; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-SDAG-LABEL: v_permlane16_v3p3:
; GFX12-SDAG:       ; %bb.0:
; GFX12-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-SDAG-NEXT:    s_wait_expcnt 0x0
; GFX12-SDAG-NEXT:    s_wait_samplecnt 0x0
; GFX12-SDAG-NEXT:    s_wait_bvhcnt 0x0
; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
; GFX12-SDAG-NEXT:    v_readfirstlane_b32 s0, v5
; GFX12-SDAG-NEXT:    v_readfirstlane_b32 s1, v6
; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-SDAG-NEXT:    v_permlane16_b32 v4, v4, s0, s1
; GFX12-SDAG-NEXT:    v_permlane16_b32 v3, v3, s0, s1
; GFX12-SDAG-NEXT:    v_permlane16_b32 v2, v2, s0, s1
; GFX12-SDAG-NEXT:    global_store_b96 v[0:1], v[2:4], off
; GFX12-SDAG-NEXT:    s_setpc_b64 s[30:31]
  %v = call <3 x ptr addrspace(3)> @llvm.amdgcn.permlane16.v3p3(<3 x ptr addrspace(3)> %src0, <3 x ptr addrspace(3)> %src0, i32 %src1, i32 %src2, i1 false, i1 false)
  store <3 x ptr addrspace(3)> %v, ptr addrspace(1) %out
  ret void
}
323*35f7b60aSVikram Hegde
; Codegen test: llvm.amdgcn.permlanex16 on a <3 x ptr addrspace(3)> vector.
; %src0 is passed as both of the first two operands and both trailing i1 flags
; are false. The checks below expect %src1/%src2 to be copied into SGPRs with
; v_readfirstlane_b32, three v_permlanex16_b32 ops (v4, v3, v2), and a single
; 96-bit global store of the result.
define void @v_permlanex16_v3p3(ptr addrspace(1) %out, <3 x ptr addrspace(3)> %src0, i32 %src1, i32 %src2) {
; GFX10-SDAG-LABEL: v_permlanex16_v3p3:
; GFX10-SDAG:       ; %bb.0:
; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-SDAG-NEXT:    v_readfirstlane_b32 s4, v5
; GFX10-SDAG-NEXT:    v_readfirstlane_b32 s5, v6
; GFX10-SDAG-NEXT:    v_permlanex16_b32 v4, v4, s4, s5
; GFX10-SDAG-NEXT:    v_permlanex16_b32 v3, v3, s4, s5
; GFX10-SDAG-NEXT:    v_permlanex16_b32 v2, v2, s4, s5
; GFX10-SDAG-NEXT:    global_store_dwordx3 v[0:1], v[2:4], off
; GFX10-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: v_permlanex16_v3p3:
; GFX11-SDAG:       ; %bb.0:
; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-NEXT:    v_readfirstlane_b32 s0, v5
; GFX11-SDAG-NEXT:    v_readfirstlane_b32 s1, v6
; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-SDAG-NEXT:    v_permlanex16_b32 v4, v4, s0, s1
; GFX11-SDAG-NEXT:    v_permlanex16_b32 v3, v3, s0, s1
; GFX11-SDAG-NEXT:    v_permlanex16_b32 v2, v2, s0, s1
; GFX11-SDAG-NEXT:    global_store_b96 v[0:1], v[2:4], off
; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-SDAG-LABEL: v_permlanex16_v3p3:
; GFX12-SDAG:       ; %bb.0:
; GFX12-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-SDAG-NEXT:    s_wait_expcnt 0x0
; GFX12-SDAG-NEXT:    s_wait_samplecnt 0x0
; GFX12-SDAG-NEXT:    s_wait_bvhcnt 0x0
; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
; GFX12-SDAG-NEXT:    v_readfirstlane_b32 s0, v5
; GFX12-SDAG-NEXT:    v_readfirstlane_b32 s1, v6
; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-SDAG-NEXT:    v_permlanex16_b32 v4, v4, s0, s1
; GFX12-SDAG-NEXT:    v_permlanex16_b32 v3, v3, s0, s1
; GFX12-SDAG-NEXT:    v_permlanex16_b32 v2, v2, s0, s1
; GFX12-SDAG-NEXT:    global_store_b96 v[0:1], v[2:4], off
; GFX12-SDAG-NEXT:    s_setpc_b64 s[30:31]
  %v = call <3 x ptr addrspace(3)> @llvm.amdgcn.permlanex16.v3p3(<3 x ptr addrspace(3)> %src0, <3 x ptr addrspace(3)> %src0, i32 %src1, i32 %src2, i1 false, i1 false)
  store <3 x ptr addrspace(3)> %v, ptr addrspace(1) %out
  ret void
}
367*35f7b60aSVikram Hegde
; Codegen test: llvm.amdgcn.permlane16 on a `ptr addrspace(5)` value. %src0 is
; passed as both of the first two operands and both trailing i1 flags are
; false. The checks below expect %src1/%src2 to be copied into SGPRs with
; v_readfirstlane_b32, a single v_permlane16_b32 on v2, and one 32-bit global
; store of the result.
define void @v_permlane16_p5(ptr addrspace(1) %out, ptr addrspace(5) %src0, i32 %src1, i32 %src2) {
; GFX10-SDAG-LABEL: v_permlane16_p5:
; GFX10-SDAG:       ; %bb.0:
; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-SDAG-NEXT:    v_readfirstlane_b32 s4, v3
; GFX10-SDAG-NEXT:    v_readfirstlane_b32 s5, v4
; GFX10-SDAG-NEXT:    v_permlane16_b32 v2, v2, s4, s5
; GFX10-SDAG-NEXT:    global_store_dword v[0:1], v2, off
; GFX10-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: v_permlane16_p5:
; GFX11-SDAG:       ; %bb.0:
; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-NEXT:    v_readfirstlane_b32 s0, v3
; GFX11-SDAG-NEXT:    v_readfirstlane_b32 s1, v4
; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-SDAG-NEXT:    v_permlane16_b32 v2, v2, s0, s1
; GFX11-SDAG-NEXT:    global_store_b32 v[0:1], v2, off
; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-SDAG-LABEL: v_permlane16_p5:
; GFX12-SDAG:       ; %bb.0:
; GFX12-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-SDAG-NEXT:    s_wait_expcnt 0x0
; GFX12-SDAG-NEXT:    s_wait_samplecnt 0x0
; GFX12-SDAG-NEXT:    s_wait_bvhcnt 0x0
; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
; GFX12-SDAG-NEXT:    v_readfirstlane_b32 s0, v3
; GFX12-SDAG-NEXT:    v_readfirstlane_b32 s1, v4
; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-SDAG-NEXT:    v_permlane16_b32 v2, v2, s0, s1
; GFX12-SDAG-NEXT:    global_store_b32 v[0:1], v2, off
; GFX12-SDAG-NEXT:    s_setpc_b64 s[30:31]
  %v = call ptr addrspace(5) @llvm.amdgcn.permlane16.p5(ptr addrspace(5) %src0, ptr addrspace(5) %src0, i32 %src1, i32 %src2, i1 false, i1 false)
  store ptr addrspace(5) %v, ptr addrspace(1) %out
  ret void
}
405*35f7b60aSVikram Hegde
; Codegen test: llvm.amdgcn.permlanex16 on a `ptr addrspace(5)` value. %src0 is
; passed as both of the first two operands and both trailing i1 flags are
; false. The checks below expect %src1/%src2 to be copied into SGPRs with
; v_readfirstlane_b32, a single v_permlanex16_b32 on v2, and one 32-bit global
; store of the result.
define void @v_permlanex16_p5(ptr addrspace(1) %out, ptr addrspace(5) %src0, i32 %src1, i32 %src2) {
; GFX10-SDAG-LABEL: v_permlanex16_p5:
; GFX10-SDAG:       ; %bb.0:
; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-SDAG-NEXT:    v_readfirstlane_b32 s4, v3
; GFX10-SDAG-NEXT:    v_readfirstlane_b32 s5, v4
; GFX10-SDAG-NEXT:    v_permlanex16_b32 v2, v2, s4, s5
; GFX10-SDAG-NEXT:    global_store_dword v[0:1], v2, off
; GFX10-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: v_permlanex16_p5:
; GFX11-SDAG:       ; %bb.0:
; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-NEXT:    v_readfirstlane_b32 s0, v3
; GFX11-SDAG-NEXT:    v_readfirstlane_b32 s1, v4
; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-SDAG-NEXT:    v_permlanex16_b32 v2, v2, s0, s1
; GFX11-SDAG-NEXT:    global_store_b32 v[0:1], v2, off
; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-SDAG-LABEL: v_permlanex16_p5:
; GFX12-SDAG:       ; %bb.0:
; GFX12-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-SDAG-NEXT:    s_wait_expcnt 0x0
; GFX12-SDAG-NEXT:    s_wait_samplecnt 0x0
; GFX12-SDAG-NEXT:    s_wait_bvhcnt 0x0
; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
; GFX12-SDAG-NEXT:    v_readfirstlane_b32 s0, v3
; GFX12-SDAG-NEXT:    v_readfirstlane_b32 s1, v4
; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-SDAG-NEXT:    v_permlanex16_b32 v2, v2, s0, s1
; GFX12-SDAG-NEXT:    global_store_b32 v[0:1], v2, off
; GFX12-SDAG-NEXT:    s_setpc_b64 s[30:31]
  %v = call ptr addrspace(5) @llvm.amdgcn.permlanex16.p5(ptr addrspace(5) %src0, ptr addrspace(5) %src0, i32 %src1, i32 %src2, i1 false, i1 false)
  store ptr addrspace(5) %v, ptr addrspace(1) %out
  ret void
}
443*35f7b60aSVikram Hegde
; Test: llvm.amdgcn.permlane16 applied to a <3 x ptr addrspace(5)> value.
; %src0 is passed as both of the intrinsic's first two operands, %src1 and
; %src2 are its two i32 operands, and both trailing i1 flags are false. The
; result is stored through %out.
; Expected code on every checked target: %src1/%src2 are copied into SGPRs
; with v_readfirstlane_b32, one v_permlane16_b32 is issued per 32-bit vector
; element, and the three results are written with a single 96-bit global store.
define void @v_permlane16_v3p5(ptr addrspace(1) %out, <3 x ptr addrspace(5)> %src0, i32 %src1, i32 %src2) {
; GFX10-SDAG-LABEL: v_permlane16_v3p5:
; GFX10-SDAG:       ; %bb.0:
; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-SDAG-NEXT:    v_readfirstlane_b32 s4, v5
; GFX10-SDAG-NEXT:    v_readfirstlane_b32 s5, v6
; GFX10-SDAG-NEXT:    v_permlane16_b32 v4, v4, s4, s5
; GFX10-SDAG-NEXT:    v_permlane16_b32 v3, v3, s4, s5
; GFX10-SDAG-NEXT:    v_permlane16_b32 v2, v2, s4, s5
; GFX10-SDAG-NEXT:    global_store_dwordx3 v[0:1], v[2:4], off
; GFX10-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: v_permlane16_v3p5:
; GFX11-SDAG:       ; %bb.0:
; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-NEXT:    v_readfirstlane_b32 s0, v5
; GFX11-SDAG-NEXT:    v_readfirstlane_b32 s1, v6
; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-SDAG-NEXT:    v_permlane16_b32 v4, v4, s0, s1
; GFX11-SDAG-NEXT:    v_permlane16_b32 v3, v3, s0, s1
; GFX11-SDAG-NEXT:    v_permlane16_b32 v2, v2, s0, s1
; GFX11-SDAG-NEXT:    global_store_b96 v[0:1], v[2:4], off
; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-SDAG-LABEL: v_permlane16_v3p5:
; GFX12-SDAG:       ; %bb.0:
; GFX12-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-SDAG-NEXT:    s_wait_expcnt 0x0
; GFX12-SDAG-NEXT:    s_wait_samplecnt 0x0
; GFX12-SDAG-NEXT:    s_wait_bvhcnt 0x0
; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
; GFX12-SDAG-NEXT:    v_readfirstlane_b32 s0, v5
; GFX12-SDAG-NEXT:    v_readfirstlane_b32 s1, v6
; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-SDAG-NEXT:    v_permlane16_b32 v4, v4, s0, s1
; GFX12-SDAG-NEXT:    v_permlane16_b32 v3, v3, s0, s1
; GFX12-SDAG-NEXT:    v_permlane16_b32 v2, v2, s0, s1
; GFX12-SDAG-NEXT:    global_store_b96 v[0:1], v[2:4], off
; GFX12-SDAG-NEXT:    s_setpc_b64 s[30:31]
  %v = call <3 x ptr addrspace(5)> @llvm.amdgcn.permlane16.v3p5(<3 x ptr addrspace(5)> %src0, <3 x ptr addrspace(5)> %src0, i32 %src1, i32 %src2, i1 false, i1 false)
  store <3 x ptr addrspace(5)> %v, ptr addrspace(1) %out
  ret void
}
487*35f7b60aSVikram Hegde
; Test: llvm.amdgcn.permlanex16 applied to a <3 x ptr addrspace(5)> value.
; %src0 is passed as both of the intrinsic's first two operands, %src1 and
; %src2 are its two i32 operands, and both trailing i1 flags are false. The
; result is stored through %out.
; Expected code on every checked target: %src1/%src2 are copied into SGPRs
; with v_readfirstlane_b32, one v_permlanex16_b32 is issued per 32-bit vector
; element, and the three results are written with a single 96-bit global store.
define void @v_permlanex16_v3p5(ptr addrspace(1) %out, <3 x ptr addrspace(5)> %src0, i32 %src1, i32 %src2) {
; GFX10-SDAG-LABEL: v_permlanex16_v3p5:
; GFX10-SDAG:       ; %bb.0:
; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-SDAG-NEXT:    v_readfirstlane_b32 s4, v5
; GFX10-SDAG-NEXT:    v_readfirstlane_b32 s5, v6
; GFX10-SDAG-NEXT:    v_permlanex16_b32 v4, v4, s4, s5
; GFX10-SDAG-NEXT:    v_permlanex16_b32 v3, v3, s4, s5
; GFX10-SDAG-NEXT:    v_permlanex16_b32 v2, v2, s4, s5
; GFX10-SDAG-NEXT:    global_store_dwordx3 v[0:1], v[2:4], off
; GFX10-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: v_permlanex16_v3p5:
; GFX11-SDAG:       ; %bb.0:
; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-NEXT:    v_readfirstlane_b32 s0, v5
; GFX11-SDAG-NEXT:    v_readfirstlane_b32 s1, v6
; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-SDAG-NEXT:    v_permlanex16_b32 v4, v4, s0, s1
; GFX11-SDAG-NEXT:    v_permlanex16_b32 v3, v3, s0, s1
; GFX11-SDAG-NEXT:    v_permlanex16_b32 v2, v2, s0, s1
; GFX11-SDAG-NEXT:    global_store_b96 v[0:1], v[2:4], off
; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-SDAG-LABEL: v_permlanex16_v3p5:
; GFX12-SDAG:       ; %bb.0:
; GFX12-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-SDAG-NEXT:    s_wait_expcnt 0x0
; GFX12-SDAG-NEXT:    s_wait_samplecnt 0x0
; GFX12-SDAG-NEXT:    s_wait_bvhcnt 0x0
; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
; GFX12-SDAG-NEXT:    v_readfirstlane_b32 s0, v5
; GFX12-SDAG-NEXT:    v_readfirstlane_b32 s1, v6
; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-SDAG-NEXT:    v_permlanex16_b32 v4, v4, s0, s1
; GFX12-SDAG-NEXT:    v_permlanex16_b32 v3, v3, s0, s1
; GFX12-SDAG-NEXT:    v_permlanex16_b32 v2, v2, s0, s1
; GFX12-SDAG-NEXT:    global_store_b96 v[0:1], v[2:4], off
; GFX12-SDAG-NEXT:    s_setpc_b64 s[30:31]
  %v = call <3 x ptr addrspace(5)> @llvm.amdgcn.permlanex16.v3p5(<3 x ptr addrspace(5)> %src0, <3 x ptr addrspace(5)> %src0, i32 %src1, i32 %src2, i1 false, i1 false)
  store <3 x ptr addrspace(5)> %v, ptr addrspace(1) %out
  ret void
}
531*35f7b60aSVikram Hegde
; Test: llvm.amdgcn.permlane16 applied to a scalar ptr addrspace(6) value.
; %src0 is passed as both of the intrinsic's first two operands, %src1 and
; %src2 are its two i32 operands, and both trailing i1 flags are false. The
; result is stored through %out.
; Expected code on every checked target: %src1/%src2 are copied into SGPRs
; with v_readfirstlane_b32, a single v_permlane16_b32 operates on the one
; VGPR holding the pointer, and the result is written with a 32-bit global
; store.
define void @v_permlane16_p6(ptr addrspace(1) %out, ptr addrspace(6) %src0, i32 %src1, i32 %src2) {
; GFX10-SDAG-LABEL: v_permlane16_p6:
; GFX10-SDAG:       ; %bb.0:
; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-SDAG-NEXT:    v_readfirstlane_b32 s4, v3
; GFX10-SDAG-NEXT:    v_readfirstlane_b32 s5, v4
; GFX10-SDAG-NEXT:    v_permlane16_b32 v2, v2, s4, s5
; GFX10-SDAG-NEXT:    global_store_dword v[0:1], v2, off
; GFX10-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: v_permlane16_p6:
; GFX11-SDAG:       ; %bb.0:
; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-NEXT:    v_readfirstlane_b32 s0, v3
; GFX11-SDAG-NEXT:    v_readfirstlane_b32 s1, v4
; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-SDAG-NEXT:    v_permlane16_b32 v2, v2, s0, s1
; GFX11-SDAG-NEXT:    global_store_b32 v[0:1], v2, off
; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-SDAG-LABEL: v_permlane16_p6:
; GFX12-SDAG:       ; %bb.0:
; GFX12-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-SDAG-NEXT:    s_wait_expcnt 0x0
; GFX12-SDAG-NEXT:    s_wait_samplecnt 0x0
; GFX12-SDAG-NEXT:    s_wait_bvhcnt 0x0
; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
; GFX12-SDAG-NEXT:    v_readfirstlane_b32 s0, v3
; GFX12-SDAG-NEXT:    v_readfirstlane_b32 s1, v4
; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-SDAG-NEXT:    v_permlane16_b32 v2, v2, s0, s1
; GFX12-SDAG-NEXT:    global_store_b32 v[0:1], v2, off
; GFX12-SDAG-NEXT:    s_setpc_b64 s[30:31]
  %v = call ptr addrspace(6) @llvm.amdgcn.permlane16.p6(ptr addrspace(6) %src0, ptr addrspace(6) %src0, i32 %src1, i32 %src2, i1 false, i1 false)
  store ptr addrspace(6) %v, ptr addrspace(1) %out
  ret void
}
569*35f7b60aSVikram Hegde
; Test: llvm.amdgcn.permlanex16 applied to a scalar ptr addrspace(6) value.
; %src0 is passed as both of the intrinsic's first two operands, %src1 and
; %src2 are its two i32 operands, and both trailing i1 flags are false. The
; result is stored through %out.
; Expected code on every checked target: %src1/%src2 are copied into SGPRs
; with v_readfirstlane_b32, a single v_permlanex16_b32 operates on the one
; VGPR holding the pointer, and the result is written with a 32-bit global
; store.
define void @v_permlanex16_p6(ptr addrspace(1) %out, ptr addrspace(6) %src0, i32 %src1, i32 %src2) {
; GFX10-SDAG-LABEL: v_permlanex16_p6:
; GFX10-SDAG:       ; %bb.0:
; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-SDAG-NEXT:    v_readfirstlane_b32 s4, v3
; GFX10-SDAG-NEXT:    v_readfirstlane_b32 s5, v4
; GFX10-SDAG-NEXT:    v_permlanex16_b32 v2, v2, s4, s5
; GFX10-SDAG-NEXT:    global_store_dword v[0:1], v2, off
; GFX10-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: v_permlanex16_p6:
; GFX11-SDAG:       ; %bb.0:
; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-NEXT:    v_readfirstlane_b32 s0, v3
; GFX11-SDAG-NEXT:    v_readfirstlane_b32 s1, v4
; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-SDAG-NEXT:    v_permlanex16_b32 v2, v2, s0, s1
; GFX11-SDAG-NEXT:    global_store_b32 v[0:1], v2, off
; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-SDAG-LABEL: v_permlanex16_p6:
; GFX12-SDAG:       ; %bb.0:
; GFX12-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-SDAG-NEXT:    s_wait_expcnt 0x0
; GFX12-SDAG-NEXT:    s_wait_samplecnt 0x0
; GFX12-SDAG-NEXT:    s_wait_bvhcnt 0x0
; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
; GFX12-SDAG-NEXT:    v_readfirstlane_b32 s0, v3
; GFX12-SDAG-NEXT:    v_readfirstlane_b32 s1, v4
; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-SDAG-NEXT:    v_permlanex16_b32 v2, v2, s0, s1
; GFX12-SDAG-NEXT:    global_store_b32 v[0:1], v2, off
; GFX12-SDAG-NEXT:    s_setpc_b64 s[30:31]
  %v = call ptr addrspace(6) @llvm.amdgcn.permlanex16.p6(ptr addrspace(6) %src0, ptr addrspace(6) %src0, i32 %src1, i32 %src2, i1 false, i1 false)
  store ptr addrspace(6) %v, ptr addrspace(1) %out
  ret void
}
607*35f7b60aSVikram Hegde
; Test: llvm.amdgcn.permlane16 applied to a <3 x ptr addrspace(6)> value.
; %src0 is passed as both of the intrinsic's first two operands, %src1 and
; %src2 are its two i32 operands, and both trailing i1 flags are false. The
; result is stored through %out.
; Expected code on every checked target: %src1/%src2 are copied into SGPRs
; with v_readfirstlane_b32, one v_permlane16_b32 is issued per 32-bit vector
; element, and the three results are written with a single 96-bit global store.
define void @v_permlane16_v3p6(ptr addrspace(1) %out, <3 x ptr addrspace(6)> %src0, i32 %src1, i32 %src2) {
; GFX10-SDAG-LABEL: v_permlane16_v3p6:
; GFX10-SDAG:       ; %bb.0:
; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-SDAG-NEXT:    v_readfirstlane_b32 s4, v5
; GFX10-SDAG-NEXT:    v_readfirstlane_b32 s5, v6
; GFX10-SDAG-NEXT:    v_permlane16_b32 v4, v4, s4, s5
; GFX10-SDAG-NEXT:    v_permlane16_b32 v3, v3, s4, s5
; GFX10-SDAG-NEXT:    v_permlane16_b32 v2, v2, s4, s5
; GFX10-SDAG-NEXT:    global_store_dwordx3 v[0:1], v[2:4], off
; GFX10-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: v_permlane16_v3p6:
; GFX11-SDAG:       ; %bb.0:
; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-NEXT:    v_readfirstlane_b32 s0, v5
; GFX11-SDAG-NEXT:    v_readfirstlane_b32 s1, v6
; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-SDAG-NEXT:    v_permlane16_b32 v4, v4, s0, s1
; GFX11-SDAG-NEXT:    v_permlane16_b32 v3, v3, s0, s1
; GFX11-SDAG-NEXT:    v_permlane16_b32 v2, v2, s0, s1
; GFX11-SDAG-NEXT:    global_store_b96 v[0:1], v[2:4], off
; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-SDAG-LABEL: v_permlane16_v3p6:
; GFX12-SDAG:       ; %bb.0:
; GFX12-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-SDAG-NEXT:    s_wait_expcnt 0x0
; GFX12-SDAG-NEXT:    s_wait_samplecnt 0x0
; GFX12-SDAG-NEXT:    s_wait_bvhcnt 0x0
; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
; GFX12-SDAG-NEXT:    v_readfirstlane_b32 s0, v5
; GFX12-SDAG-NEXT:    v_readfirstlane_b32 s1, v6
; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-SDAG-NEXT:    v_permlane16_b32 v4, v4, s0, s1
; GFX12-SDAG-NEXT:    v_permlane16_b32 v3, v3, s0, s1
; GFX12-SDAG-NEXT:    v_permlane16_b32 v2, v2, s0, s1
; GFX12-SDAG-NEXT:    global_store_b96 v[0:1], v[2:4], off
; GFX12-SDAG-NEXT:    s_setpc_b64 s[30:31]
  %v = call <3 x ptr addrspace(6)> @llvm.amdgcn.permlane16.v3p6(<3 x ptr addrspace(6)> %src0, <3 x ptr addrspace(6)> %src0, i32 %src1, i32 %src2, i1 false, i1 false)
  store <3 x ptr addrspace(6)> %v, ptr addrspace(1) %out
  ret void
}
651*35f7b60aSVikram Hegde
; Test: llvm.amdgcn.permlanex16 applied to a <3 x ptr addrspace(6)> value.
; %src0 is passed as both of the intrinsic's first two operands, %src1 and
; %src2 are its two i32 operands, and both trailing i1 flags are false. The
; result is stored through %out.
; Expected code on every checked target: %src1/%src2 are copied into SGPRs
; with v_readfirstlane_b32, one v_permlanex16_b32 is issued per 32-bit vector
; element, and the three results are written with a single 96-bit global store.
define void @v_permlanex16_v3p6(ptr addrspace(1) %out, <3 x ptr addrspace(6)> %src0, i32 %src1, i32 %src2) {
; GFX10-SDAG-LABEL: v_permlanex16_v3p6:
; GFX10-SDAG:       ; %bb.0:
; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-SDAG-NEXT:    v_readfirstlane_b32 s4, v5
; GFX10-SDAG-NEXT:    v_readfirstlane_b32 s5, v6
; GFX10-SDAG-NEXT:    v_permlanex16_b32 v4, v4, s4, s5
; GFX10-SDAG-NEXT:    v_permlanex16_b32 v3, v3, s4, s5
; GFX10-SDAG-NEXT:    v_permlanex16_b32 v2, v2, s4, s5
; GFX10-SDAG-NEXT:    global_store_dwordx3 v[0:1], v[2:4], off
; GFX10-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: v_permlanex16_v3p6:
; GFX11-SDAG:       ; %bb.0:
; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-NEXT:    v_readfirstlane_b32 s0, v5
; GFX11-SDAG-NEXT:    v_readfirstlane_b32 s1, v6
; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-SDAG-NEXT:    v_permlanex16_b32 v4, v4, s0, s1
; GFX11-SDAG-NEXT:    v_permlanex16_b32 v3, v3, s0, s1
; GFX11-SDAG-NEXT:    v_permlanex16_b32 v2, v2, s0, s1
; GFX11-SDAG-NEXT:    global_store_b96 v[0:1], v[2:4], off
; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-SDAG-LABEL: v_permlanex16_v3p6:
; GFX12-SDAG:       ; %bb.0:
; GFX12-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-SDAG-NEXT:    s_wait_expcnt 0x0
; GFX12-SDAG-NEXT:    s_wait_samplecnt 0x0
; GFX12-SDAG-NEXT:    s_wait_bvhcnt 0x0
; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
; GFX12-SDAG-NEXT:    v_readfirstlane_b32 s0, v5
; GFX12-SDAG-NEXT:    v_readfirstlane_b32 s1, v6
; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-SDAG-NEXT:    v_permlanex16_b32 v4, v4, s0, s1
; GFX12-SDAG-NEXT:    v_permlanex16_b32 v3, v3, s0, s1
; GFX12-SDAG-NEXT:    v_permlanex16_b32 v2, v2, s0, s1
; GFX12-SDAG-NEXT:    global_store_b96 v[0:1], v[2:4], off
; GFX12-SDAG-NEXT:    s_setpc_b64 s[30:31]
  %v = call <3 x ptr addrspace(6)> @llvm.amdgcn.permlanex16.v3p6(<3 x ptr addrspace(6)> %src0, <3 x ptr addrspace(6)> %src0, i32 %src1, i32 %src2, i1 false, i1 false)
  store <3 x ptr addrspace(6)> %v, ptr addrspace(1) %out
  ret void
}
695