xref: /llvm-project/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.writelane.ptr.ll (revision 5feb32ba929f9e517c530217cabb09d1d734a763)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx802 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX802-SDAG %s
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX1010-SDAG %s
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx1100 -verify-machineinstrs -amdgpu-enable-vopd=0 < %s | FileCheck -check-prefixes=GFX1100-SDAG %s

; writelane on a pointer in the default address space (`ptr`).
; The old pointer value is loaded from %out, passed as the third operand of
; llvm.amdgcn.writelane.p0 together with %src and the i32 %src1, and the
; result is stored back to %out. On all three targets the expected code
; handles the 64-bit value as two v_writelane_b32 instructions (one per
; 32-bit register of the loaded pair) that share the same last operand
; (m0 on gfx802, an SGPR on gfx1010 and gfx1100).
define void @test_writelane_p0(ptr addrspace(1) %out, ptr %src, i32 %src1) {
; GFX802-SDAG-LABEL: test_writelane_p0:
; GFX802-SDAG:       ; %bb.0:
; GFX802-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX802-SDAG-NEXT:    flat_load_dwordx2 v[5:6], v[0:1]
; GFX802-SDAG-NEXT:    v_readfirstlane_b32 m0, v4
; GFX802-SDAG-NEXT:    v_readfirstlane_b32 s4, v3
; GFX802-SDAG-NEXT:    v_readfirstlane_b32 s5, v2
; GFX802-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX802-SDAG-NEXT:    s_nop 0
; GFX802-SDAG-NEXT:    v_writelane_b32 v6, s4, m0
; GFX802-SDAG-NEXT:    v_writelane_b32 v5, s5, m0
; GFX802-SDAG-NEXT:    flat_store_dwordx2 v[0:1], v[5:6]
; GFX802-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX802-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX1010-SDAG-LABEL: test_writelane_p0:
; GFX1010-SDAG:       ; %bb.0:
; GFX1010-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-SDAG-NEXT:    global_load_dwordx2 v[5:6], v[0:1], off
; GFX1010-SDAG-NEXT:    v_readfirstlane_b32 s4, v3
; GFX1010-SDAG-NEXT:    v_readfirstlane_b32 s5, v4
; GFX1010-SDAG-NEXT:    v_readfirstlane_b32 s6, v2
; GFX1010-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX1010-SDAG-NEXT:    v_writelane_b32 v6, s4, s5
; GFX1010-SDAG-NEXT:    v_writelane_b32 v5, s6, s5
; GFX1010-SDAG-NEXT:    global_store_dwordx2 v[0:1], v[5:6], off
; GFX1010-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: test_writelane_p0:
; GFX1100-SDAG:       ; %bb.0:
; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT:    global_load_b64 v[5:6], v[0:1], off
; GFX1100-SDAG-NEXT:    v_readfirstlane_b32 s0, v3
; GFX1100-SDAG-NEXT:    v_readfirstlane_b32 s1, v4
; GFX1100-SDAG-NEXT:    v_readfirstlane_b32 s2, v2
; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1100-SDAG-NEXT:    v_writelane_b32 v6, s0, s1
; GFX1100-SDAG-NEXT:    v_writelane_b32 v5, s2, s1
; GFX1100-SDAG-NEXT:    global_store_b64 v[0:1], v[5:6], off
; GFX1100-SDAG-NEXT:    s_setpc_b64 s[30:31]
  %oldval = load ptr, ptr addrspace(1) %out
  %writelane = call ptr @llvm.amdgcn.writelane.p0(ptr %src, i32 %src1, ptr %oldval)
  store ptr %writelane, ptr addrspace(1) %out, align 4
  ret void
}

; writelane on a vector of three default-address-space pointers (`<3 x ptr>`).
; The old vector is loaded from %out, passed as the third operand of
; llvm.amdgcn.writelane.v3p0 together with %src and the i32 %src1, and the
; result is stored back to %out. The expected code loads and stores the
; value as a four-dword piece plus a two-dword piece at byte offset 16, and
; issues six v_writelane_b32 instructions (one per 32-bit register), all
; with the same last operand.
define void @test_writelane_v3p0(ptr addrspace(1) %out, <3 x ptr> %src, i32 %src1) {
; GFX802-SDAG-LABEL: test_writelane_v3p0:
; GFX802-SDAG:       ; %bb.0:
; GFX802-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX802-SDAG-NEXT:    v_add_u32_e32 v13, vcc, 16, v0
; GFX802-SDAG-NEXT:    flat_load_dwordx4 v[9:12], v[0:1]
; GFX802-SDAG-NEXT:    v_addc_u32_e32 v14, vcc, 0, v1, vcc
; GFX802-SDAG-NEXT:    flat_load_dwordx2 v[15:16], v[13:14]
; GFX802-SDAG-NEXT:    v_readfirstlane_b32 m0, v8
; GFX802-SDAG-NEXT:    v_readfirstlane_b32 s6, v5
; GFX802-SDAG-NEXT:    v_readfirstlane_b32 s7, v4
; GFX802-SDAG-NEXT:    v_readfirstlane_b32 s8, v3
; GFX802-SDAG-NEXT:    v_readfirstlane_b32 s9, v2
; GFX802-SDAG-NEXT:    v_readfirstlane_b32 s4, v7
; GFX802-SDAG-NEXT:    v_readfirstlane_b32 s5, v6
; GFX802-SDAG-NEXT:    s_waitcnt vmcnt(1)
; GFX802-SDAG-NEXT:    v_writelane_b32 v12, s6, m0
; GFX802-SDAG-NEXT:    v_writelane_b32 v11, s7, m0
; GFX802-SDAG-NEXT:    v_writelane_b32 v10, s8, m0
; GFX802-SDAG-NEXT:    v_writelane_b32 v9, s9, m0
; GFX802-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX802-SDAG-NEXT:    v_writelane_b32 v16, s4, m0
; GFX802-SDAG-NEXT:    v_writelane_b32 v15, s5, m0
; GFX802-SDAG-NEXT:    flat_store_dwordx4 v[0:1], v[9:12]
; GFX802-SDAG-NEXT:    flat_store_dwordx2 v[13:14], v[15:16]
; GFX802-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX802-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX1010-SDAG-LABEL: test_writelane_v3p0:
; GFX1010-SDAG:       ; %bb.0:
; GFX1010-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-SDAG-NEXT:    s_clause 0x1
; GFX1010-SDAG-NEXT:    global_load_dwordx2 v[13:14], v[0:1], off offset:16
; GFX1010-SDAG-NEXT:    global_load_dwordx4 v[9:12], v[0:1], off
; GFX1010-SDAG-NEXT:    v_readfirstlane_b32 s5, v8
; GFX1010-SDAG-NEXT:    v_readfirstlane_b32 s7, v5
; GFX1010-SDAG-NEXT:    v_readfirstlane_b32 s8, v4
; GFX1010-SDAG-NEXT:    v_readfirstlane_b32 s9, v3
; GFX1010-SDAG-NEXT:    v_readfirstlane_b32 s10, v2
; GFX1010-SDAG-NEXT:    v_readfirstlane_b32 s4, v7
; GFX1010-SDAG-NEXT:    v_readfirstlane_b32 s6, v6
; GFX1010-SDAG-NEXT:    s_waitcnt vmcnt(1)
; GFX1010-SDAG-NEXT:    v_writelane_b32 v14, s4, s5
; GFX1010-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX1010-SDAG-NEXT:    v_writelane_b32 v12, s7, s5
; GFX1010-SDAG-NEXT:    v_writelane_b32 v11, s8, s5
; GFX1010-SDAG-NEXT:    v_writelane_b32 v10, s9, s5
; GFX1010-SDAG-NEXT:    v_writelane_b32 v9, s10, s5
; GFX1010-SDAG-NEXT:    v_writelane_b32 v13, s6, s5
; GFX1010-SDAG-NEXT:    global_store_dwordx4 v[0:1], v[9:12], off
; GFX1010-SDAG-NEXT:    global_store_dwordx2 v[0:1], v[13:14], off offset:16
; GFX1010-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: test_writelane_v3p0:
; GFX1100-SDAG:       ; %bb.0:
; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT:    s_clause 0x1
; GFX1100-SDAG-NEXT:    global_load_b64 v[13:14], v[0:1], off offset:16
; GFX1100-SDAG-NEXT:    global_load_b128 v[9:12], v[0:1], off
; GFX1100-SDAG-NEXT:    v_readfirstlane_b32 s1, v8
; GFX1100-SDAG-NEXT:    v_readfirstlane_b32 s3, v5
; GFX1100-SDAG-NEXT:    v_readfirstlane_b32 s4, v4
; GFX1100-SDAG-NEXT:    v_readfirstlane_b32 s5, v3
; GFX1100-SDAG-NEXT:    v_readfirstlane_b32 s6, v2
; GFX1100-SDAG-NEXT:    v_readfirstlane_b32 s0, v7
; GFX1100-SDAG-NEXT:    v_readfirstlane_b32 s2, v6
; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(1)
; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2)
; GFX1100-SDAG-NEXT:    v_writelane_b32 v14, s0, s1
; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT:    v_writelane_b32 v12, s3, s1
; GFX1100-SDAG-NEXT:    v_writelane_b32 v11, s4, s1
; GFX1100-SDAG-NEXT:    v_writelane_b32 v10, s5, s1
; GFX1100-SDAG-NEXT:    v_writelane_b32 v9, s6, s1
; GFX1100-SDAG-NEXT:    v_writelane_b32 v13, s2, s1
; GFX1100-SDAG-NEXT:    s_clause 0x1
; GFX1100-SDAG-NEXT:    global_store_b128 v[0:1], v[9:12], off
; GFX1100-SDAG-NEXT:    global_store_b64 v[0:1], v[13:14], off offset:16
; GFX1100-SDAG-NEXT:    s_setpc_b64 s[30:31]
  %oldval = load <3 x ptr>, ptr addrspace(1) %out
  %writelane = call <3 x ptr> @llvm.amdgcn.writelane.v3p0(<3 x ptr> %src, i32 %src1, <3 x ptr> %oldval)
  store <3 x ptr> %writelane, ptr addrspace(1) %out, align 4
  ret void
}

; writelane on an address space 3 pointer (`ptr addrspace(3)`).
; The old pointer value is loaded from %out, passed as the third operand of
; llvm.amdgcn.writelane.p3 together with %src and the i32 %src1, and the
; result is stored back to %out. The value occupies a single dword in the
; expected code, so each target emits exactly one v_writelane_b32.
define void @test_writelane_p3(ptr addrspace(1) %out, ptr addrspace(3) %src, i32 %src1) {
; GFX802-SDAG-LABEL: test_writelane_p3:
; GFX802-SDAG:       ; %bb.0:
; GFX802-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX802-SDAG-NEXT:    flat_load_dword v4, v[0:1]
; GFX802-SDAG-NEXT:    v_readfirstlane_b32 m0, v3
; GFX802-SDAG-NEXT:    v_readfirstlane_b32 s4, v2
; GFX802-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX802-SDAG-NEXT:    s_nop 1
; GFX802-SDAG-NEXT:    v_writelane_b32 v4, s4, m0
; GFX802-SDAG-NEXT:    flat_store_dword v[0:1], v4
; GFX802-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX802-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX1010-SDAG-LABEL: test_writelane_p3:
; GFX1010-SDAG:       ; %bb.0:
; GFX1010-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-SDAG-NEXT:    global_load_dword v4, v[0:1], off
; GFX1010-SDAG-NEXT:    v_readfirstlane_b32 s4, v2
; GFX1010-SDAG-NEXT:    v_readfirstlane_b32 s5, v3
; GFX1010-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX1010-SDAG-NEXT:    v_writelane_b32 v4, s4, s5
; GFX1010-SDAG-NEXT:    global_store_dword v[0:1], v4, off
; GFX1010-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: test_writelane_p3:
; GFX1100-SDAG:       ; %bb.0:
; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT:    global_load_b32 v4, v[0:1], off
; GFX1100-SDAG-NEXT:    v_readfirstlane_b32 s0, v2
; GFX1100-SDAG-NEXT:    v_readfirstlane_b32 s1, v3
; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX1100-SDAG-NEXT:    v_writelane_b32 v4, s0, s1
; GFX1100-SDAG-NEXT:    global_store_b32 v[0:1], v4, off
; GFX1100-SDAG-NEXT:    s_setpc_b64 s[30:31]
  %oldval = load ptr addrspace(3), ptr addrspace(1) %out
  %writelane = call ptr addrspace(3) @llvm.amdgcn.writelane.p3(ptr addrspace(3) %src, i32 %src1, ptr addrspace(3) %oldval)
  store ptr addrspace(3) %writelane, ptr addrspace(1) %out, align 4
  ret void
}

; writelane on a vector of three address space 3 pointers
; (`<3 x ptr addrspace(3)>`).
; The old vector is loaded from %out, passed as the third operand of
; llvm.amdgcn.writelane.v3p3 together with %src and the i32 %src1, and the
; result is stored back to %out. The expected code moves the value with a
; single three-dword load/store and issues three v_writelane_b32
; instructions (one per element), all with the same last operand.
define void @test_writelane_v3p3(ptr addrspace(1) %out, <3 x ptr addrspace(3)> %src, i32 %src1) {
; GFX802-SDAG-LABEL: test_writelane_v3p3:
; GFX802-SDAG:       ; %bb.0:
; GFX802-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX802-SDAG-NEXT:    flat_load_dwordx3 v[6:8], v[0:1]
; GFX802-SDAG-NEXT:    v_readfirstlane_b32 m0, v5
; GFX802-SDAG-NEXT:    v_readfirstlane_b32 s4, v4
; GFX802-SDAG-NEXT:    v_readfirstlane_b32 s5, v3
; GFX802-SDAG-NEXT:    v_readfirstlane_b32 s6, v2
; GFX802-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX802-SDAG-NEXT:    v_writelane_b32 v8, s4, m0
; GFX802-SDAG-NEXT:    v_writelane_b32 v7, s5, m0
; GFX802-SDAG-NEXT:    v_writelane_b32 v6, s6, m0
; GFX802-SDAG-NEXT:    flat_store_dwordx3 v[0:1], v[6:8]
; GFX802-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX802-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX1010-SDAG-LABEL: test_writelane_v3p3:
; GFX1010-SDAG:       ; %bb.0:
; GFX1010-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-SDAG-NEXT:    global_load_dwordx3 v[6:8], v[0:1], off
; GFX1010-SDAG-NEXT:    v_readfirstlane_b32 s4, v4
; GFX1010-SDAG-NEXT:    v_readfirstlane_b32 s5, v5
; GFX1010-SDAG-NEXT:    v_readfirstlane_b32 s6, v3
; GFX1010-SDAG-NEXT:    v_readfirstlane_b32 s7, v2
; GFX1010-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX1010-SDAG-NEXT:    v_writelane_b32 v8, s4, s5
; GFX1010-SDAG-NEXT:    v_writelane_b32 v7, s6, s5
; GFX1010-SDAG-NEXT:    v_writelane_b32 v6, s7, s5
; GFX1010-SDAG-NEXT:    global_store_dwordx3 v[0:1], v[6:8], off
; GFX1010-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: test_writelane_v3p3:
; GFX1100-SDAG:       ; %bb.0:
; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT:    global_load_b96 v[6:8], v[0:1], off
; GFX1100-SDAG-NEXT:    v_readfirstlane_b32 s0, v4
; GFX1100-SDAG-NEXT:    v_readfirstlane_b32 s1, v5
; GFX1100-SDAG-NEXT:    v_readfirstlane_b32 s2, v3
; GFX1100-SDAG-NEXT:    v_readfirstlane_b32 s3, v2
; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX1100-SDAG-NEXT:    v_writelane_b32 v8, s0, s1
; GFX1100-SDAG-NEXT:    v_writelane_b32 v7, s2, s1
; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_3)
; GFX1100-SDAG-NEXT:    v_writelane_b32 v6, s3, s1
; GFX1100-SDAG-NEXT:    global_store_b96 v[0:1], v[6:8], off
; GFX1100-SDAG-NEXT:    s_setpc_b64 s[30:31]
  %oldval = load <3 x ptr addrspace(3)>, ptr addrspace(1) %out
  %writelane = call <3 x ptr addrspace(3)> @llvm.amdgcn.writelane.v3p3(<3 x ptr addrspace(3)> %src, i32 %src1, <3 x ptr addrspace(3)> %oldval)
  store <3 x ptr addrspace(3)> %writelane, ptr addrspace(1) %out, align 4
  ret void
}

; writelane on an address space 5 pointer (`ptr addrspace(5)`).
; The old pointer value is loaded from %out, passed as the third operand of
; llvm.amdgcn.writelane.p5 together with %src and the i32 %src1, and the
; result is stored back to %out. The value occupies a single dword in the
; expected code, so each target emits exactly one v_writelane_b32.
define void @test_writelane_p5(ptr addrspace(1) %out, ptr addrspace(5) %src, i32 %src1) {
; GFX802-SDAG-LABEL: test_writelane_p5:
; GFX802-SDAG:       ; %bb.0:
; GFX802-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX802-SDAG-NEXT:    flat_load_dword v4, v[0:1]
; GFX802-SDAG-NEXT:    v_readfirstlane_b32 m0, v3
; GFX802-SDAG-NEXT:    v_readfirstlane_b32 s4, v2
; GFX802-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX802-SDAG-NEXT:    s_nop 1
; GFX802-SDAG-NEXT:    v_writelane_b32 v4, s4, m0
; GFX802-SDAG-NEXT:    flat_store_dword v[0:1], v4
; GFX802-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX802-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX1010-SDAG-LABEL: test_writelane_p5:
; GFX1010-SDAG:       ; %bb.0:
; GFX1010-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-SDAG-NEXT:    global_load_dword v4, v[0:1], off
; GFX1010-SDAG-NEXT:    v_readfirstlane_b32 s4, v2
; GFX1010-SDAG-NEXT:    v_readfirstlane_b32 s5, v3
; GFX1010-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX1010-SDAG-NEXT:    v_writelane_b32 v4, s4, s5
; GFX1010-SDAG-NEXT:    global_store_dword v[0:1], v4, off
; GFX1010-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: test_writelane_p5:
; GFX1100-SDAG:       ; %bb.0:
; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT:    global_load_b32 v4, v[0:1], off
; GFX1100-SDAG-NEXT:    v_readfirstlane_b32 s0, v2
; GFX1100-SDAG-NEXT:    v_readfirstlane_b32 s1, v3
; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX1100-SDAG-NEXT:    v_writelane_b32 v4, s0, s1
; GFX1100-SDAG-NEXT:    global_store_b32 v[0:1], v4, off
; GFX1100-SDAG-NEXT:    s_setpc_b64 s[30:31]
  %oldval = load ptr addrspace(5), ptr addrspace(1) %out
  %writelane = call ptr addrspace(5) @llvm.amdgcn.writelane.p5(ptr addrspace(5) %src, i32 %src1, ptr addrspace(5) %oldval)
  store ptr addrspace(5) %writelane, ptr addrspace(1) %out, align 4
  ret void
}

; writelane on a vector of three address space 5 pointers
; (`<3 x ptr addrspace(5)>`).
; The old vector is loaded from %out, passed as the third operand of
; llvm.amdgcn.writelane.v3p5 together with %src and the i32 %src1, and the
; result is stored back to %out. The expected code moves the value with a
; single three-dword load/store and issues three v_writelane_b32
; instructions (one per element), all with the same last operand.
define void @test_writelane_v3p5(ptr addrspace(1) %out, <3 x ptr addrspace(5)> %src, i32 %src1) {
; GFX802-SDAG-LABEL: test_writelane_v3p5:
; GFX802-SDAG:       ; %bb.0:
; GFX802-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX802-SDAG-NEXT:    flat_load_dwordx3 v[6:8], v[0:1]
; GFX802-SDAG-NEXT:    v_readfirstlane_b32 m0, v5
; GFX802-SDAG-NEXT:    v_readfirstlane_b32 s4, v4
; GFX802-SDAG-NEXT:    v_readfirstlane_b32 s5, v3
; GFX802-SDAG-NEXT:    v_readfirstlane_b32 s6, v2
; GFX802-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX802-SDAG-NEXT:    v_writelane_b32 v8, s4, m0
; GFX802-SDAG-NEXT:    v_writelane_b32 v7, s5, m0
; GFX802-SDAG-NEXT:    v_writelane_b32 v6, s6, m0
; GFX802-SDAG-NEXT:    flat_store_dwordx3 v[0:1], v[6:8]
; GFX802-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX802-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX1010-SDAG-LABEL: test_writelane_v3p5:
; GFX1010-SDAG:       ; %bb.0:
; GFX1010-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-SDAG-NEXT:    global_load_dwordx3 v[6:8], v[0:1], off
; GFX1010-SDAG-NEXT:    v_readfirstlane_b32 s4, v4
; GFX1010-SDAG-NEXT:    v_readfirstlane_b32 s5, v5
; GFX1010-SDAG-NEXT:    v_readfirstlane_b32 s6, v3
; GFX1010-SDAG-NEXT:    v_readfirstlane_b32 s7, v2
; GFX1010-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX1010-SDAG-NEXT:    v_writelane_b32 v8, s4, s5
; GFX1010-SDAG-NEXT:    v_writelane_b32 v7, s6, s5
; GFX1010-SDAG-NEXT:    v_writelane_b32 v6, s7, s5
; GFX1010-SDAG-NEXT:    global_store_dwordx3 v[0:1], v[6:8], off
; GFX1010-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: test_writelane_v3p5:
; GFX1100-SDAG:       ; %bb.0:
; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT:    global_load_b96 v[6:8], v[0:1], off
; GFX1100-SDAG-NEXT:    v_readfirstlane_b32 s0, v4
; GFX1100-SDAG-NEXT:    v_readfirstlane_b32 s1, v5
; GFX1100-SDAG-NEXT:    v_readfirstlane_b32 s2, v3
; GFX1100-SDAG-NEXT:    v_readfirstlane_b32 s3, v2
; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX1100-SDAG-NEXT:    v_writelane_b32 v8, s0, s1
; GFX1100-SDAG-NEXT:    v_writelane_b32 v7, s2, s1
; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_3)
; GFX1100-SDAG-NEXT:    v_writelane_b32 v6, s3, s1
; GFX1100-SDAG-NEXT:    global_store_b96 v[0:1], v[6:8], off
; GFX1100-SDAG-NEXT:    s_setpc_b64 s[30:31]
  %oldval = load <3 x ptr addrspace(5)>, ptr addrspace(1) %out
  %writelane = call <3 x ptr addrspace(5)> @llvm.amdgcn.writelane.v3p5(<3 x ptr addrspace(5)> %src, i32 %src1, <3 x ptr addrspace(5)> %oldval)
  store <3 x ptr addrspace(5)> %writelane, ptr addrspace(1) %out, align 4
  ret void
}

; writelane on an address space 6 pointer (`ptr addrspace(6)`).
; The old pointer value is loaded from %out, passed as the third operand of
; llvm.amdgcn.writelane.p6 together with %src and the i32 %src1, and the
; result is stored back to %out. The value occupies a single dword in the
; expected code, so each target emits exactly one v_writelane_b32.
define void @test_writelane_p6(ptr addrspace(1) %out, ptr addrspace(6) %src, i32 %src1) {
; GFX802-SDAG-LABEL: test_writelane_p6:
; GFX802-SDAG:       ; %bb.0:
; GFX802-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX802-SDAG-NEXT:    flat_load_dword v4, v[0:1]
; GFX802-SDAG-NEXT:    v_readfirstlane_b32 m0, v3
; GFX802-SDAG-NEXT:    v_readfirstlane_b32 s4, v2
; GFX802-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX802-SDAG-NEXT:    s_nop 1
; GFX802-SDAG-NEXT:    v_writelane_b32 v4, s4, m0
; GFX802-SDAG-NEXT:    flat_store_dword v[0:1], v4
; GFX802-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX802-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX1010-SDAG-LABEL: test_writelane_p6:
; GFX1010-SDAG:       ; %bb.0:
; GFX1010-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-SDAG-NEXT:    global_load_dword v4, v[0:1], off
; GFX1010-SDAG-NEXT:    v_readfirstlane_b32 s4, v2
; GFX1010-SDAG-NEXT:    v_readfirstlane_b32 s5, v3
; GFX1010-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX1010-SDAG-NEXT:    v_writelane_b32 v4, s4, s5
; GFX1010-SDAG-NEXT:    global_store_dword v[0:1], v4, off
; GFX1010-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: test_writelane_p6:
; GFX1100-SDAG:       ; %bb.0:
; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT:    global_load_b32 v4, v[0:1], off
; GFX1100-SDAG-NEXT:    v_readfirstlane_b32 s0, v2
; GFX1100-SDAG-NEXT:    v_readfirstlane_b32 s1, v3
; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX1100-SDAG-NEXT:    v_writelane_b32 v4, s0, s1
; GFX1100-SDAG-NEXT:    global_store_b32 v[0:1], v4, off
; GFX1100-SDAG-NEXT:    s_setpc_b64 s[30:31]
  %oldval = load ptr addrspace(6), ptr addrspace(1) %out
  %writelane = call ptr addrspace(6) @llvm.amdgcn.writelane.p6(ptr addrspace(6) %src, i32 %src1, ptr addrspace(6) %oldval)
  store ptr addrspace(6) %writelane, ptr addrspace(1) %out, align 4
  ret void
}

; writelane on a vector of three address space 6 pointers
; (`<3 x ptr addrspace(6)>`).
; The old vector is loaded from %out, passed as the third operand of
; llvm.amdgcn.writelane.v3p6 together with %src and the i32 %src1, and the
; result is stored back to %out. The expected code moves the value with a
; single three-dword load/store and issues three v_writelane_b32
; instructions (one per element), all with the same last operand.
define void @test_writelane_v3p6(ptr addrspace(1) %out, <3 x ptr addrspace(6)> %src, i32 %src1) {
; GFX802-SDAG-LABEL: test_writelane_v3p6:
; GFX802-SDAG:       ; %bb.0:
; GFX802-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX802-SDAG-NEXT:    flat_load_dwordx3 v[6:8], v[0:1]
; GFX802-SDAG-NEXT:    v_readfirstlane_b32 m0, v5
; GFX802-SDAG-NEXT:    v_readfirstlane_b32 s4, v4
; GFX802-SDAG-NEXT:    v_readfirstlane_b32 s5, v3
; GFX802-SDAG-NEXT:    v_readfirstlane_b32 s6, v2
; GFX802-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX802-SDAG-NEXT:    v_writelane_b32 v8, s4, m0
; GFX802-SDAG-NEXT:    v_writelane_b32 v7, s5, m0
; GFX802-SDAG-NEXT:    v_writelane_b32 v6, s6, m0
; GFX802-SDAG-NEXT:    flat_store_dwordx3 v[0:1], v[6:8]
; GFX802-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX802-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX1010-SDAG-LABEL: test_writelane_v3p6:
; GFX1010-SDAG:       ; %bb.0:
; GFX1010-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-SDAG-NEXT:    global_load_dwordx3 v[6:8], v[0:1], off
; GFX1010-SDAG-NEXT:    v_readfirstlane_b32 s4, v4
; GFX1010-SDAG-NEXT:    v_readfirstlane_b32 s5, v5
; GFX1010-SDAG-NEXT:    v_readfirstlane_b32 s6, v3
; GFX1010-SDAG-NEXT:    v_readfirstlane_b32 s7, v2
; GFX1010-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX1010-SDAG-NEXT:    v_writelane_b32 v8, s4, s5
; GFX1010-SDAG-NEXT:    v_writelane_b32 v7, s6, s5
; GFX1010-SDAG-NEXT:    v_writelane_b32 v6, s7, s5
; GFX1010-SDAG-NEXT:    global_store_dwordx3 v[0:1], v[6:8], off
; GFX1010-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: test_writelane_v3p6:
; GFX1100-SDAG:       ; %bb.0:
; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT:    global_load_b96 v[6:8], v[0:1], off
; GFX1100-SDAG-NEXT:    v_readfirstlane_b32 s0, v4
; GFX1100-SDAG-NEXT:    v_readfirstlane_b32 s1, v5
; GFX1100-SDAG-NEXT:    v_readfirstlane_b32 s2, v3
; GFX1100-SDAG-NEXT:    v_readfirstlane_b32 s3, v2
; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX1100-SDAG-NEXT:    v_writelane_b32 v8, s0, s1
; GFX1100-SDAG-NEXT:    v_writelane_b32 v7, s2, s1
; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_3)
; GFX1100-SDAG-NEXT:    v_writelane_b32 v6, s3, s1
; GFX1100-SDAG-NEXT:    global_store_b96 v[0:1], v[6:8], off
; GFX1100-SDAG-NEXT:    s_setpc_b64 s[30:31]
  %oldval = load <3 x ptr addrspace(6)>, ptr addrspace(1) %out
  %writelane = call <3 x ptr addrspace(6)> @llvm.amdgcn.writelane.v3p6(<3 x ptr addrspace(6)> %src, i32 %src1, <3 x ptr addrspace(6)> %oldval)
  store <3 x ptr addrspace(6)> %writelane, ptr addrspace(1) %out, align 4
  ret void
}