xref: /llvm-project/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.writelane.ll (revision 5a81a559d69fb84e1e8ef623ac4b642081c14c51)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
2; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx802 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX802-SDAG %s
3; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX1010-SDAG %s
4; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx1100 -verify-machineinstrs -amdgpu-enable-vopd=0 < %s | FileCheck -check-prefixes=GFX1100-SDAG %s
5
6; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx802 -verify-machineinstrs -global-isel -global-isel-abort=2 < %s | FileCheck -check-prefixes=GFX802-GISEL %s
7; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx1010 -verify-machineinstrs -global-isel -global-isel-abort=2 < %s | FileCheck -check-prefixes=GFX1010-GISEL %s
8; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx1100 -verify-machineinstrs -amdgpu-enable-vopd=0 -global-isel -global-isel-abort=2 < %s | FileCheck -check-prefixes=GFX1100-GISEL %s
9
10declare i32 @llvm.amdgcn.writelane(i32, i32, i32) #0
11declare i64 @llvm.amdgcn.writelane.i64(i64, i32, i64) #0
12declare double @llvm.amdgcn.writelane.f64(double, i32, double) #0
13
; writelane on i32 where the first operand (%src0) and the second operand
; (%src1) are both kernel arguments, which the checks below load into SGPRs.
; The current contents of %out are loaded and passed as the third operand, and
; the intrinsic's result is stored back to %out.
; Expected code: a single v_writelane_b32 in every configuration. The gfx802
; runs first copy one scalar operand into m0, while the gfx1010 and gfx1100
; runs hand two SGPRs to the instruction directly.
14define amdgpu_kernel void @test_writelane_sreg_i32(ptr addrspace(1) %out, i32 %src0, i32 %src1) #1 {
15; GFX802-SDAG-LABEL: test_writelane_sreg_i32:
16; GFX802-SDAG:       ; %bb.0:
17; GFX802-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
18; GFX802-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
19; GFX802-SDAG-NEXT:    s_mov_b32 m0, s3
20; GFX802-SDAG-NEXT:    s_load_dword s3, s[0:1], 0x0
21; GFX802-SDAG-NEXT:    v_mov_b32_e32 v0, s0
22; GFX802-SDAG-NEXT:    v_mov_b32_e32 v1, s1
23; GFX802-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
24; GFX802-SDAG-NEXT:    v_mov_b32_e32 v2, s3
25; GFX802-SDAG-NEXT:    v_writelane_b32 v2, s2, m0
26; GFX802-SDAG-NEXT:    flat_store_dword v[0:1], v2
27; GFX802-SDAG-NEXT:    s_endpgm
28;
29; GFX1010-SDAG-LABEL: test_writelane_sreg_i32:
30; GFX1010-SDAG:       ; %bb.0:
31; GFX1010-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
32; GFX1010-SDAG-NEXT:    v_mov_b32_e32 v1, 0
33; GFX1010-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
34; GFX1010-SDAG-NEXT:    s_load_dword s4, s[0:1], 0x0
35; GFX1010-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
36; GFX1010-SDAG-NEXT:    v_mov_b32_e32 v0, s4
37; GFX1010-SDAG-NEXT:    v_writelane_b32 v0, s2, s3
38; GFX1010-SDAG-NEXT:    global_store_dword v1, v0, s[0:1]
39; GFX1010-SDAG-NEXT:    s_endpgm
40;
41; GFX1100-SDAG-LABEL: test_writelane_sreg_i32:
42; GFX1100-SDAG:       ; %bb.0:
43; GFX1100-SDAG-NEXT:    s_load_b128 s[0:3], s[4:5], 0x0
44; GFX1100-SDAG-NEXT:    v_mov_b32_e32 v1, 0
45; GFX1100-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
46; GFX1100-SDAG-NEXT:    s_load_b32 s4, s[0:1], 0x0
47; GFX1100-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
48; GFX1100-SDAG-NEXT:    v_mov_b32_e32 v0, s4
49; GFX1100-SDAG-NEXT:    v_writelane_b32 v0, s2, s3
50; GFX1100-SDAG-NEXT:    global_store_b32 v1, v0, s[0:1]
51; GFX1100-SDAG-NEXT:    s_endpgm
52;
53; GFX802-GISEL-LABEL: test_writelane_sreg_i32:
54; GFX802-GISEL:       ; %bb.0:
55; GFX802-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
56; GFX802-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
57; GFX802-GISEL-NEXT:    s_mov_b32 m0, s3
58; GFX802-GISEL-NEXT:    s_load_dword s3, s[0:1], 0x0
59; GFX802-GISEL-NEXT:    v_mov_b32_e32 v0, s0
60; GFX802-GISEL-NEXT:    v_mov_b32_e32 v1, s1
61; GFX802-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
62; GFX802-GISEL-NEXT:    v_mov_b32_e32 v2, s3
63; GFX802-GISEL-NEXT:    v_writelane_b32 v2, s2, m0
64; GFX802-GISEL-NEXT:    flat_store_dword v[0:1], v2
65; GFX802-GISEL-NEXT:    s_endpgm
66;
67; GFX1010-GISEL-LABEL: test_writelane_sreg_i32:
68; GFX1010-GISEL:       ; %bb.0:
69; GFX1010-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
70; GFX1010-GISEL-NEXT:    v_mov_b32_e32 v1, 0
71; GFX1010-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
72; GFX1010-GISEL-NEXT:    s_load_dword s4, s[0:1], 0x0
73; GFX1010-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
74; GFX1010-GISEL-NEXT:    v_mov_b32_e32 v0, s4
75; GFX1010-GISEL-NEXT:    v_writelane_b32 v0, s2, s3
76; GFX1010-GISEL-NEXT:    global_store_dword v1, v0, s[0:1]
77; GFX1010-GISEL-NEXT:    s_endpgm
78;
79; GFX1100-GISEL-LABEL: test_writelane_sreg_i32:
80; GFX1100-GISEL:       ; %bb.0:
81; GFX1100-GISEL-NEXT:    s_load_b128 s[0:3], s[4:5], 0x0
82; GFX1100-GISEL-NEXT:    v_mov_b32_e32 v1, 0
83; GFX1100-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
84; GFX1100-GISEL-NEXT:    s_load_b32 s4, s[0:1], 0x0
85; GFX1100-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
86; GFX1100-GISEL-NEXT:    v_mov_b32_e32 v0, s4
87; GFX1100-GISEL-NEXT:    v_writelane_b32 v0, s2, s3
88; GFX1100-GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
89; GFX1100-GISEL-NEXT:    s_endpgm
90  %oldval = load i32, ptr addrspace(1) %out
91  %writelane = call i32 @llvm.amdgcn.writelane.i32(i32 %src0, i32 %src1, i32 %oldval)
92  store i32 %writelane, ptr addrspace(1) %out, align 4
93  ret void
94}
95
; writelane on i64 with the first operand (%src0) and the second operand
; (%src1) both taken from kernel arguments. The 64-bit value already stored at
; %out is passed as the third operand and the result is written back to %out.
; Expected code: the 64-bit operation is emitted as two v_writelane_b32
; instructions, one per 32-bit register of the v[0:1] pair. The SelectionDAG
; runs write v1 before v0; the GlobalISel runs write v0 before v1. On gfx802
; the second operand is copied into m0 and both writes read it from there.
96define amdgpu_kernel void @test_writelane_sreg_i64(ptr addrspace(1) %out, i64 %src0, i32 %src1) #1 {
97; GFX802-SDAG-LABEL: test_writelane_sreg_i64:
98; GFX802-SDAG:       ; %bb.0:
99; GFX802-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
100; GFX802-SDAG-NEXT:    s_load_dword s6, s[8:9], 0x10
101; GFX802-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
102; GFX802-SDAG-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x0
103; GFX802-SDAG-NEXT:    s_mov_b32 m0, s6
104; GFX802-SDAG-NEXT:    v_mov_b32_e32 v3, s1
105; GFX802-SDAG-NEXT:    v_mov_b32_e32 v2, s0
106; GFX802-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
107; GFX802-SDAG-NEXT:    v_mov_b32_e32 v1, s5
108; GFX802-SDAG-NEXT:    v_mov_b32_e32 v0, s4
109; GFX802-SDAG-NEXT:    v_writelane_b32 v1, s3, m0
110; GFX802-SDAG-NEXT:    v_writelane_b32 v0, s2, m0
111; GFX802-SDAG-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
112; GFX802-SDAG-NEXT:    s_endpgm
113;
114; GFX1010-SDAG-LABEL: test_writelane_sreg_i64:
115; GFX1010-SDAG:       ; %bb.0:
116; GFX1010-SDAG-NEXT:    s_clause 0x1
117; GFX1010-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
118; GFX1010-SDAG-NEXT:    s_load_dword s6, s[8:9], 0x10
119; GFX1010-SDAG-NEXT:    v_mov_b32_e32 v2, 0
120; GFX1010-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
121; GFX1010-SDAG-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x0
122; GFX1010-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
123; GFX1010-SDAG-NEXT:    v_mov_b32_e32 v1, s5
124; GFX1010-SDAG-NEXT:    v_mov_b32_e32 v0, s4
125; GFX1010-SDAG-NEXT:    v_writelane_b32 v1, s3, s6
126; GFX1010-SDAG-NEXT:    v_writelane_b32 v0, s2, s6
127; GFX1010-SDAG-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
128; GFX1010-SDAG-NEXT:    s_endpgm
129;
130; GFX1100-SDAG-LABEL: test_writelane_sreg_i64:
131; GFX1100-SDAG:       ; %bb.0:
132; GFX1100-SDAG-NEXT:    s_clause 0x1
133; GFX1100-SDAG-NEXT:    s_load_b128 s[0:3], s[4:5], 0x0
134; GFX1100-SDAG-NEXT:    s_load_b32 s6, s[4:5], 0x10
135; GFX1100-SDAG-NEXT:    v_mov_b32_e32 v2, 0
136; GFX1100-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
137; GFX1100-SDAG-NEXT:    s_load_b64 s[4:5], s[0:1], 0x0
138; GFX1100-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
139; GFX1100-SDAG-NEXT:    v_mov_b32_e32 v1, s5
140; GFX1100-SDAG-NEXT:    v_mov_b32_e32 v0, s4
141; GFX1100-SDAG-NEXT:    v_writelane_b32 v1, s3, s6
142; GFX1100-SDAG-NEXT:    v_writelane_b32 v0, s2, s6
143; GFX1100-SDAG-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
144; GFX1100-SDAG-NEXT:    s_endpgm
145;
146; GFX802-GISEL-LABEL: test_writelane_sreg_i64:
147; GFX802-GISEL:       ; %bb.0:
148; GFX802-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
149; GFX802-GISEL-NEXT:    s_load_dword s6, s[8:9], 0x10
150; GFX802-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
151; GFX802-GISEL-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x0
152; GFX802-GISEL-NEXT:    s_mov_b32 m0, s6
153; GFX802-GISEL-NEXT:    v_mov_b32_e32 v3, s1
154; GFX802-GISEL-NEXT:    v_mov_b32_e32 v2, s0
155; GFX802-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
156; GFX802-GISEL-NEXT:    v_mov_b32_e32 v0, s4
157; GFX802-GISEL-NEXT:    v_mov_b32_e32 v1, s5
158; GFX802-GISEL-NEXT:    v_writelane_b32 v0, s2, m0
159; GFX802-GISEL-NEXT:    v_writelane_b32 v1, s3, m0
160; GFX802-GISEL-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
161; GFX802-GISEL-NEXT:    s_endpgm
162;
163; GFX1010-GISEL-LABEL: test_writelane_sreg_i64:
164; GFX1010-GISEL:       ; %bb.0:
165; GFX1010-GISEL-NEXT:    s_clause 0x1
166; GFX1010-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
167; GFX1010-GISEL-NEXT:    s_load_dword s6, s[8:9], 0x10
168; GFX1010-GISEL-NEXT:    v_mov_b32_e32 v2, 0
169; GFX1010-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
170; GFX1010-GISEL-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x0
171; GFX1010-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
172; GFX1010-GISEL-NEXT:    v_mov_b32_e32 v0, s4
173; GFX1010-GISEL-NEXT:    v_mov_b32_e32 v1, s5
174; GFX1010-GISEL-NEXT:    v_writelane_b32 v0, s2, s6
175; GFX1010-GISEL-NEXT:    v_writelane_b32 v1, s3, s6
176; GFX1010-GISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
177; GFX1010-GISEL-NEXT:    s_endpgm
178;
179; GFX1100-GISEL-LABEL: test_writelane_sreg_i64:
180; GFX1100-GISEL:       ; %bb.0:
181; GFX1100-GISEL-NEXT:    s_clause 0x1
182; GFX1100-GISEL-NEXT:    s_load_b128 s[0:3], s[4:5], 0x0
183; GFX1100-GISEL-NEXT:    s_load_b32 s6, s[4:5], 0x10
184; GFX1100-GISEL-NEXT:    v_mov_b32_e32 v2, 0
185; GFX1100-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
186; GFX1100-GISEL-NEXT:    s_load_b64 s[4:5], s[0:1], 0x0
187; GFX1100-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
188; GFX1100-GISEL-NEXT:    v_mov_b32_e32 v0, s4
189; GFX1100-GISEL-NEXT:    v_mov_b32_e32 v1, s5
190; GFX1100-GISEL-NEXT:    v_writelane_b32 v0, s2, s6
191; GFX1100-GISEL-NEXT:    v_writelane_b32 v1, s3, s6
192; GFX1100-GISEL-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
193; GFX1100-GISEL-NEXT:    s_endpgm
194  %oldval = load i64, ptr addrspace(1) %out
195  %writelane = call i64 @llvm.amdgcn.writelane.i64(i64 %src0, i32 %src1, i64 %oldval)
196  store i64 %writelane, ptr addrspace(1) %out, align 4
197  ret void
198}
199
; writelane on double with the first operand (%src0) and the second operand
; (%src1) both taken from kernel arguments. The double already stored at %out
; is passed as the third operand and the result is written back to %out.
; Expected code: two v_writelane_b32 instructions covering the v[0:1] pair,
; with no floating-point instructions involved. The SelectionDAG runs write v1
; before v0; the GlobalISel runs write v0 before v1. On gfx802 the second
; operand is copied into m0 and both writes read it from there.
200define amdgpu_kernel void @test_writelane_sreg_f64(ptr addrspace(1) %out, double %src0, i32 %src1) #1 {
201; GFX802-SDAG-LABEL: test_writelane_sreg_f64:
202; GFX802-SDAG:       ; %bb.0:
203; GFX802-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
204; GFX802-SDAG-NEXT:    s_load_dword s6, s[8:9], 0x10
205; GFX802-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
206; GFX802-SDAG-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x0
207; GFX802-SDAG-NEXT:    s_mov_b32 m0, s6
208; GFX802-SDAG-NEXT:    v_mov_b32_e32 v3, s1
209; GFX802-SDAG-NEXT:    v_mov_b32_e32 v2, s0
210; GFX802-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
211; GFX802-SDAG-NEXT:    v_mov_b32_e32 v1, s5
212; GFX802-SDAG-NEXT:    v_mov_b32_e32 v0, s4
213; GFX802-SDAG-NEXT:    v_writelane_b32 v1, s3, m0
214; GFX802-SDAG-NEXT:    v_writelane_b32 v0, s2, m0
215; GFX802-SDAG-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
216; GFX802-SDAG-NEXT:    s_endpgm
217;
218; GFX1010-SDAG-LABEL: test_writelane_sreg_f64:
219; GFX1010-SDAG:       ; %bb.0:
220; GFX1010-SDAG-NEXT:    s_clause 0x1
221; GFX1010-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
222; GFX1010-SDAG-NEXT:    s_load_dword s6, s[8:9], 0x10
223; GFX1010-SDAG-NEXT:    v_mov_b32_e32 v2, 0
224; GFX1010-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
225; GFX1010-SDAG-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x0
226; GFX1010-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
227; GFX1010-SDAG-NEXT:    v_mov_b32_e32 v1, s5
228; GFX1010-SDAG-NEXT:    v_mov_b32_e32 v0, s4
229; GFX1010-SDAG-NEXT:    v_writelane_b32 v1, s3, s6
230; GFX1010-SDAG-NEXT:    v_writelane_b32 v0, s2, s6
231; GFX1010-SDAG-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
232; GFX1010-SDAG-NEXT:    s_endpgm
233;
234; GFX1100-SDAG-LABEL: test_writelane_sreg_f64:
235; GFX1100-SDAG:       ; %bb.0:
236; GFX1100-SDAG-NEXT:    s_clause 0x1
237; GFX1100-SDAG-NEXT:    s_load_b128 s[0:3], s[4:5], 0x0
238; GFX1100-SDAG-NEXT:    s_load_b32 s6, s[4:5], 0x10
239; GFX1100-SDAG-NEXT:    v_mov_b32_e32 v2, 0
240; GFX1100-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
241; GFX1100-SDAG-NEXT:    s_load_b64 s[4:5], s[0:1], 0x0
242; GFX1100-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
243; GFX1100-SDAG-NEXT:    v_mov_b32_e32 v1, s5
244; GFX1100-SDAG-NEXT:    v_mov_b32_e32 v0, s4
245; GFX1100-SDAG-NEXT:    v_writelane_b32 v1, s3, s6
246; GFX1100-SDAG-NEXT:    v_writelane_b32 v0, s2, s6
247; GFX1100-SDAG-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
248; GFX1100-SDAG-NEXT:    s_endpgm
249;
250; GFX802-GISEL-LABEL: test_writelane_sreg_f64:
251; GFX802-GISEL:       ; %bb.0:
252; GFX802-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
253; GFX802-GISEL-NEXT:    s_load_dword s6, s[8:9], 0x10
254; GFX802-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
255; GFX802-GISEL-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x0
256; GFX802-GISEL-NEXT:    s_mov_b32 m0, s6
257; GFX802-GISEL-NEXT:    v_mov_b32_e32 v3, s1
258; GFX802-GISEL-NEXT:    v_mov_b32_e32 v2, s0
259; GFX802-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
260; GFX802-GISEL-NEXT:    v_mov_b32_e32 v0, s4
261; GFX802-GISEL-NEXT:    v_mov_b32_e32 v1, s5
262; GFX802-GISEL-NEXT:    v_writelane_b32 v0, s2, m0
263; GFX802-GISEL-NEXT:    v_writelane_b32 v1, s3, m0
264; GFX802-GISEL-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
265; GFX802-GISEL-NEXT:    s_endpgm
266;
267; GFX1010-GISEL-LABEL: test_writelane_sreg_f64:
268; GFX1010-GISEL:       ; %bb.0:
269; GFX1010-GISEL-NEXT:    s_clause 0x1
270; GFX1010-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
271; GFX1010-GISEL-NEXT:    s_load_dword s6, s[8:9], 0x10
272; GFX1010-GISEL-NEXT:    v_mov_b32_e32 v2, 0
273; GFX1010-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
274; GFX1010-GISEL-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x0
275; GFX1010-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
276; GFX1010-GISEL-NEXT:    v_mov_b32_e32 v0, s4
277; GFX1010-GISEL-NEXT:    v_mov_b32_e32 v1, s5
278; GFX1010-GISEL-NEXT:    v_writelane_b32 v0, s2, s6
279; GFX1010-GISEL-NEXT:    v_writelane_b32 v1, s3, s6
280; GFX1010-GISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
281; GFX1010-GISEL-NEXT:    s_endpgm
282;
283; GFX1100-GISEL-LABEL: test_writelane_sreg_f64:
284; GFX1100-GISEL:       ; %bb.0:
285; GFX1100-GISEL-NEXT:    s_clause 0x1
286; GFX1100-GISEL-NEXT:    s_load_b128 s[0:3], s[4:5], 0x0
287; GFX1100-GISEL-NEXT:    s_load_b32 s6, s[4:5], 0x10
288; GFX1100-GISEL-NEXT:    v_mov_b32_e32 v2, 0
289; GFX1100-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
290; GFX1100-GISEL-NEXT:    s_load_b64 s[4:5], s[0:1], 0x0
291; GFX1100-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
292; GFX1100-GISEL-NEXT:    v_mov_b32_e32 v0, s4
293; GFX1100-GISEL-NEXT:    v_mov_b32_e32 v1, s5
294; GFX1100-GISEL-NEXT:    v_writelane_b32 v0, s2, s6
295; GFX1100-GISEL-NEXT:    v_writelane_b32 v1, s3, s6
296; GFX1100-GISEL-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
297; GFX1100-GISEL-NEXT:    s_endpgm
298  %oldval = load double, ptr addrspace(1) %out
299  %writelane = call double @llvm.amdgcn.writelane.f64(double %src0, i32 %src1, double %oldval)
300  store double %writelane, ptr addrspace(1) %out, align 4
301  ret void
302}
303
; writelane on i32 where the first operand is the constant 32 and the second
; operand (%src1) is a kernel argument. The i32 already stored at %out is
; passed as the third operand and the result is written back to %out.
; Expected code: a single v_writelane_b32 that takes 32 as an immediate operand
; and the second operand straight from an SGPR. Unlike the all-register i32
; case, none of the runs (gfx802 included) copy anything into m0.
304define amdgpu_kernel void @test_writelane_imm_sreg_i32(ptr addrspace(1) %out, i32 %src1) #1 {
305; GFX802-SDAG-LABEL: test_writelane_imm_sreg_i32:
306; GFX802-SDAG:       ; %bb.0:
307; GFX802-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
308; GFX802-SDAG-NEXT:    s_load_dword s2, s[8:9], 0x8
309; GFX802-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
310; GFX802-SDAG-NEXT:    s_load_dword s3, s[0:1], 0x0
311; GFX802-SDAG-NEXT:    v_mov_b32_e32 v0, s0
312; GFX802-SDAG-NEXT:    v_mov_b32_e32 v1, s1
313; GFX802-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
314; GFX802-SDAG-NEXT:    v_mov_b32_e32 v2, s3
315; GFX802-SDAG-NEXT:    v_writelane_b32 v2, 32, s2
316; GFX802-SDAG-NEXT:    flat_store_dword v[0:1], v2
317; GFX802-SDAG-NEXT:    s_endpgm
318;
319; GFX1010-SDAG-LABEL: test_writelane_imm_sreg_i32:
320; GFX1010-SDAG:       ; %bb.0:
321; GFX1010-SDAG-NEXT:    s_clause 0x1
322; GFX1010-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
323; GFX1010-SDAG-NEXT:    s_load_dword s2, s[8:9], 0x8
324; GFX1010-SDAG-NEXT:    v_mov_b32_e32 v1, 0
325; GFX1010-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
326; GFX1010-SDAG-NEXT:    s_load_dword s3, s[0:1], 0x0
327; GFX1010-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
328; GFX1010-SDAG-NEXT:    v_mov_b32_e32 v0, s3
329; GFX1010-SDAG-NEXT:    v_writelane_b32 v0, 32, s2
330; GFX1010-SDAG-NEXT:    global_store_dword v1, v0, s[0:1]
331; GFX1010-SDAG-NEXT:    s_endpgm
332;
333; GFX1100-SDAG-LABEL: test_writelane_imm_sreg_i32:
334; GFX1100-SDAG:       ; %bb.0:
335; GFX1100-SDAG-NEXT:    s_clause 0x1
336; GFX1100-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
337; GFX1100-SDAG-NEXT:    s_load_b32 s2, s[4:5], 0x8
338; GFX1100-SDAG-NEXT:    v_mov_b32_e32 v1, 0
339; GFX1100-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
340; GFX1100-SDAG-NEXT:    s_load_b32 s3, s[0:1], 0x0
341; GFX1100-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
342; GFX1100-SDAG-NEXT:    v_mov_b32_e32 v0, s3
343; GFX1100-SDAG-NEXT:    v_writelane_b32 v0, 32, s2
344; GFX1100-SDAG-NEXT:    global_store_b32 v1, v0, s[0:1]
345; GFX1100-SDAG-NEXT:    s_endpgm
346;
347; GFX802-GISEL-LABEL: test_writelane_imm_sreg_i32:
348; GFX802-GISEL:       ; %bb.0:
349; GFX802-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
350; GFX802-GISEL-NEXT:    s_load_dword s2, s[8:9], 0x8
351; GFX802-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
352; GFX802-GISEL-NEXT:    s_load_dword s3, s[0:1], 0x0
353; GFX802-GISEL-NEXT:    v_mov_b32_e32 v0, s0
354; GFX802-GISEL-NEXT:    v_mov_b32_e32 v1, s1
355; GFX802-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
356; GFX802-GISEL-NEXT:    v_mov_b32_e32 v2, s3
357; GFX802-GISEL-NEXT:    v_writelane_b32 v2, 32, s2
358; GFX802-GISEL-NEXT:    flat_store_dword v[0:1], v2
359; GFX802-GISEL-NEXT:    s_endpgm
360;
361; GFX1010-GISEL-LABEL: test_writelane_imm_sreg_i32:
362; GFX1010-GISEL:       ; %bb.0:
363; GFX1010-GISEL-NEXT:    s_clause 0x1
364; GFX1010-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
365; GFX1010-GISEL-NEXT:    s_load_dword s2, s[8:9], 0x8
366; GFX1010-GISEL-NEXT:    v_mov_b32_e32 v1, 0
367; GFX1010-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
368; GFX1010-GISEL-NEXT:    s_load_dword s3, s[0:1], 0x0
369; GFX1010-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
370; GFX1010-GISEL-NEXT:    v_mov_b32_e32 v0, s3
371; GFX1010-GISEL-NEXT:    v_writelane_b32 v0, 32, s2
372; GFX1010-GISEL-NEXT:    global_store_dword v1, v0, s[0:1]
373; GFX1010-GISEL-NEXT:    s_endpgm
374;
375; GFX1100-GISEL-LABEL: test_writelane_imm_sreg_i32:
376; GFX1100-GISEL:       ; %bb.0:
377; GFX1100-GISEL-NEXT:    s_clause 0x1
378; GFX1100-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
379; GFX1100-GISEL-NEXT:    s_load_b32 s2, s[4:5], 0x8
380; GFX1100-GISEL-NEXT:    v_mov_b32_e32 v1, 0
381; GFX1100-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
382; GFX1100-GISEL-NEXT:    s_load_b32 s3, s[0:1], 0x0
383; GFX1100-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
384; GFX1100-GISEL-NEXT:    v_mov_b32_e32 v0, s3
385; GFX1100-GISEL-NEXT:    v_writelane_b32 v0, 32, s2
386; GFX1100-GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
387; GFX1100-GISEL-NEXT:    s_endpgm
388  %oldval = load i32, ptr addrspace(1) %out
389  %writelane = call i32 @llvm.amdgcn.writelane.i32(i32 32, i32 %src1, i32 %oldval)
390  store i32 %writelane, ptr addrspace(1) %out, align 4
391  ret void
392}
393
; writelane on i64 where the first operand is the constant 32 and the second
; operand (%src1) is a kernel argument. The i64 already stored at %out is
; passed as the third operand and the result is written back to %out.
; Expected code: two v_writelane_b32 instructions on the v[0:1] pair, with
; immediate 32 written into v0 and immediate 0 written into v1. The
; SelectionDAG runs write v1 before v0; the GlobalISel runs write v0 before v1.
; No run copies the second operand into m0.
394define amdgpu_kernel void @test_writelane_imm_sreg_i64(ptr addrspace(1) %out, i32 %src1) #1 {
395; GFX802-SDAG-LABEL: test_writelane_imm_sreg_i64:
396; GFX802-SDAG:       ; %bb.0:
397; GFX802-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
398; GFX802-SDAG-NEXT:    s_load_dword s4, s[8:9], 0x8
399; GFX802-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
400; GFX802-SDAG-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
401; GFX802-SDAG-NEXT:    v_mov_b32_e32 v3, s1
402; GFX802-SDAG-NEXT:    v_mov_b32_e32 v2, s0
403; GFX802-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
404; GFX802-SDAG-NEXT:    v_mov_b32_e32 v1, s3
405; GFX802-SDAG-NEXT:    v_mov_b32_e32 v0, s2
406; GFX802-SDAG-NEXT:    v_writelane_b32 v1, 0, s4
407; GFX802-SDAG-NEXT:    v_writelane_b32 v0, 32, s4
408; GFX802-SDAG-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
409; GFX802-SDAG-NEXT:    s_endpgm
410;
411; GFX1010-SDAG-LABEL: test_writelane_imm_sreg_i64:
412; GFX1010-SDAG:       ; %bb.0:
413; GFX1010-SDAG-NEXT:    s_clause 0x1
414; GFX1010-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
415; GFX1010-SDAG-NEXT:    s_load_dword s4, s[8:9], 0x8
416; GFX1010-SDAG-NEXT:    v_mov_b32_e32 v2, 0
417; GFX1010-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
418; GFX1010-SDAG-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
419; GFX1010-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
420; GFX1010-SDAG-NEXT:    v_mov_b32_e32 v1, s3
421; GFX1010-SDAG-NEXT:    v_mov_b32_e32 v0, s2
422; GFX1010-SDAG-NEXT:    v_writelane_b32 v1, 0, s4
423; GFX1010-SDAG-NEXT:    v_writelane_b32 v0, 32, s4
424; GFX1010-SDAG-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
425; GFX1010-SDAG-NEXT:    s_endpgm
426;
427; GFX1100-SDAG-LABEL: test_writelane_imm_sreg_i64:
428; GFX1100-SDAG:       ; %bb.0:
429; GFX1100-SDAG-NEXT:    s_clause 0x1
430; GFX1100-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
431; GFX1100-SDAG-NEXT:    s_load_b32 s4, s[4:5], 0x8
432; GFX1100-SDAG-NEXT:    v_mov_b32_e32 v2, 0
433; GFX1100-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
434; GFX1100-SDAG-NEXT:    s_load_b64 s[2:3], s[0:1], 0x0
435; GFX1100-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
436; GFX1100-SDAG-NEXT:    v_mov_b32_e32 v1, s3
437; GFX1100-SDAG-NEXT:    v_mov_b32_e32 v0, s2
438; GFX1100-SDAG-NEXT:    v_writelane_b32 v1, 0, s4
439; GFX1100-SDAG-NEXT:    v_writelane_b32 v0, 32, s4
440; GFX1100-SDAG-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
441; GFX1100-SDAG-NEXT:    s_endpgm
442;
443; GFX802-GISEL-LABEL: test_writelane_imm_sreg_i64:
444; GFX802-GISEL:       ; %bb.0:
445; GFX802-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
446; GFX802-GISEL-NEXT:    s_load_dword s4, s[8:9], 0x8
447; GFX802-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
448; GFX802-GISEL-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
449; GFX802-GISEL-NEXT:    v_mov_b32_e32 v3, s1
450; GFX802-GISEL-NEXT:    v_mov_b32_e32 v2, s0
451; GFX802-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
452; GFX802-GISEL-NEXT:    v_mov_b32_e32 v0, s2
453; GFX802-GISEL-NEXT:    v_mov_b32_e32 v1, s3
454; GFX802-GISEL-NEXT:    v_writelane_b32 v0, 32, s4
455; GFX802-GISEL-NEXT:    v_writelane_b32 v1, 0, s4
456; GFX802-GISEL-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
457; GFX802-GISEL-NEXT:    s_endpgm
458;
459; GFX1010-GISEL-LABEL: test_writelane_imm_sreg_i64:
460; GFX1010-GISEL:       ; %bb.0:
461; GFX1010-GISEL-NEXT:    s_clause 0x1
462; GFX1010-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
463; GFX1010-GISEL-NEXT:    s_load_dword s4, s[8:9], 0x8
464; GFX1010-GISEL-NEXT:    v_mov_b32_e32 v2, 0
465; GFX1010-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
466; GFX1010-GISEL-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
467; GFX1010-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
468; GFX1010-GISEL-NEXT:    v_mov_b32_e32 v0, s2
469; GFX1010-GISEL-NEXT:    v_mov_b32_e32 v1, s3
470; GFX1010-GISEL-NEXT:    v_writelane_b32 v0, 32, s4
471; GFX1010-GISEL-NEXT:    v_writelane_b32 v1, 0, s4
472; GFX1010-GISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
473; GFX1010-GISEL-NEXT:    s_endpgm
474;
475; GFX1100-GISEL-LABEL: test_writelane_imm_sreg_i64:
476; GFX1100-GISEL:       ; %bb.0:
477; GFX1100-GISEL-NEXT:    s_clause 0x1
478; GFX1100-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
479; GFX1100-GISEL-NEXT:    s_load_b32 s4, s[4:5], 0x8
480; GFX1100-GISEL-NEXT:    v_mov_b32_e32 v2, 0
481; GFX1100-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
482; GFX1100-GISEL-NEXT:    s_load_b64 s[2:3], s[0:1], 0x0
483; GFX1100-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
484; GFX1100-GISEL-NEXT:    v_mov_b32_e32 v0, s2
485; GFX1100-GISEL-NEXT:    v_mov_b32_e32 v1, s3
486; GFX1100-GISEL-NEXT:    v_writelane_b32 v0, 32, s4
487; GFX1100-GISEL-NEXT:    v_writelane_b32 v1, 0, s4
488; GFX1100-GISEL-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
489; GFX1100-GISEL-NEXT:    s_endpgm
490  %oldval = load i64, ptr addrspace(1) %out
491  %writelane = call i64 @llvm.amdgcn.writelane.i64(i64 32, i32 %src1, i64 %oldval)
492  store i64 %writelane, ptr addrspace(1) %out, align 4
493  ret void
494}
495
; writelane on double where the first operand is the constant 32.0 and the
; second operand (%src1) is a kernel argument. The double already stored at
; %out is passed as the third operand and the result is written back to %out.
; Expected code: two v_writelane_b32 instructions on the v[0:1] pair. The write
; to v0 uses immediate 0; the write to v1 uses 0x40400000, which every run
; first materializes in an SGPR with s_mov_b32 instead of encoding it in the
; writelane itself.
; On gfx802 the second operand is additionally copied into m0 for the write
; that reads the SGPR constant, while the immediate-0 write reads it from s4.
; The gfx1100 SelectionDAG run also expects an s_delay_alu between the
; s_mov_b32 and the first writelane.
496define amdgpu_kernel void @test_writelane_imm_sreg_f64(ptr addrspace(1) %out, i32 %src1) #1 {
497; GFX802-SDAG-LABEL: test_writelane_imm_sreg_f64:
498; GFX802-SDAG:       ; %bb.0:
499; GFX802-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
500; GFX802-SDAG-NEXT:    s_load_dword s4, s[8:9], 0x8
501; GFX802-SDAG-NEXT:    s_mov_b32 s5, 0x40400000
502; GFX802-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
503; GFX802-SDAG-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
504; GFX802-SDAG-NEXT:    s_mov_b32 m0, s4
505; GFX802-SDAG-NEXT:    v_mov_b32_e32 v3, s1
506; GFX802-SDAG-NEXT:    v_mov_b32_e32 v2, s0
507; GFX802-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
508; GFX802-SDAG-NEXT:    v_mov_b32_e32 v1, s3
509; GFX802-SDAG-NEXT:    v_mov_b32_e32 v0, s2
510; GFX802-SDAG-NEXT:    v_writelane_b32 v1, s5, m0
511; GFX802-SDAG-NEXT:    v_writelane_b32 v0, 0, s4
512; GFX802-SDAG-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
513; GFX802-SDAG-NEXT:    s_endpgm
514;
515; GFX1010-SDAG-LABEL: test_writelane_imm_sreg_f64:
516; GFX1010-SDAG:       ; %bb.0:
517; GFX1010-SDAG-NEXT:    s_clause 0x1
518; GFX1010-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
519; GFX1010-SDAG-NEXT:    s_load_dword s4, s[8:9], 0x8
520; GFX1010-SDAG-NEXT:    v_mov_b32_e32 v2, 0
521; GFX1010-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
522; GFX1010-SDAG-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
523; GFX1010-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
524; GFX1010-SDAG-NEXT:    v_mov_b32_e32 v1, s3
525; GFX1010-SDAG-NEXT:    v_mov_b32_e32 v0, s2
526; GFX1010-SDAG-NEXT:    s_mov_b32 s2, 0x40400000
527; GFX1010-SDAG-NEXT:    v_writelane_b32 v1, s2, s4
528; GFX1010-SDAG-NEXT:    v_writelane_b32 v0, 0, s4
529; GFX1010-SDAG-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
530; GFX1010-SDAG-NEXT:    s_endpgm
531;
532; GFX1100-SDAG-LABEL: test_writelane_imm_sreg_f64:
533; GFX1100-SDAG:       ; %bb.0:
534; GFX1100-SDAG-NEXT:    s_clause 0x1
535; GFX1100-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
536; GFX1100-SDAG-NEXT:    s_load_b32 s4, s[4:5], 0x8
537; GFX1100-SDAG-NEXT:    v_mov_b32_e32 v2, 0
538; GFX1100-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
539; GFX1100-SDAG-NEXT:    s_load_b64 s[2:3], s[0:1], 0x0
540; GFX1100-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
541; GFX1100-SDAG-NEXT:    v_mov_b32_e32 v1, s3
542; GFX1100-SDAG-NEXT:    v_mov_b32_e32 v0, s2
543; GFX1100-SDAG-NEXT:    s_mov_b32 s2, 0x40400000
544; GFX1100-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
545; GFX1100-SDAG-NEXT:    v_writelane_b32 v1, s2, s4
546; GFX1100-SDAG-NEXT:    v_writelane_b32 v0, 0, s4
547; GFX1100-SDAG-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
548; GFX1100-SDAG-NEXT:    s_endpgm
549;
550; GFX802-GISEL-LABEL: test_writelane_imm_sreg_f64:
551; GFX802-GISEL:       ; %bb.0:
552; GFX802-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
553; GFX802-GISEL-NEXT:    s_load_dword s4, s[8:9], 0x8
554; GFX802-GISEL-NEXT:    s_mov_b32 s5, 0x40400000
555; GFX802-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
556; GFX802-GISEL-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
557; GFX802-GISEL-NEXT:    s_mov_b32 m0, s4
558; GFX802-GISEL-NEXT:    v_mov_b32_e32 v3, s1
559; GFX802-GISEL-NEXT:    v_mov_b32_e32 v2, s0
560; GFX802-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
561; GFX802-GISEL-NEXT:    v_mov_b32_e32 v0, s2
562; GFX802-GISEL-NEXT:    v_mov_b32_e32 v1, s3
563; GFX802-GISEL-NEXT:    v_writelane_b32 v0, 0, s4
564; GFX802-GISEL-NEXT:    v_writelane_b32 v1, s5, m0
565; GFX802-GISEL-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
566; GFX802-GISEL-NEXT:    s_endpgm
567;
568; GFX1010-GISEL-LABEL: test_writelane_imm_sreg_f64:
569; GFX1010-GISEL:       ; %bb.0:
570; GFX1010-GISEL-NEXT:    s_clause 0x1
571; GFX1010-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
572; GFX1010-GISEL-NEXT:    s_load_dword s4, s[8:9], 0x8
573; GFX1010-GISEL-NEXT:    v_mov_b32_e32 v2, 0
574; GFX1010-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
575; GFX1010-GISEL-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
576; GFX1010-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
577; GFX1010-GISEL-NEXT:    v_mov_b32_e32 v0, s2
578; GFX1010-GISEL-NEXT:    v_mov_b32_e32 v1, s3
579; GFX1010-GISEL-NEXT:    s_mov_b32 s2, 0x40400000
580; GFX1010-GISEL-NEXT:    v_writelane_b32 v0, 0, s4
581; GFX1010-GISEL-NEXT:    v_writelane_b32 v1, s2, s4
582; GFX1010-GISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
583; GFX1010-GISEL-NEXT:    s_endpgm
584;
585; GFX1100-GISEL-LABEL: test_writelane_imm_sreg_f64:
586; GFX1100-GISEL:       ; %bb.0:
587; GFX1100-GISEL-NEXT:    s_clause 0x1
588; GFX1100-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
589; GFX1100-GISEL-NEXT:    s_load_b32 s4, s[4:5], 0x8
590; GFX1100-GISEL-NEXT:    v_mov_b32_e32 v2, 0
591; GFX1100-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
592; GFX1100-GISEL-NEXT:    s_load_b64 s[2:3], s[0:1], 0x0
593; GFX1100-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
594; GFX1100-GISEL-NEXT:    v_mov_b32_e32 v0, s2
595; GFX1100-GISEL-NEXT:    v_mov_b32_e32 v1, s3
596; GFX1100-GISEL-NEXT:    s_mov_b32 s2, 0x40400000
597; GFX1100-GISEL-NEXT:    v_writelane_b32 v0, 0, s4
598; GFX1100-GISEL-NEXT:    v_writelane_b32 v1, s2, s4
599; GFX1100-GISEL-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
600; GFX1100-GISEL-NEXT:    s_endpgm
601  %oldval = load double, ptr addrspace(1) %out
602  %writelane = call double @llvm.amdgcn.writelane.f64(double 32.0, i32 %src1, double %oldval)
603  store double %writelane, ptr addrspace(1) %out, align 4
604  ret void
605}
606
607define amdgpu_kernel void @test_writelane_vreg_lane_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
608; GFX802-SDAG-LABEL: test_writelane_vreg_lane_i32:
609; GFX802-SDAG:       ; %bb.0:
610; GFX802-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
611; GFX802-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
612; GFX802-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
613; GFX802-SDAG-NEXT:    v_mov_b32_e32 v1, s3
614; GFX802-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s2, v0
615; GFX802-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
616; GFX802-SDAG-NEXT:    v_add_u32_e32 v0, vcc, 4, v0
617; GFX802-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
618; GFX802-SDAG-NEXT:    flat_load_dword v0, v[0:1]
619; GFX802-SDAG-NEXT:    s_load_dword s2, s[0:1], 0x0
620; GFX802-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
621; GFX802-SDAG-NEXT:    v_mov_b32_e32 v2, s2
622; GFX802-SDAG-NEXT:    s_waitcnt vmcnt(0)
623; GFX802-SDAG-NEXT:    v_readfirstlane_b32 s2, v0
624; GFX802-SDAG-NEXT:    v_mov_b32_e32 v0, s0
625; GFX802-SDAG-NEXT:    s_nop 2
626; GFX802-SDAG-NEXT:    v_writelane_b32 v2, 12, s2
627; GFX802-SDAG-NEXT:    v_mov_b32_e32 v1, s1
628; GFX802-SDAG-NEXT:    flat_store_dword v[0:1], v2
629; GFX802-SDAG-NEXT:    s_endpgm
630;
631; GFX1010-SDAG-LABEL: test_writelane_vreg_lane_i32:
632; GFX1010-SDAG:       ; %bb.0:
633; GFX1010-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
634; GFX1010-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
635; GFX1010-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
636; GFX1010-SDAG-NEXT:    global_load_dword v0, v0, s[2:3] offset:4
637; GFX1010-SDAG-NEXT:    s_waitcnt_depctr 0xffe3
638; GFX1010-SDAG-NEXT:    s_load_dword s2, s[0:1], 0x0
639; GFX1010-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
640; GFX1010-SDAG-NEXT:    v_mov_b32_e32 v1, s2
641; GFX1010-SDAG-NEXT:    s_waitcnt vmcnt(0)
642; GFX1010-SDAG-NEXT:    v_readfirstlane_b32 s2, v0
643; GFX1010-SDAG-NEXT:    v_mov_b32_e32 v0, 0
644; GFX1010-SDAG-NEXT:    v_writelane_b32 v1, 12, s2
645; GFX1010-SDAG-NEXT:    global_store_dword v0, v1, s[0:1]
646; GFX1010-SDAG-NEXT:    s_endpgm
647;
648; GFX1100-SDAG-LABEL: test_writelane_vreg_lane_i32:
649; GFX1100-SDAG:       ; %bb.0:
650; GFX1100-SDAG-NEXT:    s_load_b128 s[0:3], s[4:5], 0x0
651; GFX1100-SDAG-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
652; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
653; GFX1100-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
654; GFX1100-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
655; GFX1100-SDAG-NEXT:    global_load_b32 v0, v0, s[2:3] offset:4
656; GFX1100-SDAG-NEXT:    s_load_b32 s2, s[0:1], 0x0
657; GFX1100-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
658; GFX1100-SDAG-NEXT:    v_mov_b32_e32 v1, s2
659; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0)
660; GFX1100-SDAG-NEXT:    v_readfirstlane_b32 s2, v0
661; GFX1100-SDAG-NEXT:    v_mov_b32_e32 v0, 0
662; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2)
663; GFX1100-SDAG-NEXT:    v_writelane_b32 v1, 12, s2
664; GFX1100-SDAG-NEXT:    global_store_b32 v0, v1, s[0:1]
665; GFX1100-SDAG-NEXT:    s_endpgm
666;
667; GFX802-GISEL-LABEL: test_writelane_vreg_lane_i32:
668; GFX802-GISEL:       ; %bb.0:
669; GFX802-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
670; GFX802-GISEL-NEXT:    v_lshlrev_b32_e32 v2, 3, v0
671; GFX802-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
672; GFX802-GISEL-NEXT:    v_mov_b32_e32 v0, s2
673; GFX802-GISEL-NEXT:    v_mov_b32_e32 v1, s3
674; GFX802-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
675; GFX802-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
676; GFX802-GISEL-NEXT:    v_add_u32_e32 v0, vcc, 4, v0
677; GFX802-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
678; GFX802-GISEL-NEXT:    flat_load_dword v0, v[0:1]
679; GFX802-GISEL-NEXT:    s_load_dword s2, s[0:1], 0x0
680; GFX802-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
681; GFX802-GISEL-NEXT:    v_mov_b32_e32 v2, s2
682; GFX802-GISEL-NEXT:    s_waitcnt vmcnt(0)
683; GFX802-GISEL-NEXT:    v_readfirstlane_b32 s2, v0
684; GFX802-GISEL-NEXT:    v_mov_b32_e32 v0, s0
685; GFX802-GISEL-NEXT:    s_nop 2
686; GFX802-GISEL-NEXT:    v_writelane_b32 v2, 12, s2
687; GFX802-GISEL-NEXT:    v_mov_b32_e32 v1, s1
688; GFX802-GISEL-NEXT:    flat_store_dword v[0:1], v2
689; GFX802-GISEL-NEXT:    s_endpgm
690;
691; GFX1010-GISEL-LABEL: test_writelane_vreg_lane_i32:
692; GFX1010-GISEL:       ; %bb.0:
693; GFX1010-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
694; GFX1010-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
695; GFX1010-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
696; GFX1010-GISEL-NEXT:    global_load_dword v0, v0, s[2:3] offset:4
697; GFX1010-GISEL-NEXT:    s_waitcnt_depctr 0xffe3
698; GFX1010-GISEL-NEXT:    s_load_dword s2, s[0:1], 0x0
699; GFX1010-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
700; GFX1010-GISEL-NEXT:    v_mov_b32_e32 v1, s2
701; GFX1010-GISEL-NEXT:    s_waitcnt vmcnt(0)
702; GFX1010-GISEL-NEXT:    v_readfirstlane_b32 s2, v0
703; GFX1010-GISEL-NEXT:    v_mov_b32_e32 v0, 0
704; GFX1010-GISEL-NEXT:    v_writelane_b32 v1, 12, s2
705; GFX1010-GISEL-NEXT:    global_store_dword v0, v1, s[0:1]
706; GFX1010-GISEL-NEXT:    s_endpgm
707;
708; GFX1100-GISEL-LABEL: test_writelane_vreg_lane_i32:
709; GFX1100-GISEL:       ; %bb.0:
710; GFX1100-GISEL-NEXT:    s_load_b128 s[0:3], s[4:5], 0x0
711; GFX1100-GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
712; GFX1100-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
713; GFX1100-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
714; GFX1100-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
715; GFX1100-GISEL-NEXT:    global_load_b32 v0, v0, s[2:3] offset:4
716; GFX1100-GISEL-NEXT:    s_load_b32 s2, s[0:1], 0x0
717; GFX1100-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
718; GFX1100-GISEL-NEXT:    v_mov_b32_e32 v1, s2
719; GFX1100-GISEL-NEXT:    s_waitcnt vmcnt(0)
720; GFX1100-GISEL-NEXT:    v_readfirstlane_b32 s2, v0
721; GFX1100-GISEL-NEXT:    v_mov_b32_e32 v0, 0
722; GFX1100-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2)
723; GFX1100-GISEL-NEXT:    v_writelane_b32 v1, 12, s2
724; GFX1100-GISEL-NEXT:    global_store_b32 v0, v1, s[0:1]
725; GFX1100-GISEL-NEXT:    s_endpgm
726  %tid = call i32 @llvm.amdgcn.workitem.id.x()
727  %gep.in = getelementptr <2 x i32>, ptr addrspace(1) %in, i32 %tid
728  %args = load <2 x i32>, ptr addrspace(1) %gep.in
729  %oldval = load i32, ptr addrspace(1) %out
730  %lane = extractelement <2 x i32> %args, i32 1
731  %writelane = call i32 @llvm.amdgcn.writelane.i32(i32 12, i32 %lane, i32 %oldval)
732  store i32 %writelane, ptr addrspace(1) %out, align 4
733  ret void
734}
735
; i64 writelane whose lane select is loaded per work-item (the load address is
; indexed by workitem.id.x). The value written is the constant 12 and the old
; value is loaded from %out. The expected output below moves the lane select
; into an SGPR with v_readfirstlane_b32 and then emits one v_writelane_b32 per
; 32-bit half of the i64: 12 for the low half and 0 for the high half.
736define amdgpu_kernel void @test_writelane_vreg_lane_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
737; GFX802-SDAG-LABEL: test_writelane_vreg_lane_i64:
738; GFX802-SDAG:       ; %bb.0:
739; GFX802-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
740; GFX802-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 4, v0
741; GFX802-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
742; GFX802-SDAG-NEXT:    v_mov_b32_e32 v1, s3
743; GFX802-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s2, v0
744; GFX802-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
745; GFX802-SDAG-NEXT:    v_add_u32_e32 v0, vcc, 8, v0
746; GFX802-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
747; GFX802-SDAG-NEXT:    flat_load_dword v2, v[0:1]
748; GFX802-SDAG-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
749; GFX802-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
750; GFX802-SDAG-NEXT:    v_mov_b32_e32 v1, s3
751; GFX802-SDAG-NEXT:    v_mov_b32_e32 v0, s2
752; GFX802-SDAG-NEXT:    s_waitcnt vmcnt(0)
753; GFX802-SDAG-NEXT:    v_readfirstlane_b32 s2, v2
754; GFX802-SDAG-NEXT:    v_mov_b32_e32 v3, s1
755; GFX802-SDAG-NEXT:    s_nop 2
756; GFX802-SDAG-NEXT:    v_writelane_b32 v1, 0, s2
757; GFX802-SDAG-NEXT:    v_writelane_b32 v0, 12, s2
758; GFX802-SDAG-NEXT:    v_mov_b32_e32 v2, s0
759; GFX802-SDAG-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
760; GFX802-SDAG-NEXT:    s_endpgm
761;
762; GFX1010-SDAG-LABEL: test_writelane_vreg_lane_i64:
763; GFX1010-SDAG:       ; %bb.0:
764; GFX1010-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
765; GFX1010-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 4, v0
766; GFX1010-SDAG-NEXT:    v_mov_b32_e32 v2, 0
767; GFX1010-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
768; GFX1010-SDAG-NEXT:    global_load_dword v0, v0, s[2:3] offset:8
769; GFX1010-SDAG-NEXT:    s_waitcnt_depctr 0xffe3
770; GFX1010-SDAG-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
771; GFX1010-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
772; GFX1010-SDAG-NEXT:    v_mov_b32_e32 v1, s3
773; GFX1010-SDAG-NEXT:    s_waitcnt vmcnt(0)
774; GFX1010-SDAG-NEXT:    v_readfirstlane_b32 s3, v0
775; GFX1010-SDAG-NEXT:    v_mov_b32_e32 v0, s2
776; GFX1010-SDAG-NEXT:    v_writelane_b32 v1, 0, s3
777; GFX1010-SDAG-NEXT:    v_writelane_b32 v0, 12, s3
778; GFX1010-SDAG-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
779; GFX1010-SDAG-NEXT:    s_endpgm
780;
781; GFX1100-SDAG-LABEL: test_writelane_vreg_lane_i64:
782; GFX1100-SDAG:       ; %bb.0:
783; GFX1100-SDAG-NEXT:    s_load_b128 s[0:3], s[4:5], 0x0
784; GFX1100-SDAG-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
785; GFX1100-SDAG-NEXT:    v_mov_b32_e32 v2, 0
786; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2)
787; GFX1100-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 4, v0
788; GFX1100-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
789; GFX1100-SDAG-NEXT:    global_load_b32 v0, v0, s[2:3] offset:8
790; GFX1100-SDAG-NEXT:    s_load_b64 s[2:3], s[0:1], 0x0
791; GFX1100-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
792; GFX1100-SDAG-NEXT:    v_mov_b32_e32 v1, s3
793; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0)
794; GFX1100-SDAG-NEXT:    v_readfirstlane_b32 s3, v0
795; GFX1100-SDAG-NEXT:    v_mov_b32_e32 v0, s2
796; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2)
797; GFX1100-SDAG-NEXT:    v_writelane_b32 v1, 0, s3
798; GFX1100-SDAG-NEXT:    v_writelane_b32 v0, 12, s3
799; GFX1100-SDAG-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
800; GFX1100-SDAG-NEXT:    s_endpgm
801;
802; GFX802-GISEL-LABEL: test_writelane_vreg_lane_i64:
803; GFX802-GISEL:       ; %bb.0:
804; GFX802-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
805; GFX802-GISEL-NEXT:    v_lshlrev_b32_e32 v2, 4, v0
806; GFX802-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
807; GFX802-GISEL-NEXT:    v_mov_b32_e32 v0, s2
808; GFX802-GISEL-NEXT:    v_mov_b32_e32 v1, s3
809; GFX802-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
810; GFX802-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
811; GFX802-GISEL-NEXT:    v_add_u32_e32 v0, vcc, 8, v0
812; GFX802-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
813; GFX802-GISEL-NEXT:    flat_load_dwordx2 v[0:1], v[0:1]
814; GFX802-GISEL-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
815; GFX802-GISEL-NEXT:    v_mov_b32_e32 v4, s1
816; GFX802-GISEL-NEXT:    v_mov_b32_e32 v3, s0
817; GFX802-GISEL-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
818; GFX802-GISEL-NEXT:    v_mov_b32_e32 v1, s2
819; GFX802-GISEL-NEXT:    v_mov_b32_e32 v2, s3
820; GFX802-GISEL-NEXT:    v_readfirstlane_b32 s2, v0
821; GFX802-GISEL-NEXT:    s_nop 3
822; GFX802-GISEL-NEXT:    v_writelane_b32 v1, 12, s2
823; GFX802-GISEL-NEXT:    v_writelane_b32 v2, 0, s2
824; GFX802-GISEL-NEXT:    flat_store_dwordx2 v[3:4], v[1:2]
825; GFX802-GISEL-NEXT:    s_endpgm
826;
827; GFX1010-GISEL-LABEL: test_writelane_vreg_lane_i64:
828; GFX1010-GISEL:       ; %bb.0:
829; GFX1010-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
830; GFX1010-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 4, v0
831; GFX1010-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
832; GFX1010-GISEL-NEXT:    global_load_dwordx2 v[0:1], v0, s[2:3] offset:8
833; GFX1010-GISEL-NEXT:    s_waitcnt_depctr 0xffe3
834; GFX1010-GISEL-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
835; GFX1010-GISEL-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
836; GFX1010-GISEL-NEXT:    v_mov_b32_e32 v1, s2
837; GFX1010-GISEL-NEXT:    v_mov_b32_e32 v2, s3
838; GFX1010-GISEL-NEXT:    v_readfirstlane_b32 s2, v0
839; GFX1010-GISEL-NEXT:    v_mov_b32_e32 v0, 0
840; GFX1010-GISEL-NEXT:    v_writelane_b32 v1, 12, s2
841; GFX1010-GISEL-NEXT:    v_writelane_b32 v2, 0, s2
842; GFX1010-GISEL-NEXT:    global_store_dwordx2 v0, v[1:2], s[0:1]
843; GFX1010-GISEL-NEXT:    s_endpgm
844;
845; GFX1100-GISEL-LABEL: test_writelane_vreg_lane_i64:
846; GFX1100-GISEL:       ; %bb.0:
847; GFX1100-GISEL-NEXT:    s_load_b128 s[0:3], s[4:5], 0x0
848; GFX1100-GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
849; GFX1100-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
850; GFX1100-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 4, v0
851; GFX1100-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
852; GFX1100-GISEL-NEXT:    global_load_b64 v[0:1], v0, s[2:3] offset:8
853; GFX1100-GISEL-NEXT:    s_load_b64 s[2:3], s[0:1], 0x0
854; GFX1100-GISEL-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
855; GFX1100-GISEL-NEXT:    v_mov_b32_e32 v1, s2
856; GFX1100-GISEL-NEXT:    v_mov_b32_e32 v2, s3
857; GFX1100-GISEL-NEXT:    v_readfirstlane_b32 s2, v0
858; GFX1100-GISEL-NEXT:    v_mov_b32_e32 v0, 0
859; GFX1100-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2)
860; GFX1100-GISEL-NEXT:    v_writelane_b32 v1, 12, s2
861; GFX1100-GISEL-NEXT:    v_writelane_b32 v2, 0, s2
862; GFX1100-GISEL-NEXT:    global_store_b64 v0, v[1:2], s[0:1]
863; GFX1100-GISEL-NEXT:    s_endpgm
  ; Each work-item loads its own <2 x i64> from %in, so the lane select below
  ; is a per-work-item (vector) value rather than a kernel argument.
864  %tid = call i32 @llvm.amdgcn.workitem.id.x()
865  %gep.in = getelementptr <2 x i64>, ptr addrspace(1) %in, i32 %tid
866  %args = load <2 x i64>, ptr addrspace(1) %gep.in
867  %oldval = load i64, ptr addrspace(1) %out
  ; Only element 1 of the loaded vector is used, truncated to the i32 lane select.
868  %lane = extractelement <2 x i64> %args, i32 1
869  %lane32 = trunc i64 %lane to i32
  ; Operands: value to write (12), lane select, old value.
870  %writelane = call i64 @llvm.amdgcn.writelane.i64(i64 12, i32 %lane32, i64 %oldval)
  ; Note the i64 store is declared with only 4-byte alignment.
871  store i64 %writelane, ptr addrspace(1) %out, align 4
872  ret void
873}
874
; f64 writelane whose lane select is loaded per work-item (the load address is
; indexed by workitem.id.x). The value written is the constant 12.0 and the old
; value is loaded from %out. The expected output below splits the operation
; into two v_writelane_b32: the high half writes 0x40280000 (materialized in an
; SGPR with s_mov_b32) and the low half writes 0. In the gfx802 output the
; high-half writelane takes its lane select from m0, while the gfx1010 and
; gfx1100 output uses an ordinary SGPR for it.
875define amdgpu_kernel void @test_writelane_vreg_lane_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
876; GFX802-SDAG-LABEL: test_writelane_vreg_lane_f64:
877; GFX802-SDAG:       ; %bb.0:
878; GFX802-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
879; GFX802-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 4, v0
880; GFX802-SDAG-NEXT:    s_mov_b32 s4, 0x40280000
881; GFX802-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
882; GFX802-SDAG-NEXT:    v_mov_b32_e32 v1, s3
883; GFX802-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s2, v0
884; GFX802-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
885; GFX802-SDAG-NEXT:    v_add_u32_e32 v0, vcc, 8, v0
886; GFX802-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
887; GFX802-SDAG-NEXT:    flat_load_dword v2, v[0:1]
888; GFX802-SDAG-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
889; GFX802-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
890; GFX802-SDAG-NEXT:    v_mov_b32_e32 v1, s3
891; GFX802-SDAG-NEXT:    v_mov_b32_e32 v0, s2
892; GFX802-SDAG-NEXT:    s_waitcnt vmcnt(0)
893; GFX802-SDAG-NEXT:    v_readfirstlane_b32 m0, v2
894; GFX802-SDAG-NEXT:    v_readfirstlane_b32 s2, v2
895; GFX802-SDAG-NEXT:    v_mov_b32_e32 v3, s1
896; GFX802-SDAG-NEXT:    s_nop 1
897; GFX802-SDAG-NEXT:    v_writelane_b32 v1, s4, m0
898; GFX802-SDAG-NEXT:    v_mov_b32_e32 v2, s0
899; GFX802-SDAG-NEXT:    v_writelane_b32 v0, 0, s2
900; GFX802-SDAG-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
901; GFX802-SDAG-NEXT:    s_endpgm
902;
903; GFX1010-SDAG-LABEL: test_writelane_vreg_lane_f64:
904; GFX1010-SDAG:       ; %bb.0:
905; GFX1010-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
906; GFX1010-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 4, v0
907; GFX1010-SDAG-NEXT:    v_mov_b32_e32 v2, 0
908; GFX1010-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
909; GFX1010-SDAG-NEXT:    global_load_dword v0, v0, s[2:3] offset:8
910; GFX1010-SDAG-NEXT:    s_waitcnt_depctr 0xffe3
911; GFX1010-SDAG-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
912; GFX1010-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
913; GFX1010-SDAG-NEXT:    v_mov_b32_e32 v1, s3
914; GFX1010-SDAG-NEXT:    s_waitcnt vmcnt(0)
915; GFX1010-SDAG-NEXT:    v_readfirstlane_b32 s3, v0
916; GFX1010-SDAG-NEXT:    v_mov_b32_e32 v0, s2
917; GFX1010-SDAG-NEXT:    s_mov_b32 s2, 0x40280000
918; GFX1010-SDAG-NEXT:    v_writelane_b32 v1, s2, s3
919; GFX1010-SDAG-NEXT:    v_writelane_b32 v0, 0, s3
920; GFX1010-SDAG-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
921; GFX1010-SDAG-NEXT:    s_endpgm
922;
923; GFX1100-SDAG-LABEL: test_writelane_vreg_lane_f64:
924; GFX1100-SDAG:       ; %bb.0:
925; GFX1100-SDAG-NEXT:    s_load_b128 s[0:3], s[4:5], 0x0
926; GFX1100-SDAG-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
927; GFX1100-SDAG-NEXT:    v_mov_b32_e32 v2, 0
928; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2)
929; GFX1100-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 4, v0
930; GFX1100-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
931; GFX1100-SDAG-NEXT:    global_load_b32 v0, v0, s[2:3] offset:8
932; GFX1100-SDAG-NEXT:    s_load_b64 s[2:3], s[0:1], 0x0
933; GFX1100-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
934; GFX1100-SDAG-NEXT:    v_mov_b32_e32 v1, s3
935; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0)
936; GFX1100-SDAG-NEXT:    v_readfirstlane_b32 s3, v0
937; GFX1100-SDAG-NEXT:    v_mov_b32_e32 v0, s2
938; GFX1100-SDAG-NEXT:    s_mov_b32 s2, 0x40280000
939; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instid1(SALU_CYCLE_1)
940; GFX1100-SDAG-NEXT:    v_writelane_b32 v1, s2, s3
941; GFX1100-SDAG-NEXT:    v_writelane_b32 v0, 0, s3
942; GFX1100-SDAG-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
943; GFX1100-SDAG-NEXT:    s_endpgm
944;
945; GFX802-GISEL-LABEL: test_writelane_vreg_lane_f64:
946; GFX802-GISEL:       ; %bb.0:
947; GFX802-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
948; GFX802-GISEL-NEXT:    v_lshlrev_b32_e32 v2, 4, v0
949; GFX802-GISEL-NEXT:    s_mov_b32 s4, 0x40280000
950; GFX802-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
951; GFX802-GISEL-NEXT:    v_mov_b32_e32 v0, s2
952; GFX802-GISEL-NEXT:    v_mov_b32_e32 v1, s3
953; GFX802-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
954; GFX802-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
955; GFX802-GISEL-NEXT:    v_add_u32_e32 v0, vcc, 8, v0
956; GFX802-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
957; GFX802-GISEL-NEXT:    flat_load_dwordx2 v[0:1], v[0:1]
958; GFX802-GISEL-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
959; GFX802-GISEL-NEXT:    v_mov_b32_e32 v4, s1
960; GFX802-GISEL-NEXT:    v_mov_b32_e32 v3, s0
961; GFX802-GISEL-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
962; GFX802-GISEL-NEXT:    v_mov_b32_e32 v1, s2
963; GFX802-GISEL-NEXT:    v_mov_b32_e32 v2, s3
964; GFX802-GISEL-NEXT:    v_readfirstlane_b32 s2, v0
965; GFX802-GISEL-NEXT:    s_mov_b32 m0, s2
966; GFX802-GISEL-NEXT:    s_nop 2
967; GFX802-GISEL-NEXT:    v_writelane_b32 v1, 0, s2
968; GFX802-GISEL-NEXT:    v_writelane_b32 v2, s4, m0
969; GFX802-GISEL-NEXT:    flat_store_dwordx2 v[3:4], v[1:2]
970; GFX802-GISEL-NEXT:    s_endpgm
971;
972; GFX1010-GISEL-LABEL: test_writelane_vreg_lane_f64:
973; GFX1010-GISEL:       ; %bb.0:
974; GFX1010-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
975; GFX1010-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 4, v0
976; GFX1010-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
977; GFX1010-GISEL-NEXT:    global_load_dwordx2 v[0:1], v0, s[2:3] offset:8
978; GFX1010-GISEL-NEXT:    s_waitcnt_depctr 0xffe3
979; GFX1010-GISEL-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
980; GFX1010-GISEL-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
981; GFX1010-GISEL-NEXT:    v_mov_b32_e32 v1, s2
982; GFX1010-GISEL-NEXT:    v_mov_b32_e32 v2, s3
983; GFX1010-GISEL-NEXT:    s_mov_b32 s3, 0x40280000
984; GFX1010-GISEL-NEXT:    v_readfirstlane_b32 s2, v0
985; GFX1010-GISEL-NEXT:    v_mov_b32_e32 v0, 0
986; GFX1010-GISEL-NEXT:    v_writelane_b32 v1, 0, s2
987; GFX1010-GISEL-NEXT:    v_writelane_b32 v2, s3, s2
988; GFX1010-GISEL-NEXT:    global_store_dwordx2 v0, v[1:2], s[0:1]
989; GFX1010-GISEL-NEXT:    s_endpgm
990;
991; GFX1100-GISEL-LABEL: test_writelane_vreg_lane_f64:
992; GFX1100-GISEL:       ; %bb.0:
993; GFX1100-GISEL-NEXT:    s_load_b128 s[0:3], s[4:5], 0x0
994; GFX1100-GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
995; GFX1100-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
996; GFX1100-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 4, v0
997; GFX1100-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
998; GFX1100-GISEL-NEXT:    global_load_b64 v[0:1], v0, s[2:3] offset:8
999; GFX1100-GISEL-NEXT:    s_load_b64 s[2:3], s[0:1], 0x0
1000; GFX1100-GISEL-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1001; GFX1100-GISEL-NEXT:    v_mov_b32_e32 v1, s2
1002; GFX1100-GISEL-NEXT:    v_mov_b32_e32 v2, s3
1003; GFX1100-GISEL-NEXT:    s_mov_b32 s3, 0x40280000
1004; GFX1100-GISEL-NEXT:    v_readfirstlane_b32 s2, v0
1005; GFX1100-GISEL-NEXT:    v_mov_b32_e32 v0, 0
1006; GFX1100-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2)
1007; GFX1100-GISEL-NEXT:    v_writelane_b32 v1, 0, s2
1008; GFX1100-GISEL-NEXT:    v_writelane_b32 v2, s3, s2
1009; GFX1100-GISEL-NEXT:    global_store_b64 v0, v[1:2], s[0:1]
1010; GFX1100-GISEL-NEXT:    s_endpgm
  ; Each work-item loads its own <2 x double> from %in, so the lane select
  ; below is a per-work-item (vector) value rather than a kernel argument.
1011  %tid = call i32 @llvm.amdgcn.workitem.id.x()
1012  %gep.in = getelementptr <2 x double>, ptr addrspace(1) %in, i32 %tid
1013  %args = load <2 x double>, ptr addrspace(1) %gep.in
1014  %oldval = load double, ptr addrspace(1) %out
  ; The lane select is the low 32 bits of the bit pattern of element 1
  ; (bitcast to i64, then truncated), not a float-to-int conversion.
1015  %lane = extractelement <2 x double> %args, i32 1
1016  %lane_cast = bitcast double %lane to i64
1017  %lane32 = trunc i64 %lane_cast to i32
  ; Operands: value to write (12.0), lane select, old value.
1018  %writelane = call double @llvm.amdgcn.writelane.f64(double 12.0, i32 %lane32, double %oldval)
  ; Note the double store is declared with only 4-byte alignment.
1019  store double %writelane, ptr addrspace(1) %out, align 4
1020  ret void
1021}
1022
; i32 writelane where the value to write is produced in m0 by inline asm
; (constraint "={m0}") and the lane select is the kernel argument %src1.
; In the gfx802 output below, m0 is first copied to s4 and m0 is then reloaded
; with the lane select, giving "v_writelane_b32 v2, s4, m0". In the gfx1010 and
; gfx1100 output, m0 is used directly as the value operand and the lane select
; stays in s2.
1023define amdgpu_kernel void @test_writelane_m0_sreg_i32(ptr addrspace(1) %out, i32 %src1) #1 {
1024; GFX802-SDAG-LABEL: test_writelane_m0_sreg_i32:
1025; GFX802-SDAG:       ; %bb.0:
1026; GFX802-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
1027; GFX802-SDAG-NEXT:    s_load_dword s2, s[8:9], 0x8
1028; GFX802-SDAG-NEXT:    ;;#ASMSTART
1029; GFX802-SDAG-NEXT:    s_mov_b32 m0, -1
1030; GFX802-SDAG-NEXT:    ;;#ASMEND
1031; GFX802-SDAG-NEXT:    s_mov_b32 s4, m0
1032; GFX802-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
1033; GFX802-SDAG-NEXT:    s_load_dword s3, s[0:1], 0x0
1034; GFX802-SDAG-NEXT:    s_mov_b32 m0, s2
1035; GFX802-SDAG-NEXT:    v_mov_b32_e32 v0, s0
1036; GFX802-SDAG-NEXT:    v_mov_b32_e32 v1, s1
1037; GFX802-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
1038; GFX802-SDAG-NEXT:    v_mov_b32_e32 v2, s3
1039; GFX802-SDAG-NEXT:    v_writelane_b32 v2, s4, m0
1040; GFX802-SDAG-NEXT:    flat_store_dword v[0:1], v2
1041; GFX802-SDAG-NEXT:    s_endpgm
1042;
1043; GFX1010-SDAG-LABEL: test_writelane_m0_sreg_i32:
1044; GFX1010-SDAG:       ; %bb.0:
1045; GFX1010-SDAG-NEXT:    s_clause 0x1
1046; GFX1010-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
1047; GFX1010-SDAG-NEXT:    s_load_dword s2, s[8:9], 0x8
1048; GFX1010-SDAG-NEXT:    v_mov_b32_e32 v1, 0
1049; GFX1010-SDAG-NEXT:    ;;#ASMSTART
1050; GFX1010-SDAG-NEXT:    s_mov_b32 m0, -1
1051; GFX1010-SDAG-NEXT:    ;;#ASMEND
1052; GFX1010-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
1053; GFX1010-SDAG-NEXT:    s_load_dword s3, s[0:1], 0x0
1054; GFX1010-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
1055; GFX1010-SDAG-NEXT:    v_mov_b32_e32 v0, s3
1056; GFX1010-SDAG-NEXT:    v_writelane_b32 v0, m0, s2
1057; GFX1010-SDAG-NEXT:    global_store_dword v1, v0, s[0:1]
1058; GFX1010-SDAG-NEXT:    s_endpgm
1059;
1060; GFX1100-SDAG-LABEL: test_writelane_m0_sreg_i32:
1061; GFX1100-SDAG:       ; %bb.0:
1062; GFX1100-SDAG-NEXT:    s_clause 0x1
1063; GFX1100-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
1064; GFX1100-SDAG-NEXT:    s_load_b32 s2, s[4:5], 0x8
1065; GFX1100-SDAG-NEXT:    v_mov_b32_e32 v1, 0
1066; GFX1100-SDAG-NEXT:    ;;#ASMSTART
1067; GFX1100-SDAG-NEXT:    s_mov_b32 m0, -1
1068; GFX1100-SDAG-NEXT:    ;;#ASMEND
1069; GFX1100-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
1070; GFX1100-SDAG-NEXT:    s_load_b32 s3, s[0:1], 0x0
1071; GFX1100-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
1072; GFX1100-SDAG-NEXT:    v_mov_b32_e32 v0, s3
1073; GFX1100-SDAG-NEXT:    v_writelane_b32 v0, m0, s2
1074; GFX1100-SDAG-NEXT:    global_store_b32 v1, v0, s[0:1]
1075; GFX1100-SDAG-NEXT:    s_endpgm
1076;
1077; GFX802-GISEL-LABEL: test_writelane_m0_sreg_i32:
1078; GFX802-GISEL:       ; %bb.0:
1079; GFX802-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
1080; GFX802-GISEL-NEXT:    s_load_dword s2, s[8:9], 0x8
1081; GFX802-GISEL-NEXT:    ;;#ASMSTART
1082; GFX802-GISEL-NEXT:    s_mov_b32 m0, -1
1083; GFX802-GISEL-NEXT:    ;;#ASMEND
1084; GFX802-GISEL-NEXT:    s_mov_b32 s4, m0
1085; GFX802-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
1086; GFX802-GISEL-NEXT:    s_load_dword s3, s[0:1], 0x0
1087; GFX802-GISEL-NEXT:    s_mov_b32 m0, s2
1088; GFX802-GISEL-NEXT:    v_mov_b32_e32 v0, s0
1089; GFX802-GISEL-NEXT:    v_mov_b32_e32 v1, s1
1090; GFX802-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
1091; GFX802-GISEL-NEXT:    v_mov_b32_e32 v2, s3
1092; GFX802-GISEL-NEXT:    v_writelane_b32 v2, s4, m0
1093; GFX802-GISEL-NEXT:    flat_store_dword v[0:1], v2
1094; GFX802-GISEL-NEXT:    s_endpgm
1095;
1096; GFX1010-GISEL-LABEL: test_writelane_m0_sreg_i32:
1097; GFX1010-GISEL:       ; %bb.0:
1098; GFX1010-GISEL-NEXT:    s_clause 0x1
1099; GFX1010-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
1100; GFX1010-GISEL-NEXT:    s_load_dword s2, s[8:9], 0x8
1101; GFX1010-GISEL-NEXT:    ;;#ASMSTART
1102; GFX1010-GISEL-NEXT:    s_mov_b32 m0, -1
1103; GFX1010-GISEL-NEXT:    ;;#ASMEND
1104; GFX1010-GISEL-NEXT:    v_mov_b32_e32 v1, 0
1105; GFX1010-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
1106; GFX1010-GISEL-NEXT:    s_load_dword s3, s[0:1], 0x0
1107; GFX1010-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
1108; GFX1010-GISEL-NEXT:    v_mov_b32_e32 v0, s3
1109; GFX1010-GISEL-NEXT:    v_writelane_b32 v0, m0, s2
1110; GFX1010-GISEL-NEXT:    global_store_dword v1, v0, s[0:1]
1111; GFX1010-GISEL-NEXT:    s_endpgm
1112;
1113; GFX1100-GISEL-LABEL: test_writelane_m0_sreg_i32:
1114; GFX1100-GISEL:       ; %bb.0:
1115; GFX1100-GISEL-NEXT:    s_clause 0x1
1116; GFX1100-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
1117; GFX1100-GISEL-NEXT:    s_load_b32 s2, s[4:5], 0x8
1118; GFX1100-GISEL-NEXT:    ;;#ASMSTART
1119; GFX1100-GISEL-NEXT:    s_mov_b32 m0, -1
1120; GFX1100-GISEL-NEXT:    ;;#ASMEND
1121; GFX1100-GISEL-NEXT:    v_mov_b32_e32 v1, 0
1122; GFX1100-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
1123; GFX1100-GISEL-NEXT:    s_load_b32 s3, s[0:1], 0x0
1124; GFX1100-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
1125; GFX1100-GISEL-NEXT:    v_mov_b32_e32 v0, s3
1126; GFX1100-GISEL-NEXT:    v_writelane_b32 v0, m0, s2
1127; GFX1100-GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
1128; GFX1100-GISEL-NEXT:    s_endpgm
1129  %oldval = load i32, ptr addrspace(1) %out
  ; The asm result is constrained to m0, so the written value starts out in m0.
1130  %m0 = call i32 asm "s_mov_b32 m0, -1", "={m0}"()
  ; Operands: value to write (from m0), lane select (%src1), old value.
1131  %writelane = call i32 @llvm.amdgcn.writelane.i32(i32 %m0, i32 %src1, i32 %oldval)
1132  store i32 %writelane, ptr addrspace(1) %out, align 4
1133  ret void
1134}
1135
; i32 writelane with a constant lane select of 32. The value to write is the
; kernel argument %src0 and the old value is loaded from %out. In the expected
; output below the lane select appears as the literal 32 in the lane operand of
; v_writelane_b32; no extra register is set up for it.
1136define amdgpu_kernel void @test_writelane_imm_i32(ptr addrspace(1) %out, i32 %src0) #1 {
1137; GFX802-SDAG-LABEL: test_writelane_imm_i32:
1138; GFX802-SDAG:       ; %bb.0:
1139; GFX802-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
1140; GFX802-SDAG-NEXT:    s_load_dword s2, s[8:9], 0x8
1141; GFX802-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
1142; GFX802-SDAG-NEXT:    s_load_dword s3, s[0:1], 0x0
1143; GFX802-SDAG-NEXT:    v_mov_b32_e32 v0, s0
1144; GFX802-SDAG-NEXT:    v_mov_b32_e32 v1, s1
1145; GFX802-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
1146; GFX802-SDAG-NEXT:    v_mov_b32_e32 v2, s3
1147; GFX802-SDAG-NEXT:    v_writelane_b32 v2, s2, 32
1148; GFX802-SDAG-NEXT:    flat_store_dword v[0:1], v2
1149; GFX802-SDAG-NEXT:    s_endpgm
1150;
1151; GFX1010-SDAG-LABEL: test_writelane_imm_i32:
1152; GFX1010-SDAG:       ; %bb.0:
1153; GFX1010-SDAG-NEXT:    s_clause 0x1
1154; GFX1010-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
1155; GFX1010-SDAG-NEXT:    s_load_dword s2, s[8:9], 0x8
1156; GFX1010-SDAG-NEXT:    v_mov_b32_e32 v1, 0
1157; GFX1010-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
1158; GFX1010-SDAG-NEXT:    s_load_dword s3, s[0:1], 0x0
1159; GFX1010-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
1160; GFX1010-SDAG-NEXT:    v_mov_b32_e32 v0, s3
1161; GFX1010-SDAG-NEXT:    v_writelane_b32 v0, s2, 32
1162; GFX1010-SDAG-NEXT:    global_store_dword v1, v0, s[0:1]
1163; GFX1010-SDAG-NEXT:    s_endpgm
1164;
1165; GFX1100-SDAG-LABEL: test_writelane_imm_i32:
1166; GFX1100-SDAG:       ; %bb.0:
1167; GFX1100-SDAG-NEXT:    s_clause 0x1
1168; GFX1100-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
1169; GFX1100-SDAG-NEXT:    s_load_b32 s2, s[4:5], 0x8
1170; GFX1100-SDAG-NEXT:    v_mov_b32_e32 v1, 0
1171; GFX1100-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
1172; GFX1100-SDAG-NEXT:    s_load_b32 s3, s[0:1], 0x0
1173; GFX1100-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
1174; GFX1100-SDAG-NEXT:    v_mov_b32_e32 v0, s3
1175; GFX1100-SDAG-NEXT:    v_writelane_b32 v0, s2, 32
1176; GFX1100-SDAG-NEXT:    global_store_b32 v1, v0, s[0:1]
1177; GFX1100-SDAG-NEXT:    s_endpgm
1178;
1179; GFX802-GISEL-LABEL: test_writelane_imm_i32:
1180; GFX802-GISEL:       ; %bb.0:
1181; GFX802-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
1182; GFX802-GISEL-NEXT:    s_load_dword s2, s[8:9], 0x8
1183; GFX802-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
1184; GFX802-GISEL-NEXT:    s_load_dword s3, s[0:1], 0x0
1185; GFX802-GISEL-NEXT:    v_mov_b32_e32 v0, s0
1186; GFX802-GISEL-NEXT:    v_mov_b32_e32 v1, s1
1187; GFX802-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
1188; GFX802-GISEL-NEXT:    v_mov_b32_e32 v2, s3
1189; GFX802-GISEL-NEXT:    v_writelane_b32 v2, s2, 32
1190; GFX802-GISEL-NEXT:    flat_store_dword v[0:1], v2
1191; GFX802-GISEL-NEXT:    s_endpgm
1192;
1193; GFX1010-GISEL-LABEL: test_writelane_imm_i32:
1194; GFX1010-GISEL:       ; %bb.0:
1195; GFX1010-GISEL-NEXT:    s_clause 0x1
1196; GFX1010-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
1197; GFX1010-GISEL-NEXT:    s_load_dword s2, s[8:9], 0x8
1198; GFX1010-GISEL-NEXT:    v_mov_b32_e32 v1, 0
1199; GFX1010-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
1200; GFX1010-GISEL-NEXT:    s_load_dword s3, s[0:1], 0x0
1201; GFX1010-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
1202; GFX1010-GISEL-NEXT:    v_mov_b32_e32 v0, s3
1203; GFX1010-GISEL-NEXT:    v_writelane_b32 v0, s2, 32
1204; GFX1010-GISEL-NEXT:    global_store_dword v1, v0, s[0:1]
1205; GFX1010-GISEL-NEXT:    s_endpgm
1206;
1207; GFX1100-GISEL-LABEL: test_writelane_imm_i32:
1208; GFX1100-GISEL:       ; %bb.0:
1209; GFX1100-GISEL-NEXT:    s_clause 0x1
1210; GFX1100-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
1211; GFX1100-GISEL-NEXT:    s_load_b32 s2, s[4:5], 0x8
1212; GFX1100-GISEL-NEXT:    v_mov_b32_e32 v1, 0
1213; GFX1100-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
1214; GFX1100-GISEL-NEXT:    s_load_b32 s3, s[0:1], 0x0
1215; GFX1100-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
1216; GFX1100-GISEL-NEXT:    v_mov_b32_e32 v0, s3
1217; GFX1100-GISEL-NEXT:    v_writelane_b32 v0, s2, 32
1218; GFX1100-GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
1219; GFX1100-GISEL-NEXT:    s_endpgm
1220  %oldval = load i32, ptr addrspace(1) %out
  ; Operands: value to write (%src0), constant lane select 32, old value.
1221  %writelane = call i32 @llvm.amdgcn.writelane.i32(i32 %src0, i32 32, i32 %oldval) #0
1222  store i32 %writelane, ptr addrspace(1) %out, align 4
1223  ret void
1224}
1225
; i64 writelane with a constant lane select of 32. The value to write is the
; kernel argument %src0 and the old value is loaded from %out. The expected
; output below emits two v_writelane_b32, one per 32-bit half of the value
; (s2 for the low half and s3 for the high half), each with the literal 32 as
; its lane operand.
1226define amdgpu_kernel void @test_writelane_imm_i64(ptr addrspace(1) %out, i64 %src0) #1 {
1227; GFX802-SDAG-LABEL: test_writelane_imm_i64:
1228; GFX802-SDAG:       ; %bb.0:
1229; GFX802-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
1230; GFX802-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
1231; GFX802-SDAG-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x0
1232; GFX802-SDAG-NEXT:    v_mov_b32_e32 v3, s1
1233; GFX802-SDAG-NEXT:    v_mov_b32_e32 v2, s0
1234; GFX802-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
1235; GFX802-SDAG-NEXT:    v_mov_b32_e32 v1, s5
1236; GFX802-SDAG-NEXT:    v_mov_b32_e32 v0, s4
1237; GFX802-SDAG-NEXT:    v_writelane_b32 v1, s3, 32
1238; GFX802-SDAG-NEXT:    v_writelane_b32 v0, s2, 32
1239; GFX802-SDAG-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
1240; GFX802-SDAG-NEXT:    s_endpgm
1241;
1242; GFX1010-SDAG-LABEL: test_writelane_imm_i64:
1243; GFX1010-SDAG:       ; %bb.0:
1244; GFX1010-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
1245; GFX1010-SDAG-NEXT:    v_mov_b32_e32 v2, 0
1246; GFX1010-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
1247; GFX1010-SDAG-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x0
1248; GFX1010-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
1249; GFX1010-SDAG-NEXT:    v_mov_b32_e32 v1, s5
1250; GFX1010-SDAG-NEXT:    v_mov_b32_e32 v0, s4
1251; GFX1010-SDAG-NEXT:    v_writelane_b32 v1, s3, 32
1252; GFX1010-SDAG-NEXT:    v_writelane_b32 v0, s2, 32
1253; GFX1010-SDAG-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
1254; GFX1010-SDAG-NEXT:    s_endpgm
1255;
1256; GFX1100-SDAG-LABEL: test_writelane_imm_i64:
1257; GFX1100-SDAG:       ; %bb.0:
1258; GFX1100-SDAG-NEXT:    s_load_b128 s[0:3], s[4:5], 0x0
1259; GFX1100-SDAG-NEXT:    v_mov_b32_e32 v2, 0
1260; GFX1100-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
1261; GFX1100-SDAG-NEXT:    s_load_b64 s[4:5], s[0:1], 0x0
1262; GFX1100-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
1263; GFX1100-SDAG-NEXT:    v_mov_b32_e32 v1, s5
1264; GFX1100-SDAG-NEXT:    v_mov_b32_e32 v0, s4
1265; GFX1100-SDAG-NEXT:    v_writelane_b32 v1, s3, 32
1266; GFX1100-SDAG-NEXT:    v_writelane_b32 v0, s2, 32
1267; GFX1100-SDAG-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
1268; GFX1100-SDAG-NEXT:    s_endpgm
1269;
1270; GFX802-GISEL-LABEL: test_writelane_imm_i64:
1271; GFX802-GISEL:       ; %bb.0:
1272; GFX802-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
1273; GFX802-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
1274; GFX802-GISEL-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x0
1275; GFX802-GISEL-NEXT:    v_mov_b32_e32 v3, s1
1276; GFX802-GISEL-NEXT:    v_mov_b32_e32 v2, s0
1277; GFX802-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
1278; GFX802-GISEL-NEXT:    v_mov_b32_e32 v0, s4
1279; GFX802-GISEL-NEXT:    v_mov_b32_e32 v1, s5
1280; GFX802-GISEL-NEXT:    v_writelane_b32 v0, s2, 32
1281; GFX802-GISEL-NEXT:    v_writelane_b32 v1, s3, 32
1282; GFX802-GISEL-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
1283; GFX802-GISEL-NEXT:    s_endpgm
1284;
1285; GFX1010-GISEL-LABEL: test_writelane_imm_i64:
1286; GFX1010-GISEL:       ; %bb.0:
1287; GFX1010-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
1288; GFX1010-GISEL-NEXT:    v_mov_b32_e32 v2, 0
1289; GFX1010-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
1290; GFX1010-GISEL-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x0
1291; GFX1010-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
1292; GFX1010-GISEL-NEXT:    v_mov_b32_e32 v0, s4
1293; GFX1010-GISEL-NEXT:    v_mov_b32_e32 v1, s5
1294; GFX1010-GISEL-NEXT:    v_writelane_b32 v0, s2, 32
1295; GFX1010-GISEL-NEXT:    v_writelane_b32 v1, s3, 32
1296; GFX1010-GISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
1297; GFX1010-GISEL-NEXT:    s_endpgm
1298;
1299; GFX1100-GISEL-LABEL: test_writelane_imm_i64:
1300; GFX1100-GISEL:       ; %bb.0:
1301; GFX1100-GISEL-NEXT:    s_load_b128 s[0:3], s[4:5], 0x0
1302; GFX1100-GISEL-NEXT:    v_mov_b32_e32 v2, 0
1303; GFX1100-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
1304; GFX1100-GISEL-NEXT:    s_load_b64 s[4:5], s[0:1], 0x0
1305; GFX1100-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
1306; GFX1100-GISEL-NEXT:    v_mov_b32_e32 v0, s4
1307; GFX1100-GISEL-NEXT:    v_mov_b32_e32 v1, s5
1308; GFX1100-GISEL-NEXT:    v_writelane_b32 v0, s2, 32
1309; GFX1100-GISEL-NEXT:    v_writelane_b32 v1, s3, 32
1310; GFX1100-GISEL-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
1311; GFX1100-GISEL-NEXT:    s_endpgm
1312  %oldval = load i64, ptr addrspace(1) %out
  ; Operands: value to write (%src0), constant lane select 32, old value.
1313  %writelane = call i64 @llvm.amdgcn.writelane.i64(i64 %src0, i32 32, i64 %oldval) #0
  ; Note the i64 store is declared with only 4-byte alignment.
1314  store i64 %writelane, ptr addrspace(1) %out, align 4
1315  ret void
1316}
1317
; Double-precision writelane with a constant second operand.
; The kernel loads the double currently stored at %out, passes it as the third
; (old value) operand of llvm.amdgcn.writelane.f64 together with %src0 and the
; constant i32 32, and stores the result back to %out.
; On every checked target and for both selectors, the expected output contains
; two v_writelane_b32 instructions, one per 32-bit half of the value, each with
; the literal 32 as its last operand.
1318define amdgpu_kernel void @test_writelane_imm_f64(ptr addrspace(1) %out, double %src0) #1 {
1319; GFX802-SDAG-LABEL: test_writelane_imm_f64:
1320; GFX802-SDAG:       ; %bb.0:
1321; GFX802-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
1322; GFX802-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
1323; GFX802-SDAG-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x0
1324; GFX802-SDAG-NEXT:    v_mov_b32_e32 v3, s1
1325; GFX802-SDAG-NEXT:    v_mov_b32_e32 v2, s0
1326; GFX802-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
1327; GFX802-SDAG-NEXT:    v_mov_b32_e32 v1, s5
1328; GFX802-SDAG-NEXT:    v_mov_b32_e32 v0, s4
1329; GFX802-SDAG-NEXT:    v_writelane_b32 v1, s3, 32
1330; GFX802-SDAG-NEXT:    v_writelane_b32 v0, s2, 32
1331; GFX802-SDAG-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
1332; GFX802-SDAG-NEXT:    s_endpgm
1333;
1334; GFX1010-SDAG-LABEL: test_writelane_imm_f64:
1335; GFX1010-SDAG:       ; %bb.0:
1336; GFX1010-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
1337; GFX1010-SDAG-NEXT:    v_mov_b32_e32 v2, 0
1338; GFX1010-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
1339; GFX1010-SDAG-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x0
1340; GFX1010-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
1341; GFX1010-SDAG-NEXT:    v_mov_b32_e32 v1, s5
1342; GFX1010-SDAG-NEXT:    v_mov_b32_e32 v0, s4
1343; GFX1010-SDAG-NEXT:    v_writelane_b32 v1, s3, 32
1344; GFX1010-SDAG-NEXT:    v_writelane_b32 v0, s2, 32
1345; GFX1010-SDAG-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
1346; GFX1010-SDAG-NEXT:    s_endpgm
1347;
1348; GFX1100-SDAG-LABEL: test_writelane_imm_f64:
1349; GFX1100-SDAG:       ; %bb.0:
1350; GFX1100-SDAG-NEXT:    s_load_b128 s[0:3], s[4:5], 0x0
1351; GFX1100-SDAG-NEXT:    v_mov_b32_e32 v2, 0
1352; GFX1100-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
1353; GFX1100-SDAG-NEXT:    s_load_b64 s[4:5], s[0:1], 0x0
1354; GFX1100-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
1355; GFX1100-SDAG-NEXT:    v_mov_b32_e32 v1, s5
1356; GFX1100-SDAG-NEXT:    v_mov_b32_e32 v0, s4
1357; GFX1100-SDAG-NEXT:    v_writelane_b32 v1, s3, 32
1358; GFX1100-SDAG-NEXT:    v_writelane_b32 v0, s2, 32
1359; GFX1100-SDAG-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
1360; GFX1100-SDAG-NEXT:    s_endpgm
1361;
1362; GFX802-GISEL-LABEL: test_writelane_imm_f64:
1363; GFX802-GISEL:       ; %bb.0:
1364; GFX802-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
1365; GFX802-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
1366; GFX802-GISEL-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x0
1367; GFX802-GISEL-NEXT:    v_mov_b32_e32 v3, s1
1368; GFX802-GISEL-NEXT:    v_mov_b32_e32 v2, s0
1369; GFX802-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
1370; GFX802-GISEL-NEXT:    v_mov_b32_e32 v0, s4
1371; GFX802-GISEL-NEXT:    v_mov_b32_e32 v1, s5
1372; GFX802-GISEL-NEXT:    v_writelane_b32 v0, s2, 32
1373; GFX802-GISEL-NEXT:    v_writelane_b32 v1, s3, 32
1374; GFX802-GISEL-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
1375; GFX802-GISEL-NEXT:    s_endpgm
1376;
1377; GFX1010-GISEL-LABEL: test_writelane_imm_f64:
1378; GFX1010-GISEL:       ; %bb.0:
1379; GFX1010-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
1380; GFX1010-GISEL-NEXT:    v_mov_b32_e32 v2, 0
1381; GFX1010-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
1382; GFX1010-GISEL-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x0
1383; GFX1010-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
1384; GFX1010-GISEL-NEXT:    v_mov_b32_e32 v0, s4
1385; GFX1010-GISEL-NEXT:    v_mov_b32_e32 v1, s5
1386; GFX1010-GISEL-NEXT:    v_writelane_b32 v0, s2, 32
1387; GFX1010-GISEL-NEXT:    v_writelane_b32 v1, s3, 32
1388; GFX1010-GISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
1389; GFX1010-GISEL-NEXT:    s_endpgm
1390;
1391; GFX1100-GISEL-LABEL: test_writelane_imm_f64:
1392; GFX1100-GISEL:       ; %bb.0:
1393; GFX1100-GISEL-NEXT:    s_load_b128 s[0:3], s[4:5], 0x0
1394; GFX1100-GISEL-NEXT:    v_mov_b32_e32 v2, 0
1395; GFX1100-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
1396; GFX1100-GISEL-NEXT:    s_load_b64 s[4:5], s[0:1], 0x0
1397; GFX1100-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
1398; GFX1100-GISEL-NEXT:    v_mov_b32_e32 v0, s4
1399; GFX1100-GISEL-NEXT:    v_mov_b32_e32 v1, s5
1400; GFX1100-GISEL-NEXT:    v_writelane_b32 v0, s2, 32
1401; GFX1100-GISEL-NEXT:    v_writelane_b32 v1, s3, 32
1402; GFX1100-GISEL-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
1403; GFX1100-GISEL-NEXT:    s_endpgm
1404  %oldval = load double, ptr addrspace(1) %out
1405  %writelane = call double @llvm.amdgcn.writelane.f64(double %src0, i32 32, double %oldval) #0
1406  store double %writelane, ptr addrspace(1) %out, align 4
1407  ret void
1408}
1409
; 32-bit writelane whose old value is a kernel argument rather than a load.
; %oldval is an `inreg` i32 kernel argument; it is passed with %src0 and %src1
; to llvm.amdgcn.writelane.i32 and the result is stored to %out.
; The expected output copies the old value from a scalar register into a VGPR
; (v_mov_b32) before the single v_writelane_b32. The GFX802 checks route the
; %src1 operand through m0, while the GFX1010/GFX1100 checks use the SGPR
; directly.
1410define amdgpu_kernel void @test_writelane_sreg_oldval_i32(i32 inreg %oldval, ptr addrspace(1) %out, i32 %src0, i32 %src1) #1 {
1411; GFX802-SDAG-LABEL: test_writelane_sreg_oldval_i32:
1412; GFX802-SDAG:       ; %bb.0:
1413; GFX802-SDAG-NEXT:    s_load_dword s4, s[8:9], 0x0
1414; GFX802-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x8
1415; GFX802-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
1416; GFX802-SDAG-NEXT:    v_mov_b32_e32 v2, s4
1417; GFX802-SDAG-NEXT:    s_mov_b32 m0, s3
1418; GFX802-SDAG-NEXT:    v_mov_b32_e32 v0, s0
1419; GFX802-SDAG-NEXT:    v_writelane_b32 v2, s2, m0
1420; GFX802-SDAG-NEXT:    v_mov_b32_e32 v1, s1
1421; GFX802-SDAG-NEXT:    flat_store_dword v[0:1], v2
1422; GFX802-SDAG-NEXT:    s_endpgm
1423;
1424; GFX1010-SDAG-LABEL: test_writelane_sreg_oldval_i32:
1425; GFX1010-SDAG:       ; %bb.0:
1426; GFX1010-SDAG-NEXT:    s_clause 0x1
1427; GFX1010-SDAG-NEXT:    s_load_dword s4, s[8:9], 0x0
1428; GFX1010-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x8
1429; GFX1010-SDAG-NEXT:    v_mov_b32_e32 v1, 0
1430; GFX1010-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
1431; GFX1010-SDAG-NEXT:    v_mov_b32_e32 v0, s4
1432; GFX1010-SDAG-NEXT:    v_writelane_b32 v0, s2, s3
1433; GFX1010-SDAG-NEXT:    global_store_dword v1, v0, s[0:1]
1434; GFX1010-SDAG-NEXT:    s_endpgm
1435;
1436; GFX1100-SDAG-LABEL: test_writelane_sreg_oldval_i32:
1437; GFX1100-SDAG:       ; %bb.0:
1438; GFX1100-SDAG-NEXT:    s_clause 0x1
1439; GFX1100-SDAG-NEXT:    s_load_b32 s6, s[4:5], 0x0
1440; GFX1100-SDAG-NEXT:    s_load_b128 s[0:3], s[4:5], 0x8
1441; GFX1100-SDAG-NEXT:    v_mov_b32_e32 v1, 0
1442; GFX1100-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
1443; GFX1100-SDAG-NEXT:    v_mov_b32_e32 v0, s6
1444; GFX1100-SDAG-NEXT:    v_writelane_b32 v0, s2, s3
1445; GFX1100-SDAG-NEXT:    global_store_b32 v1, v0, s[0:1]
1446; GFX1100-SDAG-NEXT:    s_endpgm
1447;
1448; GFX802-GISEL-LABEL: test_writelane_sreg_oldval_i32:
1449; GFX802-GISEL:       ; %bb.0:
1450; GFX802-GISEL-NEXT:    s_load_dword s4, s[8:9], 0x0
1451; GFX802-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x8
1452; GFX802-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
1453; GFX802-GISEL-NEXT:    v_mov_b32_e32 v2, s4
1454; GFX802-GISEL-NEXT:    s_mov_b32 m0, s3
1455; GFX802-GISEL-NEXT:    v_mov_b32_e32 v0, s0
1456; GFX802-GISEL-NEXT:    v_writelane_b32 v2, s2, m0
1457; GFX802-GISEL-NEXT:    v_mov_b32_e32 v1, s1
1458; GFX802-GISEL-NEXT:    flat_store_dword v[0:1], v2
1459; GFX802-GISEL-NEXT:    s_endpgm
1460;
1461; GFX1010-GISEL-LABEL: test_writelane_sreg_oldval_i32:
1462; GFX1010-GISEL:       ; %bb.0:
1463; GFX1010-GISEL-NEXT:    s_clause 0x1
1464; GFX1010-GISEL-NEXT:    s_load_dword s4, s[8:9], 0x0
1465; GFX1010-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x8
1466; GFX1010-GISEL-NEXT:    v_mov_b32_e32 v1, 0
1467; GFX1010-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
1468; GFX1010-GISEL-NEXT:    v_mov_b32_e32 v0, s4
1469; GFX1010-GISEL-NEXT:    v_writelane_b32 v0, s2, s3
1470; GFX1010-GISEL-NEXT:    global_store_dword v1, v0, s[0:1]
1471; GFX1010-GISEL-NEXT:    s_endpgm
1472;
1473; GFX1100-GISEL-LABEL: test_writelane_sreg_oldval_i32:
1474; GFX1100-GISEL:       ; %bb.0:
1475; GFX1100-GISEL-NEXT:    s_clause 0x1
1476; GFX1100-GISEL-NEXT:    s_load_b32 s6, s[4:5], 0x0
1477; GFX1100-GISEL-NEXT:    s_load_b128 s[0:3], s[4:5], 0x8
1478; GFX1100-GISEL-NEXT:    v_mov_b32_e32 v1, 0
1479; GFX1100-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
1480; GFX1100-GISEL-NEXT:    v_mov_b32_e32 v0, s6
1481; GFX1100-GISEL-NEXT:    v_writelane_b32 v0, s2, s3
1482; GFX1100-GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
1483; GFX1100-GISEL-NEXT:    s_endpgm
1484  %writelane = call i32 @llvm.amdgcn.writelane.i32(i32 %src0, i32 %src1, i32 %oldval)
1485  store i32 %writelane, ptr addrspace(1) %out, align 4
1486  ret void
1487}
1488
; 64-bit integer writelane whose old value is a kernel argument.
; %oldval is an `inreg` i64 kernel argument passed straight to
; llvm.amdgcn.writelane.i64 along with %src0 and %src1; the result is stored
; to %out.
; The expected output copies both halves of the old value into VGPRs and emits
; two v_writelane_b32 instructions that share the same last operand (m0 on
; GFX802, an SGPR on GFX1010/GFX1100).
1489define amdgpu_kernel void @test_writelane_sreg_oldval_i64(i64 inreg %oldval, ptr addrspace(1) %out, i64 %src0, i32 %src1) #1 {
1490; GFX802-SDAG-LABEL: test_writelane_sreg_oldval_i64:
1491; GFX802-SDAG:       ; %bb.0:
1492; GFX802-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
1493; GFX802-SDAG-NEXT:    s_load_dword s6, s[8:9], 0x18
1494; GFX802-SDAG-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x10
1495; GFX802-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
1496; GFX802-SDAG-NEXT:    v_mov_b32_e32 v3, s1
1497; GFX802-SDAG-NEXT:    s_mov_b32 m0, s6
1498; GFX802-SDAG-NEXT:    v_mov_b32_e32 v2, s0
1499; GFX802-SDAG-NEXT:    v_mov_b32_e32 v0, s2
1500; GFX802-SDAG-NEXT:    v_mov_b32_e32 v1, s3
1501; GFX802-SDAG-NEXT:    v_writelane_b32 v3, s5, m0
1502; GFX802-SDAG-NEXT:    v_writelane_b32 v2, s4, m0
1503; GFX802-SDAG-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
1504; GFX802-SDAG-NEXT:    s_endpgm
1505;
1506; GFX1010-SDAG-LABEL: test_writelane_sreg_oldval_i64:
1507; GFX1010-SDAG:       ; %bb.0:
1508; GFX1010-SDAG-NEXT:    s_clause 0x2
1509; GFX1010-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
1510; GFX1010-SDAG-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x10
1511; GFX1010-SDAG-NEXT:    s_load_dword s6, s[8:9], 0x18
1512; GFX1010-SDAG-NEXT:    v_mov_b32_e32 v2, 0
1513; GFX1010-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
1514; GFX1010-SDAG-NEXT:    v_mov_b32_e32 v1, s1
1515; GFX1010-SDAG-NEXT:    v_mov_b32_e32 v0, s0
1516; GFX1010-SDAG-NEXT:    v_writelane_b32 v1, s5, s6
1517; GFX1010-SDAG-NEXT:    v_writelane_b32 v0, s4, s6
1518; GFX1010-SDAG-NEXT:    global_store_dwordx2 v2, v[0:1], s[2:3]
1519; GFX1010-SDAG-NEXT:    s_endpgm
1520;
1521; GFX1100-SDAG-LABEL: test_writelane_sreg_oldval_i64:
1522; GFX1100-SDAG:       ; %bb.0:
1523; GFX1100-SDAG-NEXT:    s_clause 0x2
1524; GFX1100-SDAG-NEXT:    s_load_b128 s[0:3], s[4:5], 0x0
1525; GFX1100-SDAG-NEXT:    s_load_b64 s[6:7], s[4:5], 0x10
1526; GFX1100-SDAG-NEXT:    s_load_b32 s4, s[4:5], 0x18
1527; GFX1100-SDAG-NEXT:    v_mov_b32_e32 v2, 0
1528; GFX1100-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
1529; GFX1100-SDAG-NEXT:    v_mov_b32_e32 v1, s1
1530; GFX1100-SDAG-NEXT:    v_mov_b32_e32 v0, s0
1531; GFX1100-SDAG-NEXT:    v_writelane_b32 v1, s7, s4
1532; GFX1100-SDAG-NEXT:    v_writelane_b32 v0, s6, s4
1533; GFX1100-SDAG-NEXT:    global_store_b64 v2, v[0:1], s[2:3]
1534; GFX1100-SDAG-NEXT:    s_endpgm
1535;
1536; GFX802-GISEL-LABEL: test_writelane_sreg_oldval_i64:
1537; GFX802-GISEL:       ; %bb.0:
1538; GFX802-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
1539; GFX802-GISEL-NEXT:    s_load_dword s6, s[8:9], 0x18
1540; GFX802-GISEL-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x10
1541; GFX802-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
1542; GFX802-GISEL-NEXT:    v_mov_b32_e32 v0, s0
1543; GFX802-GISEL-NEXT:    v_mov_b32_e32 v1, s1
1544; GFX802-GISEL-NEXT:    s_mov_b32 m0, s6
1545; GFX802-GISEL-NEXT:    v_mov_b32_e32 v2, s2
1546; GFX802-GISEL-NEXT:    v_writelane_b32 v0, s4, m0
1547; GFX802-GISEL-NEXT:    v_writelane_b32 v1, s5, m0
1548; GFX802-GISEL-NEXT:    v_mov_b32_e32 v3, s3
1549; GFX802-GISEL-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
1550; GFX802-GISEL-NEXT:    s_endpgm
1551;
1552; GFX1010-GISEL-LABEL: test_writelane_sreg_oldval_i64:
1553; GFX1010-GISEL:       ; %bb.0:
1554; GFX1010-GISEL-NEXT:    s_clause 0x2
1555; GFX1010-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
1556; GFX1010-GISEL-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x10
1557; GFX1010-GISEL-NEXT:    s_load_dword s6, s[8:9], 0x18
1558; GFX1010-GISEL-NEXT:    v_mov_b32_e32 v2, 0
1559; GFX1010-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
1560; GFX1010-GISEL-NEXT:    v_mov_b32_e32 v0, s0
1561; GFX1010-GISEL-NEXT:    v_mov_b32_e32 v1, s1
1562; GFX1010-GISEL-NEXT:    v_writelane_b32 v0, s4, s6
1563; GFX1010-GISEL-NEXT:    v_writelane_b32 v1, s5, s6
1564; GFX1010-GISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[2:3]
1565; GFX1010-GISEL-NEXT:    s_endpgm
1566;
1567; GFX1100-GISEL-LABEL: test_writelane_sreg_oldval_i64:
1568; GFX1100-GISEL:       ; %bb.0:
1569; GFX1100-GISEL-NEXT:    s_clause 0x2
1570; GFX1100-GISEL-NEXT:    s_load_b128 s[0:3], s[4:5], 0x0
1571; GFX1100-GISEL-NEXT:    s_load_b64 s[6:7], s[4:5], 0x10
1572; GFX1100-GISEL-NEXT:    s_load_b32 s4, s[4:5], 0x18
1573; GFX1100-GISEL-NEXT:    v_mov_b32_e32 v2, 0
1574; GFX1100-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
1575; GFX1100-GISEL-NEXT:    v_mov_b32_e32 v0, s0
1576; GFX1100-GISEL-NEXT:    v_mov_b32_e32 v1, s1
1577; GFX1100-GISEL-NEXT:    v_writelane_b32 v0, s6, s4
1578; GFX1100-GISEL-NEXT:    v_writelane_b32 v1, s7, s4
1579; GFX1100-GISEL-NEXT:    global_store_b64 v2, v[0:1], s[2:3]
1580; GFX1100-GISEL-NEXT:    s_endpgm
1581  %writelane = call i64 @llvm.amdgcn.writelane.i64(i64 %src0, i32 %src1, i64 %oldval)
1582  store i64 %writelane, ptr addrspace(1) %out, align 4
1583  ret void
1584}
1585
; Double-precision writelane whose old value is a kernel argument.
; %oldval is an `inreg` double kernel argument passed straight to
; llvm.amdgcn.writelane.f64 along with %src0 and %src1; the result is stored
; to %out.
; The expected output copies both halves of the old value into VGPRs and emits
; two v_writelane_b32 instructions that share the same last operand (m0 on
; GFX802, an SGPR on GFX1010/GFX1100).
1586define amdgpu_kernel void @test_writelane_sreg_oldval_f64(double inreg %oldval, ptr addrspace(1) %out, double %src0, i32 %src1) #1 {
1587; GFX802-SDAG-LABEL: test_writelane_sreg_oldval_f64:
1588; GFX802-SDAG:       ; %bb.0:
1589; GFX802-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
1590; GFX802-SDAG-NEXT:    s_load_dword s6, s[8:9], 0x18
1591; GFX802-SDAG-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x10
1592; GFX802-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
1593; GFX802-SDAG-NEXT:    v_mov_b32_e32 v3, s1
1594; GFX802-SDAG-NEXT:    s_mov_b32 m0, s6
1595; GFX802-SDAG-NEXT:    v_mov_b32_e32 v2, s0
1596; GFX802-SDAG-NEXT:    v_mov_b32_e32 v0, s2
1597; GFX802-SDAG-NEXT:    v_mov_b32_e32 v1, s3
1598; GFX802-SDAG-NEXT:    v_writelane_b32 v3, s5, m0
1599; GFX802-SDAG-NEXT:    v_writelane_b32 v2, s4, m0
1600; GFX802-SDAG-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
1601; GFX802-SDAG-NEXT:    s_endpgm
1602;
1603; GFX1010-SDAG-LABEL: test_writelane_sreg_oldval_f64:
1604; GFX1010-SDAG:       ; %bb.0:
1605; GFX1010-SDAG-NEXT:    s_clause 0x2
1606; GFX1010-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
1607; GFX1010-SDAG-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x10
1608; GFX1010-SDAG-NEXT:    s_load_dword s6, s[8:9], 0x18
1609; GFX1010-SDAG-NEXT:    v_mov_b32_e32 v2, 0
1610; GFX1010-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
1611; GFX1010-SDAG-NEXT:    v_mov_b32_e32 v1, s1
1612; GFX1010-SDAG-NEXT:    v_mov_b32_e32 v0, s0
1613; GFX1010-SDAG-NEXT:    v_writelane_b32 v1, s5, s6
1614; GFX1010-SDAG-NEXT:    v_writelane_b32 v0, s4, s6
1615; GFX1010-SDAG-NEXT:    global_store_dwordx2 v2, v[0:1], s[2:3]
1616; GFX1010-SDAG-NEXT:    s_endpgm
1617;
1618; GFX1100-SDAG-LABEL: test_writelane_sreg_oldval_f64:
1619; GFX1100-SDAG:       ; %bb.0:
1620; GFX1100-SDAG-NEXT:    s_clause 0x2
1621; GFX1100-SDAG-NEXT:    s_load_b128 s[0:3], s[4:5], 0x0
1622; GFX1100-SDAG-NEXT:    s_load_b64 s[6:7], s[4:5], 0x10
1623; GFX1100-SDAG-NEXT:    s_load_b32 s4, s[4:5], 0x18
1624; GFX1100-SDAG-NEXT:    v_mov_b32_e32 v2, 0
1625; GFX1100-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
1626; GFX1100-SDAG-NEXT:    v_mov_b32_e32 v1, s1
1627; GFX1100-SDAG-NEXT:    v_mov_b32_e32 v0, s0
1628; GFX1100-SDAG-NEXT:    v_writelane_b32 v1, s7, s4
1629; GFX1100-SDAG-NEXT:    v_writelane_b32 v0, s6, s4
1630; GFX1100-SDAG-NEXT:    global_store_b64 v2, v[0:1], s[2:3]
1631; GFX1100-SDAG-NEXT:    s_endpgm
1632;
1633; GFX802-GISEL-LABEL: test_writelane_sreg_oldval_f64:
1634; GFX802-GISEL:       ; %bb.0:
1635; GFX802-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
1636; GFX802-GISEL-NEXT:    s_load_dword s6, s[8:9], 0x18
1637; GFX802-GISEL-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x10
1638; GFX802-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
1639; GFX802-GISEL-NEXT:    v_mov_b32_e32 v0, s0
1640; GFX802-GISEL-NEXT:    v_mov_b32_e32 v1, s1
1641; GFX802-GISEL-NEXT:    s_mov_b32 m0, s6
1642; GFX802-GISEL-NEXT:    v_mov_b32_e32 v2, s2
1643; GFX802-GISEL-NEXT:    v_writelane_b32 v0, s4, m0
1644; GFX802-GISEL-NEXT:    v_writelane_b32 v1, s5, m0
1645; GFX802-GISEL-NEXT:    v_mov_b32_e32 v3, s3
1646; GFX802-GISEL-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
1647; GFX802-GISEL-NEXT:    s_endpgm
1648;
1649; GFX1010-GISEL-LABEL: test_writelane_sreg_oldval_f64:
1650; GFX1010-GISEL:       ; %bb.0:
1651; GFX1010-GISEL-NEXT:    s_clause 0x2
1652; GFX1010-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
1653; GFX1010-GISEL-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x10
1654; GFX1010-GISEL-NEXT:    s_load_dword s6, s[8:9], 0x18
1655; GFX1010-GISEL-NEXT:    v_mov_b32_e32 v2, 0
1656; GFX1010-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
1657; GFX1010-GISEL-NEXT:    v_mov_b32_e32 v0, s0
1658; GFX1010-GISEL-NEXT:    v_mov_b32_e32 v1, s1
1659; GFX1010-GISEL-NEXT:    v_writelane_b32 v0, s4, s6
1660; GFX1010-GISEL-NEXT:    v_writelane_b32 v1, s5, s6
1661; GFX1010-GISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[2:3]
1662; GFX1010-GISEL-NEXT:    s_endpgm
1663;
1664; GFX1100-GISEL-LABEL: test_writelane_sreg_oldval_f64:
1665; GFX1100-GISEL:       ; %bb.0:
1666; GFX1100-GISEL-NEXT:    s_clause 0x2
1667; GFX1100-GISEL-NEXT:    s_load_b128 s[0:3], s[4:5], 0x0
1668; GFX1100-GISEL-NEXT:    s_load_b64 s[6:7], s[4:5], 0x10
1669; GFX1100-GISEL-NEXT:    s_load_b32 s4, s[4:5], 0x18
1670; GFX1100-GISEL-NEXT:    v_mov_b32_e32 v2, 0
1671; GFX1100-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
1672; GFX1100-GISEL-NEXT:    v_mov_b32_e32 v0, s0
1673; GFX1100-GISEL-NEXT:    v_mov_b32_e32 v1, s1
1674; GFX1100-GISEL-NEXT:    v_writelane_b32 v0, s6, s4
1675; GFX1100-GISEL-NEXT:    v_writelane_b32 v1, s7, s4
1676; GFX1100-GISEL-NEXT:    global_store_b64 v2, v[0:1], s[2:3]
1677; GFX1100-GISEL-NEXT:    s_endpgm
1678  %writelane = call double @llvm.amdgcn.writelane.f64(double %src0, i32 %src1, double %oldval)
1679  store double %writelane, ptr addrspace(1) %out, align 4
1680  ret void
1681}
1682
; 32-bit writelane whose old value is the compile-time constant 42.
; The kernel calls llvm.amdgcn.writelane.i32 with %src0, %src1 and the literal
; i32 42 as the old value, then stores the result to %out.
; The expected output materializes the constant with `v_mov_b32_e32 ..., 42`
; in the VGPR that the single v_writelane_b32 then updates.
1683define amdgpu_kernel void @test_writelane_imm_oldval_i32(ptr addrspace(1) %out, i32 %src0, i32 %src1) #1 {
1684; GFX802-SDAG-LABEL: test_writelane_imm_oldval_i32:
1685; GFX802-SDAG:       ; %bb.0:
1686; GFX802-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
1687; GFX802-SDAG-NEXT:    v_mov_b32_e32 v2, 42
1688; GFX802-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
1689; GFX802-SDAG-NEXT:    s_mov_b32 m0, s3
1690; GFX802-SDAG-NEXT:    v_mov_b32_e32 v0, s0
1691; GFX802-SDAG-NEXT:    v_writelane_b32 v2, s2, m0
1692; GFX802-SDAG-NEXT:    v_mov_b32_e32 v1, s1
1693; GFX802-SDAG-NEXT:    flat_store_dword v[0:1], v2
1694; GFX802-SDAG-NEXT:    s_endpgm
1695;
1696; GFX1010-SDAG-LABEL: test_writelane_imm_oldval_i32:
1697; GFX1010-SDAG:       ; %bb.0:
1698; GFX1010-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
1699; GFX1010-SDAG-NEXT:    v_mov_b32_e32 v0, 42
1700; GFX1010-SDAG-NEXT:    v_mov_b32_e32 v1, 0
1701; GFX1010-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
1702; GFX1010-SDAG-NEXT:    v_writelane_b32 v0, s2, s3
1703; GFX1010-SDAG-NEXT:    global_store_dword v1, v0, s[0:1]
1704; GFX1010-SDAG-NEXT:    s_endpgm
1705;
1706; GFX1100-SDAG-LABEL: test_writelane_imm_oldval_i32:
1707; GFX1100-SDAG:       ; %bb.0:
1708; GFX1100-SDAG-NEXT:    s_load_b128 s[0:3], s[4:5], 0x0
1709; GFX1100-SDAG-NEXT:    v_mov_b32_e32 v0, 42
1710; GFX1100-SDAG-NEXT:    v_mov_b32_e32 v1, 0
1711; GFX1100-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
1712; GFX1100-SDAG-NEXT:    v_writelane_b32 v0, s2, s3
1713; GFX1100-SDAG-NEXT:    global_store_b32 v1, v0, s[0:1]
1714; GFX1100-SDAG-NEXT:    s_endpgm
1715;
1716; GFX802-GISEL-LABEL: test_writelane_imm_oldval_i32:
1717; GFX802-GISEL:       ; %bb.0:
1718; GFX802-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
1719; GFX802-GISEL-NEXT:    v_mov_b32_e32 v2, 42
1720; GFX802-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
1721; GFX802-GISEL-NEXT:    s_mov_b32 m0, s3
1722; GFX802-GISEL-NEXT:    v_mov_b32_e32 v0, s0
1723; GFX802-GISEL-NEXT:    v_writelane_b32 v2, s2, m0
1724; GFX802-GISEL-NEXT:    v_mov_b32_e32 v1, s1
1725; GFX802-GISEL-NEXT:    flat_store_dword v[0:1], v2
1726; GFX802-GISEL-NEXT:    s_endpgm
1727;
1728; GFX1010-GISEL-LABEL: test_writelane_imm_oldval_i32:
1729; GFX1010-GISEL:       ; %bb.0:
1730; GFX1010-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
1731; GFX1010-GISEL-NEXT:    v_mov_b32_e32 v0, 42
1732; GFX1010-GISEL-NEXT:    v_mov_b32_e32 v1, 0
1733; GFX1010-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
1734; GFX1010-GISEL-NEXT:    v_writelane_b32 v0, s2, s3
1735; GFX1010-GISEL-NEXT:    global_store_dword v1, v0, s[0:1]
1736; GFX1010-GISEL-NEXT:    s_endpgm
1737;
1738; GFX1100-GISEL-LABEL: test_writelane_imm_oldval_i32:
1739; GFX1100-GISEL:       ; %bb.0:
1740; GFX1100-GISEL-NEXT:    s_load_b128 s[0:3], s[4:5], 0x0
1741; GFX1100-GISEL-NEXT:    v_mov_b32_e32 v0, 42
1742; GFX1100-GISEL-NEXT:    v_mov_b32_e32 v1, 0
1743; GFX1100-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
1744; GFX1100-GISEL-NEXT:    v_writelane_b32 v0, s2, s3
1745; GFX1100-GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
1746; GFX1100-GISEL-NEXT:    s_endpgm
1747  %writelane = call i32 @llvm.amdgcn.writelane.i32(i32 %src0, i32 %src1, i32 42)
1748  store i32 %writelane, ptr addrspace(1) %out, align 4
1749  ret void
1750}
1751
; 64-bit integer writelane whose old value is the compile-time constant 42.
; The kernel calls llvm.amdgcn.writelane.i64 with %src0, %src1 and the literal
; i64 42 as the old value, then stores the result to %out.
; The expected output materializes the constant as two 32-bit moves (42 for the
; low half, 0 for the high half) and emits one v_writelane_b32 per half.
1752define amdgpu_kernel void @test_writelane_imm_oldval_i64(ptr addrspace(1) %out, i64 %src0, i32 %src1) #1 {
1753; GFX802-SDAG-LABEL: test_writelane_imm_oldval_i64:
1754; GFX802-SDAG:       ; %bb.0:
1755; GFX802-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
1756; GFX802-SDAG-NEXT:    s_load_dword s4, s[8:9], 0x10
1757; GFX802-SDAG-NEXT:    v_mov_b32_e32 v1, 0
1758; GFX802-SDAG-NEXT:    v_mov_b32_e32 v0, 42
1759; GFX802-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
1760; GFX802-SDAG-NEXT:    v_mov_b32_e32 v2, s0
1761; GFX802-SDAG-NEXT:    s_mov_b32 m0, s4
1762; GFX802-SDAG-NEXT:    v_mov_b32_e32 v3, s1
1763; GFX802-SDAG-NEXT:    v_writelane_b32 v1, s3, m0
1764; GFX802-SDAG-NEXT:    v_writelane_b32 v0, s2, m0
1765; GFX802-SDAG-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
1766; GFX802-SDAG-NEXT:    s_endpgm
1767;
1768; GFX1010-SDAG-LABEL: test_writelane_imm_oldval_i64:
1769; GFX1010-SDAG:       ; %bb.0:
1770; GFX1010-SDAG-NEXT:    s_clause 0x1
1771; GFX1010-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
1772; GFX1010-SDAG-NEXT:    s_load_dword s4, s[8:9], 0x10
1773; GFX1010-SDAG-NEXT:    v_mov_b32_e32 v1, 0
1774; GFX1010-SDAG-NEXT:    v_mov_b32_e32 v0, 42
1775; GFX1010-SDAG-NEXT:    v_mov_b32_e32 v2, 0
1776; GFX1010-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
1777; GFX1010-SDAG-NEXT:    v_writelane_b32 v1, s3, s4
1778; GFX1010-SDAG-NEXT:    v_writelane_b32 v0, s2, s4
1779; GFX1010-SDAG-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
1780; GFX1010-SDAG-NEXT:    s_endpgm
1781;
1782; GFX1100-SDAG-LABEL: test_writelane_imm_oldval_i64:
1783; GFX1100-SDAG:       ; %bb.0:
1784; GFX1100-SDAG-NEXT:    s_clause 0x1
1785; GFX1100-SDAG-NEXT:    s_load_b128 s[0:3], s[4:5], 0x0
1786; GFX1100-SDAG-NEXT:    s_load_b32 s4, s[4:5], 0x10
1787; GFX1100-SDAG-NEXT:    v_mov_b32_e32 v1, 0
1788; GFX1100-SDAG-NEXT:    v_mov_b32_e32 v0, 42
1789; GFX1100-SDAG-NEXT:    v_mov_b32_e32 v2, 0
1790; GFX1100-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
1791; GFX1100-SDAG-NEXT:    v_writelane_b32 v1, s3, s4
1792; GFX1100-SDAG-NEXT:    v_writelane_b32 v0, s2, s4
1793; GFX1100-SDAG-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
1794; GFX1100-SDAG-NEXT:    s_endpgm
1795;
1796; GFX802-GISEL-LABEL: test_writelane_imm_oldval_i64:
1797; GFX802-GISEL:       ; %bb.0:
1798; GFX802-GISEL-NEXT:    s_load_dword s4, s[8:9], 0x10
1799; GFX802-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
1800; GFX802-GISEL-NEXT:    v_mov_b32_e32 v0, 42
1801; GFX802-GISEL-NEXT:    v_mov_b32_e32 v1, 0
1802; GFX802-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
1803; GFX802-GISEL-NEXT:    s_mov_b32 m0, s4
1804; GFX802-GISEL-NEXT:    v_mov_b32_e32 v3, s1
1805; GFX802-GISEL-NEXT:    v_writelane_b32 v0, s2, m0
1806; GFX802-GISEL-NEXT:    v_writelane_b32 v1, s3, m0
1807; GFX802-GISEL-NEXT:    v_mov_b32_e32 v2, s0
1808; GFX802-GISEL-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
1809; GFX802-GISEL-NEXT:    s_endpgm
1810;
1811; GFX1010-GISEL-LABEL: test_writelane_imm_oldval_i64:
1812; GFX1010-GISEL:       ; %bb.0:
1813; GFX1010-GISEL-NEXT:    s_clause 0x1
1814; GFX1010-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
1815; GFX1010-GISEL-NEXT:    s_load_dword s4, s[8:9], 0x10
1816; GFX1010-GISEL-NEXT:    v_mov_b32_e32 v0, 42
1817; GFX1010-GISEL-NEXT:    v_mov_b32_e32 v1, 0
1818; GFX1010-GISEL-NEXT:    v_mov_b32_e32 v2, 0
1819; GFX1010-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
1820; GFX1010-GISEL-NEXT:    v_writelane_b32 v0, s2, s4
1821; GFX1010-GISEL-NEXT:    v_writelane_b32 v1, s3, s4
1822; GFX1010-GISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
1823; GFX1010-GISEL-NEXT:    s_endpgm
1824;
1825; GFX1100-GISEL-LABEL: test_writelane_imm_oldval_i64:
1826; GFX1100-GISEL:       ; %bb.0:
1827; GFX1100-GISEL-NEXT:    s_clause 0x1
1828; GFX1100-GISEL-NEXT:    s_load_b128 s[0:3], s[4:5], 0x0
1829; GFX1100-GISEL-NEXT:    s_load_b32 s4, s[4:5], 0x10
1830; GFX1100-GISEL-NEXT:    v_mov_b32_e32 v0, 42
1831; GFX1100-GISEL-NEXT:    v_mov_b32_e32 v1, 0
1832; GFX1100-GISEL-NEXT:    v_mov_b32_e32 v2, 0
1833; GFX1100-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
1834; GFX1100-GISEL-NEXT:    v_writelane_b32 v0, s2, s4
1835; GFX1100-GISEL-NEXT:    v_writelane_b32 v1, s3, s4
1836; GFX1100-GISEL-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
1837; GFX1100-GISEL-NEXT:    s_endpgm
1838  %writelane = call i64 @llvm.amdgcn.writelane.i64(i64 %src0, i32 %src1, i64 42)
1839  store i64 %writelane, ptr addrspace(1) %out, align 4
1840  ret void
1841}
1842
; Double-precision writelane whose old value is the compile-time constant 42.0.
; The kernel calls llvm.amdgcn.writelane.f64 with %src0, %src1 and the literal
; double 42.0 as the old value, then stores the result to %out.
; The expected output materializes the constant as two 32-bit moves
; (0x40450000 for the high half, 0 for the low half) and emits one
; v_writelane_b32 per half.
1843define amdgpu_kernel void @test_writelane_imm_oldval_f64(ptr addrspace(1) %out, double %src0, i32 %src1) #1 {
1844; GFX802-SDAG-LABEL: test_writelane_imm_oldval_f64:
1845; GFX802-SDAG:       ; %bb.0:
1846; GFX802-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
1847; GFX802-SDAG-NEXT:    s_load_dword s4, s[8:9], 0x10
1848; GFX802-SDAG-NEXT:    v_mov_b32_e32 v1, 0x40450000
1849; GFX802-SDAG-NEXT:    v_mov_b32_e32 v0, 0
1850; GFX802-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
1851; GFX802-SDAG-NEXT:    v_mov_b32_e32 v2, s0
1852; GFX802-SDAG-NEXT:    s_mov_b32 m0, s4
1853; GFX802-SDAG-NEXT:    v_mov_b32_e32 v3, s1
1854; GFX802-SDAG-NEXT:    v_writelane_b32 v1, s3, m0
1855; GFX802-SDAG-NEXT:    v_writelane_b32 v0, s2, m0
1856; GFX802-SDAG-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
1857; GFX802-SDAG-NEXT:    s_endpgm
1858;
1859; GFX1010-SDAG-LABEL: test_writelane_imm_oldval_f64:
1860; GFX1010-SDAG:       ; %bb.0:
1861; GFX1010-SDAG-NEXT:    s_clause 0x1
1862; GFX1010-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
1863; GFX1010-SDAG-NEXT:    s_load_dword s4, s[8:9], 0x10
1864; GFX1010-SDAG-NEXT:    v_mov_b32_e32 v1, 0x40450000
1865; GFX1010-SDAG-NEXT:    v_mov_b32_e32 v0, 0
1866; GFX1010-SDAG-NEXT:    v_mov_b32_e32 v2, 0
1867; GFX1010-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
1868; GFX1010-SDAG-NEXT:    v_writelane_b32 v1, s3, s4
1869; GFX1010-SDAG-NEXT:    v_writelane_b32 v0, s2, s4
1870; GFX1010-SDAG-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
1871; GFX1010-SDAG-NEXT:    s_endpgm
1872;
1873; GFX1100-SDAG-LABEL: test_writelane_imm_oldval_f64:
1874; GFX1100-SDAG:       ; %bb.0:
1875; GFX1100-SDAG-NEXT:    s_clause 0x1
1876; GFX1100-SDAG-NEXT:    s_load_b128 s[0:3], s[4:5], 0x0
1877; GFX1100-SDAG-NEXT:    s_load_b32 s4, s[4:5], 0x10
1878; GFX1100-SDAG-NEXT:    v_mov_b32_e32 v1, 0x40450000
1879; GFX1100-SDAG-NEXT:    v_mov_b32_e32 v0, 0
1880; GFX1100-SDAG-NEXT:    v_mov_b32_e32 v2, 0
1881; GFX1100-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
1882; GFX1100-SDAG-NEXT:    v_writelane_b32 v1, s3, s4
1883; GFX1100-SDAG-NEXT:    v_writelane_b32 v0, s2, s4
1884; GFX1100-SDAG-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
1885; GFX1100-SDAG-NEXT:    s_endpgm
1886;
1887; GFX802-GISEL-LABEL: test_writelane_imm_oldval_f64:
1888; GFX802-GISEL:       ; %bb.0:
1889; GFX802-GISEL-NEXT:    s_load_dword s4, s[8:9], 0x10
1890; GFX802-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
1891; GFX802-GISEL-NEXT:    v_mov_b32_e32 v0, 0
1892; GFX802-GISEL-NEXT:    v_mov_b32_e32 v1, 0x40450000
1893; GFX802-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
1894; GFX802-GISEL-NEXT:    s_mov_b32 m0, s4
1895; GFX802-GISEL-NEXT:    v_mov_b32_e32 v3, s1
1896; GFX802-GISEL-NEXT:    v_writelane_b32 v0, s2, m0
1897; GFX802-GISEL-NEXT:    v_writelane_b32 v1, s3, m0
1898; GFX802-GISEL-NEXT:    v_mov_b32_e32 v2, s0
1899; GFX802-GISEL-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
1900; GFX802-GISEL-NEXT:    s_endpgm
1901;
1902; GFX1010-GISEL-LABEL: test_writelane_imm_oldval_f64:
1903; GFX1010-GISEL:       ; %bb.0:
1904; GFX1010-GISEL-NEXT:    s_clause 0x1
1905; GFX1010-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
1906; GFX1010-GISEL-NEXT:    s_load_dword s4, s[8:9], 0x10
1907; GFX1010-GISEL-NEXT:    v_mov_b32_e32 v0, 0
1908; GFX1010-GISEL-NEXT:    v_mov_b32_e32 v1, 0x40450000
1909; GFX1010-GISEL-NEXT:    v_mov_b32_e32 v2, 0
1910; GFX1010-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
1911; GFX1010-GISEL-NEXT:    v_writelane_b32 v0, s2, s4
1912; GFX1010-GISEL-NEXT:    v_writelane_b32 v1, s3, s4
1913; GFX1010-GISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
1914; GFX1010-GISEL-NEXT:    s_endpgm
1915;
1916; GFX1100-GISEL-LABEL: test_writelane_imm_oldval_f64:
1917; GFX1100-GISEL:       ; %bb.0:
1918; GFX1100-GISEL-NEXT:    s_clause 0x1
1919; GFX1100-GISEL-NEXT:    s_load_b128 s[0:3], s[4:5], 0x0
1920; GFX1100-GISEL-NEXT:    s_load_b32 s4, s[4:5], 0x10
1921; GFX1100-GISEL-NEXT:    v_mov_b32_e32 v0, 0
1922; GFX1100-GISEL-NEXT:    v_mov_b32_e32 v1, 0x40450000
1923; GFX1100-GISEL-NEXT:    v_mov_b32_e32 v2, 0
1924; GFX1100-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
1925; GFX1100-GISEL-NEXT:    v_writelane_b32 v0, s2, s4
1926; GFX1100-GISEL-NEXT:    v_writelane_b32 v1, s3, s4
1927; GFX1100-GISEL-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
1928; GFX1100-GISEL-NEXT:    s_endpgm
1929  %writelane = call double @llvm.amdgcn.writelane.f64(double %src0, i32 %src1, double 42.0)
1930  store double %writelane, ptr addrspace(1) %out, align 4
1931  ret void
1932}
1933
; Half-precision writelane in an ordinary (non-kernel) function.
; The function loads the half currently stored at %out, passes it as the old
; value to llvm.amdgcn.writelane.f16 together with %src and %src1, and stores
; the result back to %out.
; Unlike the amdgpu_kernel tests in this file, the arguments arrive in VGPRs,
; so the expected output first moves %src and %src1 into scalar registers with
; v_readfirstlane_b32. The value is loaded and stored as a 16-bit quantity, a
; single v_writelane_b32 is expected, and the function returns via
; s_setpc_b64 s[30:31].
1934define void @test_writelane_half(ptr addrspace(1) %out, half %src, i32 %src1) {
1935; GFX802-SDAG-LABEL: test_writelane_half:
1936; GFX802-SDAG:       ; %bb.0:
1937; GFX802-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1938; GFX802-SDAG-NEXT:    flat_load_ushort v4, v[0:1]
1939; GFX802-SDAG-NEXT:    v_readfirstlane_b32 m0, v3
1940; GFX802-SDAG-NEXT:    v_readfirstlane_b32 s4, v2
1941; GFX802-SDAG-NEXT:    s_waitcnt vmcnt(0)
1942; GFX802-SDAG-NEXT:    s_nop 1
1943; GFX802-SDAG-NEXT:    v_writelane_b32 v4, s4, m0
1944; GFX802-SDAG-NEXT:    flat_store_short v[0:1], v4
1945; GFX802-SDAG-NEXT:    s_waitcnt vmcnt(0)
1946; GFX802-SDAG-NEXT:    s_setpc_b64 s[30:31]
1947;
1948; GFX1010-SDAG-LABEL: test_writelane_half:
1949; GFX1010-SDAG:       ; %bb.0:
1950; GFX1010-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1951; GFX1010-SDAG-NEXT:    global_load_ushort v4, v[0:1], off
1952; GFX1010-SDAG-NEXT:    v_readfirstlane_b32 s4, v2
1953; GFX1010-SDAG-NEXT:    v_readfirstlane_b32 s5, v3
1954; GFX1010-SDAG-NEXT:    s_waitcnt vmcnt(0)
1955; GFX1010-SDAG-NEXT:    v_writelane_b32 v4, s4, s5
1956; GFX1010-SDAG-NEXT:    global_store_short v[0:1], v4, off
1957; GFX1010-SDAG-NEXT:    s_setpc_b64 s[30:31]
1958;
1959; GFX1100-SDAG-LABEL: test_writelane_half:
1960; GFX1100-SDAG:       ; %bb.0:
1961; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1962; GFX1100-SDAG-NEXT:    global_load_u16 v4, v[0:1], off
1963; GFX1100-SDAG-NEXT:    v_readfirstlane_b32 s0, v2
1964; GFX1100-SDAG-NEXT:    v_readfirstlane_b32 s1, v3
1965; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0)
1966; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1967; GFX1100-SDAG-NEXT:    v_writelane_b32 v4, s0, s1
1968; GFX1100-SDAG-NEXT:    global_store_b16 v[0:1], v4, off
1969; GFX1100-SDAG-NEXT:    s_setpc_b64 s[30:31]
1970;
1971; GFX802-GISEL-LABEL: test_writelane_half:
1972; GFX802-GISEL:       ; %bb.0:
1973; GFX802-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1974; GFX802-GISEL-NEXT:    flat_load_ushort v4, v[0:1]
1975; GFX802-GISEL-NEXT:    v_readfirstlane_b32 s5, v3
1976; GFX802-GISEL-NEXT:    v_readfirstlane_b32 s4, v2
1977; GFX802-GISEL-NEXT:    s_mov_b32 m0, s5
1978; GFX802-GISEL-NEXT:    s_waitcnt vmcnt(0)
1979; GFX802-GISEL-NEXT:    v_writelane_b32 v4, s4, m0
1980; GFX802-GISEL-NEXT:    flat_store_short v[0:1], v4
1981; GFX802-GISEL-NEXT:    s_waitcnt vmcnt(0)
1982; GFX802-GISEL-NEXT:    s_setpc_b64 s[30:31]
1983;
1984; GFX1010-GISEL-LABEL: test_writelane_half:
1985; GFX1010-GISEL:       ; %bb.0:
1986; GFX1010-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1987; GFX1010-GISEL-NEXT:    global_load_ushort v4, v[0:1], off
1988; GFX1010-GISEL-NEXT:    v_readfirstlane_b32 s4, v2
1989; GFX1010-GISEL-NEXT:    v_readfirstlane_b32 s5, v3
1990; GFX1010-GISEL-NEXT:    s_waitcnt vmcnt(0)
1991; GFX1010-GISEL-NEXT:    v_writelane_b32 v4, s4, s5
1992; GFX1010-GISEL-NEXT:    global_store_short v[0:1], v4, off
1993; GFX1010-GISEL-NEXT:    s_setpc_b64 s[30:31]
1994;
1995; GFX1100-GISEL-LABEL: test_writelane_half:
1996; GFX1100-GISEL:       ; %bb.0:
1997; GFX1100-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1998; GFX1100-GISEL-NEXT:    global_load_u16 v4, v[0:1], off
1999; GFX1100-GISEL-NEXT:    v_readfirstlane_b32 s0, v2
2000; GFX1100-GISEL-NEXT:    v_readfirstlane_b32 s1, v3
2001; GFX1100-GISEL-NEXT:    s_waitcnt vmcnt(0)
2002; GFX1100-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
2003; GFX1100-GISEL-NEXT:    v_writelane_b32 v4, s0, s1
2004; GFX1100-GISEL-NEXT:    global_store_b16 v[0:1], v4, off
2005; GFX1100-GISEL-NEXT:    s_setpc_b64 s[30:31]
2006  %oldval = load half, ptr addrspace(1) %out
2007  %writelane = call half @llvm.amdgcn.writelane.f16(half %src, i32 %src1, half %oldval)
2008  store half %writelane, ptr addrspace(1) %out, align 4
2009  ret void
2010}
2011
; test_writelane_float: llvm.amdgcn.writelane on a 32-bit float, in a plain
; (non-amdgpu_kernel) function.
; The IR loads the old value from %out, calls llvm.amdgcn.writelane.f32 with
; (%src, %src1, %oldval) and stores the result back to %out.
; Expected code in every configuration: one dword load, two
; v_readfirstlane_b32 (from v2 and v3), a single v_writelane_b32 and one dword
; store. On gfx802 the lane select is passed in m0.
; The assertions are generated by utils/update_llc_test_checks.py (see the NOTE
; on the first line of the file); regenerate them instead of editing by hand.
2012define void @test_writelane_float(ptr addrspace(1) %out, float %src, i32 %src1) {
2013; GFX802-SDAG-LABEL: test_writelane_float:
2014; GFX802-SDAG:       ; %bb.0:
2015; GFX802-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2016; GFX802-SDAG-NEXT:    flat_load_dword v4, v[0:1]
2017; GFX802-SDAG-NEXT:    v_readfirstlane_b32 m0, v3
2018; GFX802-SDAG-NEXT:    v_readfirstlane_b32 s4, v2
2019; GFX802-SDAG-NEXT:    s_waitcnt vmcnt(0)
2020; GFX802-SDAG-NEXT:    s_nop 1
2021; GFX802-SDAG-NEXT:    v_writelane_b32 v4, s4, m0
2022; GFX802-SDAG-NEXT:    flat_store_dword v[0:1], v4
2023; GFX802-SDAG-NEXT:    s_waitcnt vmcnt(0)
2024; GFX802-SDAG-NEXT:    s_setpc_b64 s[30:31]
2025;
2026; GFX1010-SDAG-LABEL: test_writelane_float:
2027; GFX1010-SDAG:       ; %bb.0:
2028; GFX1010-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2029; GFX1010-SDAG-NEXT:    global_load_dword v4, v[0:1], off
2030; GFX1010-SDAG-NEXT:    v_readfirstlane_b32 s4, v2
2031; GFX1010-SDAG-NEXT:    v_readfirstlane_b32 s5, v3
2032; GFX1010-SDAG-NEXT:    s_waitcnt vmcnt(0)
2033; GFX1010-SDAG-NEXT:    v_writelane_b32 v4, s4, s5
2034; GFX1010-SDAG-NEXT:    global_store_dword v[0:1], v4, off
2035; GFX1010-SDAG-NEXT:    s_setpc_b64 s[30:31]
2036;
2037; GFX1100-SDAG-LABEL: test_writelane_float:
2038; GFX1100-SDAG:       ; %bb.0:
2039; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2040; GFX1100-SDAG-NEXT:    global_load_b32 v4, v[0:1], off
2041; GFX1100-SDAG-NEXT:    v_readfirstlane_b32 s0, v2
2042; GFX1100-SDAG-NEXT:    v_readfirstlane_b32 s1, v3
2043; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0)
2044; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
2045; GFX1100-SDAG-NEXT:    v_writelane_b32 v4, s0, s1
2046; GFX1100-SDAG-NEXT:    global_store_b32 v[0:1], v4, off
2047; GFX1100-SDAG-NEXT:    s_setpc_b64 s[30:31]
2048;
2049; GFX802-GISEL-LABEL: test_writelane_float:
2050; GFX802-GISEL:       ; %bb.0:
2051; GFX802-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2052; GFX802-GISEL-NEXT:    flat_load_dword v4, v[0:1]
2053; GFX802-GISEL-NEXT:    v_readfirstlane_b32 s5, v3
2054; GFX802-GISEL-NEXT:    v_readfirstlane_b32 s4, v2
2055; GFX802-GISEL-NEXT:    s_mov_b32 m0, s5
2056; GFX802-GISEL-NEXT:    s_waitcnt vmcnt(0)
2057; GFX802-GISEL-NEXT:    v_writelane_b32 v4, s4, m0
2058; GFX802-GISEL-NEXT:    flat_store_dword v[0:1], v4
2059; GFX802-GISEL-NEXT:    s_waitcnt vmcnt(0)
2060; GFX802-GISEL-NEXT:    s_setpc_b64 s[30:31]
2061;
2062; GFX1010-GISEL-LABEL: test_writelane_float:
2063; GFX1010-GISEL:       ; %bb.0:
2064; GFX1010-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2065; GFX1010-GISEL-NEXT:    global_load_dword v4, v[0:1], off
2066; GFX1010-GISEL-NEXT:    v_readfirstlane_b32 s4, v2
2067; GFX1010-GISEL-NEXT:    v_readfirstlane_b32 s5, v3
2068; GFX1010-GISEL-NEXT:    s_waitcnt vmcnt(0)
2069; GFX1010-GISEL-NEXT:    v_writelane_b32 v4, s4, s5
2070; GFX1010-GISEL-NEXT:    global_store_dword v[0:1], v4, off
2071; GFX1010-GISEL-NEXT:    s_setpc_b64 s[30:31]
2072;
2073; GFX1100-GISEL-LABEL: test_writelane_float:
2074; GFX1100-GISEL:       ; %bb.0:
2075; GFX1100-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2076; GFX1100-GISEL-NEXT:    global_load_b32 v4, v[0:1], off
2077; GFX1100-GISEL-NEXT:    v_readfirstlane_b32 s0, v2
2078; GFX1100-GISEL-NEXT:    v_readfirstlane_b32 s1, v3
2079; GFX1100-GISEL-NEXT:    s_waitcnt vmcnt(0)
2080; GFX1100-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
2081; GFX1100-GISEL-NEXT:    v_writelane_b32 v4, s0, s1
2082; GFX1100-GISEL-NEXT:    global_store_b32 v[0:1], v4, off
2083; GFX1100-GISEL-NEXT:    s_setpc_b64 s[30:31]
; Read-modify-write of %out through the intrinsic under test.
2084  %oldval = load float, ptr addrspace(1) %out
2085  %writelane = call float @llvm.amdgcn.writelane.f32(float %src, i32 %src1, float %oldval)
2086  store float %writelane, ptr addrspace(1) %out, align 4
2087  ret void
2088}
2089
; test_writelane_bfloat: llvm.amdgcn.writelane on a scalar bfloat, in a plain
; (non-amdgpu_kernel) function.
; The IR loads the old value from %out, calls llvm.amdgcn.writelane.bf16 with
; (%src, %src1, %oldval) and stores the result back to %out.
; Expected code in every configuration: one 16-bit load, two
; v_readfirstlane_b32 (from v2 and v3), a single v_writelane_b32 and one
; 16-bit store.
; NOTE(review): the gfx802 GlobalISel checks here are line-for-line identical
; to the gfx802 SelectionDAG checks (direct readfirstlane into m0 plus
; s_nop 1), unlike the half and i16 tests in this file. Presumably GlobalISel
; falls back to SelectionDAG for bfloat, which the RUN lines permit with
; -global-isel-abort=2 -- confirm before relying on this as GlobalISel coverage.
; The assertions are generated by utils/update_llc_test_checks.py (see the NOTE
; on the first line of the file); regenerate them instead of editing by hand.
2090define void @test_writelane_bfloat(ptr addrspace(1) %out, bfloat %src, i32 %src1) {
2091; GFX802-SDAG-LABEL: test_writelane_bfloat:
2092; GFX802-SDAG:       ; %bb.0:
2093; GFX802-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2094; GFX802-SDAG-NEXT:    flat_load_ushort v4, v[0:1]
2095; GFX802-SDAG-NEXT:    v_readfirstlane_b32 m0, v3
2096; GFX802-SDAG-NEXT:    v_readfirstlane_b32 s4, v2
2097; GFX802-SDAG-NEXT:    s_waitcnt vmcnt(0)
2098; GFX802-SDAG-NEXT:    s_nop 1
2099; GFX802-SDAG-NEXT:    v_writelane_b32 v4, s4, m0
2100; GFX802-SDAG-NEXT:    flat_store_short v[0:1], v4
2101; GFX802-SDAG-NEXT:    s_waitcnt vmcnt(0)
2102; GFX802-SDAG-NEXT:    s_setpc_b64 s[30:31]
2103;
2104; GFX1010-SDAG-LABEL: test_writelane_bfloat:
2105; GFX1010-SDAG:       ; %bb.0:
2106; GFX1010-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2107; GFX1010-SDAG-NEXT:    global_load_ushort v4, v[0:1], off
2108; GFX1010-SDAG-NEXT:    v_readfirstlane_b32 s4, v2
2109; GFX1010-SDAG-NEXT:    v_readfirstlane_b32 s5, v3
2110; GFX1010-SDAG-NEXT:    s_waitcnt vmcnt(0)
2111; GFX1010-SDAG-NEXT:    v_writelane_b32 v4, s4, s5
2112; GFX1010-SDAG-NEXT:    global_store_short v[0:1], v4, off
2113; GFX1010-SDAG-NEXT:    s_setpc_b64 s[30:31]
2114;
2115; GFX1100-SDAG-LABEL: test_writelane_bfloat:
2116; GFX1100-SDAG:       ; %bb.0:
2117; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2118; GFX1100-SDAG-NEXT:    global_load_u16 v4, v[0:1], off
2119; GFX1100-SDAG-NEXT:    v_readfirstlane_b32 s0, v2
2120; GFX1100-SDAG-NEXT:    v_readfirstlane_b32 s1, v3
2121; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0)
2122; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
2123; GFX1100-SDAG-NEXT:    v_writelane_b32 v4, s0, s1
2124; GFX1100-SDAG-NEXT:    global_store_b16 v[0:1], v4, off
2125; GFX1100-SDAG-NEXT:    s_setpc_b64 s[30:31]
2126;
2127; GFX802-GISEL-LABEL: test_writelane_bfloat:
2128; GFX802-GISEL:       ; %bb.0:
2129; GFX802-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2130; GFX802-GISEL-NEXT:    flat_load_ushort v4, v[0:1]
2131; GFX802-GISEL-NEXT:    v_readfirstlane_b32 m0, v3
2132; GFX802-GISEL-NEXT:    v_readfirstlane_b32 s4, v2
2133; GFX802-GISEL-NEXT:    s_waitcnt vmcnt(0)
2134; GFX802-GISEL-NEXT:    s_nop 1
2135; GFX802-GISEL-NEXT:    v_writelane_b32 v4, s4, m0
2136; GFX802-GISEL-NEXT:    flat_store_short v[0:1], v4
2137; GFX802-GISEL-NEXT:    s_waitcnt vmcnt(0)
2138; GFX802-GISEL-NEXT:    s_setpc_b64 s[30:31]
2139;
2140; GFX1010-GISEL-LABEL: test_writelane_bfloat:
2141; GFX1010-GISEL:       ; %bb.0:
2142; GFX1010-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2143; GFX1010-GISEL-NEXT:    global_load_ushort v4, v[0:1], off
2144; GFX1010-GISEL-NEXT:    v_readfirstlane_b32 s4, v2
2145; GFX1010-GISEL-NEXT:    v_readfirstlane_b32 s5, v3
2146; GFX1010-GISEL-NEXT:    s_waitcnt vmcnt(0)
2147; GFX1010-GISEL-NEXT:    v_writelane_b32 v4, s4, s5
2148; GFX1010-GISEL-NEXT:    global_store_short v[0:1], v4, off
2149; GFX1010-GISEL-NEXT:    s_setpc_b64 s[30:31]
2150;
2151; GFX1100-GISEL-LABEL: test_writelane_bfloat:
2152; GFX1100-GISEL:       ; %bb.0:
2153; GFX1100-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2154; GFX1100-GISEL-NEXT:    global_load_u16 v4, v[0:1], off
2155; GFX1100-GISEL-NEXT:    v_readfirstlane_b32 s0, v2
2156; GFX1100-GISEL-NEXT:    v_readfirstlane_b32 s1, v3
2157; GFX1100-GISEL-NEXT:    s_waitcnt vmcnt(0)
2158; GFX1100-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
2159; GFX1100-GISEL-NEXT:    v_writelane_b32 v4, s0, s1
2160; GFX1100-GISEL-NEXT:    global_store_b16 v[0:1], v4, off
2161; GFX1100-GISEL-NEXT:    s_setpc_b64 s[30:31]
; Read-modify-write of %out through the intrinsic under test. The store states
; align 4, the load carries no explicit alignment.
2162  %oldval = load bfloat, ptr addrspace(1) %out
2163  %writelane = call bfloat @llvm.amdgcn.writelane.bf16(bfloat %src, i32 %src1, bfloat %oldval)
2164  store bfloat %writelane, ptr addrspace(1) %out, align 4
2165  ret void
2166}
2167
; test_writelane_i16: llvm.amdgcn.writelane on a scalar i16, in a plain
; (non-amdgpu_kernel) function.
; The IR loads the old value from %out, calls llvm.amdgcn.writelane.i16 with
; (%src, %src1, %oldval) and stores the result back to %out.
; Expected code in every configuration: one 16-bit load, two
; v_readfirstlane_b32 (from v2 and v3), a single v_writelane_b32 and one
; 16-bit store. On gfx802 the lane select is passed in m0; SelectionDAG reads
; it straight into m0, GlobalISel goes through s5 and an s_mov_b32.
; The assertions are generated by utils/update_llc_test_checks.py (see the NOTE
; on the first line of the file); regenerate them instead of editing by hand.
2168define void @test_writelane_i16(ptr addrspace(1) %out, i16 %src, i32 %src1) {
2169; GFX802-SDAG-LABEL: test_writelane_i16:
2170; GFX802-SDAG:       ; %bb.0:
2171; GFX802-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2172; GFX802-SDAG-NEXT:    flat_load_ushort v4, v[0:1]
2173; GFX802-SDAG-NEXT:    v_readfirstlane_b32 m0, v3
2174; GFX802-SDAG-NEXT:    v_readfirstlane_b32 s4, v2
2175; GFX802-SDAG-NEXT:    s_waitcnt vmcnt(0)
2176; GFX802-SDAG-NEXT:    s_nop 1
2177; GFX802-SDAG-NEXT:    v_writelane_b32 v4, s4, m0
2178; GFX802-SDAG-NEXT:    flat_store_short v[0:1], v4
2179; GFX802-SDAG-NEXT:    s_waitcnt vmcnt(0)
2180; GFX802-SDAG-NEXT:    s_setpc_b64 s[30:31]
2181;
2182; GFX1010-SDAG-LABEL: test_writelane_i16:
2183; GFX1010-SDAG:       ; %bb.0:
2184; GFX1010-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2185; GFX1010-SDAG-NEXT:    global_load_ushort v4, v[0:1], off
2186; GFX1010-SDAG-NEXT:    v_readfirstlane_b32 s4, v2
2187; GFX1010-SDAG-NEXT:    v_readfirstlane_b32 s5, v3
2188; GFX1010-SDAG-NEXT:    s_waitcnt vmcnt(0)
2189; GFX1010-SDAG-NEXT:    v_writelane_b32 v4, s4, s5
2190; GFX1010-SDAG-NEXT:    global_store_short v[0:1], v4, off
2191; GFX1010-SDAG-NEXT:    s_setpc_b64 s[30:31]
2192;
2193; GFX1100-SDAG-LABEL: test_writelane_i16:
2194; GFX1100-SDAG:       ; %bb.0:
2195; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2196; GFX1100-SDAG-NEXT:    global_load_u16 v4, v[0:1], off
2197; GFX1100-SDAG-NEXT:    v_readfirstlane_b32 s0, v2
2198; GFX1100-SDAG-NEXT:    v_readfirstlane_b32 s1, v3
2199; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0)
2200; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
2201; GFX1100-SDAG-NEXT:    v_writelane_b32 v4, s0, s1
2202; GFX1100-SDAG-NEXT:    global_store_b16 v[0:1], v4, off
2203; GFX1100-SDAG-NEXT:    s_setpc_b64 s[30:31]
2204;
2205; GFX802-GISEL-LABEL: test_writelane_i16:
2206; GFX802-GISEL:       ; %bb.0:
2207; GFX802-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2208; GFX802-GISEL-NEXT:    flat_load_ushort v4, v[0:1]
2209; GFX802-GISEL-NEXT:    v_readfirstlane_b32 s5, v3
2210; GFX802-GISEL-NEXT:    v_readfirstlane_b32 s4, v2
2211; GFX802-GISEL-NEXT:    s_mov_b32 m0, s5
2212; GFX802-GISEL-NEXT:    s_waitcnt vmcnt(0)
2213; GFX802-GISEL-NEXT:    v_writelane_b32 v4, s4, m0
2214; GFX802-GISEL-NEXT:    flat_store_short v[0:1], v4
2215; GFX802-GISEL-NEXT:    s_waitcnt vmcnt(0)
2216; GFX802-GISEL-NEXT:    s_setpc_b64 s[30:31]
2217;
2218; GFX1010-GISEL-LABEL: test_writelane_i16:
2219; GFX1010-GISEL:       ; %bb.0:
2220; GFX1010-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2221; GFX1010-GISEL-NEXT:    global_load_ushort v4, v[0:1], off
2222; GFX1010-GISEL-NEXT:    v_readfirstlane_b32 s4, v2
2223; GFX1010-GISEL-NEXT:    v_readfirstlane_b32 s5, v3
2224; GFX1010-GISEL-NEXT:    s_waitcnt vmcnt(0)
2225; GFX1010-GISEL-NEXT:    v_writelane_b32 v4, s4, s5
2226; GFX1010-GISEL-NEXT:    global_store_short v[0:1], v4, off
2227; GFX1010-GISEL-NEXT:    s_setpc_b64 s[30:31]
2228;
2229; GFX1100-GISEL-LABEL: test_writelane_i16:
2230; GFX1100-GISEL:       ; %bb.0:
2231; GFX1100-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2232; GFX1100-GISEL-NEXT:    global_load_u16 v4, v[0:1], off
2233; GFX1100-GISEL-NEXT:    v_readfirstlane_b32 s0, v2
2234; GFX1100-GISEL-NEXT:    v_readfirstlane_b32 s1, v3
2235; GFX1100-GISEL-NEXT:    s_waitcnt vmcnt(0)
2236; GFX1100-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
2237; GFX1100-GISEL-NEXT:    v_writelane_b32 v4, s0, s1
2238; GFX1100-GISEL-NEXT:    global_store_b16 v[0:1], v4, off
2239; GFX1100-GISEL-NEXT:    s_setpc_b64 s[30:31]
; Read-modify-write of %out through the intrinsic under test. The store states
; align 4, the load carries no explicit alignment.
2240  %oldval = load i16, ptr addrspace(1) %out
2241  %writelane = call i16 @llvm.amdgcn.writelane.i16(i16 %src, i32 %src1, i16 %oldval)
2242  store i16 %writelane, ptr addrspace(1) %out, align 4
2243  ret void
2244}
2245
; test_writelane_v2f16: llvm.amdgcn.writelane on a <2 x half> vector, in a
; plain (non-amdgpu_kernel) function.
; The IR loads the old value from %out, calls llvm.amdgcn.writelane.v2f16 with
; (%src, %src1, %oldval) and stores the result back to %out.
; Expected code in every configuration treats the vector as one dword: one
; dword load, two v_readfirstlane_b32 (from v2 and v3), a single
; v_writelane_b32 and one dword store. On gfx802 the lane select is passed in
; m0.
; The assertions are generated by utils/update_llc_test_checks.py (see the NOTE
; on the first line of the file); regenerate them instead of editing by hand.
2246define void @test_writelane_v2f16(ptr addrspace(1) %out, <2 x half> %src, i32 %src1) {
2247; GFX802-SDAG-LABEL: test_writelane_v2f16:
2248; GFX802-SDAG:       ; %bb.0:
2249; GFX802-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2250; GFX802-SDAG-NEXT:    flat_load_dword v4, v[0:1]
2251; GFX802-SDAG-NEXT:    v_readfirstlane_b32 m0, v3
2252; GFX802-SDAG-NEXT:    v_readfirstlane_b32 s4, v2
2253; GFX802-SDAG-NEXT:    s_waitcnt vmcnt(0)
2254; GFX802-SDAG-NEXT:    s_nop 1
2255; GFX802-SDAG-NEXT:    v_writelane_b32 v4, s4, m0
2256; GFX802-SDAG-NEXT:    flat_store_dword v[0:1], v4
2257; GFX802-SDAG-NEXT:    s_waitcnt vmcnt(0)
2258; GFX802-SDAG-NEXT:    s_setpc_b64 s[30:31]
2259;
2260; GFX1010-SDAG-LABEL: test_writelane_v2f16:
2261; GFX1010-SDAG:       ; %bb.0:
2262; GFX1010-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2263; GFX1010-SDAG-NEXT:    global_load_dword v4, v[0:1], off
2264; GFX1010-SDAG-NEXT:    v_readfirstlane_b32 s4, v2
2265; GFX1010-SDAG-NEXT:    v_readfirstlane_b32 s5, v3
2266; GFX1010-SDAG-NEXT:    s_waitcnt vmcnt(0)
2267; GFX1010-SDAG-NEXT:    v_writelane_b32 v4, s4, s5
2268; GFX1010-SDAG-NEXT:    global_store_dword v[0:1], v4, off
2269; GFX1010-SDAG-NEXT:    s_setpc_b64 s[30:31]
2270;
2271; GFX1100-SDAG-LABEL: test_writelane_v2f16:
2272; GFX1100-SDAG:       ; %bb.0:
2273; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2274; GFX1100-SDAG-NEXT:    global_load_b32 v4, v[0:1], off
2275; GFX1100-SDAG-NEXT:    v_readfirstlane_b32 s0, v2
2276; GFX1100-SDAG-NEXT:    v_readfirstlane_b32 s1, v3
2277; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0)
2278; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
2279; GFX1100-SDAG-NEXT:    v_writelane_b32 v4, s0, s1
2280; GFX1100-SDAG-NEXT:    global_store_b32 v[0:1], v4, off
2281; GFX1100-SDAG-NEXT:    s_setpc_b64 s[30:31]
2282;
2283; GFX802-GISEL-LABEL: test_writelane_v2f16:
2284; GFX802-GISEL:       ; %bb.0:
2285; GFX802-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2286; GFX802-GISEL-NEXT:    flat_load_dword v4, v[0:1]
2287; GFX802-GISEL-NEXT:    v_readfirstlane_b32 s5, v3
2288; GFX802-GISEL-NEXT:    v_readfirstlane_b32 s4, v2
2289; GFX802-GISEL-NEXT:    s_mov_b32 m0, s5
2290; GFX802-GISEL-NEXT:    s_waitcnt vmcnt(0)
2291; GFX802-GISEL-NEXT:    v_writelane_b32 v4, s4, m0
2292; GFX802-GISEL-NEXT:    flat_store_dword v[0:1], v4
2293; GFX802-GISEL-NEXT:    s_waitcnt vmcnt(0)
2294; GFX802-GISEL-NEXT:    s_setpc_b64 s[30:31]
2295;
2296; GFX1010-GISEL-LABEL: test_writelane_v2f16:
2297; GFX1010-GISEL:       ; %bb.0:
2298; GFX1010-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2299; GFX1010-GISEL-NEXT:    global_load_dword v4, v[0:1], off
2300; GFX1010-GISEL-NEXT:    v_readfirstlane_b32 s4, v2
2301; GFX1010-GISEL-NEXT:    v_readfirstlane_b32 s5, v3
2302; GFX1010-GISEL-NEXT:    s_waitcnt vmcnt(0)
2303; GFX1010-GISEL-NEXT:    v_writelane_b32 v4, s4, s5
2304; GFX1010-GISEL-NEXT:    global_store_dword v[0:1], v4, off
2305; GFX1010-GISEL-NEXT:    s_setpc_b64 s[30:31]
2306;
2307; GFX1100-GISEL-LABEL: test_writelane_v2f16:
2308; GFX1100-GISEL:       ; %bb.0:
2309; GFX1100-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2310; GFX1100-GISEL-NEXT:    global_load_b32 v4, v[0:1], off
2311; GFX1100-GISEL-NEXT:    v_readfirstlane_b32 s0, v2
2312; GFX1100-GISEL-NEXT:    v_readfirstlane_b32 s1, v3
2313; GFX1100-GISEL-NEXT:    s_waitcnt vmcnt(0)
2314; GFX1100-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
2315; GFX1100-GISEL-NEXT:    v_writelane_b32 v4, s0, s1
2316; GFX1100-GISEL-NEXT:    global_store_b32 v[0:1], v4, off
2317; GFX1100-GISEL-NEXT:    s_setpc_b64 s[30:31]
; Read-modify-write of %out through the intrinsic under test.
2318  %oldval = load <2 x half>, ptr addrspace(1) %out
2319  %writelane = call <2 x half> @llvm.amdgcn.writelane.v2f16(<2 x half> %src, i32 %src1, <2 x half> %oldval)
2320  store <2 x half> %writelane, ptr addrspace(1) %out, align 4
2321  ret void
2322}
2323
; test_readlane_v2f32: llvm.amdgcn.writelane on a <2 x float> vector, in a
; plain (non-amdgpu_kernel) function.
; NOTE(review): despite the "readlane" in its name, this function calls
; llvm.amdgcn.writelane.v2f32 and every expected sequence contains
; v_writelane_b32 only. The name looks like a copy-paste slip for
; test_writelane_v2f32; renaming it requires regenerating the label checks.
; The IR loads the old value from %out, calls the intrinsic with
; (%src, %src1, %oldval) and stores the result back to %out.
; Expected code in every configuration: one 64-bit load into v[5:6], three
; v_readfirstlane_b32 (v2 and v3 for the two source dwords, v4 for the lane
; select), two v_writelane_b32 (one per dword, sharing the lane select) and
; one 64-bit store. SelectionDAG writes v6 before v5, GlobalISel v5 before v6.
; The assertions are generated by utils/update_llc_test_checks.py (see the NOTE
; on the first line of the file); regenerate them instead of editing by hand.
2324define void @test_readlane_v2f32(ptr addrspace(1) %out, <2 x float> %src, i32 %src1) {
2325; GFX802-SDAG-LABEL: test_readlane_v2f32:
2326; GFX802-SDAG:       ; %bb.0:
2327; GFX802-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2328; GFX802-SDAG-NEXT:    flat_load_dwordx2 v[5:6], v[0:1]
2329; GFX802-SDAG-NEXT:    v_readfirstlane_b32 m0, v4
2330; GFX802-SDAG-NEXT:    v_readfirstlane_b32 s4, v3
2331; GFX802-SDAG-NEXT:    v_readfirstlane_b32 s5, v2
2332; GFX802-SDAG-NEXT:    s_waitcnt vmcnt(0)
2333; GFX802-SDAG-NEXT:    s_nop 0
2334; GFX802-SDAG-NEXT:    v_writelane_b32 v6, s4, m0
2335; GFX802-SDAG-NEXT:    v_writelane_b32 v5, s5, m0
2336; GFX802-SDAG-NEXT:    flat_store_dwordx2 v[0:1], v[5:6]
2337; GFX802-SDAG-NEXT:    s_waitcnt vmcnt(0)
2338; GFX802-SDAG-NEXT:    s_setpc_b64 s[30:31]
2339;
2340; GFX1010-SDAG-LABEL: test_readlane_v2f32:
2341; GFX1010-SDAG:       ; %bb.0:
2342; GFX1010-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2343; GFX1010-SDAG-NEXT:    global_load_dwordx2 v[5:6], v[0:1], off
2344; GFX1010-SDAG-NEXT:    v_readfirstlane_b32 s4, v3
2345; GFX1010-SDAG-NEXT:    v_readfirstlane_b32 s5, v4
2346; GFX1010-SDAG-NEXT:    v_readfirstlane_b32 s6, v2
2347; GFX1010-SDAG-NEXT:    s_waitcnt vmcnt(0)
2348; GFX1010-SDAG-NEXT:    v_writelane_b32 v6, s4, s5
2349; GFX1010-SDAG-NEXT:    v_writelane_b32 v5, s6, s5
2350; GFX1010-SDAG-NEXT:    global_store_dwordx2 v[0:1], v[5:6], off
2351; GFX1010-SDAG-NEXT:    s_setpc_b64 s[30:31]
2352;
2353; GFX1100-SDAG-LABEL: test_readlane_v2f32:
2354; GFX1100-SDAG:       ; %bb.0:
2355; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2356; GFX1100-SDAG-NEXT:    global_load_b64 v[5:6], v[0:1], off
2357; GFX1100-SDAG-NEXT:    v_readfirstlane_b32 s0, v3
2358; GFX1100-SDAG-NEXT:    v_readfirstlane_b32 s1, v4
2359; GFX1100-SDAG-NEXT:    v_readfirstlane_b32 s2, v2
2360; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0)
2361; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
2362; GFX1100-SDAG-NEXT:    v_writelane_b32 v6, s0, s1
2363; GFX1100-SDAG-NEXT:    v_writelane_b32 v5, s2, s1
2364; GFX1100-SDAG-NEXT:    global_store_b64 v[0:1], v[5:6], off
2365; GFX1100-SDAG-NEXT:    s_setpc_b64 s[30:31]
2366;
2367; GFX802-GISEL-LABEL: test_readlane_v2f32:
2368; GFX802-GISEL:       ; %bb.0:
2369; GFX802-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2370; GFX802-GISEL-NEXT:    flat_load_dwordx2 v[5:6], v[0:1]
2371; GFX802-GISEL-NEXT:    v_readfirstlane_b32 s5, v4
2372; GFX802-GISEL-NEXT:    v_readfirstlane_b32 s4, v2
2373; GFX802-GISEL-NEXT:    v_readfirstlane_b32 s6, v3
2374; GFX802-GISEL-NEXT:    s_mov_b32 m0, s5
2375; GFX802-GISEL-NEXT:    s_waitcnt vmcnt(0)
2376; GFX802-GISEL-NEXT:    v_writelane_b32 v5, s4, m0
2377; GFX802-GISEL-NEXT:    v_writelane_b32 v6, s6, m0
2378; GFX802-GISEL-NEXT:    flat_store_dwordx2 v[0:1], v[5:6]
2379; GFX802-GISEL-NEXT:    s_waitcnt vmcnt(0)
2380; GFX802-GISEL-NEXT:    s_setpc_b64 s[30:31]
2381;
2382; GFX1010-GISEL-LABEL: test_readlane_v2f32:
2383; GFX1010-GISEL:       ; %bb.0:
2384; GFX1010-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2385; GFX1010-GISEL-NEXT:    global_load_dwordx2 v[5:6], v[0:1], off
2386; GFX1010-GISEL-NEXT:    v_readfirstlane_b32 s4, v2
2387; GFX1010-GISEL-NEXT:    v_readfirstlane_b32 s5, v4
2388; GFX1010-GISEL-NEXT:    v_readfirstlane_b32 s6, v3
2389; GFX1010-GISEL-NEXT:    s_waitcnt vmcnt(0)
2390; GFX1010-GISEL-NEXT:    v_writelane_b32 v5, s4, s5
2391; GFX1010-GISEL-NEXT:    v_writelane_b32 v6, s6, s5
2392; GFX1010-GISEL-NEXT:    global_store_dwordx2 v[0:1], v[5:6], off
2393; GFX1010-GISEL-NEXT:    s_setpc_b64 s[30:31]
2394;
2395; GFX1100-GISEL-LABEL: test_readlane_v2f32:
2396; GFX1100-GISEL:       ; %bb.0:
2397; GFX1100-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2398; GFX1100-GISEL-NEXT:    global_load_b64 v[5:6], v[0:1], off
2399; GFX1100-GISEL-NEXT:    v_readfirstlane_b32 s0, v2
2400; GFX1100-GISEL-NEXT:    v_readfirstlane_b32 s1, v4
2401; GFX1100-GISEL-NEXT:    v_readfirstlane_b32 s2, v3
2402; GFX1100-GISEL-NEXT:    s_waitcnt vmcnt(0)
2403; GFX1100-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
2404; GFX1100-GISEL-NEXT:    v_writelane_b32 v5, s0, s1
2405; GFX1100-GISEL-NEXT:    v_writelane_b32 v6, s2, s1
2406; GFX1100-GISEL-NEXT:    global_store_b64 v[0:1], v[5:6], off
2407; GFX1100-GISEL-NEXT:    s_setpc_b64 s[30:31]
; Read-modify-write of %out through the intrinsic under test.
2408  %oldval = load <2 x float>, ptr addrspace(1) %out
2409  %writelane = call <2 x float> @llvm.amdgcn.writelane.v2f32(<2 x float> %src, i32 %src1, <2 x float> %oldval)
2410  store <2 x float> %writelane, ptr addrspace(1) %out, align 4
2411  ret void
2412}
2413
; test_writelane_v7i32: llvm.amdgcn.writelane on a <7 x i32> vector (an odd,
; non-power-of-two element count), in a plain (non-amdgpu_kernel) function.
; The IR loads the old value from %out, calls llvm.amdgcn.writelane.v7i32 with
; (%src, %src1, %oldval) and stores the result back to %out.
; Expected code in every configuration: eight v_readfirstlane_b32 (v2..v8 for
; the seven source dwords, v9 for the lane select) and seven v_writelane_b32
; that all share the same lane select (m0 on gfx802).
; Memory access is split in two parts at byte offset 16:
;   - SelectionDAG loads 4 dwords + 3 dwords;
;   - GlobalISel loads 4 dwords + 4 dwords (v17 is loaded but never written by
;     a v_writelane_b32 nor stored);
;   - both store 4 dwords + 3 dwords.
; gfx802 computes the second address with v_add_u32/v_addc_u32; gfx1010 and
; gfx1100 use the "offset:16" immediate instead.
; The assertions are generated by utils/update_llc_test_checks.py (see the NOTE
; on the first line of the file); regenerate them instead of editing by hand.
2414define void @test_writelane_v7i32(ptr addrspace(1) %out, <7 x i32> %src, i32 %src1) {
2415; GFX802-SDAG-LABEL: test_writelane_v7i32:
2416; GFX802-SDAG:       ; %bb.0:
2417; GFX802-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2418; GFX802-SDAG-NEXT:    v_add_u32_e32 v17, vcc, 16, v0
2419; GFX802-SDAG-NEXT:    flat_load_dwordx4 v[10:13], v[0:1]
2420; GFX802-SDAG-NEXT:    v_addc_u32_e32 v18, vcc, 0, v1, vcc
2421; GFX802-SDAG-NEXT:    flat_load_dwordx3 v[14:16], v[17:18]
2422; GFX802-SDAG-NEXT:    v_readfirstlane_b32 m0, v9
2423; GFX802-SDAG-NEXT:    v_readfirstlane_b32 s7, v5
2424; GFX802-SDAG-NEXT:    v_readfirstlane_b32 s8, v4
2425; GFX802-SDAG-NEXT:    v_readfirstlane_b32 s9, v3
2426; GFX802-SDAG-NEXT:    v_readfirstlane_b32 s10, v2
2427; GFX802-SDAG-NEXT:    v_readfirstlane_b32 s4, v8
2428; GFX802-SDAG-NEXT:    v_readfirstlane_b32 s5, v7
2429; GFX802-SDAG-NEXT:    v_readfirstlane_b32 s6, v6
2430; GFX802-SDAG-NEXT:    s_waitcnt vmcnt(1)
2431; GFX802-SDAG-NEXT:    v_writelane_b32 v13, s7, m0
2432; GFX802-SDAG-NEXT:    v_writelane_b32 v12, s8, m0
2433; GFX802-SDAG-NEXT:    v_writelane_b32 v11, s9, m0
2434; GFX802-SDAG-NEXT:    v_writelane_b32 v10, s10, m0
2435; GFX802-SDAG-NEXT:    s_waitcnt vmcnt(0)
2436; GFX802-SDAG-NEXT:    v_writelane_b32 v16, s4, m0
2437; GFX802-SDAG-NEXT:    v_writelane_b32 v15, s5, m0
2438; GFX802-SDAG-NEXT:    v_writelane_b32 v14, s6, m0
2439; GFX802-SDAG-NEXT:    flat_store_dwordx4 v[0:1], v[10:13]
2440; GFX802-SDAG-NEXT:    flat_store_dwordx3 v[17:18], v[14:16]
2441; GFX802-SDAG-NEXT:    s_waitcnt vmcnt(0)
2442; GFX802-SDAG-NEXT:    s_setpc_b64 s[30:31]
2443;
2444; GFX1010-SDAG-LABEL: test_writelane_v7i32:
2445; GFX1010-SDAG:       ; %bb.0:
2446; GFX1010-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2447; GFX1010-SDAG-NEXT:    s_clause 0x1
2448; GFX1010-SDAG-NEXT:    global_load_dwordx3 v[14:16], v[0:1], off offset:16
2449; GFX1010-SDAG-NEXT:    global_load_dwordx4 v[10:13], v[0:1], off
2450; GFX1010-SDAG-NEXT:    v_readfirstlane_b32 s5, v9
2451; GFX1010-SDAG-NEXT:    v_readfirstlane_b32 s8, v5
2452; GFX1010-SDAG-NEXT:    v_readfirstlane_b32 s9, v4
2453; GFX1010-SDAG-NEXT:    v_readfirstlane_b32 s10, v3
2454; GFX1010-SDAG-NEXT:    v_readfirstlane_b32 s11, v2
2455; GFX1010-SDAG-NEXT:    v_readfirstlane_b32 s4, v8
2456; GFX1010-SDAG-NEXT:    v_readfirstlane_b32 s6, v7
2457; GFX1010-SDAG-NEXT:    v_readfirstlane_b32 s7, v6
2458; GFX1010-SDAG-NEXT:    s_waitcnt vmcnt(1)
2459; GFX1010-SDAG-NEXT:    v_writelane_b32 v16, s4, s5
2460; GFX1010-SDAG-NEXT:    s_waitcnt vmcnt(0)
2461; GFX1010-SDAG-NEXT:    v_writelane_b32 v13, s8, s5
2462; GFX1010-SDAG-NEXT:    v_writelane_b32 v12, s9, s5
2463; GFX1010-SDAG-NEXT:    v_writelane_b32 v11, s10, s5
2464; GFX1010-SDAG-NEXT:    v_writelane_b32 v10, s11, s5
2465; GFX1010-SDAG-NEXT:    v_writelane_b32 v15, s6, s5
2466; GFX1010-SDAG-NEXT:    v_writelane_b32 v14, s7, s5
2467; GFX1010-SDAG-NEXT:    global_store_dwordx4 v[0:1], v[10:13], off
2468; GFX1010-SDAG-NEXT:    global_store_dwordx3 v[0:1], v[14:16], off offset:16
2469; GFX1010-SDAG-NEXT:    s_setpc_b64 s[30:31]
2470;
2471; GFX1100-SDAG-LABEL: test_writelane_v7i32:
2472; GFX1100-SDAG:       ; %bb.0:
2473; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2474; GFX1100-SDAG-NEXT:    s_clause 0x1
2475; GFX1100-SDAG-NEXT:    global_load_b96 v[14:16], v[0:1], off offset:16
2476; GFX1100-SDAG-NEXT:    global_load_b128 v[10:13], v[0:1], off
2477; GFX1100-SDAG-NEXT:    v_readfirstlane_b32 s1, v9
2478; GFX1100-SDAG-NEXT:    v_readfirstlane_b32 s4, v5
2479; GFX1100-SDAG-NEXT:    v_readfirstlane_b32 s5, v4
2480; GFX1100-SDAG-NEXT:    v_readfirstlane_b32 s6, v3
2481; GFX1100-SDAG-NEXT:    v_readfirstlane_b32 s7, v2
2482; GFX1100-SDAG-NEXT:    v_readfirstlane_b32 s0, v8
2483; GFX1100-SDAG-NEXT:    v_readfirstlane_b32 s2, v7
2484; GFX1100-SDAG-NEXT:    v_readfirstlane_b32 s3, v6
2485; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(1)
2486; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_3)
2487; GFX1100-SDAG-NEXT:    v_writelane_b32 v16, s0, s1
2488; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0)
2489; GFX1100-SDAG-NEXT:    v_writelane_b32 v13, s4, s1
2490; GFX1100-SDAG-NEXT:    v_writelane_b32 v12, s5, s1
2491; GFX1100-SDAG-NEXT:    v_writelane_b32 v11, s6, s1
2492; GFX1100-SDAG-NEXT:    v_writelane_b32 v10, s7, s1
2493; GFX1100-SDAG-NEXT:    v_writelane_b32 v15, s2, s1
2494; GFX1100-SDAG-NEXT:    v_writelane_b32 v14, s3, s1
2495; GFX1100-SDAG-NEXT:    s_clause 0x1
2496; GFX1100-SDAG-NEXT:    global_store_b128 v[0:1], v[10:13], off
2497; GFX1100-SDAG-NEXT:    global_store_b96 v[0:1], v[14:16], off offset:16
2498; GFX1100-SDAG-NEXT:    s_setpc_b64 s[30:31]
2499;
2500; GFX802-GISEL-LABEL: test_writelane_v7i32:
2501; GFX802-GISEL:       ; %bb.0:
2502; GFX802-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2503; GFX802-GISEL-NEXT:    v_add_u32_e32 v18, vcc, 16, v0
2504; GFX802-GISEL-NEXT:    flat_load_dwordx4 v[10:13], v[0:1]
2505; GFX802-GISEL-NEXT:    v_addc_u32_e32 v19, vcc, 0, v1, vcc
2506; GFX802-GISEL-NEXT:    flat_load_dwordx4 v[14:17], v[18:19]
2507; GFX802-GISEL-NEXT:    v_readfirstlane_b32 s5, v9
2508; GFX802-GISEL-NEXT:    v_readfirstlane_b32 s4, v2
2509; GFX802-GISEL-NEXT:    v_readfirstlane_b32 s6, v3
2510; GFX802-GISEL-NEXT:    v_readfirstlane_b32 s7, v4
2511; GFX802-GISEL-NEXT:    v_readfirstlane_b32 s8, v5
2512; GFX802-GISEL-NEXT:    s_mov_b32 m0, s5
2513; GFX802-GISEL-NEXT:    v_readfirstlane_b32 s9, v6
2514; GFX802-GISEL-NEXT:    v_readfirstlane_b32 s10, v7
2515; GFX802-GISEL-NEXT:    v_readfirstlane_b32 s11, v8
2516; GFX802-GISEL-NEXT:    s_waitcnt vmcnt(1)
2517; GFX802-GISEL-NEXT:    v_writelane_b32 v10, s4, m0
2518; GFX802-GISEL-NEXT:    v_writelane_b32 v11, s6, m0
2519; GFX802-GISEL-NEXT:    v_writelane_b32 v12, s7, m0
2520; GFX802-GISEL-NEXT:    v_writelane_b32 v13, s8, m0
2521; GFX802-GISEL-NEXT:    s_waitcnt vmcnt(0)
2522; GFX802-GISEL-NEXT:    v_writelane_b32 v14, s9, m0
2523; GFX802-GISEL-NEXT:    v_writelane_b32 v15, s10, m0
2524; GFX802-GISEL-NEXT:    v_writelane_b32 v16, s11, m0
2525; GFX802-GISEL-NEXT:    flat_store_dwordx4 v[0:1], v[10:13]
2526; GFX802-GISEL-NEXT:    flat_store_dwordx3 v[18:19], v[14:16]
2527; GFX802-GISEL-NEXT:    s_waitcnt vmcnt(0)
2528; GFX802-GISEL-NEXT:    s_setpc_b64 s[30:31]
2529;
2530; GFX1010-GISEL-LABEL: test_writelane_v7i32:
2531; GFX1010-GISEL:       ; %bb.0:
2532; GFX1010-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2533; GFX1010-GISEL-NEXT:    s_clause 0x1
2534; GFX1010-GISEL-NEXT:    global_load_dwordx4 v[10:13], v[0:1], off
2535; GFX1010-GISEL-NEXT:    global_load_dwordx4 v[14:17], v[0:1], off offset:16
2536; GFX1010-GISEL-NEXT:    v_readfirstlane_b32 s4, v2
2537; GFX1010-GISEL-NEXT:    v_readfirstlane_b32 s5, v9
2538; GFX1010-GISEL-NEXT:    v_readfirstlane_b32 s6, v3
2539; GFX1010-GISEL-NEXT:    v_readfirstlane_b32 s7, v4
2540; GFX1010-GISEL-NEXT:    v_readfirstlane_b32 s8, v5
2541; GFX1010-GISEL-NEXT:    v_readfirstlane_b32 s9, v6
2542; GFX1010-GISEL-NEXT:    v_readfirstlane_b32 s10, v7
2543; GFX1010-GISEL-NEXT:    v_readfirstlane_b32 s11, v8
2544; GFX1010-GISEL-NEXT:    s_waitcnt vmcnt(1)
2545; GFX1010-GISEL-NEXT:    v_writelane_b32 v10, s4, s5
2546; GFX1010-GISEL-NEXT:    v_writelane_b32 v11, s6, s5
2547; GFX1010-GISEL-NEXT:    v_writelane_b32 v12, s7, s5
2548; GFX1010-GISEL-NEXT:    v_writelane_b32 v13, s8, s5
2549; GFX1010-GISEL-NEXT:    s_waitcnt vmcnt(0)
2550; GFX1010-GISEL-NEXT:    v_writelane_b32 v14, s9, s5
2551; GFX1010-GISEL-NEXT:    v_writelane_b32 v15, s10, s5
2552; GFX1010-GISEL-NEXT:    v_writelane_b32 v16, s11, s5
2553; GFX1010-GISEL-NEXT:    global_store_dwordx4 v[0:1], v[10:13], off
2554; GFX1010-GISEL-NEXT:    global_store_dwordx3 v[0:1], v[14:16], off offset:16
2555; GFX1010-GISEL-NEXT:    s_setpc_b64 s[30:31]
2556;
2557; GFX1100-GISEL-LABEL: test_writelane_v7i32:
2558; GFX1100-GISEL:       ; %bb.0:
2559; GFX1100-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2560; GFX1100-GISEL-NEXT:    s_clause 0x1
2561; GFX1100-GISEL-NEXT:    global_load_b128 v[10:13], v[0:1], off
2562; GFX1100-GISEL-NEXT:    global_load_b128 v[14:17], v[0:1], off offset:16
2563; GFX1100-GISEL-NEXT:    v_readfirstlane_b32 s0, v2
2564; GFX1100-GISEL-NEXT:    v_readfirstlane_b32 s1, v9
2565; GFX1100-GISEL-NEXT:    v_readfirstlane_b32 s2, v3
2566; GFX1100-GISEL-NEXT:    v_readfirstlane_b32 s3, v4
2567; GFX1100-GISEL-NEXT:    v_readfirstlane_b32 s4, v5
2568; GFX1100-GISEL-NEXT:    v_readfirstlane_b32 s5, v6
2569; GFX1100-GISEL-NEXT:    v_readfirstlane_b32 s6, v7
2570; GFX1100-GISEL-NEXT:    v_readfirstlane_b32 s7, v8
2571; GFX1100-GISEL-NEXT:    s_waitcnt vmcnt(1)
2572; GFX1100-GISEL-NEXT:    v_writelane_b32 v10, s0, s1
2573; GFX1100-GISEL-NEXT:    v_writelane_b32 v11, s2, s1
2574; GFX1100-GISEL-NEXT:    v_writelane_b32 v12, s3, s1
2575; GFX1100-GISEL-NEXT:    v_writelane_b32 v13, s4, s1
2576; GFX1100-GISEL-NEXT:    s_waitcnt vmcnt(0)
2577; GFX1100-GISEL-NEXT:    v_writelane_b32 v14, s5, s1
2578; GFX1100-GISEL-NEXT:    v_writelane_b32 v15, s6, s1
2579; GFX1100-GISEL-NEXT:    v_writelane_b32 v16, s7, s1
2580; GFX1100-GISEL-NEXT:    s_clause 0x1
2581; GFX1100-GISEL-NEXT:    global_store_b128 v[0:1], v[10:13], off
2582; GFX1100-GISEL-NEXT:    global_store_b96 v[0:1], v[14:16], off offset:16
2583; GFX1100-GISEL-NEXT:    s_setpc_b64 s[30:31]
; Read-modify-write of %out through the intrinsic under test.
2584  %oldval = load <7 x i32>, ptr addrspace(1) %out
2585  %writelane = call <7 x i32> @llvm.amdgcn.writelane.v7i32(<7 x i32> %src, i32 %src1, <7 x i32> %oldval)
2586  store <7 x i32> %writelane, ptr addrspace(1) %out, align 4
2587  ret void
2588}
2589
; test_writelane_v8i16 - writelane on a 128-bit <8 x i16> vector.
;
; The function loads the current <8 x i16> value from %out, passes it as the
; "old value" operand of llvm.amdgcn.writelane.v8i16 together with %src (the
; value to write) and %src1 (the lane select), and stores the result back to
; %out.
;
; What the generated assertions below pin down, for both SelectionDAG and
; GlobalISel on gfx802, gfx1010 and gfx1100:
;   * the old value is fetched with a single 128-bit load and the result is
;     written with a single 128-bit store;
;   * the operation is emitted as four v_writelane_b32 instructions, one per
;     32-bit register of v[7:10];
;   * v2-v5 and v6 are each made scalar with v_readfirstlane_b32 before use
;     (presumably v2-v5 carry %src and v6 carries %src1 - verify against the
;     calling convention);
;   * on gfx802 the lane select is placed in m0, while gfx1010 and gfx1100
;     use an ordinary SGPR for it;
;   * SelectionDAG emits the writelanes from v10 down to v7, GlobalISel from
;     v7 up to v10.
;
; The assertion lines are autogenerated (see the first line of this file);
; regenerate them with utils/update_llc_test_checks.py instead of editing
; them by hand.
2590define void @test_writelane_v8i16(ptr addrspace(1) %out, <8 x i16> %src, i32 %src1) {
2591; GFX802-SDAG-LABEL: test_writelane_v8i16:
2592; GFX802-SDAG:       ; %bb.0:
2593; GFX802-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2594; GFX802-SDAG-NEXT:    flat_load_dwordx4 v[7:10], v[0:1]
2595; GFX802-SDAG-NEXT:    v_readfirstlane_b32 m0, v6
2596; GFX802-SDAG-NEXT:    v_readfirstlane_b32 s4, v5
2597; GFX802-SDAG-NEXT:    v_readfirstlane_b32 s5, v4
2598; GFX802-SDAG-NEXT:    v_readfirstlane_b32 s6, v3
2599; GFX802-SDAG-NEXT:    v_readfirstlane_b32 s7, v2
2600; GFX802-SDAG-NEXT:    s_waitcnt vmcnt(0)
2601; GFX802-SDAG-NEXT:    v_writelane_b32 v10, s4, m0
2602; GFX802-SDAG-NEXT:    v_writelane_b32 v9, s5, m0
2603; GFX802-SDAG-NEXT:    v_writelane_b32 v8, s6, m0
2604; GFX802-SDAG-NEXT:    v_writelane_b32 v7, s7, m0
2605; GFX802-SDAG-NEXT:    flat_store_dwordx4 v[0:1], v[7:10]
2606; GFX802-SDAG-NEXT:    s_waitcnt vmcnt(0)
2607; GFX802-SDAG-NEXT:    s_setpc_b64 s[30:31]
2608;
2609; GFX1010-SDAG-LABEL: test_writelane_v8i16:
2610; GFX1010-SDAG:       ; %bb.0:
2611; GFX1010-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2612; GFX1010-SDAG-NEXT:    global_load_dwordx4 v[7:10], v[0:1], off
2613; GFX1010-SDAG-NEXT:    v_readfirstlane_b32 s4, v5
2614; GFX1010-SDAG-NEXT:    v_readfirstlane_b32 s5, v6
2615; GFX1010-SDAG-NEXT:    v_readfirstlane_b32 s6, v4
2616; GFX1010-SDAG-NEXT:    v_readfirstlane_b32 s7, v3
2617; GFX1010-SDAG-NEXT:    v_readfirstlane_b32 s8, v2
2618; GFX1010-SDAG-NEXT:    s_waitcnt vmcnt(0)
2619; GFX1010-SDAG-NEXT:    v_writelane_b32 v10, s4, s5
2620; GFX1010-SDAG-NEXT:    v_writelane_b32 v9, s6, s5
2621; GFX1010-SDAG-NEXT:    v_writelane_b32 v8, s7, s5
2622; GFX1010-SDAG-NEXT:    v_writelane_b32 v7, s8, s5
2623; GFX1010-SDAG-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off
2624; GFX1010-SDAG-NEXT:    s_setpc_b64 s[30:31]
2625;
2626; GFX1100-SDAG-LABEL: test_writelane_v8i16:
2627; GFX1100-SDAG:       ; %bb.0:
2628; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2629; GFX1100-SDAG-NEXT:    global_load_b128 v[7:10], v[0:1], off
2630; GFX1100-SDAG-NEXT:    v_readfirstlane_b32 s0, v5
2631; GFX1100-SDAG-NEXT:    v_readfirstlane_b32 s1, v6
2632; GFX1100-SDAG-NEXT:    v_readfirstlane_b32 s2, v4
2633; GFX1100-SDAG-NEXT:    v_readfirstlane_b32 s3, v3
2634; GFX1100-SDAG-NEXT:    v_readfirstlane_b32 s4, v2
2635; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0)
2636; GFX1100-SDAG-NEXT:    v_writelane_b32 v10, s0, s1
2637; GFX1100-SDAG-NEXT:    v_writelane_b32 v9, s2, s1
2638; GFX1100-SDAG-NEXT:    v_writelane_b32 v8, s3, s1
2639; GFX1100-SDAG-NEXT:    v_writelane_b32 v7, s4, s1
2640; GFX1100-SDAG-NEXT:    global_store_b128 v[0:1], v[7:10], off
2641; GFX1100-SDAG-NEXT:    s_setpc_b64 s[30:31]
2642;
2643; GFX802-GISEL-LABEL: test_writelane_v8i16:
2644; GFX802-GISEL:       ; %bb.0:
2645; GFX802-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2646; GFX802-GISEL-NEXT:    flat_load_dwordx4 v[7:10], v[0:1]
2647; GFX802-GISEL-NEXT:    v_readfirstlane_b32 s5, v6
2648; GFX802-GISEL-NEXT:    v_readfirstlane_b32 s4, v2
2649; GFX802-GISEL-NEXT:    v_readfirstlane_b32 s6, v3
2650; GFX802-GISEL-NEXT:    v_readfirstlane_b32 s7, v4
2651; GFX802-GISEL-NEXT:    v_readfirstlane_b32 s8, v5
2652; GFX802-GISEL-NEXT:    s_mov_b32 m0, s5
2653; GFX802-GISEL-NEXT:    s_waitcnt vmcnt(0)
2654; GFX802-GISEL-NEXT:    v_writelane_b32 v7, s4, m0
2655; GFX802-GISEL-NEXT:    v_writelane_b32 v8, s6, m0
2656; GFX802-GISEL-NEXT:    v_writelane_b32 v9, s7, m0
2657; GFX802-GISEL-NEXT:    v_writelane_b32 v10, s8, m0
2658; GFX802-GISEL-NEXT:    flat_store_dwordx4 v[0:1], v[7:10]
2659; GFX802-GISEL-NEXT:    s_waitcnt vmcnt(0)
2660; GFX802-GISEL-NEXT:    s_setpc_b64 s[30:31]
2661;
2662; GFX1010-GISEL-LABEL: test_writelane_v8i16:
2663; GFX1010-GISEL:       ; %bb.0:
2664; GFX1010-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2665; GFX1010-GISEL-NEXT:    global_load_dwordx4 v[7:10], v[0:1], off
2666; GFX1010-GISEL-NEXT:    v_readfirstlane_b32 s4, v2
2667; GFX1010-GISEL-NEXT:    v_readfirstlane_b32 s5, v6
2668; GFX1010-GISEL-NEXT:    v_readfirstlane_b32 s6, v3
2669; GFX1010-GISEL-NEXT:    v_readfirstlane_b32 s7, v4
2670; GFX1010-GISEL-NEXT:    v_readfirstlane_b32 s8, v5
2671; GFX1010-GISEL-NEXT:    s_waitcnt vmcnt(0)
2672; GFX1010-GISEL-NEXT:    v_writelane_b32 v7, s4, s5
2673; GFX1010-GISEL-NEXT:    v_writelane_b32 v8, s6, s5
2674; GFX1010-GISEL-NEXT:    v_writelane_b32 v9, s7, s5
2675; GFX1010-GISEL-NEXT:    v_writelane_b32 v10, s8, s5
2676; GFX1010-GISEL-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off
2677; GFX1010-GISEL-NEXT:    s_setpc_b64 s[30:31]
2678;
2679; GFX1100-GISEL-LABEL: test_writelane_v8i16:
2680; GFX1100-GISEL:       ; %bb.0:
2681; GFX1100-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2682; GFX1100-GISEL-NEXT:    global_load_b128 v[7:10], v[0:1], off
2683; GFX1100-GISEL-NEXT:    v_readfirstlane_b32 s0, v2
2684; GFX1100-GISEL-NEXT:    v_readfirstlane_b32 s1, v6
2685; GFX1100-GISEL-NEXT:    v_readfirstlane_b32 s2, v3
2686; GFX1100-GISEL-NEXT:    v_readfirstlane_b32 s3, v4
2687; GFX1100-GISEL-NEXT:    v_readfirstlane_b32 s4, v5
2688; GFX1100-GISEL-NEXT:    s_waitcnt vmcnt(0)
2689; GFX1100-GISEL-NEXT:    v_writelane_b32 v7, s0, s1
2690; GFX1100-GISEL-NEXT:    v_writelane_b32 v8, s2, s1
2691; GFX1100-GISEL-NEXT:    v_writelane_b32 v9, s3, s1
2692; GFX1100-GISEL-NEXT:    v_writelane_b32 v10, s4, s1
2693; GFX1100-GISEL-NEXT:    global_store_b128 v[0:1], v[7:10], off
2694; GFX1100-GISEL-NEXT:    s_setpc_b64 s[30:31]
2695  %oldval = load <8 x i16>, ptr addrspace(1) %out
2696  %writelane = call <8 x i16> @llvm.amdgcn.writelane.v8i16(<8 x i16> %src, i32 %src1, <8 x i16> %oldval)
2697  store <8 x i16> %writelane, ptr addrspace(1) %out, align 4
2698  ret void
2699}
2700
; Work-item id intrinsic, declared with attribute group #2. No use of it is
; visible in this part of the file; presumably tests earlier in the file call
; it to obtain a per-lane (divergent) value - verify before removing.
2701declare i32 @llvm.amdgcn.workitem.id.x() #2
2702
; Attribute groups referenced throughout this file.
; #0 is attached to the llvm.amdgcn.writelane declarations at the top of the
; file; it is the only group here that carries "convergent".
2703attributes #0 = { nounwind readnone convergent }
; #1 is attached to test definitions such as the test_writelane_sreg_i32 kernel.
2704attributes #1 = { nounwind }
; #2 is attached to the llvm.amdgcn.workitem.id.x declaration above.
2705attributes #2 = { nounwind readnone }
2706