xref: /llvm-project/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.readfirstlane.ll (revision 5a81a559d69fb84e1e8ef623ac4b642081c14c51)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
2; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=CHECK-SDAG -enable-var-scope %s
3; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs -global-isel -global-isel-abort=2 < %s | FileCheck -check-prefix=CHECK-GISEL -enable-var-scope %s
4
; readfirstlane on an i1 function argument; the result is stored to %out.
; Both selectors are expected to read lane 0 of v2 with a single
; v_readfirstlane_b32, mask the value down to bit 0 (s_and_b32 ..., 1) and
; emit a byte store.
5define void @test_readfirstlane_i1(ptr addrspace(1) %out, i1 %src) {
6; CHECK-SDAG-LABEL: test_readfirstlane_i1:
7; CHECK-SDAG:       ; %bb.0:
8; CHECK-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9; CHECK-SDAG-NEXT:    v_readfirstlane_b32 s4, v2
10; CHECK-SDAG-NEXT:    s_and_b32 s4, s4, 1
11; CHECK-SDAG-NEXT:    v_mov_b32_e32 v2, s4
12; CHECK-SDAG-NEXT:    flat_store_byte v[0:1], v2
13; CHECK-SDAG-NEXT:    s_waitcnt vmcnt(0)
14; CHECK-SDAG-NEXT:    s_setpc_b64 s[30:31]
15;
16; CHECK-GISEL-LABEL: test_readfirstlane_i1:
17; CHECK-GISEL:       ; %bb.0:
18; CHECK-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
19; CHECK-GISEL-NEXT:    v_readfirstlane_b32 s4, v2
20; CHECK-GISEL-NEXT:    s_and_b32 s4, s4, 1
21; CHECK-GISEL-NEXT:    v_mov_b32_e32 v2, s4
22; CHECK-GISEL-NEXT:    flat_store_byte v[0:1], v2
23; CHECK-GISEL-NEXT:    s_waitcnt vmcnt(0)
24; CHECK-GISEL-NEXT:    s_setpc_b64 s[30:31]
25  %readfirstlane = call i1 @llvm.amdgcn.readfirstlane.i1(i1 %src)
26  store i1 %readfirstlane, ptr addrspace(1) %out, align 4
27  ret void
28}
29
; Same as the plain i1 test, but %src carries the inreg attribute.
; The expected output contains no v_readfirstlane_b32 at all: the value is
; taken directly from s16, masked with 1 and stored as a byte.
30define void @test_readfirstlane_i1_inreg(ptr addrspace(1) %out, i1 inreg %src) {
31; CHECK-SDAG-LABEL: test_readfirstlane_i1_inreg:
32; CHECK-SDAG:       ; %bb.0:
33; CHECK-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
34; CHECK-SDAG-NEXT:    s_and_b32 s4, s16, 1
35; CHECK-SDAG-NEXT:    v_mov_b32_e32 v2, s4
36; CHECK-SDAG-NEXT:    flat_store_byte v[0:1], v2
37; CHECK-SDAG-NEXT:    s_waitcnt vmcnt(0)
38; CHECK-SDAG-NEXT:    s_setpc_b64 s[30:31]
39;
40; CHECK-GISEL-LABEL: test_readfirstlane_i1_inreg:
41; CHECK-GISEL:       ; %bb.0:
42; CHECK-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
43; CHECK-GISEL-NEXT:    s_and_b32 s4, s16, 1
44; CHECK-GISEL-NEXT:    v_mov_b32_e32 v2, s4
45; CHECK-GISEL-NEXT:    flat_store_byte v[0:1], v2
46; CHECK-GISEL-NEXT:    s_waitcnt vmcnt(0)
47; CHECK-GISEL-NEXT:    s_setpc_b64 s[30:31]
48  %readfirstlane = call i1 @llvm.amdgcn.readfirstlane.i1(i1 %src)
49  store i1 %readfirstlane, ptr addrspace(1) %out, align 4
50  ret void
51}
52
; readfirstlane on an i1 produced by a compare (%src ugt 42); the result is
; used as the condition of a select between %src and %src1.
; Both selectors materialize the compare as 0/1 in v4 and read its first lane.
; They differ in how the scalar is turned back into a lane mask in vcc:
; SelectionDAG uses s_bitcmp1_b32 + s_cselect_b64, GlobalISel uses
; s_and_b32 + v_cmp_ne_u32_e64.
53define void @test_readfirstlane_i1_select(ptr addrspace(1) %out, i32 %src, i32 %src1) {
54; CHECK-SDAG-LABEL: test_readfirstlane_i1_select:
55; CHECK-SDAG:       ; %bb.0:
56; CHECK-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
57; CHECK-SDAG-NEXT:    v_cmp_lt_u32_e32 vcc, 42, v2
58; CHECK-SDAG-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
59; CHECK-SDAG-NEXT:    v_readfirstlane_b32 s4, v4
60; CHECK-SDAG-NEXT:    s_bitcmp1_b32 s4, 0
61; CHECK-SDAG-NEXT:    s_cselect_b64 vcc, -1, 0
62; CHECK-SDAG-NEXT:    v_cndmask_b32_e32 v2, v3, v2, vcc
63; CHECK-SDAG-NEXT:    flat_store_dword v[0:1], v2
64; CHECK-SDAG-NEXT:    s_waitcnt vmcnt(0)
65; CHECK-SDAG-NEXT:    s_setpc_b64 s[30:31]
66;
67; CHECK-GISEL-LABEL: test_readfirstlane_i1_select:
68; CHECK-GISEL:       ; %bb.0:
69; CHECK-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
70; CHECK-GISEL-NEXT:    v_cmp_lt_u32_e32 vcc, 42, v2
71; CHECK-GISEL-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
72; CHECK-GISEL-NEXT:    v_readfirstlane_b32 s4, v4
73; CHECK-GISEL-NEXT:    s_and_b32 s4, 1, s4
74; CHECK-GISEL-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s4
75; CHECK-GISEL-NEXT:    v_cndmask_b32_e32 v2, v3, v2, vcc
76; CHECK-GISEL-NEXT:    flat_store_dword v[0:1], v2
77; CHECK-GISEL-NEXT:    s_waitcnt vmcnt(0)
78; CHECK-GISEL-NEXT:    s_setpc_b64 s[30:31]
79  %cmp = icmp ugt i32 %src, 42
80  %readfirstlane = call i1 @llvm.amdgcn.readfirstlane.i1(i1 %cmp)
81  %sel = select i1 %readfirstlane, i32 %src, i32 %src1
82  store i32 %sel, ptr addrspace(1) %out, align 4
83  ret void
84}
85
; readfirstlane on an i1 loaded from %in; the result is stored to %out.
; Expected for both selectors: a flat_load_ubyte, a wait for the load, one
; v_readfirstlane_b32, a mask with 1 and a byte store.
86define void @test_readfirstlane_i1_load(ptr addrspace(1) %out, ptr addrspace(1) %in) {
87; CHECK-SDAG-LABEL: test_readfirstlane_i1_load:
88; CHECK-SDAG:       ; %bb.0:
89; CHECK-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
90; CHECK-SDAG-NEXT:    flat_load_ubyte v2, v[2:3]
91; CHECK-SDAG-NEXT:    s_waitcnt vmcnt(0)
92; CHECK-SDAG-NEXT:    v_readfirstlane_b32 s4, v2
93; CHECK-SDAG-NEXT:    s_and_b32 s4, s4, 1
94; CHECK-SDAG-NEXT:    v_mov_b32_e32 v2, s4
95; CHECK-SDAG-NEXT:    flat_store_byte v[0:1], v2
96; CHECK-SDAG-NEXT:    s_waitcnt vmcnt(0)
97; CHECK-SDAG-NEXT:    s_setpc_b64 s[30:31]
98;
99; CHECK-GISEL-LABEL: test_readfirstlane_i1_load:
100; CHECK-GISEL:       ; %bb.0:
101; CHECK-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
102; CHECK-GISEL-NEXT:    flat_load_ubyte v2, v[2:3]
103; CHECK-GISEL-NEXT:    s_waitcnt vmcnt(0)
104; CHECK-GISEL-NEXT:    v_readfirstlane_b32 s4, v2
105; CHECK-GISEL-NEXT:    s_and_b32 s4, s4, 1
106; CHECK-GISEL-NEXT:    v_mov_b32_e32 v2, s4
107; CHECK-GISEL-NEXT:    flat_store_byte v[0:1], v2
108; CHECK-GISEL-NEXT:    s_waitcnt vmcnt(0)
109; CHECK-GISEL-NEXT:    s_setpc_b64 s[30:31]
110  %load = load i1, ptr addrspace(1) %in
111  %readfirstlane = call i1 @llvm.amdgcn.readfirstlane.i1(i1 %load)
112  store i1 %readfirstlane, ptr addrspace(1) %out, align 4
113  ret void
114}
115
; Baseline 32-bit case: readfirstlane on an i32 argument, result stored to
; %out. Both selectors are expected to emit exactly one v_readfirstlane_b32
; from v2 followed by a dword store.
116define void @test_readfirstlane_i32(ptr addrspace(1) %out, i32 %src) {
117; CHECK-SDAG-LABEL: test_readfirstlane_i32:
118; CHECK-SDAG:       ; %bb.0:
119; CHECK-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
120; CHECK-SDAG-NEXT:    v_readfirstlane_b32 s4, v2
121; CHECK-SDAG-NEXT:    v_mov_b32_e32 v2, s4
122; CHECK-SDAG-NEXT:    flat_store_dword v[0:1], v2
123; CHECK-SDAG-NEXT:    s_waitcnt vmcnt(0)
124; CHECK-SDAG-NEXT:    s_setpc_b64 s[30:31]
125;
126; CHECK-GISEL-LABEL: test_readfirstlane_i32:
127; CHECK-GISEL:       ; %bb.0:
128; CHECK-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
129; CHECK-GISEL-NEXT:    v_readfirstlane_b32 s4, v2
130; CHECK-GISEL-NEXT:    v_mov_b32_e32 v2, s4
131; CHECK-GISEL-NEXT:    flat_store_dword v[0:1], v2
132; CHECK-GISEL-NEXT:    s_waitcnt vmcnt(0)
133; CHECK-GISEL-NEXT:    s_setpc_b64 s[30:31]
134  %readfirstlane = call i32 @llvm.amdgcn.readfirstlane.i32(i32 %src)
135  store i32 %readfirstlane, ptr addrspace(1) %out, align 4
136  ret void
137}
138
; 64-bit integer case: readfirstlane on an i64 argument, result stored to
; %out (the store is written with align 4).
; The expected output uses two v_readfirstlane_b32, one per 32-bit half
; (v2 and v3). The selectors differ only in ordering and scalar register
; assignment: SelectionDAG reads v3 first into s4, GlobalISel reads v2 first
; into s4.
139define void @test_readfirstlane_i64(ptr addrspace(1) %out, i64 %src) {
140; CHECK-SDAG-LABEL: test_readfirstlane_i64:
141; CHECK-SDAG:       ; %bb.0:
142; CHECK-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
143; CHECK-SDAG-NEXT:    v_readfirstlane_b32 s4, v3
144; CHECK-SDAG-NEXT:    v_readfirstlane_b32 s5, v2
145; CHECK-SDAG-NEXT:    v_mov_b32_e32 v2, s5
146; CHECK-SDAG-NEXT:    v_mov_b32_e32 v3, s4
147; CHECK-SDAG-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
148; CHECK-SDAG-NEXT:    s_waitcnt vmcnt(0)
149; CHECK-SDAG-NEXT:    s_setpc_b64 s[30:31]
150;
151; CHECK-GISEL-LABEL: test_readfirstlane_i64:
152; CHECK-GISEL:       ; %bb.0:
153; CHECK-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
154; CHECK-GISEL-NEXT:    v_readfirstlane_b32 s4, v2
155; CHECK-GISEL-NEXT:    v_readfirstlane_b32 s5, v3
156; CHECK-GISEL-NEXT:    v_mov_b32_e32 v2, s4
157; CHECK-GISEL-NEXT:    v_mov_b32_e32 v3, s5
158; CHECK-GISEL-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
159; CHECK-GISEL-NEXT:    s_waitcnt vmcnt(0)
160; CHECK-GISEL-NEXT:    s_setpc_b64 s[30:31]
161  %readfirstlane = call i64 @llvm.amdgcn.readfirstlane.i64(i64 %src)
162  store i64 %readfirstlane, ptr addrspace(1) %out, align 4
163  ret void
164}
165
; 64-bit floating-point case: readfirstlane on a double argument, result
; stored to %out (the store is written with align 4).
; The expected instruction sequences are the same as in the i64 test: two
; v_readfirstlane_b32 (one per 32-bit half), with the two selectors differing
; only in ordering and scalar register assignment.
166define void @test_readfirstlane_f64(ptr addrspace(1) %out, double %src) {
167; CHECK-SDAG-LABEL: test_readfirstlane_f64:
168; CHECK-SDAG:       ; %bb.0:
169; CHECK-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
170; CHECK-SDAG-NEXT:    v_readfirstlane_b32 s4, v3
171; CHECK-SDAG-NEXT:    v_readfirstlane_b32 s5, v2
172; CHECK-SDAG-NEXT:    v_mov_b32_e32 v2, s5
173; CHECK-SDAG-NEXT:    v_mov_b32_e32 v3, s4
174; CHECK-SDAG-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
175; CHECK-SDAG-NEXT:    s_waitcnt vmcnt(0)
176; CHECK-SDAG-NEXT:    s_setpc_b64 s[30:31]
177;
178; CHECK-GISEL-LABEL: test_readfirstlane_f64:
179; CHECK-GISEL:       ; %bb.0:
180; CHECK-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
181; CHECK-GISEL-NEXT:    v_readfirstlane_b32 s4, v2
182; CHECK-GISEL-NEXT:    v_readfirstlane_b32 s5, v3
183; CHECK-GISEL-NEXT:    v_mov_b32_e32 v2, s4
184; CHECK-GISEL-NEXT:    v_mov_b32_e32 v3, s5
185; CHECK-GISEL-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
186; CHECK-GISEL-NEXT:    s_waitcnt vmcnt(0)
187; CHECK-GISEL-NEXT:    s_setpc_b64 s[30:31]
188  %readfirstlane = call double @llvm.amdgcn.readfirstlane.f64(double %src)
189  store double %readfirstlane, ptr addrspace(1) %out, align 4
190  ret void
191}
192
; readfirstlane of the i32 constant 32 in a kernel; the result is consumed by
; an inline-asm statement through an "s" constraint (%out is not referenced).
; Expected for both selectors: the constant is materialized with a single
; s_mov_b32 and no v_readfirstlane_b32 is emitted.
193define amdgpu_kernel void @test_readfirstlane_imm_i32(ptr addrspace(1) %out) {
194; CHECK-SDAG-LABEL: test_readfirstlane_imm_i32:
195; CHECK-SDAG:       ; %bb.0:
196; CHECK-SDAG-NEXT:    s_mov_b32 s0, 32
197; CHECK-SDAG-NEXT:    ;;#ASMSTART
198; CHECK-SDAG-NEXT:    ; use s0
199; CHECK-SDAG-NEXT:    ;;#ASMEND
200; CHECK-SDAG-NEXT:    s_endpgm
201;
202; CHECK-GISEL-LABEL: test_readfirstlane_imm_i32:
203; CHECK-GISEL:       ; %bb.0:
204; CHECK-GISEL-NEXT:    s_mov_b32 s0, 32
205; CHECK-GISEL-NEXT:    ;;#ASMSTART
206; CHECK-GISEL-NEXT:    ; use s0
207; CHECK-GISEL-NEXT:    ;;#ASMEND
208; CHECK-GISEL-NEXT:    s_endpgm
209  %readfirstlane = call i32 @llvm.amdgcn.readfirstlane.i32(i32 32)
210  call void asm sideeffect "; use $0", "s"(i32 %readfirstlane)
211  ret void
212}
213
; readfirstlane of the i64 constant 32 in a kernel; the result is consumed by
; an inline-asm statement through an "s" constraint (%out is not referenced).
; Expected for both selectors: a single s_mov_b64 into s[0:1] and no
; v_readfirstlane_b32.
214define amdgpu_kernel void @test_readfirstlane_imm_i64(ptr addrspace(1) %out) {
215; CHECK-SDAG-LABEL: test_readfirstlane_imm_i64:
216; CHECK-SDAG:       ; %bb.0:
217; CHECK-SDAG-NEXT:    s_mov_b64 s[0:1], 32
218; CHECK-SDAG-NEXT:    ;;#ASMSTART
219; CHECK-SDAG-NEXT:    ; use s[0:1]
220; CHECK-SDAG-NEXT:    ;;#ASMEND
221; CHECK-SDAG-NEXT:    s_endpgm
222;
223; CHECK-GISEL-LABEL: test_readfirstlane_imm_i64:
224; CHECK-GISEL:       ; %bb.0:
225; CHECK-GISEL-NEXT:    s_mov_b64 s[0:1], 32
226; CHECK-GISEL-NEXT:    ;;#ASMSTART
227; CHECK-GISEL-NEXT:    ; use s[0:1]
228; CHECK-GISEL-NEXT:    ;;#ASMEND
229; CHECK-GISEL-NEXT:    s_endpgm
230  %readfirstlane = call i64 @llvm.amdgcn.readfirstlane.i64(i64 32)
231  call void asm sideeffect "; use $0", "s"(i64 %readfirstlane)
232  ret void
233}
234
; readfirstlane of the double constant 32.0 in a kernel; the result is
; consumed by an inline-asm statement through an "s" constraint (%out is not
; referenced).
; Expected for both selectors: the constant is built from two s_mov_b32
; (low half 0, high half 0x40400000) and no v_readfirstlane_b32 is emitted.
235define amdgpu_kernel void @test_readfirstlane_imm_f64(ptr addrspace(1) %out) {
236; CHECK-SDAG-LABEL: test_readfirstlane_imm_f64:
237; CHECK-SDAG:       ; %bb.0:
238; CHECK-SDAG-NEXT:    s_mov_b32 s0, 0
239; CHECK-SDAG-NEXT:    s_mov_b32 s1, 0x40400000
240; CHECK-SDAG-NEXT:    ;;#ASMSTART
241; CHECK-SDAG-NEXT:    ; use s[0:1]
242; CHECK-SDAG-NEXT:    ;;#ASMEND
243; CHECK-SDAG-NEXT:    s_endpgm
244;
245; CHECK-GISEL-LABEL: test_readfirstlane_imm_f64:
246; CHECK-GISEL:       ; %bb.0:
247; CHECK-GISEL-NEXT:    s_mov_b32 s0, 0
248; CHECK-GISEL-NEXT:    s_mov_b32 s1, 0x40400000
249; CHECK-GISEL-NEXT:    ;;#ASMSTART
250; CHECK-GISEL-NEXT:    ; use s[0:1]
251; CHECK-GISEL-NEXT:    ;;#ASMEND
252; CHECK-GISEL-NEXT:    s_endpgm
253  %readfirstlane = call double @llvm.amdgcn.readfirstlane.f64(double 32.0)
254  call void asm sideeffect "; use $0", "s"(double %readfirstlane)
255  ret void
256}
257
; readfirstlane of the i32 constant 32 whose result is stored to %out.
; Expected for both selectors: the constant is moved straight into v2
; (v_mov_b32 v2, 32) for the store; no v_readfirstlane_b32 is emitted.
258define amdgpu_kernel void @test_readfirstlane_imm_fold_i32(ptr addrspace(1) %out) {
259; CHECK-SDAG-LABEL: test_readfirstlane_imm_fold_i32:
260; CHECK-SDAG:       ; %bb.0:
261; CHECK-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
262; CHECK-SDAG-NEXT:    v_mov_b32_e32 v2, 32
263; CHECK-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
264; CHECK-SDAG-NEXT:    v_mov_b32_e32 v0, s0
265; CHECK-SDAG-NEXT:    v_mov_b32_e32 v1, s1
266; CHECK-SDAG-NEXT:    flat_store_dword v[0:1], v2
267; CHECK-SDAG-NEXT:    s_endpgm
268;
269; CHECK-GISEL-LABEL: test_readfirstlane_imm_fold_i32:
270; CHECK-GISEL:       ; %bb.0:
271; CHECK-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
272; CHECK-GISEL-NEXT:    v_mov_b32_e32 v2, 32
273; CHECK-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
274; CHECK-GISEL-NEXT:    v_mov_b32_e32 v0, s0
275; CHECK-GISEL-NEXT:    v_mov_b32_e32 v1, s1
276; CHECK-GISEL-NEXT:    flat_store_dword v[0:1], v2
277; CHECK-GISEL-NEXT:    s_endpgm
278  %readfirstlane = call i32 @llvm.amdgcn.readfirstlane.i32(i32 32)
279  store i32 %readfirstlane, ptr addrspace(1) %out, align 4
280  ret void
281}
282
; readfirstlane of the i64 constant 32 whose result is stored to %out (the
; store is written with align 4).
; Neither selector emits a v_readfirstlane_b32. SelectionDAG writes the two
; halves directly into v0/v1; GlobalISel first materializes the constant in
; s[2:3] with s_mov_b64 and then copies it into v0/v1.
283define amdgpu_kernel void @test_readfirstlane_imm_fold_i64(ptr addrspace(1) %out) {
284; CHECK-SDAG-LABEL: test_readfirstlane_imm_fold_i64:
285; CHECK-SDAG:       ; %bb.0:
286; CHECK-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
287; CHECK-SDAG-NEXT:    v_mov_b32_e32 v0, 32
288; CHECK-SDAG-NEXT:    v_mov_b32_e32 v1, 0
289; CHECK-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
290; CHECK-SDAG-NEXT:    v_mov_b32_e32 v3, s1
291; CHECK-SDAG-NEXT:    v_mov_b32_e32 v2, s0
292; CHECK-SDAG-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
293; CHECK-SDAG-NEXT:    s_endpgm
294;
295; CHECK-GISEL-LABEL: test_readfirstlane_imm_fold_i64:
296; CHECK-GISEL:       ; %bb.0:
297; CHECK-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
298; CHECK-GISEL-NEXT:    s_mov_b64 s[2:3], 32
299; CHECK-GISEL-NEXT:    v_mov_b32_e32 v0, s2
300; CHECK-GISEL-NEXT:    v_mov_b32_e32 v1, s3
301; CHECK-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
302; CHECK-GISEL-NEXT:    v_mov_b32_e32 v3, s1
303; CHECK-GISEL-NEXT:    v_mov_b32_e32 v2, s0
304; CHECK-GISEL-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
305; CHECK-GISEL-NEXT:    s_endpgm
306  %readfirstlane = call i64 @llvm.amdgcn.readfirstlane.i64(i64 32)
307  store i64 %readfirstlane, ptr addrspace(1) %out, align 4
308  ret void
309}
310
; readfirstlane of the double constant 32.0 whose result is stored to %out
; (the store is written with align 4).
; Neither selector emits a v_readfirstlane_b32. SelectionDAG writes the two
; halves (0 and 0x40400000) directly into v0/v1; GlobalISel first places them
; in s2/s3 and then copies them into v0/v1.
311define amdgpu_kernel void @test_readfirstlane_imm_fold_f64(ptr addrspace(1) %out) {
312; CHECK-SDAG-LABEL: test_readfirstlane_imm_fold_f64:
313; CHECK-SDAG:       ; %bb.0:
314; CHECK-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
315; CHECK-SDAG-NEXT:    v_mov_b32_e32 v0, 0
316; CHECK-SDAG-NEXT:    v_mov_b32_e32 v1, 0x40400000
317; CHECK-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
318; CHECK-SDAG-NEXT:    v_mov_b32_e32 v3, s1
319; CHECK-SDAG-NEXT:    v_mov_b32_e32 v2, s0
320; CHECK-SDAG-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
321; CHECK-SDAG-NEXT:    s_endpgm
322;
323; CHECK-GISEL-LABEL: test_readfirstlane_imm_fold_f64:
324; CHECK-GISEL:       ; %bb.0:
325; CHECK-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
326; CHECK-GISEL-NEXT:    s_mov_b32 s2, 0
327; CHECK-GISEL-NEXT:    s_mov_b32 s3, 0x40400000
328; CHECK-GISEL-NEXT:    v_mov_b32_e32 v0, s2
329; CHECK-GISEL-NEXT:    v_mov_b32_e32 v1, s3
330; CHECK-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
331; CHECK-GISEL-NEXT:    v_mov_b32_e32 v3, s1
332; CHECK-GISEL-NEXT:    v_mov_b32_e32 v2, s0
333; CHECK-GISEL-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
334; CHECK-GISEL-NEXT:    s_endpgm
335  %readfirstlane = call double @llvm.amdgcn.readfirstlane.f64(double 32.0)
336  store double %readfirstlane, ptr addrspace(1) %out, align 4
337  ret void
338}
339
; readfirstlane of a value that inline asm produces in m0 (output constraint
; "={m0}"); the result is stored to %out.
; Expected for both selectors: m0 is copied directly with v_mov_b32 v2, m0
; and no v_readfirstlane_b32 is emitted.
; NOTE(review): the call below uses the unsuffixed intrinsic name
; @llvm.amdgcn.readfirstlane, whereas other i32 tests in this file spell it
; with the .i32 suffix - confirm whether the difference is deliberate.
340define amdgpu_kernel void @test_readfirstlane_m0(ptr addrspace(1) %out) {
341; CHECK-SDAG-LABEL: test_readfirstlane_m0:
342; CHECK-SDAG:       ; %bb.0:
343; CHECK-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
344; CHECK-SDAG-NEXT:    ;;#ASMSTART
345; CHECK-SDAG-NEXT:    s_mov_b32 m0, -1
346; CHECK-SDAG-NEXT:    ;;#ASMEND
347; CHECK-SDAG-NEXT:    v_mov_b32_e32 v2, m0
348; CHECK-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
349; CHECK-SDAG-NEXT:    v_mov_b32_e32 v0, s0
350; CHECK-SDAG-NEXT:    v_mov_b32_e32 v1, s1
351; CHECK-SDAG-NEXT:    flat_store_dword v[0:1], v2
352; CHECK-SDAG-NEXT:    s_endpgm
353;
354; CHECK-GISEL-LABEL: test_readfirstlane_m0:
355; CHECK-GISEL:       ; %bb.0:
356; CHECK-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
357; CHECK-GISEL-NEXT:    ;;#ASMSTART
358; CHECK-GISEL-NEXT:    s_mov_b32 m0, -1
359; CHECK-GISEL-NEXT:    ;;#ASMEND
360; CHECK-GISEL-NEXT:    v_mov_b32_e32 v2, m0
361; CHECK-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
362; CHECK-GISEL-NEXT:    v_mov_b32_e32 v0, s0
363; CHECK-GISEL-NEXT:    v_mov_b32_e32 v1, s1
364; CHECK-GISEL-NEXT:    flat_store_dword v[0:1], v2
365; CHECK-GISEL-NEXT:    s_endpgm
366  %m0 = call i32 asm "s_mov_b32 m0, -1", "={m0}"()
367  %readfirstlane = call i32 @llvm.amdgcn.readfirstlane(i32 %m0)
368  store i32 %readfirstlane, ptr addrspace(1) %out, align 4
369  ret void
370}
371
; readfirstlane of an i32 that inline asm produces through an "=s" output
; constraint; the result is stored to %out.
; Expected for both selectors: the asm result in s2 is copied straight into
; v2 for the store and no v_readfirstlane_b32 is emitted.
372define amdgpu_kernel void @test_readfirstlane_copy_from_sgpr_i32(ptr addrspace(1) %out) {
373; CHECK-SDAG-LABEL: test_readfirstlane_copy_from_sgpr_i32:
374; CHECK-SDAG:       ; %bb.0:
375; CHECK-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
376; CHECK-SDAG-NEXT:    ;;#ASMSTART
377; CHECK-SDAG-NEXT:    s_mov_b32 s2, 0
378; CHECK-SDAG-NEXT:    ;;#ASMEND
379; CHECK-SDAG-NEXT:    v_mov_b32_e32 v2, s2
380; CHECK-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
381; CHECK-SDAG-NEXT:    v_mov_b32_e32 v0, s0
382; CHECK-SDAG-NEXT:    v_mov_b32_e32 v1, s1
383; CHECK-SDAG-NEXT:    flat_store_dword v[0:1], v2
384; CHECK-SDAG-NEXT:    s_endpgm
385;
386; CHECK-GISEL-LABEL: test_readfirstlane_copy_from_sgpr_i32:
387; CHECK-GISEL:       ; %bb.0:
388; CHECK-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
389; CHECK-GISEL-NEXT:    ;;#ASMSTART
390; CHECK-GISEL-NEXT:    s_mov_b32 s2, 0
391; CHECK-GISEL-NEXT:    ;;#ASMEND
392; CHECK-GISEL-NEXT:    v_mov_b32_e32 v2, s2
393; CHECK-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
394; CHECK-GISEL-NEXT:    v_mov_b32_e32 v0, s0
395; CHECK-GISEL-NEXT:    v_mov_b32_e32 v1, s1
396; CHECK-GISEL-NEXT:    flat_store_dword v[0:1], v2
397; CHECK-GISEL-NEXT:    s_endpgm
398  %sgpr = call i32 asm "s_mov_b32 $0, 0", "=s"()
399  %readfirstlane = call i32 @llvm.amdgcn.readfirstlane.i32(i32 %sgpr)
400  store i32 %readfirstlane, ptr addrspace(1) %out, align 4
401  ret void
402}
403
; readfirstlane of an i64 that inline asm produces through an "=s" output
; constraint; the result is stored to %out (the store is written with
; align 4).
; Expected for both selectors: the asm result in s[2:3] is copied into v0/v1
; for the store and no v_readfirstlane_b32 is emitted.
404define amdgpu_kernel void @test_readfirstlane_copy_from_sgpr_i64(ptr addrspace(1) %out) {
405; CHECK-SDAG-LABEL: test_readfirstlane_copy_from_sgpr_i64:
406; CHECK-SDAG:       ; %bb.0:
407; CHECK-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
408; CHECK-SDAG-NEXT:    ;;#ASMSTART
409; CHECK-SDAG-NEXT:    s_mov_b64 s[2:3], 0
410; CHECK-SDAG-NEXT:    ;;#ASMEND
411; CHECK-SDAG-NEXT:    v_mov_b32_e32 v0, s2
412; CHECK-SDAG-NEXT:    v_mov_b32_e32 v1, s3
413; CHECK-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
414; CHECK-SDAG-NEXT:    v_mov_b32_e32 v3, s1
415; CHECK-SDAG-NEXT:    v_mov_b32_e32 v2, s0
416; CHECK-SDAG-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
417; CHECK-SDAG-NEXT:    s_endpgm
418;
419; CHECK-GISEL-LABEL: test_readfirstlane_copy_from_sgpr_i64:
420; CHECK-GISEL:       ; %bb.0:
421; CHECK-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
422; CHECK-GISEL-NEXT:    ;;#ASMSTART
423; CHECK-GISEL-NEXT:    s_mov_b64 s[2:3], 0
424; CHECK-GISEL-NEXT:    ;;#ASMEND
425; CHECK-GISEL-NEXT:    v_mov_b32_e32 v0, s2
426; CHECK-GISEL-NEXT:    v_mov_b32_e32 v1, s3
427; CHECK-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
428; CHECK-GISEL-NEXT:    v_mov_b32_e32 v3, s1
429; CHECK-GISEL-NEXT:    v_mov_b32_e32 v2, s0
430; CHECK-GISEL-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
431; CHECK-GISEL-NEXT:    s_endpgm
432  %sgpr = call i64 asm "s_mov_b64 $0, 0", "=s"()
433  %readfirstlane = call i64 @llvm.amdgcn.readfirstlane.i64(i64 %sgpr)
434  store i64 %readfirstlane, ptr addrspace(1) %out, align 4
435  ret void
436}
437
; readfirstlane of a double that inline asm produces through an "=s" output
; constraint; the result is stored to %out (the store is written with
; align 4).
; Expected for both selectors: the asm result in s[2:3] is copied into v0/v1
; for the store and no v_readfirstlane_b32 is emitted.
438define amdgpu_kernel void @test_readfirstlane_copy_from_sgpr_f64(ptr addrspace(1) %out) {
439; CHECK-SDAG-LABEL: test_readfirstlane_copy_from_sgpr_f64:
440; CHECK-SDAG:       ; %bb.0:
441; CHECK-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
442; CHECK-SDAG-NEXT:    ;;#ASMSTART
443; CHECK-SDAG-NEXT:    s_mov_b64 s[2:3], 0
444; CHECK-SDAG-NEXT:    ;;#ASMEND
445; CHECK-SDAG-NEXT:    v_mov_b32_e32 v0, s2
446; CHECK-SDAG-NEXT:    v_mov_b32_e32 v1, s3
447; CHECK-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
448; CHECK-SDAG-NEXT:    v_mov_b32_e32 v3, s1
449; CHECK-SDAG-NEXT:    v_mov_b32_e32 v2, s0
450; CHECK-SDAG-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
451; CHECK-SDAG-NEXT:    s_endpgm
452;
453; CHECK-GISEL-LABEL: test_readfirstlane_copy_from_sgpr_f64:
454; CHECK-GISEL:       ; %bb.0:
455; CHECK-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
456; CHECK-GISEL-NEXT:    ;;#ASMSTART
457; CHECK-GISEL-NEXT:    s_mov_b64 s[2:3], 0
458; CHECK-GISEL-NEXT:    ;;#ASMEND
459; CHECK-GISEL-NEXT:    v_mov_b32_e32 v0, s2
460; CHECK-GISEL-NEXT:    v_mov_b32_e32 v1, s3
461; CHECK-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
462; CHECK-GISEL-NEXT:    v_mov_b32_e32 v3, s1
463; CHECK-GISEL-NEXT:    v_mov_b32_e32 v2, s0
464; CHECK-GISEL-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
465; CHECK-GISEL-NEXT:    s_endpgm
466  %sgpr = call double asm "s_mov_b64 $0, 0", "=s"()
467  %readfirstlane = call double @llvm.amdgcn.readfirstlane.f64(double %sgpr)
468  store double %readfirstlane, ptr addrspace(1) %out, align 4
469  ret void
470}
471
; readfirstlane of the address of an addrspace(5) alloca converted to i32
; with ptrtoint; the result is consumed by inline asm through an "s"
; constraint (%out is not referenced).
; Expected for both selectors: the value is materialized with
; s_mov_b32 s4, 0 and no v_readfirstlane_b32 is emitted.
; NOTE(review): the call below uses the unsuffixed intrinsic name
; @llvm.amdgcn.readfirstlane, whereas other i32 tests in this file spell it
; with the .i32 suffix - confirm whether the difference is deliberate.
472define amdgpu_kernel void @test_readfirstlane_fi(ptr addrspace(1) %out) {
473; CHECK-SDAG-LABEL: test_readfirstlane_fi:
474; CHECK-SDAG:       ; %bb.0:
475; CHECK-SDAG-NEXT:    s_add_u32 s0, s0, s17
476; CHECK-SDAG-NEXT:    s_addc_u32 s1, s1, 0
477; CHECK-SDAG-NEXT:    s_mov_b32 s4, 0
478; CHECK-SDAG-NEXT:    ;;#ASMSTART
479; CHECK-SDAG-NEXT:    ; use s4
480; CHECK-SDAG-NEXT:    ;;#ASMEND
481; CHECK-SDAG-NEXT:    s_endpgm
482;
483; CHECK-GISEL-LABEL: test_readfirstlane_fi:
484; CHECK-GISEL:       ; %bb.0:
485; CHECK-GISEL-NEXT:    s_add_u32 s0, s0, s17
486; CHECK-GISEL-NEXT:    s_addc_u32 s1, s1, 0
487; CHECK-GISEL-NEXT:    s_mov_b32 s4, 0
488; CHECK-GISEL-NEXT:    ;;#ASMSTART
489; CHECK-GISEL-NEXT:    ; use s4
490; CHECK-GISEL-NEXT:    ;;#ASMEND
491; CHECK-GISEL-NEXT:    s_endpgm
492  %alloca = alloca i32, addrspace(5)
493  %int = ptrtoint ptr addrspace(5) %alloca to i32
494  %readfirstlane = call i32 @llvm.amdgcn.readfirstlane(i32 %int)
495  call void asm sideeffect "; use $0", "s"(i32 %readfirstlane)
496  ret void
497}
498
; readfirstlane on a half argument; the result is consumed by inline asm
; through an "s" constraint (%out is not referenced).
; Expected for both selectors: a single v_readfirstlane_b32 from v2 with no
; extra masking before the asm use.
499define void @test_readfirstlane_half(ptr addrspace(1) %out, half %src) {
500; CHECK-SDAG-LABEL: test_readfirstlane_half:
501; CHECK-SDAG:       ; %bb.0:
502; CHECK-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
503; CHECK-SDAG-NEXT:    v_readfirstlane_b32 s4, v2
504; CHECK-SDAG-NEXT:    ;;#ASMSTART
505; CHECK-SDAG-NEXT:    ; use s4
506; CHECK-SDAG-NEXT:    ;;#ASMEND
507; CHECK-SDAG-NEXT:    s_setpc_b64 s[30:31]
508;
509; CHECK-GISEL-LABEL: test_readfirstlane_half:
510; CHECK-GISEL:       ; %bb.0:
511; CHECK-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
512; CHECK-GISEL-NEXT:    v_readfirstlane_b32 s4, v2
513; CHECK-GISEL-NEXT:    ;;#ASMSTART
514; CHECK-GISEL-NEXT:    ; use s4
515; CHECK-GISEL-NEXT:    ;;#ASMEND
516; CHECK-GISEL-NEXT:    s_setpc_b64 s[30:31]
517  %x = call half @llvm.amdgcn.readfirstlane.f16(half %src)
518  call void asm sideeffect "; use $0", "s"(half %x)
519  ret void
520}
521
; readfirstlane on a float argument; the result is consumed by inline asm
; through an "s" constraint (%out is not referenced).
; Expected for both selectors: a single v_readfirstlane_b32 from v2.
522define void @test_readfirstlane_float(ptr addrspace(1) %out, float %src) {
523; CHECK-SDAG-LABEL: test_readfirstlane_float:
524; CHECK-SDAG:       ; %bb.0:
525; CHECK-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
526; CHECK-SDAG-NEXT:    v_readfirstlane_b32 s4, v2
527; CHECK-SDAG-NEXT:    ;;#ASMSTART
528; CHECK-SDAG-NEXT:    ; use s4
529; CHECK-SDAG-NEXT:    ;;#ASMEND
530; CHECK-SDAG-NEXT:    s_setpc_b64 s[30:31]
531;
532; CHECK-GISEL-LABEL: test_readfirstlane_float:
533; CHECK-GISEL:       ; %bb.0:
534; CHECK-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
535; CHECK-GISEL-NEXT:    v_readfirstlane_b32 s4, v2
536; CHECK-GISEL-NEXT:    ;;#ASMSTART
537; CHECK-GISEL-NEXT:    ; use s4
538; CHECK-GISEL-NEXT:    ;;#ASMEND
539; CHECK-GISEL-NEXT:    s_setpc_b64 s[30:31]
540  %x = call float @llvm.amdgcn.readfirstlane.f32(float %src)
541  call void asm sideeffect "; use $0", "s"(float %x)
542  ret void
543}
544
; readfirstlane on a bfloat argument; the result is consumed by inline asm
; through an "s" constraint (%out is not referenced).
; Expected for both selectors: a single v_readfirstlane_b32 from v2 with no
; extra masking before the asm use.
545define void @test_readfirstlane_bfloat(ptr addrspace(1) %out, bfloat %src) {
546; CHECK-SDAG-LABEL: test_readfirstlane_bfloat:
547; CHECK-SDAG:       ; %bb.0:
548; CHECK-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
549; CHECK-SDAG-NEXT:    v_readfirstlane_b32 s4, v2
550; CHECK-SDAG-NEXT:    ;;#ASMSTART
551; CHECK-SDAG-NEXT:    ; use s4
552; CHECK-SDAG-NEXT:    ;;#ASMEND
553; CHECK-SDAG-NEXT:    s_setpc_b64 s[30:31]
554;
555; CHECK-GISEL-LABEL: test_readfirstlane_bfloat:
556; CHECK-GISEL:       ; %bb.0:
557; CHECK-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
558; CHECK-GISEL-NEXT:    v_readfirstlane_b32 s4, v2
559; CHECK-GISEL-NEXT:    ;;#ASMSTART
560; CHECK-GISEL-NEXT:    ; use s4
561; CHECK-GISEL-NEXT:    ;;#ASMEND
562; CHECK-GISEL-NEXT:    s_setpc_b64 s[30:31]
563  %x = call bfloat @llvm.amdgcn.readfirstlane.bf16(bfloat %src)
564  call void asm sideeffect "; use $0", "s"(bfloat %x)
565  ret void
566}
567
; readfirstlane on an i16 argument; the result is consumed by inline asm
; through an "s" constraint (%out is not referenced).
; Both selectors emit one v_readfirstlane_b32 from v2. The expected outputs
; differ afterwards: SelectionDAG additionally masks the scalar with 0xffff
; before the asm use, GlobalISel passes it through unmasked.
568define void @test_readfirstlane_i16(ptr addrspace(1) %out, i16 %src) {
569; CHECK-SDAG-LABEL: test_readfirstlane_i16:
570; CHECK-SDAG:       ; %bb.0:
571; CHECK-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
572; CHECK-SDAG-NEXT:    v_readfirstlane_b32 s4, v2
573; CHECK-SDAG-NEXT:    s_and_b32 s4, s4, 0xffff
574; CHECK-SDAG-NEXT:    ;;#ASMSTART
575; CHECK-SDAG-NEXT:    ; use s4
576; CHECK-SDAG-NEXT:    ;;#ASMEND
577; CHECK-SDAG-NEXT:    s_setpc_b64 s[30:31]
578;
579; CHECK-GISEL-LABEL: test_readfirstlane_i16:
580; CHECK-GISEL:       ; %bb.0:
581; CHECK-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
582; CHECK-GISEL-NEXT:    v_readfirstlane_b32 s4, v2
583; CHECK-GISEL-NEXT:    ;;#ASMSTART
584; CHECK-GISEL-NEXT:    ; use s4
585; CHECK-GISEL-NEXT:    ;;#ASMEND
586; CHECK-GISEL-NEXT:    s_setpc_b64 s[30:31]
587  %x = call i16 @llvm.amdgcn.readfirstlane.i16(i16 %src)
588  call void asm sideeffect "; use $0", "s"(i16 %x)
589  ret void
590}
591
; readfirstlane on a <2 x half> argument; the result is consumed by inline
; asm through an "s" constraint (%out is not referenced).
; Expected for both selectors: the whole vector is read with a single
; v_readfirstlane_b32 from v2.
592define void @test_readfirstlane_v2f16(ptr addrspace(1) %out, <2 x half> %src) {
593; CHECK-SDAG-LABEL: test_readfirstlane_v2f16:
594; CHECK-SDAG:       ; %bb.0:
595; CHECK-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
596; CHECK-SDAG-NEXT:    v_readfirstlane_b32 s4, v2
597; CHECK-SDAG-NEXT:    ;;#ASMSTART
598; CHECK-SDAG-NEXT:    ; use s4
599; CHECK-SDAG-NEXT:    ;;#ASMEND
600; CHECK-SDAG-NEXT:    s_setpc_b64 s[30:31]
601;
602; CHECK-GISEL-LABEL: test_readfirstlane_v2f16:
603; CHECK-GISEL:       ; %bb.0:
604; CHECK-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
605; CHECK-GISEL-NEXT:    v_readfirstlane_b32 s4, v2
606; CHECK-GISEL-NEXT:    ;;#ASMSTART
607; CHECK-GISEL-NEXT:    ; use s4
608; CHECK-GISEL-NEXT:    ;;#ASMEND
609; CHECK-GISEL-NEXT:    s_setpc_b64 s[30:31]
610  %x = call <2 x half> @llvm.amdgcn.readfirstlane.v2f16(<2 x half> %src)
611  call void asm sideeffect "; use $0", "s"(<2 x half> %x)
612  ret void
613}
614
; readfirstlane on a <2 x float> argument; the result is consumed by inline
; asm through an "s" constraint as s[4:5] (%out is not referenced).
; Expected: two v_readfirstlane_b32 (v2 -> s4, v3 -> s5). SelectionDAG emits
; them from the highest register down, GlobalISel from the lowest up.
615define void @test_readfirstlane_v2f32(ptr addrspace(1) %out, <2 x float> %src) {
616; CHECK-SDAG-LABEL: test_readfirstlane_v2f32:
617; CHECK-SDAG:       ; %bb.0:
618; CHECK-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
619; CHECK-SDAG-NEXT:    v_readfirstlane_b32 s5, v3
620; CHECK-SDAG-NEXT:    v_readfirstlane_b32 s4, v2
621; CHECK-SDAG-NEXT:    ;;#ASMSTART
622; CHECK-SDAG-NEXT:    ; use s[4:5]
623; CHECK-SDAG-NEXT:    ;;#ASMEND
624; CHECK-SDAG-NEXT:    s_setpc_b64 s[30:31]
625;
626; CHECK-GISEL-LABEL: test_readfirstlane_v2f32:
627; CHECK-GISEL:       ; %bb.0:
628; CHECK-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
629; CHECK-GISEL-NEXT:    v_readfirstlane_b32 s4, v2
630; CHECK-GISEL-NEXT:    v_readfirstlane_b32 s5, v3
631; CHECK-GISEL-NEXT:    ;;#ASMSTART
632; CHECK-GISEL-NEXT:    ; use s[4:5]
633; CHECK-GISEL-NEXT:    ;;#ASMEND
634; CHECK-GISEL-NEXT:    s_setpc_b64 s[30:31]
635  %x = call <2 x float> @llvm.amdgcn.readfirstlane.v2f32(<2 x float> %src)
636  call void asm sideeffect "; use $0", "s"(<2 x float> %x)
637  ret void
638}
639
; readfirstlane on a <7 x i32> argument; the result is consumed by inline
; asm through an "s" constraint as s[4:10] (%out is not referenced).
; Expected: seven v_readfirstlane_b32, one per element (v2..v8 -> s4..s10).
; SelectionDAG emits them from the highest register down, GlobalISel from the
; lowest up.
640define void @test_readfirstlane_v7i32(ptr addrspace(1) %out, <7 x i32> %src) {
641; CHECK-SDAG-LABEL: test_readfirstlane_v7i32:
642; CHECK-SDAG:       ; %bb.0:
643; CHECK-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
644; CHECK-SDAG-NEXT:    v_readfirstlane_b32 s10, v8
645; CHECK-SDAG-NEXT:    v_readfirstlane_b32 s9, v7
646; CHECK-SDAG-NEXT:    v_readfirstlane_b32 s8, v6
647; CHECK-SDAG-NEXT:    v_readfirstlane_b32 s7, v5
648; CHECK-SDAG-NEXT:    v_readfirstlane_b32 s6, v4
649; CHECK-SDAG-NEXT:    v_readfirstlane_b32 s5, v3
650; CHECK-SDAG-NEXT:    v_readfirstlane_b32 s4, v2
651; CHECK-SDAG-NEXT:    ;;#ASMSTART
652; CHECK-SDAG-NEXT:    ; use s[4:10]
653; CHECK-SDAG-NEXT:    ;;#ASMEND
654; CHECK-SDAG-NEXT:    s_setpc_b64 s[30:31]
655;
656; CHECK-GISEL-LABEL: test_readfirstlane_v7i32:
657; CHECK-GISEL:       ; %bb.0:
658; CHECK-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
659; CHECK-GISEL-NEXT:    v_readfirstlane_b32 s4, v2
660; CHECK-GISEL-NEXT:    v_readfirstlane_b32 s5, v3
661; CHECK-GISEL-NEXT:    v_readfirstlane_b32 s6, v4
662; CHECK-GISEL-NEXT:    v_readfirstlane_b32 s7, v5
663; CHECK-GISEL-NEXT:    v_readfirstlane_b32 s8, v6
664; CHECK-GISEL-NEXT:    v_readfirstlane_b32 s9, v7
665; CHECK-GISEL-NEXT:    v_readfirstlane_b32 s10, v8
666; CHECK-GISEL-NEXT:    ;;#ASMSTART
667; CHECK-GISEL-NEXT:    ; use s[4:10]
668; CHECK-GISEL-NEXT:    ;;#ASMEND
669; CHECK-GISEL-NEXT:    s_setpc_b64 s[30:31]
670  %x = call <7 x i32> @llvm.amdgcn.readfirstlane.v7i32(<7 x i32> %src)
671  call void asm sideeffect "; use $0", "s"(<7 x i32> %x)
672  ret void
673}
674
; readfirstlane on an <8 x i16> argument; the result is consumed by inline
; asm through an "s" constraint as s[4:7] (%out is not referenced).
; Expected: four v_readfirstlane_b32 (v2..v5 -> s4..s7) for the eight 16-bit
; elements. SelectionDAG emits them from the highest register down,
; GlobalISel from the lowest up.
675define void @test_readfirstlane_v8i16(ptr addrspace(1) %out, <8 x i16> %src) {
676; CHECK-SDAG-LABEL: test_readfirstlane_v8i16:
677; CHECK-SDAG:       ; %bb.0:
678; CHECK-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
679; CHECK-SDAG-NEXT:    v_readfirstlane_b32 s7, v5
680; CHECK-SDAG-NEXT:    v_readfirstlane_b32 s6, v4
681; CHECK-SDAG-NEXT:    v_readfirstlane_b32 s5, v3
682; CHECK-SDAG-NEXT:    v_readfirstlane_b32 s4, v2
683; CHECK-SDAG-NEXT:    ;;#ASMSTART
684; CHECK-SDAG-NEXT:    ; use s[4:7]
685; CHECK-SDAG-NEXT:    ;;#ASMEND
686; CHECK-SDAG-NEXT:    s_setpc_b64 s[30:31]
687;
688; CHECK-GISEL-LABEL: test_readfirstlane_v8i16:
689; CHECK-GISEL:       ; %bb.0:
690; CHECK-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
691; CHECK-GISEL-NEXT:    v_readfirstlane_b32 s4, v2
692; CHECK-GISEL-NEXT:    v_readfirstlane_b32 s5, v3
693; CHECK-GISEL-NEXT:    v_readfirstlane_b32 s6, v4
694; CHECK-GISEL-NEXT:    v_readfirstlane_b32 s7, v5
695; CHECK-GISEL-NEXT:    ;;#ASMSTART
696; CHECK-GISEL-NEXT:    ; use s[4:7]
697; CHECK-GISEL-NEXT:    ;;#ASMEND
698; CHECK-GISEL-NEXT:    s_setpc_b64 s[30:31]
699  %x = call <8 x i16> @llvm.amdgcn.readfirstlane.v8i16(<8 x i16> %src)
700  call void asm sideeffect "; use $0", "s"(<8 x i16> %x)
701  ret void
702}
703