xref: /llvm-project/llvm/test/CodeGen/AMDGPU/extract_vector_dynelt.ll (revision 6548b6354d1d990e1c98736f5e7c3de876bedc8e)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2; RUN: llc -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -check-prefixes=GCN %s
3
; float4_extelt: extract one element of the constant vector
; <4 x float> <0.0, 1.0, 2.0, 4.0> at the runtime index %sel and store it to %out.
; The check lines expect a chain of scalar compares of the index against 1, 2
; and 3, each feeding a v_cndmask_b32 that picks the matching constant.
4define amdgpu_kernel void @float4_extelt(ptr addrspace(1) %out, i32 %sel) {
5; GCN-LABEL: float4_extelt:
6; GCN:       ; %bb.0: ; %entry
7; GCN-NEXT:    s_load_dword s6, s[4:5], 0x2c
8; GCN-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
9; GCN-NEXT:    s_waitcnt lgkmcnt(0)
10; GCN-NEXT:    s_cmp_eq_u32 s6, 1
11; GCN-NEXT:    s_cselect_b64 s[2:3], -1, 0
12; GCN-NEXT:    s_cmp_lg_u32 s6, 2
13; GCN-NEXT:    v_cndmask_b32_e64 v0, 0, 1.0, s[2:3]
14; GCN-NEXT:    s_cselect_b64 vcc, -1, 0
15; GCN-NEXT:    s_cmp_lg_u32 s6, 3
16; GCN-NEXT:    v_cndmask_b32_e32 v0, 2.0, v0, vcc
17; GCN-NEXT:    s_cselect_b64 vcc, -1, 0
18; GCN-NEXT:    v_cndmask_b32_e32 v2, 4.0, v0, vcc
19; GCN-NEXT:    v_mov_b32_e32 v0, s0
20; GCN-NEXT:    v_mov_b32_e32 v1, s1
21; GCN-NEXT:    flat_store_dword v[0:1], v2
22; GCN-NEXT:    s_endpgm
23entry:
24  %ext = extractelement <4 x float> <float 0.0, float 1.0, float 2.0, float 4.0>, i32 %sel
25  store float %ext, ptr addrspace(1) %out
26  ret void
27}
28
; int4_extelt: integer counterpart of the <4 x float> case. Extracts element
; %sel of <4 x i32> <0, 1, 2, 4> and stores it to %out.
; The check lines expect the same compare / v_cndmask_b32 chain, using the
; integer constants 1, 2 and 4 as select operands.
29define amdgpu_kernel void @int4_extelt(ptr addrspace(1) %out, i32 %sel) {
30; GCN-LABEL: int4_extelt:
31; GCN:       ; %bb.0: ; %entry
32; GCN-NEXT:    s_load_dword s6, s[4:5], 0x2c
33; GCN-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
34; GCN-NEXT:    s_waitcnt lgkmcnt(0)
35; GCN-NEXT:    s_cmp_eq_u32 s6, 1
36; GCN-NEXT:    s_cselect_b64 s[2:3], -1, 0
37; GCN-NEXT:    s_cmp_lg_u32 s6, 2
38; GCN-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[2:3]
39; GCN-NEXT:    s_cselect_b64 vcc, -1, 0
40; GCN-NEXT:    s_cmp_lg_u32 s6, 3
41; GCN-NEXT:    v_cndmask_b32_e32 v0, 2, v0, vcc
42; GCN-NEXT:    s_cselect_b64 vcc, -1, 0
43; GCN-NEXT:    v_cndmask_b32_e32 v2, 4, v0, vcc
44; GCN-NEXT:    v_mov_b32_e32 v0, s0
45; GCN-NEXT:    v_mov_b32_e32 v1, s1
46; GCN-NEXT:    flat_store_dword v[0:1], v2
47; GCN-NEXT:    s_endpgm
48entry:
49  %ext = extractelement <4 x i32> <i32 0, i32 1, i32 2, i32 4>, i32 %sel
50  store i32 %ext, ptr addrspace(1) %out
51  ret void
52}
53
; double4_extelt: extract element %sel of
; <4 x double> <0.01, 1.01, 2.01, 4.01> and store the 64-bit result to %out.
; The check lines expect the selection to be done on the scalar unit: for each
; index compare, a pair of s_cselect_b32 picks the high and the low 32-bit
; half of the candidate constant. The two halves are then copied to v[0:1]
; for a flat_store_dwordx2.
54define amdgpu_kernel void @double4_extelt(ptr addrspace(1) %out, i32 %sel) {
55; GCN-LABEL: double4_extelt:
56; GCN:       ; %bb.0: ; %entry
57; GCN-NEXT:    s_load_dword s2, s[4:5], 0x2c
58; GCN-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
59; GCN-NEXT:    s_mov_b32 s3, 0x3ff028f5
60; GCN-NEXT:    s_mov_b32 s4, 0xc28f5c29
61; GCN-NEXT:    s_waitcnt lgkmcnt(0)
62; GCN-NEXT:    s_cmp_eq_u32 s2, 1
63; GCN-NEXT:    s_cselect_b32 s3, s3, 0x3f847ae1
64; GCN-NEXT:    s_cselect_b32 s4, s4, 0x47ae147b
65; GCN-NEXT:    s_cmp_eq_u32 s2, 2
66; GCN-NEXT:    s_cselect_b32 s4, 0xe147ae14, s4
67; GCN-NEXT:    s_cselect_b32 s3, 0x4000147a, s3
68; GCN-NEXT:    s_cmp_eq_u32 s2, 3
69; GCN-NEXT:    s_cselect_b32 s2, 0x40100a3d, s3
70; GCN-NEXT:    s_cselect_b32 s3, 0x70a3d70a, s4
71; GCN-NEXT:    v_mov_b32_e32 v3, s1
72; GCN-NEXT:    v_mov_b32_e32 v0, s3
73; GCN-NEXT:    v_mov_b32_e32 v1, s2
74; GCN-NEXT:    v_mov_b32_e32 v2, s0
75; GCN-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
76; GCN-NEXT:    s_endpgm
77entry:
78  %ext = extractelement <4 x double> <double 0.01, double 1.01, double 2.01, double 4.01>, i32 %sel
79  store double %ext, ptr addrspace(1) %out
80  ret void
81}
82
; double5_extelt: extract element %sel of the odd-sized vector
; <5 x double> <0.01, 1.01, 2.01, 4.01, 5.01> and store it to %out.
; The check lines expect a scalar compare / s_cselect_b32 chain as in the
; four-element case. In addition, the index==3 and index==4 results are kept
; as 64-bit masks (s_cselect_b64) and OR-ed together, so that one select of
; the low half 0x70a3d70a covers both of those indices.
83define amdgpu_kernel void @double5_extelt(ptr addrspace(1) %out, i32 %sel) {
84; GCN-LABEL: double5_extelt:
85; GCN:       ; %bb.0: ; %entry
86; GCN-NEXT:    s_load_dword s6, s[4:5], 0x2c
87; GCN-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
88; GCN-NEXT:    s_mov_b32 s2, 0x3ff028f5
89; GCN-NEXT:    s_mov_b32 s3, 0xc28f5c29
90; GCN-NEXT:    s_waitcnt lgkmcnt(0)
91; GCN-NEXT:    s_cmp_eq_u32 s6, 1
92; GCN-NEXT:    s_cselect_b32 s2, s2, 0x3f847ae1
93; GCN-NEXT:    s_cselect_b32 s3, s3, 0x47ae147b
94; GCN-NEXT:    s_cmp_eq_u32 s6, 2
95; GCN-NEXT:    s_cselect_b32 s8, 0xe147ae14, s3
96; GCN-NEXT:    s_cselect_b32 s7, 0x4000147a, s2
97; GCN-NEXT:    s_cmp_eq_u32 s6, 3
98; GCN-NEXT:    s_cselect_b64 s[2:3], -1, 0
99; GCN-NEXT:    s_and_b64 s[4:5], s[2:3], exec
100; GCN-NEXT:    s_cselect_b32 s9, 0x40100a3d, s7
101; GCN-NEXT:    s_cmp_eq_u32 s6, 4
102; GCN-NEXT:    s_cselect_b64 s[4:5], -1, 0
103; GCN-NEXT:    s_and_b64 s[6:7], s[4:5], exec
104; GCN-NEXT:    s_cselect_b32 s6, 0x40140a3d, s9
105; GCN-NEXT:    s_or_b64 s[2:3], s[4:5], s[2:3]
106; GCN-NEXT:    s_and_b64 s[2:3], s[2:3], exec
107; GCN-NEXT:    s_cselect_b32 s2, 0x70a3d70a, s8
108; GCN-NEXT:    v_mov_b32_e32 v3, s1
109; GCN-NEXT:    v_mov_b32_e32 v0, s2
110; GCN-NEXT:    v_mov_b32_e32 v1, s6
111; GCN-NEXT:    v_mov_b32_e32 v2, s0
112; GCN-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
113; GCN-NEXT:    s_endpgm
114entry:
115  %ext = extractelement <5 x double> <double 0.01, double 1.01, double 2.01, double 4.01, double 5.01>, i32 %sel
116  store double %ext, ptr addrspace(1) %out
117  ret void
118}
119
; half4_extelt: extract element %sel of <4 x half> <1.0, 2.0, 3.0, 4.0> and
; store it to %out.
; The check lines expect no compare chain here: the four 16-bit constants are
; packed into the 64-bit pair s[2:3], the index is shifted left by 4, and a
; 64-bit right shift by that amount brings the wanted element to the low bits,
; which flat_store_short then writes out.
120define amdgpu_kernel void @half4_extelt(ptr addrspace(1) %out, i32 %sel) {
121; GCN-LABEL: half4_extelt:
122; GCN:       ; %bb.0: ; %entry
123; GCN-NEXT:    s_load_dword s6, s[4:5], 0x2c
124; GCN-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
125; GCN-NEXT:    s_mov_b32 s2, 0x40003c00
126; GCN-NEXT:    s_mov_b32 s3, 0x44004200
127; GCN-NEXT:    s_waitcnt lgkmcnt(0)
128; GCN-NEXT:    s_lshl_b32 s4, s6, 4
129; GCN-NEXT:    s_lshr_b64 s[2:3], s[2:3], s4
130; GCN-NEXT:    v_mov_b32_e32 v0, s0
131; GCN-NEXT:    v_mov_b32_e32 v1, s1
132; GCN-NEXT:    v_mov_b32_e32 v2, s2
133; GCN-NEXT:    flat_store_short v[0:1], v2
134; GCN-NEXT:    s_endpgm
135entry:
136  %ext = extractelement <4 x half> <half 1.0, half 2.0, half 3.0, half 4.0>, i32 %sel
137  store half %ext, ptr addrspace(1) %out
138  ret void
139}
140
; float2_extelt: smallest float case. Extracts element %sel of
; <2 x float> <0.0, 1.0> and stores it to %out.
; The check lines expect a single compare of the index against 1 and one
; v_cndmask_b32 choosing between 0 and 1.0.
141define amdgpu_kernel void @float2_extelt(ptr addrspace(1) %out, i32 %sel) {
142; GCN-LABEL: float2_extelt:
143; GCN:       ; %bb.0: ; %entry
144; GCN-NEXT:    s_load_dword s2, s[4:5], 0x2c
145; GCN-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
146; GCN-NEXT:    s_waitcnt lgkmcnt(0)
147; GCN-NEXT:    s_cmp_eq_u32 s2, 1
148; GCN-NEXT:    s_cselect_b64 s[2:3], -1, 0
149; GCN-NEXT:    v_mov_b32_e32 v0, s0
150; GCN-NEXT:    v_cndmask_b32_e64 v2, 0, 1.0, s[2:3]
151; GCN-NEXT:    v_mov_b32_e32 v1, s1
152; GCN-NEXT:    flat_store_dword v[0:1], v2
153; GCN-NEXT:    s_endpgm
154entry:
155  %ext = extractelement <2 x float> <float 0.0, float 1.0>, i32 %sel
156  store float %ext, ptr addrspace(1) %out
157  ret void
158}
159
; double2_extelt: extract element %sel of <2 x double> <0.01, 1.01> and store
; it to %out.
; The check lines expect one compare of the index against 1 followed by two
; s_cselect_b32, one per 32-bit half of the 64-bit result.
160define amdgpu_kernel void @double2_extelt(ptr addrspace(1) %out, i32 %sel) {
161; GCN-LABEL: double2_extelt:
162; GCN:       ; %bb.0: ; %entry
163; GCN-NEXT:    s_load_dword s2, s[4:5], 0x2c
164; GCN-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
165; GCN-NEXT:    s_mov_b32 s3, 0x3ff028f5
166; GCN-NEXT:    s_mov_b32 s4, 0xc28f5c29
167; GCN-NEXT:    s_waitcnt lgkmcnt(0)
168; GCN-NEXT:    s_cmp_eq_u32 s2, 1
169; GCN-NEXT:    s_cselect_b32 s2, s3, 0x3f847ae1
170; GCN-NEXT:    s_cselect_b32 s3, s4, 0x47ae147b
171; GCN-NEXT:    v_mov_b32_e32 v3, s1
172; GCN-NEXT:    v_mov_b32_e32 v0, s3
173; GCN-NEXT:    v_mov_b32_e32 v1, s2
174; GCN-NEXT:    v_mov_b32_e32 v2, s0
175; GCN-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
176; GCN-NEXT:    s_endpgm
177entry:
178  %ext = extractelement <2 x double> <double 0.01, double 1.01>, i32 %sel
179  store double %ext, ptr addrspace(1) %out
180  ret void
181}
182
; half8_extelt: extract element %sel of <8 x half> <1.0 .. 8.0> and store it
; to %out.
; Unlike the four-element half case, the check lines expect a compare /
; v_cndmask_b32 chain over indices 1..7. Each 16-bit constant is first moved
; into v1 and then conditionally merged into the running result in v0.
183define amdgpu_kernel void @half8_extelt(ptr addrspace(1) %out, i32 %sel) {
184; GCN-LABEL: half8_extelt:
185; GCN:       ; %bb.0: ; %entry
186; GCN-NEXT:    s_load_dword s2, s[4:5], 0x2c
187; GCN-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
188; GCN-NEXT:    v_mov_b32_e32 v0, 0x3c00
189; GCN-NEXT:    v_mov_b32_e32 v1, 0x4000
190; GCN-NEXT:    s_waitcnt lgkmcnt(0)
191; GCN-NEXT:    s_cmp_eq_u32 s2, 1
192; GCN-NEXT:    s_cselect_b64 vcc, -1, 0
193; GCN-NEXT:    s_cmp_lg_u32 s2, 2
194; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
195; GCN-NEXT:    v_mov_b32_e32 v1, 0x4200
196; GCN-NEXT:    s_cselect_b64 vcc, -1, 0
197; GCN-NEXT:    s_cmp_lg_u32 s2, 3
198; GCN-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
199; GCN-NEXT:    v_mov_b32_e32 v1, 0x4400
200; GCN-NEXT:    s_cselect_b64 vcc, -1, 0
201; GCN-NEXT:    s_cmp_lg_u32 s2, 4
202; GCN-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
203; GCN-NEXT:    v_mov_b32_e32 v1, 0x4500
204; GCN-NEXT:    s_cselect_b64 vcc, -1, 0
205; GCN-NEXT:    s_cmp_lg_u32 s2, 5
206; GCN-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
207; GCN-NEXT:    v_mov_b32_e32 v1, 0x4600
208; GCN-NEXT:    s_cselect_b64 vcc, -1, 0
209; GCN-NEXT:    s_cmp_lg_u32 s2, 6
210; GCN-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
211; GCN-NEXT:    v_mov_b32_e32 v1, 0x4700
212; GCN-NEXT:    s_cselect_b64 vcc, -1, 0
213; GCN-NEXT:    s_cmp_lg_u32 s2, 7
214; GCN-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
215; GCN-NEXT:    v_mov_b32_e32 v1, 0x4800
216; GCN-NEXT:    s_cselect_b64 vcc, -1, 0
217; GCN-NEXT:    v_cndmask_b32_e32 v2, v1, v0, vcc
218; GCN-NEXT:    v_mov_b32_e32 v0, s0
219; GCN-NEXT:    v_mov_b32_e32 v1, s1
220; GCN-NEXT:    flat_store_short v[0:1], v2
221; GCN-NEXT:    s_endpgm
222entry:
223  %ext = extractelement <8 x half> <half 1.0, half 2.0, half 3.0, half 4.0, half 5.0, half 6.0, half 7.0, half 8.0>, i32 %sel
224  store half %ext, ptr addrspace(1) %out
225  ret void
226}
227
; short8_extelt: extract element %sel of <8 x i16> <1 .. 8> and store it to
; %out.
; The check lines expect the whole selection on the scalar unit: one
; s_cmp / s_cselect_b32 pair per index 1..7, with the result copied to a VGPR
; only for the final flat_store_short.
228define amdgpu_kernel void @short8_extelt(ptr addrspace(1) %out, i32 %sel) {
229; GCN-LABEL: short8_extelt:
230; GCN:       ; %bb.0: ; %entry
231; GCN-NEXT:    s_load_dword s2, s[4:5], 0x2c
232; GCN-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
233; GCN-NEXT:    s_waitcnt lgkmcnt(0)
234; GCN-NEXT:    s_cmp_eq_u32 s2, 1
235; GCN-NEXT:    s_cselect_b32 s3, 2, 1
236; GCN-NEXT:    s_cmp_lg_u32 s2, 2
237; GCN-NEXT:    s_cselect_b32 s3, s3, 3
238; GCN-NEXT:    s_cmp_lg_u32 s2, 3
239; GCN-NEXT:    s_cselect_b32 s3, s3, 4
240; GCN-NEXT:    s_cmp_lg_u32 s2, 4
241; GCN-NEXT:    s_cselect_b32 s3, s3, 5
242; GCN-NEXT:    s_cmp_lg_u32 s2, 5
243; GCN-NEXT:    s_cselect_b32 s3, s3, 6
244; GCN-NEXT:    s_cmp_lg_u32 s2, 6
245; GCN-NEXT:    s_cselect_b32 s3, s3, 7
246; GCN-NEXT:    s_cmp_lg_u32 s2, 7
247; GCN-NEXT:    s_cselect_b32 s2, s3, 8
248; GCN-NEXT:    v_mov_b32_e32 v0, s0
249; GCN-NEXT:    v_mov_b32_e32 v1, s1
250; GCN-NEXT:    v_mov_b32_e32 v2, s2
251; GCN-NEXT:    flat_store_short v[0:1], v2
252; GCN-NEXT:    s_endpgm
253entry:
254  %ext = extractelement <8 x i16> <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>, i32 %sel
255  store i16 %ext, ptr addrspace(1) %out
256  ret void
257}
258
; float8_extelt: extract element %sel of <8 x float> <1.0 .. 8.0> and store it
; to %out.
; From this size on, the check lines expect indirect register addressing
; instead of a compare chain: the eight constants are placed in v0..v7, the
; index is copied to m0, and one v_movrels_b32 based at v0 reads the element.
259define amdgpu_kernel void @float8_extelt(ptr addrspace(1) %out, i32 %sel) {
260; GCN-LABEL: float8_extelt:
261; GCN:       ; %bb.0: ; %entry
262; GCN-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
263; GCN-NEXT:    s_load_dword s2, s[4:5], 0x2c
264; GCN-NEXT:    v_mov_b32_e32 v0, 1.0
265; GCN-NEXT:    v_mov_b32_e32 v1, 2.0
266; GCN-NEXT:    v_mov_b32_e32 v2, 0x40400000
267; GCN-NEXT:    v_mov_b32_e32 v3, 4.0
268; GCN-NEXT:    s_waitcnt lgkmcnt(0)
269; GCN-NEXT:    s_mov_b32 m0, s2
270; GCN-NEXT:    v_mov_b32_e32 v4, 0x40a00000
271; GCN-NEXT:    v_mov_b32_e32 v5, 0x40c00000
272; GCN-NEXT:    v_mov_b32_e32 v6, 0x40e00000
273; GCN-NEXT:    v_mov_b32_e32 v7, 0x41000000
274; GCN-NEXT:    v_movrels_b32_e32 v2, v0
275; GCN-NEXT:    v_mov_b32_e32 v0, s0
276; GCN-NEXT:    v_mov_b32_e32 v1, s1
277; GCN-NEXT:    flat_store_dword v[0:1], v2
278; GCN-NEXT:    s_endpgm
279entry:
280  %ext = extractelement <8 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>, i32 %sel
281  store float %ext, ptr addrspace(1) %out
282  ret void
283}
284
; double8_extelt: extract element %sel of <8 x double> <1.0 .. 8.0> and store
; it to %out.
; The check lines expect the constant to be built in s0..s15 (odd registers
; hold the distinct high words, even registers are copies of s0 = 0), copied
; to v0..v15, and read back with two v_movrels_b32 (bases v1 and v0) after m0
; is set to the index shifted left by 1, i.e. one 32-bit read per half.
285define amdgpu_kernel void @double8_extelt(ptr addrspace(1) %out, i32 %sel) {
286; GCN-LABEL: double8_extelt:
287; GCN:       ; %bb.0: ; %entry
288; GCN-NEXT:    s_load_dwordx2 s[16:17], s[4:5], 0x24
289; GCN-NEXT:    s_load_dword s18, s[4:5], 0x2c
290; GCN-NEXT:    s_mov_b32 s0, 0
291; GCN-NEXT:    s_mov_b32 s15, 0x40200000
292; GCN-NEXT:    s_mov_b32 s13, 0x401c0000
293; GCN-NEXT:    s_mov_b32 s11, 0x40180000
294; GCN-NEXT:    s_mov_b32 s9, 0x40140000
295; GCN-NEXT:    s_mov_b32 s7, 0x40100000
296; GCN-NEXT:    s_mov_b32 s5, 0x40080000
297; GCN-NEXT:    s_mov_b32 s3, 2.0
298; GCN-NEXT:    s_mov_b32 s1, 0x3ff00000
299; GCN-NEXT:    s_mov_b32 s2, s0
300; GCN-NEXT:    s_mov_b32 s4, s0
301; GCN-NEXT:    s_mov_b32 s6, s0
302; GCN-NEXT:    s_mov_b32 s8, s0
303; GCN-NEXT:    s_mov_b32 s10, s0
304; GCN-NEXT:    s_mov_b32 s12, s0
305; GCN-NEXT:    s_mov_b32 s14, s0
306; GCN-NEXT:    s_waitcnt lgkmcnt(0)
307; GCN-NEXT:    s_lshl_b32 s18, s18, 1
308; GCN-NEXT:    v_mov_b32_e32 v0, s0
309; GCN-NEXT:    v_mov_b32_e32 v1, s1
310; GCN-NEXT:    v_mov_b32_e32 v15, s15
311; GCN-NEXT:    s_mov_b32 m0, s18
312; GCN-NEXT:    v_mov_b32_e32 v2, s2
313; GCN-NEXT:    v_mov_b32_e32 v3, s3
314; GCN-NEXT:    v_mov_b32_e32 v4, s4
315; GCN-NEXT:    v_mov_b32_e32 v5, s5
316; GCN-NEXT:    v_mov_b32_e32 v6, s6
317; GCN-NEXT:    v_mov_b32_e32 v7, s7
318; GCN-NEXT:    v_mov_b32_e32 v8, s8
319; GCN-NEXT:    v_mov_b32_e32 v9, s9
320; GCN-NEXT:    v_mov_b32_e32 v10, s10
321; GCN-NEXT:    v_mov_b32_e32 v11, s11
322; GCN-NEXT:    v_mov_b32_e32 v12, s12
323; GCN-NEXT:    v_mov_b32_e32 v13, s13
324; GCN-NEXT:    v_mov_b32_e32 v14, s14
325; GCN-NEXT:    v_movrels_b32_e32 v16, v1
326; GCN-NEXT:    v_movrels_b32_e32 v15, v0
327; GCN-NEXT:    v_mov_b32_e32 v0, s16
328; GCN-NEXT:    v_mov_b32_e32 v1, s17
329; GCN-NEXT:    flat_store_dwordx2 v[0:1], v[15:16]
330; GCN-NEXT:    s_endpgm
331entry:
332  %ext = extractelement <8 x double> <double 1.0, double 2.0, double 3.0, double 4.0, double 5.0, double 6.0, double 7.0, double 8.0>, i32 %sel
333  store double %ext, ptr addrspace(1) %out
334  ret void
335}
336
; double7_extelt: extract element %sel of the odd-sized vector
; <7 x double> <1.0 .. 7.0> and store it to %out.
; The check lines expect the same 16-VGPR v_movrels_b32 sequence as the
; eight-element double case, but only s0..s13 are given vector constants.
; NOTE(review) - v14/v15 are filled from s14/s15, which hold the %out pointer
; loaded by the first s_load_dwordx2, not a vector constant. Presumably this
; is the undefined eighth (padding) element of a widened vector and is only
; read for an out-of-range index; confirm against the backend before relying
; on it.
337define amdgpu_kernel void @double7_extelt(ptr addrspace(1) %out, i32 %sel) {
338; GCN-LABEL: double7_extelt:
339; GCN:       ; %bb.0: ; %entry
340; GCN-NEXT:    s_load_dwordx2 s[14:15], s[4:5], 0x24
341; GCN-NEXT:    s_load_dword s16, s[4:5], 0x2c
342; GCN-NEXT:    s_mov_b32 s0, 0
343; GCN-NEXT:    s_mov_b32 s13, 0x401c0000
344; GCN-NEXT:    s_mov_b32 s11, 0x40180000
345; GCN-NEXT:    s_mov_b32 s9, 0x40140000
346; GCN-NEXT:    s_mov_b32 s7, 0x40100000
347; GCN-NEXT:    s_mov_b32 s5, 0x40080000
348; GCN-NEXT:    s_mov_b32 s3, 2.0
349; GCN-NEXT:    s_mov_b32 s1, 0x3ff00000
350; GCN-NEXT:    s_mov_b32 s2, s0
351; GCN-NEXT:    s_mov_b32 s4, s0
352; GCN-NEXT:    s_mov_b32 s6, s0
353; GCN-NEXT:    s_mov_b32 s8, s0
354; GCN-NEXT:    s_mov_b32 s10, s0
355; GCN-NEXT:    s_mov_b32 s12, s0
356; GCN-NEXT:    s_waitcnt lgkmcnt(0)
357; GCN-NEXT:    s_lshl_b32 s16, s16, 1
358; GCN-NEXT:    v_mov_b32_e32 v0, s0
359; GCN-NEXT:    v_mov_b32_e32 v1, s1
360; GCN-NEXT:    v_mov_b32_e32 v15, s15
361; GCN-NEXT:    s_mov_b32 m0, s16
362; GCN-NEXT:    v_mov_b32_e32 v2, s2
363; GCN-NEXT:    v_mov_b32_e32 v3, s3
364; GCN-NEXT:    v_mov_b32_e32 v4, s4
365; GCN-NEXT:    v_mov_b32_e32 v5, s5
366; GCN-NEXT:    v_mov_b32_e32 v6, s6
367; GCN-NEXT:    v_mov_b32_e32 v7, s7
368; GCN-NEXT:    v_mov_b32_e32 v8, s8
369; GCN-NEXT:    v_mov_b32_e32 v9, s9
370; GCN-NEXT:    v_mov_b32_e32 v10, s10
371; GCN-NEXT:    v_mov_b32_e32 v11, s11
372; GCN-NEXT:    v_mov_b32_e32 v12, s12
373; GCN-NEXT:    v_mov_b32_e32 v13, s13
374; GCN-NEXT:    v_mov_b32_e32 v14, s14
375; GCN-NEXT:    v_movrels_b32_e32 v16, v1
376; GCN-NEXT:    v_movrels_b32_e32 v15, v0
377; GCN-NEXT:    v_mov_b32_e32 v0, s14
378; GCN-NEXT:    v_mov_b32_e32 v1, s15
379; GCN-NEXT:    flat_store_dwordx2 v[0:1], v[15:16]
380; GCN-NEXT:    s_endpgm
381entry:
382  %ext = extractelement <7 x double> <double 1.0, double 2.0, double 3.0, double 4.0, double 5.0, double 6.0, double 7.0>, i32 %sel
383  store double %ext, ptr addrspace(1) %out
384  ret void
385}
386
; float16_extelt: extract element %sel of <16 x float> <1.0 .. 16.0> and store
; it to %out.
; The check lines expect the sixteen constants in v0..v15, the index copied
; to m0, and a single v_movrels_b32 based at v0 to read the element.
387define amdgpu_kernel void @float16_extelt(ptr addrspace(1) %out, i32 %sel) {
388; GCN-LABEL: float16_extelt:
389; GCN:       ; %bb.0: ; %entry
390; GCN-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
391; GCN-NEXT:    s_load_dword s2, s[4:5], 0x2c
392; GCN-NEXT:    v_mov_b32_e32 v0, 1.0
393; GCN-NEXT:    v_mov_b32_e32 v1, 2.0
394; GCN-NEXT:    v_mov_b32_e32 v2, 0x40400000
395; GCN-NEXT:    v_mov_b32_e32 v3, 4.0
396; GCN-NEXT:    s_waitcnt lgkmcnt(0)
397; GCN-NEXT:    s_mov_b32 m0, s2
398; GCN-NEXT:    v_mov_b32_e32 v4, 0x40a00000
399; GCN-NEXT:    v_mov_b32_e32 v5, 0x40c00000
400; GCN-NEXT:    v_mov_b32_e32 v6, 0x40e00000
401; GCN-NEXT:    v_mov_b32_e32 v7, 0x41000000
402; GCN-NEXT:    v_mov_b32_e32 v8, 0x41100000
403; GCN-NEXT:    v_mov_b32_e32 v9, 0x41200000
404; GCN-NEXT:    v_mov_b32_e32 v10, 0x41300000
405; GCN-NEXT:    v_mov_b32_e32 v11, 0x41400000
406; GCN-NEXT:    v_mov_b32_e32 v12, 0x41500000
407; GCN-NEXT:    v_mov_b32_e32 v13, 0x41600000
408; GCN-NEXT:    v_mov_b32_e32 v14, 0x41700000
409; GCN-NEXT:    v_mov_b32_e32 v15, 0x41800000
410; GCN-NEXT:    v_movrels_b32_e32 v2, v0
411; GCN-NEXT:    v_mov_b32_e32 v0, s0
412; GCN-NEXT:    v_mov_b32_e32 v1, s1
413; GCN-NEXT:    flat_store_dword v[0:1], v2
414; GCN-NEXT:    s_endpgm
415entry:
416  %ext = extractelement <16 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0, float 11.0, float 12.0, float 13.0, float 14.0, float 15.0, float 16.0>, i32 %sel
417  store float %ext, ptr addrspace(1) %out
418  ret void
419}
420
; double15_extelt: extract element %sel of the odd-sized vector
; <15 x double> <1.0 .. 15.0> and store it to %out.
; The check lines expect the same 32-VGPR v_movrels_b32 sequence as the
; sixteen-element double case: constants in s36..s65, copied to v0..v29, m0
; set to the index shifted left by 1, and two v_movrels_b32 (bases v1 and v0).
; NOTE(review) - v30/v31 are filled from s66/s67, which no visible check line
; writes. Presumably these are the undefined sixteenth (padding) element of a
; widened vector and only matter for an out-of-range index; confirm against
; the backend before relying on it.
421define amdgpu_kernel void @double15_extelt(ptr addrspace(1) %out, i32 %sel) {
422; GCN-LABEL: double15_extelt:
423; GCN:       ; %bb.0: ; %entry
424; GCN-NEXT:    s_load_dword s2, s[4:5], 0x2c
425; GCN-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
426; GCN-NEXT:    s_mov_b32 s36, 0
427; GCN-NEXT:    s_mov_b32 s65, 0x402e0000
428; GCN-NEXT:    s_mov_b32 s63, 0x402c0000
429; GCN-NEXT:    s_mov_b32 s61, 0x402a0000
430; GCN-NEXT:    s_mov_b32 s59, 0x40280000
431; GCN-NEXT:    s_mov_b32 s57, 0x40260000
432; GCN-NEXT:    s_mov_b32 s55, 0x40240000
433; GCN-NEXT:    s_mov_b32 s53, 0x40220000
434; GCN-NEXT:    s_mov_b32 s51, 0x40200000
435; GCN-NEXT:    s_mov_b32 s49, 0x401c0000
436; GCN-NEXT:    s_mov_b32 s47, 0x40180000
437; GCN-NEXT:    s_mov_b32 s45, 0x40140000
438; GCN-NEXT:    s_mov_b32 s43, 0x40100000
439; GCN-NEXT:    s_mov_b32 s41, 0x40080000
440; GCN-NEXT:    s_mov_b32 s39, 2.0
441; GCN-NEXT:    s_mov_b32 s37, 0x3ff00000
442; GCN-NEXT:    s_mov_b32 s38, s36
443; GCN-NEXT:    s_mov_b32 s40, s36
444; GCN-NEXT:    s_mov_b32 s42, s36
445; GCN-NEXT:    s_mov_b32 s44, s36
446; GCN-NEXT:    s_mov_b32 s46, s36
447; GCN-NEXT:    s_mov_b32 s48, s36
448; GCN-NEXT:    s_mov_b32 s50, s36
449; GCN-NEXT:    s_mov_b32 s52, s36
450; GCN-NEXT:    s_mov_b32 s54, s36
451; GCN-NEXT:    s_mov_b32 s56, s36
452; GCN-NEXT:    s_mov_b32 s58, s36
453; GCN-NEXT:    s_mov_b32 s60, s36
454; GCN-NEXT:    s_mov_b32 s62, s36
455; GCN-NEXT:    s_mov_b32 s64, s36
456; GCN-NEXT:    s_waitcnt lgkmcnt(0)
457; GCN-NEXT:    s_lshl_b32 s2, s2, 1
458; GCN-NEXT:    v_mov_b32_e32 v0, s36
459; GCN-NEXT:    v_mov_b32_e32 v1, s37
460; GCN-NEXT:    v_mov_b32_e32 v31, s67
461; GCN-NEXT:    s_mov_b32 m0, s2
462; GCN-NEXT:    v_mov_b32_e32 v2, s38
463; GCN-NEXT:    v_mov_b32_e32 v3, s39
464; GCN-NEXT:    v_mov_b32_e32 v4, s40
465; GCN-NEXT:    v_mov_b32_e32 v5, s41
466; GCN-NEXT:    v_mov_b32_e32 v6, s42
467; GCN-NEXT:    v_mov_b32_e32 v7, s43
468; GCN-NEXT:    v_mov_b32_e32 v8, s44
469; GCN-NEXT:    v_mov_b32_e32 v9, s45
470; GCN-NEXT:    v_mov_b32_e32 v10, s46
471; GCN-NEXT:    v_mov_b32_e32 v11, s47
472; GCN-NEXT:    v_mov_b32_e32 v12, s48
473; GCN-NEXT:    v_mov_b32_e32 v13, s49
474; GCN-NEXT:    v_mov_b32_e32 v14, s50
475; GCN-NEXT:    v_mov_b32_e32 v15, s51
476; GCN-NEXT:    v_mov_b32_e32 v16, s52
477; GCN-NEXT:    v_mov_b32_e32 v17, s53
478; GCN-NEXT:    v_mov_b32_e32 v18, s54
479; GCN-NEXT:    v_mov_b32_e32 v19, s55
480; GCN-NEXT:    v_mov_b32_e32 v20, s56
481; GCN-NEXT:    v_mov_b32_e32 v21, s57
482; GCN-NEXT:    v_mov_b32_e32 v22, s58
483; GCN-NEXT:    v_mov_b32_e32 v23, s59
484; GCN-NEXT:    v_mov_b32_e32 v24, s60
485; GCN-NEXT:    v_mov_b32_e32 v25, s61
486; GCN-NEXT:    v_mov_b32_e32 v26, s62
487; GCN-NEXT:    v_mov_b32_e32 v27, s63
488; GCN-NEXT:    v_mov_b32_e32 v28, s64
489; GCN-NEXT:    v_mov_b32_e32 v29, s65
490; GCN-NEXT:    v_mov_b32_e32 v30, s66
491; GCN-NEXT:    v_movrels_b32_e32 v32, v1
492; GCN-NEXT:    v_movrels_b32_e32 v31, v0
493; GCN-NEXT:    v_mov_b32_e32 v0, s0
494; GCN-NEXT:    v_mov_b32_e32 v1, s1
495; GCN-NEXT:    flat_store_dwordx2 v[0:1], v[31:32]
496; GCN-NEXT:    s_endpgm
497entry:
498  %ext = extractelement <15 x double> <double 1.0, double 2.0, double 3.0, double 4.0, double 5.0, double 6.0, double 7.0, double 8.0, double 9.0, double 10.0, double 11.0, double 12.0, double 13.0, double 14.0, double 15.0>, i32 %sel
499  store double %ext, ptr addrspace(1) %out
500  ret void
501}
502
; double16_extelt: extract element %sel of <16 x double> <1.0 .. 16.0> and
; store it to %out.
; The check lines expect the constant to be built in s36..s67 (odd registers
; hold the distinct high words, even registers are copies of s36 = 0), copied
; to v0..v31, and read back with two v_movrels_b32 (bases v1 and v0) after m0
; is set to the index shifted left by 1.
503define amdgpu_kernel void @double16_extelt(ptr addrspace(1) %out, i32 %sel) {
504; GCN-LABEL: double16_extelt:
505; GCN:       ; %bb.0: ; %entry
506; GCN-NEXT:    s_load_dword s2, s[4:5], 0x2c
507; GCN-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
508; GCN-NEXT:    s_mov_b32 s36, 0
509; GCN-NEXT:    s_mov_b32 s67, 0x40300000
510; GCN-NEXT:    s_mov_b32 s65, 0x402e0000
511; GCN-NEXT:    s_mov_b32 s63, 0x402c0000
512; GCN-NEXT:    s_mov_b32 s61, 0x402a0000
513; GCN-NEXT:    s_mov_b32 s59, 0x40280000
514; GCN-NEXT:    s_mov_b32 s57, 0x40260000
515; GCN-NEXT:    s_mov_b32 s55, 0x40240000
516; GCN-NEXT:    s_mov_b32 s53, 0x40220000
517; GCN-NEXT:    s_mov_b32 s51, 0x40200000
518; GCN-NEXT:    s_mov_b32 s49, 0x401c0000
519; GCN-NEXT:    s_mov_b32 s47, 0x40180000
520; GCN-NEXT:    s_mov_b32 s45, 0x40140000
521; GCN-NEXT:    s_mov_b32 s43, 0x40100000
522; GCN-NEXT:    s_mov_b32 s41, 0x40080000
523; GCN-NEXT:    s_mov_b32 s39, 2.0
524; GCN-NEXT:    s_mov_b32 s37, 0x3ff00000
525; GCN-NEXT:    s_mov_b32 s38, s36
526; GCN-NEXT:    s_mov_b32 s40, s36
527; GCN-NEXT:    s_mov_b32 s42, s36
528; GCN-NEXT:    s_mov_b32 s44, s36
529; GCN-NEXT:    s_mov_b32 s46, s36
530; GCN-NEXT:    s_mov_b32 s48, s36
531; GCN-NEXT:    s_mov_b32 s50, s36
532; GCN-NEXT:    s_mov_b32 s52, s36
533; GCN-NEXT:    s_mov_b32 s54, s36
534; GCN-NEXT:    s_mov_b32 s56, s36
535; GCN-NEXT:    s_mov_b32 s58, s36
536; GCN-NEXT:    s_mov_b32 s60, s36
537; GCN-NEXT:    s_mov_b32 s62, s36
538; GCN-NEXT:    s_mov_b32 s64, s36
539; GCN-NEXT:    s_mov_b32 s66, s36
540; GCN-NEXT:    s_waitcnt lgkmcnt(0)
541; GCN-NEXT:    s_lshl_b32 s2, s2, 1
542; GCN-NEXT:    v_mov_b32_e32 v0, s36
543; GCN-NEXT:    v_mov_b32_e32 v1, s37
544; GCN-NEXT:    v_mov_b32_e32 v31, s67
545; GCN-NEXT:    s_mov_b32 m0, s2
546; GCN-NEXT:    v_mov_b32_e32 v2, s38
547; GCN-NEXT:    v_mov_b32_e32 v3, s39
548; GCN-NEXT:    v_mov_b32_e32 v4, s40
549; GCN-NEXT:    v_mov_b32_e32 v5, s41
550; GCN-NEXT:    v_mov_b32_e32 v6, s42
551; GCN-NEXT:    v_mov_b32_e32 v7, s43
552; GCN-NEXT:    v_mov_b32_e32 v8, s44
553; GCN-NEXT:    v_mov_b32_e32 v9, s45
554; GCN-NEXT:    v_mov_b32_e32 v10, s46
555; GCN-NEXT:    v_mov_b32_e32 v11, s47
556; GCN-NEXT:    v_mov_b32_e32 v12, s48
557; GCN-NEXT:    v_mov_b32_e32 v13, s49
558; GCN-NEXT:    v_mov_b32_e32 v14, s50
559; GCN-NEXT:    v_mov_b32_e32 v15, s51
560; GCN-NEXT:    v_mov_b32_e32 v16, s52
561; GCN-NEXT:    v_mov_b32_e32 v17, s53
562; GCN-NEXT:    v_mov_b32_e32 v18, s54
563; GCN-NEXT:    v_mov_b32_e32 v19, s55
564; GCN-NEXT:    v_mov_b32_e32 v20, s56
565; GCN-NEXT:    v_mov_b32_e32 v21, s57
566; GCN-NEXT:    v_mov_b32_e32 v22, s58
567; GCN-NEXT:    v_mov_b32_e32 v23, s59
568; GCN-NEXT:    v_mov_b32_e32 v24, s60
569; GCN-NEXT:    v_mov_b32_e32 v25, s61
570; GCN-NEXT:    v_mov_b32_e32 v26, s62
571; GCN-NEXT:    v_mov_b32_e32 v27, s63
572; GCN-NEXT:    v_mov_b32_e32 v28, s64
573; GCN-NEXT:    v_mov_b32_e32 v29, s65
574; GCN-NEXT:    v_mov_b32_e32 v30, s66
575; GCN-NEXT:    v_movrels_b32_e32 v32, v1
576; GCN-NEXT:    v_movrels_b32_e32 v31, v0
577; GCN-NEXT:    v_mov_b32_e32 v0, s0
578; GCN-NEXT:    v_mov_b32_e32 v1, s1
579; GCN-NEXT:    flat_store_dwordx2 v[0:1], v[31:32]
580; GCN-NEXT:    s_endpgm
581entry:
582  %ext = extractelement <16 x double> <double 1.0, double 2.0, double 3.0, double 4.0, double 5.0, double 6.0, double 7.0, double 8.0, double 9.0, double 10.0, double 11.0, double 12.0, double 13.0, double 14.0, double 15.0, double 16.0>, i32 %sel
583  store double %ext, ptr addrspace(1) %out
584  ret void
585}
586
; float32_extelt: extract element %sel of <32 x float> <1.0 .. 32.0> and store
; it to %out.
; The check lines expect the thirty-two constants in v0..v31, the index
; copied to m0, and a single v_movrels_b32 based at v0 to read the element.
587define amdgpu_kernel void @float32_extelt(ptr addrspace(1) %out, i32 %sel) {
588; GCN-LABEL: float32_extelt:
589; GCN:       ; %bb.0: ; %entry
590; GCN-NEXT:    s_load_dword s2, s[4:5], 0x2c
591; GCN-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
592; GCN-NEXT:    v_mov_b32_e32 v0, 1.0
593; GCN-NEXT:    v_mov_b32_e32 v1, 2.0
594; GCN-NEXT:    v_mov_b32_e32 v2, 0x40400000
595; GCN-NEXT:    s_waitcnt lgkmcnt(0)
596; GCN-NEXT:    s_mov_b32 m0, s2
597; GCN-NEXT:    v_mov_b32_e32 v3, 4.0
598; GCN-NEXT:    v_mov_b32_e32 v4, 0x40a00000
599; GCN-NEXT:    v_mov_b32_e32 v5, 0x40c00000
600; GCN-NEXT:    v_mov_b32_e32 v6, 0x40e00000
601; GCN-NEXT:    v_mov_b32_e32 v7, 0x41000000
602; GCN-NEXT:    v_mov_b32_e32 v8, 0x41100000
603; GCN-NEXT:    v_mov_b32_e32 v9, 0x41200000
604; GCN-NEXT:    v_mov_b32_e32 v10, 0x41300000
605; GCN-NEXT:    v_mov_b32_e32 v11, 0x41400000
606; GCN-NEXT:    v_mov_b32_e32 v12, 0x41500000
607; GCN-NEXT:    v_mov_b32_e32 v13, 0x41600000
608; GCN-NEXT:    v_mov_b32_e32 v14, 0x41700000
609; GCN-NEXT:    v_mov_b32_e32 v15, 0x41800000
610; GCN-NEXT:    v_mov_b32_e32 v16, 0x41880000
611; GCN-NEXT:    v_mov_b32_e32 v17, 0x41900000
612; GCN-NEXT:    v_mov_b32_e32 v18, 0x41980000
613; GCN-NEXT:    v_mov_b32_e32 v19, 0x41a00000
614; GCN-NEXT:    v_mov_b32_e32 v20, 0x41a80000
615; GCN-NEXT:    v_mov_b32_e32 v21, 0x41b00000
616; GCN-NEXT:    v_mov_b32_e32 v22, 0x41b80000
617; GCN-NEXT:    v_mov_b32_e32 v23, 0x41c00000
618; GCN-NEXT:    v_mov_b32_e32 v24, 0x41c80000
619; GCN-NEXT:    v_mov_b32_e32 v25, 0x41d00000
620; GCN-NEXT:    v_mov_b32_e32 v26, 0x41d80000
621; GCN-NEXT:    v_mov_b32_e32 v27, 0x41e00000
622; GCN-NEXT:    v_mov_b32_e32 v28, 0x41e80000
623; GCN-NEXT:    v_mov_b32_e32 v29, 0x41f00000
624; GCN-NEXT:    v_mov_b32_e32 v30, 0x41f80000
625; GCN-NEXT:    v_mov_b32_e32 v31, 0x42000000
626; GCN-NEXT:    v_movrels_b32_e32 v2, v0
627; GCN-NEXT:    v_mov_b32_e32 v0, s0
628; GCN-NEXT:    v_mov_b32_e32 v1, s1
629; GCN-NEXT:    flat_store_dword v[0:1], v2
630; GCN-NEXT:    s_endpgm
631entry:
632  %ext = extractelement <32 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0, float 11.0, float 12.0, float 13.0, float 14.0, float 15.0, float 16.0, float 17.0, float 18.0, float 19.0, float 20.0, float 21.0, float 22.0, float 23.0, float 24.0, float 25.0, float 26.0, float 27.0, float 28.0, float 29.0, float 30.0, float 31.0, float 32.0>, i32 %sel
633  store float %ext, ptr addrspace(1) %out
634  ret void
635}
636
; byte8_extelt: extract element %sel of <8 x i8> <1 .. 8> and store it to
; %out.
; The check lines expect the eight bytes packed into the 64-bit pair s[2:3]
; (0x4030201, 0x8070605). The index is shifted left by 3 and a 64-bit right
; shift by that amount brings the wanted byte to the low bits, which
; flat_store_byte then writes out.
637define amdgpu_kernel void @byte8_extelt(ptr addrspace(1) %out, i32 %sel) {
638; GCN-LABEL: byte8_extelt:
639; GCN:       ; %bb.0: ; %entry
640; GCN-NEXT:    s_load_dword s6, s[4:5], 0x2c
641; GCN-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
642; GCN-NEXT:    s_mov_b32 s2, 0x4030201
643; GCN-NEXT:    s_mov_b32 s3, 0x8070605
644; GCN-NEXT:    s_waitcnt lgkmcnt(0)
645; GCN-NEXT:    s_lshl_b32 s4, s6, 3
646; GCN-NEXT:    s_lshr_b64 s[2:3], s[2:3], s4
647; GCN-NEXT:    v_mov_b32_e32 v0, s0
648; GCN-NEXT:    v_mov_b32_e32 v1, s1
649; GCN-NEXT:    v_mov_b32_e32 v2, s2
650; GCN-NEXT:    flat_store_byte v[0:1], v2
651; GCN-NEXT:    s_endpgm
652entry:
653  %ext = extractelement <8 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>, i32 %sel
654  store i8 %ext, ptr addrspace(1) %out
655  ret void
656}
657
; byte16_extelt: extract element %sel of <16 x i8> <1 .. 16> and store it to
; %out.
; Unlike the eight-byte case, the check lines expect a scalar
; s_cmp / s_cselect_b32 chain over indices 1..15, with the result copied to a
; VGPR only for the final flat_store_byte.
658define amdgpu_kernel void @byte16_extelt(ptr addrspace(1) %out, i32 %sel) {
659; GCN-LABEL: byte16_extelt:
660; GCN:       ; %bb.0: ; %entry
661; GCN-NEXT:    s_load_dword s2, s[4:5], 0x2c
662; GCN-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
663; GCN-NEXT:    s_waitcnt lgkmcnt(0)
664; GCN-NEXT:    s_cmp_eq_u32 s2, 1
665; GCN-NEXT:    s_cselect_b32 s3, 2, 1
666; GCN-NEXT:    s_cmp_lg_u32 s2, 2
667; GCN-NEXT:    s_cselect_b32 s3, s3, 3
668; GCN-NEXT:    s_cmp_lg_u32 s2, 3
669; GCN-NEXT:    s_cselect_b32 s3, s3, 4
670; GCN-NEXT:    s_cmp_lg_u32 s2, 4
671; GCN-NEXT:    s_cselect_b32 s3, s3, 5
672; GCN-NEXT:    s_cmp_lg_u32 s2, 5
673; GCN-NEXT:    s_cselect_b32 s3, s3, 6
674; GCN-NEXT:    s_cmp_lg_u32 s2, 6
675; GCN-NEXT:    s_cselect_b32 s3, s3, 7
676; GCN-NEXT:    s_cmp_lg_u32 s2, 7
677; GCN-NEXT:    s_cselect_b32 s3, s3, 8
678; GCN-NEXT:    s_cmp_lg_u32 s2, 8
679; GCN-NEXT:    s_cselect_b32 s3, s3, 9
680; GCN-NEXT:    s_cmp_lg_u32 s2, 9
681; GCN-NEXT:    s_cselect_b32 s3, s3, 10
682; GCN-NEXT:    s_cmp_lg_u32 s2, 10
683; GCN-NEXT:    s_cselect_b32 s3, s3, 11
684; GCN-NEXT:    s_cmp_lg_u32 s2, 11
685; GCN-NEXT:    s_cselect_b32 s3, s3, 12
686; GCN-NEXT:    s_cmp_lg_u32 s2, 12
687; GCN-NEXT:    s_cselect_b32 s3, s3, 13
688; GCN-NEXT:    s_cmp_lg_u32 s2, 13
689; GCN-NEXT:    s_cselect_b32 s3, s3, 14
690; GCN-NEXT:    s_cmp_lg_u32 s2, 14
691; GCN-NEXT:    s_cselect_b32 s3, s3, 15
692; GCN-NEXT:    s_cmp_lg_u32 s2, 15
693; GCN-NEXT:    s_cselect_b32 s2, s3, 16
694; GCN-NEXT:    v_mov_b32_e32 v0, s0
695; GCN-NEXT:    v_mov_b32_e32 v1, s1
696; GCN-NEXT:    v_mov_b32_e32 v2, s2
697; GCN-NEXT:    flat_store_byte v[0:1], v2
698; GCN-NEXT:    s_endpgm
699entry:
700  %ext = extractelement <16 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16>, i32 %sel
701  store i8 %ext, ptr addrspace(1) %out
702  ret void
703}
704
; bit4_extelt: extract element %sel of <4 x i1> <0, 1, 0, 1>, zero-extend it
; to i32 and store that to %out.
; The check lines expect the index to be shifted left by 3, the constant
; 0x1000100 to be shifted right by that amount, and the result masked with 1
; before the 32-bit store.
705define amdgpu_kernel void @bit4_extelt(ptr addrspace(1) %out, i32 %sel) {
706; GCN-LABEL: bit4_extelt:
707; GCN:       ; %bb.0: ; %entry
708; GCN-NEXT:    s_load_dword s2, s[4:5], 0x2c
709; GCN-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
710; GCN-NEXT:    s_waitcnt lgkmcnt(0)
711; GCN-NEXT:    s_lshl_b32 s2, s2, 3
712; GCN-NEXT:    s_lshr_b32 s2, 0x1000100, s2
713; GCN-NEXT:    s_and_b32 s2, s2, 1
714; GCN-NEXT:    v_mov_b32_e32 v0, s0
715; GCN-NEXT:    v_mov_b32_e32 v1, s1
716; GCN-NEXT:    v_mov_b32_e32 v2, s2
717; GCN-NEXT:    flat_store_dword v[0:1], v2
718; GCN-NEXT:    s_endpgm
719entry:
720  %ext = extractelement <4 x i1> <i1 0, i1 1, i1 0, i1 1>, i32 %sel
721  %zext = zext i1 %ext to i32
722  store i32 %zext, ptr addrspace(1) %out
723  ret void
724}
725
726define amdgpu_kernel void @bit128_extelt(ptr addrspace(1) %out, i32 %sel) {
727; GCN-LABEL: bit128_extelt:
728; GCN:       ; %bb.0: ; %entry
729; GCN-NEXT:    s_load_dword s2, s[4:5], 0x2c
730; GCN-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
731; GCN-NEXT:    s_waitcnt lgkmcnt(0)
732; GCN-NEXT:    s_cmp_lg_u32 s2, 1
733; GCN-NEXT:    s_cselect_b64 s[4:5], -1, 0
734; GCN-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
735; GCN-NEXT:    s_cmp_lg_u32 s2, 2
736; GCN-NEXT:    v_readfirstlane_b32 s3, v0
737; GCN-NEXT:    s_cselect_b32 s3, s3, 1
738; GCN-NEXT:    s_cmp_lg_u32 s2, 3
739; GCN-NEXT:    s_cselect_b32 s3, s3, 0
740; GCN-NEXT:    s_cmp_lg_u32 s2, 4
741; GCN-NEXT:    s_cselect_b32 s3, s3, 1
742; GCN-NEXT:    s_cmp_lg_u32 s2, 5
743; GCN-NEXT:    s_cselect_b32 s3, s3, 0
744; GCN-NEXT:    s_cmp_lg_u32 s2, 6
745; GCN-NEXT:    s_cselect_b32 s3, s3, 1
746; GCN-NEXT:    s_cmp_lg_u32 s2, 7
747; GCN-NEXT:    s_cselect_b32 s3, s3, 0
748; GCN-NEXT:    s_cmp_lg_u32 s2, 8
749; GCN-NEXT:    s_cselect_b32 s3, s3, 1
750; GCN-NEXT:    s_cmp_lg_u32 s2, 9
751; GCN-NEXT:    s_cselect_b32 s3, s3, 0
752; GCN-NEXT:    s_cmp_lg_u32 s2, 10
753; GCN-NEXT:    s_cselect_b32 s3, s3, 1
754; GCN-NEXT:    s_cmp_lg_u32 s2, 11
755; GCN-NEXT:    s_cselect_b32 s3, s3, 0
756; GCN-NEXT:    s_cmp_lg_u32 s2, 12
757; GCN-NEXT:    s_cselect_b32 s3, s3, 1
758; GCN-NEXT:    s_cmp_lg_u32 s2, 13
759; GCN-NEXT:    s_cselect_b32 s3, s3, 0
760; GCN-NEXT:    s_cmp_lg_u32 s2, 14
761; GCN-NEXT:    s_cselect_b32 s3, s3, 1
762; GCN-NEXT:    s_cmp_lg_u32 s2, 15
763; GCN-NEXT:    s_cselect_b32 s3, s3, 0
764; GCN-NEXT:    s_cmp_lg_u32 s2, 16
765; GCN-NEXT:    s_cselect_b32 s3, s3, 1
766; GCN-NEXT:    s_cmp_lg_u32 s2, 17
767; GCN-NEXT:    s_cselect_b32 s3, s3, 0
768; GCN-NEXT:    s_cmp_lg_u32 s2, 18
769; GCN-NEXT:    s_cselect_b32 s3, s3, 1
770; GCN-NEXT:    s_cmp_lg_u32 s2, 19
771; GCN-NEXT:    s_cselect_b32 s3, s3, 0
772; GCN-NEXT:    s_cmp_lg_u32 s2, 20
773; GCN-NEXT:    s_cselect_b32 s3, s3, 1
774; GCN-NEXT:    s_cmp_lg_u32 s2, 21
775; GCN-NEXT:    s_cselect_b32 s3, s3, 0
776; GCN-NEXT:    s_cmp_lg_u32 s2, 22
777; GCN-NEXT:    s_cselect_b32 s3, s3, 1
778; GCN-NEXT:    s_cmp_lg_u32 s2, 23
779; GCN-NEXT:    s_cselect_b32 s3, s3, 0
780; GCN-NEXT:    s_cmp_lg_u32 s2, 24
781; GCN-NEXT:    s_cselect_b32 s3, s3, 1
782; GCN-NEXT:    s_cmp_lg_u32 s2, 25
783; GCN-NEXT:    s_cselect_b32 s3, s3, 0
784; GCN-NEXT:    s_cmp_lg_u32 s2, 26
785; GCN-NEXT:    s_cselect_b32 s3, s3, 1
786; GCN-NEXT:    s_cmp_lg_u32 s2, 27
787; GCN-NEXT:    s_cselect_b32 s3, s3, 0
788; GCN-NEXT:    s_cmp_lg_u32 s2, 28
789; GCN-NEXT:    s_cselect_b32 s3, s3, 1
790; GCN-NEXT:    s_cmp_lg_u32 s2, 29
791; GCN-NEXT:    s_cselect_b32 s3, s3, 0
792; GCN-NEXT:    s_cmp_lg_u32 s2, 30
793; GCN-NEXT:    s_cselect_b32 s3, s3, 1
794; GCN-NEXT:    s_cmp_lg_u32 s2, 31
795; GCN-NEXT:    s_cselect_b32 s3, s3, 0
796; GCN-NEXT:    s_cmp_lg_u32 s2, 32
797; GCN-NEXT:    s_cselect_b32 s3, s3, 1
798; GCN-NEXT:    s_cmp_lg_u32 s2, 33
799; GCN-NEXT:    s_cselect_b32 s3, s3, 0
800; GCN-NEXT:    s_cmp_lg_u32 s2, 34
801; GCN-NEXT:    s_cselect_b32 s3, s3, 1
802; GCN-NEXT:    s_cmp_lg_u32 s2, 35
803; GCN-NEXT:    s_cselect_b32 s3, s3, 0
804; GCN-NEXT:    s_cmp_lg_u32 s2, 36
805; GCN-NEXT:    s_cselect_b32 s3, s3, 1
806; GCN-NEXT:    s_cmp_lg_u32 s2, 37
807; GCN-NEXT:    s_cselect_b32 s3, s3, 0
808; GCN-NEXT:    s_cmp_lg_u32 s2, 38
809; GCN-NEXT:    s_cselect_b32 s3, s3, 1
810; GCN-NEXT:    s_cmp_lg_u32 s2, 39
811; GCN-NEXT:    s_cselect_b32 s3, s3, 0
812; GCN-NEXT:    s_cmp_lg_u32 s2, 40
813; GCN-NEXT:    s_cselect_b32 s3, s3, 1
814; GCN-NEXT:    s_cmp_lg_u32 s2, 41
815; GCN-NEXT:    s_cselect_b32 s3, s3, 0
816; GCN-NEXT:    s_cmp_lg_u32 s2, 42
817; GCN-NEXT:    s_cselect_b32 s3, s3, 1
818; GCN-NEXT:    s_cmp_lg_u32 s2, 43
819; GCN-NEXT:    s_cselect_b32 s3, s3, 0
820; GCN-NEXT:    s_cmp_lg_u32 s2, 44
821; GCN-NEXT:    s_cselect_b32 s3, s3, 1
822; GCN-NEXT:    s_cmp_lg_u32 s2, 45
823; GCN-NEXT:    s_cselect_b32 s3, s3, 0
824; GCN-NEXT:    s_cmp_lg_u32 s2, 46
825; GCN-NEXT:    s_cselect_b32 s3, s3, 1
826; GCN-NEXT:    s_cmp_lg_u32 s2, 47
827; GCN-NEXT:    s_cselect_b32 s3, s3, 0
828; GCN-NEXT:    s_cmp_lg_u32 s2, 48
829; GCN-NEXT:    s_cselect_b32 s3, s3, 1
830; GCN-NEXT:    s_cmp_lg_u32 s2, 49
831; GCN-NEXT:    s_cselect_b32 s3, s3, 0
832; GCN-NEXT:    s_cmp_lg_u32 s2, 50
833; GCN-NEXT:    s_cselect_b32 s3, s3, 1
834; GCN-NEXT:    s_cmp_lg_u32 s2, 51
835; GCN-NEXT:    s_cselect_b32 s3, s3, 0
836; GCN-NEXT:    s_cmp_lg_u32 s2, 52
837; GCN-NEXT:    s_cselect_b32 s3, s3, 1
838; GCN-NEXT:    s_cmp_lg_u32 s2, 53
839; GCN-NEXT:    s_cselect_b32 s3, s3, 0
840; GCN-NEXT:    s_cmp_lg_u32 s2, 54
841; GCN-NEXT:    s_cselect_b32 s3, s3, 1
842; GCN-NEXT:    s_cmp_lg_u32 s2, 55
843; GCN-NEXT:    s_cselect_b32 s3, s3, 0
844; GCN-NEXT:    s_cmp_lg_u32 s2, 56
845; GCN-NEXT:    s_cselect_b32 s3, s3, 1
846; GCN-NEXT:    s_cmp_lg_u32 s2, 57
847; GCN-NEXT:    s_cselect_b32 s3, s3, 0
848; GCN-NEXT:    s_cmp_lg_u32 s2, 58
849; GCN-NEXT:    s_cselect_b32 s3, s3, 1
850; GCN-NEXT:    s_cmp_lg_u32 s2, 59
851; GCN-NEXT:    s_cselect_b32 s3, s3, 0
852; GCN-NEXT:    s_cmp_lg_u32 s2, 60
853; GCN-NEXT:    s_cselect_b32 s3, s3, 1
854; GCN-NEXT:    s_cmp_lg_u32 s2, 61
855; GCN-NEXT:    s_cselect_b32 s3, s3, 0
856; GCN-NEXT:    s_cmp_lg_u32 s2, 62
857; GCN-NEXT:    s_cselect_b32 s3, s3, 1
858; GCN-NEXT:    s_cmp_lg_u32 s2, 63
859; GCN-NEXT:    s_cselect_b32 s3, s3, 0
860; GCN-NEXT:    s_cmp_lg_u32 s2, 64
861; GCN-NEXT:    s_cselect_b32 s3, s3, 1
862; GCN-NEXT:    s_cmpk_lg_i32 s2, 0x41
863; GCN-NEXT:    s_cselect_b32 s3, s3, 0
864; GCN-NEXT:    s_cmpk_lg_i32 s2, 0x42
865; GCN-NEXT:    s_cselect_b32 s3, s3, 1
866; GCN-NEXT:    s_cmpk_lg_i32 s2, 0x43
867; GCN-NEXT:    s_cselect_b32 s3, s3, 0
868; GCN-NEXT:    s_cmpk_lg_i32 s2, 0x44
869; GCN-NEXT:    s_cselect_b32 s3, s3, 1
870; GCN-NEXT:    s_cmpk_lg_i32 s2, 0x45
871; GCN-NEXT:    s_cselect_b32 s3, s3, 0
872; GCN-NEXT:    s_cmpk_lg_i32 s2, 0x46
873; GCN-NEXT:    s_cselect_b32 s3, s3, 1
874; GCN-NEXT:    s_cmpk_lg_i32 s2, 0x47
875; GCN-NEXT:    s_cselect_b32 s3, s3, 0
876; GCN-NEXT:    s_cmpk_lg_i32 s2, 0x48
877; GCN-NEXT:    s_cselect_b32 s3, s3, 1
878; GCN-NEXT:    s_cmpk_lg_i32 s2, 0x49
879; GCN-NEXT:    s_cselect_b32 s3, s3, 0
880; GCN-NEXT:    s_cmpk_lg_i32 s2, 0x4a
881; GCN-NEXT:    s_cselect_b32 s3, s3, 1
882; GCN-NEXT:    s_cmpk_lg_i32 s2, 0x4b
883; GCN-NEXT:    s_cselect_b32 s3, s3, 0
884; GCN-NEXT:    s_cmpk_lg_i32 s2, 0x4c
885; GCN-NEXT:    s_cselect_b32 s3, s3, 1
886; GCN-NEXT:    s_cmpk_lg_i32 s2, 0x4d
887; GCN-NEXT:    s_cselect_b32 s3, s3, 0
888; GCN-NEXT:    s_cmpk_lg_i32 s2, 0x4e
889; GCN-NEXT:    s_cselect_b32 s3, s3, 1
890; GCN-NEXT:    s_cmpk_lg_i32 s2, 0x4f
891; GCN-NEXT:    s_cselect_b32 s3, s3, 0
892; GCN-NEXT:    s_cmpk_lg_i32 s2, 0x50
893; GCN-NEXT:    s_cselect_b32 s3, s3, 1
894; GCN-NEXT:    s_cmpk_lg_i32 s2, 0x51
895; GCN-NEXT:    s_cselect_b32 s3, s3, 0
896; GCN-NEXT:    s_cmpk_lg_i32 s2, 0x52
897; GCN-NEXT:    s_cselect_b32 s3, s3, 1
898; GCN-NEXT:    s_cmpk_lg_i32 s2, 0x53
899; GCN-NEXT:    s_cselect_b32 s3, s3, 0
900; GCN-NEXT:    s_cmpk_lg_i32 s2, 0x54
901; GCN-NEXT:    s_cselect_b32 s3, s3, 1
902; GCN-NEXT:    s_cmpk_lg_i32 s2, 0x55
903; GCN-NEXT:    s_cselect_b32 s3, s3, 0
904; GCN-NEXT:    s_cmpk_lg_i32 s2, 0x56
905; GCN-NEXT:    s_cselect_b32 s3, s3, 1
906; GCN-NEXT:    s_cmpk_lg_i32 s2, 0x57
907; GCN-NEXT:    s_cselect_b32 s3, s3, 0
908; GCN-NEXT:    s_cmpk_lg_i32 s2, 0x58
909; GCN-NEXT:    s_cselect_b32 s3, s3, 1
910; GCN-NEXT:    s_cmpk_lg_i32 s2, 0x59
911; GCN-NEXT:    s_cselect_b32 s3, s3, 0
912; GCN-NEXT:    s_cmpk_lg_i32 s2, 0x5a
913; GCN-NEXT:    s_cselect_b32 s3, s3, 1
914; GCN-NEXT:    s_cmpk_lg_i32 s2, 0x5b
915; GCN-NEXT:    s_cselect_b32 s3, s3, 0
916; GCN-NEXT:    s_cmpk_lg_i32 s2, 0x5c
917; GCN-NEXT:    s_cselect_b32 s3, s3, 1
918; GCN-NEXT:    s_cmpk_lg_i32 s2, 0x5d
919; GCN-NEXT:    s_cselect_b32 s3, s3, 0
920; GCN-NEXT:    s_cmpk_lg_i32 s2, 0x5e
921; GCN-NEXT:    s_cselect_b32 s3, s3, 1
922; GCN-NEXT:    s_cmpk_lg_i32 s2, 0x5f
923; GCN-NEXT:    s_cselect_b32 s3, s3, 0
924; GCN-NEXT:    s_cmpk_lg_i32 s2, 0x60
925; GCN-NEXT:    s_cselect_b32 s3, s3, 1
926; GCN-NEXT:    s_cmpk_lg_i32 s2, 0x61
927; GCN-NEXT:    s_cselect_b32 s3, s3, 0
928; GCN-NEXT:    s_cmpk_lg_i32 s2, 0x62
929; GCN-NEXT:    s_cselect_b32 s3, s3, 1
930; GCN-NEXT:    s_cmpk_lg_i32 s2, 0x63
931; GCN-NEXT:    s_cselect_b32 s3, s3, 0
932; GCN-NEXT:    s_cmpk_lg_i32 s2, 0x64
933; GCN-NEXT:    s_cselect_b32 s3, s3, 1
934; GCN-NEXT:    s_cmpk_lg_i32 s2, 0x65
935; GCN-NEXT:    s_cselect_b32 s3, s3, 0
936; GCN-NEXT:    s_cmpk_lg_i32 s2, 0x66
937; GCN-NEXT:    s_cselect_b32 s3, s3, 1
938; GCN-NEXT:    s_cmpk_lg_i32 s2, 0x67
939; GCN-NEXT:    s_cselect_b32 s3, s3, 0
940; GCN-NEXT:    s_cmpk_lg_i32 s2, 0x68
941; GCN-NEXT:    s_cselect_b32 s3, s3, 1
942; GCN-NEXT:    s_cmpk_lg_i32 s2, 0x69
943; GCN-NEXT:    s_cselect_b32 s3, s3, 0
944; GCN-NEXT:    s_cmpk_lg_i32 s2, 0x6a
945; GCN-NEXT:    s_cselect_b32 s3, s3, 1
946; GCN-NEXT:    s_cmpk_lg_i32 s2, 0x6b
947; GCN-NEXT:    s_cselect_b32 s3, s3, 0
948; GCN-NEXT:    s_cmpk_lg_i32 s2, 0x6c
949; GCN-NEXT:    s_cselect_b32 s3, s3, 1
950; GCN-NEXT:    s_cmpk_lg_i32 s2, 0x6d
951; GCN-NEXT:    s_cselect_b32 s3, s3, 0
952; GCN-NEXT:    s_cmpk_lg_i32 s2, 0x6e
953; GCN-NEXT:    s_cselect_b32 s3, s3, 1
954; GCN-NEXT:    s_cmpk_lg_i32 s2, 0x6f
955; GCN-NEXT:    s_cselect_b32 s3, s3, 0
956; GCN-NEXT:    s_cmpk_lg_i32 s2, 0x70
957; GCN-NEXT:    s_cselect_b32 s3, s3, 1
958; GCN-NEXT:    s_cmpk_lg_i32 s2, 0x71
959; GCN-NEXT:    s_cselect_b32 s3, s3, 0
960; GCN-NEXT:    s_cmpk_lg_i32 s2, 0x72
961; GCN-NEXT:    s_cselect_b32 s3, s3, 1
962; GCN-NEXT:    s_cmpk_lg_i32 s2, 0x73
963; GCN-NEXT:    s_cselect_b32 s3, s3, 0
964; GCN-NEXT:    s_cmpk_lg_i32 s2, 0x74
965; GCN-NEXT:    s_cselect_b32 s3, s3, 1
966; GCN-NEXT:    s_cmpk_lg_i32 s2, 0x75
967; GCN-NEXT:    s_cselect_b32 s3, s3, 0
968; GCN-NEXT:    s_cmpk_lg_i32 s2, 0x76
969; GCN-NEXT:    s_cselect_b32 s3, s3, 1
970; GCN-NEXT:    s_cmpk_lg_i32 s2, 0x77
971; GCN-NEXT:    s_cselect_b32 s3, s3, 0
972; GCN-NEXT:    s_cmpk_lg_i32 s2, 0x78
973; GCN-NEXT:    s_cselect_b32 s3, s3, 1
974; GCN-NEXT:    s_cmpk_lg_i32 s2, 0x79
975; GCN-NEXT:    s_cselect_b32 s3, s3, 0
976; GCN-NEXT:    s_cmpk_lg_i32 s2, 0x7a
977; GCN-NEXT:    s_cselect_b32 s3, s3, 1
978; GCN-NEXT:    s_cmpk_lg_i32 s2, 0x7b
979; GCN-NEXT:    s_cselect_b32 s3, s3, 0
980; GCN-NEXT:    s_cmpk_lg_i32 s2, 0x7c
981; GCN-NEXT:    s_cselect_b32 s3, s3, 1
982; GCN-NEXT:    s_cmpk_lg_i32 s2, 0x7d
983; GCN-NEXT:    s_cselect_b32 s3, s3, 0
984; GCN-NEXT:    s_cmpk_lg_i32 s2, 0x7e
985; GCN-NEXT:    s_cselect_b32 s3, s3, 1
986; GCN-NEXT:    s_cmpk_lg_i32 s2, 0x7f
987; GCN-NEXT:    s_cselect_b32 s2, s3, 0
988; GCN-NEXT:    s_and_b32 s2, s2, 1
989; GCN-NEXT:    v_mov_b32_e32 v0, s0
990; GCN-NEXT:    v_mov_b32_e32 v1, s1
991; GCN-NEXT:    v_mov_b32_e32 v2, s2
992; GCN-NEXT:    flat_store_dword v[0:1], v2
993; GCN-NEXT:    s_endpgm
994entry:
995  %ext = extractelement <128 x i1> <i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0>, i32 %sel
996  %zext = zext i1 %ext to i32
997  store i32 %zext, ptr addrspace(1) %out
998  ret void
999}
1000
; float32_extelt_vec: non-kernel function that returns one element, selected at
; run time by %sel, of the constant <32 x float> vector <1.0, 2.0, ..., 32.0>.
;
; The autogenerated assertions below compare v0 against each index and expect
; a chain of vector compares and v_cndmask_b32 selects, one pair per index
; 1..31. The values 1.0, 2.0 and 4.0 appear directly as instruction operands;
; every other element is first materialized into v2 with v_mov_b32 from its
; IEEE-754 single-precision bit pattern (for example 0x40400000 for 3.0). The
; final select writes the result to v0 before returning with
; s_setpc_b64 s[30:31].
1001define float @float32_extelt_vec(i32 %sel) {
1002; GCN-LABEL: float32_extelt_vec:
1003; GCN:       ; %bb.0: ; %entry
1004; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1005; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
1006; GCN-NEXT:    v_cndmask_b32_e64 v1, 1.0, 2.0, vcc
1007; GCN-NEXT:    v_mov_b32_e32 v2, 0x40400000
1008; GCN-NEXT:    v_cmp_ne_u32_e32 vcc, 2, v0
1009; GCN-NEXT:    v_cndmask_b32_e32 v1, v2, v1, vcc
1010; GCN-NEXT:    v_cmp_ne_u32_e32 vcc, 3, v0
1011; GCN-NEXT:    v_cndmask_b32_e32 v1, 4.0, v1, vcc
1012; GCN-NEXT:    v_mov_b32_e32 v2, 0x40a00000
1013; GCN-NEXT:    v_cmp_ne_u32_e32 vcc, 4, v0
1014; GCN-NEXT:    v_cndmask_b32_e32 v1, v2, v1, vcc
1015; GCN-NEXT:    v_mov_b32_e32 v2, 0x40c00000
1016; GCN-NEXT:    v_cmp_ne_u32_e32 vcc, 5, v0
1017; GCN-NEXT:    v_cndmask_b32_e32 v1, v2, v1, vcc
1018; GCN-NEXT:    v_mov_b32_e32 v2, 0x40e00000
1019; GCN-NEXT:    v_cmp_ne_u32_e32 vcc, 6, v0
1020; GCN-NEXT:    v_cndmask_b32_e32 v1, v2, v1, vcc
1021; GCN-NEXT:    v_mov_b32_e32 v2, 0x41000000
1022; GCN-NEXT:    v_cmp_ne_u32_e32 vcc, 7, v0
1023; GCN-NEXT:    v_cndmask_b32_e32 v1, v2, v1, vcc
1024; GCN-NEXT:    v_mov_b32_e32 v2, 0x41100000
1025; GCN-NEXT:    v_cmp_ne_u32_e32 vcc, 8, v0
1026; GCN-NEXT:    v_cndmask_b32_e32 v1, v2, v1, vcc
1027; GCN-NEXT:    v_mov_b32_e32 v2, 0x41200000
1028; GCN-NEXT:    v_cmp_ne_u32_e32 vcc, 9, v0
1029; GCN-NEXT:    v_cndmask_b32_e32 v1, v2, v1, vcc
1030; GCN-NEXT:    v_mov_b32_e32 v2, 0x41300000
1031; GCN-NEXT:    v_cmp_ne_u32_e32 vcc, 10, v0
1032; GCN-NEXT:    v_cndmask_b32_e32 v1, v2, v1, vcc
1033; GCN-NEXT:    v_mov_b32_e32 v2, 0x41400000
1034; GCN-NEXT:    v_cmp_ne_u32_e32 vcc, 11, v0
1035; GCN-NEXT:    v_cndmask_b32_e32 v1, v2, v1, vcc
1036; GCN-NEXT:    v_mov_b32_e32 v2, 0x41500000
1037; GCN-NEXT:    v_cmp_ne_u32_e32 vcc, 12, v0
1038; GCN-NEXT:    v_cndmask_b32_e32 v1, v2, v1, vcc
1039; GCN-NEXT:    v_mov_b32_e32 v2, 0x41600000
1040; GCN-NEXT:    v_cmp_ne_u32_e32 vcc, 13, v0
1041; GCN-NEXT:    v_cndmask_b32_e32 v1, v2, v1, vcc
1042; GCN-NEXT:    v_mov_b32_e32 v2, 0x41700000
1043; GCN-NEXT:    v_cmp_ne_u32_e32 vcc, 14, v0
1044; GCN-NEXT:    v_cndmask_b32_e32 v1, v2, v1, vcc
1045; GCN-NEXT:    v_mov_b32_e32 v2, 0x41800000
1046; GCN-NEXT:    v_cmp_ne_u32_e32 vcc, 15, v0
1047; GCN-NEXT:    v_cndmask_b32_e32 v1, v2, v1, vcc
1048; GCN-NEXT:    v_mov_b32_e32 v2, 0x41880000
1049; GCN-NEXT:    v_cmp_ne_u32_e32 vcc, 16, v0
1050; GCN-NEXT:    v_cndmask_b32_e32 v1, v2, v1, vcc
1051; GCN-NEXT:    v_mov_b32_e32 v2, 0x41900000
1052; GCN-NEXT:    v_cmp_ne_u32_e32 vcc, 17, v0
1053; GCN-NEXT:    v_cndmask_b32_e32 v1, v2, v1, vcc
1054; GCN-NEXT:    v_mov_b32_e32 v2, 0x41980000
1055; GCN-NEXT:    v_cmp_ne_u32_e32 vcc, 18, v0
1056; GCN-NEXT:    v_cndmask_b32_e32 v1, v2, v1, vcc
1057; GCN-NEXT:    v_mov_b32_e32 v2, 0x41a00000
1058; GCN-NEXT:    v_cmp_ne_u32_e32 vcc, 19, v0
1059; GCN-NEXT:    v_cndmask_b32_e32 v1, v2, v1, vcc
1060; GCN-NEXT:    v_mov_b32_e32 v2, 0x41a80000
1061; GCN-NEXT:    v_cmp_ne_u32_e32 vcc, 20, v0
1062; GCN-NEXT:    v_cndmask_b32_e32 v1, v2, v1, vcc
1063; GCN-NEXT:    v_mov_b32_e32 v2, 0x41b00000
1064; GCN-NEXT:    v_cmp_ne_u32_e32 vcc, 21, v0
1065; GCN-NEXT:    v_cndmask_b32_e32 v1, v2, v1, vcc
1066; GCN-NEXT:    v_mov_b32_e32 v2, 0x41b80000
1067; GCN-NEXT:    v_cmp_ne_u32_e32 vcc, 22, v0
1068; GCN-NEXT:    v_cndmask_b32_e32 v1, v2, v1, vcc
1069; GCN-NEXT:    v_mov_b32_e32 v2, 0x41c00000
1070; GCN-NEXT:    v_cmp_ne_u32_e32 vcc, 23, v0
1071; GCN-NEXT:    v_cndmask_b32_e32 v1, v2, v1, vcc
1072; GCN-NEXT:    v_mov_b32_e32 v2, 0x41c80000
1073; GCN-NEXT:    v_cmp_ne_u32_e32 vcc, 24, v0
1074; GCN-NEXT:    v_cndmask_b32_e32 v1, v2, v1, vcc
1075; GCN-NEXT:    v_mov_b32_e32 v2, 0x41d00000
1076; GCN-NEXT:    v_cmp_ne_u32_e32 vcc, 25, v0
1077; GCN-NEXT:    v_cndmask_b32_e32 v1, v2, v1, vcc
1078; GCN-NEXT:    v_mov_b32_e32 v2, 0x41d80000
1079; GCN-NEXT:    v_cmp_ne_u32_e32 vcc, 26, v0
1080; GCN-NEXT:    v_cndmask_b32_e32 v1, v2, v1, vcc
1081; GCN-NEXT:    v_mov_b32_e32 v2, 0x41e00000
1082; GCN-NEXT:    v_cmp_ne_u32_e32 vcc, 27, v0
1083; GCN-NEXT:    v_cndmask_b32_e32 v1, v2, v1, vcc
1084; GCN-NEXT:    v_mov_b32_e32 v2, 0x41e80000
1085; GCN-NEXT:    v_cmp_ne_u32_e32 vcc, 28, v0
1086; GCN-NEXT:    v_cndmask_b32_e32 v1, v2, v1, vcc
1087; GCN-NEXT:    v_mov_b32_e32 v2, 0x41f00000
1088; GCN-NEXT:    v_cmp_ne_u32_e32 vcc, 29, v0
1089; GCN-NEXT:    v_cndmask_b32_e32 v1, v2, v1, vcc
1090; GCN-NEXT:    v_mov_b32_e32 v2, 0x41f80000
1091; GCN-NEXT:    v_cmp_ne_u32_e32 vcc, 30, v0
1092; GCN-NEXT:    v_cndmask_b32_e32 v1, v2, v1, vcc
1093; GCN-NEXT:    v_mov_b32_e32 v2, 0x42000000
1094; GCN-NEXT:    v_cmp_ne_u32_e32 vcc, 31, v0
1095; GCN-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
1096; GCN-NEXT:    s_setpc_b64 s[30:31]
1097entry:
1098  %ext = extractelement <32 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0, float 11.0, float 12.0, float 13.0, float 14.0, float 15.0, float 16.0, float 17.0, float 18.0, float 19.0, float 20.0, float 21.0, float 22.0, float 23.0, float 24.0, float 25.0, float 26.0, float 27.0, float 28.0, float 29.0, float 30.0, float 31.0, float 32.0>, i32 %sel
1099  ret float %ext
1100}
1101
; double16_extelt_vec: non-kernel function that returns one element, selected
; at run time by %sel, of the constant <16 x double> vector
; <1.1, 2.1, ..., 16.1>.
;
; The autogenerated assertions below select the two 32-bit halves of the
; result separately. The high half is accumulated in v3 through one
; v_cmp_eq_u32 + v_cndmask_b32 pair per index 1..14, each with its own
; constant (0x3ff19999 is the high dword of 1.1, 0x4000cccc of 2.1, and so on).
; The low half is accumulated in v2 and only takes four distinct constants
; (0x9999999a, 0xcccccccd, 0x66666666, 0x33333333), so the per-index compare
; results are merged with s_or_b64 and far fewer selects are emitted for it.
; Index 15 is handled last: its selects write the low half to v0 and the high
; half to v1 before returning with s_setpc_b64 s[30:31].
1102define double @double16_extelt_vec(i32 %sel) {
1103; GCN-LABEL: double16_extelt_vec:
1104; GCN:       ; %bb.0: ; %entry
1105; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1106; GCN-NEXT:    v_mov_b32_e32 v3, 0x3ff19999
1107; GCN-NEXT:    v_mov_b32_e32 v4, 0x4000cccc
1108; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
1109; GCN-NEXT:    v_cmp_eq_u32_e64 s[4:5], 2, v0
1110; GCN-NEXT:    v_mov_b32_e32 v1, 0x9999999a
1111; GCN-NEXT:    v_mov_b32_e32 v2, 0xcccccccd
1112; GCN-NEXT:    v_cndmask_b32_e32 v3, v3, v4, vcc
1113; GCN-NEXT:    v_mov_b32_e32 v4, 0x4008cccc
1114; GCN-NEXT:    s_or_b64 vcc, s[4:5], vcc
1115; GCN-NEXT:    v_cndmask_b32_e64 v3, v3, v4, s[4:5]
1116; GCN-NEXT:    v_cndmask_b32_e32 v2, v1, v2, vcc
1117; GCN-NEXT:    v_mov_b32_e32 v4, 0x40106666
1118; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v0
1119; GCN-NEXT:    v_cndmask_b32_e32 v3, v3, v4, vcc
1120; GCN-NEXT:    v_mov_b32_e32 v4, 0x40146666
1121; GCN-NEXT:    v_cmp_eq_u32_e64 s[4:5], 4, v0
1122; GCN-NEXT:    v_cndmask_b32_e64 v3, v3, v4, s[4:5]
1123; GCN-NEXT:    s_or_b64 s[4:5], s[4:5], vcc
1124; GCN-NEXT:    v_mov_b32_e32 v4, 0x40186666
1125; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v0
1126; GCN-NEXT:    v_cndmask_b32_e32 v3, v3, v4, vcc
1127; GCN-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
1128; GCN-NEXT:    v_mov_b32_e32 v5, 0x401c6666
1129; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v0
1130; GCN-NEXT:    v_mov_b32_e32 v4, 0x66666666
1131; GCN-NEXT:    v_cndmask_b32_e32 v3, v3, v5, vcc
1132; GCN-NEXT:    s_or_b64 vcc, vcc, s[4:5]
1133; GCN-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
1134; GCN-NEXT:    v_mov_b32_e32 v4, 0x40203333
1135; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 7, v0
1136; GCN-NEXT:    v_cndmask_b32_e32 v3, v3, v4, vcc
1137; GCN-NEXT:    v_mov_b32_e32 v4, 0x40223333
1138; GCN-NEXT:    v_cmp_eq_u32_e64 s[4:5], 8, v0
1139; GCN-NEXT:    v_cndmask_b32_e64 v3, v3, v4, s[4:5]
1140; GCN-NEXT:    s_or_b64 s[4:5], s[4:5], vcc
1141; GCN-NEXT:    v_mov_b32_e32 v4, 0x40243333
1142; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 9, v0
1143; GCN-NEXT:    v_cndmask_b32_e32 v3, v3, v4, vcc
1144; GCN-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
1145; GCN-NEXT:    v_mov_b32_e32 v4, 0x40263333
1146; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 10, v0
1147; GCN-NEXT:    v_cndmask_b32_e32 v3, v3, v4, vcc
1148; GCN-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
1149; GCN-NEXT:    v_mov_b32_e32 v4, 0x40283333
1150; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 11, v0
1151; GCN-NEXT:    v_cndmask_b32_e32 v3, v3, v4, vcc
1152; GCN-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
1153; GCN-NEXT:    v_mov_b32_e32 v4, 0x402a3333
1154; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 12, v0
1155; GCN-NEXT:    v_cndmask_b32_e32 v3, v3, v4, vcc
1156; GCN-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
1157; GCN-NEXT:    v_mov_b32_e32 v4, 0x402c3333
1158; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 13, v0
1159; GCN-NEXT:    v_cndmask_b32_e32 v3, v3, v4, vcc
1160; GCN-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
1161; GCN-NEXT:    v_mov_b32_e32 v5, 0x402e3333
1162; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 14, v0
1163; GCN-NEXT:    v_mov_b32_e32 v4, 0x33333333
1164; GCN-NEXT:    v_cndmask_b32_e32 v3, v3, v5, vcc
1165; GCN-NEXT:    s_or_b64 vcc, vcc, s[4:5]
1166; GCN-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
1167; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 15, v0
1168; GCN-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
1169; GCN-NEXT:    v_mov_b32_e32 v1, 0x40301999
1170; GCN-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
1171; GCN-NEXT:    s_setpc_b64 s[30:31]
1172entry:
1173  %ext = extractelement <16 x double> <double 1.1, double 2.1, double 3.1, double 4.1, double 5.1, double 6.1, double 7.1, double 8.1, double 9.1, double 10.1, double 11.1, double 12.1, double 13.1, double 14.1, double 15.1, double 16.1>, i32 %sel
1174  ret double %ext
1175}
1176