xref: /llvm-project/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll (revision c93e001ca695e905cb965b36d63f7a348d1dd809)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GPRIDX %s
3; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,MOVREL %s
4; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s
5; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s
6
; Dynamic extractelement from a constant <8 x float> (1.0 .. 8.0) with the
; index %sel passed as an ordinary (non-inreg) argument; the checks show it
; arriving in v0. Every target lowers this to a v_cmp_eq_u32 / v_cndmask_b32
; chain over indices 1..7. The GCN checks (shared by the gfx900 and fiji runs)
; first move the non-inline constants into VGPRs, while the GFX10PLUS checks
; use them as literal operands (0x40400000 = 3.0, 0x40a00000 = 5.0,
; 0x40c00000 = 6.0, 0x40e00000 = 7.0, 0x41000000 = 8.0).
7define float @dyn_extract_v8f32_const_s_v(i32 %sel) {
8; GCN-LABEL: dyn_extract_v8f32_const_s_v:
9; GCN:       ; %bb.0: ; %entry
10; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
12; GCN-NEXT:    v_mov_b32_e32 v1, 0x40400000
13; GCN-NEXT:    v_cndmask_b32_e64 v6, 1.0, 2.0, vcc
14; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v0
15; GCN-NEXT:    v_cndmask_b32_e32 v1, v6, v1, vcc
16; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v0
17; GCN-NEXT:    v_mov_b32_e32 v2, 0x40a00000
18; GCN-NEXT:    v_cndmask_b32_e64 v1, v1, 4.0, vcc
19; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v0
20; GCN-NEXT:    v_mov_b32_e32 v3, 0x40c00000
21; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
22; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v0
23; GCN-NEXT:    v_mov_b32_e32 v4, 0x40e00000
24; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
25; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v0
26; GCN-NEXT:    v_mov_b32_e32 v5, 0x41000000
27; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
28; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 7, v0
29; GCN-NEXT:    v_cndmask_b32_e32 v0, v1, v5, vcc
30; GCN-NEXT:    s_setpc_b64 s[30:31]
31;
32; GFX10PLUS-LABEL: dyn_extract_v8f32_const_s_v:
33; GFX10PLUS:       ; %bb.0: ; %entry
34; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
35; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
36; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v1, 1.0, 2.0, vcc_lo
37; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v0
38; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v1, v1, 0x40400000, vcc_lo
39; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v0
40; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v1, v1, 4.0, vcc_lo
41; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v0
42; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v1, v1, 0x40a00000, vcc_lo
43; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v0
44; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v1, v1, 0x40c00000, vcc_lo
45; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v0
46; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v1, v1, 0x40e00000, vcc_lo
47; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v0
48; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v0, v1, 0x41000000, vcc_lo
49; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
50entry:
51  %ext = extractelement <8 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>, i32 %sel
52  ret float %ext
53}
54
; Dynamic extractelement from a constant <8 x float> (1.0 .. 8.0) in an
; amdgpu_ps shader with an inreg index; the checks show the index in s2.
; The GPRIDX checks (gfx900 run) expect an s_cmp_eq_u32 / s_cselect_b32 chain.
; The MOVREL (fiji run) and GFX10PLUS checks expect the constants to be
; materialized into s4..s11, m0 to be set from s2, and a single s_movrels_b32
; relative to s4. The scalar result is copied to v0 for the float return.
55define amdgpu_ps float @dyn_extract_v8f32_const_s_s(i32 inreg %sel) {
56; GPRIDX-LABEL: dyn_extract_v8f32_const_s_s:
57; GPRIDX:       ; %bb.0: ; %entry
58; GPRIDX-NEXT:    s_cmp_eq_u32 s2, 1
59; GPRIDX-NEXT:    s_cselect_b32 s0, 2.0, 1.0
60; GPRIDX-NEXT:    s_cmp_eq_u32 s2, 2
61; GPRIDX-NEXT:    s_cselect_b32 s0, 0x40400000, s0
62; GPRIDX-NEXT:    s_cmp_eq_u32 s2, 3
63; GPRIDX-NEXT:    s_cselect_b32 s0, 4.0, s0
64; GPRIDX-NEXT:    s_cmp_eq_u32 s2, 4
65; GPRIDX-NEXT:    s_cselect_b32 s0, 0x40a00000, s0
66; GPRIDX-NEXT:    s_cmp_eq_u32 s2, 5
67; GPRIDX-NEXT:    s_cselect_b32 s0, 0x40c00000, s0
68; GPRIDX-NEXT:    s_cmp_eq_u32 s2, 6
69; GPRIDX-NEXT:    s_cselect_b32 s0, 0x40e00000, s0
70; GPRIDX-NEXT:    s_cmp_eq_u32 s2, 7
71; GPRIDX-NEXT:    s_cselect_b32 s0, 0x41000000, s0
72; GPRIDX-NEXT:    v_mov_b32_e32 v0, s0
73; GPRIDX-NEXT:    ; return to shader part epilog
74;
75; MOVREL-LABEL: dyn_extract_v8f32_const_s_s:
76; MOVREL:       ; %bb.0: ; %entry
77; MOVREL-NEXT:    s_mov_b32 s4, 1.0
78; MOVREL-NEXT:    s_mov_b32 m0, s2
79; MOVREL-NEXT:    s_mov_b32 s11, 0x41000000
80; MOVREL-NEXT:    s_mov_b32 s10, 0x40e00000
81; MOVREL-NEXT:    s_mov_b32 s9, 0x40c00000
82; MOVREL-NEXT:    s_mov_b32 s8, 0x40a00000
83; MOVREL-NEXT:    s_mov_b32 s7, 4.0
84; MOVREL-NEXT:    s_mov_b32 s6, 0x40400000
85; MOVREL-NEXT:    s_mov_b32 s5, 2.0
86; MOVREL-NEXT:    s_movrels_b32 s0, s4
87; MOVREL-NEXT:    v_mov_b32_e32 v0, s0
88; MOVREL-NEXT:    ; return to shader part epilog
89;
90; GFX10PLUS-LABEL: dyn_extract_v8f32_const_s_s:
91; GFX10PLUS:       ; %bb.0: ; %entry
92; GFX10PLUS-NEXT:    s_mov_b32 s4, 1.0
93; GFX10PLUS-NEXT:    s_mov_b32 m0, s2
94; GFX10PLUS-NEXT:    s_mov_b32 s11, 0x41000000
95; GFX10PLUS-NEXT:    s_mov_b32 s10, 0x40e00000
96; GFX10PLUS-NEXT:    s_mov_b32 s9, 0x40c00000
97; GFX10PLUS-NEXT:    s_mov_b32 s8, 0x40a00000
98; GFX10PLUS-NEXT:    s_mov_b32 s7, 4.0
99; GFX10PLUS-NEXT:    s_mov_b32 s6, 0x40400000
100; GFX10PLUS-NEXT:    s_mov_b32 s5, 2.0
101; GFX10PLUS-NEXT:    s_movrels_b32 s0, s4
102; GFX10PLUS-NEXT:    v_mov_b32_e32 v0, s0
103; GFX10PLUS-NEXT:    ; return to shader part epilog
104entry:
105  %ext = extractelement <8 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>, i32 %sel
106  ret float %ext
107}
108
; Dynamic extractelement from an inreg <8 x float> argument (s2..s9 in the
; checks) with a non-inreg index (v0 in the checks) in an amdgpu_ps shader.
; All targets expect a v_cmp_eq_u32 / v_cndmask_b32 chain over indices 1..7.
; The GCN checks copy each SGPR element into a VGPR before selecting, while
; the GFX10PLUS checks mostly use the SGPRs directly as v_cndmask operands.
109define amdgpu_ps float @dyn_extract_v8f32_s_v(<8 x float> inreg %vec, i32 %sel) {
110; GCN-LABEL: dyn_extract_v8f32_s_v:
111; GCN:       ; %bb.0: ; %entry
112; GCN-NEXT:    v_mov_b32_e32 v1, s2
113; GCN-NEXT:    v_mov_b32_e32 v2, s3
114; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
115; GCN-NEXT:    v_mov_b32_e32 v3, s4
116; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
117; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v0
118; GCN-NEXT:    v_mov_b32_e32 v4, s5
119; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
120; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v0
121; GCN-NEXT:    v_mov_b32_e32 v5, s6
122; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
123; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v0
124; GCN-NEXT:    v_mov_b32_e32 v6, s7
125; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
126; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v0
127; GCN-NEXT:    v_mov_b32_e32 v7, s8
128; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v6, vcc
129; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v0
130; GCN-NEXT:    v_mov_b32_e32 v8, s9
131; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
132; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 7, v0
133; GCN-NEXT:    v_cndmask_b32_e32 v0, v1, v8, vcc
134; GCN-NEXT:    ; return to shader part epilog
135;
136; GFX10PLUS-LABEL: dyn_extract_v8f32_s_v:
137; GFX10PLUS:       ; %bb.0: ; %entry
138; GFX10PLUS-NEXT:    v_mov_b32_e32 v1, s3
139; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
140; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v1, s2, v1, vcc_lo
141; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v0
142; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v1, v1, s4, vcc_lo
143; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v0
144; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v1, v1, s5, vcc_lo
145; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v0
146; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v1, v1, s6, vcc_lo
147; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v0
148; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v1, v1, s7, vcc_lo
149; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v0
150; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v1, v1, s8, vcc_lo
151; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v0
152; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v0, v1, s9, vcc_lo
153; GFX10PLUS-NEXT:    ; return to shader part epilog
154entry:
155  %ext = extractelement <8 x float> %vec, i32 %sel
156  ret float %ext
157}
158
; Dynamic extractelement from a non-inreg <8 x float> argument with a
; non-inreg index, using the default calling convention. In the checks the
; vector occupies v0..v7 and the index v8. All targets expect the same
; v_cmp_eq_u32 / v_cndmask_b32 chain accumulating into v0; the GCN and
; GFX10PLUS blocks differ only in using vcc versus vcc_lo.
159define float @dyn_extract_v8f32_v_v(<8 x float> %vec, i32 %sel) {
160; GCN-LABEL: dyn_extract_v8f32_v_v:
161; GCN:       ; %bb.0: ; %entry
162; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
163; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v8
164; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
165; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v8
166; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
167; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v8
168; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
169; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v8
170; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
171; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v8
172; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
173; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v8
174; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
175; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 7, v8
176; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc
177; GCN-NEXT:    s_setpc_b64 s[30:31]
178;
179; GFX10PLUS-LABEL: dyn_extract_v8f32_v_v:
180; GFX10PLUS:       ; %bb.0: ; %entry
181; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
182; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v8
183; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
184; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v8
185; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
186; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v8
187; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc_lo
188; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v8
189; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc_lo
190; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v8
191; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc_lo
192; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v8
193; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc_lo
194; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v8
195; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc_lo
196; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
197entry:
198  %ext = extractelement <8 x float> %vec, i32 %sel
199  ret float %ext
200}
201
; Dynamic extractelement from a non-inreg <8 x float> argument (v0..v7 in the
; checks) with an inreg index (s2 in the checks) in an amdgpu_ps shader.
; The GPRIDX checks (gfx900 run) expect a compare/select chain that compares
; s2 against each index with v_cmp_eq_u32_e64. The MOVREL (fiji run) and
; GFX10PLUS checks expect m0 to be loaded from s2 followed by one
; v_movrels_b32 relative to v0.
202define amdgpu_ps float @dyn_extract_v8f32_v_s(<8 x float> %vec, i32 inreg %sel) {
203; GPRIDX-LABEL: dyn_extract_v8f32_v_s:
204; GPRIDX:       ; %bb.0: ; %entry
205; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 1
206; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
207; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 2
208; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
209; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 3
210; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
211; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 4
212; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
213; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 5
214; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
215; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 6
216; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
217; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 7
218; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc
219; GPRIDX-NEXT:    ; return to shader part epilog
220;
221; MOVREL-LABEL: dyn_extract_v8f32_v_s:
222; MOVREL:       ; %bb.0: ; %entry
223; MOVREL-NEXT:    s_mov_b32 m0, s2
224; MOVREL-NEXT:    v_movrels_b32_e32 v0, v0
225; MOVREL-NEXT:    ; return to shader part epilog
226;
227; GFX10PLUS-LABEL: dyn_extract_v8f32_v_s:
228; GFX10PLUS:       ; %bb.0: ; %entry
229; GFX10PLUS-NEXT:    s_mov_b32 m0, s2
230; GFX10PLUS-NEXT:    v_movrels_b32_e32 v0, v0
231; GFX10PLUS-NEXT:    ; return to shader part epilog
232entry:
233  %ext = extractelement <8 x float> %vec, i32 %sel
234  ret float %ext
235}
236
; Dynamic extractelement from an inreg <8 x float> argument (s2..s9 in the
; checks) with an inreg index (s10 in the checks) in an amdgpu_ps shader.
; The GPRIDX checks (gfx900 run) expect an s_cmp_eq_u32 / s_cselect_b32 chain.
; The MOVREL (fiji run) and GFX10PLUS checks expect the vector to be copied
; down into s0..s7, m0 to be set from s10, and a single s_movrels_b32
; relative to s0. The scalar result is copied to v0 for the float return.
237define amdgpu_ps float @dyn_extract_v8f32_s_s(<8 x float> inreg %vec, i32 inreg %sel) {
238; GPRIDX-LABEL: dyn_extract_v8f32_s_s:
239; GPRIDX:       ; %bb.0: ; %entry
240; GPRIDX-NEXT:    s_cmp_eq_u32 s10, 1
241; GPRIDX-NEXT:    s_cselect_b32 s0, s3, s2
242; GPRIDX-NEXT:    s_cmp_eq_u32 s10, 2
243; GPRIDX-NEXT:    s_cselect_b32 s0, s4, s0
244; GPRIDX-NEXT:    s_cmp_eq_u32 s10, 3
245; GPRIDX-NEXT:    s_cselect_b32 s0, s5, s0
246; GPRIDX-NEXT:    s_cmp_eq_u32 s10, 4
247; GPRIDX-NEXT:    s_cselect_b32 s0, s6, s0
248; GPRIDX-NEXT:    s_cmp_eq_u32 s10, 5
249; GPRIDX-NEXT:    s_cselect_b32 s0, s7, s0
250; GPRIDX-NEXT:    s_cmp_eq_u32 s10, 6
251; GPRIDX-NEXT:    s_cselect_b32 s0, s8, s0
252; GPRIDX-NEXT:    s_cmp_eq_u32 s10, 7
253; GPRIDX-NEXT:    s_cselect_b32 s0, s9, s0
254; GPRIDX-NEXT:    v_mov_b32_e32 v0, s0
255; GPRIDX-NEXT:    ; return to shader part epilog
256;
257; MOVREL-LABEL: dyn_extract_v8f32_s_s:
258; MOVREL:       ; %bb.0: ; %entry
259; MOVREL-NEXT:    s_mov_b32 s0, s2
260; MOVREL-NEXT:    s_mov_b32 m0, s10
261; MOVREL-NEXT:    s_mov_b32 s1, s3
262; MOVREL-NEXT:    s_mov_b32 s2, s4
263; MOVREL-NEXT:    s_mov_b32 s3, s5
264; MOVREL-NEXT:    s_mov_b32 s4, s6
265; MOVREL-NEXT:    s_mov_b32 s5, s7
266; MOVREL-NEXT:    s_mov_b32 s6, s8
267; MOVREL-NEXT:    s_mov_b32 s7, s9
268; MOVREL-NEXT:    s_movrels_b32 s0, s0
269; MOVREL-NEXT:    v_mov_b32_e32 v0, s0
270; MOVREL-NEXT:    ; return to shader part epilog
271;
272; GFX10PLUS-LABEL: dyn_extract_v8f32_s_s:
273; GFX10PLUS:       ; %bb.0: ; %entry
274; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
275; GFX10PLUS-NEXT:    s_mov_b32 m0, s10
276; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
277; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
278; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
279; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
280; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
281; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
282; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
283; GFX10PLUS-NEXT:    s_movrels_b32 s0, s0
284; GFX10PLUS-NEXT:    v_mov_b32_e32 v0, s0
285; GFX10PLUS-NEXT:    ; return to shader part epilog
286entry:
287  %ext = extractelement <8 x float> %vec, i32 %sel
288  ret float %ext
289}
290
; Dynamic extractelement from a constant <8 x i64> (1 .. 8) with a non-inreg
; index (v0 in the checks), using the default calling convention. Each 64-bit
; constant is materialized with s_mov_b64, and every target selects the low
; and high 32-bit halves separately with paired v_cndmask_b32 instructions
; per index, returning the result in v0/v1. This function has separate GFX10
; and GFX11 check blocks (no shared GFX10PLUS block): the GFX11 checks use
; v_dual_mov_b32 and different scratch SGPR pairs.
291define i64 @dyn_extract_v8i64_const_s_v(i32 %sel) {
292; GCN-LABEL: dyn_extract_v8i64_const_s_v:
293; GCN:       ; %bb.0: ; %entry
294; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
295; GCN-NEXT:    s_mov_b64 s[16:17], 2
296; GCN-NEXT:    s_mov_b64 s[18:19], 1
297; GCN-NEXT:    s_mov_b64 s[14:15], 3
298; GCN-NEXT:    v_mov_b32_e32 v1, s18
299; GCN-NEXT:    v_mov_b32_e32 v2, s19
300; GCN-NEXT:    v_mov_b32_e32 v3, s16
301; GCN-NEXT:    v_mov_b32_e32 v4, s17
302; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
303; GCN-NEXT:    s_mov_b64 s[12:13], 4
304; GCN-NEXT:    v_mov_b32_e32 v5, s14
305; GCN-NEXT:    v_mov_b32_e32 v6, s15
306; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
307; GCN-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
308; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v0
309; GCN-NEXT:    s_mov_b64 s[10:11], 5
310; GCN-NEXT:    v_mov_b32_e32 v7, s12
311; GCN-NEXT:    v_mov_b32_e32 v8, s13
312; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
313; GCN-NEXT:    v_cndmask_b32_e32 v2, v2, v6, vcc
314; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v0
315; GCN-NEXT:    s_mov_b64 s[8:9], 6
316; GCN-NEXT:    v_mov_b32_e32 v9, s10
317; GCN-NEXT:    v_mov_b32_e32 v10, s11
318; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
319; GCN-NEXT:    v_cndmask_b32_e32 v2, v2, v8, vcc
320; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v0
321; GCN-NEXT:    s_mov_b64 s[6:7], 7
322; GCN-NEXT:    v_mov_b32_e32 v11, s8
323; GCN-NEXT:    v_mov_b32_e32 v12, s9
324; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc
325; GCN-NEXT:    v_cndmask_b32_e32 v2, v2, v10, vcc
326; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v0
327; GCN-NEXT:    s_mov_b64 s[4:5], 8
328; GCN-NEXT:    v_mov_b32_e32 v13, s6
329; GCN-NEXT:    v_mov_b32_e32 v14, s7
330; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v11, vcc
331; GCN-NEXT:    v_cndmask_b32_e32 v2, v2, v12, vcc
332; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v0
333; GCN-NEXT:    v_mov_b32_e32 v15, s4
334; GCN-NEXT:    v_mov_b32_e32 v16, s5
335; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v13, vcc
336; GCN-NEXT:    v_cndmask_b32_e32 v2, v2, v14, vcc
337; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 7, v0
338; GCN-NEXT:    v_cndmask_b32_e32 v0, v1, v15, vcc
339; GCN-NEXT:    v_cndmask_b32_e32 v1, v2, v16, vcc
340; GCN-NEXT:    s_setpc_b64 s[30:31]
341;
342; GFX10-LABEL: dyn_extract_v8i64_const_s_v:
343; GFX10:       ; %bb.0: ; %entry
344; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
345; GFX10-NEXT:    s_mov_b64 s[4:5], 2
346; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
347; GFX10-NEXT:    v_mov_b32_e32 v1, s4
348; GFX10-NEXT:    v_mov_b32_e32 v2, s5
349; GFX10-NEXT:    s_mov_b64 s[6:7], 1
350; GFX10-NEXT:    s_mov_b64 s[4:5], 3
351; GFX10-NEXT:    v_cndmask_b32_e32 v1, s6, v1, vcc_lo
352; GFX10-NEXT:    v_cndmask_b32_e32 v2, s7, v2, vcc_lo
353; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v0
354; GFX10-NEXT:    s_mov_b64 s[6:7], 4
355; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s4, vcc_lo
356; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, s5, vcc_lo
357; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v0
358; GFX10-NEXT:    s_mov_b64 s[4:5], 5
359; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s6, vcc_lo
360; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, s7, vcc_lo
361; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v0
362; GFX10-NEXT:    s_mov_b64 s[6:7], 6
363; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s4, vcc_lo
364; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, s5, vcc_lo
365; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v0
366; GFX10-NEXT:    s_mov_b64 s[4:5], 7
367; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s6, vcc_lo
368; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, s7, vcc_lo
369; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v0
370; GFX10-NEXT:    s_mov_b64 s[6:7], 8
371; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s4, vcc_lo
372; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, s5, vcc_lo
373; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v0
374; GFX10-NEXT:    v_cndmask_b32_e64 v0, v1, s6, vcc_lo
375; GFX10-NEXT:    v_cndmask_b32_e64 v1, v2, s7, vcc_lo
376; GFX10-NEXT:    s_setpc_b64 s[30:31]
377;
378; GFX11-LABEL: dyn_extract_v8i64_const_s_v:
379; GFX11:       ; %bb.0: ; %entry
380; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
381; GFX11-NEXT:    s_mov_b64 s[0:1], 2
382; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
383; GFX11-NEXT:    v_dual_mov_b32 v1, s0 :: v_dual_mov_b32 v2, s1
384; GFX11-NEXT:    s_mov_b64 s[2:3], 1
385; GFX11-NEXT:    s_mov_b64 s[0:1], 3
386; GFX11-NEXT:    v_cndmask_b32_e32 v1, s2, v1, vcc_lo
387; GFX11-NEXT:    v_cndmask_b32_e32 v2, s3, v2, vcc_lo
388; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v0
389; GFX11-NEXT:    s_mov_b64 s[2:3], 4
390; GFX11-NEXT:    v_cndmask_b32_e64 v1, v1, s0, vcc_lo
391; GFX11-NEXT:    v_cndmask_b32_e64 v2, v2, s1, vcc_lo
392; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v0
393; GFX11-NEXT:    s_mov_b64 s[0:1], 5
394; GFX11-NEXT:    v_cndmask_b32_e64 v1, v1, s2, vcc_lo
395; GFX11-NEXT:    v_cndmask_b32_e64 v2, v2, s3, vcc_lo
396; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v0
397; GFX11-NEXT:    s_mov_b64 s[2:3], 6
398; GFX11-NEXT:    v_cndmask_b32_e64 v1, v1, s0, vcc_lo
399; GFX11-NEXT:    v_cndmask_b32_e64 v2, v2, s1, vcc_lo
400; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v0
401; GFX11-NEXT:    s_mov_b64 s[0:1], 7
402; GFX11-NEXT:    v_cndmask_b32_e64 v1, v1, s2, vcc_lo
403; GFX11-NEXT:    v_cndmask_b32_e64 v2, v2, s3, vcc_lo
404; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v0
405; GFX11-NEXT:    s_mov_b64 s[2:3], 8
406; GFX11-NEXT:    v_cndmask_b32_e64 v1, v1, s0, vcc_lo
407; GFX11-NEXT:    v_cndmask_b32_e64 v2, v2, s1, vcc_lo
408; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v0
409; GFX11-NEXT:    v_cndmask_b32_e64 v0, v1, s2, vcc_lo
410; GFX11-NEXT:    v_cndmask_b32_e64 v1, v2, s3, vcc_lo
411; GFX11-NEXT:    s_setpc_b64 s[30:31]
412entry:
413  %ext = extractelement <8 x i64> <i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8>, i32 %sel
414  ret i64 %ext
415}
416
; Dynamic extractelement from a constant <8 x i64> (1 .. 8) in an amdgpu_ps
; shader with an inreg index (s2 in the checks). The function returns void,
; so the extracted value is stored to an undef global pointer; that store is
; its only use. All four check blocks expect the same indexing sequence: the
; constants are materialized into s[4:5]..s[18:19], m0 is set from s2, and
; one s_movrels_b64 relative to s[4:5] picks the element. The blocks differ
; in the store mnemonic (global_store_dwordx2, flat_store_dwordx2 or
; global_store_b64), and GFX11 pairs the two result copies into one
; v_dual_mov_b32.
417define amdgpu_ps void @dyn_extract_v8i64_const_s_s(i32 inreg %sel) {
418; GPRIDX-LABEL: dyn_extract_v8i64_const_s_s:
419; GPRIDX:       ; %bb.0: ; %entry
420; GPRIDX-NEXT:    s_mov_b64 s[4:5], 1
421; GPRIDX-NEXT:    s_mov_b32 m0, s2
422; GPRIDX-NEXT:    s_mov_b64 s[18:19], 8
423; GPRIDX-NEXT:    s_mov_b64 s[16:17], 7
424; GPRIDX-NEXT:    s_mov_b64 s[14:15], 6
425; GPRIDX-NEXT:    s_mov_b64 s[12:13], 5
426; GPRIDX-NEXT:    s_mov_b64 s[10:11], 4
427; GPRIDX-NEXT:    s_mov_b64 s[8:9], 3
428; GPRIDX-NEXT:    s_mov_b64 s[6:7], 2
429; GPRIDX-NEXT:    s_movrels_b64 s[0:1], s[4:5]
430; GPRIDX-NEXT:    v_mov_b32_e32 v0, s0
431; GPRIDX-NEXT:    v_mov_b32_e32 v1, s1
432; GPRIDX-NEXT:    global_store_dwordx2 v[0:1], v[0:1], off
433; GPRIDX-NEXT:    s_endpgm
434;
435; MOVREL-LABEL: dyn_extract_v8i64_const_s_s:
436; MOVREL:       ; %bb.0: ; %entry
437; MOVREL-NEXT:    s_mov_b64 s[4:5], 1
438; MOVREL-NEXT:    s_mov_b32 m0, s2
439; MOVREL-NEXT:    s_mov_b64 s[18:19], 8
440; MOVREL-NEXT:    s_mov_b64 s[16:17], 7
441; MOVREL-NEXT:    s_mov_b64 s[14:15], 6
442; MOVREL-NEXT:    s_mov_b64 s[12:13], 5
443; MOVREL-NEXT:    s_mov_b64 s[10:11], 4
444; MOVREL-NEXT:    s_mov_b64 s[8:9], 3
445; MOVREL-NEXT:    s_mov_b64 s[6:7], 2
446; MOVREL-NEXT:    s_movrels_b64 s[0:1], s[4:5]
447; MOVREL-NEXT:    v_mov_b32_e32 v0, s0
448; MOVREL-NEXT:    v_mov_b32_e32 v1, s1
449; MOVREL-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
450; MOVREL-NEXT:    s_endpgm
451;
452; GFX10-LABEL: dyn_extract_v8i64_const_s_s:
453; GFX10:       ; %bb.0: ; %entry
454; GFX10-NEXT:    s_mov_b64 s[4:5], 1
455; GFX10-NEXT:    s_mov_b32 m0, s2
456; GFX10-NEXT:    s_mov_b64 s[18:19], 8
457; GFX10-NEXT:    s_mov_b64 s[16:17], 7
458; GFX10-NEXT:    s_mov_b64 s[14:15], 6
459; GFX10-NEXT:    s_mov_b64 s[12:13], 5
460; GFX10-NEXT:    s_mov_b64 s[10:11], 4
461; GFX10-NEXT:    s_mov_b64 s[8:9], 3
462; GFX10-NEXT:    s_mov_b64 s[6:7], 2
463; GFX10-NEXT:    s_movrels_b64 s[0:1], s[4:5]
464; GFX10-NEXT:    v_mov_b32_e32 v0, s0
465; GFX10-NEXT:    v_mov_b32_e32 v1, s1
466; GFX10-NEXT:    global_store_dwordx2 v[0:1], v[0:1], off
467; GFX10-NEXT:    s_endpgm
468;
469; GFX11-LABEL: dyn_extract_v8i64_const_s_s:
470; GFX11:       ; %bb.0: ; %entry
471; GFX11-NEXT:    s_mov_b64 s[4:5], 1
472; GFX11-NEXT:    s_mov_b32 m0, s2
473; GFX11-NEXT:    s_mov_b64 s[18:19], 8
474; GFX11-NEXT:    s_mov_b64 s[16:17], 7
475; GFX11-NEXT:    s_mov_b64 s[14:15], 6
476; GFX11-NEXT:    s_mov_b64 s[12:13], 5
477; GFX11-NEXT:    s_mov_b64 s[10:11], 4
478; GFX11-NEXT:    s_mov_b64 s[8:9], 3
479; GFX11-NEXT:    s_mov_b64 s[6:7], 2
480; GFX11-NEXT:    s_movrels_b64 s[0:1], s[4:5]
481; GFX11-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
482; GFX11-NEXT:    global_store_b64 v[0:1], v[0:1], off
483; GFX11-NEXT:    s_endpgm
484entry:
485  %ext = extractelement <8 x i64> <i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8>, i32 %sel
486  store i64 %ext, ptr addrspace(1) undef
487  ret void
488}
489
; Dynamic extractelement from an inreg <8 x i64> argument (s2..s17 in the
; checks) with a non-inreg index (v0 in the checks) in an amdgpu_ps shader.
; The function returns void, so the extracted value is stored to an undef
; global pointer; that store is its only use. Every target expects a
; v_cmp_eq_u32 chain over indices 1..7 with two v_cndmask_b32 per step, one
; for each 32-bit half. The GPRIDX and MOVREL checks copy every SGPR into a
; VGPR first; the GFX10 and GFX11 checks mostly use the SGPRs directly as
; v_cndmask operands. The store mnemonic differs per target.
490define amdgpu_ps void @dyn_extract_v8i64_s_v(<8 x i64> inreg %vec, i32 %sel) {
491; GPRIDX-LABEL: dyn_extract_v8i64_s_v:
492; GPRIDX:       ; %bb.0: ; %entry
493; GPRIDX-NEXT:    v_mov_b32_e32 v1, s2
494; GPRIDX-NEXT:    v_mov_b32_e32 v2, s3
495; GPRIDX-NEXT:    v_mov_b32_e32 v3, s4
496; GPRIDX-NEXT:    v_mov_b32_e32 v4, s5
497; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
498; GPRIDX-NEXT:    v_mov_b32_e32 v5, s6
499; GPRIDX-NEXT:    v_mov_b32_e32 v6, s7
500; GPRIDX-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
501; GPRIDX-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
502; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v0
503; GPRIDX-NEXT:    v_mov_b32_e32 v7, s8
504; GPRIDX-NEXT:    v_mov_b32_e32 v8, s9
505; GPRIDX-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
506; GPRIDX-NEXT:    v_cndmask_b32_e32 v2, v2, v6, vcc
507; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v0
508; GPRIDX-NEXT:    v_mov_b32_e32 v9, s10
509; GPRIDX-NEXT:    v_mov_b32_e32 v10, s11
510; GPRIDX-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
511; GPRIDX-NEXT:    v_cndmask_b32_e32 v2, v2, v8, vcc
512; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v0
513; GPRIDX-NEXT:    v_mov_b32_e32 v11, s12
514; GPRIDX-NEXT:    v_mov_b32_e32 v12, s13
515; GPRIDX-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc
516; GPRIDX-NEXT:    v_cndmask_b32_e32 v2, v2, v10, vcc
517; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v0
518; GPRIDX-NEXT:    v_mov_b32_e32 v13, s14
519; GPRIDX-NEXT:    v_mov_b32_e32 v14, s15
520; GPRIDX-NEXT:    v_cndmask_b32_e32 v1, v1, v11, vcc
521; GPRIDX-NEXT:    v_cndmask_b32_e32 v2, v2, v12, vcc
522; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v0
523; GPRIDX-NEXT:    v_mov_b32_e32 v15, s16
524; GPRIDX-NEXT:    v_mov_b32_e32 v16, s17
525; GPRIDX-NEXT:    v_cndmask_b32_e32 v1, v1, v13, vcc
526; GPRIDX-NEXT:    v_cndmask_b32_e32 v2, v2, v14, vcc
527; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 7, v0
528; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v1, v15, vcc
529; GPRIDX-NEXT:    v_cndmask_b32_e32 v1, v2, v16, vcc
530; GPRIDX-NEXT:    global_store_dwordx2 v[0:1], v[0:1], off
531; GPRIDX-NEXT:    s_endpgm
532;
533; MOVREL-LABEL: dyn_extract_v8i64_s_v:
534; MOVREL:       ; %bb.0: ; %entry
535; MOVREL-NEXT:    v_mov_b32_e32 v1, s2
536; MOVREL-NEXT:    v_mov_b32_e32 v2, s3
537; MOVREL-NEXT:    v_mov_b32_e32 v3, s4
538; MOVREL-NEXT:    v_mov_b32_e32 v4, s5
539; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
540; MOVREL-NEXT:    v_mov_b32_e32 v5, s6
541; MOVREL-NEXT:    v_mov_b32_e32 v6, s7
542; MOVREL-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
543; MOVREL-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
544; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v0
545; MOVREL-NEXT:    v_mov_b32_e32 v7, s8
546; MOVREL-NEXT:    v_mov_b32_e32 v8, s9
547; MOVREL-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
548; MOVREL-NEXT:    v_cndmask_b32_e32 v2, v2, v6, vcc
549; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v0
550; MOVREL-NEXT:    v_mov_b32_e32 v9, s10
551; MOVREL-NEXT:    v_mov_b32_e32 v10, s11
552; MOVREL-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
553; MOVREL-NEXT:    v_cndmask_b32_e32 v2, v2, v8, vcc
554; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v0
555; MOVREL-NEXT:    v_mov_b32_e32 v11, s12
556; MOVREL-NEXT:    v_mov_b32_e32 v12, s13
557; MOVREL-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc
558; MOVREL-NEXT:    v_cndmask_b32_e32 v2, v2, v10, vcc
559; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v0
560; MOVREL-NEXT:    v_mov_b32_e32 v13, s14
561; MOVREL-NEXT:    v_mov_b32_e32 v14, s15
562; MOVREL-NEXT:    v_cndmask_b32_e32 v1, v1, v11, vcc
563; MOVREL-NEXT:    v_cndmask_b32_e32 v2, v2, v12, vcc
564; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v0
565; MOVREL-NEXT:    v_mov_b32_e32 v15, s16
566; MOVREL-NEXT:    v_mov_b32_e32 v16, s17
567; MOVREL-NEXT:    v_cndmask_b32_e32 v1, v1, v13, vcc
568; MOVREL-NEXT:    v_cndmask_b32_e32 v2, v2, v14, vcc
569; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 7, v0
570; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v1, v15, vcc
571; MOVREL-NEXT:    v_cndmask_b32_e32 v1, v2, v16, vcc
572; MOVREL-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
573; MOVREL-NEXT:    s_endpgm
574;
575; GFX10-LABEL: dyn_extract_v8i64_s_v:
576; GFX10:       ; %bb.0: ; %entry
577; GFX10-NEXT:    v_mov_b32_e32 v1, s4
578; GFX10-NEXT:    v_mov_b32_e32 v2, s5
579; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
580; GFX10-NEXT:    v_cndmask_b32_e32 v1, s2, v1, vcc_lo
581; GFX10-NEXT:    v_cndmask_b32_e32 v2, s3, v2, vcc_lo
582; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v0
583; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s6, vcc_lo
584; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, s7, vcc_lo
585; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v0
586; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s8, vcc_lo
587; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, s9, vcc_lo
588; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v0
589; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s10, vcc_lo
590; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, s11, vcc_lo
591; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v0
592; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s12, vcc_lo
593; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, s13, vcc_lo
594; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v0
595; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s14, vcc_lo
596; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, s15, vcc_lo
597; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v0
598; GFX10-NEXT:    v_cndmask_b32_e64 v0, v1, s16, vcc_lo
599; GFX10-NEXT:    v_cndmask_b32_e64 v1, v2, s17, vcc_lo
600; GFX10-NEXT:    global_store_dwordx2 v[0:1], v[0:1], off
601; GFX10-NEXT:    s_endpgm
602;
603; GFX11-LABEL: dyn_extract_v8i64_s_v:
604; GFX11:       ; %bb.0: ; %entry
605; GFX11-NEXT:    v_dual_mov_b32 v1, s4 :: v_dual_mov_b32 v2, s5
606; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
607; GFX11-NEXT:    v_cndmask_b32_e32 v1, s2, v1, vcc_lo
608; GFX11-NEXT:    v_cndmask_b32_e32 v2, s3, v2, vcc_lo
609; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v0
610; GFX11-NEXT:    v_cndmask_b32_e64 v1, v1, s6, vcc_lo
611; GFX11-NEXT:    v_cndmask_b32_e64 v2, v2, s7, vcc_lo
612; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v0
613; GFX11-NEXT:    v_cndmask_b32_e64 v1, v1, s8, vcc_lo
614; GFX11-NEXT:    v_cndmask_b32_e64 v2, v2, s9, vcc_lo
615; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v0
616; GFX11-NEXT:    v_cndmask_b32_e64 v1, v1, s10, vcc_lo
617; GFX11-NEXT:    v_cndmask_b32_e64 v2, v2, s11, vcc_lo
618; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v0
619; GFX11-NEXT:    v_cndmask_b32_e64 v1, v1, s12, vcc_lo
620; GFX11-NEXT:    v_cndmask_b32_e64 v2, v2, s13, vcc_lo
621; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v0
622; GFX11-NEXT:    v_cndmask_b32_e64 v1, v1, s14, vcc_lo
623; GFX11-NEXT:    v_cndmask_b32_e64 v2, v2, s15, vcc_lo
624; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v0
625; GFX11-NEXT:    v_cndmask_b32_e64 v0, v1, s16, vcc_lo
626; GFX11-NEXT:    v_cndmask_b32_e64 v1, v2, s17, vcc_lo
627; GFX11-NEXT:    global_store_b64 v[0:1], v[0:1], off
628; GFX11-NEXT:    s_endpgm
629entry:
630  %ext = extractelement <8 x i64> %vec, i32 %sel
631  store i64 %ext, ptr addrspace(1) undef
632  ret void
633}
634
; Dynamic extractelement from a non-inreg <8 x i64> argument with a non-inreg
; index, using the default calling convention. In the checks the vector
; occupies v0..v15 and the index v16. Every target expects a v_cmp_eq_u32
; chain over indices 1..7 that selects the low half into v0 and the high half
; into v1. The GCN and GFX10 checks use two v_cndmask_b32 per step, while the
; GFX11 checks fuse each pair into one v_dual_cndmask_b32.
635define i64 @dyn_extract_v8i64_v_v(<8 x i64> %vec, i32 %sel) {
636; GCN-LABEL: dyn_extract_v8i64_v_v:
637; GCN:       ; %bb.0: ; %entry
638; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
639; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v16
640; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
641; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
642; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v16
643; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
644; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
645; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v16
646; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
647; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
648; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v16
649; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc
650; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc
651; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v16
652; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc
653; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v11, vcc
654; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v16
655; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v12, vcc
656; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v13, vcc
657; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 7, v16
658; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v14, vcc
659; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v15, vcc
660; GCN-NEXT:    s_setpc_b64 s[30:31]
661;
662; GFX10-LABEL: dyn_extract_v8i64_v_v:
663; GFX10:       ; %bb.0: ; %entry
664; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
665; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v16
666; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
667; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc_lo
668; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v16
669; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc_lo
670; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc_lo
671; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v16
672; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc_lo
673; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc_lo
674; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v16
675; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc_lo
676; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc_lo
677; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v16
678; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc_lo
679; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v11, vcc_lo
680; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v16
681; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v12, vcc_lo
682; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v13, vcc_lo
683; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v16
684; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v14, vcc_lo
685; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v15, vcc_lo
686; GFX10-NEXT:    s_setpc_b64 s[30:31]
687;
688; GFX11-LABEL: dyn_extract_v8i64_v_v:
689; GFX11:       ; %bb.0: ; %entry
690; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
691; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v16
692; GFX11-NEXT:    v_dual_cndmask_b32 v0, v0, v2 :: v_dual_cndmask_b32 v1, v1, v3
693; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v16
694; GFX11-NEXT:    v_dual_cndmask_b32 v0, v0, v4 :: v_dual_cndmask_b32 v1, v1, v5
695; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v16
696; GFX11-NEXT:    v_dual_cndmask_b32 v0, v0, v6 :: v_dual_cndmask_b32 v1, v1, v7
697; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v16
698; GFX11-NEXT:    v_dual_cndmask_b32 v0, v0, v8 :: v_dual_cndmask_b32 v1, v1, v9
699; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v16
700; GFX11-NEXT:    v_dual_cndmask_b32 v0, v0, v10 :: v_dual_cndmask_b32 v1, v1, v11
701; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v16
702; GFX11-NEXT:    v_dual_cndmask_b32 v0, v0, v12 :: v_dual_cndmask_b32 v1, v1, v13
703; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v16
704; GFX11-NEXT:    v_dual_cndmask_b32 v0, v0, v14 :: v_dual_cndmask_b32 v1, v1, v15
705; GFX11-NEXT:    s_setpc_b64 s[30:31]
706entry:
707  %ext = extractelement <8 x i64> %vec, i32 %sel
708  ret i64 %ext
709}
710
; Dynamic extractelement of an i64 from an <8 x i64> vector.
; %vec is passed without inreg (the checks read it starting at v0) and %sel is
; passed inreg (the checks read it from s2). The extracted value is stored
; through an undef addrspace(1) pointer.
; Expected code, by check prefix
;   - the GPRIDX checks shift the selector left by 1 into s0 and read the two
;     halves with v_mov_b32 between s_set_gpr_idx_on and s_set_gpr_idx_off.
;   - the MOVREL, GFX10 and GFX11 checks shift the selector left by 1 into m0
;     and read the two halves with v_movrels_b32; they differ from each other
;     only in the store mnemonic.
; The shift by 1 is consistent with each i64 element occupying two 32-bit
; registers.
711define amdgpu_ps void @dyn_extract_v8i64_v_s(<8 x i64> %vec, i32 inreg %sel) {
712; GPRIDX-LABEL: dyn_extract_v8i64_v_s:
713; GPRIDX:       ; %bb.0: ; %entry
714; GPRIDX-NEXT:    s_lshl_b32 s0, s2, 1
715; GPRIDX-NEXT:    s_set_gpr_idx_on s0, gpr_idx(SRC0)
716; GPRIDX-NEXT:    v_mov_b32_e32 v16, v0
717; GPRIDX-NEXT:    v_mov_b32_e32 v17, v1
718; GPRIDX-NEXT:    s_set_gpr_idx_off
719; GPRIDX-NEXT:    global_store_dwordx2 v[0:1], v[16:17], off
720; GPRIDX-NEXT:    s_endpgm
721;
722; MOVREL-LABEL: dyn_extract_v8i64_v_s:
723; MOVREL:       ; %bb.0: ; %entry
724; MOVREL-NEXT:    s_lshl_b32 m0, s2, 1
725; MOVREL-NEXT:    v_movrels_b32_e32 v16, v0
726; MOVREL-NEXT:    v_movrels_b32_e32 v17, v1
727; MOVREL-NEXT:    flat_store_dwordx2 v[0:1], v[16:17]
728; MOVREL-NEXT:    s_endpgm
729;
730; GFX10-LABEL: dyn_extract_v8i64_v_s:
731; GFX10:       ; %bb.0: ; %entry
732; GFX10-NEXT:    s_lshl_b32 m0, s2, 1
733; GFX10-NEXT:    v_movrels_b32_e32 v16, v0
734; GFX10-NEXT:    v_movrels_b32_e32 v17, v1
735; GFX10-NEXT:    global_store_dwordx2 v[0:1], v[16:17], off
736; GFX10-NEXT:    s_endpgm
737;
738; GFX11-LABEL: dyn_extract_v8i64_v_s:
739; GFX11:       ; %bb.0: ; %entry
740; GFX11-NEXT:    s_lshl_b32 m0, s2, 1
741; GFX11-NEXT:    v_movrels_b32_e32 v16, v0
742; GFX11-NEXT:    v_movrels_b32_e32 v17, v1
743; GFX11-NEXT:    global_store_b64 v[0:1], v[16:17], off
744; GFX11-NEXT:    s_endpgm
745entry:
; %sel is a function argument, so the element index is only known at run time.
746  %ext = extractelement <8 x i64> %vec, i32 %sel
; The store is the only use of %ext; the function itself returns void.
747  store i64 %ext, ptr addrspace(1) undef
748  ret void
749}
750
; Dynamic extractelement of an i64 from an <8 x i64> vector where both the
; vector and the selector are passed inreg. The extracted value is stored
; through an undef addrspace(1) pointer.
; All four check blocks expect the same scalar sequence
;   - the sixteen vector dwords are copied from s2..s17 down to s0..s15,
;   - m0 is loaded with the selector from s18,
;   - s_movrels_b64 reads the selected pair relative to s[0:1],
;   - the result is moved into v0/v1 and stored.
; The blocks differ only in the tail. The GFX11 checks use one
; v_dual_mov_b32 and global_store_b64, the MOVREL checks use
; flat_store_dwordx2, and the other two use global_store_dwordx2.
751define amdgpu_ps void @dyn_extract_v8i64_s_s(<8 x i64> inreg %vec, i32 inreg %sel) {
752; GPRIDX-LABEL: dyn_extract_v8i64_s_s:
753; GPRIDX:       ; %bb.0: ; %entry
754; GPRIDX-NEXT:    s_mov_b32 s0, s2
755; GPRIDX-NEXT:    s_mov_b32 s1, s3
756; GPRIDX-NEXT:    s_mov_b32 m0, s18
757; GPRIDX-NEXT:    s_mov_b32 s2, s4
758; GPRIDX-NEXT:    s_mov_b32 s3, s5
759; GPRIDX-NEXT:    s_mov_b32 s4, s6
760; GPRIDX-NEXT:    s_mov_b32 s5, s7
761; GPRIDX-NEXT:    s_mov_b32 s6, s8
762; GPRIDX-NEXT:    s_mov_b32 s7, s9
763; GPRIDX-NEXT:    s_mov_b32 s8, s10
764; GPRIDX-NEXT:    s_mov_b32 s9, s11
765; GPRIDX-NEXT:    s_mov_b32 s10, s12
766; GPRIDX-NEXT:    s_mov_b32 s11, s13
767; GPRIDX-NEXT:    s_mov_b32 s12, s14
768; GPRIDX-NEXT:    s_mov_b32 s13, s15
769; GPRIDX-NEXT:    s_mov_b32 s14, s16
770; GPRIDX-NEXT:    s_mov_b32 s15, s17
771; GPRIDX-NEXT:    s_movrels_b64 s[0:1], s[0:1]
772; GPRIDX-NEXT:    v_mov_b32_e32 v0, s0
773; GPRIDX-NEXT:    v_mov_b32_e32 v1, s1
774; GPRIDX-NEXT:    global_store_dwordx2 v[0:1], v[0:1], off
775; GPRIDX-NEXT:    s_endpgm
776;
777; MOVREL-LABEL: dyn_extract_v8i64_s_s:
778; MOVREL:       ; %bb.0: ; %entry
779; MOVREL-NEXT:    s_mov_b32 s0, s2
780; MOVREL-NEXT:    s_mov_b32 s1, s3
781; MOVREL-NEXT:    s_mov_b32 m0, s18
782; MOVREL-NEXT:    s_mov_b32 s2, s4
783; MOVREL-NEXT:    s_mov_b32 s3, s5
784; MOVREL-NEXT:    s_mov_b32 s4, s6
785; MOVREL-NEXT:    s_mov_b32 s5, s7
786; MOVREL-NEXT:    s_mov_b32 s6, s8
787; MOVREL-NEXT:    s_mov_b32 s7, s9
788; MOVREL-NEXT:    s_mov_b32 s8, s10
789; MOVREL-NEXT:    s_mov_b32 s9, s11
790; MOVREL-NEXT:    s_mov_b32 s10, s12
791; MOVREL-NEXT:    s_mov_b32 s11, s13
792; MOVREL-NEXT:    s_mov_b32 s12, s14
793; MOVREL-NEXT:    s_mov_b32 s13, s15
794; MOVREL-NEXT:    s_mov_b32 s14, s16
795; MOVREL-NEXT:    s_mov_b32 s15, s17
796; MOVREL-NEXT:    s_movrels_b64 s[0:1], s[0:1]
797; MOVREL-NEXT:    v_mov_b32_e32 v0, s0
798; MOVREL-NEXT:    v_mov_b32_e32 v1, s1
799; MOVREL-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
800; MOVREL-NEXT:    s_endpgm
801;
802; GFX10-LABEL: dyn_extract_v8i64_s_s:
803; GFX10:       ; %bb.0: ; %entry
804; GFX10-NEXT:    s_mov_b32 s0, s2
805; GFX10-NEXT:    s_mov_b32 s1, s3
806; GFX10-NEXT:    s_mov_b32 m0, s18
807; GFX10-NEXT:    s_mov_b32 s2, s4
808; GFX10-NEXT:    s_mov_b32 s3, s5
809; GFX10-NEXT:    s_mov_b32 s4, s6
810; GFX10-NEXT:    s_mov_b32 s5, s7
811; GFX10-NEXT:    s_mov_b32 s6, s8
812; GFX10-NEXT:    s_mov_b32 s7, s9
813; GFX10-NEXT:    s_mov_b32 s8, s10
814; GFX10-NEXT:    s_mov_b32 s9, s11
815; GFX10-NEXT:    s_mov_b32 s10, s12
816; GFX10-NEXT:    s_mov_b32 s11, s13
817; GFX10-NEXT:    s_mov_b32 s12, s14
818; GFX10-NEXT:    s_mov_b32 s13, s15
819; GFX10-NEXT:    s_mov_b32 s14, s16
820; GFX10-NEXT:    s_mov_b32 s15, s17
821; GFX10-NEXT:    s_movrels_b64 s[0:1], s[0:1]
822; GFX10-NEXT:    v_mov_b32_e32 v0, s0
823; GFX10-NEXT:    v_mov_b32_e32 v1, s1
824; GFX10-NEXT:    global_store_dwordx2 v[0:1], v[0:1], off
825; GFX10-NEXT:    s_endpgm
826;
827; GFX11-LABEL: dyn_extract_v8i64_s_s:
828; GFX11:       ; %bb.0: ; %entry
829; GFX11-NEXT:    s_mov_b32 s0, s2
830; GFX11-NEXT:    s_mov_b32 s1, s3
831; GFX11-NEXT:    s_mov_b32 m0, s18
832; GFX11-NEXT:    s_mov_b32 s2, s4
833; GFX11-NEXT:    s_mov_b32 s3, s5
834; GFX11-NEXT:    s_mov_b32 s4, s6
835; GFX11-NEXT:    s_mov_b32 s5, s7
836; GFX11-NEXT:    s_mov_b32 s6, s8
837; GFX11-NEXT:    s_mov_b32 s7, s9
838; GFX11-NEXT:    s_mov_b32 s8, s10
839; GFX11-NEXT:    s_mov_b32 s9, s11
840; GFX11-NEXT:    s_mov_b32 s10, s12
841; GFX11-NEXT:    s_mov_b32 s11, s13
842; GFX11-NEXT:    s_mov_b32 s12, s14
843; GFX11-NEXT:    s_mov_b32 s13, s15
844; GFX11-NEXT:    s_mov_b32 s14, s16
845; GFX11-NEXT:    s_mov_b32 s15, s17
846; GFX11-NEXT:    s_movrels_b64 s[0:1], s[0:1]
847; GFX11-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
848; GFX11-NEXT:    global_store_b64 v[0:1], v[0:1], off
849; GFX11-NEXT:    s_endpgm
850entry:
; %sel is a function argument, so the element index is only known at run time.
851  %ext = extractelement <8 x i64> %vec, i32 %sel
; The store is the only use of %ext; the function itself returns void.
852  store i64 %ext, ptr addrspace(1) undef
853  ret void
854}
855
; Dynamic extractelement of a float from an inreg <8 x float> at index
; %sel + 3, with %sel also inreg.
; Expected code, by check prefix
;   - the GPRIDX checks add 3 to the selector (s_add_i32 s10, s10, 3) and then
;     pick the element with a chain of s_cmp_eq_u32 / s_cselect_b32 for
;     indices 1 through 7, starting from element 0 in s2.
;   - the MOVREL and GFX10PLUS checks copy the vector from s2..s9 down to
;     s0..s7, load m0 with the unmodified selector (s10), and use
;     s_movrels_b32 with s3 as the source; no add instruction is expected, so
;     the constant 3 appears only as the choice of base register.
; The result is returned in v0.
856define amdgpu_ps float @dyn_extract_v8f32_s_s_offset3(<8 x float> inreg %vec, i32 inreg %sel) {
857; GPRIDX-LABEL: dyn_extract_v8f32_s_s_offset3:
858; GPRIDX:       ; %bb.0: ; %entry
859; GPRIDX-NEXT:    s_add_i32 s10, s10, 3
860; GPRIDX-NEXT:    s_cmp_eq_u32 s10, 1
861; GPRIDX-NEXT:    s_cselect_b32 s0, s3, s2
862; GPRIDX-NEXT:    s_cmp_eq_u32 s10, 2
863; GPRIDX-NEXT:    s_cselect_b32 s0, s4, s0
864; GPRIDX-NEXT:    s_cmp_eq_u32 s10, 3
865; GPRIDX-NEXT:    s_cselect_b32 s0, s5, s0
866; GPRIDX-NEXT:    s_cmp_eq_u32 s10, 4
867; GPRIDX-NEXT:    s_cselect_b32 s0, s6, s0
868; GPRIDX-NEXT:    s_cmp_eq_u32 s10, 5
869; GPRIDX-NEXT:    s_cselect_b32 s0, s7, s0
870; GPRIDX-NEXT:    s_cmp_eq_u32 s10, 6
871; GPRIDX-NEXT:    s_cselect_b32 s0, s8, s0
872; GPRIDX-NEXT:    s_cmp_eq_u32 s10, 7
873; GPRIDX-NEXT:    s_cselect_b32 s0, s9, s0
874; GPRIDX-NEXT:    v_mov_b32_e32 v0, s0
875; GPRIDX-NEXT:    ; return to shader part epilog
876;
877; MOVREL-LABEL: dyn_extract_v8f32_s_s_offset3:
878; MOVREL:       ; %bb.0: ; %entry
879; MOVREL-NEXT:    s_mov_b32 s0, s2
880; MOVREL-NEXT:    s_mov_b32 s1, s3
881; MOVREL-NEXT:    s_mov_b32 s3, s5
882; MOVREL-NEXT:    s_mov_b32 m0, s10
883; MOVREL-NEXT:    s_mov_b32 s2, s4
884; MOVREL-NEXT:    s_mov_b32 s4, s6
885; MOVREL-NEXT:    s_mov_b32 s5, s7
886; MOVREL-NEXT:    s_mov_b32 s6, s8
887; MOVREL-NEXT:    s_mov_b32 s7, s9
888; MOVREL-NEXT:    s_movrels_b32 s0, s3
889; MOVREL-NEXT:    v_mov_b32_e32 v0, s0
890; MOVREL-NEXT:    ; return to shader part epilog
891;
892; GFX10PLUS-LABEL: dyn_extract_v8f32_s_s_offset3:
893; GFX10PLUS:       ; %bb.0: ; %entry
894; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
895; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
896; GFX10PLUS-NEXT:    s_mov_b32 m0, s10
897; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
898; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
899; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
900; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
901; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
902; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
903; GFX10PLUS-NEXT:    s_movrels_b32 s0, s3
904; GFX10PLUS-NEXT:    v_mov_b32_e32 v0, s0
905; GFX10PLUS-NEXT:    ; return to shader part epilog
906entry:
; The index is the runtime selector plus a constant offset of 3.
907  %add = add i32 %sel, 3
908  %ext = extractelement <8 x float> %vec, i32 %add
909  ret float %ext
910}
911
; Dynamic extractelement of a float from an <8 x float> at index %sel + 3,
; with neither argument marked inreg (the checks read the vector from v0..v7
; and the selector from v8). This function uses the default calling
; convention, so every check block starts with s_waitcnt and ends with
; s_setpc_b64 s[30:31].
; All three check blocks expect the same shape. The constant 3 is added to v8,
; then a chain of v_cmp_eq_u32 / v_cndmask_b32 for indices 1 through 7
; accumulates the selected element into v0. They differ in
;   - the add mnemonic and operands (v_add_u32_e32 without a carry operand for
;     GPRIDX, v_add_u32_e32 with a vcc operand for MOVREL, v_add_nc_u32_e32
;     for GFX10PLUS),
;   - the condition register (vcc for GPRIDX and MOVREL, vcc_lo for GFX10PLUS).
912define float @dyn_extract_v8f32_v_v_offset3(<8 x float> %vec, i32 %sel) {
913; GPRIDX-LABEL: dyn_extract_v8f32_v_v_offset3:
914; GPRIDX:       ; %bb.0: ; %entry
915; GPRIDX-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
916; GPRIDX-NEXT:    v_add_u32_e32 v8, 3, v8
917; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v8
918; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
919; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v8
920; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
921; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v8
922; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
923; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v8
924; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
925; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v8
926; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
927; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v8
928; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
929; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 7, v8
930; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc
931; GPRIDX-NEXT:    s_setpc_b64 s[30:31]
932;
933; MOVREL-LABEL: dyn_extract_v8f32_v_v_offset3:
934; MOVREL:       ; %bb.0: ; %entry
935; MOVREL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
936; MOVREL-NEXT:    v_add_u32_e32 v8, vcc, 3, v8
937; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v8
938; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
939; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v8
940; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
941; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v8
942; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
943; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v8
944; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
945; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v8
946; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
947; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v8
948; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
949; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 7, v8
950; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc
951; MOVREL-NEXT:    s_setpc_b64 s[30:31]
952;
953; GFX10PLUS-LABEL: dyn_extract_v8f32_v_v_offset3:
954; GFX10PLUS:       ; %bb.0: ; %entry
955; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
956; GFX10PLUS-NEXT:    v_add_nc_u32_e32 v8, 3, v8
957; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v8
958; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
959; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v8
960; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
961; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v8
962; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc_lo
963; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v8
964; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc_lo
965; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v8
966; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc_lo
967; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v8
968; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc_lo
969; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v8
970; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc_lo
971; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
972entry:
; The index is the runtime selector plus a constant offset of 3.
973  %add = add i32 %sel, 3
974  %ext = extractelement <8 x float> %vec, i32 %add
975  ret float %ext
976}
977
; Dynamic extractelement of a double from an inreg <8 x double> at index
; %sel + 1, with %sel also inreg.
; Both check blocks expect the sixteen vector dwords to be copied from s2..s17
; down to s0..s15, m0 to be loaded with the unmodified selector (s18), and a
; single s_movrels_b64 whose source pair is s[2:3]. No add instruction is
; expected, so the constant offset 1 appears only as the choice of source
; pair. The result is left in s[0:1].
978define amdgpu_ps double @dyn_extract_v8f64_s_s_offset1(<8 x double> inreg %vec, i32 inreg %sel) {
979; GCN-LABEL: dyn_extract_v8f64_s_s_offset1:
980; GCN:       ; %bb.0: ; %entry
981; GCN-NEXT:    s_mov_b32 s0, s2
982; GCN-NEXT:    s_mov_b32 s1, s3
983; GCN-NEXT:    s_mov_b32 s2, s4
984; GCN-NEXT:    s_mov_b32 s3, s5
985; GCN-NEXT:    s_mov_b32 m0, s18
986; GCN-NEXT:    s_mov_b32 s4, s6
987; GCN-NEXT:    s_mov_b32 s5, s7
988; GCN-NEXT:    s_mov_b32 s6, s8
989; GCN-NEXT:    s_mov_b32 s7, s9
990; GCN-NEXT:    s_mov_b32 s8, s10
991; GCN-NEXT:    s_mov_b32 s9, s11
992; GCN-NEXT:    s_mov_b32 s10, s12
993; GCN-NEXT:    s_mov_b32 s11, s13
994; GCN-NEXT:    s_mov_b32 s12, s14
995; GCN-NEXT:    s_mov_b32 s13, s15
996; GCN-NEXT:    s_mov_b32 s14, s16
997; GCN-NEXT:    s_mov_b32 s15, s17
998; GCN-NEXT:    s_movrels_b64 s[0:1], s[2:3]
999; GCN-NEXT:    ; return to shader part epilog
1000;
1001; GFX10PLUS-LABEL: dyn_extract_v8f64_s_s_offset1:
1002; GFX10PLUS:       ; %bb.0: ; %entry
1003; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
1004; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
1005; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
1006; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
1007; GFX10PLUS-NEXT:    s_mov_b32 m0, s18
1008; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
1009; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
1010; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
1011; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
1012; GFX10PLUS-NEXT:    s_mov_b32 s8, s10
1013; GFX10PLUS-NEXT:    s_mov_b32 s9, s11
1014; GFX10PLUS-NEXT:    s_mov_b32 s10, s12
1015; GFX10PLUS-NEXT:    s_mov_b32 s11, s13
1016; GFX10PLUS-NEXT:    s_mov_b32 s12, s14
1017; GFX10PLUS-NEXT:    s_mov_b32 s13, s15
1018; GFX10PLUS-NEXT:    s_mov_b32 s14, s16
1019; GFX10PLUS-NEXT:    s_mov_b32 s15, s17
1020; GFX10PLUS-NEXT:    s_movrels_b64 s[0:1], s[2:3]
1021; GFX10PLUS-NEXT:    ; return to shader part epilog
1022entry:
; The index is the runtime selector plus a constant offset of 1.
1023  %add = add i32 %sel, 1
1024  %ext = extractelement <8 x double> %vec, i32 %add
1025  ret double %ext
1026}
1027
; Dynamic extractelement of a double from an inreg <8 x double> at index
; %sel + 2, with %sel also inreg.
; Both check blocks expect the sixteen vector dwords to be copied from s2..s17
; down to s0..s15, m0 to be loaded with the unmodified selector (s18), and a
; single s_movrels_b64 whose source pair is s[4:5]. No add instruction is
; expected, so the constant offset 2 appears only as the choice of source
; pair. The result is left in s[0:1].
1028define amdgpu_ps double @dyn_extract_v8f64_s_s_offset2(<8 x double> inreg %vec, i32 inreg %sel) {
1029; GCN-LABEL: dyn_extract_v8f64_s_s_offset2:
1030; GCN:       ; %bb.0: ; %entry
1031; GCN-NEXT:    s_mov_b32 s0, s2
1032; GCN-NEXT:    s_mov_b32 s1, s3
1033; GCN-NEXT:    s_mov_b32 s2, s4
1034; GCN-NEXT:    s_mov_b32 s3, s5
1035; GCN-NEXT:    s_mov_b32 s4, s6
1036; GCN-NEXT:    s_mov_b32 s5, s7
1037; GCN-NEXT:    s_mov_b32 m0, s18
1038; GCN-NEXT:    s_mov_b32 s6, s8
1039; GCN-NEXT:    s_mov_b32 s7, s9
1040; GCN-NEXT:    s_mov_b32 s8, s10
1041; GCN-NEXT:    s_mov_b32 s9, s11
1042; GCN-NEXT:    s_mov_b32 s10, s12
1043; GCN-NEXT:    s_mov_b32 s11, s13
1044; GCN-NEXT:    s_mov_b32 s12, s14
1045; GCN-NEXT:    s_mov_b32 s13, s15
1046; GCN-NEXT:    s_mov_b32 s14, s16
1047; GCN-NEXT:    s_mov_b32 s15, s17
1048; GCN-NEXT:    s_movrels_b64 s[0:1], s[4:5]
1049; GCN-NEXT:    ; return to shader part epilog
1050;
1051; GFX10PLUS-LABEL: dyn_extract_v8f64_s_s_offset2:
1052; GFX10PLUS:       ; %bb.0: ; %entry
1053; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
1054; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
1055; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
1056; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
1057; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
1058; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
1059; GFX10PLUS-NEXT:    s_mov_b32 m0, s18
1060; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
1061; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
1062; GFX10PLUS-NEXT:    s_mov_b32 s8, s10
1063; GFX10PLUS-NEXT:    s_mov_b32 s9, s11
1064; GFX10PLUS-NEXT:    s_mov_b32 s10, s12
1065; GFX10PLUS-NEXT:    s_mov_b32 s11, s13
1066; GFX10PLUS-NEXT:    s_mov_b32 s12, s14
1067; GFX10PLUS-NEXT:    s_mov_b32 s13, s15
1068; GFX10PLUS-NEXT:    s_mov_b32 s14, s16
1069; GFX10PLUS-NEXT:    s_mov_b32 s15, s17
1070; GFX10PLUS-NEXT:    s_movrels_b64 s[0:1], s[4:5]
1071; GFX10PLUS-NEXT:    ; return to shader part epilog
1072entry:
; The index is the runtime selector plus a constant offset of 2.
1073  %add = add i32 %sel, 2
1074  %ext = extractelement <8 x double> %vec, i32 %add
1075  ret double %ext
1076}
1077
; Dynamic extractelement of a double from an inreg <8 x double> at index
; %sel + 3, with %sel also inreg.
; Both check blocks expect the sixteen vector dwords to be copied from s2..s17
; down to s0..s15, m0 to be loaded with the unmodified selector (s18), and a
; single s_movrels_b64 whose source pair is s[6:7]. No add instruction is
; expected, so the constant offset 3 appears only as the choice of source
; pair. The result is left in s[0:1].
1078define amdgpu_ps double @dyn_extract_v8f64_s_s_offset3(<8 x double> inreg %vec, i32 inreg %sel) {
1079; GCN-LABEL: dyn_extract_v8f64_s_s_offset3:
1080; GCN:       ; %bb.0: ; %entry
1081; GCN-NEXT:    s_mov_b32 s0, s2
1082; GCN-NEXT:    s_mov_b32 s1, s3
1083; GCN-NEXT:    s_mov_b32 s2, s4
1084; GCN-NEXT:    s_mov_b32 s3, s5
1085; GCN-NEXT:    s_mov_b32 s4, s6
1086; GCN-NEXT:    s_mov_b32 s5, s7
1087; GCN-NEXT:    s_mov_b32 s6, s8
1088; GCN-NEXT:    s_mov_b32 s7, s9
1089; GCN-NEXT:    s_mov_b32 m0, s18
1090; GCN-NEXT:    s_mov_b32 s8, s10
1091; GCN-NEXT:    s_mov_b32 s9, s11
1092; GCN-NEXT:    s_mov_b32 s10, s12
1093; GCN-NEXT:    s_mov_b32 s11, s13
1094; GCN-NEXT:    s_mov_b32 s12, s14
1095; GCN-NEXT:    s_mov_b32 s13, s15
1096; GCN-NEXT:    s_mov_b32 s14, s16
1097; GCN-NEXT:    s_mov_b32 s15, s17
1098; GCN-NEXT:    s_movrels_b64 s[0:1], s[6:7]
1099; GCN-NEXT:    ; return to shader part epilog
1100;
1101; GFX10PLUS-LABEL: dyn_extract_v8f64_s_s_offset3:
1102; GFX10PLUS:       ; %bb.0: ; %entry
1103; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
1104; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
1105; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
1106; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
1107; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
1108; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
1109; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
1110; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
1111; GFX10PLUS-NEXT:    s_mov_b32 m0, s18
1112; GFX10PLUS-NEXT:    s_mov_b32 s8, s10
1113; GFX10PLUS-NEXT:    s_mov_b32 s9, s11
1114; GFX10PLUS-NEXT:    s_mov_b32 s10, s12
1115; GFX10PLUS-NEXT:    s_mov_b32 s11, s13
1116; GFX10PLUS-NEXT:    s_mov_b32 s12, s14
1117; GFX10PLUS-NEXT:    s_mov_b32 s13, s15
1118; GFX10PLUS-NEXT:    s_mov_b32 s14, s16
1119; GFX10PLUS-NEXT:    s_mov_b32 s15, s17
1120; GFX10PLUS-NEXT:    s_movrels_b64 s[0:1], s[6:7]
1121; GFX10PLUS-NEXT:    ; return to shader part epilog
1122entry:
; The index is the runtime selector plus a constant offset of 3.
1123  %add = add i32 %sel, 3
1124  %ext = extractelement <8 x double> %vec, i32 %add
1125  ret double %ext
1126}
1127
; Dynamic extractelement of a double from an inreg <8 x double> at index
; %sel + 4, with %sel also inreg.
; Both check blocks expect the sixteen vector dwords to be copied from s2..s17
; down to s0..s15, m0 to be loaded with the unmodified selector (s18), and a
; single s_movrels_b64 whose source pair is s[8:9]. No add instruction is
; expected, so the constant offset 4 appears only as the choice of source
; pair. The result is left in s[0:1].
1128define amdgpu_ps double @dyn_extract_v8f64_s_s_offset4(<8 x double> inreg %vec, i32 inreg %sel) {
1129; GCN-LABEL: dyn_extract_v8f64_s_s_offset4:
1130; GCN:       ; %bb.0: ; %entry
1131; GCN-NEXT:    s_mov_b32 s0, s2
1132; GCN-NEXT:    s_mov_b32 s1, s3
1133; GCN-NEXT:    s_mov_b32 s2, s4
1134; GCN-NEXT:    s_mov_b32 s3, s5
1135; GCN-NEXT:    s_mov_b32 s4, s6
1136; GCN-NEXT:    s_mov_b32 s5, s7
1137; GCN-NEXT:    s_mov_b32 s6, s8
1138; GCN-NEXT:    s_mov_b32 s7, s9
1139; GCN-NEXT:    s_mov_b32 s8, s10
1140; GCN-NEXT:    s_mov_b32 s9, s11
1141; GCN-NEXT:    s_mov_b32 m0, s18
1142; GCN-NEXT:    s_mov_b32 s10, s12
1143; GCN-NEXT:    s_mov_b32 s11, s13
1144; GCN-NEXT:    s_mov_b32 s12, s14
1145; GCN-NEXT:    s_mov_b32 s13, s15
1146; GCN-NEXT:    s_mov_b32 s14, s16
1147; GCN-NEXT:    s_mov_b32 s15, s17
1148; GCN-NEXT:    s_movrels_b64 s[0:1], s[8:9]
1149; GCN-NEXT:    ; return to shader part epilog
1150;
1151; GFX10PLUS-LABEL: dyn_extract_v8f64_s_s_offset4:
1152; GFX10PLUS:       ; %bb.0: ; %entry
1153; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
1154; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
1155; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
1156; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
1157; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
1158; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
1159; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
1160; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
1161; GFX10PLUS-NEXT:    s_mov_b32 s8, s10
1162; GFX10PLUS-NEXT:    s_mov_b32 s9, s11
1163; GFX10PLUS-NEXT:    s_mov_b32 m0, s18
1164; GFX10PLUS-NEXT:    s_mov_b32 s10, s12
1165; GFX10PLUS-NEXT:    s_mov_b32 s11, s13
1166; GFX10PLUS-NEXT:    s_mov_b32 s12, s14
1167; GFX10PLUS-NEXT:    s_mov_b32 s13, s15
1168; GFX10PLUS-NEXT:    s_mov_b32 s14, s16
1169; GFX10PLUS-NEXT:    s_mov_b32 s15, s17
1170; GFX10PLUS-NEXT:    s_movrels_b64 s[0:1], s[8:9]
1171; GFX10PLUS-NEXT:    ; return to shader part epilog
1172entry:
; The index is the runtime selector plus a constant offset of 4.
1173  %add = add i32 %sel, 4
1174  %ext = extractelement <8 x double> %vec, i32 %add
1175  ret double %ext
1176}
1177
; Dynamic extractelement of a double from an inreg <8 x double> at index
; %sel + 5, with %sel also inreg.
; Both check blocks expect the sixteen vector dwords to be copied from s2..s17
; down to s0..s15, m0 to be loaded with the unmodified selector (s18), and a
; single s_movrels_b64 whose source pair is s[10:11]. No add instruction is
; expected, so the constant offset 5 appears only as the choice of source
; pair. The result is left in s[0:1].
1178define amdgpu_ps double @dyn_extract_v8f64_s_s_offset5(<8 x double> inreg %vec, i32 inreg %sel) {
1179; GCN-LABEL: dyn_extract_v8f64_s_s_offset5:
1180; GCN:       ; %bb.0: ; %entry
1181; GCN-NEXT:    s_mov_b32 s0, s2
1182; GCN-NEXT:    s_mov_b32 s1, s3
1183; GCN-NEXT:    s_mov_b32 s2, s4
1184; GCN-NEXT:    s_mov_b32 s3, s5
1185; GCN-NEXT:    s_mov_b32 s4, s6
1186; GCN-NEXT:    s_mov_b32 s5, s7
1187; GCN-NEXT:    s_mov_b32 s6, s8
1188; GCN-NEXT:    s_mov_b32 s7, s9
1189; GCN-NEXT:    s_mov_b32 s8, s10
1190; GCN-NEXT:    s_mov_b32 s9, s11
1191; GCN-NEXT:    s_mov_b32 s10, s12
1192; GCN-NEXT:    s_mov_b32 s11, s13
1193; GCN-NEXT:    s_mov_b32 m0, s18
1194; GCN-NEXT:    s_mov_b32 s12, s14
1195; GCN-NEXT:    s_mov_b32 s13, s15
1196; GCN-NEXT:    s_mov_b32 s14, s16
1197; GCN-NEXT:    s_mov_b32 s15, s17
1198; GCN-NEXT:    s_movrels_b64 s[0:1], s[10:11]
1199; GCN-NEXT:    ; return to shader part epilog
1200;
1201; GFX10PLUS-LABEL: dyn_extract_v8f64_s_s_offset5:
1202; GFX10PLUS:       ; %bb.0: ; %entry
1203; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
1204; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
1205; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
1206; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
1207; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
1208; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
1209; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
1210; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
1211; GFX10PLUS-NEXT:    s_mov_b32 s8, s10
1212; GFX10PLUS-NEXT:    s_mov_b32 s9, s11
1213; GFX10PLUS-NEXT:    s_mov_b32 s10, s12
1214; GFX10PLUS-NEXT:    s_mov_b32 s11, s13
1215; GFX10PLUS-NEXT:    s_mov_b32 m0, s18
1216; GFX10PLUS-NEXT:    s_mov_b32 s12, s14
1217; GFX10PLUS-NEXT:    s_mov_b32 s13, s15
1218; GFX10PLUS-NEXT:    s_mov_b32 s14, s16
1219; GFX10PLUS-NEXT:    s_mov_b32 s15, s17
1220; GFX10PLUS-NEXT:    s_movrels_b64 s[0:1], s[10:11]
1221; GFX10PLUS-NEXT:    ; return to shader part epilog
1222entry:
; The index is the runtime selector plus a constant offset of 5.
1223  %add = add i32 %sel, 5
1224  %ext = extractelement <8 x double> %vec, i32 %add
1225  ret double %ext
1226}
1227
; Dynamic extractelement of a double from an inreg <8 x double> at index
; %sel + 6, with %sel also inreg.
; Both check blocks expect the sixteen vector dwords to be copied from s2..s17
; down to s0..s15, m0 to be loaded with the unmodified selector (s18), and a
; single s_movrels_b64 whose source pair is s[12:13]. No add instruction is
; expected, so the constant offset 6 appears only as the choice of source
; pair. The result is left in s[0:1].
1228define amdgpu_ps double @dyn_extract_v8f64_s_s_offset6(<8 x double> inreg %vec, i32 inreg %sel) {
1229; GCN-LABEL: dyn_extract_v8f64_s_s_offset6:
1230; GCN:       ; %bb.0: ; %entry
1231; GCN-NEXT:    s_mov_b32 s0, s2
1232; GCN-NEXT:    s_mov_b32 s1, s3
1233; GCN-NEXT:    s_mov_b32 s2, s4
1234; GCN-NEXT:    s_mov_b32 s3, s5
1235; GCN-NEXT:    s_mov_b32 s4, s6
1236; GCN-NEXT:    s_mov_b32 s5, s7
1237; GCN-NEXT:    s_mov_b32 s6, s8
1238; GCN-NEXT:    s_mov_b32 s7, s9
1239; GCN-NEXT:    s_mov_b32 s8, s10
1240; GCN-NEXT:    s_mov_b32 s9, s11
1241; GCN-NEXT:    s_mov_b32 s10, s12
1242; GCN-NEXT:    s_mov_b32 s11, s13
1243; GCN-NEXT:    s_mov_b32 s12, s14
1244; GCN-NEXT:    s_mov_b32 s13, s15
1245; GCN-NEXT:    s_mov_b32 m0, s18
1246; GCN-NEXT:    s_mov_b32 s14, s16
1247; GCN-NEXT:    s_mov_b32 s15, s17
1248; GCN-NEXT:    s_movrels_b64 s[0:1], s[12:13]
1249; GCN-NEXT:    ; return to shader part epilog
1250;
1251; GFX10PLUS-LABEL: dyn_extract_v8f64_s_s_offset6:
1252; GFX10PLUS:       ; %bb.0: ; %entry
1253; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
1254; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
1255; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
1256; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
1257; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
1258; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
1259; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
1260; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
1261; GFX10PLUS-NEXT:    s_mov_b32 s8, s10
1262; GFX10PLUS-NEXT:    s_mov_b32 s9, s11
1263; GFX10PLUS-NEXT:    s_mov_b32 s10, s12
1264; GFX10PLUS-NEXT:    s_mov_b32 s11, s13
1265; GFX10PLUS-NEXT:    s_mov_b32 s12, s14
1266; GFX10PLUS-NEXT:    s_mov_b32 s13, s15
1267; GFX10PLUS-NEXT:    s_mov_b32 m0, s18
1268; GFX10PLUS-NEXT:    s_mov_b32 s14, s16
1269; GFX10PLUS-NEXT:    s_mov_b32 s15, s17
1270; GFX10PLUS-NEXT:    s_movrels_b64 s[0:1], s[12:13]
1271; GFX10PLUS-NEXT:    ; return to shader part epilog
1272entry:
; The index is the runtime selector plus a constant offset of 6.
1273  %add = add i32 %sel, 6
1274  %ext = extractelement <8 x double> %vec, i32 %add
1275  ret double %ext
1276}
1277
; Dynamic extractelement of a double from an inreg <8 x double> at index
; %sel + 7, with %sel also inreg.
; All three check blocks expect the sixteen vector dwords to be copied from
; s2..s17 down to s0..s15, m0 to be loaded with the unmodified selector (s18)
; as the last move, and a single s_movrels_b64 whose source pair is s[14:15].
; No add instruction is expected, so the constant offset 7 appears only as
; the choice of source pair. The result is left in s[0:1].
; This case has separate GPRIDX and MOVREL blocks because the GPRIDX checks
; additionally expect an s_nop 0 between the write to m0 and the
; s_movrels_b64; the MOVREL and GFX10PLUS checks expect no such instruction.
1278define amdgpu_ps double @dyn_extract_v8f64_s_s_offset7(<8 x double> inreg %vec, i32 inreg %sel) {
1279; GPRIDX-LABEL: dyn_extract_v8f64_s_s_offset7:
1280; GPRIDX:       ; %bb.0: ; %entry
1281; GPRIDX-NEXT:    s_mov_b32 s0, s2
1282; GPRIDX-NEXT:    s_mov_b32 s1, s3
1283; GPRIDX-NEXT:    s_mov_b32 s2, s4
1284; GPRIDX-NEXT:    s_mov_b32 s3, s5
1285; GPRIDX-NEXT:    s_mov_b32 s4, s6
1286; GPRIDX-NEXT:    s_mov_b32 s5, s7
1287; GPRIDX-NEXT:    s_mov_b32 s6, s8
1288; GPRIDX-NEXT:    s_mov_b32 s7, s9
1289; GPRIDX-NEXT:    s_mov_b32 s8, s10
1290; GPRIDX-NEXT:    s_mov_b32 s9, s11
1291; GPRIDX-NEXT:    s_mov_b32 s10, s12
1292; GPRIDX-NEXT:    s_mov_b32 s11, s13
1293; GPRIDX-NEXT:    s_mov_b32 s12, s14
1294; GPRIDX-NEXT:    s_mov_b32 s13, s15
1295; GPRIDX-NEXT:    s_mov_b32 s14, s16
1296; GPRIDX-NEXT:    s_mov_b32 s15, s17
1297; GPRIDX-NEXT:    s_mov_b32 m0, s18
1298; GPRIDX-NEXT:    s_nop 0
1299; GPRIDX-NEXT:    s_movrels_b64 s[0:1], s[14:15]
1300; GPRIDX-NEXT:    ; return to shader part epilog
1301;
1302; MOVREL-LABEL: dyn_extract_v8f64_s_s_offset7:
1303; MOVREL:       ; %bb.0: ; %entry
1304; MOVREL-NEXT:    s_mov_b32 s0, s2
1305; MOVREL-NEXT:    s_mov_b32 s1, s3
1306; MOVREL-NEXT:    s_mov_b32 s2, s4
1307; MOVREL-NEXT:    s_mov_b32 s3, s5
1308; MOVREL-NEXT:    s_mov_b32 s4, s6
1309; MOVREL-NEXT:    s_mov_b32 s5, s7
1310; MOVREL-NEXT:    s_mov_b32 s6, s8
1311; MOVREL-NEXT:    s_mov_b32 s7, s9
1312; MOVREL-NEXT:    s_mov_b32 s8, s10
1313; MOVREL-NEXT:    s_mov_b32 s9, s11
1314; MOVREL-NEXT:    s_mov_b32 s10, s12
1315; MOVREL-NEXT:    s_mov_b32 s11, s13
1316; MOVREL-NEXT:    s_mov_b32 s12, s14
1317; MOVREL-NEXT:    s_mov_b32 s13, s15
1318; MOVREL-NEXT:    s_mov_b32 s14, s16
1319; MOVREL-NEXT:    s_mov_b32 s15, s17
1320; MOVREL-NEXT:    s_mov_b32 m0, s18
1321; MOVREL-NEXT:    s_movrels_b64 s[0:1], s[14:15]
1322; MOVREL-NEXT:    ; return to shader part epilog
1323;
1324; GFX10PLUS-LABEL: dyn_extract_v8f64_s_s_offset7:
1325; GFX10PLUS:       ; %bb.0: ; %entry
1326; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
1327; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
1328; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
1329; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
1330; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
1331; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
1332; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
1333; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
1334; GFX10PLUS-NEXT:    s_mov_b32 s8, s10
1335; GFX10PLUS-NEXT:    s_mov_b32 s9, s11
1336; GFX10PLUS-NEXT:    s_mov_b32 s10, s12
1337; GFX10PLUS-NEXT:    s_mov_b32 s11, s13
1338; GFX10PLUS-NEXT:    s_mov_b32 s12, s14
1339; GFX10PLUS-NEXT:    s_mov_b32 s13, s15
1340; GFX10PLUS-NEXT:    s_mov_b32 s14, s16
1341; GFX10PLUS-NEXT:    s_mov_b32 s15, s17
1342; GFX10PLUS-NEXT:    s_mov_b32 m0, s18
1343; GFX10PLUS-NEXT:    s_movrels_b64 s[0:1], s[14:15]
1344; GFX10PLUS-NEXT:    ; return to shader part epilog
1345entry:
; The index is the runtime selector plus a constant offset of 7.
1346  %add = add i32 %sel, 7
1347  %ext = extractelement <8 x double> %vec, i32 %add
1348  ret double %ext
1349}
1350
; Dynamic extractelement of a double from an inreg <8 x double> at index
; %sel + (-1), with %sel also inreg.
; Unlike the positive-offset variants of this test, both check blocks here
; expect the offset to be applied to the index itself. m0 is written by
; s_add_i32 m0, s18, -1, and s_movrels_b64 keeps s[0:1] as its source pair.
; The sixteen vector dwords are still copied from s2..s17 down to s0..s15 and
; the result is left in s[0:1].
1351define amdgpu_ps double @dyn_extract_v8f64_s_s_offsetm1(<8 x double> inreg %vec, i32 inreg %sel) {
1352; GCN-LABEL: dyn_extract_v8f64_s_s_offsetm1:
1353; GCN:       ; %bb.0: ; %entry
1354; GCN-NEXT:    s_mov_b32 s0, s2
1355; GCN-NEXT:    s_mov_b32 s1, s3
1356; GCN-NEXT:    s_add_i32 m0, s18, -1
1357; GCN-NEXT:    s_mov_b32 s2, s4
1358; GCN-NEXT:    s_mov_b32 s3, s5
1359; GCN-NEXT:    s_mov_b32 s4, s6
1360; GCN-NEXT:    s_mov_b32 s5, s7
1361; GCN-NEXT:    s_mov_b32 s6, s8
1362; GCN-NEXT:    s_mov_b32 s7, s9
1363; GCN-NEXT:    s_mov_b32 s8, s10
1364; GCN-NEXT:    s_mov_b32 s9, s11
1365; GCN-NEXT:    s_mov_b32 s10, s12
1366; GCN-NEXT:    s_mov_b32 s11, s13
1367; GCN-NEXT:    s_mov_b32 s12, s14
1368; GCN-NEXT:    s_mov_b32 s13, s15
1369; GCN-NEXT:    s_mov_b32 s14, s16
1370; GCN-NEXT:    s_mov_b32 s15, s17
1371; GCN-NEXT:    s_movrels_b64 s[0:1], s[0:1]
1372; GCN-NEXT:    ; return to shader part epilog
1373;
1374; GFX10PLUS-LABEL: dyn_extract_v8f64_s_s_offsetm1:
1375; GFX10PLUS:       ; %bb.0: ; %entry
1376; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
1377; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
1378; GFX10PLUS-NEXT:    s_add_i32 m0, s18, -1
1379; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
1380; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
1381; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
1382; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
1383; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
1384; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
1385; GFX10PLUS-NEXT:    s_mov_b32 s8, s10
1386; GFX10PLUS-NEXT:    s_mov_b32 s9, s11
1387; GFX10PLUS-NEXT:    s_mov_b32 s10, s12
1388; GFX10PLUS-NEXT:    s_mov_b32 s11, s13
1389; GFX10PLUS-NEXT:    s_mov_b32 s12, s14
1390; GFX10PLUS-NEXT:    s_mov_b32 s13, s15
1391; GFX10PLUS-NEXT:    s_mov_b32 s14, s16
1392; GFX10PLUS-NEXT:    s_mov_b32 s15, s17
1393; GFX10PLUS-NEXT:    s_movrels_b64 s[0:1], s[0:1]
1394; GFX10PLUS-NEXT:    ; return to shader part epilog
1395entry:
; The index is the runtime selector plus a constant offset of -1.
1396  %add = add i32 %sel, -1
1397  %ext = extractelement <8 x double> %vec, i32 %add
1398  ret double %ext
1399}
1400
1401define double @dyn_extract_v8f64_v_v_offset3(<8 x double> %vec, i32 %sel) {
1402; GPRIDX-LABEL: dyn_extract_v8f64_v_v_offset3:
1403; GPRIDX:       ; %bb.0: ; %entry
1404; GPRIDX-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1405; GPRIDX-NEXT:    v_add_u32_e32 v16, 3, v16
1406; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v16
1407; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
1408; GPRIDX-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
1409; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v16
1410; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
1411; GPRIDX-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
1412; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v16
1413; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
1414; GPRIDX-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
1415; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v16
1416; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc
1417; GPRIDX-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc
1418; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v16
1419; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc
1420; GPRIDX-NEXT:    v_cndmask_b32_e32 v1, v1, v11, vcc
1421; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v16
1422; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v12, vcc
1423; GPRIDX-NEXT:    v_cndmask_b32_e32 v1, v1, v13, vcc
1424; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 7, v16
1425; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v14, vcc
1426; GPRIDX-NEXT:    v_cndmask_b32_e32 v1, v1, v15, vcc
1427; GPRIDX-NEXT:    s_setpc_b64 s[30:31]
1428;
1429; MOVREL-LABEL: dyn_extract_v8f64_v_v_offset3:
1430; MOVREL:       ; %bb.0: ; %entry
1431; MOVREL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1432; MOVREL-NEXT:    v_add_u32_e32 v16, vcc, 3, v16
1433; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v16
1434; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
1435; MOVREL-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
1436; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v16
1437; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
1438; MOVREL-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
1439; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v16
1440; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
1441; MOVREL-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
1442; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v16
1443; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc
1444; MOVREL-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc
1445; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v16
1446; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc
1447; MOVREL-NEXT:    v_cndmask_b32_e32 v1, v1, v11, vcc
1448; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v16
1449; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v12, vcc
1450; MOVREL-NEXT:    v_cndmask_b32_e32 v1, v1, v13, vcc
1451; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 7, v16
1452; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v14, vcc
1453; MOVREL-NEXT:    v_cndmask_b32_e32 v1, v1, v15, vcc
1454; MOVREL-NEXT:    s_setpc_b64 s[30:31]
1455;
1456; GFX10-LABEL: dyn_extract_v8f64_v_v_offset3:
1457; GFX10:       ; %bb.0: ; %entry
1458; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1459; GFX10-NEXT:    v_add_nc_u32_e32 v16, 3, v16
1460; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v16
1461; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
1462; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc_lo
1463; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v16
1464; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc_lo
1465; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc_lo
1466; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v16
1467; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc_lo
1468; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc_lo
1469; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v16
1470; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc_lo
1471; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc_lo
1472; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v16
1473; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc_lo
1474; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v11, vcc_lo
1475; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v16
1476; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v12, vcc_lo
1477; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v13, vcc_lo
1478; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v16
1479; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v14, vcc_lo
1480; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v15, vcc_lo
1481; GFX10-NEXT:    s_setpc_b64 s[30:31]
1482;
1483; GFX11-LABEL: dyn_extract_v8f64_v_v_offset3:
1484; GFX11:       ; %bb.0: ; %entry
1485; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1486; GFX11-NEXT:    v_add_nc_u32_e32 v16, 3, v16
1487; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v16
1488; GFX11-NEXT:    v_dual_cndmask_b32 v1, v1, v3 :: v_dual_cndmask_b32 v0, v0, v2
1489; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v16
1490; GFX11-NEXT:    v_dual_cndmask_b32 v1, v1, v5 :: v_dual_cndmask_b32 v0, v0, v4
1491; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v16
1492; GFX11-NEXT:    v_dual_cndmask_b32 v1, v1, v7 :: v_dual_cndmask_b32 v0, v0, v6
1493; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v16
1494; GFX11-NEXT:    v_dual_cndmask_b32 v1, v1, v9 :: v_dual_cndmask_b32 v0, v0, v8
1495; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v16
1496; GFX11-NEXT:    v_dual_cndmask_b32 v1, v1, v11 :: v_dual_cndmask_b32 v0, v0, v10
1497; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v16
1498; GFX11-NEXT:    v_dual_cndmask_b32 v1, v1, v13 :: v_dual_cndmask_b32 v0, v0, v12
1499; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v16
1500; GFX11-NEXT:    v_dual_cndmask_b32 v1, v1, v15 :: v_dual_cndmask_b32 v0, v0, v14
1501; GFX11-NEXT:    s_setpc_b64 s[30:31]
1502entry:
1503  %add = add i32 %sel, 3
1504  %ext = extractelement <8 x double> %vec, i32 %add
1505  ret double %ext
1506}
1507
; Extracts one element of an <8 x ptr addrspace(3)> vector at a run-time index.
; Neither argument is marked inreg. On every tested target the expected code is
; a chain of v_cmp_eq_u32 / v_cndmask_b32 pairs that tests the index in v8
; against 1 through 7, starting from element 0 in v0.
1508define ptr addrspace(3) @dyn_extract_v8p3_v_v(<8 x ptr addrspace(3)> %vec, i32 %idx) {
1509; GCN-LABEL: dyn_extract_v8p3_v_v:
1510; GCN:       ; %bb.0: ; %entry
1511; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1512; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v8
1513; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
1514; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v8
1515; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
1516; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v8
1517; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
1518; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v8
1519; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
1520; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v8
1521; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
1522; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v8
1523; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
1524; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 7, v8
1525; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc
1526; GCN-NEXT:    s_setpc_b64 s[30:31]
1527;
1528; GFX10PLUS-LABEL: dyn_extract_v8p3_v_v:
1529; GFX10PLUS:       ; %bb.0: ; %entry
1530; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1531; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v8
1532; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
1533; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v8
1534; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
1535; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v8
1536; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc_lo
1537; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v8
1538; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc_lo
1539; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v8
1540; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc_lo
1541; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v8
1542; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc_lo
1543; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v8
1544; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc_lo
1545; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
1546entry:
1547  %ext = extractelement <8 x ptr addrspace(3)> %vec, i32 %idx
1548  ret ptr addrspace(3) %ext
1549}
1550
; amdgpu_ps variant of the <8 x ptr addrspace(3)> extract with the vector and
; the index both passed inreg. The gfx900 checks expect a scalar
; s_cmp_eq_u32 / s_cselect_b32 chain on the index in s10. The other targets
; expect the elements copied from s2-s9 down to s0-s7, m0 loaded from s10, and
; the element read with s_movrels_b32.
1551define amdgpu_ps void @dyn_extract_v8p3_s_s(<8 x ptr addrspace(3)> inreg %vec, i32 inreg %idx) {
1552; GPRIDX-LABEL: dyn_extract_v8p3_s_s:
1553; GPRIDX:       ; %bb.0: ; %entry
1554; GPRIDX-NEXT:    s_cmp_eq_u32 s10, 1
1555; GPRIDX-NEXT:    s_cselect_b32 s0, s3, s2
1556; GPRIDX-NEXT:    s_cmp_eq_u32 s10, 2
1557; GPRIDX-NEXT:    s_cselect_b32 s0, s4, s0
1558; GPRIDX-NEXT:    s_cmp_eq_u32 s10, 3
1559; GPRIDX-NEXT:    s_cselect_b32 s0, s5, s0
1560; GPRIDX-NEXT:    s_cmp_eq_u32 s10, 4
1561; GPRIDX-NEXT:    s_cselect_b32 s0, s6, s0
1562; GPRIDX-NEXT:    s_cmp_eq_u32 s10, 5
1563; GPRIDX-NEXT:    s_cselect_b32 s0, s7, s0
1564; GPRIDX-NEXT:    s_cmp_eq_u32 s10, 6
1565; GPRIDX-NEXT:    s_cselect_b32 s0, s8, s0
1566; GPRIDX-NEXT:    s_cmp_eq_u32 s10, 7
1567; GPRIDX-NEXT:    s_cselect_b32 s0, s9, s0
1568; GPRIDX-NEXT:    v_mov_b32_e32 v0, s0
1569; GPRIDX-NEXT:    ds_write_b32 v0, v0
1570; GPRIDX-NEXT:    s_endpgm
1571;
1572; MOVREL-LABEL: dyn_extract_v8p3_s_s:
1573; MOVREL:       ; %bb.0: ; %entry
1574; MOVREL-NEXT:    s_mov_b32 s0, s2
1575; MOVREL-NEXT:    s_mov_b32 m0, s10
1576; MOVREL-NEXT:    s_mov_b32 s1, s3
1577; MOVREL-NEXT:    s_mov_b32 s2, s4
1578; MOVREL-NEXT:    s_mov_b32 s3, s5
1579; MOVREL-NEXT:    s_mov_b32 s4, s6
1580; MOVREL-NEXT:    s_mov_b32 s5, s7
1581; MOVREL-NEXT:    s_mov_b32 s6, s8
1582; MOVREL-NEXT:    s_mov_b32 s7, s9
1583; MOVREL-NEXT:    s_movrels_b32 s0, s0
1584; MOVREL-NEXT:    v_mov_b32_e32 v0, s0
1585; MOVREL-NEXT:    s_mov_b32 m0, -1
1586; MOVREL-NEXT:    ds_write_b32 v0, v0
1587; MOVREL-NEXT:    s_endpgm
1588;
1589; GFX10-LABEL: dyn_extract_v8p3_s_s:
1590; GFX10:       ; %bb.0: ; %entry
1591; GFX10-NEXT:    s_mov_b32 s0, s2
1592; GFX10-NEXT:    s_mov_b32 m0, s10
1593; GFX10-NEXT:    s_mov_b32 s1, s3
1594; GFX10-NEXT:    s_mov_b32 s2, s4
1595; GFX10-NEXT:    s_mov_b32 s3, s5
1596; GFX10-NEXT:    s_mov_b32 s4, s6
1597; GFX10-NEXT:    s_mov_b32 s5, s7
1598; GFX10-NEXT:    s_mov_b32 s6, s8
1599; GFX10-NEXT:    s_mov_b32 s7, s9
1600; GFX10-NEXT:    s_movrels_b32 s0, s0
1601; GFX10-NEXT:    v_mov_b32_e32 v0, s0
1602; GFX10-NEXT:    ds_write_b32 v0, v0
1603; GFX10-NEXT:    s_endpgm
1604;
1605; GFX11-LABEL: dyn_extract_v8p3_s_s:
1606; GFX11:       ; %bb.0: ; %entry
1607; GFX11-NEXT:    s_mov_b32 s0, s2
1608; GFX11-NEXT:    s_mov_b32 m0, s10
1609; GFX11-NEXT:    s_mov_b32 s1, s3
1610; GFX11-NEXT:    s_mov_b32 s2, s4
1611; GFX11-NEXT:    s_mov_b32 s3, s5
1612; GFX11-NEXT:    s_mov_b32 s4, s6
1613; GFX11-NEXT:    s_mov_b32 s5, s7
1614; GFX11-NEXT:    s_mov_b32 s6, s8
1615; GFX11-NEXT:    s_mov_b32 s7, s9
1616; GFX11-NEXT:    s_movrels_b32 s0, s0
1617; GFX11-NEXT:    v_mov_b32_e32 v0, s0
1618; GFX11-NEXT:    ds_store_b32 v0, v0
1619; GFX11-NEXT:    s_endpgm
1620entry:
1621  %ext = extractelement <8 x ptr addrspace(3)> %vec, i32 %idx
  ; The function returns void, so the extracted pointer is stored to an undef
  ; addrspace(3) address instead of being returned.
1622  store ptr addrspace(3) %ext, ptr addrspace(3) undef
1623  ret void
1624}
1625
; Extracts one element of an <8 x ptr addrspace(1)> vector at a run-time index,
; with neither argument marked inreg. Each element spans a register pair in the
; checks, so every comparison of the index in v16 against 1 through 7 feeds two
; selects, one into v0 and one into v1. The gfx1100 checks expect each pair of
; selects fused into a single v_dual_cndmask_b32.
1626define ptr addrspace(1) @dyn_extract_v8p1_v_v(<8 x ptr addrspace(1)> %vec, i32 %idx) {
1627; GCN-LABEL: dyn_extract_v8p1_v_v:
1628; GCN:       ; %bb.0: ; %entry
1629; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1630; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v16
1631; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
1632; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
1633; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v16
1634; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
1635; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
1636; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v16
1637; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
1638; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
1639; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v16
1640; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc
1641; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc
1642; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v16
1643; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc
1644; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v11, vcc
1645; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v16
1646; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v12, vcc
1647; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v13, vcc
1648; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 7, v16
1649; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v14, vcc
1650; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v15, vcc
1651; GCN-NEXT:    s_setpc_b64 s[30:31]
1652;
1653; GFX10-LABEL: dyn_extract_v8p1_v_v:
1654; GFX10:       ; %bb.0: ; %entry
1655; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1656; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v16
1657; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
1658; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc_lo
1659; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v16
1660; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc_lo
1661; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc_lo
1662; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v16
1663; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc_lo
1664; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc_lo
1665; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v16
1666; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc_lo
1667; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc_lo
1668; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v16
1669; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc_lo
1670; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v11, vcc_lo
1671; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v16
1672; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v12, vcc_lo
1673; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v13, vcc_lo
1674; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v16
1675; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v14, vcc_lo
1676; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v15, vcc_lo
1677; GFX10-NEXT:    s_setpc_b64 s[30:31]
1678;
1679; GFX11-LABEL: dyn_extract_v8p1_v_v:
1680; GFX11:       ; %bb.0: ; %entry
1681; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1682; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v16
1683; GFX11-NEXT:    v_dual_cndmask_b32 v0, v0, v2 :: v_dual_cndmask_b32 v1, v1, v3
1684; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v16
1685; GFX11-NEXT:    v_dual_cndmask_b32 v0, v0, v4 :: v_dual_cndmask_b32 v1, v1, v5
1686; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v16
1687; GFX11-NEXT:    v_dual_cndmask_b32 v0, v0, v6 :: v_dual_cndmask_b32 v1, v1, v7
1688; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v16
1689; GFX11-NEXT:    v_dual_cndmask_b32 v0, v0, v8 :: v_dual_cndmask_b32 v1, v1, v9
1690; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v16
1691; GFX11-NEXT:    v_dual_cndmask_b32 v0, v0, v10 :: v_dual_cndmask_b32 v1, v1, v11
1692; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v16
1693; GFX11-NEXT:    v_dual_cndmask_b32 v0, v0, v12 :: v_dual_cndmask_b32 v1, v1, v13
1694; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v16
1695; GFX11-NEXT:    v_dual_cndmask_b32 v0, v0, v14 :: v_dual_cndmask_b32 v1, v1, v15
1696; GFX11-NEXT:    s_setpc_b64 s[30:31]
1697entry:
1698  %ext = extractelement <8 x ptr addrspace(1)> %vec, i32 %idx
1699  ret ptr addrspace(1) %ext
1700}
1701
; amdgpu_ps variant of the <8 x ptr addrspace(1)> extract with the vector and
; the index both passed inreg. All four targets expect the sixteen input
; registers s2-s17 copied down to s0-s15, m0 loaded from s18, and a single
; s_movrels_b64 read. The targets differ in the store mnemonic
; (global_store_dwordx2, flat_store_dwordx2 or global_store_b64), and gfx1100
; fuses the two v_mov_b32 copies into one v_dual_mov_b32.
1702define amdgpu_ps void @dyn_extract_v8p1_s_s(<8 x ptr addrspace(1)> inreg %vec, i32 inreg %idx) {
1703; GPRIDX-LABEL: dyn_extract_v8p1_s_s:
1704; GPRIDX:       ; %bb.0: ; %entry
1705; GPRIDX-NEXT:    s_mov_b32 s0, s2
1706; GPRIDX-NEXT:    s_mov_b32 s1, s3
1707; GPRIDX-NEXT:    s_mov_b32 m0, s18
1708; GPRIDX-NEXT:    s_mov_b32 s2, s4
1709; GPRIDX-NEXT:    s_mov_b32 s3, s5
1710; GPRIDX-NEXT:    s_mov_b32 s4, s6
1711; GPRIDX-NEXT:    s_mov_b32 s5, s7
1712; GPRIDX-NEXT:    s_mov_b32 s6, s8
1713; GPRIDX-NEXT:    s_mov_b32 s7, s9
1714; GPRIDX-NEXT:    s_mov_b32 s8, s10
1715; GPRIDX-NEXT:    s_mov_b32 s9, s11
1716; GPRIDX-NEXT:    s_mov_b32 s10, s12
1717; GPRIDX-NEXT:    s_mov_b32 s11, s13
1718; GPRIDX-NEXT:    s_mov_b32 s12, s14
1719; GPRIDX-NEXT:    s_mov_b32 s13, s15
1720; GPRIDX-NEXT:    s_mov_b32 s14, s16
1721; GPRIDX-NEXT:    s_mov_b32 s15, s17
1722; GPRIDX-NEXT:    s_movrels_b64 s[0:1], s[0:1]
1723; GPRIDX-NEXT:    v_mov_b32_e32 v0, s0
1724; GPRIDX-NEXT:    v_mov_b32_e32 v1, s1
1725; GPRIDX-NEXT:    global_store_dwordx2 v[0:1], v[0:1], off
1726; GPRIDX-NEXT:    s_endpgm
1727;
1728; MOVREL-LABEL: dyn_extract_v8p1_s_s:
1729; MOVREL:       ; %bb.0: ; %entry
1730; MOVREL-NEXT:    s_mov_b32 s0, s2
1731; MOVREL-NEXT:    s_mov_b32 s1, s3
1732; MOVREL-NEXT:    s_mov_b32 m0, s18
1733; MOVREL-NEXT:    s_mov_b32 s2, s4
1734; MOVREL-NEXT:    s_mov_b32 s3, s5
1735; MOVREL-NEXT:    s_mov_b32 s4, s6
1736; MOVREL-NEXT:    s_mov_b32 s5, s7
1737; MOVREL-NEXT:    s_mov_b32 s6, s8
1738; MOVREL-NEXT:    s_mov_b32 s7, s9
1739; MOVREL-NEXT:    s_mov_b32 s8, s10
1740; MOVREL-NEXT:    s_mov_b32 s9, s11
1741; MOVREL-NEXT:    s_mov_b32 s10, s12
1742; MOVREL-NEXT:    s_mov_b32 s11, s13
1743; MOVREL-NEXT:    s_mov_b32 s12, s14
1744; MOVREL-NEXT:    s_mov_b32 s13, s15
1745; MOVREL-NEXT:    s_mov_b32 s14, s16
1746; MOVREL-NEXT:    s_mov_b32 s15, s17
1747; MOVREL-NEXT:    s_movrels_b64 s[0:1], s[0:1]
1748; MOVREL-NEXT:    v_mov_b32_e32 v0, s0
1749; MOVREL-NEXT:    v_mov_b32_e32 v1, s1
1750; MOVREL-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
1751; MOVREL-NEXT:    s_endpgm
1752;
1753; GFX10-LABEL: dyn_extract_v8p1_s_s:
1754; GFX10:       ; %bb.0: ; %entry
1755; GFX10-NEXT:    s_mov_b32 s0, s2
1756; GFX10-NEXT:    s_mov_b32 s1, s3
1757; GFX10-NEXT:    s_mov_b32 m0, s18
1758; GFX10-NEXT:    s_mov_b32 s2, s4
1759; GFX10-NEXT:    s_mov_b32 s3, s5
1760; GFX10-NEXT:    s_mov_b32 s4, s6
1761; GFX10-NEXT:    s_mov_b32 s5, s7
1762; GFX10-NEXT:    s_mov_b32 s6, s8
1763; GFX10-NEXT:    s_mov_b32 s7, s9
1764; GFX10-NEXT:    s_mov_b32 s8, s10
1765; GFX10-NEXT:    s_mov_b32 s9, s11
1766; GFX10-NEXT:    s_mov_b32 s10, s12
1767; GFX10-NEXT:    s_mov_b32 s11, s13
1768; GFX10-NEXT:    s_mov_b32 s12, s14
1769; GFX10-NEXT:    s_mov_b32 s13, s15
1770; GFX10-NEXT:    s_mov_b32 s14, s16
1771; GFX10-NEXT:    s_mov_b32 s15, s17
1772; GFX10-NEXT:    s_movrels_b64 s[0:1], s[0:1]
1773; GFX10-NEXT:    v_mov_b32_e32 v0, s0
1774; GFX10-NEXT:    v_mov_b32_e32 v1, s1
1775; GFX10-NEXT:    global_store_dwordx2 v[0:1], v[0:1], off
1776; GFX10-NEXT:    s_endpgm
1777;
1778; GFX11-LABEL: dyn_extract_v8p1_s_s:
1779; GFX11:       ; %bb.0: ; %entry
1780; GFX11-NEXT:    s_mov_b32 s0, s2
1781; GFX11-NEXT:    s_mov_b32 s1, s3
1782; GFX11-NEXT:    s_mov_b32 m0, s18
1783; GFX11-NEXT:    s_mov_b32 s2, s4
1784; GFX11-NEXT:    s_mov_b32 s3, s5
1785; GFX11-NEXT:    s_mov_b32 s4, s6
1786; GFX11-NEXT:    s_mov_b32 s5, s7
1787; GFX11-NEXT:    s_mov_b32 s6, s8
1788; GFX11-NEXT:    s_mov_b32 s7, s9
1789; GFX11-NEXT:    s_mov_b32 s8, s10
1790; GFX11-NEXT:    s_mov_b32 s9, s11
1791; GFX11-NEXT:    s_mov_b32 s10, s12
1792; GFX11-NEXT:    s_mov_b32 s11, s13
1793; GFX11-NEXT:    s_mov_b32 s12, s14
1794; GFX11-NEXT:    s_mov_b32 s13, s15
1795; GFX11-NEXT:    s_mov_b32 s14, s16
1796; GFX11-NEXT:    s_mov_b32 s15, s17
1797; GFX11-NEXT:    s_movrels_b64 s[0:1], s[0:1]
1798; GFX11-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
1799; GFX11-NEXT:    global_store_b64 v[0:1], v[0:1], off
1800; GFX11-NEXT:    s_endpgm
1801entry:
1802  %ext = extractelement <8 x ptr addrspace(1)> %vec, i32 %idx
  ; The function returns void, so the extracted pointer is stored to an undef
  ; addrspace(1) address instead of being returned.
1803  store ptr addrspace(1) %ext, ptr addrspace(1) undef
1804  ret void
1805}
1806
; Extracts a float from a <16 x float> argument using an inreg index in an
; amdgpu_ps function. The gfx900 checks expect a v_mov_b32 wrapped in
; s_set_gpr_idx_on / s_set_gpr_idx_off with s2 as the index. The other targets
; expect m0 loaded from s2 followed by v_movrels_b32.
1807define amdgpu_ps float @dyn_extract_v16f32_v_s(<16 x float> %vec, i32 inreg %sel) {
1808; GPRIDX-LABEL: dyn_extract_v16f32_v_s:
1809; GPRIDX:       ; %bb.0: ; %entry
1810; GPRIDX-NEXT:    s_set_gpr_idx_on s2, gpr_idx(SRC0)
1811; GPRIDX-NEXT:    v_mov_b32_e32 v0, v0
1812; GPRIDX-NEXT:    s_set_gpr_idx_off
1813; GPRIDX-NEXT:    ; return to shader part epilog
1814;
1815; MOVREL-LABEL: dyn_extract_v16f32_v_s:
1816; MOVREL:       ; %bb.0: ; %entry
1817; MOVREL-NEXT:    s_mov_b32 m0, s2
1818; MOVREL-NEXT:    v_movrels_b32_e32 v0, v0
1819; MOVREL-NEXT:    ; return to shader part epilog
1820;
1821; GFX10PLUS-LABEL: dyn_extract_v16f32_v_s:
1822; GFX10PLUS:       ; %bb.0: ; %entry
1823; GFX10PLUS-NEXT:    s_mov_b32 m0, s2
1824; GFX10PLUS-NEXT:    v_movrels_b32_e32 v0, v0
1825; GFX10PLUS-NEXT:    ; return to shader part epilog
1826entry:
1827  %ext = extractelement <16 x float> %vec, i32 %sel
1828  ret float %ext
1829}
1830
; Extracts a float from a <32 x float> argument using an inreg index in an
; amdgpu_ps function. The expected code has the same shape for every target as
; the checks below show, namely an indexed v_mov_b32 between s_set_gpr_idx_on
; and s_set_gpr_idx_off on gfx900, and m0 loaded from s2 followed by
; v_movrels_b32 on the other targets.
1831define amdgpu_ps float @dyn_extract_v32f32_v_s(<32 x float> %vec, i32 inreg %sel) {
1832; GPRIDX-LABEL: dyn_extract_v32f32_v_s:
1833; GPRIDX:       ; %bb.0: ; %entry
1834; GPRIDX-NEXT:    s_set_gpr_idx_on s2, gpr_idx(SRC0)
1835; GPRIDX-NEXT:    v_mov_b32_e32 v0, v0
1836; GPRIDX-NEXT:    s_set_gpr_idx_off
1837; GPRIDX-NEXT:    ; return to shader part epilog
1838;
1839; MOVREL-LABEL: dyn_extract_v32f32_v_s:
1840; MOVREL:       ; %bb.0: ; %entry
1841; MOVREL-NEXT:    s_mov_b32 m0, s2
1842; MOVREL-NEXT:    v_movrels_b32_e32 v0, v0
1843; MOVREL-NEXT:    ; return to shader part epilog
1844;
1845; GFX10PLUS-LABEL: dyn_extract_v32f32_v_s:
1846; GFX10PLUS:       ; %bb.0: ; %entry
1847; GFX10PLUS-NEXT:    s_mov_b32 m0, s2
1848; GFX10PLUS-NEXT:    v_movrels_b32_e32 v0, v0
1849; GFX10PLUS-NEXT:    ; return to shader part epilog
1850entry:
1851  %ext = extractelement <32 x float> %vec, i32 %sel
1852  ret float %ext
1853}
1854
; Extracts a double from a <16 x double> argument using an inreg index in an
; amdgpu_ps function. The checks expect the index in s2 shifted left by one
; before two indexed 32-bit moves, one based at v0 and one based at v1. The two
; results are then copied to s0 and s1 with v_readfirstlane_b32.
1855define amdgpu_ps double @dyn_extract_v16f64_v_s(<16 x double> %vec, i32 inreg %sel) {
1856; GPRIDX-LABEL: dyn_extract_v16f64_v_s:
1857; GPRIDX:       ; %bb.0: ; %entry
1858; GPRIDX-NEXT:    s_lshl_b32 s0, s2, 1
1859; GPRIDX-NEXT:    s_set_gpr_idx_on s0, gpr_idx(SRC0)
1860; GPRIDX-NEXT:    v_mov_b32_e32 v32, v0
1861; GPRIDX-NEXT:    v_mov_b32_e32 v0, v1
1862; GPRIDX-NEXT:    s_set_gpr_idx_off
1863; GPRIDX-NEXT:    v_readfirstlane_b32 s0, v32
1864; GPRIDX-NEXT:    v_readfirstlane_b32 s1, v0
1865; GPRIDX-NEXT:    ; return to shader part epilog
1866;
1867; MOVREL-LABEL: dyn_extract_v16f64_v_s:
1868; MOVREL:       ; %bb.0: ; %entry
1869; MOVREL-NEXT:    s_lshl_b32 m0, s2, 1
1870; MOVREL-NEXT:    v_movrels_b32_e32 v32, v0
1871; MOVREL-NEXT:    v_movrels_b32_e32 v0, v1
1872; MOVREL-NEXT:    v_readfirstlane_b32 s0, v32
1873; MOVREL-NEXT:    v_readfirstlane_b32 s1, v0
1874; MOVREL-NEXT:    ; return to shader part epilog
1875;
1876; GFX10PLUS-LABEL: dyn_extract_v16f64_v_s:
1877; GFX10PLUS:       ; %bb.0: ; %entry
1878; GFX10PLUS-NEXT:    s_lshl_b32 m0, s2, 1
1879; GFX10PLUS-NEXT:    v_movrels_b32_e32 v32, v0
1880; GFX10PLUS-NEXT:    v_movrels_b32_e32 v0, v1
1881; GFX10PLUS-NEXT:    v_readfirstlane_b32 s0, v32
1882; GFX10PLUS-NEXT:    v_readfirstlane_b32 s1, v0
1883; GFX10PLUS-NEXT:    ; return to shader part epilog
1884entry:
1885  %ext = extractelement <16 x double> %vec, i32 %sel
1886  ret double %ext
1887}
1888
; Extracts a float from a constant <16 x float> vector (1.0 through 16.0) using
; an inreg index in an amdgpu_ps function. All targets expect the sixteen
; constants materialized into s4-s19, m0 loaded from s2, and the element read
; with s_movrels_b32 based at s4.
1889define amdgpu_ps float @dyn_extract_v16f32_s_s(i32 inreg %sel) {
1890; GCN-LABEL: dyn_extract_v16f32_s_s:
1891; GCN:       ; %bb.0: ; %entry
1892; GCN-NEXT:    s_mov_b32 s4, 1.0
1893; GCN-NEXT:    s_mov_b32 m0, s2
1894; GCN-NEXT:    s_mov_b32 s19, 0x41800000
1895; GCN-NEXT:    s_mov_b32 s18, 0x41700000
1896; GCN-NEXT:    s_mov_b32 s17, 0x41600000
1897; GCN-NEXT:    s_mov_b32 s16, 0x41500000
1898; GCN-NEXT:    s_mov_b32 s15, 0x41400000
1899; GCN-NEXT:    s_mov_b32 s14, 0x41300000
1900; GCN-NEXT:    s_mov_b32 s13, 0x41200000
1901; GCN-NEXT:    s_mov_b32 s12, 0x41100000
1902; GCN-NEXT:    s_mov_b32 s11, 0x41000000
1903; GCN-NEXT:    s_mov_b32 s10, 0x40e00000
1904; GCN-NEXT:    s_mov_b32 s9, 0x40c00000
1905; GCN-NEXT:    s_mov_b32 s8, 0x40a00000
1906; GCN-NEXT:    s_mov_b32 s7, 4.0
1907; GCN-NEXT:    s_mov_b32 s6, 0x40400000
1908; GCN-NEXT:    s_mov_b32 s5, 2.0
1909; GCN-NEXT:    s_movrels_b32 s0, s4
1910; GCN-NEXT:    v_mov_b32_e32 v0, s0
1911; GCN-NEXT:    ; return to shader part epilog
1912;
1913; GFX10PLUS-LABEL: dyn_extract_v16f32_s_s:
1914; GFX10PLUS:       ; %bb.0: ; %entry
1915; GFX10PLUS-NEXT:    s_mov_b32 s4, 1.0
1916; GFX10PLUS-NEXT:    s_mov_b32 m0, s2
1917; GFX10PLUS-NEXT:    s_mov_b32 s19, 0x41800000
1918; GFX10PLUS-NEXT:    s_mov_b32 s18, 0x41700000
1919; GFX10PLUS-NEXT:    s_mov_b32 s17, 0x41600000
1920; GFX10PLUS-NEXT:    s_mov_b32 s16, 0x41500000
1921; GFX10PLUS-NEXT:    s_mov_b32 s15, 0x41400000
1922; GFX10PLUS-NEXT:    s_mov_b32 s14, 0x41300000
1923; GFX10PLUS-NEXT:    s_mov_b32 s13, 0x41200000
1924; GFX10PLUS-NEXT:    s_mov_b32 s12, 0x41100000
1925; GFX10PLUS-NEXT:    s_mov_b32 s11, 0x41000000
1926; GFX10PLUS-NEXT:    s_mov_b32 s10, 0x40e00000
1927; GFX10PLUS-NEXT:    s_mov_b32 s9, 0x40c00000
1928; GFX10PLUS-NEXT:    s_mov_b32 s8, 0x40a00000
1929; GFX10PLUS-NEXT:    s_mov_b32 s7, 4.0
1930; GFX10PLUS-NEXT:    s_mov_b32 s6, 0x40400000
1931; GFX10PLUS-NEXT:    s_mov_b32 s5, 2.0
1932; GFX10PLUS-NEXT:    s_movrels_b32 s0, s4
1933; GFX10PLUS-NEXT:    v_mov_b32_e32 v0, s0
1934; GFX10PLUS-NEXT:    ; return to shader part epilog
1935entry:
1936  %ext = extractelement <16 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0, float 11.0, float 12.0, float 13.0, float 14.0, float 15.0, float 16.0>, i32 %sel
1937  ret float %ext
1938}
1939
; Extracts a float from a constant <32 x float> vector (1.0 through 32.0) using
; an inreg index in an amdgpu_ps function. All targets expect the thirty-two
; constants materialized into s36-s67, m0 loaded from s2, and the element read
; with s_movrels_b32 based at s36.
1940define amdgpu_ps float @dyn_extract_v32f32_s_s(i32 inreg %sel) {
1941; GCN-LABEL: dyn_extract_v32f32_s_s:
1942; GCN:       ; %bb.0: ; %entry
1943; GCN-NEXT:    s_mov_b32 s36, 1.0
1944; GCN-NEXT:    s_mov_b32 m0, s2
1945; GCN-NEXT:    s_mov_b32 s67, 0x42000000
1946; GCN-NEXT:    s_mov_b32 s66, 0x41f80000
1947; GCN-NEXT:    s_mov_b32 s65, 0x41f00000
1948; GCN-NEXT:    s_mov_b32 s64, 0x41e80000
1949; GCN-NEXT:    s_mov_b32 s63, 0x41e00000
1950; GCN-NEXT:    s_mov_b32 s62, 0x41d80000
1951; GCN-NEXT:    s_mov_b32 s61, 0x41d00000
1952; GCN-NEXT:    s_mov_b32 s60, 0x41c80000
1953; GCN-NEXT:    s_mov_b32 s59, 0x41c00000
1954; GCN-NEXT:    s_mov_b32 s58, 0x41b80000
1955; GCN-NEXT:    s_mov_b32 s57, 0x41b00000
1956; GCN-NEXT:    s_mov_b32 s56, 0x41a80000
1957; GCN-NEXT:    s_mov_b32 s55, 0x41a00000
1958; GCN-NEXT:    s_mov_b32 s54, 0x41980000
1959; GCN-NEXT:    s_mov_b32 s53, 0x41900000
1960; GCN-NEXT:    s_mov_b32 s52, 0x41880000
1961; GCN-NEXT:    s_mov_b32 s51, 0x41800000
1962; GCN-NEXT:    s_mov_b32 s50, 0x41700000
1963; GCN-NEXT:    s_mov_b32 s49, 0x41600000
1964; GCN-NEXT:    s_mov_b32 s48, 0x41500000
1965; GCN-NEXT:    s_mov_b32 s47, 0x41400000
1966; GCN-NEXT:    s_mov_b32 s46, 0x41300000
1967; GCN-NEXT:    s_mov_b32 s45, 0x41200000
1968; GCN-NEXT:    s_mov_b32 s44, 0x41100000
1969; GCN-NEXT:    s_mov_b32 s43, 0x41000000
1970; GCN-NEXT:    s_mov_b32 s42, 0x40e00000
1971; GCN-NEXT:    s_mov_b32 s41, 0x40c00000
1972; GCN-NEXT:    s_mov_b32 s40, 0x40a00000
1973; GCN-NEXT:    s_mov_b32 s39, 4.0
1974; GCN-NEXT:    s_mov_b32 s38, 0x40400000
1975; GCN-NEXT:    s_mov_b32 s37, 2.0
1976; GCN-NEXT:    s_movrels_b32 s0, s36
1977; GCN-NEXT:    v_mov_b32_e32 v0, s0
1978; GCN-NEXT:    ; return to shader part epilog
1979;
1980; GFX10PLUS-LABEL: dyn_extract_v32f32_s_s:
1981; GFX10PLUS:       ; %bb.0: ; %entry
1982; GFX10PLUS-NEXT:    s_mov_b32 s36, 1.0
1983; GFX10PLUS-NEXT:    s_mov_b32 m0, s2
1984; GFX10PLUS-NEXT:    s_mov_b32 s67, 0x42000000
1985; GFX10PLUS-NEXT:    s_mov_b32 s66, 0x41f80000
1986; GFX10PLUS-NEXT:    s_mov_b32 s65, 0x41f00000
1987; GFX10PLUS-NEXT:    s_mov_b32 s64, 0x41e80000
1988; GFX10PLUS-NEXT:    s_mov_b32 s63, 0x41e00000
1989; GFX10PLUS-NEXT:    s_mov_b32 s62, 0x41d80000
1990; GFX10PLUS-NEXT:    s_mov_b32 s61, 0x41d00000
1991; GFX10PLUS-NEXT:    s_mov_b32 s60, 0x41c80000
1992; GFX10PLUS-NEXT:    s_mov_b32 s59, 0x41c00000
1993; GFX10PLUS-NEXT:    s_mov_b32 s58, 0x41b80000
1994; GFX10PLUS-NEXT:    s_mov_b32 s57, 0x41b00000
1995; GFX10PLUS-NEXT:    s_mov_b32 s56, 0x41a80000
1996; GFX10PLUS-NEXT:    s_mov_b32 s55, 0x41a00000
1997; GFX10PLUS-NEXT:    s_mov_b32 s54, 0x41980000
1998; GFX10PLUS-NEXT:    s_mov_b32 s53, 0x41900000
1999; GFX10PLUS-NEXT:    s_mov_b32 s52, 0x41880000
2000; GFX10PLUS-NEXT:    s_mov_b32 s51, 0x41800000
2001; GFX10PLUS-NEXT:    s_mov_b32 s50, 0x41700000
2002; GFX10PLUS-NEXT:    s_mov_b32 s49, 0x41600000
2003; GFX10PLUS-NEXT:    s_mov_b32 s48, 0x41500000
2004; GFX10PLUS-NEXT:    s_mov_b32 s47, 0x41400000
2005; GFX10PLUS-NEXT:    s_mov_b32 s46, 0x41300000
2006; GFX10PLUS-NEXT:    s_mov_b32 s45, 0x41200000
2007; GFX10PLUS-NEXT:    s_mov_b32 s44, 0x41100000
2008; GFX10PLUS-NEXT:    s_mov_b32 s43, 0x41000000
2009; GFX10PLUS-NEXT:    s_mov_b32 s42, 0x40e00000
2010; GFX10PLUS-NEXT:    s_mov_b32 s41, 0x40c00000
2011; GFX10PLUS-NEXT:    s_mov_b32 s40, 0x40a00000
2012; GFX10PLUS-NEXT:    s_mov_b32 s39, 4.0
2013; GFX10PLUS-NEXT:    s_mov_b32 s38, 0x40400000
2014; GFX10PLUS-NEXT:    s_mov_b32 s37, 2.0
2015; GFX10PLUS-NEXT:    s_movrels_b32 s0, s36
2016; GFX10PLUS-NEXT:    v_mov_b32_e32 v0, s0
2017; GFX10PLUS-NEXT:    ; return to shader part epilog
2018entry:
2019  %ext = extractelement <32 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0, float 11.0, float 12.0, float 13.0, float 14.0, float 15.0, float 16.0, float 17.0, float 18.0, float 19.0, float 20.0, float 21.0, float 22.0, float 23.0, float 24.0, float 25.0, float 26.0, float 27.0, float 28.0, float 29.0, float 30.0, float 31.0, float 32.0>, i32 %sel
2020  ret float %ext
2021}
2022
; Extracts a double from a constant <16 x double> vector (1.0 through 16.0)
; using an inreg index in an amdgpu_ps function. All targets expect the
; constants materialized into s36-s67, m0 loaded from s2, and the element read
; with s_movrels_b64 based at s[36:37]. The two check groups list the same
; instructions and differ only in where the s[36:37] and m0 moves are placed.
2023define amdgpu_ps double @dyn_extract_v16f64_s_s(i32 inreg %sel) {
2024; GCN-LABEL: dyn_extract_v16f64_s_s:
2025; GCN:       ; %bb.0: ; %entry
2026; GCN-NEXT:    s_mov_b32 s66, 0
2027; GCN-NEXT:    s_mov_b32 s64, 0
2028; GCN-NEXT:    s_mov_b32 s62, 0
2029; GCN-NEXT:    s_mov_b32 s60, 0
2030; GCN-NEXT:    s_mov_b32 s58, 0
2031; GCN-NEXT:    s_mov_b32 s56, 0
2032; GCN-NEXT:    s_mov_b32 s54, 0
2033; GCN-NEXT:    s_mov_b32 s52, 0
2034; GCN-NEXT:    s_mov_b32 s50, 0
2035; GCN-NEXT:    s_mov_b32 s48, 0
2036; GCN-NEXT:    s_mov_b32 s46, 0
2037; GCN-NEXT:    s_mov_b32 s44, 0
2038; GCN-NEXT:    s_mov_b32 s40, 0
2039; GCN-NEXT:    s_mov_b64 s[36:37], 1.0
2040; GCN-NEXT:    s_mov_b32 m0, s2
2041; GCN-NEXT:    s_mov_b32 s67, 0x40300000
2042; GCN-NEXT:    s_mov_b32 s65, 0x402e0000
2043; GCN-NEXT:    s_mov_b32 s63, 0x402c0000
2044; GCN-NEXT:    s_mov_b32 s61, 0x402a0000
2045; GCN-NEXT:    s_mov_b32 s59, 0x40280000
2046; GCN-NEXT:    s_mov_b32 s57, 0x40260000
2047; GCN-NEXT:    s_mov_b32 s55, 0x40240000
2048; GCN-NEXT:    s_mov_b32 s53, 0x40220000
2049; GCN-NEXT:    s_mov_b32 s51, 0x40200000
2050; GCN-NEXT:    s_mov_b32 s49, 0x401c0000
2051; GCN-NEXT:    s_mov_b32 s47, 0x40180000
2052; GCN-NEXT:    s_mov_b32 s45, 0x40140000
2053; GCN-NEXT:    s_mov_b64 s[42:43], 4.0
2054; GCN-NEXT:    s_mov_b32 s41, 0x40080000
2055; GCN-NEXT:    s_mov_b64 s[38:39], 2.0
2056; GCN-NEXT:    s_movrels_b64 s[0:1], s[36:37]
2057; GCN-NEXT:    ; return to shader part epilog
2058;
2059; GFX10PLUS-LABEL: dyn_extract_v16f64_s_s:
2060; GFX10PLUS:       ; %bb.0: ; %entry
2061; GFX10PLUS-NEXT:    s_mov_b64 s[36:37], 1.0
2062; GFX10PLUS-NEXT:    s_mov_b32 m0, s2
2063; GFX10PLUS-NEXT:    s_mov_b32 s66, 0
2064; GFX10PLUS-NEXT:    s_mov_b32 s64, 0
2065; GFX10PLUS-NEXT:    s_mov_b32 s62, 0
2066; GFX10PLUS-NEXT:    s_mov_b32 s60, 0
2067; GFX10PLUS-NEXT:    s_mov_b32 s58, 0
2068; GFX10PLUS-NEXT:    s_mov_b32 s56, 0
2069; GFX10PLUS-NEXT:    s_mov_b32 s54, 0
2070; GFX10PLUS-NEXT:    s_mov_b32 s52, 0
2071; GFX10PLUS-NEXT:    s_mov_b32 s50, 0
2072; GFX10PLUS-NEXT:    s_mov_b32 s48, 0
2073; GFX10PLUS-NEXT:    s_mov_b32 s46, 0
2074; GFX10PLUS-NEXT:    s_mov_b32 s44, 0
2075; GFX10PLUS-NEXT:    s_mov_b32 s40, 0
2076; GFX10PLUS-NEXT:    s_mov_b32 s67, 0x40300000
2077; GFX10PLUS-NEXT:    s_mov_b32 s65, 0x402e0000
2078; GFX10PLUS-NEXT:    s_mov_b32 s63, 0x402c0000
2079; GFX10PLUS-NEXT:    s_mov_b32 s61, 0x402a0000
2080; GFX10PLUS-NEXT:    s_mov_b32 s59, 0x40280000
2081; GFX10PLUS-NEXT:    s_mov_b32 s57, 0x40260000
2082; GFX10PLUS-NEXT:    s_mov_b32 s55, 0x40240000
2083; GFX10PLUS-NEXT:    s_mov_b32 s53, 0x40220000
2084; GFX10PLUS-NEXT:    s_mov_b32 s51, 0x40200000
2085; GFX10PLUS-NEXT:    s_mov_b32 s49, 0x401c0000
2086; GFX10PLUS-NEXT:    s_mov_b32 s47, 0x40180000
2087; GFX10PLUS-NEXT:    s_mov_b32 s45, 0x40140000
2088; GFX10PLUS-NEXT:    s_mov_b64 s[42:43], 4.0
2089; GFX10PLUS-NEXT:    s_mov_b32 s41, 0x40080000
2090; GFX10PLUS-NEXT:    s_mov_b64 s[38:39], 2.0
2091; GFX10PLUS-NEXT:    s_movrels_b64 s[0:1], s[36:37]
2092; GFX10PLUS-NEXT:    ; return to shader part epilog
2093entry:
2094  %ext = extractelement <16 x double> <double 1.0, double 2.0, double 3.0, double 4.0, double 5.0, double 6.0, double 7.0, double 8.0, double 9.0, double 10.0, double 11.0, double 12.0, double 13.0, double 14.0, double 15.0, double 16.0>, i32 %sel
2095  ret double %ext
2096}
2097
; Extracts a float from a <6 x float> vector passed inreg, using an index that
; is not marked inreg, in an amdgpu_ps function. The gfx900 and fiji checks
; expect each of s2-s7 copied into a vector register before a
; v_cmp_eq_u32 / v_cndmask_b32 chain on the index in v0. The gfx1010 and gfx1100
; checks expect the scalar registers used directly as select operands, with only
; s3 copied into v1 first.
2098define amdgpu_ps float @dyn_extract_v6f32_s_v(<6 x float> inreg %vec, i32 %sel) {
2099; GCN-LABEL: dyn_extract_v6f32_s_v:
2100; GCN:       ; %bb.0: ; %entry
2101; GCN-NEXT:    v_mov_b32_e32 v1, s2
2102; GCN-NEXT:    v_mov_b32_e32 v2, s3
2103; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
2104; GCN-NEXT:    v_mov_b32_e32 v3, s4
2105; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
2106; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v0
2107; GCN-NEXT:    v_mov_b32_e32 v4, s5
2108; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
2109; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v0
2110; GCN-NEXT:    v_mov_b32_e32 v5, s6
2111; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
2112; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v0
2113; GCN-NEXT:    v_mov_b32_e32 v6, s7
2114; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
2115; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v0
2116; GCN-NEXT:    v_cndmask_b32_e32 v0, v1, v6, vcc
2117; GCN-NEXT:    ; return to shader part epilog
2118;
2119; GFX10PLUS-LABEL: dyn_extract_v6f32_s_v:
2120; GFX10PLUS:       ; %bb.0: ; %entry
2121; GFX10PLUS-NEXT:    v_mov_b32_e32 v1, s3
2122; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
2123; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v1, s2, v1, vcc_lo
2124; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v0
2125; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v1, v1, s4, vcc_lo
2126; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v0
2127; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v1, v1, s5, vcc_lo
2128; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v0
2129; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v1, v1, s6, vcc_lo
2130; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v0
2131; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v0, v1, s7, vcc_lo
2132; GFX10PLUS-NEXT:    ; return to shader part epilog
2133entry:
2134  %ext = extractelement <6 x float> %vec, i32 %sel
2135  ret float %ext
2136}
2137
; Extracts a float from a <6 x float> vector at a run-time index, with neither
; argument marked inreg. On every tested target the expected code is a chain of
; v_cmp_eq_u32 / v_cndmask_b32 pairs that tests the index in v6 against 1
; through 5, starting from element 0 in v0.
2138define float @dyn_extract_v6f32_v_v(<6 x float> %vec, i32 %sel) {
2139; GCN-LABEL: dyn_extract_v6f32_v_v:
2140; GCN:       ; %bb.0: ; %entry
2141; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2142; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v6
2143; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
2144; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v6
2145; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
2146; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v6
2147; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
2148; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v6
2149; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
2150; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v6
2151; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
2152; GCN-NEXT:    s_setpc_b64 s[30:31]
2153;
2154; GFX10PLUS-LABEL: dyn_extract_v6f32_v_v:
2155; GFX10PLUS:       ; %bb.0: ; %entry
2156; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2157; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v6
2158; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
2159; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v6
2160; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
2161; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v6
2162; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc_lo
2163; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v6
2164; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc_lo
2165; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v6
2166; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc_lo
2167; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
2168entry:
2169  %ext = extractelement <6 x float> %vec, i32 %sel
2170  ret float %ext
2171}
2172
; Dynamic-index extractelement from <6 x float> in an amdgpu_ps function where
; only the index is inreg. The checks compare the index (in s2) against 1-5 with
; the e64 form of v_cmp_eq_u32 and select among v0-v5 into v0.
2173define amdgpu_ps float @dyn_extract_v6f32_v_s(<6 x float> %vec, i32 inreg %sel) {
2174; GCN-LABEL: dyn_extract_v6f32_v_s:
2175; GCN:       ; %bb.0: ; %entry
2176; GCN-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 1
2177; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
2178; GCN-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 2
2179; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
2180; GCN-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 3
2181; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
2182; GCN-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 4
2183; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
2184; GCN-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 5
2185; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
2186; GCN-NEXT:    ; return to shader part epilog
2187;
2188; GFX10PLUS-LABEL: dyn_extract_v6f32_v_s:
2189; GFX10PLUS:       ; %bb.0: ; %entry
2190; GFX10PLUS-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s2, 1
2191; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
2192; GFX10PLUS-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s2, 2
2193; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
2194; GFX10PLUS-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s2, 3
2195; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc_lo
2196; GFX10PLUS-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s2, 4
2197; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc_lo
2198; GFX10PLUS-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s2, 5
2199; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc_lo
2200; GFX10PLUS-NEXT:    ; return to shader part epilog
2201entry:
2202  %ext = extractelement <6 x float> %vec, i32 %sel
2203  ret float %ext
2204}
2205
; Dynamic-index extractelement from <6 x float> in an amdgpu_ps function with
; both the vector and the index inreg. The checks expect a purely scalar
; s_cmp_eq_u32 / s_cselect_b32 chain (vector in s2-s7, index in s8), followed by
; a single v_mov_b32 of the selected value into v0.
2206define amdgpu_ps float @dyn_extract_v6f32_s_s(<6 x float> inreg %vec, i32 inreg %sel) {
2207; GCN-LABEL: dyn_extract_v6f32_s_s:
2208; GCN:       ; %bb.0: ; %entry
2209; GCN-NEXT:    s_cmp_eq_u32 s8, 1
2210; GCN-NEXT:    s_cselect_b32 s0, s3, s2
2211; GCN-NEXT:    s_cmp_eq_u32 s8, 2
2212; GCN-NEXT:    s_cselect_b32 s0, s4, s0
2213; GCN-NEXT:    s_cmp_eq_u32 s8, 3
2214; GCN-NEXT:    s_cselect_b32 s0, s5, s0
2215; GCN-NEXT:    s_cmp_eq_u32 s8, 4
2216; GCN-NEXT:    s_cselect_b32 s0, s6, s0
2217; GCN-NEXT:    s_cmp_eq_u32 s8, 5
2218; GCN-NEXT:    s_cselect_b32 s0, s7, s0
2219; GCN-NEXT:    v_mov_b32_e32 v0, s0
2220; GCN-NEXT:    ; return to shader part epilog
2221;
2222; GFX10PLUS-LABEL: dyn_extract_v6f32_s_s:
2223; GFX10PLUS:       ; %bb.0: ; %entry
2224; GFX10PLUS-NEXT:    s_cmp_eq_u32 s8, 1
2225; GFX10PLUS-NEXT:    s_cselect_b32 s0, s3, s2
2226; GFX10PLUS-NEXT:    s_cmp_eq_u32 s8, 2
2227; GFX10PLUS-NEXT:    s_cselect_b32 s0, s4, s0
2228; GFX10PLUS-NEXT:    s_cmp_eq_u32 s8, 3
2229; GFX10PLUS-NEXT:    s_cselect_b32 s0, s5, s0
2230; GFX10PLUS-NEXT:    s_cmp_eq_u32 s8, 4
2231; GFX10PLUS-NEXT:    s_cselect_b32 s0, s6, s0
2232; GFX10PLUS-NEXT:    s_cmp_eq_u32 s8, 5
2233; GFX10PLUS-NEXT:    s_cselect_b32 s0, s7, s0
2234; GFX10PLUS-NEXT:    v_mov_b32_e32 v0, s0
2235; GFX10PLUS-NEXT:    ; return to shader part epilog
2236entry:
2237  %ext = extractelement <6 x float> %vec, i32 %sel
2238  ret float %ext
2239}
2240
; Dynamic-index extractelement from <7 x float> in an amdgpu_ps function where
; only the vector is inreg (s2-s8 in the checks) and the index is in v0.
; The GCN checks copy every element into a VGPR before each v_cndmask_b32; the
; GFX10PLUS checks copy only s3 and use the other SGPRs directly as operands.
2241define amdgpu_ps float @dyn_extract_v7f32_s_v(<7 x float> inreg %vec, i32 %sel) {
2242; GCN-LABEL: dyn_extract_v7f32_s_v:
2243; GCN:       ; %bb.0: ; %entry
2244; GCN-NEXT:    v_mov_b32_e32 v1, s2
2245; GCN-NEXT:    v_mov_b32_e32 v2, s3
2246; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
2247; GCN-NEXT:    v_mov_b32_e32 v3, s4
2248; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
2249; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v0
2250; GCN-NEXT:    v_mov_b32_e32 v4, s5
2251; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
2252; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v0
2253; GCN-NEXT:    v_mov_b32_e32 v5, s6
2254; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
2255; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v0
2256; GCN-NEXT:    v_mov_b32_e32 v6, s7
2257; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
2258; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v0
2259; GCN-NEXT:    v_mov_b32_e32 v7, s8
2260; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v6, vcc
2261; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v0
2262; GCN-NEXT:    v_cndmask_b32_e32 v0, v1, v7, vcc
2263; GCN-NEXT:    ; return to shader part epilog
2264;
2265; GFX10PLUS-LABEL: dyn_extract_v7f32_s_v:
2266; GFX10PLUS:       ; %bb.0: ; %entry
2267; GFX10PLUS-NEXT:    v_mov_b32_e32 v1, s3
2268; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
2269; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v1, s2, v1, vcc_lo
2270; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v0
2271; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v1, v1, s4, vcc_lo
2272; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v0
2273; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v1, v1, s5, vcc_lo
2274; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v0
2275; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v1, v1, s6, vcc_lo
2276; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v0
2277; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v1, v1, s7, vcc_lo
2278; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v0
2279; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v0, v1, s8, vcc_lo
2280; GFX10PLUS-NEXT:    ; return to shader part epilog
2281entry:
2282  %ext = extractelement <7 x float> %vec, i32 %sel
2283  ret float %ext
2284}
2285
; Dynamic-index extractelement from <7 x float>; neither argument is inreg.
; The checks expect a v_cmp_eq_u32 / v_cndmask_b32 chain over indices 1-6,
; with the index in v7, the elements in v0-v6 and the result left in v0.
2286define float @dyn_extract_v7f32_v_v(<7 x float> %vec, i32 %sel) {
2287; GCN-LABEL: dyn_extract_v7f32_v_v:
2288; GCN:       ; %bb.0: ; %entry
2289; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2290; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v7
2291; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
2292; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v7
2293; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
2294; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v7
2295; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
2296; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v7
2297; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
2298; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v7
2299; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
2300; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v7
2301; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
2302; GCN-NEXT:    s_setpc_b64 s[30:31]
2303;
2304; GFX10PLUS-LABEL: dyn_extract_v7f32_v_v:
2305; GFX10PLUS:       ; %bb.0: ; %entry
2306; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2307; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v7
2308; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
2309; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v7
2310; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
2311; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v7
2312; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc_lo
2313; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v7
2314; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc_lo
2315; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v7
2316; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc_lo
2317; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v7
2318; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc_lo
2319; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
2320entry:
2321  %ext = extractelement <7 x float> %vec, i32 %sel
2322  ret float %ext
2323}
2324
; Dynamic-index extractelement from <7 x float> in an amdgpu_ps function where
; only the index is inreg. The checks compare the index (in s2) against 1-6 with
; the e64 form of v_cmp_eq_u32 and select among v0-v6 into v0.
2325define amdgpu_ps float @dyn_extract_v7f32_v_s(<7 x float> %vec, i32 inreg %sel) {
2326; GCN-LABEL: dyn_extract_v7f32_v_s:
2327; GCN:       ; %bb.0: ; %entry
2328; GCN-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 1
2329; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
2330; GCN-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 2
2331; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
2332; GCN-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 3
2333; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
2334; GCN-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 4
2335; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
2336; GCN-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 5
2337; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
2338; GCN-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 6
2339; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
2340; GCN-NEXT:    ; return to shader part epilog
2341;
2342; GFX10PLUS-LABEL: dyn_extract_v7f32_v_s:
2343; GFX10PLUS:       ; %bb.0: ; %entry
2344; GFX10PLUS-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s2, 1
2345; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
2346; GFX10PLUS-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s2, 2
2347; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
2348; GFX10PLUS-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s2, 3
2349; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc_lo
2350; GFX10PLUS-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s2, 4
2351; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc_lo
2352; GFX10PLUS-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s2, 5
2353; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc_lo
2354; GFX10PLUS-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s2, 6
2355; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc_lo
2356; GFX10PLUS-NEXT:    ; return to shader part epilog
2357entry:
2358  %ext = extractelement <7 x float> %vec, i32 %sel
2359  ret float %ext
2360}
2361
; Dynamic-index extractelement from <7 x float> in an amdgpu_ps function with
; both the vector and the index inreg. The checks expect a purely scalar
; s_cmp_eq_u32 / s_cselect_b32 chain (vector in s2-s8, index in s9), followed by
; a single v_mov_b32 of the selected value into v0.
2362define amdgpu_ps float @dyn_extract_v7f32_s_s(<7 x float> inreg %vec, i32 inreg %sel) {
2363; GCN-LABEL: dyn_extract_v7f32_s_s:
2364; GCN:       ; %bb.0: ; %entry
2365; GCN-NEXT:    s_cmp_eq_u32 s9, 1
2366; GCN-NEXT:    s_cselect_b32 s0, s3, s2
2367; GCN-NEXT:    s_cmp_eq_u32 s9, 2
2368; GCN-NEXT:    s_cselect_b32 s0, s4, s0
2369; GCN-NEXT:    s_cmp_eq_u32 s9, 3
2370; GCN-NEXT:    s_cselect_b32 s0, s5, s0
2371; GCN-NEXT:    s_cmp_eq_u32 s9, 4
2372; GCN-NEXT:    s_cselect_b32 s0, s6, s0
2373; GCN-NEXT:    s_cmp_eq_u32 s9, 5
2374; GCN-NEXT:    s_cselect_b32 s0, s7, s0
2375; GCN-NEXT:    s_cmp_eq_u32 s9, 6
2376; GCN-NEXT:    s_cselect_b32 s0, s8, s0
2377; GCN-NEXT:    v_mov_b32_e32 v0, s0
2378; GCN-NEXT:    ; return to shader part epilog
2379;
2380; GFX10PLUS-LABEL: dyn_extract_v7f32_s_s:
2381; GFX10PLUS:       ; %bb.0: ; %entry
2382; GFX10PLUS-NEXT:    s_cmp_eq_u32 s9, 1
2383; GFX10PLUS-NEXT:    s_cselect_b32 s0, s3, s2
2384; GFX10PLUS-NEXT:    s_cmp_eq_u32 s9, 2
2385; GFX10PLUS-NEXT:    s_cselect_b32 s0, s4, s0
2386; GFX10PLUS-NEXT:    s_cmp_eq_u32 s9, 3
2387; GFX10PLUS-NEXT:    s_cselect_b32 s0, s5, s0
2388; GFX10PLUS-NEXT:    s_cmp_eq_u32 s9, 4
2389; GFX10PLUS-NEXT:    s_cselect_b32 s0, s6, s0
2390; GFX10PLUS-NEXT:    s_cmp_eq_u32 s9, 5
2391; GFX10PLUS-NEXT:    s_cselect_b32 s0, s7, s0
2392; GFX10PLUS-NEXT:    s_cmp_eq_u32 s9, 6
2393; GFX10PLUS-NEXT:    s_cselect_b32 s0, s8, s0
2394; GFX10PLUS-NEXT:    v_mov_b32_e32 v0, s0
2395; GFX10PLUS-NEXT:    ; return to shader part epilog
2396entry:
2397  %ext = extractelement <7 x float> %vec, i32 %sel
2398  ret float %ext
2399}
2400
; Dynamic-index extractelement from <6 x double> in an amdgpu_ps function where
; only the vector is inreg (s2-s13 in the checks) and the index is in v0.
; Each 64-bit element is selected as two 32-bit halves under one compare, and the
; result is moved back to s0/s1 with v_readfirstlane_b32.
; GFX10 and GFX11 are checked separately; the GFX11 output uses v_dual_mov_b32.
2401define amdgpu_ps double @dyn_extract_v6f64_s_v(<6 x double> inreg %vec, i32 %sel) {
2402; GCN-LABEL: dyn_extract_v6f64_s_v:
2403; GCN:       ; %bb.0: ; %entry
2404; GCN-NEXT:    v_mov_b32_e32 v1, s2
2405; GCN-NEXT:    v_mov_b32_e32 v2, s3
2406; GCN-NEXT:    v_mov_b32_e32 v3, s4
2407; GCN-NEXT:    v_mov_b32_e32 v4, s5
2408; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
2409; GCN-NEXT:    v_mov_b32_e32 v5, s6
2410; GCN-NEXT:    v_mov_b32_e32 v6, s7
2411; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
2412; GCN-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
2413; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v0
2414; GCN-NEXT:    v_mov_b32_e32 v7, s8
2415; GCN-NEXT:    v_mov_b32_e32 v8, s9
2416; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
2417; GCN-NEXT:    v_cndmask_b32_e32 v2, v2, v6, vcc
2418; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v0
2419; GCN-NEXT:    v_mov_b32_e32 v9, s10
2420; GCN-NEXT:    v_mov_b32_e32 v10, s11
2421; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
2422; GCN-NEXT:    v_cndmask_b32_e32 v2, v2, v8, vcc
2423; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v0
2424; GCN-NEXT:    v_mov_b32_e32 v11, s12
2425; GCN-NEXT:    v_mov_b32_e32 v12, s13
2426; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc
2427; GCN-NEXT:    v_cndmask_b32_e32 v2, v2, v10, vcc
2428; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v0
2429; GCN-NEXT:    v_cndmask_b32_e32 v0, v1, v11, vcc
2430; GCN-NEXT:    v_cndmask_b32_e32 v1, v2, v12, vcc
2431; GCN-NEXT:    v_readfirstlane_b32 s0, v0
2432; GCN-NEXT:    v_readfirstlane_b32 s1, v1
2433; GCN-NEXT:    ; return to shader part epilog
2434;
2435; GFX10-LABEL: dyn_extract_v6f64_s_v:
2436; GFX10:       ; %bb.0: ; %entry
2437; GFX10-NEXT:    v_mov_b32_e32 v1, s4
2438; GFX10-NEXT:    v_mov_b32_e32 v2, s5
2439; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
2440; GFX10-NEXT:    v_cndmask_b32_e32 v1, s2, v1, vcc_lo
2441; GFX10-NEXT:    v_cndmask_b32_e32 v2, s3, v2, vcc_lo
2442; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v0
2443; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s6, vcc_lo
2444; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, s7, vcc_lo
2445; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v0
2446; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s8, vcc_lo
2447; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, s9, vcc_lo
2448; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v0
2449; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s10, vcc_lo
2450; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, s11, vcc_lo
2451; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v0
2452; GFX10-NEXT:    v_cndmask_b32_e64 v0, v1, s12, vcc_lo
2453; GFX10-NEXT:    v_cndmask_b32_e64 v1, v2, s13, vcc_lo
2454; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
2455; GFX10-NEXT:    v_readfirstlane_b32 s1, v1
2456; GFX10-NEXT:    ; return to shader part epilog
2457;
2458; GFX11-LABEL: dyn_extract_v6f64_s_v:
2459; GFX11:       ; %bb.0: ; %entry
2460; GFX11-NEXT:    v_dual_mov_b32 v1, s4 :: v_dual_mov_b32 v2, s5
2461; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
2462; GFX11-NEXT:    v_cndmask_b32_e32 v1, s2, v1, vcc_lo
2463; GFX11-NEXT:    v_cndmask_b32_e32 v2, s3, v2, vcc_lo
2464; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v0
2465; GFX11-NEXT:    v_cndmask_b32_e64 v1, v1, s6, vcc_lo
2466; GFX11-NEXT:    v_cndmask_b32_e64 v2, v2, s7, vcc_lo
2467; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v0
2468; GFX11-NEXT:    v_cndmask_b32_e64 v1, v1, s8, vcc_lo
2469; GFX11-NEXT:    v_cndmask_b32_e64 v2, v2, s9, vcc_lo
2470; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v0
2471; GFX11-NEXT:    v_cndmask_b32_e64 v1, v1, s10, vcc_lo
2472; GFX11-NEXT:    v_cndmask_b32_e64 v2, v2, s11, vcc_lo
2473; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v0
2474; GFX11-NEXT:    v_cndmask_b32_e64 v0, v1, s12, vcc_lo
2475; GFX11-NEXT:    v_cndmask_b32_e64 v1, v2, s13, vcc_lo
2476; GFX11-NEXT:    v_readfirstlane_b32 s0, v0
2477; GFX11-NEXT:    v_readfirstlane_b32 s1, v1
2478; GFX11-NEXT:    ; return to shader part epilog
2479entry:
2480  %ext = extractelement <6 x double> %vec, i32 %sel
2481  ret double %ext
2482}
2483
; Dynamic-index extractelement from <6 x double>; neither argument is inreg.
; The checks expect one compare per index 1-5 (index in v12) followed by two
; selects, one per 32-bit half, over the register pairs in v0-v11.
; GFX10 and GFX11 are checked separately; the GFX11 output pairs the two selects
; into a single v_dual_cndmask_b32.
2484define double @dyn_extract_v6f64_v_v(<6 x double> %vec, i32 %sel) {
2485; GCN-LABEL: dyn_extract_v6f64_v_v:
2486; GCN:       ; %bb.0: ; %entry
2487; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2488; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v12
2489; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
2490; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
2491; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v12
2492; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
2493; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
2494; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v12
2495; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
2496; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
2497; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v12
2498; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc
2499; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc
2500; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v12
2501; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc
2502; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v11, vcc
2503; GCN-NEXT:    s_setpc_b64 s[30:31]
2504;
2505; GFX10-LABEL: dyn_extract_v6f64_v_v:
2506; GFX10:       ; %bb.0: ; %entry
2507; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2508; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v12
2509; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
2510; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc_lo
2511; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v12
2512; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc_lo
2513; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc_lo
2514; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v12
2515; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc_lo
2516; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc_lo
2517; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v12
2518; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc_lo
2519; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc_lo
2520; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v12
2521; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc_lo
2522; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v11, vcc_lo
2523; GFX10-NEXT:    s_setpc_b64 s[30:31]
2524;
2525; GFX11-LABEL: dyn_extract_v6f64_v_v:
2526; GFX11:       ; %bb.0: ; %entry
2527; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2528; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v12
2529; GFX11-NEXT:    v_dual_cndmask_b32 v0, v0, v2 :: v_dual_cndmask_b32 v1, v1, v3
2530; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v12
2531; GFX11-NEXT:    v_dual_cndmask_b32 v0, v0, v4 :: v_dual_cndmask_b32 v1, v1, v5
2532; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v12
2533; GFX11-NEXT:    v_dual_cndmask_b32 v0, v0, v6 :: v_dual_cndmask_b32 v1, v1, v7
2534; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v12
2535; GFX11-NEXT:    v_dual_cndmask_b32 v0, v0, v8 :: v_dual_cndmask_b32 v1, v1, v9
2536; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v12
2537; GFX11-NEXT:    v_dual_cndmask_b32 v0, v0, v10 :: v_dual_cndmask_b32 v1, v1, v11
2538; GFX11-NEXT:    s_setpc_b64 s[30:31]
2539entry:
2540  %ext = extractelement <6 x double> %vec, i32 %sel
2541  ret double %ext
2542}
2543
; Dynamic-index extractelement from <6 x double> in an amdgpu_ps function where
; only the index is inreg (s2 in the checks). Instead of a compare/select chain,
; the checks expect register-indexed moves: the index is shifted left by 1, then
; the GPRIDX run (gfx900) brackets two v_mov_b32 with s_set_gpr_idx_on/off, while
; the MOVREL run (fiji) and the GFX10PLUS runs use v_movrels_b32 through m0.
; Both halves are then moved to s0/s1 with v_readfirstlane_b32.
2544define amdgpu_ps double @dyn_extract_v6f64_v_s(<6 x double> %vec, i32 inreg %sel) {
2545; GPRIDX-LABEL: dyn_extract_v6f64_v_s:
2546; GPRIDX:       ; %bb.0: ; %entry
2547; GPRIDX-NEXT:    s_lshl_b32 s0, s2, 1
2548; GPRIDX-NEXT:    s_set_gpr_idx_on s0, gpr_idx(SRC0)
2549; GPRIDX-NEXT:    v_mov_b32_e32 v12, v0
2550; GPRIDX-NEXT:    v_mov_b32_e32 v0, v1
2551; GPRIDX-NEXT:    s_set_gpr_idx_off
2552; GPRIDX-NEXT:    v_readfirstlane_b32 s0, v12
2553; GPRIDX-NEXT:    v_readfirstlane_b32 s1, v0
2554; GPRIDX-NEXT:    ; return to shader part epilog
2555;
2556; MOVREL-LABEL: dyn_extract_v6f64_v_s:
2557; MOVREL:       ; %bb.0: ; %entry
2558; MOVREL-NEXT:    s_lshl_b32 m0, s2, 1
2559; MOVREL-NEXT:    v_movrels_b32_e32 v12, v0
2560; MOVREL-NEXT:    v_movrels_b32_e32 v0, v1
2561; MOVREL-NEXT:    v_readfirstlane_b32 s0, v12
2562; MOVREL-NEXT:    v_readfirstlane_b32 s1, v0
2563; MOVREL-NEXT:    ; return to shader part epilog
2564;
2565; GFX10PLUS-LABEL: dyn_extract_v6f64_v_s:
2566; GFX10PLUS:       ; %bb.0: ; %entry
2567; GFX10PLUS-NEXT:    s_lshl_b32 m0, s2, 1
2568; GFX10PLUS-NEXT:    v_movrels_b32_e32 v12, v0
2569; GFX10PLUS-NEXT:    v_movrels_b32_e32 v0, v1
2570; GFX10PLUS-NEXT:    v_readfirstlane_b32 s0, v12
2571; GFX10PLUS-NEXT:    v_readfirstlane_b32 s1, v0
2572; GFX10PLUS-NEXT:    ; return to shader part epilog
2573entry:
2574  %ext = extractelement <6 x double> %vec, i32 %sel
2575  ret double %ext
2576}
2577
; Dynamic-index extractelement from <6 x double> in an amdgpu_ps function with
; both the vector and the index inreg. The checks expect the twelve vector dwords
; to be copied down from s2-s13 to s0-s11 and the index (s14) copied into m0,
; after which one s_movrels_b64 produces the result in s[0:1].
2578define amdgpu_ps double @dyn_extract_v6f64_s_s(<6 x double> inreg %vec, i32 inreg %sel) {
2579; GCN-LABEL: dyn_extract_v6f64_s_s:
2580; GCN:       ; %bb.0: ; %entry
2581; GCN-NEXT:    s_mov_b32 s0, s2
2582; GCN-NEXT:    s_mov_b32 s1, s3
2583; GCN-NEXT:    s_mov_b32 m0, s14
2584; GCN-NEXT:    s_mov_b32 s2, s4
2585; GCN-NEXT:    s_mov_b32 s3, s5
2586; GCN-NEXT:    s_mov_b32 s4, s6
2587; GCN-NEXT:    s_mov_b32 s5, s7
2588; GCN-NEXT:    s_mov_b32 s6, s8
2589; GCN-NEXT:    s_mov_b32 s7, s9
2590; GCN-NEXT:    s_mov_b32 s8, s10
2591; GCN-NEXT:    s_mov_b32 s9, s11
2592; GCN-NEXT:    s_mov_b32 s10, s12
2593; GCN-NEXT:    s_mov_b32 s11, s13
2594; GCN-NEXT:    s_movrels_b64 s[0:1], s[0:1]
2595; GCN-NEXT:    ; return to shader part epilog
2596;
2597; GFX10PLUS-LABEL: dyn_extract_v6f64_s_s:
2598; GFX10PLUS:       ; %bb.0: ; %entry
2599; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
2600; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
2601; GFX10PLUS-NEXT:    s_mov_b32 m0, s14
2602; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
2603; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
2604; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
2605; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
2606; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
2607; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
2608; GFX10PLUS-NEXT:    s_mov_b32 s8, s10
2609; GFX10PLUS-NEXT:    s_mov_b32 s9, s11
2610; GFX10PLUS-NEXT:    s_mov_b32 s10, s12
2611; GFX10PLUS-NEXT:    s_mov_b32 s11, s13
2612; GFX10PLUS-NEXT:    s_movrels_b64 s[0:1], s[0:1]
2613; GFX10PLUS-NEXT:    ; return to shader part epilog
2614entry:
2615  %ext = extractelement <6 x double> %vec, i32 %sel
2616  ret double %ext
2617}
2618
; Dynamic-index extractelement from a <7 x double> obtained by bitcasting an
; inreg <14 x float> argument (s2-s15 in the checks); the index is in v0.
; Each 64-bit element is selected as two 32-bit halves under one compare, and the
; result is moved back to s0/s1 with v_readfirstlane_b32.
; The checks also contain a final compare against index 7 although the vector
; has only seven elements - presumably the vector is widened to eight elements
; during lowering; TODO confirm.
2619define amdgpu_ps double @dyn_extract_v7f64_s_v_bitcast(<14 x float> inreg %userData, i32 %sel) {
2620; GCN-LABEL: dyn_extract_v7f64_s_v_bitcast:
2621; GCN:       ; %bb.0: ; %entry
2622; GCN-NEXT:    v_mov_b32_e32 v1, s2
2623; GCN-NEXT:    v_mov_b32_e32 v2, s3
2624; GCN-NEXT:    v_mov_b32_e32 v3, s4
2625; GCN-NEXT:    v_mov_b32_e32 v4, s5
2626; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
2627; GCN-NEXT:    v_mov_b32_e32 v5, s6
2628; GCN-NEXT:    v_mov_b32_e32 v6, s7
2629; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
2630; GCN-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
2631; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v0
2632; GCN-NEXT:    v_mov_b32_e32 v7, s8
2633; GCN-NEXT:    v_mov_b32_e32 v8, s9
2634; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
2635; GCN-NEXT:    v_cndmask_b32_e32 v2, v2, v6, vcc
2636; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v0
2637; GCN-NEXT:    v_mov_b32_e32 v9, s10
2638; GCN-NEXT:    v_mov_b32_e32 v10, s11
2639; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
2640; GCN-NEXT:    v_cndmask_b32_e32 v2, v2, v8, vcc
2641; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v0
2642; GCN-NEXT:    v_mov_b32_e32 v11, s12
2643; GCN-NEXT:    v_mov_b32_e32 v12, s13
2644; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc
2645; GCN-NEXT:    v_cndmask_b32_e32 v2, v2, v10, vcc
2646; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v0
2647; GCN-NEXT:    v_mov_b32_e32 v13, s14
2648; GCN-NEXT:    v_mov_b32_e32 v14, s15
2649; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v11, vcc
2650; GCN-NEXT:    v_cndmask_b32_e32 v2, v2, v12, vcc
2651; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v0
2652; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v13, vcc
2653; GCN-NEXT:    v_cndmask_b32_e32 v2, v2, v14, vcc
2654; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 7, v0
2655; GCN-NEXT:    ; kill: def $vgpr15 killed $sgpr2 killed $exec
2656; GCN-NEXT:    ; kill: def $vgpr16 killed $sgpr3 killed $exec
2657; GCN-NEXT:    v_cndmask_b32_e32 v0, v1, v15, vcc
2658; GCN-NEXT:    v_cndmask_b32_e32 v1, v2, v16, vcc
2659; GCN-NEXT:    v_readfirstlane_b32 s0, v0
2660; GCN-NEXT:    v_readfirstlane_b32 s1, v1
2661; GCN-NEXT:    ; return to shader part epilog
2662;
2663; GFX10-LABEL: dyn_extract_v7f64_s_v_bitcast:
2664; GFX10:       ; %bb.0: ; %entry
2665; GFX10-NEXT:    v_mov_b32_e32 v1, s4
2666; GFX10-NEXT:    v_mov_b32_e32 v2, s5
2667; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
2668; GFX10-NEXT:    s_mov_b32 s0, s14
2669; GFX10-NEXT:    v_cndmask_b32_e32 v1, s2, v1, vcc_lo
2670; GFX10-NEXT:    v_cndmask_b32_e32 v2, s3, v2, vcc_lo
2671; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v0
2672; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s6, vcc_lo
2673; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, s7, vcc_lo
2674; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v0
2675; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s8, vcc_lo
2676; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, s9, vcc_lo
2677; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v0
2678; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s10, vcc_lo
2679; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, s11, vcc_lo
2680; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v0
2681; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s12, vcc_lo
2682; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, s13, vcc_lo
2683; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v0
2684; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s0, vcc_lo
2685; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, s15, vcc_lo
2686; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v0
2687; GFX10-NEXT:    v_cndmask_b32_e64 v0, v1, s2, vcc_lo
2688; GFX10-NEXT:    v_cndmask_b32_e64 v1, v2, s3, vcc_lo
2689; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
2690; GFX10-NEXT:    v_readfirstlane_b32 s1, v1
2691; GFX10-NEXT:    ; return to shader part epilog
2692;
2693; GFX11-LABEL: dyn_extract_v7f64_s_v_bitcast:
2694; GFX11:       ; %bb.0: ; %entry
2695; GFX11-NEXT:    v_dual_mov_b32 v1, s4 :: v_dual_mov_b32 v2, s5
2696; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
2697; GFX11-NEXT:    s_mov_b32 s0, s14
2698; GFX11-NEXT:    v_cndmask_b32_e32 v1, s2, v1, vcc_lo
2699; GFX11-NEXT:    v_cndmask_b32_e32 v2, s3, v2, vcc_lo
2700; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v0
2701; GFX11-NEXT:    v_cndmask_b32_e64 v1, v1, s6, vcc_lo
2702; GFX11-NEXT:    v_cndmask_b32_e64 v2, v2, s7, vcc_lo
2703; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v0
2704; GFX11-NEXT:    v_cndmask_b32_e64 v1, v1, s8, vcc_lo
2705; GFX11-NEXT:    v_cndmask_b32_e64 v2, v2, s9, vcc_lo
2706; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v0
2707; GFX11-NEXT:    v_cndmask_b32_e64 v1, v1, s10, vcc_lo
2708; GFX11-NEXT:    v_cndmask_b32_e64 v2, v2, s11, vcc_lo
2709; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v0
2710; GFX11-NEXT:    v_cndmask_b32_e64 v1, v1, s12, vcc_lo
2711; GFX11-NEXT:    v_cndmask_b32_e64 v2, v2, s13, vcc_lo
2712; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v0
2713; GFX11-NEXT:    v_cndmask_b32_e64 v1, v1, s0, vcc_lo
2714; GFX11-NEXT:    v_cndmask_b32_e64 v2, v2, s15, vcc_lo
2715; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v0
2716; GFX11-NEXT:    v_cndmask_b32_e64 v0, v1, s2, vcc_lo
2717; GFX11-NEXT:    v_cndmask_b32_e64 v1, v2, s3, vcc_lo
2718; GFX11-NEXT:    v_readfirstlane_b32 s0, v0
2719; GFX11-NEXT:    v_readfirstlane_b32 s1, v1
2720; GFX11-NEXT:    ; return to shader part epilog
2721entry:
2722  %bc = bitcast <14 x float> %userData to <7 x double>
2723  %ext = extractelement <7 x double> %bc, i32 %sel
2724  ret double %ext
2725}
2726
; Extracts element 4 of a <7 x i64> obtained by bitcasting an inreg <14 x i32>
; argument. The checks expect only two scalar copies, s10/s11 into s0/s1.
; NOTE(review): despite the dyn_ name, the index is the constant 4 and %sel is
; never used, so no dynamic indexing is exercised here - confirm this is intended.
2727define amdgpu_ps i64 @dyn_extract_v7i64_s_v_bitcast(<14 x i32> inreg %userData, i32 %sel) {
2728; GCN-LABEL: dyn_extract_v7i64_s_v_bitcast:
2729; GCN:       ; %bb.0: ; %entry
2730; GCN-NEXT:    s_mov_b32 s0, s10
2731; GCN-NEXT:    s_mov_b32 s1, s11
2732; GCN-NEXT:    ; return to shader part epilog
2733;
2734; GFX10PLUS-LABEL: dyn_extract_v7i64_s_v_bitcast:
2735; GFX10PLUS:       ; %bb.0: ; %entry
2736; GFX10PLUS-NEXT:    s_mov_b32 s0, s10
2737; GFX10PLUS-NEXT:    s_mov_b32 s1, s11
2738; GFX10PLUS-NEXT:    ; return to shader part epilog
2739entry:
2740  %.bc = bitcast <14 x i32> %userData to <7 x i64>
2741  %ext = extractelement <7 x i64> %.bc, i32 4
2742  ret i64 %ext
2743}
2744
; Dynamic-index extractelement from <7 x double> in an amdgpu_ps function where
; only the vector is inreg (s2-s15 in the checks) and the index is in v0.
; Each 64-bit element is selected as two 32-bit halves under one compare, and the
; result is moved back to s0/s1 with v_readfirstlane_b32.
; The checks also contain a final compare against index 7 although the vector
; has only seven elements - presumably the vector is widened to eight elements
; during lowering; TODO confirm.
2745define amdgpu_ps double @dyn_extract_v7f64_s_v(<7 x double> inreg %vec, i32 %sel) {
2746; GCN-LABEL: dyn_extract_v7f64_s_v:
2747; GCN:       ; %bb.0: ; %entry
2748; GCN-NEXT:    v_mov_b32_e32 v1, s2
2749; GCN-NEXT:    v_mov_b32_e32 v2, s3
2750; GCN-NEXT:    v_mov_b32_e32 v3, s4
2751; GCN-NEXT:    v_mov_b32_e32 v4, s5
2752; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
2753; GCN-NEXT:    v_mov_b32_e32 v5, s6
2754; GCN-NEXT:    v_mov_b32_e32 v6, s7
2755; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
2756; GCN-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
2757; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v0
2758; GCN-NEXT:    v_mov_b32_e32 v7, s8
2759; GCN-NEXT:    v_mov_b32_e32 v8, s9
2760; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
2761; GCN-NEXT:    v_cndmask_b32_e32 v2, v2, v6, vcc
2762; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v0
2763; GCN-NEXT:    v_mov_b32_e32 v9, s10
2764; GCN-NEXT:    v_mov_b32_e32 v10, s11
2765; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
2766; GCN-NEXT:    v_cndmask_b32_e32 v2, v2, v8, vcc
2767; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v0
2768; GCN-NEXT:    v_mov_b32_e32 v11, s12
2769; GCN-NEXT:    v_mov_b32_e32 v12, s13
2770; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc
2771; GCN-NEXT:    v_cndmask_b32_e32 v2, v2, v10, vcc
2772; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v0
2773; GCN-NEXT:    v_mov_b32_e32 v13, s14
2774; GCN-NEXT:    v_mov_b32_e32 v14, s15
2775; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v11, vcc
2776; GCN-NEXT:    v_cndmask_b32_e32 v2, v2, v12, vcc
2777; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v0
2778; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v13, vcc
2779; GCN-NEXT:    v_cndmask_b32_e32 v2, v2, v14, vcc
2780; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 7, v0
2781; GCN-NEXT:    ; kill: def $vgpr15 killed $sgpr2 killed $exec
2782; GCN-NEXT:    ; kill: def $vgpr16 killed $sgpr3 killed $exec
2783; GCN-NEXT:    v_cndmask_b32_e32 v0, v1, v15, vcc
2784; GCN-NEXT:    v_cndmask_b32_e32 v1, v2, v16, vcc
2785; GCN-NEXT:    v_readfirstlane_b32 s0, v0
2786; GCN-NEXT:    v_readfirstlane_b32 s1, v1
2787; GCN-NEXT:    ; return to shader part epilog
2788;
2789; GFX10-LABEL: dyn_extract_v7f64_s_v:
2790; GFX10:       ; %bb.0: ; %entry
2791; GFX10-NEXT:    v_mov_b32_e32 v1, s4
2792; GFX10-NEXT:    v_mov_b32_e32 v2, s5
2793; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
2794; GFX10-NEXT:    s_mov_b32 s0, s14
2795; GFX10-NEXT:    v_cndmask_b32_e32 v1, s2, v1, vcc_lo
2796; GFX10-NEXT:    v_cndmask_b32_e32 v2, s3, v2, vcc_lo
2797; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v0
2798; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s6, vcc_lo
2799; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, s7, vcc_lo
2800; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v0
2801; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s8, vcc_lo
2802; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, s9, vcc_lo
2803; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v0
2804; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s10, vcc_lo
2805; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, s11, vcc_lo
2806; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v0
2807; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s12, vcc_lo
2808; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, s13, vcc_lo
2809; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v0
2810; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s0, vcc_lo
2811; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, s15, vcc_lo
2812; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v0
2813; GFX10-NEXT:    v_cndmask_b32_e64 v0, v1, s2, vcc_lo
2814; GFX10-NEXT:    v_cndmask_b32_e64 v1, v2, s3, vcc_lo
2815; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
2816; GFX10-NEXT:    v_readfirstlane_b32 s1, v1
2817; GFX10-NEXT:    ; return to shader part epilog
2818;
2819; GFX11-LABEL: dyn_extract_v7f64_s_v:
2820; GFX11:       ; %bb.0: ; %entry
2821; GFX11-NEXT:    v_dual_mov_b32 v1, s4 :: v_dual_mov_b32 v2, s5
2822; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
2823; GFX11-NEXT:    s_mov_b32 s0, s14
2824; GFX11-NEXT:    v_cndmask_b32_e32 v1, s2, v1, vcc_lo
2825; GFX11-NEXT:    v_cndmask_b32_e32 v2, s3, v2, vcc_lo
2826; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v0
2827; GFX11-NEXT:    v_cndmask_b32_e64 v1, v1, s6, vcc_lo
2828; GFX11-NEXT:    v_cndmask_b32_e64 v2, v2, s7, vcc_lo
2829; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v0
2830; GFX11-NEXT:    v_cndmask_b32_e64 v1, v1, s8, vcc_lo
2831; GFX11-NEXT:    v_cndmask_b32_e64 v2, v2, s9, vcc_lo
2832; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v0
2833; GFX11-NEXT:    v_cndmask_b32_e64 v1, v1, s10, vcc_lo
2834; GFX11-NEXT:    v_cndmask_b32_e64 v2, v2, s11, vcc_lo
2835; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v0
2836; GFX11-NEXT:    v_cndmask_b32_e64 v1, v1, s12, vcc_lo
2837; GFX11-NEXT:    v_cndmask_b32_e64 v2, v2, s13, vcc_lo
2838; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v0
2839; GFX11-NEXT:    v_cndmask_b32_e64 v1, v1, s0, vcc_lo
2840; GFX11-NEXT:    v_cndmask_b32_e64 v2, v2, s15, vcc_lo
2841; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v0
2842; GFX11-NEXT:    v_cndmask_b32_e64 v0, v1, s2, vcc_lo
2843; GFX11-NEXT:    v_cndmask_b32_e64 v1, v2, s3, vcc_lo
2844; GFX11-NEXT:    v_readfirstlane_b32 s0, v0
2845; GFX11-NEXT:    v_readfirstlane_b32 s1, v1
2846; GFX11-NEXT:    ; return to shader part epilog
2847entry:
2848  %ext = extractelement <7 x double> %vec, i32 %sel
2849  ret double %ext
2850}
2851
; dyn_extract_v7f64_v_v
; Dynamic extractelement from a <7 x double> in a default-calling-convention
; function where neither the vector nor the index %sel is marked inreg.
; On every checked target the expected code is a chain of v_cmp_eq_u32 /
; v_cndmask_b32 steps that compares v14 against 1..7 and selects the two
; 32-bit halves of the result into v0 and v1; the gfx1100 output pairs the two
; selects of each step into one v_dual_cndmask_b32.
; The last step (compare against 7) selects from v14/v15, which is past the
; seven named elements (presumably a padding lane of a widened vector; confirm
; against the legalizer before relying on it).
; The assertions are autogenerated by utils/update_llc_test_checks.py and
; should be regenerated with that script, not edited by hand.
2852define double @dyn_extract_v7f64_v_v(<7 x double> %vec, i32 %sel) {
2853; GCN-LABEL: dyn_extract_v7f64_v_v:
2854; GCN:       ; %bb.0: ; %entry
2855; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2856; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v14
2857; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
2858; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
2859; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v14
2860; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
2861; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
2862; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v14
2863; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
2864; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
2865; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v14
2866; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc
2867; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc
2868; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v14
2869; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc
2870; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v11, vcc
2871; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v14
2872; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v12, vcc
2873; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v13, vcc
2874; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 7, v14
2875; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v14, vcc
2876; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v15, vcc
2877; GCN-NEXT:    s_setpc_b64 s[30:31]
2878;
2879; GFX10-LABEL: dyn_extract_v7f64_v_v:
2880; GFX10:       ; %bb.0: ; %entry
2881; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2882; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v14
2883; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
2884; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc_lo
2885; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v14
2886; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc_lo
2887; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc_lo
2888; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v14
2889; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc_lo
2890; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc_lo
2891; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v14
2892; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc_lo
2893; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc_lo
2894; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v14
2895; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc_lo
2896; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v11, vcc_lo
2897; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v14
2898; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v12, vcc_lo
2899; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v13, vcc_lo
2900; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v14
2901; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v14, vcc_lo
2902; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v15, vcc_lo
2903; GFX10-NEXT:    s_setpc_b64 s[30:31]
2904;
2905; GFX11-LABEL: dyn_extract_v7f64_v_v:
2906; GFX11:       ; %bb.0: ; %entry
2907; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2908; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v14
2909; GFX11-NEXT:    v_dual_cndmask_b32 v0, v0, v2 :: v_dual_cndmask_b32 v1, v1, v3
2910; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v14
2911; GFX11-NEXT:    v_dual_cndmask_b32 v0, v0, v4 :: v_dual_cndmask_b32 v1, v1, v5
2912; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v14
2913; GFX11-NEXT:    v_dual_cndmask_b32 v0, v0, v6 :: v_dual_cndmask_b32 v1, v1, v7
2914; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v14
2915; GFX11-NEXT:    v_dual_cndmask_b32 v0, v0, v8 :: v_dual_cndmask_b32 v1, v1, v9
2916; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v14
2917; GFX11-NEXT:    v_dual_cndmask_b32 v0, v0, v10 :: v_dual_cndmask_b32 v1, v1, v11
2918; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v14
2919; GFX11-NEXT:    v_dual_cndmask_b32 v0, v0, v12 :: v_dual_cndmask_b32 v1, v1, v13
2920; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v14
2921; GFX11-NEXT:    v_dual_cndmask_b32 v0, v0, v14 :: v_dual_cndmask_b32 v1, v1, v15
2922; GFX11-NEXT:    s_setpc_b64 s[30:31]
2923entry:
2924  %ext = extractelement <7 x double> %vec, i32 %sel
2925  ret double %ext
2926}
2927
; dyn_extract_v7f64_v_s
; Dynamic extractelement from a <7 x double> in an amdgpu_ps function where the
; vector is a plain argument and the index %sel is inreg.
; The expected code shifts the index in s2 left by one and uses it as a
; relative register index:
;   - gfx900 brackets two v_mov_b32 reads with s_set_gpr_idx_on/off;
;   - fiji, gfx1010 and gfx1100 write the shifted index to m0 and read with
;     v_movrels_b32.
; Both halves are then moved to s0/s1 with v_readfirstlane_b32.
; The assertions are autogenerated by utils/update_llc_test_checks.py and
; should be regenerated with that script, not edited by hand.
2928define amdgpu_ps double @dyn_extract_v7f64_v_s(<7 x double> %vec, i32 inreg %sel) {
2929; GPRIDX-LABEL: dyn_extract_v7f64_v_s:
2930; GPRIDX:       ; %bb.0: ; %entry
2931; GPRIDX-NEXT:    s_lshl_b32 s0, s2, 1
2932; GPRIDX-NEXT:    s_set_gpr_idx_on s0, gpr_idx(SRC0)
2933; GPRIDX-NEXT:    v_mov_b32_e32 v14, v0
2934; GPRIDX-NEXT:    v_mov_b32_e32 v0, v1
2935; GPRIDX-NEXT:    s_set_gpr_idx_off
2936; GPRIDX-NEXT:    v_readfirstlane_b32 s0, v14
2937; GPRIDX-NEXT:    v_readfirstlane_b32 s1, v0
2938; GPRIDX-NEXT:    ; return to shader part epilog
2939;
2940; MOVREL-LABEL: dyn_extract_v7f64_v_s:
2941; MOVREL:       ; %bb.0: ; %entry
2942; MOVREL-NEXT:    s_lshl_b32 m0, s2, 1
2943; MOVREL-NEXT:    v_movrels_b32_e32 v14, v0
2944; MOVREL-NEXT:    v_movrels_b32_e32 v0, v1
2945; MOVREL-NEXT:    v_readfirstlane_b32 s0, v14
2946; MOVREL-NEXT:    v_readfirstlane_b32 s1, v0
2947; MOVREL-NEXT:    ; return to shader part epilog
2948;
2949; GFX10PLUS-LABEL: dyn_extract_v7f64_v_s:
2950; GFX10PLUS:       ; %bb.0: ; %entry
2951; GFX10PLUS-NEXT:    s_lshl_b32 m0, s2, 1
2952; GFX10PLUS-NEXT:    v_movrels_b32_e32 v14, v0
2953; GFX10PLUS-NEXT:    v_movrels_b32_e32 v0, v1
2954; GFX10PLUS-NEXT:    v_readfirstlane_b32 s0, v14
2955; GFX10PLUS-NEXT:    v_readfirstlane_b32 s1, v0
2956; GFX10PLUS-NEXT:    ; return to shader part epilog
2957entry:
2958  %ext = extractelement <7 x double> %vec, i32 %sel
2959  ret double %ext
2960}
2961
; dyn_extract_v7f64_s_s
; Dynamic extractelement from a <7 x double> in an amdgpu_ps function where
; both the vector and the index %sel are inreg.
; The expected code is the same on all four targets: s2..s15 are copied down
; to s0..s13 with s_mov_b32, s16 is copied into m0, and a single
; s_movrels_b64 s[0:1], s[0:1] produces the result.
; The assertions are autogenerated by utils/update_llc_test_checks.py and
; should be regenerated with that script, not edited by hand.
2962define amdgpu_ps double @dyn_extract_v7f64_s_s(<7 x double> inreg %vec, i32 inreg %sel) {
2963; GCN-LABEL: dyn_extract_v7f64_s_s:
2964; GCN:       ; %bb.0: ; %entry
2965; GCN-NEXT:    s_mov_b32 s0, s2
2966; GCN-NEXT:    s_mov_b32 s1, s3
2967; GCN-NEXT:    s_mov_b32 m0, s16
2968; GCN-NEXT:    s_mov_b32 s2, s4
2969; GCN-NEXT:    s_mov_b32 s3, s5
2970; GCN-NEXT:    s_mov_b32 s4, s6
2971; GCN-NEXT:    s_mov_b32 s5, s7
2972; GCN-NEXT:    s_mov_b32 s6, s8
2973; GCN-NEXT:    s_mov_b32 s7, s9
2974; GCN-NEXT:    s_mov_b32 s8, s10
2975; GCN-NEXT:    s_mov_b32 s9, s11
2976; GCN-NEXT:    s_mov_b32 s10, s12
2977; GCN-NEXT:    s_mov_b32 s11, s13
2978; GCN-NEXT:    s_mov_b32 s12, s14
2979; GCN-NEXT:    s_mov_b32 s13, s15
2980; GCN-NEXT:    s_movrels_b64 s[0:1], s[0:1]
2981; GCN-NEXT:    ; return to shader part epilog
2982;
2983; GFX10PLUS-LABEL: dyn_extract_v7f64_s_s:
2984; GFX10PLUS:       ; %bb.0: ; %entry
2985; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
2986; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
2987; GFX10PLUS-NEXT:    s_mov_b32 m0, s16
2988; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
2989; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
2990; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
2991; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
2992; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
2993; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
2994; GFX10PLUS-NEXT:    s_mov_b32 s8, s10
2995; GFX10PLUS-NEXT:    s_mov_b32 s9, s11
2996; GFX10PLUS-NEXT:    s_mov_b32 s10, s12
2997; GFX10PLUS-NEXT:    s_mov_b32 s11, s13
2998; GFX10PLUS-NEXT:    s_mov_b32 s12, s14
2999; GFX10PLUS-NEXT:    s_mov_b32 s13, s15
3000; GFX10PLUS-NEXT:    s_movrels_b64 s[0:1], s[0:1]
3001; GFX10PLUS-NEXT:    ; return to shader part epilog
3002entry:
3003  %ext = extractelement <7 x double> %vec, i32 %sel
3004  ret double %ext
3005}
3006
; dyn_extract_v5f64_s_s
; Kernel that extracts a dynamically indexed element from the constant vector
; <5 x double> <1.0, 2.0, 3.0, 4.0, 5.0> and stores it through %out.
; For each target the checks first pin the complete .amd_kernel_code_t header
; and then the instruction stream:
;   - %out and %sel are loaded from the kernarg segment at offsets 0x0 and 0x8;
;   - the element is chosen by an s_cmp_eq_u32 / s_cselect_b64 chain over
;     indices 1..4, with 3.0 and 5.0 materialised as 64-bit SGPR pairs whose
;     high words are 0x40080000 and 0x40140000;
;   - the result is copied to VGPRs and stored (flat_store_dwordx2 on fiji,
;     global_store_dwordx2 / global_store_b64 on the other targets).
; The assertions are autogenerated by utils/update_llc_test_checks.py and
; should be regenerated with that script, not edited by hand.
3007define amdgpu_kernel void @dyn_extract_v5f64_s_s(ptr addrspace(1) %out, i32 %sel) {
3008; GPRIDX-LABEL: dyn_extract_v5f64_s_s:
3009; GPRIDX:         .amd_kernel_code_t
3010; GPRIDX-NEXT:     amd_code_version_major = 1
3011; GPRIDX-NEXT:     amd_code_version_minor = 2
3012; GPRIDX-NEXT:     amd_machine_kind = 1
3013; GPRIDX-NEXT:     amd_machine_version_major = 9
3014; GPRIDX-NEXT:     amd_machine_version_minor = 0
3015; GPRIDX-NEXT:     amd_machine_version_stepping = 0
3016; GPRIDX-NEXT:     kernel_code_entry_byte_offset = 256
3017; GPRIDX-NEXT:     kernel_code_prefetch_byte_size = 0
3018; GPRIDX-NEXT:     granulated_workitem_vgpr_count = 0
3019; GPRIDX-NEXT:     granulated_wavefront_sgpr_count = 1
3020; GPRIDX-NEXT:     priority = 0
3021; GPRIDX-NEXT:     float_mode = 240
3022; GPRIDX-NEXT:     priv = 0
3023; GPRIDX-NEXT:     enable_dx10_clamp = 1
3024; GPRIDX-NEXT:     debug_mode = 0
3025; GPRIDX-NEXT:     enable_ieee_mode = 1
3026; GPRIDX-NEXT:     enable_wgp_mode = 0
3027; GPRIDX-NEXT:     enable_mem_ordered = 0
3028; GPRIDX-NEXT:     enable_fwd_progress = 0
3029; GPRIDX-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 0
3030; GPRIDX-NEXT:     user_sgpr_count = 12
3031; GPRIDX-NEXT:     enable_trap_handler = 0
3032; GPRIDX-NEXT:     enable_sgpr_workgroup_id_x = 1
3033; GPRIDX-NEXT:     enable_sgpr_workgroup_id_y = 1
3034; GPRIDX-NEXT:     enable_sgpr_workgroup_id_z = 1
3035; GPRIDX-NEXT:     enable_sgpr_workgroup_info = 0
3036; GPRIDX-NEXT:     enable_vgpr_workitem_id = 2
3037; GPRIDX-NEXT:     enable_exception_msb = 0
3038; GPRIDX-NEXT:     granulated_lds_size = 0
3039; GPRIDX-NEXT:     enable_exception = 0
3040; GPRIDX-NEXT:     enable_sgpr_private_segment_buffer = 1
3041; GPRIDX-NEXT:     enable_sgpr_dispatch_ptr = 1
3042; GPRIDX-NEXT:     enable_sgpr_queue_ptr = 1
3043; GPRIDX-NEXT:     enable_sgpr_kernarg_segment_ptr = 1
3044; GPRIDX-NEXT:     enable_sgpr_dispatch_id = 1
3045; GPRIDX-NEXT:     enable_sgpr_flat_scratch_init = 0
3046; GPRIDX-NEXT:     enable_sgpr_private_segment_size = 0
3047; GPRIDX-NEXT:     enable_sgpr_grid_workgroup_count_x = 0
3048; GPRIDX-NEXT:     enable_sgpr_grid_workgroup_count_y = 0
3049; GPRIDX-NEXT:     enable_sgpr_grid_workgroup_count_z = 0
3050; GPRIDX-NEXT:     enable_wavefront_size32 = 0
3051; GPRIDX-NEXT:     enable_ordered_append_gds = 0
3052; GPRIDX-NEXT:     private_element_size = 1
3053; GPRIDX-NEXT:     is_ptr64 = 1
3054; GPRIDX-NEXT:     is_dynamic_callstack = 0
3055; GPRIDX-NEXT:     is_debug_enabled = 0
3056; GPRIDX-NEXT:     is_xnack_enabled = 1
3057; GPRIDX-NEXT:     workitem_private_segment_byte_size = 0
3058; GPRIDX-NEXT:     workgroup_group_segment_byte_size = 0
3059; GPRIDX-NEXT:     gds_segment_byte_size = 0
3060; GPRIDX-NEXT:     kernarg_segment_byte_size = 28
3061; GPRIDX-NEXT:     workgroup_fbarrier_count = 0
3062; GPRIDX-NEXT:     wavefront_sgpr_count = 15
3063; GPRIDX-NEXT:     workitem_vgpr_count = 3
3064; GPRIDX-NEXT:     reserved_vgpr_first = 0
3065; GPRIDX-NEXT:     reserved_vgpr_count = 0
3066; GPRIDX-NEXT:     reserved_sgpr_first = 0
3067; GPRIDX-NEXT:     reserved_sgpr_count = 0
3068; GPRIDX-NEXT:     debug_wavefront_private_segment_offset_sgpr = 0
3069; GPRIDX-NEXT:     debug_private_segment_buffer_sgpr = 0
3070; GPRIDX-NEXT:     kernarg_segment_alignment = 4
3071; GPRIDX-NEXT:     group_segment_alignment = 4
3072; GPRIDX-NEXT:     private_segment_alignment = 4
3073; GPRIDX-NEXT:     wavefront_size = 6
3074; GPRIDX-NEXT:     call_convention = -1
3075; GPRIDX-NEXT:     runtime_loader_kernel_symbol = 0
3076; GPRIDX-NEXT:    .end_amd_kernel_code_t
3077; GPRIDX-NEXT:  ; %bb.0: ; %entry
3078; GPRIDX-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
3079; GPRIDX-NEXT:    s_load_dword s10, s[8:9], 0x8
3080; GPRIDX-NEXT:    s_mov_b32 s4, 0
3081; GPRIDX-NEXT:    s_mov_b32 s5, 0x40080000
3082; GPRIDX-NEXT:    s_mov_b32 s2, 0
3083; GPRIDX-NEXT:    s_mov_b32 s3, 0x40140000
3084; GPRIDX-NEXT:    s_waitcnt lgkmcnt(0)
3085; GPRIDX-NEXT:    s_cmp_eq_u32 s10, 1
3086; GPRIDX-NEXT:    s_cselect_b64 s[6:7], 2.0, 1.0
3087; GPRIDX-NEXT:    s_cmp_eq_u32 s10, 2
3088; GPRIDX-NEXT:    s_cselect_b64 s[4:5], s[4:5], s[6:7]
3089; GPRIDX-NEXT:    s_cmp_eq_u32 s10, 3
3090; GPRIDX-NEXT:    s_cselect_b64 s[4:5], 4.0, s[4:5]
3091; GPRIDX-NEXT:    s_cmp_eq_u32 s10, 4
3092; GPRIDX-NEXT:    s_cselect_b64 s[2:3], s[2:3], s[4:5]
3093; GPRIDX-NEXT:    v_mov_b32_e32 v0, s2
3094; GPRIDX-NEXT:    v_mov_b32_e32 v1, s3
3095; GPRIDX-NEXT:    v_mov_b32_e32 v2, 0
3096; GPRIDX-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
3097; GPRIDX-NEXT:    s_endpgm
3098;
3099; MOVREL-LABEL: dyn_extract_v5f64_s_s:
3100; MOVREL:         .amd_kernel_code_t
3101; MOVREL-NEXT:     amd_code_version_major = 1
3102; MOVREL-NEXT:     amd_code_version_minor = 2
3103; MOVREL-NEXT:     amd_machine_kind = 1
3104; MOVREL-NEXT:     amd_machine_version_major = 8
3105; MOVREL-NEXT:     amd_machine_version_minor = 0
3106; MOVREL-NEXT:     amd_machine_version_stepping = 3
3107; MOVREL-NEXT:     kernel_code_entry_byte_offset = 256
3108; MOVREL-NEXT:     kernel_code_prefetch_byte_size = 0
3109; MOVREL-NEXT:     granulated_workitem_vgpr_count = 0
3110; MOVREL-NEXT:     granulated_wavefront_sgpr_count = 1
3111; MOVREL-NEXT:     priority = 0
3112; MOVREL-NEXT:     float_mode = 240
3113; MOVREL-NEXT:     priv = 0
3114; MOVREL-NEXT:     enable_dx10_clamp = 1
3115; MOVREL-NEXT:     debug_mode = 0
3116; MOVREL-NEXT:     enable_ieee_mode = 1
3117; MOVREL-NEXT:     enable_wgp_mode = 0
3118; MOVREL-NEXT:     enable_mem_ordered = 0
3119; MOVREL-NEXT:     enable_fwd_progress = 0
3120; MOVREL-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 0
3121; MOVREL-NEXT:     user_sgpr_count = 12
3122; MOVREL-NEXT:     enable_trap_handler = 0
3123; MOVREL-NEXT:     enable_sgpr_workgroup_id_x = 1
3124; MOVREL-NEXT:     enable_sgpr_workgroup_id_y = 1
3125; MOVREL-NEXT:     enable_sgpr_workgroup_id_z = 1
3126; MOVREL-NEXT:     enable_sgpr_workgroup_info = 0
3127; MOVREL-NEXT:     enable_vgpr_workitem_id = 2
3128; MOVREL-NEXT:     enable_exception_msb = 0
3129; MOVREL-NEXT:     granulated_lds_size = 0
3130; MOVREL-NEXT:     enable_exception = 0
3131; MOVREL-NEXT:     enable_sgpr_private_segment_buffer = 1
3132; MOVREL-NEXT:     enable_sgpr_dispatch_ptr = 1
3133; MOVREL-NEXT:     enable_sgpr_queue_ptr = 1
3134; MOVREL-NEXT:     enable_sgpr_kernarg_segment_ptr = 1
3135; MOVREL-NEXT:     enable_sgpr_dispatch_id = 1
3136; MOVREL-NEXT:     enable_sgpr_flat_scratch_init = 0
3137; MOVREL-NEXT:     enable_sgpr_private_segment_size = 0
3138; MOVREL-NEXT:     enable_sgpr_grid_workgroup_count_x = 0
3139; MOVREL-NEXT:     enable_sgpr_grid_workgroup_count_y = 0
3140; MOVREL-NEXT:     enable_sgpr_grid_workgroup_count_z = 0
3141; MOVREL-NEXT:     enable_wavefront_size32 = 0
3142; MOVREL-NEXT:     enable_ordered_append_gds = 0
3143; MOVREL-NEXT:     private_element_size = 1
3144; MOVREL-NEXT:     is_ptr64 = 1
3145; MOVREL-NEXT:     is_dynamic_callstack = 0
3146; MOVREL-NEXT:     is_debug_enabled = 0
3147; MOVREL-NEXT:     is_xnack_enabled = 0
3148; MOVREL-NEXT:     workitem_private_segment_byte_size = 0
3149; MOVREL-NEXT:     workgroup_group_segment_byte_size = 0
3150; MOVREL-NEXT:     gds_segment_byte_size = 0
3151; MOVREL-NEXT:     kernarg_segment_byte_size = 28
3152; MOVREL-NEXT:     workgroup_fbarrier_count = 0
3153; MOVREL-NEXT:     wavefront_sgpr_count = 10
3154; MOVREL-NEXT:     workitem_vgpr_count = 4
3155; MOVREL-NEXT:     reserved_vgpr_first = 0
3156; MOVREL-NEXT:     reserved_vgpr_count = 0
3157; MOVREL-NEXT:     reserved_sgpr_first = 0
3158; MOVREL-NEXT:     reserved_sgpr_count = 0
3159; MOVREL-NEXT:     debug_wavefront_private_segment_offset_sgpr = 0
3160; MOVREL-NEXT:     debug_private_segment_buffer_sgpr = 0
3161; MOVREL-NEXT:     kernarg_segment_alignment = 4
3162; MOVREL-NEXT:     group_segment_alignment = 4
3163; MOVREL-NEXT:     private_segment_alignment = 4
3164; MOVREL-NEXT:     wavefront_size = 6
3165; MOVREL-NEXT:     call_convention = -1
3166; MOVREL-NEXT:     runtime_loader_kernel_symbol = 0
3167; MOVREL-NEXT:    .end_amd_kernel_code_t
3168; MOVREL-NEXT:  ; %bb.0: ; %entry
3169; MOVREL-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
3170; MOVREL-NEXT:    s_load_dword s8, s[8:9], 0x8
3171; MOVREL-NEXT:    s_mov_b32 s4, 0
3172; MOVREL-NEXT:    s_mov_b32 s5, 0x40080000
3173; MOVREL-NEXT:    s_mov_b32 s2, 0
3174; MOVREL-NEXT:    s_mov_b32 s3, 0x40140000
3175; MOVREL-NEXT:    s_waitcnt lgkmcnt(0)
3176; MOVREL-NEXT:    s_cmp_eq_u32 s8, 1
3177; MOVREL-NEXT:    s_cselect_b64 s[6:7], 2.0, 1.0
3178; MOVREL-NEXT:    s_cmp_eq_u32 s8, 2
3179; MOVREL-NEXT:    s_cselect_b64 s[4:5], s[4:5], s[6:7]
3180; MOVREL-NEXT:    s_cmp_eq_u32 s8, 3
3181; MOVREL-NEXT:    s_cselect_b64 s[4:5], 4.0, s[4:5]
3182; MOVREL-NEXT:    s_cmp_eq_u32 s8, 4
3183; MOVREL-NEXT:    s_cselect_b64 s[2:3], s[2:3], s[4:5]
3184; MOVREL-NEXT:    v_mov_b32_e32 v0, s2
3185; MOVREL-NEXT:    v_mov_b32_e32 v3, s1
3186; MOVREL-NEXT:    v_mov_b32_e32 v1, s3
3187; MOVREL-NEXT:    v_mov_b32_e32 v2, s0
3188; MOVREL-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
3189; MOVREL-NEXT:    s_endpgm
3190;
3191; GFX10-LABEL: dyn_extract_v5f64_s_s:
3192; GFX10:         .amd_kernel_code_t
3193; GFX10-NEXT:     amd_code_version_major = 1
3194; GFX10-NEXT:     amd_code_version_minor = 2
3195; GFX10-NEXT:     amd_machine_kind = 1
3196; GFX10-NEXT:     amd_machine_version_major = 10
3197; GFX10-NEXT:     amd_machine_version_minor = 1
3198; GFX10-NEXT:     amd_machine_version_stepping = 0
3199; GFX10-NEXT:     kernel_code_entry_byte_offset = 256
3200; GFX10-NEXT:     kernel_code_prefetch_byte_size = 0
3201; GFX10-NEXT:     granulated_workitem_vgpr_count = 0
3202; GFX10-NEXT:     granulated_wavefront_sgpr_count = 1
3203; GFX10-NEXT:     priority = 0
3204; GFX10-NEXT:     float_mode = 240
3205; GFX10-NEXT:     priv = 0
3206; GFX10-NEXT:     enable_dx10_clamp = 1
3207; GFX10-NEXT:     debug_mode = 0
3208; GFX10-NEXT:     enable_ieee_mode = 1
3209; GFX10-NEXT:     enable_wgp_mode = 1
3210; GFX10-NEXT:     enable_mem_ordered = 1
3211; GFX10-NEXT:     enable_fwd_progress = 0
3212; GFX10-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 0
3213; GFX10-NEXT:     user_sgpr_count = 12
3214; GFX10-NEXT:     enable_trap_handler = 0
3215; GFX10-NEXT:     enable_sgpr_workgroup_id_x = 1
3216; GFX10-NEXT:     enable_sgpr_workgroup_id_y = 1
3217; GFX10-NEXT:     enable_sgpr_workgroup_id_z = 1
3218; GFX10-NEXT:     enable_sgpr_workgroup_info = 0
3219; GFX10-NEXT:     enable_vgpr_workitem_id = 2
3220; GFX10-NEXT:     enable_exception_msb = 0
3221; GFX10-NEXT:     granulated_lds_size = 0
3222; GFX10-NEXT:     enable_exception = 0
3223; GFX10-NEXT:     enable_sgpr_private_segment_buffer = 1
3224; GFX10-NEXT:     enable_sgpr_dispatch_ptr = 1
3225; GFX10-NEXT:     enable_sgpr_queue_ptr = 1
3226; GFX10-NEXT:     enable_sgpr_kernarg_segment_ptr = 1
3227; GFX10-NEXT:     enable_sgpr_dispatch_id = 1
3228; GFX10-NEXT:     enable_sgpr_flat_scratch_init = 0
3229; GFX10-NEXT:     enable_sgpr_private_segment_size = 0
3230; GFX10-NEXT:     enable_sgpr_grid_workgroup_count_x = 0
3231; GFX10-NEXT:     enable_sgpr_grid_workgroup_count_y = 0
3232; GFX10-NEXT:     enable_sgpr_grid_workgroup_count_z = 0
3233; GFX10-NEXT:     enable_wavefront_size32 = 1
3234; GFX10-NEXT:     enable_ordered_append_gds = 0
3235; GFX10-NEXT:     private_element_size = 1
3236; GFX10-NEXT:     is_ptr64 = 1
3237; GFX10-NEXT:     is_dynamic_callstack = 0
3238; GFX10-NEXT:     is_debug_enabled = 0
3239; GFX10-NEXT:     is_xnack_enabled = 1
3240; GFX10-NEXT:     workitem_private_segment_byte_size = 0
3241; GFX10-NEXT:     workgroup_group_segment_byte_size = 0
3242; GFX10-NEXT:     gds_segment_byte_size = 0
3243; GFX10-NEXT:     kernarg_segment_byte_size = 28
3244; GFX10-NEXT:     workgroup_fbarrier_count = 0
3245; GFX10-NEXT:     wavefront_sgpr_count = 10
3246; GFX10-NEXT:     workitem_vgpr_count = 3
3247; GFX10-NEXT:     reserved_vgpr_first = 0
3248; GFX10-NEXT:     reserved_vgpr_count = 0
3249; GFX10-NEXT:     reserved_sgpr_first = 0
3250; GFX10-NEXT:     reserved_sgpr_count = 0
3251; GFX10-NEXT:     debug_wavefront_private_segment_offset_sgpr = 0
3252; GFX10-NEXT:     debug_private_segment_buffer_sgpr = 0
3253; GFX10-NEXT:     kernarg_segment_alignment = 4
3254; GFX10-NEXT:     group_segment_alignment = 4
3255; GFX10-NEXT:     private_segment_alignment = 4
3256; GFX10-NEXT:     wavefront_size = 5
3257; GFX10-NEXT:     call_convention = -1
3258; GFX10-NEXT:     runtime_loader_kernel_symbol = 0
3259; GFX10-NEXT:    .end_amd_kernel_code_t
3260; GFX10-NEXT:  ; %bb.0: ; %entry
3261; GFX10-NEXT:    s_clause 0x1
3262; GFX10-NEXT:    s_load_dword s6, s[8:9], 0x8
3263; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
3264; GFX10-NEXT:    s_mov_b32 s2, 0
3265; GFX10-NEXT:    s_mov_b32 s3, 0x40080000
3266; GFX10-NEXT:    v_mov_b32_e32 v2, 0
3267; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
3268; GFX10-NEXT:    s_cmp_eq_u32 s6, 1
3269; GFX10-NEXT:    s_cselect_b64 s[4:5], 2.0, 1.0
3270; GFX10-NEXT:    s_cmp_eq_u32 s6, 2
3271; GFX10-NEXT:    s_cselect_b64 s[2:3], s[2:3], s[4:5]
3272; GFX10-NEXT:    s_cmp_eq_u32 s6, 3
3273; GFX10-NEXT:    s_mov_b32 s4, 0
3274; GFX10-NEXT:    s_mov_b32 s5, 0x40140000
3275; GFX10-NEXT:    s_cselect_b64 s[2:3], 4.0, s[2:3]
3276; GFX10-NEXT:    s_cmp_eq_u32 s6, 4
3277; GFX10-NEXT:    s_cselect_b64 s[2:3], s[4:5], s[2:3]
3278; GFX10-NEXT:    v_mov_b32_e32 v0, s2
3279; GFX10-NEXT:    v_mov_b32_e32 v1, s3
3280; GFX10-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
3281; GFX10-NEXT:    s_endpgm
3282;
3283; GFX11-LABEL: dyn_extract_v5f64_s_s:
3284; GFX11:         .amd_kernel_code_t
3285; GFX11-NEXT:     amd_code_version_major = 1
3286; GFX11-NEXT:     amd_code_version_minor = 2
3287; GFX11-NEXT:     amd_machine_kind = 1
3288; GFX11-NEXT:     amd_machine_version_major = 11
3289; GFX11-NEXT:     amd_machine_version_minor = 0
3290; GFX11-NEXT:     amd_machine_version_stepping = 0
3291; GFX11-NEXT:     kernel_code_entry_byte_offset = 256
3292; GFX11-NEXT:     kernel_code_prefetch_byte_size = 0
3293; GFX11-NEXT:     granulated_workitem_vgpr_count = 0
3294; GFX11-NEXT:     granulated_wavefront_sgpr_count = 0
3295; GFX11-NEXT:     priority = 0
3296; GFX11-NEXT:     float_mode = 240
3297; GFX11-NEXT:     priv = 0
3298; GFX11-NEXT:     enable_dx10_clamp = 1
3299; GFX11-NEXT:     debug_mode = 0
3300; GFX11-NEXT:     enable_ieee_mode = 1
3301; GFX11-NEXT:     enable_wgp_mode = 1
3302; GFX11-NEXT:     enable_mem_ordered = 1
3303; GFX11-NEXT:     enable_fwd_progress = 0
3304; GFX11-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 0
3305; GFX11-NEXT:     user_sgpr_count = 13
3306; GFX11-NEXT:     enable_trap_handler = 0
3307; GFX11-NEXT:     enable_sgpr_workgroup_id_x = 1
3308; GFX11-NEXT:     enable_sgpr_workgroup_id_y = 1
3309; GFX11-NEXT:     enable_sgpr_workgroup_id_z = 1
3310; GFX11-NEXT:     enable_sgpr_workgroup_info = 0
3311; GFX11-NEXT:     enable_vgpr_workitem_id = 2
3312; GFX11-NEXT:     enable_exception_msb = 0
3313; GFX11-NEXT:     granulated_lds_size = 0
3314; GFX11-NEXT:     enable_exception = 0
3315; GFX11-NEXT:     enable_sgpr_private_segment_buffer = 0
3316; GFX11-NEXT:     enable_sgpr_dispatch_ptr = 1
3317; GFX11-NEXT:     enable_sgpr_queue_ptr = 1
3318; GFX11-NEXT:     enable_sgpr_kernarg_segment_ptr = 1
3319; GFX11-NEXT:     enable_sgpr_dispatch_id = 1
3320; GFX11-NEXT:     enable_sgpr_flat_scratch_init = 0
3321; GFX11-NEXT:     enable_sgpr_private_segment_size = 0
3322; GFX11-NEXT:     enable_sgpr_grid_workgroup_count_x = 0
3323; GFX11-NEXT:     enable_sgpr_grid_workgroup_count_y = 0
3324; GFX11-NEXT:     enable_sgpr_grid_workgroup_count_z = 0
3325; GFX11-NEXT:     enable_wavefront_size32 = 1
3326; GFX11-NEXT:     enable_ordered_append_gds = 0
3327; GFX11-NEXT:     private_element_size = 1
3328; GFX11-NEXT:     is_ptr64 = 1
3329; GFX11-NEXT:     is_dynamic_callstack = 0
3330; GFX11-NEXT:     is_debug_enabled = 0
3331; GFX11-NEXT:     is_xnack_enabled = 0
3332; GFX11-NEXT:     workitem_private_segment_byte_size = 0
3333; GFX11-NEXT:     workgroup_group_segment_byte_size = 0
3334; GFX11-NEXT:     gds_segment_byte_size = 0
3335; GFX11-NEXT:     kernarg_segment_byte_size = 28
3336; GFX11-NEXT:     workgroup_fbarrier_count = 0
3337; GFX11-NEXT:     wavefront_sgpr_count = 7
3338; GFX11-NEXT:     workitem_vgpr_count = 3
3339; GFX11-NEXT:     reserved_vgpr_first = 0
3340; GFX11-NEXT:     reserved_vgpr_count = 0
3341; GFX11-NEXT:     reserved_sgpr_first = 0
3342; GFX11-NEXT:     reserved_sgpr_count = 0
3343; GFX11-NEXT:     debug_wavefront_private_segment_offset_sgpr = 0
3344; GFX11-NEXT:     debug_private_segment_buffer_sgpr = 0
3345; GFX11-NEXT:     kernarg_segment_alignment = 4
3346; GFX11-NEXT:     group_segment_alignment = 4
3347; GFX11-NEXT:     private_segment_alignment = 4
3348; GFX11-NEXT:     wavefront_size = 5
3349; GFX11-NEXT:     call_convention = -1
3350; GFX11-NEXT:     runtime_loader_kernel_symbol = 0
3351; GFX11-NEXT:    .end_amd_kernel_code_t
3352; GFX11-NEXT:  ; %bb.0: ; %entry
3353; GFX11-NEXT:    s_clause 0x1
3354; GFX11-NEXT:    s_load_b32 s6, s[4:5], 0x8
3355; GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
3356; GFX11-NEXT:    s_mov_b32 s2, 0
3357; GFX11-NEXT:    s_mov_b32 s3, 0x40080000
3358; GFX11-NEXT:    v_mov_b32_e32 v2, 0
3359; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
3360; GFX11-NEXT:    s_cmp_eq_u32 s6, 1
3361; GFX11-NEXT:    s_cselect_b64 s[4:5], 2.0, 1.0
3362; GFX11-NEXT:    s_cmp_eq_u32 s6, 2
3363; GFX11-NEXT:    s_cselect_b64 s[2:3], s[2:3], s[4:5]
3364; GFX11-NEXT:    s_cmp_eq_u32 s6, 3
3365; GFX11-NEXT:    s_mov_b32 s4, 0
3366; GFX11-NEXT:    s_mov_b32 s5, 0x40140000
3367; GFX11-NEXT:    s_cselect_b64 s[2:3], 4.0, s[2:3]
3368; GFX11-NEXT:    s_cmp_eq_u32 s6, 4
3369; GFX11-NEXT:    s_cselect_b64 s[2:3], s[4:5], s[2:3]
3370; GFX11-NEXT:    v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
3371; GFX11-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
3372; GFX11-NEXT:    s_endpgm
3373entry:
3374  %ext = extractelement <5 x double> <double 1.0, double 2.0, double 3.0, double 4.0, double 5.0>, i32 %sel
3375  store double %ext, ptr addrspace(1) %out
3376  ret void
3377}
3378
; dyn_extract_v15f32_const_s_v
; Dynamic extractelement from the constant vector <15 x float> <1.0 .. 15.0>
; in a default-calling-convention function; the index %sel is a plain
; (non-inreg) argument and arrives in v0.
; The expected code is a v_cmp_eq_u32 / v_cndmask_b32 chain over indices 1..15.
; The gfx900/fiji output first moves the non-inline constants into v1..v12,
; while the gfx1010/gfx1100 output uses them as literals directly in the
; v_cndmask_b32_e64 instructions.
; The last step (compare against 15) has no matching constant: it selects from
; v0, s4 or s0 depending on the target (presumably a padding lane of a
; widened vector; confirm against the legalizer before relying on it).
; The assertions are autogenerated by utils/update_llc_test_checks.py and
; should be regenerated with that script, not edited by hand.
3379define float @dyn_extract_v15f32_const_s_v(i32 %sel) {
3380; GCN-LABEL: dyn_extract_v15f32_const_s_v:
3381; GCN:       ; %bb.0: ; %entry
3382; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3383; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
3384; GCN-NEXT:    v_mov_b32_e32 v1, 0x40400000
3385; GCN-NEXT:    v_cndmask_b32_e64 v13, 1.0, 2.0, vcc
3386; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v0
3387; GCN-NEXT:    v_cndmask_b32_e32 v1, v13, v1, vcc
3388; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v0
3389; GCN-NEXT:    v_mov_b32_e32 v2, 0x40a00000
3390; GCN-NEXT:    v_cndmask_b32_e64 v1, v1, 4.0, vcc
3391; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v0
3392; GCN-NEXT:    v_mov_b32_e32 v3, 0x40c00000
3393; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
3394; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v0
3395; GCN-NEXT:    v_mov_b32_e32 v4, 0x40e00000
3396; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
3397; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v0
3398; GCN-NEXT:    v_mov_b32_e32 v5, 0x41000000
3399; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
3400; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 7, v0
3401; GCN-NEXT:    v_mov_b32_e32 v6, 0x41100000
3402; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
3403; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 8, v0
3404; GCN-NEXT:    v_mov_b32_e32 v7, 0x41200000
3405; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v6, vcc
3406; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 9, v0
3407; GCN-NEXT:    v_mov_b32_e32 v8, 0x41300000
3408; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
3409; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 10, v0
3410; GCN-NEXT:    v_mov_b32_e32 v9, 0x41400000
3411; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v8, vcc
3412; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 11, v0
3413; GCN-NEXT:    v_mov_b32_e32 v10, 0x41500000
3414; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc
3415; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 12, v0
3416; GCN-NEXT:    v_mov_b32_e32 v11, 0x41600000
3417; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v10, vcc
3418; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 13, v0
3419; GCN-NEXT:    v_mov_b32_e32 v12, 0x41700000
3420; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v11, vcc
3421; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 14, v0
3422; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v12, vcc
3423; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 15, v0
3424; GCN-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
3425; GCN-NEXT:    s_setpc_b64 s[30:31]
3426;
3427; GFX10-LABEL: dyn_extract_v15f32_const_s_v:
3428; GFX10:       ; %bb.0: ; %entry
3429; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3430; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
3431; GFX10-NEXT:    v_cndmask_b32_e64 v1, 1.0, 2.0, vcc_lo
3432; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v0
3433; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, 0x40400000, vcc_lo
3434; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v0
3435; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, 4.0, vcc_lo
3436; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v0
3437; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, 0x40a00000, vcc_lo
3438; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v0
3439; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, 0x40c00000, vcc_lo
3440; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v0
3441; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, 0x40e00000, vcc_lo
3442; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v0
3443; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, 0x41000000, vcc_lo
3444; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 8, v0
3445; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, 0x41100000, vcc_lo
3446; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 9, v0
3447; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, 0x41200000, vcc_lo
3448; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 10, v0
3449; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, 0x41300000, vcc_lo
3450; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 11, v0
3451; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, 0x41400000, vcc_lo
3452; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 12, v0
3453; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, 0x41500000, vcc_lo
3454; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 13, v0
3455; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, 0x41600000, vcc_lo
3456; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 14, v0
3457; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, 0x41700000, vcc_lo
3458; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 15, v0
3459; GFX10-NEXT:    v_cndmask_b32_e64 v0, v1, s4, vcc_lo
3460; GFX10-NEXT:    s_setpc_b64 s[30:31]
3461;
3462; GFX11-LABEL: dyn_extract_v15f32_const_s_v:
3463; GFX11:       ; %bb.0: ; %entry
3464; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3465; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
3466; GFX11-NEXT:    v_cndmask_b32_e64 v1, 1.0, 2.0, vcc_lo
3467; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v0
3468; GFX11-NEXT:    v_cndmask_b32_e64 v1, v1, 0x40400000, vcc_lo
3469; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v0
3470; GFX11-NEXT:    v_cndmask_b32_e64 v1, v1, 4.0, vcc_lo
3471; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v0
3472; GFX11-NEXT:    v_cndmask_b32_e64 v1, v1, 0x40a00000, vcc_lo
3473; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v0
3474; GFX11-NEXT:    v_cndmask_b32_e64 v1, v1, 0x40c00000, vcc_lo
3475; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v0
3476; GFX11-NEXT:    v_cndmask_b32_e64 v1, v1, 0x40e00000, vcc_lo
3477; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v0
3478; GFX11-NEXT:    v_cndmask_b32_e64 v1, v1, 0x41000000, vcc_lo
3479; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 8, v0
3480; GFX11-NEXT:    v_cndmask_b32_e64 v1, v1, 0x41100000, vcc_lo
3481; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 9, v0
3482; GFX11-NEXT:    v_cndmask_b32_e64 v1, v1, 0x41200000, vcc_lo
3483; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 10, v0
3484; GFX11-NEXT:    v_cndmask_b32_e64 v1, v1, 0x41300000, vcc_lo
3485; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 11, v0
3486; GFX11-NEXT:    v_cndmask_b32_e64 v1, v1, 0x41400000, vcc_lo
3487; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 12, v0
3488; GFX11-NEXT:    v_cndmask_b32_e64 v1, v1, 0x41500000, vcc_lo
3489; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 13, v0
3490; GFX11-NEXT:    v_cndmask_b32_e64 v1, v1, 0x41600000, vcc_lo
3491; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 14, v0
3492; GFX11-NEXT:    v_cndmask_b32_e64 v1, v1, 0x41700000, vcc_lo
3493; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 15, v0
3494; GFX11-NEXT:    v_cndmask_b32_e64 v0, v1, s0, vcc_lo
3495; GFX11-NEXT:    s_setpc_b64 s[30:31]
3496entry:
3497  %ext = extractelement <15 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0, float 11.0, float 12.0, float 13.0, float 14.0, float 15.0>, i32 %sel
3498  ret float %ext
3499}
3500
; dyn_extract_v15f32_const_s_s
; Dynamic extractelement from the constant vector <15 x float> <1.0 .. 15.0>
; in an amdgpu_ps function whose index %sel is inreg.
; The expected code is the same on all four targets: the fifteen constants are
; materialised into s4..s18 with s_mov_b32, the index in s2 is copied to m0,
; one s_movrels_b32 s0, s4 reads the selected element, and the result is
; copied to v0 for the return.
; The assertions are autogenerated by utils/update_llc_test_checks.py and
; should be regenerated with that script, not edited by hand.
3501define amdgpu_ps float @dyn_extract_v15f32_const_s_s(i32 inreg %sel) {
3502; GCN-LABEL: dyn_extract_v15f32_const_s_s:
3503; GCN:       ; %bb.0: ; %entry
3504; GCN-NEXT:    s_mov_b32 s4, 1.0
3505; GCN-NEXT:    s_mov_b32 m0, s2
3506; GCN-NEXT:    s_mov_b32 s18, 0x41700000
3507; GCN-NEXT:    s_mov_b32 s17, 0x41600000
3508; GCN-NEXT:    s_mov_b32 s16, 0x41500000
3509; GCN-NEXT:    s_mov_b32 s15, 0x41400000
3510; GCN-NEXT:    s_mov_b32 s14, 0x41300000
3511; GCN-NEXT:    s_mov_b32 s13, 0x41200000
3512; GCN-NEXT:    s_mov_b32 s12, 0x41100000
3513; GCN-NEXT:    s_mov_b32 s11, 0x41000000
3514; GCN-NEXT:    s_mov_b32 s10, 0x40e00000
3515; GCN-NEXT:    s_mov_b32 s9, 0x40c00000
3516; GCN-NEXT:    s_mov_b32 s8, 0x40a00000
3517; GCN-NEXT:    s_mov_b32 s7, 4.0
3518; GCN-NEXT:    s_mov_b32 s6, 0x40400000
3519; GCN-NEXT:    s_mov_b32 s5, 2.0
3520; GCN-NEXT:    s_movrels_b32 s0, s4
3521; GCN-NEXT:    v_mov_b32_e32 v0, s0
3522; GCN-NEXT:    ; return to shader part epilog
3523;
3524; GFX10PLUS-LABEL: dyn_extract_v15f32_const_s_s:
3525; GFX10PLUS:       ; %bb.0: ; %entry
3526; GFX10PLUS-NEXT:    s_mov_b32 s4, 1.0
3527; GFX10PLUS-NEXT:    s_mov_b32 m0, s2
3528; GFX10PLUS-NEXT:    s_mov_b32 s18, 0x41700000
3529; GFX10PLUS-NEXT:    s_mov_b32 s17, 0x41600000
3530; GFX10PLUS-NEXT:    s_mov_b32 s16, 0x41500000
3531; GFX10PLUS-NEXT:    s_mov_b32 s15, 0x41400000
3532; GFX10PLUS-NEXT:    s_mov_b32 s14, 0x41300000
3533; GFX10PLUS-NEXT:    s_mov_b32 s13, 0x41200000
3534; GFX10PLUS-NEXT:    s_mov_b32 s12, 0x41100000
3535; GFX10PLUS-NEXT:    s_mov_b32 s11, 0x41000000
3536; GFX10PLUS-NEXT:    s_mov_b32 s10, 0x40e00000
3537; GFX10PLUS-NEXT:    s_mov_b32 s9, 0x40c00000
3538; GFX10PLUS-NEXT:    s_mov_b32 s8, 0x40a00000
3539; GFX10PLUS-NEXT:    s_mov_b32 s7, 4.0
3540; GFX10PLUS-NEXT:    s_mov_b32 s6, 0x40400000
3541; GFX10PLUS-NEXT:    s_mov_b32 s5, 2.0
3542; GFX10PLUS-NEXT:    s_movrels_b32 s0, s4
3543; GFX10PLUS-NEXT:    v_mov_b32_e32 v0, s0
3544; GFX10PLUS-NEXT:    ; return to shader part epilog
3545entry:
3546  %ext = extractelement <15 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0, float 11.0, float 12.0, float 13.0, float 14.0, float 15.0>, i32 %sel
3547  ret float %ext
3548}
3549
; Dynamic extractelement from an `inreg` <15 x float> argument with a
; non-inreg i32 index, in an amdgpu_ps function.
; The expected code compares the index (v0) against 1..15 and folds the result
; through a v_cmp_eq_u32 / v_cndmask_b32 chain. The GCN checks first copy each
; element (s2..s16) into a VGPR; the GFX10PLUS checks use the SGPRs directly as
; v_cndmask_b32_e64 operands.
; NOTE(review): the final compare against 15 selects a register that was never
; loaded from the vector (v0 on GCN, s0 on GFX10PLUS) - presumably the vector
; is widened to 16 lanes with an undefined last element; confirm.
3550define amdgpu_ps float @dyn_extract_v15f32_s_v(<15 x float> inreg %vec, i32 %sel) {
3551; GCN-LABEL: dyn_extract_v15f32_s_v:
3552; GCN:       ; %bb.0: ; %entry
3553; GCN-NEXT:    v_mov_b32_e32 v1, s2
3554; GCN-NEXT:    v_mov_b32_e32 v2, s3
3555; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
3556; GCN-NEXT:    v_mov_b32_e32 v3, s4
3557; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
3558; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v0
3559; GCN-NEXT:    v_mov_b32_e32 v4, s5
3560; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
3561; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v0
3562; GCN-NEXT:    v_mov_b32_e32 v5, s6
3563; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
3564; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v0
3565; GCN-NEXT:    v_mov_b32_e32 v6, s7
3566; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
3567; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v0
3568; GCN-NEXT:    v_mov_b32_e32 v7, s8
3569; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v6, vcc
3570; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v0
3571; GCN-NEXT:    v_mov_b32_e32 v8, s9
3572; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
3573; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 7, v0
3574; GCN-NEXT:    v_mov_b32_e32 v9, s10
3575; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v8, vcc
3576; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 8, v0
3577; GCN-NEXT:    v_mov_b32_e32 v10, s11
3578; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc
3579; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 9, v0
3580; GCN-NEXT:    v_mov_b32_e32 v11, s12
3581; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v10, vcc
3582; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 10, v0
3583; GCN-NEXT:    v_mov_b32_e32 v12, s13
3584; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v11, vcc
3585; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 11, v0
3586; GCN-NEXT:    v_mov_b32_e32 v13, s14
3587; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v12, vcc
3588; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 12, v0
3589; GCN-NEXT:    v_mov_b32_e32 v14, s15
3590; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v13, vcc
3591; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 13, v0
3592; GCN-NEXT:    v_mov_b32_e32 v15, s16
3593; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v14, vcc
3594; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 14, v0
3595; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v15, vcc
3596; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 15, v0
3597; GCN-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
3598; GCN-NEXT:    ; return to shader part epilog
3599;
3600; GFX10PLUS-LABEL: dyn_extract_v15f32_s_v:
3601; GFX10PLUS:       ; %bb.0: ; %entry
3602; GFX10PLUS-NEXT:    v_mov_b32_e32 v1, s3
3603; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
3604; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v1, s2, v1, vcc_lo
3605; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v0
3606; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v1, v1, s4, vcc_lo
3607; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v0
3608; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v1, v1, s5, vcc_lo
3609; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v0
3610; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v1, v1, s6, vcc_lo
3611; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v0
3612; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v1, v1, s7, vcc_lo
3613; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v0
3614; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v1, v1, s8, vcc_lo
3615; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v0
3616; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v1, v1, s9, vcc_lo
3617; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 8, v0
3618; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v1, v1, s10, vcc_lo
3619; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 9, v0
3620; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v1, v1, s11, vcc_lo
3621; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 10, v0
3622; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v1, v1, s12, vcc_lo
3623; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 11, v0
3624; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v1, v1, s13, vcc_lo
3625; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 12, v0
3626; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v1, v1, s14, vcc_lo
3627; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 13, v0
3628; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v1, v1, s15, vcc_lo
3629; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 14, v0
3630; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v1, v1, s16, vcc_lo
3631; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 15, v0
3632; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v0, v1, s0, vcc_lo
3633; GFX10PLUS-NEXT:    ; return to shader part epilog
3634entry:
3635  %ext = extractelement <15 x float> %vec, i32 %sel
3636  ret float %ext
3637}
3638
; Dynamic extractelement from a <15 x float> argument with an i32 index, in a
; function with the default calling convention (neither argument is `inreg`).
; The checks read the vector from v0..v14 and the index from v15, comparing the
; index against 1..15 through a v_cmp_eq_u32 / v_cndmask_b32 chain that
; accumulates into v0.
; GFX10 and GFX11 have separate check blocks; the only visible difference is
; the register selected by the final compare against 15 (s4 versus s0; the GCN
; block uses v0).
; NOTE(review): that last operand is not one of the fifteen inputs - presumably
; an undefined padding lane; confirm.
3639define float @dyn_extract_v15f32_v_v(<15 x float> %vec, i32 %sel) {
3640; GCN-LABEL: dyn_extract_v15f32_v_v:
3641; GCN:       ; %bb.0: ; %entry
3642; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3643; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v15
3644; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
3645; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v15
3646; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
3647; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v15
3648; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
3649; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v15
3650; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
3651; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v15
3652; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
3653; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v15
3654; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
3655; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 7, v15
3656; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc
3657; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 8, v15
3658; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc
3659; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 9, v15
3660; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v9, vcc
3661; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 10, v15
3662; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc
3663; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 11, v15
3664; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v11, vcc
3665; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 12, v15
3666; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v12, vcc
3667; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 13, v15
3668; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v13, vcc
3669; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 14, v15
3670; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v14, vcc
3671; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 15, v15
3672; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v0, vcc
3673; GCN-NEXT:    s_setpc_b64 s[30:31]
3674;
3675; GFX10-LABEL: dyn_extract_v15f32_v_v:
3676; GFX10:       ; %bb.0: ; %entry
3677; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3678; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v15
3679; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
3680; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v15
3681; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
3682; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v15
3683; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc_lo
3684; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v15
3685; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc_lo
3686; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v15
3687; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc_lo
3688; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v15
3689; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc_lo
3690; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v15
3691; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc_lo
3692; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 8, v15
3693; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc_lo
3694; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 9, v15
3695; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v9, vcc_lo
3696; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 10, v15
3697; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc_lo
3698; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 11, v15
3699; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v11, vcc_lo
3700; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 12, v15
3701; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v12, vcc_lo
3702; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 13, v15
3703; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v13, vcc_lo
3704; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 14, v15
3705; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v14, vcc_lo
3706; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 15, v15
3707; GFX10-NEXT:    v_cndmask_b32_e64 v0, v0, s4, vcc_lo
3708; GFX10-NEXT:    s_setpc_b64 s[30:31]
3709;
3710; GFX11-LABEL: dyn_extract_v15f32_v_v:
3711; GFX11:       ; %bb.0: ; %entry
3712; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3713; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v15
3714; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
3715; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v15
3716; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
3717; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v15
3718; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc_lo
3719; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v15
3720; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc_lo
3721; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v15
3722; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc_lo
3723; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v15
3724; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc_lo
3725; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v15
3726; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc_lo
3727; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 8, v15
3728; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc_lo
3729; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 9, v15
3730; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v9, vcc_lo
3731; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 10, v15
3732; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc_lo
3733; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 11, v15
3734; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v11, vcc_lo
3735; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 12, v15
3736; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v12, vcc_lo
3737; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 13, v15
3738; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v13, vcc_lo
3739; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 14, v15
3740; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v14, vcc_lo
3741; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 15, v15
3742; GFX11-NEXT:    v_cndmask_b32_e64 v0, v0, s0, vcc_lo
3743; GFX11-NEXT:    s_setpc_b64 s[30:31]
3744entry:
3745  %ext = extractelement <15 x float> %vec, i32 %sel
3746  ret float %ext
3747}
3748
; Dynamic extractelement from a non-inreg <15 x float> with an `inreg` i32
; index, in an amdgpu_ps function.
; The GPRIDX checks expect an s_set_gpr_idx_on s2 / v_mov_b32 v0, v0 /
; s_set_gpr_idx_off sequence. The MOVREL and GFX10PLUS checks expect the index
; copied into m0 followed by v_movrels_b32 v0, v0.
3749define amdgpu_ps float @dyn_extract_v15f32_v_s(<15 x float> %vec, i32 inreg %sel) {
3750; GPRIDX-LABEL: dyn_extract_v15f32_v_s:
3751; GPRIDX:       ; %bb.0: ; %entry
3752; GPRIDX-NEXT:    s_set_gpr_idx_on s2, gpr_idx(SRC0)
3753; GPRIDX-NEXT:    v_mov_b32_e32 v0, v0
3754; GPRIDX-NEXT:    s_set_gpr_idx_off
3755; GPRIDX-NEXT:    ; return to shader part epilog
3756;
3757; MOVREL-LABEL: dyn_extract_v15f32_v_s:
3758; MOVREL:       ; %bb.0: ; %entry
3759; MOVREL-NEXT:    s_mov_b32 m0, s2
3760; MOVREL-NEXT:    v_movrels_b32_e32 v0, v0
3761; MOVREL-NEXT:    ; return to shader part epilog
3762;
3763; GFX10PLUS-LABEL: dyn_extract_v15f32_v_s:
3764; GFX10PLUS:       ; %bb.0: ; %entry
3765; GFX10PLUS-NEXT:    s_mov_b32 m0, s2
3766; GFX10PLUS-NEXT:    v_movrels_b32_e32 v0, v0
3767; GFX10PLUS-NEXT:    ; return to shader part epilog
3768entry:
3769  %ext = extractelement <15 x float> %vec, i32 %sel
3770  ret float %ext
3771}
3772
; Dynamic extractelement where both the <15 x float> vector and the i32 index
; are `inreg` arguments of an amdgpu_ps function.
; The checks read the vector from s2..s16 and the index from s17. They expect
; the vector to be copied down to s0..s14, the index copied into m0, and one
; s_movrels_b32 based at s0 to do the lookup before the result is moved to v0.
3773define amdgpu_ps float @dyn_extract_v15f32_s_s(<15 x float> inreg %vec, i32 inreg %sel) {
3774; GCN-LABEL: dyn_extract_v15f32_s_s:
3775; GCN:       ; %bb.0: ; %entry
3776; GCN-NEXT:    s_mov_b32 s0, s2
3777; GCN-NEXT:    s_mov_b32 m0, s17
3778; GCN-NEXT:    s_mov_b32 s1, s3
3779; GCN-NEXT:    s_mov_b32 s2, s4
3780; GCN-NEXT:    s_mov_b32 s3, s5
3781; GCN-NEXT:    s_mov_b32 s4, s6
3782; GCN-NEXT:    s_mov_b32 s5, s7
3783; GCN-NEXT:    s_mov_b32 s6, s8
3784; GCN-NEXT:    s_mov_b32 s7, s9
3785; GCN-NEXT:    s_mov_b32 s8, s10
3786; GCN-NEXT:    s_mov_b32 s9, s11
3787; GCN-NEXT:    s_mov_b32 s10, s12
3788; GCN-NEXT:    s_mov_b32 s11, s13
3789; GCN-NEXT:    s_mov_b32 s12, s14
3790; GCN-NEXT:    s_mov_b32 s13, s15
3791; GCN-NEXT:    s_mov_b32 s14, s16
3792; GCN-NEXT:    s_movrels_b32 s0, s0
3793; GCN-NEXT:    v_mov_b32_e32 v0, s0
3794; GCN-NEXT:    ; return to shader part epilog
3795;
3796; GFX10PLUS-LABEL: dyn_extract_v15f32_s_s:
3797; GFX10PLUS:       ; %bb.0: ; %entry
3798; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
3799; GFX10PLUS-NEXT:    s_mov_b32 m0, s17
3800; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
3801; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
3802; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
3803; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
3804; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
3805; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
3806; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
3807; GFX10PLUS-NEXT:    s_mov_b32 s8, s10
3808; GFX10PLUS-NEXT:    s_mov_b32 s9, s11
3809; GFX10PLUS-NEXT:    s_mov_b32 s10, s12
3810; GFX10PLUS-NEXT:    s_mov_b32 s11, s13
3811; GFX10PLUS-NEXT:    s_mov_b32 s12, s14
3812; GFX10PLUS-NEXT:    s_mov_b32 s13, s15
3813; GFX10PLUS-NEXT:    s_mov_b32 s14, s16
3814; GFX10PLUS-NEXT:    s_movrels_b32 s0, s0
3815; GFX10PLUS-NEXT:    v_mov_b32_e32 v0, s0
3816; GFX10PLUS-NEXT:    ; return to shader part epilog
3817entry:
3818  %ext = extractelement <15 x float> %vec, i32 %sel
3819  ret float %ext
3820}
3821
; Same setup as an all-`inreg` extract (vector in s2..s16, index in s17), but
; the IR adds the constant 3 to the index before the extractelement.
; The checks contain no add instruction. The unmodified index is copied into m0
; and the lookup is s_movrels_b32 s0, s3, so the +3 appears as the base
; register of the relative move (s3 instead of s0).
; The GCN and GFX10PLUS blocks differ only in the order of the first few
; s_mov_b32 copies.
3822define amdgpu_ps float @dyn_extract_v15f32_s_s_offset3(<15 x float> inreg %vec, i32 inreg %sel) {
3823; GCN-LABEL: dyn_extract_v15f32_s_s_offset3:
3824; GCN:       ; %bb.0: ; %entry
3825; GCN-NEXT:    s_mov_b32 s0, s2
3826; GCN-NEXT:    s_mov_b32 s1, s3
3827; GCN-NEXT:    s_mov_b32 s3, s5
3828; GCN-NEXT:    s_mov_b32 m0, s17
3829; GCN-NEXT:    s_mov_b32 s2, s4
3830; GCN-NEXT:    s_mov_b32 s4, s6
3831; GCN-NEXT:    s_mov_b32 s5, s7
3832; GCN-NEXT:    s_mov_b32 s6, s8
3833; GCN-NEXT:    s_mov_b32 s7, s9
3834; GCN-NEXT:    s_mov_b32 s8, s10
3835; GCN-NEXT:    s_mov_b32 s9, s11
3836; GCN-NEXT:    s_mov_b32 s10, s12
3837; GCN-NEXT:    s_mov_b32 s11, s13
3838; GCN-NEXT:    s_mov_b32 s12, s14
3839; GCN-NEXT:    s_mov_b32 s13, s15
3840; GCN-NEXT:    s_mov_b32 s14, s16
3841; GCN-NEXT:    s_movrels_b32 s0, s3
3842; GCN-NEXT:    v_mov_b32_e32 v0, s0
3843; GCN-NEXT:    ; return to shader part epilog
3844;
3845; GFX10PLUS-LABEL: dyn_extract_v15f32_s_s_offset3:
3846; GFX10PLUS:       ; %bb.0: ; %entry
3847; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
3848; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
3849; GFX10PLUS-NEXT:    s_mov_b32 m0, s17
3850; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
3851; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
3852; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
3853; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
3854; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
3855; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
3856; GFX10PLUS-NEXT:    s_mov_b32 s8, s10
3857; GFX10PLUS-NEXT:    s_mov_b32 s9, s11
3858; GFX10PLUS-NEXT:    s_mov_b32 s10, s12
3859; GFX10PLUS-NEXT:    s_mov_b32 s11, s13
3860; GFX10PLUS-NEXT:    s_mov_b32 s12, s14
3861; GFX10PLUS-NEXT:    s_mov_b32 s13, s15
3862; GFX10PLUS-NEXT:    s_mov_b32 s14, s16
3863; GFX10PLUS-NEXT:    s_movrels_b32 s0, s3
3864; GFX10PLUS-NEXT:    v_mov_b32_e32 v0, s0
3865; GFX10PLUS-NEXT:    ; return to shader part epilog
3866entry:
3867  %add = add i32 %sel, 3
3868  %ext = extractelement <15 x float> %vec, i32 %add
3869  ret float %ext
3870}
3871
; Dynamic extractelement from a <15 x float> with index %sel + 3, in a function
; with the default calling convention (neither argument is `inreg`).
; Here the add is expected as a real instruction on the index register v15:
; v_add_u32_e32 in the GPRIDX block, v_add_u32_e32 with an explicit vcc operand
; in the MOVREL block, and v_add_nc_u32_e32 in the GFX10 and GFX11 blocks.
; After the add, every block expects a compare/select chain that tests v15
; against 1..15 and accumulates into v0.
; NOTE(review): the operand selected by the final compare against 15 (v0, s4 or
; s0 depending on the block) is not one of the fifteen inputs - presumably an
; undefined padding lane; confirm.
3872define float @dyn_extract_v15f32_v_v_offset3(<15 x float> %vec, i32 %sel) {
3873; GPRIDX-LABEL: dyn_extract_v15f32_v_v_offset3:
3874; GPRIDX:       ; %bb.0: ; %entry
3875; GPRIDX-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3876; GPRIDX-NEXT:    v_add_u32_e32 v15, 3, v15
3877; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v15
3878; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
3879; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v15
3880; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
3881; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v15
3882; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
3883; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v15
3884; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
3885; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v15
3886; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
3887; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v15
3888; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
3889; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 7, v15
3890; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc
3891; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 8, v15
3892; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc
3893; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 9, v15
3894; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v9, vcc
3895; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 10, v15
3896; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc
3897; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 11, v15
3898; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v11, vcc
3899; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 12, v15
3900; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v12, vcc
3901; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 13, v15
3902; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v13, vcc
3903; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 14, v15
3904; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v14, vcc
3905; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 15, v15
3906; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v0, vcc
3907; GPRIDX-NEXT:    s_setpc_b64 s[30:31]
3908;
3909; MOVREL-LABEL: dyn_extract_v15f32_v_v_offset3:
3910; MOVREL:       ; %bb.0: ; %entry
3911; MOVREL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3912; MOVREL-NEXT:    v_add_u32_e32 v15, vcc, 3, v15
3913; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v15
3914; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
3915; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v15
3916; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
3917; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v15
3918; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
3919; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v15
3920; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
3921; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v15
3922; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
3923; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v15
3924; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
3925; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 7, v15
3926; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc
3927; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 8, v15
3928; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc
3929; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 9, v15
3930; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v9, vcc
3931; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 10, v15
3932; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc
3933; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 11, v15
3934; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v11, vcc
3935; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 12, v15
3936; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v12, vcc
3937; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 13, v15
3938; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v13, vcc
3939; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 14, v15
3940; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v14, vcc
3941; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 15, v15
3942; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v0, vcc
3943; MOVREL-NEXT:    s_setpc_b64 s[30:31]
3944;
3945; GFX10-LABEL: dyn_extract_v15f32_v_v_offset3:
3946; GFX10:       ; %bb.0: ; %entry
3947; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3948; GFX10-NEXT:    v_add_nc_u32_e32 v15, 3, v15
3949; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v15
3950; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
3951; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v15
3952; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
3953; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v15
3954; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc_lo
3955; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v15
3956; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc_lo
3957; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v15
3958; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc_lo
3959; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v15
3960; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc_lo
3961; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v15
3962; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc_lo
3963; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 8, v15
3964; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc_lo
3965; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 9, v15
3966; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v9, vcc_lo
3967; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 10, v15
3968; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc_lo
3969; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 11, v15
3970; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v11, vcc_lo
3971; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 12, v15
3972; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v12, vcc_lo
3973; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 13, v15
3974; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v13, vcc_lo
3975; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 14, v15
3976; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v14, vcc_lo
3977; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 15, v15
3978; GFX10-NEXT:    v_cndmask_b32_e64 v0, v0, s4, vcc_lo
3979; GFX10-NEXT:    s_setpc_b64 s[30:31]
3980;
3981; GFX11-LABEL: dyn_extract_v15f32_v_v_offset3:
3982; GFX11:       ; %bb.0: ; %entry
3983; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3984; GFX11-NEXT:    v_add_nc_u32_e32 v15, 3, v15
3985; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v15
3986; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
3987; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v15
3988; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
3989; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v15
3990; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc_lo
3991; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v15
3992; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc_lo
3993; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v15
3994; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc_lo
3995; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v15
3996; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc_lo
3997; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v15
3998; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc_lo
3999; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 8, v15
4000; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc_lo
4001; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 9, v15
4002; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v9, vcc_lo
4003; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 10, v15
4004; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc_lo
4005; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 11, v15
4006; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v11, vcc_lo
4007; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 12, v15
4008; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v12, vcc_lo
4009; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 13, v15
4010; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v13, vcc_lo
4011; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 14, v15
4012; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v14, vcc_lo
4013; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 15, v15
4014; GFX11-NEXT:    v_cndmask_b32_e64 v0, v0, s0, vcc_lo
4015; GFX11-NEXT:    s_setpc_b64 s[30:31]
4016entry:
4017  %add = add i32 %sel, 3
4018  %ext = extractelement <15 x float> %vec, i32 %add
4019  ret float %ext
4020}
4021
4022define amdgpu_kernel void @dyn_extract_v4f32_s_s_s(ptr addrspace(1) %out, i32 %sel) {
4023; GPRIDX-LABEL: dyn_extract_v4f32_s_s_s:
4024; GPRIDX:         .amd_kernel_code_t
4025; GPRIDX-NEXT:     amd_code_version_major = 1
4026; GPRIDX-NEXT:     amd_code_version_minor = 2
4027; GPRIDX-NEXT:     amd_machine_kind = 1
4028; GPRIDX-NEXT:     amd_machine_version_major = 9
4029; GPRIDX-NEXT:     amd_machine_version_minor = 0
4030; GPRIDX-NEXT:     amd_machine_version_stepping = 0
4031; GPRIDX-NEXT:     kernel_code_entry_byte_offset = 256
4032; GPRIDX-NEXT:     kernel_code_prefetch_byte_size = 0
4033; GPRIDX-NEXT:     granulated_workitem_vgpr_count = 0
4034; GPRIDX-NEXT:     granulated_wavefront_sgpr_count = 1
4035; GPRIDX-NEXT:     priority = 0
4036; GPRIDX-NEXT:     float_mode = 240
4037; GPRIDX-NEXT:     priv = 0
4038; GPRIDX-NEXT:     enable_dx10_clamp = 1
4039; GPRIDX-NEXT:     debug_mode = 0
4040; GPRIDX-NEXT:     enable_ieee_mode = 1
4041; GPRIDX-NEXT:     enable_wgp_mode = 0
4042; GPRIDX-NEXT:     enable_mem_ordered = 0
4043; GPRIDX-NEXT:     enable_fwd_progress = 0
4044; GPRIDX-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 0
4045; GPRIDX-NEXT:     user_sgpr_count = 12
4046; GPRIDX-NEXT:     enable_trap_handler = 0
4047; GPRIDX-NEXT:     enable_sgpr_workgroup_id_x = 1
4048; GPRIDX-NEXT:     enable_sgpr_workgroup_id_y = 1
4049; GPRIDX-NEXT:     enable_sgpr_workgroup_id_z = 1
4050; GPRIDX-NEXT:     enable_sgpr_workgroup_info = 0
4051; GPRIDX-NEXT:     enable_vgpr_workitem_id = 2
4052; GPRIDX-NEXT:     enable_exception_msb = 0
4053; GPRIDX-NEXT:     granulated_lds_size = 0
4054; GPRIDX-NEXT:     enable_exception = 0
4055; GPRIDX-NEXT:     enable_sgpr_private_segment_buffer = 1
4056; GPRIDX-NEXT:     enable_sgpr_dispatch_ptr = 1
4057; GPRIDX-NEXT:     enable_sgpr_queue_ptr = 1
4058; GPRIDX-NEXT:     enable_sgpr_kernarg_segment_ptr = 1
4059; GPRIDX-NEXT:     enable_sgpr_dispatch_id = 1
4060; GPRIDX-NEXT:     enable_sgpr_flat_scratch_init = 0
4061; GPRIDX-NEXT:     enable_sgpr_private_segment_size = 0
4062; GPRIDX-NEXT:     enable_sgpr_grid_workgroup_count_x = 0
4063; GPRIDX-NEXT:     enable_sgpr_grid_workgroup_count_y = 0
4064; GPRIDX-NEXT:     enable_sgpr_grid_workgroup_count_z = 0
4065; GPRIDX-NEXT:     enable_wavefront_size32 = 0
4066; GPRIDX-NEXT:     enable_ordered_append_gds = 0
4067; GPRIDX-NEXT:     private_element_size = 1
4068; GPRIDX-NEXT:     is_ptr64 = 1
4069; GPRIDX-NEXT:     is_dynamic_callstack = 0
4070; GPRIDX-NEXT:     is_debug_enabled = 0
4071; GPRIDX-NEXT:     is_xnack_enabled = 1
4072; GPRIDX-NEXT:     workitem_private_segment_byte_size = 0
4073; GPRIDX-NEXT:     workgroup_group_segment_byte_size = 0
4074; GPRIDX-NEXT:     gds_segment_byte_size = 0
4075; GPRIDX-NEXT:     kernarg_segment_byte_size = 28
4076; GPRIDX-NEXT:     workgroup_fbarrier_count = 0
4077; GPRIDX-NEXT:     wavefront_sgpr_count = 14
4078; GPRIDX-NEXT:     workitem_vgpr_count = 2
4079; GPRIDX-NEXT:     reserved_vgpr_first = 0
4080; GPRIDX-NEXT:     reserved_vgpr_count = 0
4081; GPRIDX-NEXT:     reserved_sgpr_first = 0
4082; GPRIDX-NEXT:     reserved_sgpr_count = 0
4083; GPRIDX-NEXT:     debug_wavefront_private_segment_offset_sgpr = 0
4084; GPRIDX-NEXT:     debug_private_segment_buffer_sgpr = 0
4085; GPRIDX-NEXT:     kernarg_segment_alignment = 4
4086; GPRIDX-NEXT:     group_segment_alignment = 4
4087; GPRIDX-NEXT:     private_segment_alignment = 4
4088; GPRIDX-NEXT:     wavefront_size = 6
4089; GPRIDX-NEXT:     call_convention = -1
4090; GPRIDX-NEXT:     runtime_loader_kernel_symbol = 0
4091; GPRIDX-NEXT:    .end_amd_kernel_code_t
4092; GPRIDX-NEXT:  ; %bb.0: ; %entry
4093; GPRIDX-NEXT:    s_load_dword s2, s[8:9], 0x8
4094; GPRIDX-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
4095; GPRIDX-NEXT:    v_mov_b32_e32 v1, 0
4096; GPRIDX-NEXT:    s_waitcnt lgkmcnt(0)
4097; GPRIDX-NEXT:    s_cmp_eq_u32 s2, 1
4098; GPRIDX-NEXT:    s_cselect_b32 s3, 2.0, 1.0
4099; GPRIDX-NEXT:    s_cmp_eq_u32 s2, 2
4100; GPRIDX-NEXT:    s_cselect_b32 s3, 0x40400000, s3
4101; GPRIDX-NEXT:    s_cmp_eq_u32 s2, 3
4102; GPRIDX-NEXT:    s_cselect_b32 s2, 4.0, s3
4103; GPRIDX-NEXT:    v_mov_b32_e32 v0, s2
4104; GPRIDX-NEXT:    global_store_dword v1, v0, s[0:1]
4105; GPRIDX-NEXT:    s_endpgm
4106;
4107; MOVREL-LABEL: dyn_extract_v4f32_s_s_s:
4108; MOVREL:         .amd_kernel_code_t
4109; MOVREL-NEXT:     amd_code_version_major = 1
4110; MOVREL-NEXT:     amd_code_version_minor = 2
4111; MOVREL-NEXT:     amd_machine_kind = 1
4112; MOVREL-NEXT:     amd_machine_version_major = 8
4113; MOVREL-NEXT:     amd_machine_version_minor = 0
4114; MOVREL-NEXT:     amd_machine_version_stepping = 3
4115; MOVREL-NEXT:     kernel_code_entry_byte_offset = 256
4116; MOVREL-NEXT:     kernel_code_prefetch_byte_size = 0
4117; MOVREL-NEXT:     granulated_workitem_vgpr_count = 0
4118; MOVREL-NEXT:     granulated_wavefront_sgpr_count = 1
4119; MOVREL-NEXT:     priority = 0
4120; MOVREL-NEXT:     float_mode = 240
4121; MOVREL-NEXT:     priv = 0
4122; MOVREL-NEXT:     enable_dx10_clamp = 1
4123; MOVREL-NEXT:     debug_mode = 0
4124; MOVREL-NEXT:     enable_ieee_mode = 1
4125; MOVREL-NEXT:     enable_wgp_mode = 0
4126; MOVREL-NEXT:     enable_mem_ordered = 0
4127; MOVREL-NEXT:     enable_fwd_progress = 0
4128; MOVREL-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 0
4129; MOVREL-NEXT:     user_sgpr_count = 12
4130; MOVREL-NEXT:     enable_trap_handler = 0
4131; MOVREL-NEXT:     enable_sgpr_workgroup_id_x = 1
4132; MOVREL-NEXT:     enable_sgpr_workgroup_id_y = 1
4133; MOVREL-NEXT:     enable_sgpr_workgroup_id_z = 1
4134; MOVREL-NEXT:     enable_sgpr_workgroup_info = 0
4135; MOVREL-NEXT:     enable_vgpr_workitem_id = 2
4136; MOVREL-NEXT:     enable_exception_msb = 0
4137; MOVREL-NEXT:     granulated_lds_size = 0
4138; MOVREL-NEXT:     enable_exception = 0
4139; MOVREL-NEXT:     enable_sgpr_private_segment_buffer = 1
4140; MOVREL-NEXT:     enable_sgpr_dispatch_ptr = 1
4141; MOVREL-NEXT:     enable_sgpr_queue_ptr = 1
4142; MOVREL-NEXT:     enable_sgpr_kernarg_segment_ptr = 1
4143; MOVREL-NEXT:     enable_sgpr_dispatch_id = 1
4144; MOVREL-NEXT:     enable_sgpr_flat_scratch_init = 0
4145; MOVREL-NEXT:     enable_sgpr_private_segment_size = 0
4146; MOVREL-NEXT:     enable_sgpr_grid_workgroup_count_x = 0
4147; MOVREL-NEXT:     enable_sgpr_grid_workgroup_count_y = 0
4148; MOVREL-NEXT:     enable_sgpr_grid_workgroup_count_z = 0
4149; MOVREL-NEXT:     enable_wavefront_size32 = 0
4150; MOVREL-NEXT:     enable_ordered_append_gds = 0
4151; MOVREL-NEXT:     private_element_size = 1
4152; MOVREL-NEXT:     is_ptr64 = 1
4153; MOVREL-NEXT:     is_dynamic_callstack = 0
4154; MOVREL-NEXT:     is_debug_enabled = 0
4155; MOVREL-NEXT:     is_xnack_enabled = 0
4156; MOVREL-NEXT:     workitem_private_segment_byte_size = 0
4157; MOVREL-NEXT:     workgroup_group_segment_byte_size = 0
4158; MOVREL-NEXT:     gds_segment_byte_size = 0
4159; MOVREL-NEXT:     kernarg_segment_byte_size = 28
4160; MOVREL-NEXT:     workgroup_fbarrier_count = 0
4161; MOVREL-NEXT:     wavefront_sgpr_count = 10
4162; MOVREL-NEXT:     workitem_vgpr_count = 3
4163; MOVREL-NEXT:     reserved_vgpr_first = 0
4164; MOVREL-NEXT:     reserved_vgpr_count = 0
4165; MOVREL-NEXT:     reserved_sgpr_first = 0
4166; MOVREL-NEXT:     reserved_sgpr_count = 0
4167; MOVREL-NEXT:     debug_wavefront_private_segment_offset_sgpr = 0
4168; MOVREL-NEXT:     debug_private_segment_buffer_sgpr = 0
4169; MOVREL-NEXT:     kernarg_segment_alignment = 4
4170; MOVREL-NEXT:     group_segment_alignment = 4
4171; MOVREL-NEXT:     private_segment_alignment = 4
4172; MOVREL-NEXT:     wavefront_size = 6
4173; MOVREL-NEXT:     call_convention = -1
4174; MOVREL-NEXT:     runtime_loader_kernel_symbol = 0
4175; MOVREL-NEXT:    .end_amd_kernel_code_t
4176; MOVREL-NEXT:  ; %bb.0: ; %entry
4177; MOVREL-NEXT:    s_load_dword s2, s[8:9], 0x8
4178; MOVREL-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
4179; MOVREL-NEXT:    s_waitcnt lgkmcnt(0)
4180; MOVREL-NEXT:    s_cmp_eq_u32 s2, 1
4181; MOVREL-NEXT:    s_cselect_b32 s3, 2.0, 1.0
4182; MOVREL-NEXT:    s_cmp_eq_u32 s2, 2
4183; MOVREL-NEXT:    s_cselect_b32 s3, 0x40400000, s3
4184; MOVREL-NEXT:    s_cmp_eq_u32 s2, 3
4185; MOVREL-NEXT:    s_cselect_b32 s2, 4.0, s3
4186; MOVREL-NEXT:    v_mov_b32_e32 v0, s0
4187; MOVREL-NEXT:    v_mov_b32_e32 v2, s2
4188; MOVREL-NEXT:    v_mov_b32_e32 v1, s1
4189; MOVREL-NEXT:    flat_store_dword v[0:1], v2
4190; MOVREL-NEXT:    s_endpgm
4191;
4192; GFX10-LABEL: dyn_extract_v4f32_s_s_s:
4193; GFX10:         .amd_kernel_code_t
4194; GFX10-NEXT:     amd_code_version_major = 1
4195; GFX10-NEXT:     amd_code_version_minor = 2
4196; GFX10-NEXT:     amd_machine_kind = 1
4197; GFX10-NEXT:     amd_machine_version_major = 10
4198; GFX10-NEXT:     amd_machine_version_minor = 1
4199; GFX10-NEXT:     amd_machine_version_stepping = 0
4200; GFX10-NEXT:     kernel_code_entry_byte_offset = 256
4201; GFX10-NEXT:     kernel_code_prefetch_byte_size = 0
4202; GFX10-NEXT:     granulated_workitem_vgpr_count = 0
4203; GFX10-NEXT:     granulated_wavefront_sgpr_count = 1
4204; GFX10-NEXT:     priority = 0
4205; GFX10-NEXT:     float_mode = 240
4206; GFX10-NEXT:     priv = 0
4207; GFX10-NEXT:     enable_dx10_clamp = 1
4208; GFX10-NEXT:     debug_mode = 0
4209; GFX10-NEXT:     enable_ieee_mode = 1
4210; GFX10-NEXT:     enable_wgp_mode = 1
4211; GFX10-NEXT:     enable_mem_ordered = 1
4212; GFX10-NEXT:     enable_fwd_progress = 0
4213; GFX10-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 0
4214; GFX10-NEXT:     user_sgpr_count = 12
4215; GFX10-NEXT:     enable_trap_handler = 0
4216; GFX10-NEXT:     enable_sgpr_workgroup_id_x = 1
4217; GFX10-NEXT:     enable_sgpr_workgroup_id_y = 1
4218; GFX10-NEXT:     enable_sgpr_workgroup_id_z = 1
4219; GFX10-NEXT:     enable_sgpr_workgroup_info = 0
4220; GFX10-NEXT:     enable_vgpr_workitem_id = 2
4221; GFX10-NEXT:     enable_exception_msb = 0
4222; GFX10-NEXT:     granulated_lds_size = 0
4223; GFX10-NEXT:     enable_exception = 0
4224; GFX10-NEXT:     enable_sgpr_private_segment_buffer = 1
4225; GFX10-NEXT:     enable_sgpr_dispatch_ptr = 1
4226; GFX10-NEXT:     enable_sgpr_queue_ptr = 1
4227; GFX10-NEXT:     enable_sgpr_kernarg_segment_ptr = 1
4228; GFX10-NEXT:     enable_sgpr_dispatch_id = 1
4229; GFX10-NEXT:     enable_sgpr_flat_scratch_init = 0
4230; GFX10-NEXT:     enable_sgpr_private_segment_size = 0
4231; GFX10-NEXT:     enable_sgpr_grid_workgroup_count_x = 0
4232; GFX10-NEXT:     enable_sgpr_grid_workgroup_count_y = 0
4233; GFX10-NEXT:     enable_sgpr_grid_workgroup_count_z = 0
4234; GFX10-NEXT:     enable_wavefront_size32 = 1
4235; GFX10-NEXT:     enable_ordered_append_gds = 0
4236; GFX10-NEXT:     private_element_size = 1
4237; GFX10-NEXT:     is_ptr64 = 1
4238; GFX10-NEXT:     is_dynamic_callstack = 0
4239; GFX10-NEXT:     is_debug_enabled = 0
4240; GFX10-NEXT:     is_xnack_enabled = 1
4241; GFX10-NEXT:     workitem_private_segment_byte_size = 0
4242; GFX10-NEXT:     workgroup_group_segment_byte_size = 0
4243; GFX10-NEXT:     gds_segment_byte_size = 0
4244; GFX10-NEXT:     kernarg_segment_byte_size = 28
4245; GFX10-NEXT:     workgroup_fbarrier_count = 0
4246; GFX10-NEXT:     wavefront_sgpr_count = 10
4247; GFX10-NEXT:     workitem_vgpr_count = 2
4248; GFX10-NEXT:     reserved_vgpr_first = 0
4249; GFX10-NEXT:     reserved_vgpr_count = 0
4250; GFX10-NEXT:     reserved_sgpr_first = 0
4251; GFX10-NEXT:     reserved_sgpr_count = 0
4252; GFX10-NEXT:     debug_wavefront_private_segment_offset_sgpr = 0
4253; GFX10-NEXT:     debug_private_segment_buffer_sgpr = 0
4254; GFX10-NEXT:     kernarg_segment_alignment = 4
4255; GFX10-NEXT:     group_segment_alignment = 4
4256; GFX10-NEXT:     private_segment_alignment = 4
4257; GFX10-NEXT:     wavefront_size = 5
4258; GFX10-NEXT:     call_convention = -1
4259; GFX10-NEXT:     runtime_loader_kernel_symbol = 0
4260; GFX10-NEXT:    .end_amd_kernel_code_t
4261; GFX10-NEXT:  ; %bb.0: ; %entry
4262; GFX10-NEXT:    s_clause 0x1
4263; GFX10-NEXT:    s_load_dword s2, s[8:9], 0x8
4264; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
4265; GFX10-NEXT:    v_mov_b32_e32 v1, 0
4266; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
4267; GFX10-NEXT:    s_cmp_eq_u32 s2, 1
4268; GFX10-NEXT:    s_cselect_b32 s3, 2.0, 1.0
4269; GFX10-NEXT:    s_cmp_eq_u32 s2, 2
4270; GFX10-NEXT:    s_cselect_b32 s3, 0x40400000, s3
4271; GFX10-NEXT:    s_cmp_eq_u32 s2, 3
4272; GFX10-NEXT:    s_cselect_b32 s2, 4.0, s3
4273; GFX10-NEXT:    v_mov_b32_e32 v0, s2
4274; GFX10-NEXT:    global_store_dword v1, v0, s[0:1]
4275; GFX10-NEXT:    s_endpgm
4276;
4277; GFX11-LABEL: dyn_extract_v4f32_s_s_s:
4278; GFX11:         .amd_kernel_code_t
4279; GFX11-NEXT:     amd_code_version_major = 1
4280; GFX11-NEXT:     amd_code_version_minor = 2
4281; GFX11-NEXT:     amd_machine_kind = 1
4282; GFX11-NEXT:     amd_machine_version_major = 11
4283; GFX11-NEXT:     amd_machine_version_minor = 0
4284; GFX11-NEXT:     amd_machine_version_stepping = 0
4285; GFX11-NEXT:     kernel_code_entry_byte_offset = 256
4286; GFX11-NEXT:     kernel_code_prefetch_byte_size = 0
4287; GFX11-NEXT:     granulated_workitem_vgpr_count = 0
4288; GFX11-NEXT:     granulated_wavefront_sgpr_count = 0
4289; GFX11-NEXT:     priority = 0
4290; GFX11-NEXT:     float_mode = 240
4291; GFX11-NEXT:     priv = 0
4292; GFX11-NEXT:     enable_dx10_clamp = 1
4293; GFX11-NEXT:     debug_mode = 0
4294; GFX11-NEXT:     enable_ieee_mode = 1
4295; GFX11-NEXT:     enable_wgp_mode = 1
4296; GFX11-NEXT:     enable_mem_ordered = 1
4297; GFX11-NEXT:     enable_fwd_progress = 0
4298; GFX11-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 0
4299; GFX11-NEXT:     user_sgpr_count = 13
4300; GFX11-NEXT:     enable_trap_handler = 0
4301; GFX11-NEXT:     enable_sgpr_workgroup_id_x = 1
4302; GFX11-NEXT:     enable_sgpr_workgroup_id_y = 1
4303; GFX11-NEXT:     enable_sgpr_workgroup_id_z = 1
4304; GFX11-NEXT:     enable_sgpr_workgroup_info = 0
4305; GFX11-NEXT:     enable_vgpr_workitem_id = 2
4306; GFX11-NEXT:     enable_exception_msb = 0
4307; GFX11-NEXT:     granulated_lds_size = 0
4308; GFX11-NEXT:     enable_exception = 0
4309; GFX11-NEXT:     enable_sgpr_private_segment_buffer = 0
4310; GFX11-NEXT:     enable_sgpr_dispatch_ptr = 1
4311; GFX11-NEXT:     enable_sgpr_queue_ptr = 1
4312; GFX11-NEXT:     enable_sgpr_kernarg_segment_ptr = 1
4313; GFX11-NEXT:     enable_sgpr_dispatch_id = 1
4314; GFX11-NEXT:     enable_sgpr_flat_scratch_init = 0
4315; GFX11-NEXT:     enable_sgpr_private_segment_size = 0
4316; GFX11-NEXT:     enable_sgpr_grid_workgroup_count_x = 0
4317; GFX11-NEXT:     enable_sgpr_grid_workgroup_count_y = 0
4318; GFX11-NEXT:     enable_sgpr_grid_workgroup_count_z = 0
4319; GFX11-NEXT:     enable_wavefront_size32 = 1
4320; GFX11-NEXT:     enable_ordered_append_gds = 0
4321; GFX11-NEXT:     private_element_size = 1
4322; GFX11-NEXT:     is_ptr64 = 1
4323; GFX11-NEXT:     is_dynamic_callstack = 0
4324; GFX11-NEXT:     is_debug_enabled = 0
4325; GFX11-NEXT:     is_xnack_enabled = 0
4326; GFX11-NEXT:     workitem_private_segment_byte_size = 0
4327; GFX11-NEXT:     workgroup_group_segment_byte_size = 0
4328; GFX11-NEXT:     gds_segment_byte_size = 0
4329; GFX11-NEXT:     kernarg_segment_byte_size = 28
4330; GFX11-NEXT:     workgroup_fbarrier_count = 0
4331; GFX11-NEXT:     wavefront_sgpr_count = 6
4332; GFX11-NEXT:     workitem_vgpr_count = 2
4333; GFX11-NEXT:     reserved_vgpr_first = 0
4334; GFX11-NEXT:     reserved_vgpr_count = 0
4335; GFX11-NEXT:     reserved_sgpr_first = 0
4336; GFX11-NEXT:     reserved_sgpr_count = 0
4337; GFX11-NEXT:     debug_wavefront_private_segment_offset_sgpr = 0
4338; GFX11-NEXT:     debug_private_segment_buffer_sgpr = 0
4339; GFX11-NEXT:     kernarg_segment_alignment = 4
4340; GFX11-NEXT:     group_segment_alignment = 4
4341; GFX11-NEXT:     private_segment_alignment = 4
4342; GFX11-NEXT:     wavefront_size = 5
4343; GFX11-NEXT:     call_convention = -1
4344; GFX11-NEXT:     runtime_loader_kernel_symbol = 0
4345; GFX11-NEXT:    .end_amd_kernel_code_t
4346; GFX11-NEXT:  ; %bb.0: ; %entry
4347; GFX11-NEXT:    s_clause 0x1
4348; GFX11-NEXT:    s_load_b32 s2, s[4:5], 0x8
4349; GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
4350; GFX11-NEXT:    v_mov_b32_e32 v1, 0
4351; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
4352; GFX11-NEXT:    s_cmp_eq_u32 s2, 1
4353; GFX11-NEXT:    s_cselect_b32 s3, 2.0, 1.0
4354; GFX11-NEXT:    s_cmp_eq_u32 s2, 2
4355; GFX11-NEXT:    s_cselect_b32 s3, 0x40400000, s3
4356; GFX11-NEXT:    s_cmp_eq_u32 s2, 3
4357; GFX11-NEXT:    s_cselect_b32 s2, 4.0, s3
4358; GFX11-NEXT:    v_mov_b32_e32 v0, s2
4359; GFX11-NEXT:    global_store_b32 v1, v0, s[0:1]
4360; GFX11-NEXT:    s_endpgm
4361entry:
4362  %ext = extractelement <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, i32 %sel
4363  store float %ext, ptr addrspace(1) %out
4364  ret void
4365}
4366
; Kernel test: index the constant vector <double 1.0, 2.0, 3.0, 4.0> with the
; kernel argument %sel and store the selected element through %out.
; For every checked prefix the extract is expanded into a chain of scalar
; compares of the index against 1, 2 and 3 (s_cmp_eq_u32), each feeding a
; 64-bit scalar select (s_cselect_b64). 1.0, 2.0 and 4.0 appear as immediate
; operands of the selects, while 3.0 is built in s[2:3] from its two 32-bit
; halves (0 and 0x40080000).
; The checks also pin the complete .amd_kernel_code_t header emitted for the
; kernel. All check lines are autogenerated (see the NOTE at the top of the
; file); regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand.
; NOTE(review): the "_s_s_s" suffix presumably marks result, vector and index
; as scalar (SGPR) values - confirm against the naming used elsewhere in the
; file.
4367define amdgpu_kernel void @dyn_extract_v4f64_s_s_s(ptr addrspace(1) %out, i32 %sel) {
4368; GPRIDX-LABEL: dyn_extract_v4f64_s_s_s:
4369; GPRIDX:         .amd_kernel_code_t
4370; GPRIDX-NEXT:     amd_code_version_major = 1
4371; GPRIDX-NEXT:     amd_code_version_minor = 2
4372; GPRIDX-NEXT:     amd_machine_kind = 1
4373; GPRIDX-NEXT:     amd_machine_version_major = 9
4374; GPRIDX-NEXT:     amd_machine_version_minor = 0
4375; GPRIDX-NEXT:     amd_machine_version_stepping = 0
4376; GPRIDX-NEXT:     kernel_code_entry_byte_offset = 256
4377; GPRIDX-NEXT:     kernel_code_prefetch_byte_size = 0
4378; GPRIDX-NEXT:     granulated_workitem_vgpr_count = 0
4379; GPRIDX-NEXT:     granulated_wavefront_sgpr_count = 1
4380; GPRIDX-NEXT:     priority = 0
4381; GPRIDX-NEXT:     float_mode = 240
4382; GPRIDX-NEXT:     priv = 0
4383; GPRIDX-NEXT:     enable_dx10_clamp = 1
4384; GPRIDX-NEXT:     debug_mode = 0
4385; GPRIDX-NEXT:     enable_ieee_mode = 1
4386; GPRIDX-NEXT:     enable_wgp_mode = 0
4387; GPRIDX-NEXT:     enable_mem_ordered = 0
4388; GPRIDX-NEXT:     enable_fwd_progress = 0
4389; GPRIDX-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 0
4390; GPRIDX-NEXT:     user_sgpr_count = 12
4391; GPRIDX-NEXT:     enable_trap_handler = 0
4392; GPRIDX-NEXT:     enable_sgpr_workgroup_id_x = 1
4393; GPRIDX-NEXT:     enable_sgpr_workgroup_id_y = 1
4394; GPRIDX-NEXT:     enable_sgpr_workgroup_id_z = 1
4395; GPRIDX-NEXT:     enable_sgpr_workgroup_info = 0
4396; GPRIDX-NEXT:     enable_vgpr_workitem_id = 2
4397; GPRIDX-NEXT:     enable_exception_msb = 0
4398; GPRIDX-NEXT:     granulated_lds_size = 0
4399; GPRIDX-NEXT:     enable_exception = 0
4400; GPRIDX-NEXT:     enable_sgpr_private_segment_buffer = 1
4401; GPRIDX-NEXT:     enable_sgpr_dispatch_ptr = 1
4402; GPRIDX-NEXT:     enable_sgpr_queue_ptr = 1
4403; GPRIDX-NEXT:     enable_sgpr_kernarg_segment_ptr = 1
4404; GPRIDX-NEXT:     enable_sgpr_dispatch_id = 1
4405; GPRIDX-NEXT:     enable_sgpr_flat_scratch_init = 0
4406; GPRIDX-NEXT:     enable_sgpr_private_segment_size = 0
4407; GPRIDX-NEXT:     enable_sgpr_grid_workgroup_count_x = 0
4408; GPRIDX-NEXT:     enable_sgpr_grid_workgroup_count_y = 0
4409; GPRIDX-NEXT:     enable_sgpr_grid_workgroup_count_z = 0
4410; GPRIDX-NEXT:     enable_wavefront_size32 = 0
4411; GPRIDX-NEXT:     enable_ordered_append_gds = 0
4412; GPRIDX-NEXT:     private_element_size = 1
4413; GPRIDX-NEXT:     is_ptr64 = 1
4414; GPRIDX-NEXT:     is_dynamic_callstack = 0
4415; GPRIDX-NEXT:     is_debug_enabled = 0
4416; GPRIDX-NEXT:     is_xnack_enabled = 1
4417; GPRIDX-NEXT:     workitem_private_segment_byte_size = 0
4418; GPRIDX-NEXT:     workgroup_group_segment_byte_size = 0
4419; GPRIDX-NEXT:     gds_segment_byte_size = 0
4420; GPRIDX-NEXT:     kernarg_segment_byte_size = 28
4421; GPRIDX-NEXT:     workgroup_fbarrier_count = 0
4422; GPRIDX-NEXT:     wavefront_sgpr_count = 14
4423; GPRIDX-NEXT:     workitem_vgpr_count = 3
4424; GPRIDX-NEXT:     reserved_vgpr_first = 0
4425; GPRIDX-NEXT:     reserved_vgpr_count = 0
4426; GPRIDX-NEXT:     reserved_sgpr_first = 0
4427; GPRIDX-NEXT:     reserved_sgpr_count = 0
4428; GPRIDX-NEXT:     debug_wavefront_private_segment_offset_sgpr = 0
4429; GPRIDX-NEXT:     debug_private_segment_buffer_sgpr = 0
4430; GPRIDX-NEXT:     kernarg_segment_alignment = 4
4431; GPRIDX-NEXT:     group_segment_alignment = 4
4432; GPRIDX-NEXT:     private_segment_alignment = 4
4433; GPRIDX-NEXT:     wavefront_size = 6
4434; GPRIDX-NEXT:     call_convention = -1
4435; GPRIDX-NEXT:     runtime_loader_kernel_symbol = 0
4436; GPRIDX-NEXT:    .end_amd_kernel_code_t
4437; GPRIDX-NEXT:  ; %bb.0: ; %entry
4438; GPRIDX-NEXT:    s_load_dword s6, s[8:9], 0x8
4439; GPRIDX-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
4440; GPRIDX-NEXT:    s_mov_b32 s2, 0
4441; GPRIDX-NEXT:    s_mov_b32 s3, 0x40080000
4442; GPRIDX-NEXT:    v_mov_b32_e32 v2, 0
4443; GPRIDX-NEXT:    s_waitcnt lgkmcnt(0)
4444; GPRIDX-NEXT:    s_cmp_eq_u32 s6, 1
4445; GPRIDX-NEXT:    s_cselect_b64 s[4:5], 2.0, 1.0
4446; GPRIDX-NEXT:    s_cmp_eq_u32 s6, 2
4447; GPRIDX-NEXT:    s_cselect_b64 s[2:3], s[2:3], s[4:5]
4448; GPRIDX-NEXT:    s_cmp_eq_u32 s6, 3
4449; GPRIDX-NEXT:    s_cselect_b64 s[2:3], 4.0, s[2:3]
4450; GPRIDX-NEXT:    v_mov_b32_e32 v0, s2
4451; GPRIDX-NEXT:    v_mov_b32_e32 v1, s3
4452; GPRIDX-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
4453; GPRIDX-NEXT:    s_endpgm
4454;
4455; MOVREL-LABEL: dyn_extract_v4f64_s_s_s:
4456; MOVREL:         .amd_kernel_code_t
4457; MOVREL-NEXT:     amd_code_version_major = 1
4458; MOVREL-NEXT:     amd_code_version_minor = 2
4459; MOVREL-NEXT:     amd_machine_kind = 1
4460; MOVREL-NEXT:     amd_machine_version_major = 8
4461; MOVREL-NEXT:     amd_machine_version_minor = 0
4462; MOVREL-NEXT:     amd_machine_version_stepping = 3
4463; MOVREL-NEXT:     kernel_code_entry_byte_offset = 256
4464; MOVREL-NEXT:     kernel_code_prefetch_byte_size = 0
4465; MOVREL-NEXT:     granulated_workitem_vgpr_count = 0
4466; MOVREL-NEXT:     granulated_wavefront_sgpr_count = 1
4467; MOVREL-NEXT:     priority = 0
4468; MOVREL-NEXT:     float_mode = 240
4469; MOVREL-NEXT:     priv = 0
4470; MOVREL-NEXT:     enable_dx10_clamp = 1
4471; MOVREL-NEXT:     debug_mode = 0
4472; MOVREL-NEXT:     enable_ieee_mode = 1
4473; MOVREL-NEXT:     enable_wgp_mode = 0
4474; MOVREL-NEXT:     enable_mem_ordered = 0
4475; MOVREL-NEXT:     enable_fwd_progress = 0
4476; MOVREL-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 0
4477; MOVREL-NEXT:     user_sgpr_count = 12
4478; MOVREL-NEXT:     enable_trap_handler = 0
4479; MOVREL-NEXT:     enable_sgpr_workgroup_id_x = 1
4480; MOVREL-NEXT:     enable_sgpr_workgroup_id_y = 1
4481; MOVREL-NEXT:     enable_sgpr_workgroup_id_z = 1
4482; MOVREL-NEXT:     enable_sgpr_workgroup_info = 0
4483; MOVREL-NEXT:     enable_vgpr_workitem_id = 2
4484; MOVREL-NEXT:     enable_exception_msb = 0
4485; MOVREL-NEXT:     granulated_lds_size = 0
4486; MOVREL-NEXT:     enable_exception = 0
4487; MOVREL-NEXT:     enable_sgpr_private_segment_buffer = 1
4488; MOVREL-NEXT:     enable_sgpr_dispatch_ptr = 1
4489; MOVREL-NEXT:     enable_sgpr_queue_ptr = 1
4490; MOVREL-NEXT:     enable_sgpr_kernarg_segment_ptr = 1
4491; MOVREL-NEXT:     enable_sgpr_dispatch_id = 1
4492; MOVREL-NEXT:     enable_sgpr_flat_scratch_init = 0
4493; MOVREL-NEXT:     enable_sgpr_private_segment_size = 0
4494; MOVREL-NEXT:     enable_sgpr_grid_workgroup_count_x = 0
4495; MOVREL-NEXT:     enable_sgpr_grid_workgroup_count_y = 0
4496; MOVREL-NEXT:     enable_sgpr_grid_workgroup_count_z = 0
4497; MOVREL-NEXT:     enable_wavefront_size32 = 0
4498; MOVREL-NEXT:     enable_ordered_append_gds = 0
4499; MOVREL-NEXT:     private_element_size = 1
4500; MOVREL-NEXT:     is_ptr64 = 1
4501; MOVREL-NEXT:     is_dynamic_callstack = 0
4502; MOVREL-NEXT:     is_debug_enabled = 0
4503; MOVREL-NEXT:     is_xnack_enabled = 0
4504; MOVREL-NEXT:     workitem_private_segment_byte_size = 0
4505; MOVREL-NEXT:     workgroup_group_segment_byte_size = 0
4506; MOVREL-NEXT:     gds_segment_byte_size = 0
4507; MOVREL-NEXT:     kernarg_segment_byte_size = 28
4508; MOVREL-NEXT:     workgroup_fbarrier_count = 0
4509; MOVREL-NEXT:     wavefront_sgpr_count = 10
4510; MOVREL-NEXT:     workitem_vgpr_count = 4
4511; MOVREL-NEXT:     reserved_vgpr_first = 0
4512; MOVREL-NEXT:     reserved_vgpr_count = 0
4513; MOVREL-NEXT:     reserved_sgpr_first = 0
4514; MOVREL-NEXT:     reserved_sgpr_count = 0
4515; MOVREL-NEXT:     debug_wavefront_private_segment_offset_sgpr = 0
4516; MOVREL-NEXT:     debug_private_segment_buffer_sgpr = 0
4517; MOVREL-NEXT:     kernarg_segment_alignment = 4
4518; MOVREL-NEXT:     group_segment_alignment = 4
4519; MOVREL-NEXT:     private_segment_alignment = 4
4520; MOVREL-NEXT:     wavefront_size = 6
4521; MOVREL-NEXT:     call_convention = -1
4522; MOVREL-NEXT:     runtime_loader_kernel_symbol = 0
4523; MOVREL-NEXT:    .end_amd_kernel_code_t
4524; MOVREL-NEXT:  ; %bb.0: ; %entry
4525; MOVREL-NEXT:    s_load_dword s6, s[8:9], 0x8
4526; MOVREL-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
4527; MOVREL-NEXT:    s_mov_b32 s2, 0
4528; MOVREL-NEXT:    s_mov_b32 s3, 0x40080000
4529; MOVREL-NEXT:    s_waitcnt lgkmcnt(0)
4530; MOVREL-NEXT:    s_cmp_eq_u32 s6, 1
4531; MOVREL-NEXT:    s_cselect_b64 s[4:5], 2.0, 1.0
4532; MOVREL-NEXT:    s_cmp_eq_u32 s6, 2
4533; MOVREL-NEXT:    s_cselect_b64 s[2:3], s[2:3], s[4:5]
4534; MOVREL-NEXT:    s_cmp_eq_u32 s6, 3
4535; MOVREL-NEXT:    s_cselect_b64 s[2:3], 4.0, s[2:3]
4536; MOVREL-NEXT:    v_mov_b32_e32 v0, s2
4537; MOVREL-NEXT:    v_mov_b32_e32 v3, s1
4538; MOVREL-NEXT:    v_mov_b32_e32 v1, s3
4539; MOVREL-NEXT:    v_mov_b32_e32 v2, s0
4540; MOVREL-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
4541; MOVREL-NEXT:    s_endpgm
4542;
4543; GFX10-LABEL: dyn_extract_v4f64_s_s_s:
4544; GFX10:         .amd_kernel_code_t
4545; GFX10-NEXT:     amd_code_version_major = 1
4546; GFX10-NEXT:     amd_code_version_minor = 2
4547; GFX10-NEXT:     amd_machine_kind = 1
4548; GFX10-NEXT:     amd_machine_version_major = 10
4549; GFX10-NEXT:     amd_machine_version_minor = 1
4550; GFX10-NEXT:     amd_machine_version_stepping = 0
4551; GFX10-NEXT:     kernel_code_entry_byte_offset = 256
4552; GFX10-NEXT:     kernel_code_prefetch_byte_size = 0
4553; GFX10-NEXT:     granulated_workitem_vgpr_count = 0
4554; GFX10-NEXT:     granulated_wavefront_sgpr_count = 1
4555; GFX10-NEXT:     priority = 0
4556; GFX10-NEXT:     float_mode = 240
4557; GFX10-NEXT:     priv = 0
4558; GFX10-NEXT:     enable_dx10_clamp = 1
4559; GFX10-NEXT:     debug_mode = 0
4560; GFX10-NEXT:     enable_ieee_mode = 1
4561; GFX10-NEXT:     enable_wgp_mode = 1
4562; GFX10-NEXT:     enable_mem_ordered = 1
4563; GFX10-NEXT:     enable_fwd_progress = 0
4564; GFX10-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 0
4565; GFX10-NEXT:     user_sgpr_count = 12
4566; GFX10-NEXT:     enable_trap_handler = 0
4567; GFX10-NEXT:     enable_sgpr_workgroup_id_x = 1
4568; GFX10-NEXT:     enable_sgpr_workgroup_id_y = 1
4569; GFX10-NEXT:     enable_sgpr_workgroup_id_z = 1
4570; GFX10-NEXT:     enable_sgpr_workgroup_info = 0
4571; GFX10-NEXT:     enable_vgpr_workitem_id = 2
4572; GFX10-NEXT:     enable_exception_msb = 0
4573; GFX10-NEXT:     granulated_lds_size = 0
4574; GFX10-NEXT:     enable_exception = 0
4575; GFX10-NEXT:     enable_sgpr_private_segment_buffer = 1
4576; GFX10-NEXT:     enable_sgpr_dispatch_ptr = 1
4577; GFX10-NEXT:     enable_sgpr_queue_ptr = 1
4578; GFX10-NEXT:     enable_sgpr_kernarg_segment_ptr = 1
4579; GFX10-NEXT:     enable_sgpr_dispatch_id = 1
4580; GFX10-NEXT:     enable_sgpr_flat_scratch_init = 0
4581; GFX10-NEXT:     enable_sgpr_private_segment_size = 0
4582; GFX10-NEXT:     enable_sgpr_grid_workgroup_count_x = 0
4583; GFX10-NEXT:     enable_sgpr_grid_workgroup_count_y = 0
4584; GFX10-NEXT:     enable_sgpr_grid_workgroup_count_z = 0
4585; GFX10-NEXT:     enable_wavefront_size32 = 1
4586; GFX10-NEXT:     enable_ordered_append_gds = 0
4587; GFX10-NEXT:     private_element_size = 1
4588; GFX10-NEXT:     is_ptr64 = 1
4589; GFX10-NEXT:     is_dynamic_callstack = 0
4590; GFX10-NEXT:     is_debug_enabled = 0
4591; GFX10-NEXT:     is_xnack_enabled = 1
4592; GFX10-NEXT:     workitem_private_segment_byte_size = 0
4593; GFX10-NEXT:     workgroup_group_segment_byte_size = 0
4594; GFX10-NEXT:     gds_segment_byte_size = 0
4595; GFX10-NEXT:     kernarg_segment_byte_size = 28
4596; GFX10-NEXT:     workgroup_fbarrier_count = 0
4597; GFX10-NEXT:     wavefront_sgpr_count = 10
4598; GFX10-NEXT:     workitem_vgpr_count = 3
4599; GFX10-NEXT:     reserved_vgpr_first = 0
4600; GFX10-NEXT:     reserved_vgpr_count = 0
4601; GFX10-NEXT:     reserved_sgpr_first = 0
4602; GFX10-NEXT:     reserved_sgpr_count = 0
4603; GFX10-NEXT:     debug_wavefront_private_segment_offset_sgpr = 0
4604; GFX10-NEXT:     debug_private_segment_buffer_sgpr = 0
4605; GFX10-NEXT:     kernarg_segment_alignment = 4
4606; GFX10-NEXT:     group_segment_alignment = 4
4607; GFX10-NEXT:     private_segment_alignment = 4
4608; GFX10-NEXT:     wavefront_size = 5
4609; GFX10-NEXT:     call_convention = -1
4610; GFX10-NEXT:     runtime_loader_kernel_symbol = 0
4611; GFX10-NEXT:    .end_amd_kernel_code_t
4612; GFX10-NEXT:  ; %bb.0: ; %entry
4613; GFX10-NEXT:    s_clause 0x1
4614; GFX10-NEXT:    s_load_dword s6, s[8:9], 0x8
4615; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
4616; GFX10-NEXT:    s_mov_b32 s2, 0
4617; GFX10-NEXT:    s_mov_b32 s3, 0x40080000
4618; GFX10-NEXT:    v_mov_b32_e32 v2, 0
4619; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
4620; GFX10-NEXT:    s_cmp_eq_u32 s6, 1
4621; GFX10-NEXT:    s_cselect_b64 s[4:5], 2.0, 1.0
4622; GFX10-NEXT:    s_cmp_eq_u32 s6, 2
4623; GFX10-NEXT:    s_cselect_b64 s[2:3], s[2:3], s[4:5]
4624; GFX10-NEXT:    s_cmp_eq_u32 s6, 3
4625; GFX10-NEXT:    s_cselect_b64 s[2:3], 4.0, s[2:3]
4626; GFX10-NEXT:    v_mov_b32_e32 v0, s2
4627; GFX10-NEXT:    v_mov_b32_e32 v1, s3
4628; GFX10-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
4629; GFX10-NEXT:    s_endpgm
4630;
4631; GFX11-LABEL: dyn_extract_v4f64_s_s_s:
4632; GFX11:         .amd_kernel_code_t
4633; GFX11-NEXT:     amd_code_version_major = 1
4634; GFX11-NEXT:     amd_code_version_minor = 2
4635; GFX11-NEXT:     amd_machine_kind = 1
4636; GFX11-NEXT:     amd_machine_version_major = 11
4637; GFX11-NEXT:     amd_machine_version_minor = 0
4638; GFX11-NEXT:     amd_machine_version_stepping = 0
4639; GFX11-NEXT:     kernel_code_entry_byte_offset = 256
4640; GFX11-NEXT:     kernel_code_prefetch_byte_size = 0
4641; GFX11-NEXT:     granulated_workitem_vgpr_count = 0
4642; GFX11-NEXT:     granulated_wavefront_sgpr_count = 0
4643; GFX11-NEXT:     priority = 0
4644; GFX11-NEXT:     float_mode = 240
4645; GFX11-NEXT:     priv = 0
4646; GFX11-NEXT:     enable_dx10_clamp = 1
4647; GFX11-NEXT:     debug_mode = 0
4648; GFX11-NEXT:     enable_ieee_mode = 1
4649; GFX11-NEXT:     enable_wgp_mode = 1
4650; GFX11-NEXT:     enable_mem_ordered = 1
4651; GFX11-NEXT:     enable_fwd_progress = 0
4652; GFX11-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 0
4653; GFX11-NEXT:     user_sgpr_count = 13
4654; GFX11-NEXT:     enable_trap_handler = 0
4655; GFX11-NEXT:     enable_sgpr_workgroup_id_x = 1
4656; GFX11-NEXT:     enable_sgpr_workgroup_id_y = 1
4657; GFX11-NEXT:     enable_sgpr_workgroup_id_z = 1
4658; GFX11-NEXT:     enable_sgpr_workgroup_info = 0
4659; GFX11-NEXT:     enable_vgpr_workitem_id = 2
4660; GFX11-NEXT:     enable_exception_msb = 0
4661; GFX11-NEXT:     granulated_lds_size = 0
4662; GFX11-NEXT:     enable_exception = 0
4663; GFX11-NEXT:     enable_sgpr_private_segment_buffer = 0
4664; GFX11-NEXT:     enable_sgpr_dispatch_ptr = 1
4665; GFX11-NEXT:     enable_sgpr_queue_ptr = 1
4666; GFX11-NEXT:     enable_sgpr_kernarg_segment_ptr = 1
4667; GFX11-NEXT:     enable_sgpr_dispatch_id = 1
4668; GFX11-NEXT:     enable_sgpr_flat_scratch_init = 0
4669; GFX11-NEXT:     enable_sgpr_private_segment_size = 0
4670; GFX11-NEXT:     enable_sgpr_grid_workgroup_count_x = 0
4671; GFX11-NEXT:     enable_sgpr_grid_workgroup_count_y = 0
4672; GFX11-NEXT:     enable_sgpr_grid_workgroup_count_z = 0
4673; GFX11-NEXT:     enable_wavefront_size32 = 1
4674; GFX11-NEXT:     enable_ordered_append_gds = 0
4675; GFX11-NEXT:     private_element_size = 1
4676; GFX11-NEXT:     is_ptr64 = 1
4677; GFX11-NEXT:     is_dynamic_callstack = 0
4678; GFX11-NEXT:     is_debug_enabled = 0
4679; GFX11-NEXT:     is_xnack_enabled = 0
4680; GFX11-NEXT:     workitem_private_segment_byte_size = 0
4681; GFX11-NEXT:     workgroup_group_segment_byte_size = 0
4682; GFX11-NEXT:     gds_segment_byte_size = 0
4683; GFX11-NEXT:     kernarg_segment_byte_size = 28
4684; GFX11-NEXT:     workgroup_fbarrier_count = 0
4685; GFX11-NEXT:     wavefront_sgpr_count = 7
4686; GFX11-NEXT:     workitem_vgpr_count = 3
4687; GFX11-NEXT:     reserved_vgpr_first = 0
4688; GFX11-NEXT:     reserved_vgpr_count = 0
4689; GFX11-NEXT:     reserved_sgpr_first = 0
4690; GFX11-NEXT:     reserved_sgpr_count = 0
4691; GFX11-NEXT:     debug_wavefront_private_segment_offset_sgpr = 0
4692; GFX11-NEXT:     debug_private_segment_buffer_sgpr = 0
4693; GFX11-NEXT:     kernarg_segment_alignment = 4
4694; GFX11-NEXT:     group_segment_alignment = 4
4695; GFX11-NEXT:     private_segment_alignment = 4
4696; GFX11-NEXT:     wavefront_size = 5
4697; GFX11-NEXT:     call_convention = -1
4698; GFX11-NEXT:     runtime_loader_kernel_symbol = 0
4699; GFX11-NEXT:    .end_amd_kernel_code_t
4700; GFX11-NEXT:  ; %bb.0: ; %entry
4701; GFX11-NEXT:    s_clause 0x1
4702; GFX11-NEXT:    s_load_b32 s6, s[4:5], 0x8
4703; GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
4704; GFX11-NEXT:    s_mov_b32 s2, 0
4705; GFX11-NEXT:    s_mov_b32 s3, 0x40080000
4706; GFX11-NEXT:    v_mov_b32_e32 v2, 0
4707; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
4708; GFX11-NEXT:    s_cmp_eq_u32 s6, 1
4709; GFX11-NEXT:    s_cselect_b64 s[4:5], 2.0, 1.0
4710; GFX11-NEXT:    s_cmp_eq_u32 s6, 2
4711; GFX11-NEXT:    s_cselect_b64 s[2:3], s[2:3], s[4:5]
4712; GFX11-NEXT:    s_cmp_eq_u32 s6, 3
4713; GFX11-NEXT:    s_cselect_b64 s[2:3], 4.0, s[2:3]
4714; GFX11-NEXT:    v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
4715; GFX11-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
4716; GFX11-NEXT:    s_endpgm
4717entry:
4718  %ext = extractelement <4 x double> <double 1.0, double 2.0, double 3.0, double 4.0>, i32 %sel
4719  store double %ext, ptr addrspace(1) %out
4720  ret void
4721}
4722
; Constant-index extract of element 7 from a <64 x i32> loaded through a
; global (addrspace(1)) pointer.
; The checks show that only the selected dword is loaded, at byte offset 28
; (7 * 4), instead of the whole vector. GPRIDX (gfx900), GFX10 (gfx1010) and
; GFX11 (gfx1100) fold the offset into the global load; MOVREL (fiji) adds it
; to the 64-bit pointer with v_add_u32/v_addc_u32 and then uses
; flat_load_dword.
4723define i32 @v_extract_v64i32_7(ptr addrspace(1) %ptr) {
4724; GPRIDX-LABEL: v_extract_v64i32_7:
4725; GPRIDX:       ; %bb.0:
4726; GPRIDX-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4727; GPRIDX-NEXT:    global_load_dword v0, v[0:1], off offset:28
4728; GPRIDX-NEXT:    s_waitcnt vmcnt(0)
4729; GPRIDX-NEXT:    s_setpc_b64 s[30:31]
4730;
4731; MOVREL-LABEL: v_extract_v64i32_7:
4732; MOVREL:       ; %bb.0:
4733; MOVREL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4734; MOVREL-NEXT:    v_add_u32_e32 v0, vcc, 28, v0
4735; MOVREL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
4736; MOVREL-NEXT:    flat_load_dword v0, v[0:1]
4737; MOVREL-NEXT:    s_waitcnt vmcnt(0)
4738; MOVREL-NEXT:    s_setpc_b64 s[30:31]
4739;
4740; GFX10-LABEL: v_extract_v64i32_7:
4741; GFX10:       ; %bb.0:
4742; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4743; GFX10-NEXT:    global_load_dword v0, v[0:1], off offset:28
4744; GFX10-NEXT:    s_waitcnt vmcnt(0)
4745; GFX10-NEXT:    s_setpc_b64 s[30:31]
4746;
4747; GFX11-LABEL: v_extract_v64i32_7:
4748; GFX11:       ; %bb.0:
4749; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4750; GFX11-NEXT:    global_load_b32 v0, v[0:1], off offset:28
4751; GFX11-NEXT:    s_waitcnt vmcnt(0)
4752; GFX11-NEXT:    s_setpc_b64 s[30:31]
4753  %vec = load <64 x i32>, ptr addrspace(1) %ptr
4754  %elt = extractelement <64 x i32> %vec, i32 7
4755  ret i32 %elt
4756}
4757
; Constant-index extract of element 32 from a <64 x i32> loaded through a
; global (addrspace(1)) pointer.
; The checks show a single dword load at byte offset 128 (32 * 4, printed as
; 0x80 in the MOVREL add). GPRIDX, GFX10 and GFX11 fold the offset into the
; global load; MOVREL adds it to the 64-bit pointer with
; v_add_u32/v_addc_u32 before a flat_load_dword.
4758define i32 @v_extract_v64i32_32(ptr addrspace(1) %ptr) {
4759; GPRIDX-LABEL: v_extract_v64i32_32:
4760; GPRIDX:       ; %bb.0:
4761; GPRIDX-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4762; GPRIDX-NEXT:    global_load_dword v0, v[0:1], off offset:128
4763; GPRIDX-NEXT:    s_waitcnt vmcnt(0)
4764; GPRIDX-NEXT:    s_setpc_b64 s[30:31]
4765;
4766; MOVREL-LABEL: v_extract_v64i32_32:
4767; MOVREL:       ; %bb.0:
4768; MOVREL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4769; MOVREL-NEXT:    v_add_u32_e32 v0, vcc, 0x80, v0
4770; MOVREL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
4771; MOVREL-NEXT:    flat_load_dword v0, v[0:1]
4772; MOVREL-NEXT:    s_waitcnt vmcnt(0)
4773; MOVREL-NEXT:    s_setpc_b64 s[30:31]
4774;
4775; GFX10-LABEL: v_extract_v64i32_32:
4776; GFX10:       ; %bb.0:
4777; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4778; GFX10-NEXT:    global_load_dword v0, v[0:1], off offset:128
4779; GFX10-NEXT:    s_waitcnt vmcnt(0)
4780; GFX10-NEXT:    s_setpc_b64 s[30:31]
4781;
4782; GFX11-LABEL: v_extract_v64i32_32:
4783; GFX11:       ; %bb.0:
4784; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4785; GFX11-NEXT:    global_load_b32 v0, v[0:1], off offset:128
4786; GFX11-NEXT:    s_waitcnt vmcnt(0)
4787; GFX11-NEXT:    s_setpc_b64 s[30:31]
4788  %vec = load <64 x i32>, ptr addrspace(1) %ptr
4789  %elt = extractelement <64 x i32> %vec, i32 32
4790  ret i32 %elt
4791}
4792
; Constant-index extract of element 33 from a <64 x i32> loaded through a
; global (addrspace(1)) pointer.
; The checks show a single dword load at byte offset 132 (33 * 4, printed as
; 0x84 in the MOVREL add). GPRIDX, GFX10 and GFX11 fold the offset into the
; global load; MOVREL adds it to the 64-bit pointer with
; v_add_u32/v_addc_u32 before a flat_load_dword.
4793define i32 @v_extract_v64i32_33(ptr addrspace(1) %ptr) {
4794; GPRIDX-LABEL: v_extract_v64i32_33:
4795; GPRIDX:       ; %bb.0:
4796; GPRIDX-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4797; GPRIDX-NEXT:    global_load_dword v0, v[0:1], off offset:132
4798; GPRIDX-NEXT:    s_waitcnt vmcnt(0)
4799; GPRIDX-NEXT:    s_setpc_b64 s[30:31]
4800;
4801; MOVREL-LABEL: v_extract_v64i32_33:
4802; MOVREL:       ; %bb.0:
4803; MOVREL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4804; MOVREL-NEXT:    v_add_u32_e32 v0, vcc, 0x84, v0
4805; MOVREL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
4806; MOVREL-NEXT:    flat_load_dword v0, v[0:1]
4807; MOVREL-NEXT:    s_waitcnt vmcnt(0)
4808; MOVREL-NEXT:    s_setpc_b64 s[30:31]
4809;
4810; GFX10-LABEL: v_extract_v64i32_33:
4811; GFX10:       ; %bb.0:
4812; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4813; GFX10-NEXT:    global_load_dword v0, v[0:1], off offset:132
4814; GFX10-NEXT:    s_waitcnt vmcnt(0)
4815; GFX10-NEXT:    s_setpc_b64 s[30:31]
4816;
4817; GFX11-LABEL: v_extract_v64i32_33:
4818; GFX11:       ; %bb.0:
4819; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4820; GFX11-NEXT:    global_load_b32 v0, v[0:1], off offset:132
4821; GFX11-NEXT:    s_waitcnt vmcnt(0)
4822; GFX11-NEXT:    s_setpc_b64 s[30:31]
4823  %vec = load <64 x i32>, ptr addrspace(1) %ptr
4824  %elt = extractelement <64 x i32> %vec, i32 33
4825  ret i32 %elt
4826}
4827
; Constant-index extract of element 37 from a <64 x i32> loaded through a
; global (addrspace(1)) pointer.
; The checks show a single dword load at byte offset 148 (37 * 4, printed as
; 0x94 in the MOVREL add). GPRIDX, GFX10 and GFX11 fold the offset into the
; global load; MOVREL adds it to the 64-bit pointer with
; v_add_u32/v_addc_u32 before a flat_load_dword.
4828define i32 @v_extract_v64i32_37(ptr addrspace(1) %ptr) {
4829; GPRIDX-LABEL: v_extract_v64i32_37:
4830; GPRIDX:       ; %bb.0:
4831; GPRIDX-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4832; GPRIDX-NEXT:    global_load_dword v0, v[0:1], off offset:148
4833; GPRIDX-NEXT:    s_waitcnt vmcnt(0)
4834; GPRIDX-NEXT:    s_setpc_b64 s[30:31]
4835;
4836; MOVREL-LABEL: v_extract_v64i32_37:
4837; MOVREL:       ; %bb.0:
4838; MOVREL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4839; MOVREL-NEXT:    v_add_u32_e32 v0, vcc, 0x94, v0
4840; MOVREL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
4841; MOVREL-NEXT:    flat_load_dword v0, v[0:1]
4842; MOVREL-NEXT:    s_waitcnt vmcnt(0)
4843; MOVREL-NEXT:    s_setpc_b64 s[30:31]
4844;
4845; GFX10-LABEL: v_extract_v64i32_37:
4846; GFX10:       ; %bb.0:
4847; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4848; GFX10-NEXT:    global_load_dword v0, v[0:1], off offset:148
4849; GFX10-NEXT:    s_waitcnt vmcnt(0)
4850; GFX10-NEXT:    s_setpc_b64 s[30:31]
4851;
4852; GFX11-LABEL: v_extract_v64i32_37:
4853; GFX11:       ; %bb.0:
4854; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4855; GFX11-NEXT:    global_load_b32 v0, v[0:1], off offset:148
4856; GFX11-NEXT:    s_waitcnt vmcnt(0)
4857; GFX11-NEXT:    s_setpc_b64 s[30:31]
4858  %vec = load <64 x i32>, ptr addrspace(1) %ptr
4859  %elt = extractelement <64 x i32> %vec, i32 37
4860  ret i32 %elt
4861}
4862