xref: /llvm-project/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll (revision 6206f5444fc0732e6495703c75a67f1f90f5b418)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GPRIDX %s
3; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s
4; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s
5; RUN: not --crash llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -verify-machineinstrs -o /dev/null %s 2>&1 | FileCheck -check-prefix=ERR %s
6
7; FIXME: Need constant bus fixup pre-gfx10 for movrel
8; ERR: Bad machine code: VOP* instruction violates constant bus restriction
9
; Dynamic-index insertelement into <8 x i32> where the vector, the inserted
; value and the index are all passed `inreg`. The GPRIDX checks expect one
; s_cmp_eq_u32/s_cselect_b32 pair per element (comparing s11, selecting s10);
; the GFX10PLUS checks expect the vector moved down to s0-s7, m0 written from
; s11, and a single s_movreld_b32.
10define amdgpu_ps <8 x i32> @dyn_insertelement_v8i32_s_s_s(<8 x i32> inreg %vec, i32 inreg %val, i32 inreg %idx) {
11; GPRIDX-LABEL: dyn_insertelement_v8i32_s_s_s:
12; GPRIDX:       ; %bb.0: ; %entry
13; GPRIDX-NEXT:    s_cmp_eq_u32 s11, 0
14; GPRIDX-NEXT:    s_cselect_b32 s0, s10, s2
15; GPRIDX-NEXT:    s_cmp_eq_u32 s11, 1
16; GPRIDX-NEXT:    s_cselect_b32 s1, s10, s3
17; GPRIDX-NEXT:    s_cmp_eq_u32 s11, 2
18; GPRIDX-NEXT:    s_cselect_b32 s2, s10, s4
19; GPRIDX-NEXT:    s_cmp_eq_u32 s11, 3
20; GPRIDX-NEXT:    s_cselect_b32 s3, s10, s5
21; GPRIDX-NEXT:    s_cmp_eq_u32 s11, 4
22; GPRIDX-NEXT:    s_cselect_b32 s4, s10, s6
23; GPRIDX-NEXT:    s_cmp_eq_u32 s11, 5
24; GPRIDX-NEXT:    s_cselect_b32 s5, s10, s7
25; GPRIDX-NEXT:    s_cmp_eq_u32 s11, 6
26; GPRIDX-NEXT:    s_cselect_b32 s6, s10, s8
27; GPRIDX-NEXT:    s_cmp_eq_u32 s11, 7
28; GPRIDX-NEXT:    s_cselect_b32 s7, s10, s9
29; GPRIDX-NEXT:    ; return to shader part epilog
30;
31; GFX10PLUS-LABEL: dyn_insertelement_v8i32_s_s_s:
32; GFX10PLUS:       ; %bb.0: ; %entry
33; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
34; GFX10PLUS-NEXT:    s_mov_b32 m0, s11
35; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
36; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
37; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
38; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
39; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
40; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
41; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
42; GFX10PLUS-NEXT:    s_movreld_b32 s0, s10
43; GFX10PLUS-NEXT:    ; return to shader part epilog
44entry:
45  %insert = insertelement <8 x i32> %vec, i32 %val, i32 %idx
46  ret <8 x i32> %insert
47}
48
; Same all-`inreg` dynamic insert as the <8 x i32> case, but with
; `ptr addrspace(3)` elements. The expected code has the same shape, namely an
; s_cmp_eq_u32/s_cselect_b32 chain for GPRIDX, and m0 plus s_movreld_b32 for
; GFX10PLUS.
49define amdgpu_ps <8 x ptr addrspace(3)> @dyn_insertelement_v8p3i8_s_s_s(<8 x ptr addrspace(3)> inreg %vec, ptr addrspace(3) inreg %val, i32 inreg %idx) {
50; GPRIDX-LABEL: dyn_insertelement_v8p3i8_s_s_s:
51; GPRIDX:       ; %bb.0: ; %entry
52; GPRIDX-NEXT:    s_cmp_eq_u32 s11, 0
53; GPRIDX-NEXT:    s_cselect_b32 s0, s10, s2
54; GPRIDX-NEXT:    s_cmp_eq_u32 s11, 1
55; GPRIDX-NEXT:    s_cselect_b32 s1, s10, s3
56; GPRIDX-NEXT:    s_cmp_eq_u32 s11, 2
57; GPRIDX-NEXT:    s_cselect_b32 s2, s10, s4
58; GPRIDX-NEXT:    s_cmp_eq_u32 s11, 3
59; GPRIDX-NEXT:    s_cselect_b32 s3, s10, s5
60; GPRIDX-NEXT:    s_cmp_eq_u32 s11, 4
61; GPRIDX-NEXT:    s_cselect_b32 s4, s10, s6
62; GPRIDX-NEXT:    s_cmp_eq_u32 s11, 5
63; GPRIDX-NEXT:    s_cselect_b32 s5, s10, s7
64; GPRIDX-NEXT:    s_cmp_eq_u32 s11, 6
65; GPRIDX-NEXT:    s_cselect_b32 s6, s10, s8
66; GPRIDX-NEXT:    s_cmp_eq_u32 s11, 7
67; GPRIDX-NEXT:    s_cselect_b32 s7, s10, s9
68; GPRIDX-NEXT:    ; return to shader part epilog
69;
70; GFX10PLUS-LABEL: dyn_insertelement_v8p3i8_s_s_s:
71; GFX10PLUS:       ; %bb.0: ; %entry
72; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
73; GFX10PLUS-NEXT:    s_mov_b32 m0, s11
74; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
75; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
76; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
77; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
78; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
79; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
80; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
81; GFX10PLUS-NEXT:    s_movreld_b32 s0, s10
82; GFX10PLUS-NEXT:    ; return to shader part epilog
83entry:
84  %insert = insertelement <8 x ptr addrspace(3)> %vec, ptr addrspace(3) %val, i32 %idx
85  ret <8 x ptr addrspace(3)> %insert
86}
87
; Dynamic insert into the constant vector <1.0, 2.0, ..., 8.0>. This function
; does not use the amdgpu_ps calling convention, so the checks begin with
; s_waitcnt and end with s_setpc_b64. The value and index are ordinary
; (non-`inreg`) arguments in v0 and v1. Every prefix expects a
; v_cmp_eq_u32/v_cndmask_b32 chain. GPRIDX first loads the hex-encoded
; constants (3.0, 5.0, 6.0, 7.0, 8.0) with v_mov_b32, while GFX10 and GFX11
; use them directly as cndmask operands. GFX11 additionally pairs the final
; cndmask and mov into one v_dual instruction.
88define <8 x float> @dyn_insertelement_v8f32_const_s_v_v(float %val, i32 %idx) {
89; GPRIDX-LABEL: dyn_insertelement_v8f32_const_s_v_v:
90; GPRIDX:       ; %bb.0: ; %entry
91; GPRIDX-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
92; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
93; GPRIDX-NEXT:    v_cndmask_b32_e32 v8, 1.0, v0, vcc
94; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v1
95; GPRIDX-NEXT:    v_mov_b32_e32 v2, 0x40400000
96; GPRIDX-NEXT:    v_cndmask_b32_e32 v9, 2.0, v0, vcc
97; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v1
98; GPRIDX-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
99; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v1
100; GPRIDX-NEXT:    v_mov_b32_e32 v4, 0x40a00000
101; GPRIDX-NEXT:    v_cndmask_b32_e32 v3, 4.0, v0, vcc
102; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v1
103; GPRIDX-NEXT:    v_mov_b32_e32 v5, 0x40c00000
104; GPRIDX-NEXT:    v_cndmask_b32_e32 v4, v4, v0, vcc
105; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v1
106; GPRIDX-NEXT:    v_mov_b32_e32 v6, 0x40e00000
107; GPRIDX-NEXT:    v_cndmask_b32_e32 v5, v5, v0, vcc
108; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v1
109; GPRIDX-NEXT:    v_mov_b32_e32 v7, 0x41000000
110; GPRIDX-NEXT:    v_cndmask_b32_e32 v6, v6, v0, vcc
111; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 7, v1
112; GPRIDX-NEXT:    v_cndmask_b32_e32 v7, v7, v0, vcc
113; GPRIDX-NEXT:    v_mov_b32_e32 v0, v8
114; GPRIDX-NEXT:    v_mov_b32_e32 v1, v9
115; GPRIDX-NEXT:    s_setpc_b64 s[30:31]
116;
117; GFX10-LABEL: dyn_insertelement_v8f32_const_s_v_v:
118; GFX10:       ; %bb.0: ; %entry
119; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
120; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
121; GFX10-NEXT:    v_cndmask_b32_e32 v8, 1.0, v0, vcc_lo
122; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v1
123; GFX10-NEXT:    v_cndmask_b32_e32 v9, 2.0, v0, vcc_lo
124; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v1
125; GFX10-NEXT:    v_cndmask_b32_e32 v2, 0x40400000, v0, vcc_lo
126; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v1
127; GFX10-NEXT:    v_cndmask_b32_e32 v3, 4.0, v0, vcc_lo
128; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v1
129; GFX10-NEXT:    v_cndmask_b32_e32 v4, 0x40a00000, v0, vcc_lo
130; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v1
131; GFX10-NEXT:    v_cndmask_b32_e32 v5, 0x40c00000, v0, vcc_lo
132; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v1
133; GFX10-NEXT:    v_cndmask_b32_e32 v6, 0x40e00000, v0, vcc_lo
134; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v1
135; GFX10-NEXT:    v_mov_b32_e32 v1, v9
136; GFX10-NEXT:    v_cndmask_b32_e32 v7, 0x41000000, v0, vcc_lo
137; GFX10-NEXT:    v_mov_b32_e32 v0, v8
138; GFX10-NEXT:    s_setpc_b64 s[30:31]
139;
140; GFX11-LABEL: dyn_insertelement_v8f32_const_s_v_v:
141; GFX11:       ; %bb.0: ; %entry
142; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
143; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
144; GFX11-NEXT:    v_cndmask_b32_e32 v8, 1.0, v0, vcc_lo
145; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v1
146; GFX11-NEXT:    v_cndmask_b32_e32 v9, 2.0, v0, vcc_lo
147; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v1
148; GFX11-NEXT:    v_cndmask_b32_e32 v2, 0x40400000, v0, vcc_lo
149; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v1
150; GFX11-NEXT:    v_cndmask_b32_e32 v3, 4.0, v0, vcc_lo
151; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v1
152; GFX11-NEXT:    v_cndmask_b32_e32 v4, 0x40a00000, v0, vcc_lo
153; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v1
154; GFX11-NEXT:    v_cndmask_b32_e32 v5, 0x40c00000, v0, vcc_lo
155; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v1
156; GFX11-NEXT:    v_cndmask_b32_e32 v6, 0x40e00000, v0, vcc_lo
157; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v1
158; GFX11-NEXT:    v_mov_b32_e32 v1, v9
159; GFX11-NEXT:    v_dual_cndmask_b32 v7, 0x41000000, v0 :: v_dual_mov_b32 v0, v8
160; GFX11-NEXT:    s_setpc_b64 s[30:31]
161entry:
162  %insert = insertelement <8 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>, float %val, i32 %idx
163  ret <8 x float> %insert
164}
165
; Dynamic insert into <8 x float> with the vector and value passed `inreg`
; and the index as an ordinary argument (v0). Every prefix expects a
; v_cmp_eq_u32/v_cndmask_b32 chain keyed on v0. GPRIDX copies each s-register
; source into a VGPR before selecting. GFX10 and GFX11 copy only s10 (the
; value) into v7 and use the vector's s registers directly as cndmask
; operands.
166define amdgpu_ps <8 x float> @dyn_insertelement_v8f32_s_s_v(<8 x float> inreg %vec, float inreg %val, i32 %idx) {
167; GPRIDX-LABEL: dyn_insertelement_v8f32_s_s_v:
168; GPRIDX:       ; %bb.0: ; %entry
169; GPRIDX-NEXT:    v_mov_b32_e32 v1, s2
170; GPRIDX-NEXT:    v_mov_b32_e32 v10, s10
171; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
172; GPRIDX-NEXT:    v_mov_b32_e32 v2, s3
173; GPRIDX-NEXT:    v_cndmask_b32_e32 v8, v1, v10, vcc
174; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
175; GPRIDX-NEXT:    v_mov_b32_e32 v3, s4
176; GPRIDX-NEXT:    v_cndmask_b32_e32 v1, v2, v10, vcc
177; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v0
178; GPRIDX-NEXT:    v_mov_b32_e32 v4, s5
179; GPRIDX-NEXT:    v_cndmask_b32_e32 v2, v3, v10, vcc
180; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v0
181; GPRIDX-NEXT:    v_mov_b32_e32 v5, s6
182; GPRIDX-NEXT:    v_cndmask_b32_e32 v3, v4, v10, vcc
183; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v0
184; GPRIDX-NEXT:    v_mov_b32_e32 v6, s7
185; GPRIDX-NEXT:    v_cndmask_b32_e32 v4, v5, v10, vcc
186; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v0
187; GPRIDX-NEXT:    v_mov_b32_e32 v7, s8
188; GPRIDX-NEXT:    v_cndmask_b32_e32 v5, v6, v10, vcc
189; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v0
190; GPRIDX-NEXT:    v_mov_b32_e32 v9, s9
191; GPRIDX-NEXT:    v_cndmask_b32_e32 v6, v7, v10, vcc
192; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 7, v0
193; GPRIDX-NEXT:    v_cndmask_b32_e32 v7, v9, v10, vcc
194; GPRIDX-NEXT:    v_mov_b32_e32 v0, v8
195; GPRIDX-NEXT:    ; return to shader part epilog
196;
197; GFX10-LABEL: dyn_insertelement_v8f32_s_s_v:
198; GFX10:       ; %bb.0: ; %entry
199; GFX10-NEXT:    v_mov_b32_e32 v7, s10
200; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
201; GFX10-NEXT:    v_cndmask_b32_e32 v8, s2, v7, vcc_lo
202; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
203; GFX10-NEXT:    v_cndmask_b32_e32 v1, s3, v7, vcc_lo
204; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v0
205; GFX10-NEXT:    v_cndmask_b32_e32 v2, s4, v7, vcc_lo
206; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v0
207; GFX10-NEXT:    v_cndmask_b32_e32 v3, s5, v7, vcc_lo
208; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v0
209; GFX10-NEXT:    v_cndmask_b32_e32 v4, s6, v7, vcc_lo
210; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v0
211; GFX10-NEXT:    v_cndmask_b32_e32 v5, s7, v7, vcc_lo
212; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v0
213; GFX10-NEXT:    v_cndmask_b32_e32 v6, s8, v7, vcc_lo
214; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v0
215; GFX10-NEXT:    v_mov_b32_e32 v0, v8
216; GFX10-NEXT:    v_cndmask_b32_e32 v7, s9, v7, vcc_lo
217; GFX10-NEXT:    ; return to shader part epilog
218;
219; GFX11-LABEL: dyn_insertelement_v8f32_s_s_v:
220; GFX11:       ; %bb.0: ; %entry
221; GFX11-NEXT:    v_mov_b32_e32 v7, s10
222; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
223; GFX11-NEXT:    v_cndmask_b32_e32 v8, s2, v7, vcc_lo
224; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
225; GFX11-NEXT:    v_cndmask_b32_e32 v1, s3, v7, vcc_lo
226; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v0
227; GFX11-NEXT:    v_cndmask_b32_e32 v2, s4, v7, vcc_lo
228; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v0
229; GFX11-NEXT:    v_cndmask_b32_e32 v3, s5, v7, vcc_lo
230; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v0
231; GFX11-NEXT:    v_cndmask_b32_e32 v4, s6, v7, vcc_lo
232; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v0
233; GFX11-NEXT:    v_cndmask_b32_e32 v5, s7, v7, vcc_lo
234; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v0
235; GFX11-NEXT:    v_cndmask_b32_e32 v6, s8, v7, vcc_lo
236; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v0
237; GFX11-NEXT:    v_dual_mov_b32 v0, v8 :: v_dual_cndmask_b32 v7, s9, v7
238; GFX11-NEXT:    ; return to shader part epilog
239entry:
240  %insert = insertelement <8 x float> %vec, float %val, i32 %idx
241  ret <8 x float> %insert
242}
243
; Dynamic insert into <8 x float> with the vector and index passed `inreg`
; and the value as an ordinary argument (v0). GPRIDX expects a
; v_cmp_eq_u32_e64 (against s10) / v_cndmask_b32 chain. GFX10 and GFX11 expect
; the vector copied into v0-v7, m0 written from s10, and one
; v_movreld_b32_e32 storing the saved value (v8). GFX11 pairs the VGPR copies
; into v_dual_mov_b32 instructions.
244define amdgpu_ps <8 x float> @dyn_insertelement_v8f32_s_v_s(<8 x float> inreg %vec, float %val, i32 inreg %idx) {
245; GPRIDX-LABEL: dyn_insertelement_v8f32_s_v_s:
246; GPRIDX:       ; %bb.0: ; %entry
247; GPRIDX-NEXT:    v_mov_b32_e32 v1, s2
248; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s10, 0
249; GPRIDX-NEXT:    v_mov_b32_e32 v2, s3
250; GPRIDX-NEXT:    v_cndmask_b32_e32 v8, v1, v0, vcc
251; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s10, 1
252; GPRIDX-NEXT:    v_mov_b32_e32 v3, s4
253; GPRIDX-NEXT:    v_cndmask_b32_e32 v1, v2, v0, vcc
254; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s10, 2
255; GPRIDX-NEXT:    v_mov_b32_e32 v4, s5
256; GPRIDX-NEXT:    v_cndmask_b32_e32 v2, v3, v0, vcc
257; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s10, 3
258; GPRIDX-NEXT:    v_mov_b32_e32 v5, s6
259; GPRIDX-NEXT:    v_cndmask_b32_e32 v3, v4, v0, vcc
260; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s10, 4
261; GPRIDX-NEXT:    v_mov_b32_e32 v6, s7
262; GPRIDX-NEXT:    v_cndmask_b32_e32 v4, v5, v0, vcc
263; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s10, 5
264; GPRIDX-NEXT:    v_mov_b32_e32 v7, s8
265; GPRIDX-NEXT:    v_cndmask_b32_e32 v5, v6, v0, vcc
266; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s10, 6
267; GPRIDX-NEXT:    v_mov_b32_e32 v9, s9
268; GPRIDX-NEXT:    v_cndmask_b32_e32 v6, v7, v0, vcc
269; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s10, 7
270; GPRIDX-NEXT:    v_cndmask_b32_e32 v7, v9, v0, vcc
271; GPRIDX-NEXT:    v_mov_b32_e32 v0, v8
272; GPRIDX-NEXT:    ; return to shader part epilog
273;
274; GFX10-LABEL: dyn_insertelement_v8f32_s_v_s:
275; GFX10:       ; %bb.0: ; %entry
276; GFX10-NEXT:    s_mov_b32 s0, s2
277; GFX10-NEXT:    s_mov_b32 s1, s3
278; GFX10-NEXT:    s_mov_b32 s2, s4
279; GFX10-NEXT:    s_mov_b32 s3, s5
280; GFX10-NEXT:    s_mov_b32 s4, s6
281; GFX10-NEXT:    s_mov_b32 s5, s7
282; GFX10-NEXT:    s_mov_b32 s6, s8
283; GFX10-NEXT:    s_mov_b32 s7, s9
284; GFX10-NEXT:    v_mov_b32_e32 v8, v0
285; GFX10-NEXT:    v_mov_b32_e32 v0, s0
286; GFX10-NEXT:    s_mov_b32 m0, s10
287; GFX10-NEXT:    v_mov_b32_e32 v1, s1
288; GFX10-NEXT:    v_mov_b32_e32 v2, s2
289; GFX10-NEXT:    v_mov_b32_e32 v3, s3
290; GFX10-NEXT:    v_mov_b32_e32 v4, s4
291; GFX10-NEXT:    v_mov_b32_e32 v5, s5
292; GFX10-NEXT:    v_mov_b32_e32 v6, s6
293; GFX10-NEXT:    v_mov_b32_e32 v7, s7
294; GFX10-NEXT:    v_movreld_b32_e32 v0, v8
295; GFX10-NEXT:    ; return to shader part epilog
296;
297; GFX11-LABEL: dyn_insertelement_v8f32_s_v_s:
298; GFX11:       ; %bb.0: ; %entry
299; GFX11-NEXT:    s_mov_b32 s0, s2
300; GFX11-NEXT:    s_mov_b32 s1, s3
301; GFX11-NEXT:    s_mov_b32 s2, s4
302; GFX11-NEXT:    s_mov_b32 s3, s5
303; GFX11-NEXT:    s_mov_b32 s4, s6
304; GFX11-NEXT:    s_mov_b32 s5, s7
305; GFX11-NEXT:    s_mov_b32 s6, s8
306; GFX11-NEXT:    s_mov_b32 s7, s9
307; GFX11-NEXT:    v_mov_b32_e32 v8, v0
308; GFX11-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v3, s3
309; GFX11-NEXT:    s_mov_b32 m0, s10
310; GFX11-NEXT:    v_dual_mov_b32 v1, s1 :: v_dual_mov_b32 v2, s2
311; GFX11-NEXT:    v_dual_mov_b32 v5, s5 :: v_dual_mov_b32 v4, s4
312; GFX11-NEXT:    v_dual_mov_b32 v7, s7 :: v_dual_mov_b32 v6, s6
313; GFX11-NEXT:    v_movreld_b32_e32 v0, v8
314; GFX11-NEXT:    ; return to shader part epilog
315entry:
316  %insert = insertelement <8 x float> %vec, float %val, i32 %idx
317  ret <8 x float> %insert
318}
319
; Dynamic insert into <8 x float> with the vector as an ordinary argument
; (v0-v7) and the value and index passed `inreg` (s2, s3). GPRIDX expects the
; value copied to v8 followed by a v_cmp_eq_u32_e64/v_cndmask_b32 chain.
; GFX10PLUS expects m0 written from s3 and a single v_movreld_b32_e32 that
; takes s2 directly as its source.
320define amdgpu_ps <8 x float> @dyn_insertelement_v8f32_v_s_s(<8 x float> %vec, float inreg %val, i32 inreg %idx) {
321; GPRIDX-LABEL: dyn_insertelement_v8f32_v_s_s:
322; GPRIDX:       ; %bb.0: ; %entry
323; GPRIDX-NEXT:    v_mov_b32_e32 v8, s2
324; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s3, 0
325; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc
326; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s3, 1
327; GPRIDX-NEXT:    v_cndmask_b32_e32 v1, v1, v8, vcc
328; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s3, 2
329; GPRIDX-NEXT:    v_cndmask_b32_e32 v2, v2, v8, vcc
330; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s3, 3
331; GPRIDX-NEXT:    v_cndmask_b32_e32 v3, v3, v8, vcc
332; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s3, 4
333; GPRIDX-NEXT:    v_cndmask_b32_e32 v4, v4, v8, vcc
334; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s3, 5
335; GPRIDX-NEXT:    v_cndmask_b32_e32 v5, v5, v8, vcc
336; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s3, 6
337; GPRIDX-NEXT:    v_cndmask_b32_e32 v6, v6, v8, vcc
338; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s3, 7
339; GPRIDX-NEXT:    v_cndmask_b32_e32 v7, v7, v8, vcc
340; GPRIDX-NEXT:    ; return to shader part epilog
341;
342; GFX10PLUS-LABEL: dyn_insertelement_v8f32_v_s_s:
343; GFX10PLUS:       ; %bb.0: ; %entry
344; GFX10PLUS-NEXT:    s_mov_b32 m0, s3
345; GFX10PLUS-NEXT:    v_movreld_b32_e32 v0, s2
346; GFX10PLUS-NEXT:    ; return to shader part epilog
347entry:
348  %insert = insertelement <8 x float> %vec, float %val, i32 %idx
349  ret <8 x float> %insert
350}
351
; Dynamic insert into <8 x float> with only the vector passed `inreg`; the
; value (v0) and index (v1) are ordinary arguments. Every prefix expects a
; v_cmp_eq_u32/v_cndmask_b32 chain keyed on v1. GPRIDX copies each s-register
; element into a VGPR first, whereas GFX10 and GFX11 use the s registers
; directly as cndmask operands. GFX11 pairs the final cndmask and mov into one
; v_dual instruction.
352define amdgpu_ps <8 x float> @dyn_insertelement_v8f32_s_v_v(<8 x float> inreg %vec, float %val, i32 %idx) {
353; GPRIDX-LABEL: dyn_insertelement_v8f32_s_v_v:
354; GPRIDX:       ; %bb.0: ; %entry
355; GPRIDX-NEXT:    v_mov_b32_e32 v2, s2
356; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
357; GPRIDX-NEXT:    v_mov_b32_e32 v3, s3
358; GPRIDX-NEXT:    v_cndmask_b32_e32 v8, v2, v0, vcc
359; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v1
360; GPRIDX-NEXT:    v_mov_b32_e32 v4, s4
361; GPRIDX-NEXT:    v_cndmask_b32_e32 v9, v3, v0, vcc
362; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v1
363; GPRIDX-NEXT:    v_mov_b32_e32 v5, s5
364; GPRIDX-NEXT:    v_cndmask_b32_e32 v2, v4, v0, vcc
365; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v1
366; GPRIDX-NEXT:    v_mov_b32_e32 v6, s6
367; GPRIDX-NEXT:    v_cndmask_b32_e32 v3, v5, v0, vcc
368; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v1
369; GPRIDX-NEXT:    v_mov_b32_e32 v7, s7
370; GPRIDX-NEXT:    v_cndmask_b32_e32 v4, v6, v0, vcc
371; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v1
372; GPRIDX-NEXT:    v_mov_b32_e32 v10, s8
373; GPRIDX-NEXT:    v_cndmask_b32_e32 v5, v7, v0, vcc
374; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v1
375; GPRIDX-NEXT:    v_mov_b32_e32 v11, s9
376; GPRIDX-NEXT:    v_cndmask_b32_e32 v6, v10, v0, vcc
377; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 7, v1
378; GPRIDX-NEXT:    v_cndmask_b32_e32 v7, v11, v0, vcc
379; GPRIDX-NEXT:    v_mov_b32_e32 v0, v8
380; GPRIDX-NEXT:    v_mov_b32_e32 v1, v9
381; GPRIDX-NEXT:    ; return to shader part epilog
382;
383; GFX10-LABEL: dyn_insertelement_v8f32_s_v_v:
384; GFX10:       ; %bb.0: ; %entry
385; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
386; GFX10-NEXT:    v_cndmask_b32_e32 v8, s2, v0, vcc_lo
387; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v1
388; GFX10-NEXT:    v_cndmask_b32_e32 v9, s3, v0, vcc_lo
389; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v1
390; GFX10-NEXT:    v_cndmask_b32_e32 v2, s4, v0, vcc_lo
391; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v1
392; GFX10-NEXT:    v_cndmask_b32_e32 v3, s5, v0, vcc_lo
393; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v1
394; GFX10-NEXT:    v_cndmask_b32_e32 v4, s6, v0, vcc_lo
395; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v1
396; GFX10-NEXT:    v_cndmask_b32_e32 v5, s7, v0, vcc_lo
397; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v1
398; GFX10-NEXT:    v_cndmask_b32_e32 v6, s8, v0, vcc_lo
399; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v1
400; GFX10-NEXT:    v_mov_b32_e32 v1, v9
401; GFX10-NEXT:    v_cndmask_b32_e32 v7, s9, v0, vcc_lo
402; GFX10-NEXT:    v_mov_b32_e32 v0, v8
403; GFX10-NEXT:    ; return to shader part epilog
404;
405; GFX11-LABEL: dyn_insertelement_v8f32_s_v_v:
406; GFX11:       ; %bb.0: ; %entry
407; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
408; GFX11-NEXT:    v_cndmask_b32_e32 v8, s2, v0, vcc_lo
409; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v1
410; GFX11-NEXT:    v_cndmask_b32_e32 v9, s3, v0, vcc_lo
411; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v1
412; GFX11-NEXT:    v_cndmask_b32_e32 v2, s4, v0, vcc_lo
413; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v1
414; GFX11-NEXT:    v_cndmask_b32_e32 v3, s5, v0, vcc_lo
415; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v1
416; GFX11-NEXT:    v_cndmask_b32_e32 v4, s6, v0, vcc_lo
417; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v1
418; GFX11-NEXT:    v_cndmask_b32_e32 v5, s7, v0, vcc_lo
419; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v1
420; GFX11-NEXT:    v_cndmask_b32_e32 v6, s8, v0, vcc_lo
421; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v1
422; GFX11-NEXT:    v_mov_b32_e32 v1, v9
423; GFX11-NEXT:    v_dual_cndmask_b32 v7, s9, v0 :: v_dual_mov_b32 v0, v8
424; GFX11-NEXT:    ; return to shader part epilog
425entry:
426  %insert = insertelement <8 x float> %vec, float %val, i32 %idx
427  ret <8 x float> %insert
428}
429
; Dynamic insert into <8 x float> with only the value passed `inreg` (s2);
; the vector (v0-v7) and index (v8) are ordinary arguments. Both prefixes
; expect a v_cmp_eq_u32/v_cndmask_b32 chain keyed on v8. GPRIDX first copies
; s2 into v9, while GFX10PLUS uses s2 directly as an operand of
; v_cndmask_b32_e64.
430define amdgpu_ps <8 x float> @dyn_insertelement_v8f32_v_s_v(<8 x float> %vec, float inreg %val, i32 %idx) {
431; GPRIDX-LABEL: dyn_insertelement_v8f32_v_s_v:
432; GPRIDX:       ; %bb.0: ; %entry
433; GPRIDX-NEXT:    v_mov_b32_e32 v9, s2
434; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v8
435; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v9, vcc
436; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v8
437; GPRIDX-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc
438; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v8
439; GPRIDX-NEXT:    v_cndmask_b32_e32 v2, v2, v9, vcc
440; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v8
441; GPRIDX-NEXT:    v_cndmask_b32_e32 v3, v3, v9, vcc
442; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v8
443; GPRIDX-NEXT:    v_cndmask_b32_e32 v4, v4, v9, vcc
444; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v8
445; GPRIDX-NEXT:    v_cndmask_b32_e32 v5, v5, v9, vcc
446; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v8
447; GPRIDX-NEXT:    v_cndmask_b32_e32 v6, v6, v9, vcc
448; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 7, v8
449; GPRIDX-NEXT:    v_cndmask_b32_e32 v7, v7, v9, vcc
450; GPRIDX-NEXT:    ; return to shader part epilog
451;
452; GFX10PLUS-LABEL: dyn_insertelement_v8f32_v_s_v:
453; GFX10PLUS:       ; %bb.0: ; %entry
454; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v8
455; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v0, v0, s2, vcc_lo
456; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v8
457; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v1, v1, s2, vcc_lo
458; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v8
459; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v2, v2, s2, vcc_lo
460; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v8
461; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v3, v3, s2, vcc_lo
462; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v8
463; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v4, v4, s2, vcc_lo
464; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v8
465; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v5, v5, s2, vcc_lo
466; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v8
467; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v6, v6, s2, vcc_lo
468; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v8
469; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v7, v7, s2, vcc_lo
470; GFX10PLUS-NEXT:    ; return to shader part epilog
471entry:
472  %insert = insertelement <8 x float> %vec, float %val, i32 %idx
473  ret <8 x float> %insert
474}
475
; Dynamic insert into <8 x float> with only the index passed `inreg` (s2);
; the vector (v0-v7) and value (v8) are ordinary arguments. GPRIDX expects a
; v_cmp_eq_u32_e64 (against s2) / v_cndmask_b32 chain. GFX10PLUS expects m0
; written from s2 and a single v_movreld_b32_e32.
476define amdgpu_ps <8 x float> @dyn_insertelement_v8f32_v_v_s(<8 x float> %vec, float %val, i32 inreg %idx) {
477; GPRIDX-LABEL: dyn_insertelement_v8f32_v_v_s:
478; GPRIDX:       ; %bb.0: ; %entry
479; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 0
480; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc
481; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 1
482; GPRIDX-NEXT:    v_cndmask_b32_e32 v1, v1, v8, vcc
483; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 2
484; GPRIDX-NEXT:    v_cndmask_b32_e32 v2, v2, v8, vcc
485; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 3
486; GPRIDX-NEXT:    v_cndmask_b32_e32 v3, v3, v8, vcc
487; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 4
488; GPRIDX-NEXT:    v_cndmask_b32_e32 v4, v4, v8, vcc
489; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 5
490; GPRIDX-NEXT:    v_cndmask_b32_e32 v5, v5, v8, vcc
491; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 6
492; GPRIDX-NEXT:    v_cndmask_b32_e32 v6, v6, v8, vcc
493; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 7
494; GPRIDX-NEXT:    v_cndmask_b32_e32 v7, v7, v8, vcc
495; GPRIDX-NEXT:    ; return to shader part epilog
496;
497; GFX10PLUS-LABEL: dyn_insertelement_v8f32_v_v_s:
498; GFX10PLUS:       ; %bb.0: ; %entry
499; GFX10PLUS-NEXT:    s_mov_b32 m0, s2
500; GFX10PLUS-NEXT:    v_movreld_b32_e32 v0, v8
501; GFX10PLUS-NEXT:    ; return to shader part epilog
502entry:
503  %insert = insertelement <8 x float> %vec, float %val, i32 %idx
504  ret <8 x float> %insert
505}
506
; Dynamic insert into <8 x ptr addrspace(3)> with only the index passed
; `inreg` (s2). The result is converted with ptrtoint and then bitcast so the
; function returns <8 x float>. The expected code matches the <8 x float>
; v_v_s case, namely a v_cmp_eq_u32_e64/v_cndmask_b32 chain for GPRIDX, and m0
; plus v_movreld_b32_e32 for GFX10PLUS.
507define amdgpu_ps <8 x float> @dyn_insertelement_v8p3i8_v_v_s(<8 x ptr addrspace(3)> %vec, ptr addrspace(3) %val, i32 inreg %idx) {
508; GPRIDX-LABEL: dyn_insertelement_v8p3i8_v_v_s:
509; GPRIDX:       ; %bb.0: ; %entry
510; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 0
511; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc
512; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 1
513; GPRIDX-NEXT:    v_cndmask_b32_e32 v1, v1, v8, vcc
514; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 2
515; GPRIDX-NEXT:    v_cndmask_b32_e32 v2, v2, v8, vcc
516; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 3
517; GPRIDX-NEXT:    v_cndmask_b32_e32 v3, v3, v8, vcc
518; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 4
519; GPRIDX-NEXT:    v_cndmask_b32_e32 v4, v4, v8, vcc
520; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 5
521; GPRIDX-NEXT:    v_cndmask_b32_e32 v5, v5, v8, vcc
522; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 6
523; GPRIDX-NEXT:    v_cndmask_b32_e32 v6, v6, v8, vcc
524; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 7
525; GPRIDX-NEXT:    v_cndmask_b32_e32 v7, v7, v8, vcc
526; GPRIDX-NEXT:    ; return to shader part epilog
527;
528; GFX10PLUS-LABEL: dyn_insertelement_v8p3i8_v_v_s:
529; GFX10PLUS:       ; %bb.0: ; %entry
530; GFX10PLUS-NEXT:    s_mov_b32 m0, s2
531; GFX10PLUS-NEXT:    v_movreld_b32_e32 v0, v8
532; GFX10PLUS-NEXT:    ; return to shader part epilog
533entry:
534  %insert = insertelement <8 x ptr addrspace(3)> %vec, ptr addrspace(3) %val, i32 %idx
535  %cast.0 = ptrtoint <8 x ptr addrspace(3)> %insert to <8 x i32>
536  %cast.1 = bitcast <8 x i32> %cast.0 to <8 x float>
537  ret <8 x float> %cast.1
538}
539
; Dynamic insert into <8 x float> where no operand is `inreg`. The vector is
; in v0-v7, the value in v8 and the index in v9. Both prefixes expect the same
; v_cmp_eq_u32/v_cndmask_b32 chain, differing only in the condition register
; name (vcc for GPRIDX, vcc_lo for GFX10PLUS).
540define amdgpu_ps <8 x float> @dyn_insertelement_v8f32_v_v_v(<8 x float> %vec, float %val, i32 %idx) {
541; GPRIDX-LABEL: dyn_insertelement_v8f32_v_v_v:
542; GPRIDX:       ; %bb.0: ; %entry
543; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v9
544; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc
545; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v9
546; GPRIDX-NEXT:    v_cndmask_b32_e32 v1, v1, v8, vcc
547; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v9
548; GPRIDX-NEXT:    v_cndmask_b32_e32 v2, v2, v8, vcc
549; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v9
550; GPRIDX-NEXT:    v_cndmask_b32_e32 v3, v3, v8, vcc
551; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v9
552; GPRIDX-NEXT:    v_cndmask_b32_e32 v4, v4, v8, vcc
553; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v9
554; GPRIDX-NEXT:    v_cndmask_b32_e32 v5, v5, v8, vcc
555; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v9
556; GPRIDX-NEXT:    v_cndmask_b32_e32 v6, v6, v8, vcc
557; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 7, v9
558; GPRIDX-NEXT:    v_cndmask_b32_e32 v7, v7, v8, vcc
559; GPRIDX-NEXT:    ; return to shader part epilog
560;
561; GFX10PLUS-LABEL: dyn_insertelement_v8f32_v_v_v:
562; GFX10PLUS:       ; %bb.0: ; %entry
563; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v9
564; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc_lo
565; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v9
566; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v1, v1, v8, vcc_lo
567; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v9
568; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v2, v2, v8, vcc_lo
569; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v9
570; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v3, v3, v8, vcc_lo
571; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v9
572; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v4, v4, v8, vcc_lo
573; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v9
574; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v5, v5, v8, vcc_lo
575; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v9
576; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v6, v6, v8, vcc_lo
577; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v9
578; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v7, v7, v8, vcc_lo
579; GFX10PLUS-NEXT:    ; return to shader part epilog
580entry:
581  %insert = insertelement <8 x float> %vec, float %val, i32 %idx
582  ret <8 x float> %insert
583}
584
; Dynamic insert into <8 x i64> with the vector, value and index all passed
; `inreg`. Both prefixes expect the sixteen vector registers moved from
; s2-s17 down to s0-s15, m0 written from s20, and one s_movreld_b64 storing
; s[18:19]. The GPRIDX output additionally has an s_nop 0 between the m0
; write and the s_movreld_b64.
585define amdgpu_ps <8 x i64> @dyn_insertelement_v8i64_s_s_s(<8 x i64> inreg %vec, i64 inreg %val, i32 inreg %idx) {
586; GPRIDX-LABEL: dyn_insertelement_v8i64_s_s_s:
587; GPRIDX:       ; %bb.0: ; %entry
588; GPRIDX-NEXT:    s_mov_b32 s0, s2
589; GPRIDX-NEXT:    s_mov_b32 s1, s3
590; GPRIDX-NEXT:    s_mov_b32 s2, s4
591; GPRIDX-NEXT:    s_mov_b32 s3, s5
592; GPRIDX-NEXT:    s_mov_b32 s4, s6
593; GPRIDX-NEXT:    s_mov_b32 s5, s7
594; GPRIDX-NEXT:    s_mov_b32 s6, s8
595; GPRIDX-NEXT:    s_mov_b32 s7, s9
596; GPRIDX-NEXT:    s_mov_b32 s8, s10
597; GPRIDX-NEXT:    s_mov_b32 s9, s11
598; GPRIDX-NEXT:    s_mov_b32 s10, s12
599; GPRIDX-NEXT:    s_mov_b32 s11, s13
600; GPRIDX-NEXT:    s_mov_b32 s12, s14
601; GPRIDX-NEXT:    s_mov_b32 s13, s15
602; GPRIDX-NEXT:    s_mov_b32 s14, s16
603; GPRIDX-NEXT:    s_mov_b32 s15, s17
604; GPRIDX-NEXT:    s_mov_b32 m0, s20
605; GPRIDX-NEXT:    s_nop 0
606; GPRIDX-NEXT:    s_movreld_b64 s[0:1], s[18:19]
607; GPRIDX-NEXT:    ; return to shader part epilog
608;
609; GFX10PLUS-LABEL: dyn_insertelement_v8i64_s_s_s:
610; GFX10PLUS:       ; %bb.0: ; %entry
611; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
612; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
613; GFX10PLUS-NEXT:    s_mov_b32 m0, s20
614; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
615; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
616; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
617; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
618; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
619; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
620; GFX10PLUS-NEXT:    s_mov_b32 s8, s10
621; GFX10PLUS-NEXT:    s_mov_b32 s9, s11
622; GFX10PLUS-NEXT:    s_mov_b32 s10, s12
623; GFX10PLUS-NEXT:    s_mov_b32 s11, s13
624; GFX10PLUS-NEXT:    s_mov_b32 s12, s14
625; GFX10PLUS-NEXT:    s_mov_b32 s13, s15
626; GFX10PLUS-NEXT:    s_mov_b32 s14, s16
627; GFX10PLUS-NEXT:    s_mov_b32 s15, s17
628; GFX10PLUS-NEXT:    s_movreld_b64 s[0:1], s[18:19]
629; GFX10PLUS-NEXT:    ; return to shader part epilog
630entry:
631  %insert = insertelement <8 x i64> %vec, i64 %val, i32 %idx
632  ret <8 x i64> %insert
633}
634
; Same all-`inreg` dynamic insert as the <8 x i64> case, but with
; `ptr addrspace(1)` elements. The expected code has the same shape, with the
; vector moved down to s0-s15, m0 written from s20, and one s_movreld_b64
; storing s[18:19]. GPRIDX also has an s_nop 0 before the s_movreld_b64.
635define amdgpu_ps <8 x ptr addrspace(1)> @dyn_insertelement_v8p1i8_s_s_s(<8 x ptr addrspace(1)> inreg %vec, ptr addrspace(1) inreg %val, i32 inreg %idx) {
636; GPRIDX-LABEL: dyn_insertelement_v8p1i8_s_s_s:
637; GPRIDX:       ; %bb.0: ; %entry
638; GPRIDX-NEXT:    s_mov_b32 s0, s2
639; GPRIDX-NEXT:    s_mov_b32 s1, s3
640; GPRIDX-NEXT:    s_mov_b32 s2, s4
641; GPRIDX-NEXT:    s_mov_b32 s3, s5
642; GPRIDX-NEXT:    s_mov_b32 s4, s6
643; GPRIDX-NEXT:    s_mov_b32 s5, s7
644; GPRIDX-NEXT:    s_mov_b32 s6, s8
645; GPRIDX-NEXT:    s_mov_b32 s7, s9
646; GPRIDX-NEXT:    s_mov_b32 s8, s10
647; GPRIDX-NEXT:    s_mov_b32 s9, s11
648; GPRIDX-NEXT:    s_mov_b32 s10, s12
649; GPRIDX-NEXT:    s_mov_b32 s11, s13
650; GPRIDX-NEXT:    s_mov_b32 s12, s14
651; GPRIDX-NEXT:    s_mov_b32 s13, s15
652; GPRIDX-NEXT:    s_mov_b32 s14, s16
653; GPRIDX-NEXT:    s_mov_b32 s15, s17
654; GPRIDX-NEXT:    s_mov_b32 m0, s20
655; GPRIDX-NEXT:    s_nop 0
656; GPRIDX-NEXT:    s_movreld_b64 s[0:1], s[18:19]
657; GPRIDX-NEXT:    ; return to shader part epilog
658;
659; GFX10PLUS-LABEL: dyn_insertelement_v8p1i8_s_s_s:
660; GFX10PLUS:       ; %bb.0: ; %entry
661; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
662; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
663; GFX10PLUS-NEXT:    s_mov_b32 m0, s20
664; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
665; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
666; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
667; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
668; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
669; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
670; GFX10PLUS-NEXT:    s_mov_b32 s8, s10
671; GFX10PLUS-NEXT:    s_mov_b32 s9, s11
672; GFX10PLUS-NEXT:    s_mov_b32 s10, s12
673; GFX10PLUS-NEXT:    s_mov_b32 s11, s13
674; GFX10PLUS-NEXT:    s_mov_b32 s12, s14
675; GFX10PLUS-NEXT:    s_mov_b32 s13, s15
676; GFX10PLUS-NEXT:    s_mov_b32 s14, s16
677; GFX10PLUS-NEXT:    s_mov_b32 s15, s17
678; GFX10PLUS-NEXT:    s_movreld_b64 s[0:1], s[18:19]
679; GFX10PLUS-NEXT:    ; return to shader part epilog
680entry:
681  %insert = insertelement <8 x ptr addrspace(1)> %vec, ptr addrspace(1) %val, i32 %idx
682  ret <8 x ptr addrspace(1)> %insert
683}
684
; Dynamic insertelement into the constant vector <1.0, 2.0, ..., 8.0>, with the
; inserted double and the index arriving as ordinary (non-inreg) arguments of a
; plain (non-shader) function. The expected code materializes the constant in
; SGPRs, copies it into v[3:18], compares the index in v2 against each of 0..7,
; and uses v_cndmask_b32 to select the new value (v[0:1]) into the matching
; pair of 32-bit registers.
685define void @dyn_insertelement_v8f64_const_s_v_v(double %val, i32 %idx) {
686; GPRIDX-LABEL: dyn_insertelement_v8f64_const_s_v_v:
687; GPRIDX:       ; %bb.0: ; %entry
688; GPRIDX-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
689; GPRIDX-NEXT:    s_mov_b32 s18, 0
690; GPRIDX-NEXT:    s_mov_b32 s16, 0
691; GPRIDX-NEXT:    s_mov_b32 s14, 0
692; GPRIDX-NEXT:    s_mov_b32 s12, 0
693; GPRIDX-NEXT:    s_mov_b32 s8, 0
694; GPRIDX-NEXT:    s_mov_b64 s[4:5], 1.0
695; GPRIDX-NEXT:    s_mov_b32 s19, 0x40200000
696; GPRIDX-NEXT:    s_mov_b32 s17, 0x401c0000
697; GPRIDX-NEXT:    s_mov_b32 s15, 0x40180000
698; GPRIDX-NEXT:    s_mov_b32 s13, 0x40140000
699; GPRIDX-NEXT:    s_mov_b64 s[10:11], 4.0
700; GPRIDX-NEXT:    s_mov_b32 s9, 0x40080000
701; GPRIDX-NEXT:    s_mov_b64 s[6:7], 2.0
702; GPRIDX-NEXT:    v_mov_b32_e32 v3, s4
703; GPRIDX-NEXT:    v_mov_b32_e32 v4, s5
704; GPRIDX-NEXT:    v_mov_b32_e32 v5, s6
705; GPRIDX-NEXT:    v_mov_b32_e32 v6, s7
706; GPRIDX-NEXT:    v_mov_b32_e32 v7, s8
707; GPRIDX-NEXT:    v_mov_b32_e32 v8, s9
708; GPRIDX-NEXT:    v_mov_b32_e32 v9, s10
709; GPRIDX-NEXT:    v_mov_b32_e32 v10, s11
710; GPRIDX-NEXT:    v_mov_b32_e32 v11, s12
711; GPRIDX-NEXT:    v_mov_b32_e32 v12, s13
712; GPRIDX-NEXT:    v_mov_b32_e32 v13, s14
713; GPRIDX-NEXT:    v_mov_b32_e32 v14, s15
714; GPRIDX-NEXT:    v_mov_b32_e32 v15, s16
715; GPRIDX-NEXT:    v_mov_b32_e32 v16, s17
716; GPRIDX-NEXT:    v_mov_b32_e32 v17, s18
717; GPRIDX-NEXT:    v_mov_b32_e32 v18, s19
718; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v2
719; GPRIDX-NEXT:    v_cmp_eq_u32_e64 s[16:17], 0, v2
720; GPRIDX-NEXT:    v_cmp_eq_u32_e64 s[4:5], 2, v2
721; GPRIDX-NEXT:    v_cmp_eq_u32_e64 s[6:7], 3, v2
722; GPRIDX-NEXT:    v_cmp_eq_u32_e64 s[8:9], 4, v2
723; GPRIDX-NEXT:    v_cmp_eq_u32_e64 s[10:11], 5, v2
724; GPRIDX-NEXT:    v_cmp_eq_u32_e64 s[12:13], 6, v2
725; GPRIDX-NEXT:    v_cmp_eq_u32_e64 s[14:15], 7, v2
726; GPRIDX-NEXT:    v_cndmask_b32_e64 v3, v3, v0, s[16:17]
727; GPRIDX-NEXT:    v_cndmask_b32_e32 v5, v5, v0, vcc
728; GPRIDX-NEXT:    v_cndmask_b32_e64 v4, v4, v1, s[16:17]
729; GPRIDX-NEXT:    v_cndmask_b32_e32 v6, v6, v1, vcc
730; GPRIDX-NEXT:    v_cndmask_b32_e64 v7, v7, v0, s[4:5]
731; GPRIDX-NEXT:    v_cndmask_b32_e64 v9, v9, v0, s[6:7]
732; GPRIDX-NEXT:    v_cndmask_b32_e64 v11, v11, v0, s[8:9]
733; GPRIDX-NEXT:    v_cndmask_b32_e64 v13, v13, v0, s[10:11]
734; GPRIDX-NEXT:    v_cndmask_b32_e64 v15, v15, v0, s[12:13]
735; GPRIDX-NEXT:    v_cndmask_b32_e64 v17, v17, v0, s[14:15]
736; GPRIDX-NEXT:    v_cndmask_b32_e64 v8, v8, v1, s[4:5]
737; GPRIDX-NEXT:    v_cndmask_b32_e64 v10, v10, v1, s[6:7]
738; GPRIDX-NEXT:    v_cndmask_b32_e64 v12, v12, v1, s[8:9]
739; GPRIDX-NEXT:    v_cndmask_b32_e64 v14, v14, v1, s[10:11]
740; GPRIDX-NEXT:    v_cndmask_b32_e64 v16, v16, v1, s[12:13]
741; GPRIDX-NEXT:    v_cndmask_b32_e64 v18, v18, v1, s[14:15]
742; GPRIDX-NEXT:    global_store_dwordx4 v[0:1], v[3:6], off
743; GPRIDX-NEXT:    s_waitcnt vmcnt(0)
744; GPRIDX-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off
745; GPRIDX-NEXT:    s_waitcnt vmcnt(0)
746; GPRIDX-NEXT:    global_store_dwordx4 v[0:1], v[11:14], off
747; GPRIDX-NEXT:    s_waitcnt vmcnt(0)
748; GPRIDX-NEXT:    global_store_dwordx4 v[0:1], v[15:18], off
749; GPRIDX-NEXT:    s_waitcnt vmcnt(0)
750; GPRIDX-NEXT:    s_setpc_b64 s[30:31]
751;
752; GFX10-LABEL: dyn_insertelement_v8f64_const_s_v_v:
753; GFX10:       ; %bb.0: ; %entry
754; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
755; GFX10-NEXT:    s_mov_b64 s[4:5], 1.0
756; GFX10-NEXT:    s_mov_b32 s18, 0
757; GFX10-NEXT:    s_mov_b32 s16, 0
758; GFX10-NEXT:    s_mov_b32 s14, 0
759; GFX10-NEXT:    s_mov_b32 s12, 0
760; GFX10-NEXT:    s_mov_b32 s8, 0
761; GFX10-NEXT:    s_mov_b32 s19, 0x40200000
762; GFX10-NEXT:    s_mov_b32 s17, 0x401c0000
763; GFX10-NEXT:    s_mov_b32 s15, 0x40180000
764; GFX10-NEXT:    s_mov_b32 s13, 0x40140000
765; GFX10-NEXT:    s_mov_b64 s[10:11], 4.0
766; GFX10-NEXT:    s_mov_b32 s9, 0x40080000
767; GFX10-NEXT:    s_mov_b64 s[6:7], 2.0
768; GFX10-NEXT:    v_mov_b32_e32 v3, s4
769; GFX10-NEXT:    v_mov_b32_e32 v4, s5
770; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v2
771; GFX10-NEXT:    v_mov_b32_e32 v5, s6
772; GFX10-NEXT:    v_mov_b32_e32 v6, s7
773; GFX10-NEXT:    v_mov_b32_e32 v7, s8
774; GFX10-NEXT:    v_mov_b32_e32 v8, s9
775; GFX10-NEXT:    v_mov_b32_e32 v9, s10
776; GFX10-NEXT:    v_mov_b32_e32 v10, s11
777; GFX10-NEXT:    v_mov_b32_e32 v11, s12
778; GFX10-NEXT:    v_mov_b32_e32 v12, s13
779; GFX10-NEXT:    v_mov_b32_e32 v13, s14
780; GFX10-NEXT:    v_mov_b32_e32 v14, s15
781; GFX10-NEXT:    v_mov_b32_e32 v15, s16
782; GFX10-NEXT:    v_mov_b32_e32 v16, s17
783; GFX10-NEXT:    v_mov_b32_e32 v17, s18
784; GFX10-NEXT:    v_mov_b32_e32 v18, s19
785; GFX10-NEXT:    v_cmp_eq_u32_e64 s4, 1, v2
786; GFX10-NEXT:    v_cndmask_b32_e32 v3, v3, v0, vcc_lo
787; GFX10-NEXT:    v_cndmask_b32_e32 v4, v4, v1, vcc_lo
788; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v2
789; GFX10-NEXT:    v_cmp_eq_u32_e64 s5, 7, v2
790; GFX10-NEXT:    v_cndmask_b32_e64 v5, v5, v0, s4
791; GFX10-NEXT:    v_cndmask_b32_e64 v6, v6, v1, s4
792; GFX10-NEXT:    v_cmp_eq_u32_e64 s4, 3, v2
793; GFX10-NEXT:    v_cndmask_b32_e32 v7, v7, v0, vcc_lo
794; GFX10-NEXT:    v_cndmask_b32_e32 v8, v8, v1, vcc_lo
795; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v2
796; GFX10-NEXT:    v_cndmask_b32_e64 v17, v17, v0, s5
797; GFX10-NEXT:    v_cndmask_b32_e64 v9, v9, v0, s4
798; GFX10-NEXT:    v_cndmask_b32_e64 v10, v10, v1, s4
799; GFX10-NEXT:    v_cmp_eq_u32_e64 s4, 5, v2
800; GFX10-NEXT:    v_cndmask_b32_e32 v11, v11, v0, vcc_lo
801; GFX10-NEXT:    v_cndmask_b32_e32 v12, v12, v1, vcc_lo
802; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v2
803; GFX10-NEXT:    v_cndmask_b32_e64 v18, v18, v1, s5
804; GFX10-NEXT:    v_cndmask_b32_e64 v13, v13, v0, s4
805; GFX10-NEXT:    v_cndmask_b32_e64 v14, v14, v1, s4
806; GFX10-NEXT:    v_cndmask_b32_e32 v15, v15, v0, vcc_lo
807; GFX10-NEXT:    v_cndmask_b32_e32 v16, v16, v1, vcc_lo
808; GFX10-NEXT:    global_store_dwordx4 v[0:1], v[3:6], off
809; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
810; GFX10-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off
811; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
812; GFX10-NEXT:    global_store_dwordx4 v[0:1], v[11:14], off
813; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
814; GFX10-NEXT:    global_store_dwordx4 v[0:1], v[15:18], off
815; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
816; GFX10-NEXT:    s_setpc_b64 s[30:31]
817;
818; GFX11-LABEL: dyn_insertelement_v8f64_const_s_v_v:
819; GFX11:       ; %bb.0: ; %entry
820; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
821; GFX11-NEXT:    s_mov_b32 s14, 0
822; GFX11-NEXT:    s_mov_b32 s15, 0x40200000
823; GFX11-NEXT:    s_mov_b32 s12, 0
824; GFX11-NEXT:    s_mov_b32 s10, 0
825; GFX11-NEXT:    s_mov_b32 s8, 0
826; GFX11-NEXT:    s_mov_b32 s4, 0
827; GFX11-NEXT:    s_mov_b64 s[0:1], 1.0
828; GFX11-NEXT:    s_mov_b32 s13, 0x401c0000
829; GFX11-NEXT:    s_mov_b32 s11, 0x40180000
830; GFX11-NEXT:    s_mov_b32 s9, 0x40140000
831; GFX11-NEXT:    s_mov_b64 s[6:7], 4.0
832; GFX11-NEXT:    s_mov_b32 s5, 0x40080000
833; GFX11-NEXT:    s_mov_b64 s[2:3], 2.0
834; GFX11-NEXT:    v_dual_mov_b32 v18, s15 :: v_dual_mov_b32 v17, s14
835; GFX11-NEXT:    v_dual_mov_b32 v4, s1 :: v_dual_mov_b32 v3, s0
836; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v2
837; GFX11-NEXT:    v_dual_mov_b32 v16, s13 :: v_dual_mov_b32 v15, s12
838; GFX11-NEXT:    v_dual_mov_b32 v14, s11 :: v_dual_mov_b32 v13, s10
839; GFX11-NEXT:    v_dual_mov_b32 v12, s9 :: v_dual_mov_b32 v11, s8
840; GFX11-NEXT:    v_dual_mov_b32 v10, s7 :: v_dual_mov_b32 v9, s6
841; GFX11-NEXT:    v_dual_mov_b32 v8, s5 :: v_dual_mov_b32 v7, s4
842; GFX11-NEXT:    v_dual_mov_b32 v6, s3 :: v_dual_mov_b32 v5, s2
843; GFX11-NEXT:    v_cmp_eq_u32_e64 s0, 1, v2
844; GFX11-NEXT:    v_dual_cndmask_b32 v3, v3, v0 :: v_dual_cndmask_b32 v4, v4, v1
845; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v2
846; GFX11-NEXT:    v_cmp_eq_u32_e64 s1, 7, v2
847; GFX11-NEXT:    v_cndmask_b32_e64 v5, v5, v0, s0
848; GFX11-NEXT:    v_cndmask_b32_e64 v6, v6, v1, s0
849; GFX11-NEXT:    v_cmp_eq_u32_e64 s0, 3, v2
850; GFX11-NEXT:    v_dual_cndmask_b32 v7, v7, v0 :: v_dual_cndmask_b32 v8, v8, v1
851; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v2
852; GFX11-NEXT:    v_cndmask_b32_e64 v17, v17, v0, s1
853; GFX11-NEXT:    v_cndmask_b32_e64 v9, v9, v0, s0
854; GFX11-NEXT:    v_cndmask_b32_e64 v10, v10, v1, s0
855; GFX11-NEXT:    v_cmp_eq_u32_e64 s0, 5, v2
856; GFX11-NEXT:    v_dual_cndmask_b32 v11, v11, v0 :: v_dual_cndmask_b32 v12, v12, v1
857; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v2
858; GFX11-NEXT:    v_cndmask_b32_e64 v18, v18, v1, s1
859; GFX11-NEXT:    v_cndmask_b32_e64 v13, v13, v0, s0
860; GFX11-NEXT:    v_cndmask_b32_e64 v14, v14, v1, s0
861; GFX11-NEXT:    v_dual_cndmask_b32 v15, v15, v0 :: v_dual_cndmask_b32 v16, v16, v1
862; GFX11-NEXT:    global_store_b128 v[0:1], v[3:6], off dlc
863; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
864; GFX11-NEXT:    global_store_b128 v[0:1], v[7:10], off dlc
865; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
866; GFX11-NEXT:    global_store_b128 v[0:1], v[11:14], off dlc
867; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
868; GFX11-NEXT:    global_store_b128 v[0:1], v[15:18], off dlc
869; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
870; GFX11-NEXT:    s_setpc_b64 s[30:31]
871entry:
872  %insert = insertelement <8 x double> <double 1.0, double 2.0, double 3.0, double 4.0, double 5.0, double 6.0, double 7.0, double 8.0>, double %val, i32 %idx
  ; Split the result into four <2 x double> pieces and write each one with a
  ; volatile store to an undef global address.
  ; NOTE(review) - presumably this keeps all eight elements live in the
  ; generated code; confirm.
873  %vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 0, i32 1>
874  %vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 2, i32 3>
875  %vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 4, i32 5>
876  %vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 6, i32 7>
877  store volatile <2 x double> %vec.0, ptr addrspace(1) undef
878  store volatile <2 x double> %vec.1, ptr addrspace(1) undef
879  store volatile <2 x double> %vec.2, ptr addrspace(1) undef
880  store volatile <2 x double> %vec.3, ptr addrspace(1) undef
881  ret void
882}
883
; Dynamic insertelement of a double into <8 x double> where the vector and the
; inserted value are inreg and only the index is a regular argument (v0 in the
; checks). The expected code copies the vector into v[1:16], compares the index
; against each of 0..7 and selects the new value with v_cndmask_b32. The GPRIDX
; checks first copy the value into VGPRs (v17 and v0); the GFX10 and GFX11
; checks use s18/s19 directly as v_cndmask_b32 operands.
884define amdgpu_ps void @dyn_insertelement_v8f64_s_s_v(<8 x double> inreg %vec, double inreg %val, i32 %idx) {
885; GPRIDX-LABEL: dyn_insertelement_v8f64_s_s_v:
886; GPRIDX:       ; %bb.0: ; %entry
887; GPRIDX-NEXT:    s_mov_b32 s1, s3
888; GPRIDX-NEXT:    s_mov_b32 s3, s5
889; GPRIDX-NEXT:    s_mov_b32 s5, s7
890; GPRIDX-NEXT:    s_mov_b32 s7, s9
891; GPRIDX-NEXT:    s_mov_b32 s9, s11
892; GPRIDX-NEXT:    s_mov_b32 s11, s13
893; GPRIDX-NEXT:    s_mov_b32 s13, s15
894; GPRIDX-NEXT:    s_mov_b32 s15, s17
895; GPRIDX-NEXT:    s_mov_b32 s0, s2
896; GPRIDX-NEXT:    s_mov_b32 s2, s4
897; GPRIDX-NEXT:    s_mov_b32 s4, s6
898; GPRIDX-NEXT:    s_mov_b32 s6, s8
899; GPRIDX-NEXT:    s_mov_b32 s8, s10
900; GPRIDX-NEXT:    s_mov_b32 s10, s12
901; GPRIDX-NEXT:    s_mov_b32 s12, s14
902; GPRIDX-NEXT:    s_mov_b32 s14, s16
903; GPRIDX-NEXT:    v_mov_b32_e32 v16, s15
904; GPRIDX-NEXT:    v_mov_b32_e32 v15, s14
905; GPRIDX-NEXT:    v_mov_b32_e32 v14, s13
906; GPRIDX-NEXT:    v_mov_b32_e32 v13, s12
907; GPRIDX-NEXT:    v_mov_b32_e32 v12, s11
908; GPRIDX-NEXT:    v_mov_b32_e32 v11, s10
909; GPRIDX-NEXT:    v_mov_b32_e32 v10, s9
910; GPRIDX-NEXT:    v_mov_b32_e32 v9, s8
911; GPRIDX-NEXT:    v_mov_b32_e32 v8, s7
912; GPRIDX-NEXT:    v_mov_b32_e32 v7, s6
913; GPRIDX-NEXT:    v_mov_b32_e32 v6, s5
914; GPRIDX-NEXT:    v_mov_b32_e32 v5, s4
915; GPRIDX-NEXT:    v_mov_b32_e32 v4, s3
916; GPRIDX-NEXT:    v_mov_b32_e32 v3, s2
917; GPRIDX-NEXT:    v_mov_b32_e32 v2, s1
918; GPRIDX-NEXT:    v_mov_b32_e32 v1, s0
919; GPRIDX-NEXT:    v_mov_b32_e32 v17, s18
920; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
921; GPRIDX-NEXT:    v_cmp_eq_u32_e64 s[0:1], 2, v0
922; GPRIDX-NEXT:    v_cmp_eq_u32_e64 s[2:3], 3, v0
923; GPRIDX-NEXT:    v_cmp_eq_u32_e64 s[4:5], 4, v0
924; GPRIDX-NEXT:    v_cmp_eq_u32_e64 s[6:7], 5, v0
925; GPRIDX-NEXT:    v_cmp_eq_u32_e64 s[8:9], 6, v0
926; GPRIDX-NEXT:    v_cmp_eq_u32_e64 s[10:11], 7, v0
927; GPRIDX-NEXT:    v_cmp_eq_u32_e64 s[12:13], 0, v0
928; GPRIDX-NEXT:    v_mov_b32_e32 v0, s19
929; GPRIDX-NEXT:    v_cndmask_b32_e64 v1, v1, v17, s[12:13]
930; GPRIDX-NEXT:    v_cndmask_b32_e32 v3, v3, v17, vcc
931; GPRIDX-NEXT:    v_cndmask_b32_e64 v2, v2, v0, s[12:13]
932; GPRIDX-NEXT:    v_cndmask_b32_e32 v4, v4, v0, vcc
933; GPRIDX-NEXT:    v_cndmask_b32_e64 v5, v5, v17, s[0:1]
934; GPRIDX-NEXT:    v_cndmask_b32_e64 v7, v7, v17, s[2:3]
935; GPRIDX-NEXT:    v_cndmask_b32_e64 v9, v9, v17, s[4:5]
936; GPRIDX-NEXT:    v_cndmask_b32_e64 v11, v11, v17, s[6:7]
937; GPRIDX-NEXT:    v_cndmask_b32_e64 v13, v13, v17, s[8:9]
938; GPRIDX-NEXT:    v_cndmask_b32_e64 v15, v15, v17, s[10:11]
939; GPRIDX-NEXT:    v_cndmask_b32_e64 v6, v6, v0, s[0:1]
940; GPRIDX-NEXT:    v_cndmask_b32_e64 v8, v8, v0, s[2:3]
941; GPRIDX-NEXT:    v_cndmask_b32_e64 v10, v10, v0, s[4:5]
942; GPRIDX-NEXT:    v_cndmask_b32_e64 v12, v12, v0, s[6:7]
943; GPRIDX-NEXT:    v_cndmask_b32_e64 v14, v14, v0, s[8:9]
944; GPRIDX-NEXT:    v_cndmask_b32_e64 v16, v16, v0, s[10:11]
945; GPRIDX-NEXT:    global_store_dwordx4 v[0:1], v[1:4], off
946; GPRIDX-NEXT:    s_waitcnt vmcnt(0)
947; GPRIDX-NEXT:    global_store_dwordx4 v[0:1], v[5:8], off
948; GPRIDX-NEXT:    s_waitcnt vmcnt(0)
949; GPRIDX-NEXT:    global_store_dwordx4 v[0:1], v[9:12], off
950; GPRIDX-NEXT:    s_waitcnt vmcnt(0)
951; GPRIDX-NEXT:    global_store_dwordx4 v[0:1], v[13:16], off
952; GPRIDX-NEXT:    s_waitcnt vmcnt(0)
953; GPRIDX-NEXT:    s_endpgm
954;
955; GFX10-LABEL: dyn_insertelement_v8f64_s_s_v:
956; GFX10:       ; %bb.0: ; %entry
957; GFX10-NEXT:    s_mov_b32 s1, s3
958; GFX10-NEXT:    s_mov_b32 s3, s5
959; GFX10-NEXT:    s_mov_b32 s5, s7
960; GFX10-NEXT:    s_mov_b32 s7, s9
961; GFX10-NEXT:    s_mov_b32 s9, s11
962; GFX10-NEXT:    s_mov_b32 s11, s13
963; GFX10-NEXT:    s_mov_b32 s13, s15
964; GFX10-NEXT:    s_mov_b32 s15, s17
965; GFX10-NEXT:    s_mov_b32 s0, s2
966; GFX10-NEXT:    s_mov_b32 s2, s4
967; GFX10-NEXT:    s_mov_b32 s4, s6
968; GFX10-NEXT:    s_mov_b32 s6, s8
969; GFX10-NEXT:    s_mov_b32 s8, s10
970; GFX10-NEXT:    s_mov_b32 s10, s12
971; GFX10-NEXT:    s_mov_b32 s12, s14
972; GFX10-NEXT:    s_mov_b32 s14, s16
973; GFX10-NEXT:    v_mov_b32_e32 v16, s15
974; GFX10-NEXT:    v_mov_b32_e32 v2, s1
975; GFX10-NEXT:    v_mov_b32_e32 v1, s0
976; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
977; GFX10-NEXT:    v_mov_b32_e32 v15, s14
978; GFX10-NEXT:    v_mov_b32_e32 v14, s13
979; GFX10-NEXT:    v_mov_b32_e32 v13, s12
980; GFX10-NEXT:    v_mov_b32_e32 v12, s11
981; GFX10-NEXT:    v_mov_b32_e32 v11, s10
982; GFX10-NEXT:    v_mov_b32_e32 v10, s9
983; GFX10-NEXT:    v_mov_b32_e32 v9, s8
984; GFX10-NEXT:    v_mov_b32_e32 v8, s7
985; GFX10-NEXT:    v_mov_b32_e32 v7, s6
986; GFX10-NEXT:    v_mov_b32_e32 v6, s5
987; GFX10-NEXT:    v_mov_b32_e32 v5, s4
988; GFX10-NEXT:    v_mov_b32_e32 v4, s3
989; GFX10-NEXT:    v_mov_b32_e32 v3, s2
990; GFX10-NEXT:    v_cmp_eq_u32_e64 s0, 1, v0
991; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s18, vcc_lo
992; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, s19, vcc_lo
993; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v0
994; GFX10-NEXT:    v_cmp_eq_u32_e64 s1, 7, v0
995; GFX10-NEXT:    v_cndmask_b32_e64 v3, v3, s18, s0
996; GFX10-NEXT:    v_cndmask_b32_e64 v4, v4, s19, s0
997; GFX10-NEXT:    v_cmp_eq_u32_e64 s0, 3, v0
998; GFX10-NEXT:    v_cndmask_b32_e64 v5, v5, s18, vcc_lo
999; GFX10-NEXT:    v_cndmask_b32_e64 v6, v6, s19, vcc_lo
1000; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v0
1001; GFX10-NEXT:    v_cndmask_b32_e64 v15, v15, s18, s1
1002; GFX10-NEXT:    v_cndmask_b32_e64 v7, v7, s18, s0
1003; GFX10-NEXT:    v_cndmask_b32_e64 v8, v8, s19, s0
1004; GFX10-NEXT:    v_cmp_eq_u32_e64 s0, 5, v0
1005; GFX10-NEXT:    v_cndmask_b32_e64 v9, v9, s18, vcc_lo
1006; GFX10-NEXT:    v_cndmask_b32_e64 v10, v10, s19, vcc_lo
1007; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v0
1008; GFX10-NEXT:    v_cndmask_b32_e64 v16, v16, s19, s1
1009; GFX10-NEXT:    v_cndmask_b32_e64 v11, v11, s18, s0
1010; GFX10-NEXT:    v_cndmask_b32_e64 v12, v12, s19, s0
1011; GFX10-NEXT:    v_cndmask_b32_e64 v13, v13, s18, vcc_lo
1012; GFX10-NEXT:    v_cndmask_b32_e64 v14, v14, s19, vcc_lo
1013; GFX10-NEXT:    global_store_dwordx4 v[0:1], v[1:4], off
1014; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
1015; GFX10-NEXT:    global_store_dwordx4 v[0:1], v[5:8], off
1016; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
1017; GFX10-NEXT:    global_store_dwordx4 v[0:1], v[9:12], off
1018; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
1019; GFX10-NEXT:    global_store_dwordx4 v[0:1], v[13:16], off
1020; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
1021; GFX10-NEXT:    s_endpgm
1022;
1023; GFX11-LABEL: dyn_insertelement_v8f64_s_s_v:
1024; GFX11:       ; %bb.0: ; %entry
1025; GFX11-NEXT:    s_mov_b32 s1, s3
1026; GFX11-NEXT:    s_mov_b32 s3, s5
1027; GFX11-NEXT:    s_mov_b32 s5, s7
1028; GFX11-NEXT:    s_mov_b32 s7, s9
1029; GFX11-NEXT:    s_mov_b32 s9, s11
1030; GFX11-NEXT:    s_mov_b32 s11, s13
1031; GFX11-NEXT:    s_mov_b32 s13, s15
1032; GFX11-NEXT:    s_mov_b32 s15, s17
1033; GFX11-NEXT:    s_mov_b32 s0, s2
1034; GFX11-NEXT:    s_mov_b32 s2, s4
1035; GFX11-NEXT:    s_mov_b32 s4, s6
1036; GFX11-NEXT:    s_mov_b32 s6, s8
1037; GFX11-NEXT:    s_mov_b32 s8, s10
1038; GFX11-NEXT:    s_mov_b32 s10, s12
1039; GFX11-NEXT:    s_mov_b32 s12, s14
1040; GFX11-NEXT:    s_mov_b32 s14, s16
1041; GFX11-NEXT:    v_dual_mov_b32 v16, s15 :: v_dual_mov_b32 v15, s14
1042; GFX11-NEXT:    v_dual_mov_b32 v2, s1 :: v_dual_mov_b32 v1, s0
1043; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
1044; GFX11-NEXT:    v_dual_mov_b32 v14, s13 :: v_dual_mov_b32 v13, s12
1045; GFX11-NEXT:    v_dual_mov_b32 v12, s11 :: v_dual_mov_b32 v11, s10
1046; GFX11-NEXT:    v_dual_mov_b32 v10, s9 :: v_dual_mov_b32 v9, s8
1047; GFX11-NEXT:    v_dual_mov_b32 v8, s7 :: v_dual_mov_b32 v7, s6
1048; GFX11-NEXT:    v_dual_mov_b32 v6, s5 :: v_dual_mov_b32 v5, s4
1049; GFX11-NEXT:    v_dual_mov_b32 v4, s3 :: v_dual_mov_b32 v3, s2
1050; GFX11-NEXT:    v_cmp_eq_u32_e64 s0, 1, v0
1051; GFX11-NEXT:    v_cndmask_b32_e64 v1, v1, s18, vcc_lo
1052; GFX11-NEXT:    v_cndmask_b32_e64 v2, v2, s19, vcc_lo
1053; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v0
1054; GFX11-NEXT:    v_cmp_eq_u32_e64 s1, 7, v0
1055; GFX11-NEXT:    v_cndmask_b32_e64 v3, v3, s18, s0
1056; GFX11-NEXT:    v_cndmask_b32_e64 v4, v4, s19, s0
1057; GFX11-NEXT:    v_cmp_eq_u32_e64 s0, 3, v0
1058; GFX11-NEXT:    v_cndmask_b32_e64 v5, v5, s18, vcc_lo
1059; GFX11-NEXT:    v_cndmask_b32_e64 v6, v6, s19, vcc_lo
1060; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v0
1061; GFX11-NEXT:    v_cndmask_b32_e64 v15, v15, s18, s1
1062; GFX11-NEXT:    v_cndmask_b32_e64 v7, v7, s18, s0
1063; GFX11-NEXT:    v_cndmask_b32_e64 v8, v8, s19, s0
1064; GFX11-NEXT:    v_cmp_eq_u32_e64 s0, 5, v0
1065; GFX11-NEXT:    v_cndmask_b32_e64 v9, v9, s18, vcc_lo
1066; GFX11-NEXT:    v_cndmask_b32_e64 v10, v10, s19, vcc_lo
1067; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v0
1068; GFX11-NEXT:    v_cndmask_b32_e64 v16, v16, s19, s1
1069; GFX11-NEXT:    v_cndmask_b32_e64 v11, v11, s18, s0
1070; GFX11-NEXT:    v_cndmask_b32_e64 v12, v12, s19, s0
1071; GFX11-NEXT:    v_cndmask_b32_e64 v13, v13, s18, vcc_lo
1072; GFX11-NEXT:    v_cndmask_b32_e64 v14, v14, s19, vcc_lo
1073; GFX11-NEXT:    global_store_b128 v[0:1], v[1:4], off dlc
1074; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1075; GFX11-NEXT:    global_store_b128 v[0:1], v[5:8], off dlc
1076; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1077; GFX11-NEXT:    global_store_b128 v[0:1], v[9:12], off dlc
1078; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1079; GFX11-NEXT:    global_store_b128 v[0:1], v[13:16], off dlc
1080; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1081; GFX11-NEXT:    s_endpgm
1082entry:
1083  %insert = insertelement <8 x double> %vec, double %val, i32 %idx
  ; Split the result into four <2 x double> pieces and write each one with a
  ; volatile store to an undef global address.
  ; NOTE(review) - presumably this keeps all eight elements live in the
  ; generated code; confirm.
1084  %vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 0, i32 1>
1085  %vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 2, i32 3>
1086  %vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 4, i32 5>
1087  %vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 6, i32 7>
1088  store volatile <2 x double> %vec.0, ptr addrspace(1) undef
1089  store volatile <2 x double> %vec.1, ptr addrspace(1) undef
1090  store volatile <2 x double> %vec.2, ptr addrspace(1) undef
1091  store volatile <2 x double> %vec.3, ptr addrspace(1) undef
1092  ret void
1093}
1094
; Dynamic insertelement of a double into <8 x double> where the vector and the
; index are inreg and the inserted value is a regular argument (v[0:1] in the
; checks). The expected code copies the vector into v[2:17] and shifts the
; index left by one (two 32-bit registers per double). The GPRIDX checks then
; wrap two v_mov_b32 in s_set_gpr_idx_on/off; the GFX10 and GFX11 checks place
; the shifted index in m0 and use two v_movreld_b32.
1095define amdgpu_ps void @dyn_insertelement_v8f64_s_v_s(<8 x double> inreg %vec, double %val, i32 inreg %idx) {
1096; GPRIDX-LABEL: dyn_insertelement_v8f64_s_v_s:
1097; GPRIDX:       ; %bb.0: ; %entry
1098; GPRIDX-NEXT:    s_mov_b32 s1, s3
1099; GPRIDX-NEXT:    s_mov_b32 s3, s5
1100; GPRIDX-NEXT:    s_mov_b32 s5, s7
1101; GPRIDX-NEXT:    s_mov_b32 s7, s9
1102; GPRIDX-NEXT:    s_mov_b32 s9, s11
1103; GPRIDX-NEXT:    s_mov_b32 s11, s13
1104; GPRIDX-NEXT:    s_mov_b32 s13, s15
1105; GPRIDX-NEXT:    s_mov_b32 s15, s17
1106; GPRIDX-NEXT:    s_mov_b32 s0, s2
1107; GPRIDX-NEXT:    s_mov_b32 s2, s4
1108; GPRIDX-NEXT:    s_mov_b32 s4, s6
1109; GPRIDX-NEXT:    s_mov_b32 s6, s8
1110; GPRIDX-NEXT:    s_mov_b32 s8, s10
1111; GPRIDX-NEXT:    s_mov_b32 s10, s12
1112; GPRIDX-NEXT:    s_mov_b32 s12, s14
1113; GPRIDX-NEXT:    s_mov_b32 s14, s16
1114; GPRIDX-NEXT:    v_mov_b32_e32 v17, s15
1115; GPRIDX-NEXT:    v_mov_b32_e32 v16, s14
1116; GPRIDX-NEXT:    v_mov_b32_e32 v15, s13
1117; GPRIDX-NEXT:    v_mov_b32_e32 v14, s12
1118; GPRIDX-NEXT:    v_mov_b32_e32 v13, s11
1119; GPRIDX-NEXT:    v_mov_b32_e32 v12, s10
1120; GPRIDX-NEXT:    v_mov_b32_e32 v11, s9
1121; GPRIDX-NEXT:    v_mov_b32_e32 v10, s8
1122; GPRIDX-NEXT:    v_mov_b32_e32 v9, s7
1123; GPRIDX-NEXT:    v_mov_b32_e32 v8, s6
1124; GPRIDX-NEXT:    v_mov_b32_e32 v7, s5
1125; GPRIDX-NEXT:    v_mov_b32_e32 v6, s4
1126; GPRIDX-NEXT:    v_mov_b32_e32 v5, s3
1127; GPRIDX-NEXT:    v_mov_b32_e32 v4, s2
1128; GPRIDX-NEXT:    v_mov_b32_e32 v3, s1
1129; GPRIDX-NEXT:    v_mov_b32_e32 v2, s0
1130; GPRIDX-NEXT:    s_lshl_b32 s0, s18, 1
1131; GPRIDX-NEXT:    s_set_gpr_idx_on s0, gpr_idx(DST)
1132; GPRIDX-NEXT:    v_mov_b32_e32 v2, v0
1133; GPRIDX-NEXT:    v_mov_b32_e32 v3, v1
1134; GPRIDX-NEXT:    s_set_gpr_idx_off
1135; GPRIDX-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
1136; GPRIDX-NEXT:    s_waitcnt vmcnt(0)
1137; GPRIDX-NEXT:    global_store_dwordx4 v[0:1], v[6:9], off
1138; GPRIDX-NEXT:    s_waitcnt vmcnt(0)
1139; GPRIDX-NEXT:    global_store_dwordx4 v[0:1], v[10:13], off
1140; GPRIDX-NEXT:    s_waitcnt vmcnt(0)
1141; GPRIDX-NEXT:    global_store_dwordx4 v[0:1], v[14:17], off
1142; GPRIDX-NEXT:    s_waitcnt vmcnt(0)
1143; GPRIDX-NEXT:    s_endpgm
1144;
1145; GFX10-LABEL: dyn_insertelement_v8f64_s_v_s:
1146; GFX10:       ; %bb.0: ; %entry
1147; GFX10-NEXT:    s_mov_b32 s1, s3
1148; GFX10-NEXT:    s_mov_b32 s3, s5
1149; GFX10-NEXT:    s_mov_b32 s5, s7
1150; GFX10-NEXT:    s_mov_b32 s7, s9
1151; GFX10-NEXT:    s_mov_b32 s9, s11
1152; GFX10-NEXT:    s_mov_b32 s11, s13
1153; GFX10-NEXT:    s_mov_b32 s13, s15
1154; GFX10-NEXT:    s_mov_b32 s15, s17
1155; GFX10-NEXT:    s_mov_b32 s0, s2
1156; GFX10-NEXT:    s_mov_b32 s2, s4
1157; GFX10-NEXT:    s_mov_b32 s4, s6
1158; GFX10-NEXT:    s_mov_b32 s6, s8
1159; GFX10-NEXT:    s_mov_b32 s8, s10
1160; GFX10-NEXT:    s_mov_b32 s10, s12
1161; GFX10-NEXT:    s_mov_b32 s12, s14
1162; GFX10-NEXT:    s_mov_b32 s14, s16
1163; GFX10-NEXT:    v_mov_b32_e32 v17, s15
1164; GFX10-NEXT:    v_mov_b32_e32 v2, s0
1165; GFX10-NEXT:    s_lshl_b32 m0, s18, 1
1166; GFX10-NEXT:    v_mov_b32_e32 v16, s14
1167; GFX10-NEXT:    v_mov_b32_e32 v15, s13
1168; GFX10-NEXT:    v_mov_b32_e32 v14, s12
1169; GFX10-NEXT:    v_mov_b32_e32 v13, s11
1170; GFX10-NEXT:    v_mov_b32_e32 v12, s10
1171; GFX10-NEXT:    v_mov_b32_e32 v11, s9
1172; GFX10-NEXT:    v_mov_b32_e32 v10, s8
1173; GFX10-NEXT:    v_mov_b32_e32 v9, s7
1174; GFX10-NEXT:    v_mov_b32_e32 v8, s6
1175; GFX10-NEXT:    v_mov_b32_e32 v7, s5
1176; GFX10-NEXT:    v_mov_b32_e32 v6, s4
1177; GFX10-NEXT:    v_mov_b32_e32 v5, s3
1178; GFX10-NEXT:    v_mov_b32_e32 v4, s2
1179; GFX10-NEXT:    v_mov_b32_e32 v3, s1
1180; GFX10-NEXT:    v_movreld_b32_e32 v2, v0
1181; GFX10-NEXT:    v_movreld_b32_e32 v3, v1
1182; GFX10-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
1183; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
1184; GFX10-NEXT:    global_store_dwordx4 v[0:1], v[6:9], off
1185; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
1186; GFX10-NEXT:    global_store_dwordx4 v[0:1], v[10:13], off
1187; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
1188; GFX10-NEXT:    global_store_dwordx4 v[0:1], v[14:17], off
1189; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
1190; GFX10-NEXT:    s_endpgm
1191;
1192; GFX11-LABEL: dyn_insertelement_v8f64_s_v_s:
1193; GFX11:       ; %bb.0: ; %entry
1194; GFX11-NEXT:    s_mov_b32 s1, s3
1195; GFX11-NEXT:    s_mov_b32 s3, s5
1196; GFX11-NEXT:    s_mov_b32 s5, s7
1197; GFX11-NEXT:    s_mov_b32 s7, s9
1198; GFX11-NEXT:    s_mov_b32 s9, s11
1199; GFX11-NEXT:    s_mov_b32 s11, s13
1200; GFX11-NEXT:    s_mov_b32 s13, s15
1201; GFX11-NEXT:    s_mov_b32 s15, s17
1202; GFX11-NEXT:    s_mov_b32 s0, s2
1203; GFX11-NEXT:    s_mov_b32 s2, s4
1204; GFX11-NEXT:    s_mov_b32 s4, s6
1205; GFX11-NEXT:    s_mov_b32 s6, s8
1206; GFX11-NEXT:    s_mov_b32 s8, s10
1207; GFX11-NEXT:    s_mov_b32 s10, s12
1208; GFX11-NEXT:    s_mov_b32 s12, s14
1209; GFX11-NEXT:    s_mov_b32 s14, s16
1210; GFX11-NEXT:    v_dual_mov_b32 v17, s15 :: v_dual_mov_b32 v16, s14
1211; GFX11-NEXT:    v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
1212; GFX11-NEXT:    s_lshl_b32 m0, s18, 1
1213; GFX11-NEXT:    v_dual_mov_b32 v15, s13 :: v_dual_mov_b32 v14, s12
1214; GFX11-NEXT:    v_dual_mov_b32 v13, s11 :: v_dual_mov_b32 v12, s10
1215; GFX11-NEXT:    v_dual_mov_b32 v11, s9 :: v_dual_mov_b32 v10, s8
1216; GFX11-NEXT:    v_dual_mov_b32 v9, s7 :: v_dual_mov_b32 v8, s6
1217; GFX11-NEXT:    v_dual_mov_b32 v7, s5 :: v_dual_mov_b32 v6, s4
1218; GFX11-NEXT:    v_dual_mov_b32 v5, s3 :: v_dual_mov_b32 v4, s2
1219; GFX11-NEXT:    v_movreld_b32_e32 v2, v0
1220; GFX11-NEXT:    v_movreld_b32_e32 v3, v1
1221; GFX11-NEXT:    global_store_b128 v[0:1], v[2:5], off dlc
1222; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1223; GFX11-NEXT:    global_store_b128 v[0:1], v[6:9], off dlc
1224; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1225; GFX11-NEXT:    global_store_b128 v[0:1], v[10:13], off dlc
1226; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1227; GFX11-NEXT:    global_store_b128 v[0:1], v[14:17], off dlc
1228; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1229; GFX11-NEXT:    s_endpgm
1230entry:
1231  %insert = insertelement <8 x double> %vec, double %val, i32 %idx
  ; Split the result into four <2 x double> pieces and write each one with a
  ; volatile store to an undef global address.
  ; NOTE(review) - presumably this keeps all eight elements live in the
  ; generated code; confirm.
1232  %vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 0, i32 1>
1233  %vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 2, i32 3>
1234  %vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 4, i32 5>
1235  %vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 6, i32 7>
1236  store volatile <2 x double> %vec.0, ptr addrspace(1) undef
1237  store volatile <2 x double> %vec.1, ptr addrspace(1) undef
1238  store volatile <2 x double> %vec.2, ptr addrspace(1) undef
1239  store volatile <2 x double> %vec.3, ptr addrspace(1) undef
1240  ret void
1241}
1242
; Dynamic insertelement of a double into <8 x double> where the vector is a
; regular argument (v[0:15] in the checks) and both the inserted value and the
; index are inreg. No copy of the vector is expected: the index is shifted left
; by one and the value in s2/s3 is written straight into the incoming VGPRs,
; via s_set_gpr_idx_on/off plus v_mov_b32 in the GPRIDX checks and via m0 plus
; v_movreld_b32 in the GFX10 and GFX11 checks.
1243define amdgpu_ps void @dyn_insertelement_v8f64_v_s_s(<8 x double> %vec, double inreg %val, i32 inreg %idx) {
1244; GPRIDX-LABEL: dyn_insertelement_v8f64_v_s_s:
1245; GPRIDX:       ; %bb.0: ; %entry
1246; GPRIDX-NEXT:    s_lshl_b32 s0, s4, 1
1247; GPRIDX-NEXT:    s_set_gpr_idx_on s0, gpr_idx(DST)
1248; GPRIDX-NEXT:    v_mov_b32_e32 v0, s2
1249; GPRIDX-NEXT:    v_mov_b32_e32 v1, s3
1250; GPRIDX-NEXT:    s_set_gpr_idx_off
1251; GPRIDX-NEXT:    global_store_dwordx4 v[0:1], v[0:3], off
1252; GPRIDX-NEXT:    s_waitcnt vmcnt(0)
1253; GPRIDX-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off
1254; GPRIDX-NEXT:    s_waitcnt vmcnt(0)
1255; GPRIDX-NEXT:    global_store_dwordx4 v[0:1], v[8:11], off
1256; GPRIDX-NEXT:    s_waitcnt vmcnt(0)
1257; GPRIDX-NEXT:    global_store_dwordx4 v[0:1], v[12:15], off
1258; GPRIDX-NEXT:    s_waitcnt vmcnt(0)
1259; GPRIDX-NEXT:    s_endpgm
1260;
1261; GFX10-LABEL: dyn_insertelement_v8f64_v_s_s:
1262; GFX10:       ; %bb.0: ; %entry
1263; GFX10-NEXT:    s_lshl_b32 m0, s4, 1
1264; GFX10-NEXT:    v_movreld_b32_e32 v0, s2
1265; GFX10-NEXT:    v_movreld_b32_e32 v1, s3
1266; GFX10-NEXT:    global_store_dwordx4 v[0:1], v[0:3], off
1267; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
1268; GFX10-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off
1269; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
1270; GFX10-NEXT:    global_store_dwordx4 v[0:1], v[8:11], off
1271; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
1272; GFX10-NEXT:    global_store_dwordx4 v[0:1], v[12:15], off
1273; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
1274; GFX10-NEXT:    s_endpgm
1275;
1276; GFX11-LABEL: dyn_insertelement_v8f64_v_s_s:
1277; GFX11:       ; %bb.0: ; %entry
1278; GFX11-NEXT:    s_lshl_b32 m0, s4, 1
1279; GFX11-NEXT:    v_movreld_b32_e32 v0, s2
1280; GFX11-NEXT:    v_movreld_b32_e32 v1, s3
1281; GFX11-NEXT:    global_store_b128 v[0:1], v[0:3], off dlc
1282; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1283; GFX11-NEXT:    global_store_b128 v[0:1], v[4:7], off dlc
1284; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1285; GFX11-NEXT:    global_store_b128 v[0:1], v[8:11], off dlc
1286; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1287; GFX11-NEXT:    global_store_b128 v[0:1], v[12:15], off dlc
1288; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1289; GFX11-NEXT:    s_endpgm
1290entry:
1291  %insert = insertelement <8 x double> %vec, double %val, i32 %idx
  ; Split the result into four <2 x double> pieces and write each one with a
  ; volatile store to an undef global address.
  ; NOTE(review) - presumably this keeps all eight elements live in the
  ; generated code; confirm.
1292  %vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 0, i32 1>
1293  %vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 2, i32 3>
1294  %vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 4, i32 5>
1295  %vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 6, i32 7>
1296  store volatile <2 x double> %vec.0, ptr addrspace(1) undef
1297  store volatile <2 x double> %vec.1, ptr addrspace(1) undef
1298  store volatile <2 x double> %vec.2, ptr addrspace(1) undef
1299  store volatile <2 x double> %vec.3, ptr addrspace(1) undef
1300  ret void
1301}
1302
; Dynamic insertelement into <8 x double> where only %vec is inreg (the
; _s_v_v suffix: vec in s-registers, val and idx in v-registers).
; The result is split into four <2 x double> pieces, each written with a
; volatile store to an undef addrspace(1) pointer.
; Expected code on all three targets: the inreg vector is copied into v3-v18,
; %idx (v2) is compared against 0..7, and v_cndmask picks %val (v0/v1) for
; each 32-bit half of the matching element.
; The check lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with the script instead of editing by hand.
1303define amdgpu_ps void @dyn_insertelement_v8f64_s_v_v(<8 x double> inreg %vec, double %val, i32 %idx) {
1304; GPRIDX-LABEL: dyn_insertelement_v8f64_s_v_v:
1305; GPRIDX:       ; %bb.0: ; %entry
1306; GPRIDX-NEXT:    s_mov_b32 s1, s3
1307; GPRIDX-NEXT:    s_mov_b32 s3, s5
1308; GPRIDX-NEXT:    s_mov_b32 s5, s7
1309; GPRIDX-NEXT:    s_mov_b32 s7, s9
1310; GPRIDX-NEXT:    s_mov_b32 s9, s11
1311; GPRIDX-NEXT:    s_mov_b32 s11, s13
1312; GPRIDX-NEXT:    s_mov_b32 s13, s15
1313; GPRIDX-NEXT:    s_mov_b32 s15, s17
1314; GPRIDX-NEXT:    s_mov_b32 s0, s2
1315; GPRIDX-NEXT:    s_mov_b32 s2, s4
1316; GPRIDX-NEXT:    s_mov_b32 s4, s6
1317; GPRIDX-NEXT:    s_mov_b32 s6, s8
1318; GPRIDX-NEXT:    s_mov_b32 s8, s10
1319; GPRIDX-NEXT:    s_mov_b32 s10, s12
1320; GPRIDX-NEXT:    s_mov_b32 s12, s14
1321; GPRIDX-NEXT:    s_mov_b32 s14, s16
1322; GPRIDX-NEXT:    v_mov_b32_e32 v18, s15
1323; GPRIDX-NEXT:    v_mov_b32_e32 v17, s14
1324; GPRIDX-NEXT:    v_mov_b32_e32 v16, s13
1325; GPRIDX-NEXT:    v_mov_b32_e32 v15, s12
1326; GPRIDX-NEXT:    v_mov_b32_e32 v14, s11
1327; GPRIDX-NEXT:    v_mov_b32_e32 v13, s10
1328; GPRIDX-NEXT:    v_mov_b32_e32 v12, s9
1329; GPRIDX-NEXT:    v_mov_b32_e32 v11, s8
1330; GPRIDX-NEXT:    v_mov_b32_e32 v10, s7
1331; GPRIDX-NEXT:    v_mov_b32_e32 v9, s6
1332; GPRIDX-NEXT:    v_mov_b32_e32 v8, s5
1333; GPRIDX-NEXT:    v_mov_b32_e32 v7, s4
1334; GPRIDX-NEXT:    v_mov_b32_e32 v6, s3
1335; GPRIDX-NEXT:    v_mov_b32_e32 v5, s2
1336; GPRIDX-NEXT:    v_mov_b32_e32 v4, s1
1337; GPRIDX-NEXT:    v_mov_b32_e32 v3, s0
1338; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v2
1339; GPRIDX-NEXT:    v_cmp_eq_u32_e64 s[12:13], 0, v2
1340; GPRIDX-NEXT:    v_cmp_eq_u32_e64 s[0:1], 2, v2
1341; GPRIDX-NEXT:    v_cmp_eq_u32_e64 s[2:3], 3, v2
1342; GPRIDX-NEXT:    v_cmp_eq_u32_e64 s[4:5], 4, v2
1343; GPRIDX-NEXT:    v_cmp_eq_u32_e64 s[6:7], 5, v2
1344; GPRIDX-NEXT:    v_cmp_eq_u32_e64 s[8:9], 6, v2
1345; GPRIDX-NEXT:    v_cmp_eq_u32_e64 s[10:11], 7, v2
1346; GPRIDX-NEXT:    v_cndmask_b32_e64 v3, v3, v0, s[12:13]
1347; GPRIDX-NEXT:    v_cndmask_b32_e32 v5, v5, v0, vcc
1348; GPRIDX-NEXT:    v_cndmask_b32_e64 v4, v4, v1, s[12:13]
1349; GPRIDX-NEXT:    v_cndmask_b32_e32 v6, v6, v1, vcc
1350; GPRIDX-NEXT:    v_cndmask_b32_e64 v7, v7, v0, s[0:1]
1351; GPRIDX-NEXT:    v_cndmask_b32_e64 v9, v9, v0, s[2:3]
1352; GPRIDX-NEXT:    v_cndmask_b32_e64 v11, v11, v0, s[4:5]
1353; GPRIDX-NEXT:    v_cndmask_b32_e64 v13, v13, v0, s[6:7]
1354; GPRIDX-NEXT:    v_cndmask_b32_e64 v15, v15, v0, s[8:9]
1355; GPRIDX-NEXT:    v_cndmask_b32_e64 v17, v17, v0, s[10:11]
1356; GPRIDX-NEXT:    v_cndmask_b32_e64 v8, v8, v1, s[0:1]
1357; GPRIDX-NEXT:    v_cndmask_b32_e64 v10, v10, v1, s[2:3]
1358; GPRIDX-NEXT:    v_cndmask_b32_e64 v12, v12, v1, s[4:5]
1359; GPRIDX-NEXT:    v_cndmask_b32_e64 v14, v14, v1, s[6:7]
1360; GPRIDX-NEXT:    v_cndmask_b32_e64 v16, v16, v1, s[8:9]
1361; GPRIDX-NEXT:    v_cndmask_b32_e64 v18, v18, v1, s[10:11]
1362; GPRIDX-NEXT:    global_store_dwordx4 v[0:1], v[3:6], off
1363; GPRIDX-NEXT:    s_waitcnt vmcnt(0)
1364; GPRIDX-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off
1365; GPRIDX-NEXT:    s_waitcnt vmcnt(0)
1366; GPRIDX-NEXT:    global_store_dwordx4 v[0:1], v[11:14], off
1367; GPRIDX-NEXT:    s_waitcnt vmcnt(0)
1368; GPRIDX-NEXT:    global_store_dwordx4 v[0:1], v[15:18], off
1369; GPRIDX-NEXT:    s_waitcnt vmcnt(0)
1370; GPRIDX-NEXT:    s_endpgm
1371;
1372; GFX10-LABEL: dyn_insertelement_v8f64_s_v_v:
1373; GFX10:       ; %bb.0: ; %entry
1374; GFX10-NEXT:    s_mov_b32 s1, s3
1375; GFX10-NEXT:    s_mov_b32 s3, s5
1376; GFX10-NEXT:    s_mov_b32 s5, s7
1377; GFX10-NEXT:    s_mov_b32 s7, s9
1378; GFX10-NEXT:    s_mov_b32 s9, s11
1379; GFX10-NEXT:    s_mov_b32 s11, s13
1380; GFX10-NEXT:    s_mov_b32 s13, s15
1381; GFX10-NEXT:    s_mov_b32 s15, s17
1382; GFX10-NEXT:    s_mov_b32 s0, s2
1383; GFX10-NEXT:    s_mov_b32 s2, s4
1384; GFX10-NEXT:    s_mov_b32 s4, s6
1385; GFX10-NEXT:    s_mov_b32 s6, s8
1386; GFX10-NEXT:    s_mov_b32 s8, s10
1387; GFX10-NEXT:    s_mov_b32 s10, s12
1388; GFX10-NEXT:    s_mov_b32 s12, s14
1389; GFX10-NEXT:    s_mov_b32 s14, s16
1390; GFX10-NEXT:    v_mov_b32_e32 v18, s15
1391; GFX10-NEXT:    v_mov_b32_e32 v4, s1
1392; GFX10-NEXT:    v_mov_b32_e32 v3, s0
1393; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v2
1394; GFX10-NEXT:    v_mov_b32_e32 v17, s14
1395; GFX10-NEXT:    v_mov_b32_e32 v16, s13
1396; GFX10-NEXT:    v_mov_b32_e32 v15, s12
1397; GFX10-NEXT:    v_mov_b32_e32 v14, s11
1398; GFX10-NEXT:    v_mov_b32_e32 v13, s10
1399; GFX10-NEXT:    v_mov_b32_e32 v12, s9
1400; GFX10-NEXT:    v_mov_b32_e32 v11, s8
1401; GFX10-NEXT:    v_mov_b32_e32 v10, s7
1402; GFX10-NEXT:    v_mov_b32_e32 v9, s6
1403; GFX10-NEXT:    v_mov_b32_e32 v8, s5
1404; GFX10-NEXT:    v_mov_b32_e32 v7, s4
1405; GFX10-NEXT:    v_mov_b32_e32 v6, s3
1406; GFX10-NEXT:    v_mov_b32_e32 v5, s2
1407; GFX10-NEXT:    v_cmp_eq_u32_e64 s0, 1, v2
1408; GFX10-NEXT:    v_cndmask_b32_e32 v3, v3, v0, vcc_lo
1409; GFX10-NEXT:    v_cndmask_b32_e32 v4, v4, v1, vcc_lo
1410; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v2
1411; GFX10-NEXT:    v_cmp_eq_u32_e64 s1, 7, v2
1412; GFX10-NEXT:    v_cndmask_b32_e64 v5, v5, v0, s0
1413; GFX10-NEXT:    v_cndmask_b32_e64 v6, v6, v1, s0
1414; GFX10-NEXT:    v_cmp_eq_u32_e64 s0, 3, v2
1415; GFX10-NEXT:    v_cndmask_b32_e32 v7, v7, v0, vcc_lo
1416; GFX10-NEXT:    v_cndmask_b32_e32 v8, v8, v1, vcc_lo
1417; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v2
1418; GFX10-NEXT:    v_cndmask_b32_e64 v17, v17, v0, s1
1419; GFX10-NEXT:    v_cndmask_b32_e64 v9, v9, v0, s0
1420; GFX10-NEXT:    v_cndmask_b32_e64 v10, v10, v1, s0
1421; GFX10-NEXT:    v_cmp_eq_u32_e64 s0, 5, v2
1422; GFX10-NEXT:    v_cndmask_b32_e32 v11, v11, v0, vcc_lo
1423; GFX10-NEXT:    v_cndmask_b32_e32 v12, v12, v1, vcc_lo
1424; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v2
1425; GFX10-NEXT:    v_cndmask_b32_e64 v18, v18, v1, s1
1426; GFX10-NEXT:    v_cndmask_b32_e64 v13, v13, v0, s0
1427; GFX10-NEXT:    v_cndmask_b32_e64 v14, v14, v1, s0
1428; GFX10-NEXT:    v_cndmask_b32_e32 v15, v15, v0, vcc_lo
1429; GFX10-NEXT:    v_cndmask_b32_e32 v16, v16, v1, vcc_lo
1430; GFX10-NEXT:    global_store_dwordx4 v[0:1], v[3:6], off
1431; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
1432; GFX10-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off
1433; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
1434; GFX10-NEXT:    global_store_dwordx4 v[0:1], v[11:14], off
1435; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
1436; GFX10-NEXT:    global_store_dwordx4 v[0:1], v[15:18], off
1437; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
1438; GFX10-NEXT:    s_endpgm
1439;
1440; GFX11-LABEL: dyn_insertelement_v8f64_s_v_v:
1441; GFX11:       ; %bb.0: ; %entry
1442; GFX11-NEXT:    s_mov_b32 s1, s3
1443; GFX11-NEXT:    s_mov_b32 s3, s5
1444; GFX11-NEXT:    s_mov_b32 s5, s7
1445; GFX11-NEXT:    s_mov_b32 s7, s9
1446; GFX11-NEXT:    s_mov_b32 s9, s11
1447; GFX11-NEXT:    s_mov_b32 s11, s13
1448; GFX11-NEXT:    s_mov_b32 s13, s15
1449; GFX11-NEXT:    s_mov_b32 s15, s17
1450; GFX11-NEXT:    s_mov_b32 s0, s2
1451; GFX11-NEXT:    s_mov_b32 s2, s4
1452; GFX11-NEXT:    s_mov_b32 s4, s6
1453; GFX11-NEXT:    s_mov_b32 s6, s8
1454; GFX11-NEXT:    s_mov_b32 s8, s10
1455; GFX11-NEXT:    s_mov_b32 s10, s12
1456; GFX11-NEXT:    s_mov_b32 s12, s14
1457; GFX11-NEXT:    s_mov_b32 s14, s16
1458; GFX11-NEXT:    v_dual_mov_b32 v18, s15 :: v_dual_mov_b32 v17, s14
1459; GFX11-NEXT:    v_dual_mov_b32 v4, s1 :: v_dual_mov_b32 v3, s0
1460; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v2
1461; GFX11-NEXT:    v_dual_mov_b32 v16, s13 :: v_dual_mov_b32 v15, s12
1462; GFX11-NEXT:    v_dual_mov_b32 v14, s11 :: v_dual_mov_b32 v13, s10
1463; GFX11-NEXT:    v_dual_mov_b32 v12, s9 :: v_dual_mov_b32 v11, s8
1464; GFX11-NEXT:    v_dual_mov_b32 v10, s7 :: v_dual_mov_b32 v9, s6
1465; GFX11-NEXT:    v_dual_mov_b32 v8, s5 :: v_dual_mov_b32 v7, s4
1466; GFX11-NEXT:    v_dual_mov_b32 v6, s3 :: v_dual_mov_b32 v5, s2
1467; GFX11-NEXT:    v_cmp_eq_u32_e64 s0, 1, v2
1468; GFX11-NEXT:    v_dual_cndmask_b32 v3, v3, v0 :: v_dual_cndmask_b32 v4, v4, v1
1469; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v2
1470; GFX11-NEXT:    v_cmp_eq_u32_e64 s1, 7, v2
1471; GFX11-NEXT:    v_cndmask_b32_e64 v5, v5, v0, s0
1472; GFX11-NEXT:    v_cndmask_b32_e64 v6, v6, v1, s0
1473; GFX11-NEXT:    v_cmp_eq_u32_e64 s0, 3, v2
1474; GFX11-NEXT:    v_dual_cndmask_b32 v7, v7, v0 :: v_dual_cndmask_b32 v8, v8, v1
1475; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v2
1476; GFX11-NEXT:    v_cndmask_b32_e64 v17, v17, v0, s1
1477; GFX11-NEXT:    v_cndmask_b32_e64 v9, v9, v0, s0
1478; GFX11-NEXT:    v_cndmask_b32_e64 v10, v10, v1, s0
1479; GFX11-NEXT:    v_cmp_eq_u32_e64 s0, 5, v2
1480; GFX11-NEXT:    v_dual_cndmask_b32 v11, v11, v0 :: v_dual_cndmask_b32 v12, v12, v1
1481; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v2
1482; GFX11-NEXT:    v_cndmask_b32_e64 v18, v18, v1, s1
1483; GFX11-NEXT:    v_cndmask_b32_e64 v13, v13, v0, s0
1484; GFX11-NEXT:    v_cndmask_b32_e64 v14, v14, v1, s0
1485; GFX11-NEXT:    v_dual_cndmask_b32 v15, v15, v0 :: v_dual_cndmask_b32 v16, v16, v1
1486; GFX11-NEXT:    global_store_b128 v[0:1], v[3:6], off dlc
1487; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1488; GFX11-NEXT:    global_store_b128 v[0:1], v[7:10], off dlc
1489; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1490; GFX11-NEXT:    global_store_b128 v[0:1], v[11:14], off dlc
1491; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1492; GFX11-NEXT:    global_store_b128 v[0:1], v[15:18], off dlc
1493; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1494; GFX11-NEXT:    s_endpgm
1495entry:
1496  %insert = insertelement <8 x double> %vec, double %val, i32 %idx
1497  %vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 0, i32 1>
1498  %vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 2, i32 3>
1499  %vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 4, i32 5>
1500  %vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 6, i32 7>
1501  store volatile <2 x double> %vec.0, ptr addrspace(1) undef
1502  store volatile <2 x double> %vec.1, ptr addrspace(1) undef
1503  store volatile <2 x double> %vec.2, ptr addrspace(1) undef
1504  store volatile <2 x double> %vec.3, ptr addrspace(1) undef
1505  ret void
1506}
1507
; Dynamic insertelement into <8 x double> where only %val is inreg (the
; _v_s_v suffix: vec in v0-v15, val in s2/s3, idx in v16).
; The result is split into four <2 x double> pieces, each written with a
; volatile store to an undef addrspace(1) pointer.
; Expected code: %idx is compared against 0..7 and v_cndmask selects per
; 32-bit half. GPRIDX first copies s2/s3 into v17/v18; GFX10 and GFX11 feed
; s2/s3 straight into the e64 form of v_cndmask.
; The check lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with the script instead of editing by hand.
1508define amdgpu_ps void @dyn_insertelement_v8f64_v_s_v(<8 x double> %vec, double inreg %val, i32 %idx) {
1509; GPRIDX-LABEL: dyn_insertelement_v8f64_v_s_v:
1510; GPRIDX:       ; %bb.0: ; %entry
1511; GPRIDX-NEXT:    v_mov_b32_e32 v17, s2
1512; GPRIDX-NEXT:    v_mov_b32_e32 v18, s3
1513; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v16
1514; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v17, vcc
1515; GPRIDX-NEXT:    v_cndmask_b32_e32 v1, v1, v18, vcc
1516; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v16
1517; GPRIDX-NEXT:    v_cndmask_b32_e32 v2, v2, v17, vcc
1518; GPRIDX-NEXT:    v_cndmask_b32_e32 v3, v3, v18, vcc
1519; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v16
1520; GPRIDX-NEXT:    v_cndmask_b32_e32 v4, v4, v17, vcc
1521; GPRIDX-NEXT:    v_cndmask_b32_e32 v5, v5, v18, vcc
1522; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v16
1523; GPRIDX-NEXT:    v_cndmask_b32_e32 v6, v6, v17, vcc
1524; GPRIDX-NEXT:    v_cndmask_b32_e32 v7, v7, v18, vcc
1525; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v16
1526; GPRIDX-NEXT:    v_cndmask_b32_e32 v8, v8, v17, vcc
1527; GPRIDX-NEXT:    v_cndmask_b32_e32 v9, v9, v18, vcc
1528; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v16
1529; GPRIDX-NEXT:    v_cndmask_b32_e32 v10, v10, v17, vcc
1530; GPRIDX-NEXT:    v_cndmask_b32_e32 v11, v11, v18, vcc
1531; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v16
1532; GPRIDX-NEXT:    v_cndmask_b32_e32 v12, v12, v17, vcc
1533; GPRIDX-NEXT:    v_cndmask_b32_e32 v13, v13, v18, vcc
1534; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 7, v16
1535; GPRIDX-NEXT:    v_cndmask_b32_e32 v14, v14, v17, vcc
1536; GPRIDX-NEXT:    v_cndmask_b32_e32 v15, v15, v18, vcc
1537; GPRIDX-NEXT:    global_store_dwordx4 v[0:1], v[0:3], off
1538; GPRIDX-NEXT:    s_waitcnt vmcnt(0)
1539; GPRIDX-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off
1540; GPRIDX-NEXT:    s_waitcnt vmcnt(0)
1541; GPRIDX-NEXT:    global_store_dwordx4 v[0:1], v[8:11], off
1542; GPRIDX-NEXT:    s_waitcnt vmcnt(0)
1543; GPRIDX-NEXT:    global_store_dwordx4 v[0:1], v[12:15], off
1544; GPRIDX-NEXT:    s_waitcnt vmcnt(0)
1545; GPRIDX-NEXT:    s_endpgm
1546;
1547; GFX10-LABEL: dyn_insertelement_v8f64_v_s_v:
1548; GFX10:       ; %bb.0: ; %entry
1549; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v16
1550; GFX10-NEXT:    v_cmp_eq_u32_e64 s0, 1, v16
1551; GFX10-NEXT:    v_cmp_eq_u32_e64 s1, 7, v16
1552; GFX10-NEXT:    v_cndmask_b32_e64 v0, v0, s2, vcc_lo
1553; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s3, vcc_lo
1554; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v16
1555; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, s2, s0
1556; GFX10-NEXT:    v_cndmask_b32_e64 v3, v3, s3, s0
1557; GFX10-NEXT:    v_cmp_eq_u32_e64 s0, 3, v16
1558; GFX10-NEXT:    v_cndmask_b32_e64 v14, v14, s2, s1
1559; GFX10-NEXT:    v_cndmask_b32_e64 v4, v4, s2, vcc_lo
1560; GFX10-NEXT:    v_cndmask_b32_e64 v5, v5, s3, vcc_lo
1561; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v16
1562; GFX10-NEXT:    v_cndmask_b32_e64 v6, v6, s2, s0
1563; GFX10-NEXT:    v_cndmask_b32_e64 v7, v7, s3, s0
1564; GFX10-NEXT:    v_cmp_eq_u32_e64 s0, 5, v16
1565; GFX10-NEXT:    v_cndmask_b32_e64 v15, v15, s3, s1
1566; GFX10-NEXT:    v_cndmask_b32_e64 v8, v8, s2, vcc_lo
1567; GFX10-NEXT:    v_cndmask_b32_e64 v9, v9, s3, vcc_lo
1568; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v16
1569; GFX10-NEXT:    v_cndmask_b32_e64 v10, v10, s2, s0
1570; GFX10-NEXT:    v_cndmask_b32_e64 v11, v11, s3, s0
1571; GFX10-NEXT:    v_cndmask_b32_e64 v12, v12, s2, vcc_lo
1572; GFX10-NEXT:    v_cndmask_b32_e64 v13, v13, s3, vcc_lo
1573; GFX10-NEXT:    global_store_dwordx4 v[0:1], v[0:3], off
1574; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
1575; GFX10-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off
1576; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
1577; GFX10-NEXT:    global_store_dwordx4 v[0:1], v[8:11], off
1578; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
1579; GFX10-NEXT:    global_store_dwordx4 v[0:1], v[12:15], off
1580; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
1581; GFX10-NEXT:    s_endpgm
1582;
1583; GFX11-LABEL: dyn_insertelement_v8f64_v_s_v:
1584; GFX11:       ; %bb.0: ; %entry
1585; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v16
1586; GFX11-NEXT:    v_cmp_eq_u32_e64 s0, 1, v16
1587; GFX11-NEXT:    v_cmp_eq_u32_e64 s1, 7, v16
1588; GFX11-NEXT:    v_cndmask_b32_e64 v0, v0, s2, vcc_lo
1589; GFX11-NEXT:    v_cndmask_b32_e64 v1, v1, s3, vcc_lo
1590; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v16
1591; GFX11-NEXT:    v_cndmask_b32_e64 v2, v2, s2, s0
1592; GFX11-NEXT:    v_cndmask_b32_e64 v3, v3, s3, s0
1593; GFX11-NEXT:    v_cmp_eq_u32_e64 s0, 3, v16
1594; GFX11-NEXT:    v_cndmask_b32_e64 v14, v14, s2, s1
1595; GFX11-NEXT:    v_cndmask_b32_e64 v4, v4, s2, vcc_lo
1596; GFX11-NEXT:    v_cndmask_b32_e64 v5, v5, s3, vcc_lo
1597; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v16
1598; GFX11-NEXT:    v_cndmask_b32_e64 v6, v6, s2, s0
1599; GFX11-NEXT:    v_cndmask_b32_e64 v7, v7, s3, s0
1600; GFX11-NEXT:    v_cmp_eq_u32_e64 s0, 5, v16
1601; GFX11-NEXT:    v_cndmask_b32_e64 v15, v15, s3, s1
1602; GFX11-NEXT:    v_cndmask_b32_e64 v8, v8, s2, vcc_lo
1603; GFX11-NEXT:    v_cndmask_b32_e64 v9, v9, s3, vcc_lo
1604; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v16
1605; GFX11-NEXT:    v_cndmask_b32_e64 v10, v10, s2, s0
1606; GFX11-NEXT:    v_cndmask_b32_e64 v11, v11, s3, s0
1607; GFX11-NEXT:    v_cndmask_b32_e64 v12, v12, s2, vcc_lo
1608; GFX11-NEXT:    v_cndmask_b32_e64 v13, v13, s3, vcc_lo
1609; GFX11-NEXT:    global_store_b128 v[0:1], v[0:3], off dlc
1610; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1611; GFX11-NEXT:    global_store_b128 v[0:1], v[4:7], off dlc
1612; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1613; GFX11-NEXT:    global_store_b128 v[0:1], v[8:11], off dlc
1614; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1615; GFX11-NEXT:    global_store_b128 v[0:1], v[12:15], off dlc
1616; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1617; GFX11-NEXT:    s_endpgm
1618entry:
1619  %insert = insertelement <8 x double> %vec, double %val, i32 %idx
1620  %vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 0, i32 1>
1621  %vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 2, i32 3>
1622  %vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 4, i32 5>
1623  %vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 6, i32 7>
1624  store volatile <2 x double> %vec.0, ptr addrspace(1) undef
1625  store volatile <2 x double> %vec.1, ptr addrspace(1) undef
1626  store volatile <2 x double> %vec.2, ptr addrspace(1) undef
1627  store volatile <2 x double> %vec.3, ptr addrspace(1) undef
1628  ret void
1629}
1630
; Dynamic insertelement into <8 x double> where only %idx is inreg (the
; _v_v_s suffix: vec in v0-v15, val in v16/v17, idx in s2).
; The result is split into four <2 x double> pieces, each written with a
; volatile store to an undef addrspace(1) pointer.
; Expected code: no compare/select chain. The index is shifted left by one
; (each double spans two 32-bit registers) and the two halves of %val are
; written with indexed moves: s_set_gpr_idx_on + v_mov_b32 on GPRIDX,
; m0 + v_movreld_b32 on GFX10 and GFX11.
; The check lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with the script instead of editing by hand.
1631define amdgpu_ps void @dyn_insertelement_v8f64_v_v_s(<8 x double> %vec, double %val, i32 inreg %idx) {
1632; GPRIDX-LABEL: dyn_insertelement_v8f64_v_v_s:
1633; GPRIDX:       ; %bb.0: ; %entry
1634; GPRIDX-NEXT:    s_lshl_b32 s0, s2, 1
1635; GPRIDX-NEXT:    s_set_gpr_idx_on s0, gpr_idx(DST)
1636; GPRIDX-NEXT:    v_mov_b32_e32 v0, v16
1637; GPRIDX-NEXT:    v_mov_b32_e32 v1, v17
1638; GPRIDX-NEXT:    s_set_gpr_idx_off
1639; GPRIDX-NEXT:    global_store_dwordx4 v[0:1], v[0:3], off
1640; GPRIDX-NEXT:    s_waitcnt vmcnt(0)
1641; GPRIDX-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off
1642; GPRIDX-NEXT:    s_waitcnt vmcnt(0)
1643; GPRIDX-NEXT:    global_store_dwordx4 v[0:1], v[8:11], off
1644; GPRIDX-NEXT:    s_waitcnt vmcnt(0)
1645; GPRIDX-NEXT:    global_store_dwordx4 v[0:1], v[12:15], off
1646; GPRIDX-NEXT:    s_waitcnt vmcnt(0)
1647; GPRIDX-NEXT:    s_endpgm
1648;
1649; GFX10-LABEL: dyn_insertelement_v8f64_v_v_s:
1650; GFX10:       ; %bb.0: ; %entry
1651; GFX10-NEXT:    s_lshl_b32 m0, s2, 1
1652; GFX10-NEXT:    v_movreld_b32_e32 v0, v16
1653; GFX10-NEXT:    v_movreld_b32_e32 v1, v17
1654; GFX10-NEXT:    global_store_dwordx4 v[0:1], v[0:3], off
1655; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
1656; GFX10-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off
1657; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
1658; GFX10-NEXT:    global_store_dwordx4 v[0:1], v[8:11], off
1659; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
1660; GFX10-NEXT:    global_store_dwordx4 v[0:1], v[12:15], off
1661; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
1662; GFX10-NEXT:    s_endpgm
1663;
1664; GFX11-LABEL: dyn_insertelement_v8f64_v_v_s:
1665; GFX11:       ; %bb.0: ; %entry
1666; GFX11-NEXT:    s_lshl_b32 m0, s2, 1
1667; GFX11-NEXT:    v_movreld_b32_e32 v0, v16
1668; GFX11-NEXT:    v_movreld_b32_e32 v1, v17
1669; GFX11-NEXT:    global_store_b128 v[0:1], v[0:3], off dlc
1670; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1671; GFX11-NEXT:    global_store_b128 v[0:1], v[4:7], off dlc
1672; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1673; GFX11-NEXT:    global_store_b128 v[0:1], v[8:11], off dlc
1674; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1675; GFX11-NEXT:    global_store_b128 v[0:1], v[12:15], off dlc
1676; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1677; GFX11-NEXT:    s_endpgm
1678entry:
1679  %insert = insertelement <8 x double> %vec, double %val, i32 %idx
1680  %vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 0, i32 1>
1681  %vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 2, i32 3>
1682  %vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 4, i32 5>
1683  %vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 6, i32 7>
1684  store volatile <2 x double> %vec.0, ptr addrspace(1) undef
1685  store volatile <2 x double> %vec.1, ptr addrspace(1) undef
1686  store volatile <2 x double> %vec.2, ptr addrspace(1) undef
1687  store volatile <2 x double> %vec.3, ptr addrspace(1) undef
1688  ret void
1689}
1690
; Dynamic insertelement into <8 x double> with no inreg operands (the _v_v_v
; suffix: vec in v0-v15, val in v16/v17, idx in v18).
; The result is split into four <2 x double> pieces, each written with a
; volatile store to an undef addrspace(1) pointer.
; Expected code: %idx is compared against 0..7 and v_cndmask selects %val per
; 32-bit half, updating v0-v15 in place. GFX11 pairs the vcc-based selects
; into v_dual_cndmask_b32.
; The check lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with the script instead of editing by hand.
1691define amdgpu_ps void @dyn_insertelement_v8f64_v_v_v(<8 x double> %vec, double %val, i32 %idx) {
1692; GPRIDX-LABEL: dyn_insertelement_v8f64_v_v_v:
1693; GPRIDX:       ; %bb.0: ; %entry
1694; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v18
1695; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v16, vcc
1696; GPRIDX-NEXT:    v_cndmask_b32_e32 v1, v1, v17, vcc
1697; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v18
1698; GPRIDX-NEXT:    v_cndmask_b32_e32 v2, v2, v16, vcc
1699; GPRIDX-NEXT:    v_cndmask_b32_e32 v3, v3, v17, vcc
1700; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v18
1701; GPRIDX-NEXT:    v_cndmask_b32_e32 v4, v4, v16, vcc
1702; GPRIDX-NEXT:    v_cndmask_b32_e32 v5, v5, v17, vcc
1703; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v18
1704; GPRIDX-NEXT:    v_cndmask_b32_e32 v6, v6, v16, vcc
1705; GPRIDX-NEXT:    v_cndmask_b32_e32 v7, v7, v17, vcc
1706; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v18
1707; GPRIDX-NEXT:    v_cndmask_b32_e32 v8, v8, v16, vcc
1708; GPRIDX-NEXT:    v_cndmask_b32_e32 v9, v9, v17, vcc
1709; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v18
1710; GPRIDX-NEXT:    v_cndmask_b32_e32 v10, v10, v16, vcc
1711; GPRIDX-NEXT:    v_cndmask_b32_e32 v11, v11, v17, vcc
1712; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v18
1713; GPRIDX-NEXT:    v_cndmask_b32_e32 v12, v12, v16, vcc
1714; GPRIDX-NEXT:    v_cndmask_b32_e32 v13, v13, v17, vcc
1715; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 7, v18
1716; GPRIDX-NEXT:    v_cndmask_b32_e32 v14, v14, v16, vcc
1717; GPRIDX-NEXT:    v_cndmask_b32_e32 v15, v15, v17, vcc
1718; GPRIDX-NEXT:    global_store_dwordx4 v[0:1], v[0:3], off
1719; GPRIDX-NEXT:    s_waitcnt vmcnt(0)
1720; GPRIDX-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off
1721; GPRIDX-NEXT:    s_waitcnt vmcnt(0)
1722; GPRIDX-NEXT:    global_store_dwordx4 v[0:1], v[8:11], off
1723; GPRIDX-NEXT:    s_waitcnt vmcnt(0)
1724; GPRIDX-NEXT:    global_store_dwordx4 v[0:1], v[12:15], off
1725; GPRIDX-NEXT:    s_waitcnt vmcnt(0)
1726; GPRIDX-NEXT:    s_endpgm
1727;
1728; GFX10-LABEL: dyn_insertelement_v8f64_v_v_v:
1729; GFX10:       ; %bb.0: ; %entry
1730; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v18
1731; GFX10-NEXT:    v_cmp_eq_u32_e64 s0, 1, v18
1732; GFX10-NEXT:    v_cmp_eq_u32_e64 s1, 7, v18
1733; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v16, vcc_lo
1734; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v17, vcc_lo
1735; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v18
1736; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, v16, s0
1737; GFX10-NEXT:    v_cndmask_b32_e64 v3, v3, v17, s0
1738; GFX10-NEXT:    v_cmp_eq_u32_e64 s0, 3, v18
1739; GFX10-NEXT:    v_cndmask_b32_e64 v14, v14, v16, s1
1740; GFX10-NEXT:    v_cndmask_b32_e32 v4, v4, v16, vcc_lo
1741; GFX10-NEXT:    v_cndmask_b32_e32 v5, v5, v17, vcc_lo
1742; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v18
1743; GFX10-NEXT:    v_cndmask_b32_e64 v6, v6, v16, s0
1744; GFX10-NEXT:    v_cndmask_b32_e64 v7, v7, v17, s0
1745; GFX10-NEXT:    v_cmp_eq_u32_e64 s0, 5, v18
1746; GFX10-NEXT:    v_cndmask_b32_e64 v15, v15, v17, s1
1747; GFX10-NEXT:    v_cndmask_b32_e32 v8, v8, v16, vcc_lo
1748; GFX10-NEXT:    v_cndmask_b32_e32 v9, v9, v17, vcc_lo
1749; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v18
1750; GFX10-NEXT:    v_cndmask_b32_e64 v10, v10, v16, s0
1751; GFX10-NEXT:    v_cndmask_b32_e64 v11, v11, v17, s0
1752; GFX10-NEXT:    v_cndmask_b32_e32 v12, v12, v16, vcc_lo
1753; GFX10-NEXT:    v_cndmask_b32_e32 v13, v13, v17, vcc_lo
1754; GFX10-NEXT:    global_store_dwordx4 v[0:1], v[0:3], off
1755; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
1756; GFX10-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off
1757; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
1758; GFX10-NEXT:    global_store_dwordx4 v[0:1], v[8:11], off
1759; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
1760; GFX10-NEXT:    global_store_dwordx4 v[0:1], v[12:15], off
1761; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
1762; GFX10-NEXT:    s_endpgm
1763;
1764; GFX11-LABEL: dyn_insertelement_v8f64_v_v_v:
1765; GFX11:       ; %bb.0: ; %entry
1766; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v18
1767; GFX11-NEXT:    v_cmp_eq_u32_e64 s0, 1, v18
1768; GFX11-NEXT:    v_cmp_eq_u32_e64 s1, 7, v18
1769; GFX11-NEXT:    v_dual_cndmask_b32 v0, v0, v16 :: v_dual_cndmask_b32 v1, v1, v17
1770; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v18
1771; GFX11-NEXT:    v_cndmask_b32_e64 v2, v2, v16, s0
1772; GFX11-NEXT:    v_cndmask_b32_e64 v3, v3, v17, s0
1773; GFX11-NEXT:    v_cmp_eq_u32_e64 s0, 3, v18
1774; GFX11-NEXT:    v_cndmask_b32_e64 v14, v14, v16, s1
1775; GFX11-NEXT:    v_dual_cndmask_b32 v4, v4, v16 :: v_dual_cndmask_b32 v5, v5, v17
1776; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v18
1777; GFX11-NEXT:    v_cndmask_b32_e64 v6, v6, v16, s0
1778; GFX11-NEXT:    v_cndmask_b32_e64 v7, v7, v17, s0
1779; GFX11-NEXT:    v_cmp_eq_u32_e64 s0, 5, v18
1780; GFX11-NEXT:    v_cndmask_b32_e64 v15, v15, v17, s1
1781; GFX11-NEXT:    v_dual_cndmask_b32 v8, v8, v16 :: v_dual_cndmask_b32 v9, v9, v17
1782; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v18
1783; GFX11-NEXT:    v_cndmask_b32_e64 v10, v10, v16, s0
1784; GFX11-NEXT:    v_cndmask_b32_e64 v11, v11, v17, s0
1785; GFX11-NEXT:    v_dual_cndmask_b32 v12, v12, v16 :: v_dual_cndmask_b32 v13, v13, v17
1786; GFX11-NEXT:    global_store_b128 v[0:1], v[0:3], off dlc
1787; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1788; GFX11-NEXT:    global_store_b128 v[0:1], v[4:7], off dlc
1789; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1790; GFX11-NEXT:    global_store_b128 v[0:1], v[8:11], off dlc
1791; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1792; GFX11-NEXT:    global_store_b128 v[0:1], v[12:15], off dlc
1793; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1794; GFX11-NEXT:    s_endpgm
1795entry:
1796  %insert = insertelement <8 x double> %vec, double %val, i32 %idx
1797  %vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 0, i32 1>
1798  %vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 2, i32 3>
1799  %vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 4, i32 5>
1800  %vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 6, i32 7>
1801  store volatile <2 x double> %vec.0, ptr addrspace(1) undef
1802  store volatile <2 x double> %vec.1, ptr addrspace(1) undef
1803  store volatile <2 x double> %vec.2, ptr addrspace(1) undef
1804  store volatile <2 x double> %vec.3, ptr addrspace(1) undef
1805  ret void
1806}
1807
; Dynamic insertelement into <3 x i32> with every operand inreg (the _s_s_s
; suffix); the updated vector is returned directly.
; Expected code, identical for GPRIDX and GFX10PLUS: %idx (s6) is compared
; against 0, 1 and 2, and s_cselect_b32 picks %val (s5) or the original
; element (s2-s4) into s0-s2.
; The check lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with the script instead of editing by hand.
1808define amdgpu_ps <3 x i32> @dyn_insertelement_v3i32_s_s_s(<3 x i32> inreg %vec, i32 inreg %val, i32 inreg %idx) {
1809; GPRIDX-LABEL: dyn_insertelement_v3i32_s_s_s:
1810; GPRIDX:       ; %bb.0: ; %entry
1811; GPRIDX-NEXT:    s_cmp_eq_u32 s6, 0
1812; GPRIDX-NEXT:    s_cselect_b32 s0, s5, s2
1813; GPRIDX-NEXT:    s_cmp_eq_u32 s6, 1
1814; GPRIDX-NEXT:    s_cselect_b32 s1, s5, s3
1815; GPRIDX-NEXT:    s_cmp_eq_u32 s6, 2
1816; GPRIDX-NEXT:    s_cselect_b32 s2, s5, s4
1817; GPRIDX-NEXT:    ; return to shader part epilog
1818;
1819; GFX10PLUS-LABEL: dyn_insertelement_v3i32_s_s_s:
1820; GFX10PLUS:       ; %bb.0: ; %entry
1821; GFX10PLUS-NEXT:    s_cmp_eq_u32 s6, 0
1822; GFX10PLUS-NEXT:    s_cselect_b32 s0, s5, s2
1823; GFX10PLUS-NEXT:    s_cmp_eq_u32 s6, 1
1824; GFX10PLUS-NEXT:    s_cselect_b32 s1, s5, s3
1825; GFX10PLUS-NEXT:    s_cmp_eq_u32 s6, 2
1826; GFX10PLUS-NEXT:    s_cselect_b32 s2, s5, s4
1827; GFX10PLUS-NEXT:    ; return to shader part epilog
1828entry:
1829  %insert = insertelement <3 x i32> %vec, i32 %val, i32 %idx
1830  ret <3 x i32> %insert
1831}
1832
; Dynamic insertelement into a 3-element vector where only %idx is inreg
; (the _v_v_s suffix: vec in v0-v2, val in v3, idx in s2).
; NOTE(review): the function name says v3i32 but the IR operates on
; <3 x float>; the name is kept because the -LABEL check lines depend on it.
; Expected code, identical for GPRIDX and GFX10PLUS apart from vcc vs vcc_lo:
; s2 is compared against 0, 1 and 2, and v_cndmask selects %val per element.
; The check lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with the script instead of editing by hand.
1833define amdgpu_ps <3 x float> @dyn_insertelement_v3i32_v_v_s(<3 x float> %vec, float %val, i32 inreg %idx) {
1834; GPRIDX-LABEL: dyn_insertelement_v3i32_v_v_s:
1835; GPRIDX:       ; %bb.0: ; %entry
1836; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 0
1837; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
1838; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 1
1839; GPRIDX-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
1840; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 2
1841; GPRIDX-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
1842; GPRIDX-NEXT:    ; return to shader part epilog
1843;
1844; GFX10PLUS-LABEL: dyn_insertelement_v3i32_v_v_s:
1845; GFX10PLUS:       ; %bb.0: ; %entry
1846; GFX10PLUS-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s2, 0
1847; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc_lo
1848; GFX10PLUS-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s2, 1
1849; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc_lo
1850; GFX10PLUS-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s2, 2
1851; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc_lo
1852; GFX10PLUS-NEXT:    ; return to shader part epilog
1853entry:
1854  %insert = insertelement <3 x float> %vec, float %val, i32 %idx
1855  ret <3 x float> %insert
1856}
1857
; Dynamic insertelement into <5 x i32> with every operand inreg (the _s_s_s
; suffix); the updated vector is returned directly.
; Expected code, identical for GPRIDX and GFX10PLUS: %idx (s8) is compared
; against 0..4, and s_cselect_b32 picks %val (s7) or the original element
; (s2-s6) into s0-s4.
; The check lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with the script instead of editing by hand.
1858define amdgpu_ps <5 x i32> @dyn_insertelement_v5i32_s_s_s(<5 x i32> inreg %vec, i32 inreg %val, i32 inreg %idx) {
1859; GPRIDX-LABEL: dyn_insertelement_v5i32_s_s_s:
1860; GPRIDX:       ; %bb.0: ; %entry
1861; GPRIDX-NEXT:    s_cmp_eq_u32 s8, 0
1862; GPRIDX-NEXT:    s_cselect_b32 s0, s7, s2
1863; GPRIDX-NEXT:    s_cmp_eq_u32 s8, 1
1864; GPRIDX-NEXT:    s_cselect_b32 s1, s7, s3
1865; GPRIDX-NEXT:    s_cmp_eq_u32 s8, 2
1866; GPRIDX-NEXT:    s_cselect_b32 s2, s7, s4
1867; GPRIDX-NEXT:    s_cmp_eq_u32 s8, 3
1868; GPRIDX-NEXT:    s_cselect_b32 s3, s7, s5
1869; GPRIDX-NEXT:    s_cmp_eq_u32 s8, 4
1870; GPRIDX-NEXT:    s_cselect_b32 s4, s7, s6
1871; GPRIDX-NEXT:    ; return to shader part epilog
1872;
1873; GFX10PLUS-LABEL: dyn_insertelement_v5i32_s_s_s:
1874; GFX10PLUS:       ; %bb.0: ; %entry
1875; GFX10PLUS-NEXT:    s_cmp_eq_u32 s8, 0
1876; GFX10PLUS-NEXT:    s_cselect_b32 s0, s7, s2
1877; GFX10PLUS-NEXT:    s_cmp_eq_u32 s8, 1
1878; GFX10PLUS-NEXT:    s_cselect_b32 s1, s7, s3
1879; GFX10PLUS-NEXT:    s_cmp_eq_u32 s8, 2
1880; GFX10PLUS-NEXT:    s_cselect_b32 s2, s7, s4
1881; GFX10PLUS-NEXT:    s_cmp_eq_u32 s8, 3
1882; GFX10PLUS-NEXT:    s_cselect_b32 s3, s7, s5
1883; GFX10PLUS-NEXT:    s_cmp_eq_u32 s8, 4
1884; GFX10PLUS-NEXT:    s_cselect_b32 s4, s7, s6
1885; GFX10PLUS-NEXT:    ; return to shader part epilog
1886entry:
1887  %insert = insertelement <5 x i32> %vec, i32 %val, i32 %idx
1888  ret <5 x i32> %insert
1889}
1890
; Dynamic insertelement into a 5-element vector where only %idx is inreg
; (the _v_v_s suffix: vec in v0-v4, val in v5, idx in s2).
; NOTE(review): the function name says v5i32 but the IR operates on
; <5 x float>; the name is kept because the -LABEL check lines depend on it.
; Expected code, identical for GPRIDX and GFX10PLUS apart from vcc vs vcc_lo:
; s2 is compared against 0..4, and v_cndmask selects %val per element.
; The check lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with the script instead of editing by hand.
1891define amdgpu_ps <5 x float> @dyn_insertelement_v5i32_v_v_s(<5 x float> %vec, float %val, i32 inreg %idx) {
1892; GPRIDX-LABEL: dyn_insertelement_v5i32_v_v_s:
1893; GPRIDX:       ; %bb.0: ; %entry
1894; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 0
1895; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
1896; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 1
1897; GPRIDX-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
1898; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 2
1899; GPRIDX-NEXT:    v_cndmask_b32_e32 v2, v2, v5, vcc
1900; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 3
1901; GPRIDX-NEXT:    v_cndmask_b32_e32 v3, v3, v5, vcc
1902; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 4
1903; GPRIDX-NEXT:    v_cndmask_b32_e32 v4, v4, v5, vcc
1904; GPRIDX-NEXT:    ; return to shader part epilog
1905;
1906; GFX10PLUS-LABEL: dyn_insertelement_v5i32_v_v_s:
1907; GFX10PLUS:       ; %bb.0: ; %entry
1908; GFX10PLUS-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s2, 0
1909; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc_lo
1910; GFX10PLUS-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s2, 1
1911; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc_lo
1912; GFX10PLUS-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s2, 2
1913; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v2, v2, v5, vcc_lo
1914; GFX10PLUS-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s2, 3
1915; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v3, v3, v5, vcc_lo
1916; GFX10PLUS-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s2, 4
1917; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v4, v4, v5, vcc_lo
1918; GFX10PLUS-NEXT:    ; return to shader part epilog
1919entry:
1920  %insert = insertelement <5 x float> %vec, float %val, i32 %idx
1921  ret <5 x float> %insert
1922}
1923
; Dynamic insertelement into <32 x i32> with every operand inreg (the _s_s_s
; suffix); the updated vector is returned directly.
; Expected code: unlike the 3- and 5-element cases, the checks show no
; compare/select chain. The incoming vector (s2-s33) is moved down to s0-s31,
; %idx (s35) is copied to m0, and s_movreld_b32 writes %val (s34) relative
; to s0. GPRIDX has an s_nop between the m0 write and s_movreld; GFX10PLUS
; instead sets m0 right after the first s_mov and needs no s_nop.
; The check lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with the script instead of editing by hand.
1924define amdgpu_ps <32 x i32> @dyn_insertelement_v32i32_s_s_s(<32 x i32> inreg %vec, i32 inreg %val, i32 inreg %idx) {
1925; GPRIDX-LABEL: dyn_insertelement_v32i32_s_s_s:
1926; GPRIDX:       ; %bb.0: ; %entry
1927; GPRIDX-NEXT:    s_mov_b32 s0, s2
1928; GPRIDX-NEXT:    s_mov_b32 s1, s3
1929; GPRIDX-NEXT:    s_mov_b32 s2, s4
1930; GPRIDX-NEXT:    s_mov_b32 s3, s5
1931; GPRIDX-NEXT:    s_mov_b32 s4, s6
1932; GPRIDX-NEXT:    s_mov_b32 s5, s7
1933; GPRIDX-NEXT:    s_mov_b32 s6, s8
1934; GPRIDX-NEXT:    s_mov_b32 s7, s9
1935; GPRIDX-NEXT:    s_mov_b32 s8, s10
1936; GPRIDX-NEXT:    s_mov_b32 s9, s11
1937; GPRIDX-NEXT:    s_mov_b32 s10, s12
1938; GPRIDX-NEXT:    s_mov_b32 s11, s13
1939; GPRIDX-NEXT:    s_mov_b32 s12, s14
1940; GPRIDX-NEXT:    s_mov_b32 s13, s15
1941; GPRIDX-NEXT:    s_mov_b32 s14, s16
1942; GPRIDX-NEXT:    s_mov_b32 s15, s17
1943; GPRIDX-NEXT:    s_mov_b32 s16, s18
1944; GPRIDX-NEXT:    s_mov_b32 s17, s19
1945; GPRIDX-NEXT:    s_mov_b32 s18, s20
1946; GPRIDX-NEXT:    s_mov_b32 s19, s21
1947; GPRIDX-NEXT:    s_mov_b32 s20, s22
1948; GPRIDX-NEXT:    s_mov_b32 s21, s23
1949; GPRIDX-NEXT:    s_mov_b32 s22, s24
1950; GPRIDX-NEXT:    s_mov_b32 s23, s25
1951; GPRIDX-NEXT:    s_mov_b32 s24, s26
1952; GPRIDX-NEXT:    s_mov_b32 s25, s27
1953; GPRIDX-NEXT:    s_mov_b32 s26, s28
1954; GPRIDX-NEXT:    s_mov_b32 s27, s29
1955; GPRIDX-NEXT:    s_mov_b32 s28, s30
1956; GPRIDX-NEXT:    s_mov_b32 s29, s31
1957; GPRIDX-NEXT:    s_mov_b32 s31, s33
1958; GPRIDX-NEXT:    s_mov_b32 s30, s32
1959; GPRIDX-NEXT:    s_mov_b32 m0, s35
1960; GPRIDX-NEXT:    s_nop 0
1961; GPRIDX-NEXT:    s_movreld_b32 s0, s34
1962; GPRIDX-NEXT:    ; return to shader part epilog
1963;
1964; GFX10PLUS-LABEL: dyn_insertelement_v32i32_s_s_s:
1965; GFX10PLUS:       ; %bb.0: ; %entry
1966; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
1967; GFX10PLUS-NEXT:    s_mov_b32 m0, s35
1968; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
1969; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
1970; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
1971; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
1972; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
1973; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
1974; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
1975; GFX10PLUS-NEXT:    s_mov_b32 s8, s10
1976; GFX10PLUS-NEXT:    s_mov_b32 s9, s11
1977; GFX10PLUS-NEXT:    s_mov_b32 s10, s12
1978; GFX10PLUS-NEXT:    s_mov_b32 s11, s13
1979; GFX10PLUS-NEXT:    s_mov_b32 s12, s14
1980; GFX10PLUS-NEXT:    s_mov_b32 s13, s15
1981; GFX10PLUS-NEXT:    s_mov_b32 s14, s16
1982; GFX10PLUS-NEXT:    s_mov_b32 s15, s17
1983; GFX10PLUS-NEXT:    s_mov_b32 s16, s18
1984; GFX10PLUS-NEXT:    s_mov_b32 s17, s19
1985; GFX10PLUS-NEXT:    s_mov_b32 s18, s20
1986; GFX10PLUS-NEXT:    s_mov_b32 s19, s21
1987; GFX10PLUS-NEXT:    s_mov_b32 s20, s22
1988; GFX10PLUS-NEXT:    s_mov_b32 s21, s23
1989; GFX10PLUS-NEXT:    s_mov_b32 s22, s24
1990; GFX10PLUS-NEXT:    s_mov_b32 s23, s25
1991; GFX10PLUS-NEXT:    s_mov_b32 s24, s26
1992; GFX10PLUS-NEXT:    s_mov_b32 s25, s27
1993; GFX10PLUS-NEXT:    s_mov_b32 s26, s28
1994; GFX10PLUS-NEXT:    s_mov_b32 s27, s29
1995; GFX10PLUS-NEXT:    s_mov_b32 s28, s30
1996; GFX10PLUS-NEXT:    s_mov_b32 s29, s31
1997; GFX10PLUS-NEXT:    s_mov_b32 s31, s33
1998; GFX10PLUS-NEXT:    s_mov_b32 s30, s32
1999; GFX10PLUS-NEXT:    s_movreld_b32 s0, s34
2000; GFX10PLUS-NEXT:    ; return to shader part epilog
2001entry:
2002  %insert = insertelement <32 x i32> %vec, i32 %val, i32 %idx
2003  ret <32 x i32> %insert
2004}
2005
; Dynamic insertelement into <32 x float> where the vector and the inserted
; value are ordinary (non-inreg) arguments and only the index is inreg.
; gfx900 is expected to bracket a v_mov_b32 with s_set_gpr_idx_on/off, while
; gfx1010 and gfx1100 are expected to load m0 and use v_movreld_b32.
; NOTE(review): the function name says v32i32 but the IR type is <32 x float>;
; confirm whether that is intentional before renaming, since the check lines
; embed the function name.
2006define amdgpu_ps <32 x float> @dyn_insertelement_v32i32_v_v_s(<32 x float> %vec, float %val, i32 inreg %idx) {
2007; GPRIDX-LABEL: dyn_insertelement_v32i32_v_v_s:
2008; GPRIDX:       ; %bb.0: ; %entry
2009; GPRIDX-NEXT:    s_set_gpr_idx_on s2, gpr_idx(DST)
2010; GPRIDX-NEXT:    v_mov_b32_e32 v0, v32
2011; GPRIDX-NEXT:    s_set_gpr_idx_off
2012; GPRIDX-NEXT:    ; return to shader part epilog
2013;
2014; GFX10PLUS-LABEL: dyn_insertelement_v32i32_v_v_s:
2015; GFX10PLUS:       ; %bb.0: ; %entry
2016; GFX10PLUS-NEXT:    s_mov_b32 m0, s2
2017; GFX10PLUS-NEXT:    v_movreld_b32_e32 v0, v32
2018; GFX10PLUS-NEXT:    ; return to shader part epilog
2019entry:
2020  %insert = insertelement <32 x float> %vec, float %val, i32 %idx
2021  ret <32 x float> %insert
2022}
2023
; Dynamic insertelement into <8 x float> (all operands inreg) with the index
; offset by a constant 1. gfx900 expects an explicit s_add_i32 followed by
; eight s_cmp_eq_u32/s_cselect_b32 pairs. gfx1010 and gfx1100 expect no add
; instruction: m0 is loaded from the unmodified index and the s_movreld_b32
; destination base is s1, so the +1 appears to be folded into the base
; register. On every target the result is then copied into v0..v7.
2024define amdgpu_ps <8 x float> @dyn_insertelement_v8f32_s_s_s_add_1(<8 x float> inreg %vec, float inreg %val, i32 inreg %idx) {
2025; GPRIDX-LABEL: dyn_insertelement_v8f32_s_s_s_add_1:
2026; GPRIDX:       ; %bb.0: ; %entry
2027; GPRIDX-NEXT:    s_add_i32 s11, s11, 1
2028; GPRIDX-NEXT:    s_cmp_eq_u32 s11, 0
2029; GPRIDX-NEXT:    s_cselect_b32 s0, s10, s2
2030; GPRIDX-NEXT:    s_cmp_eq_u32 s11, 1
2031; GPRIDX-NEXT:    s_cselect_b32 s1, s10, s3
2032; GPRIDX-NEXT:    s_cmp_eq_u32 s11, 2
2033; GPRIDX-NEXT:    s_cselect_b32 s2, s10, s4
2034; GPRIDX-NEXT:    s_cmp_eq_u32 s11, 3
2035; GPRIDX-NEXT:    s_cselect_b32 s3, s10, s5
2036; GPRIDX-NEXT:    s_cmp_eq_u32 s11, 4
2037; GPRIDX-NEXT:    s_cselect_b32 s4, s10, s6
2038; GPRIDX-NEXT:    s_cmp_eq_u32 s11, 5
2039; GPRIDX-NEXT:    s_cselect_b32 s5, s10, s7
2040; GPRIDX-NEXT:    s_cmp_eq_u32 s11, 6
2041; GPRIDX-NEXT:    s_cselect_b32 s6, s10, s8
2042; GPRIDX-NEXT:    s_cmp_eq_u32 s11, 7
2043; GPRIDX-NEXT:    s_cselect_b32 s7, s10, s9
2044; GPRIDX-NEXT:    v_mov_b32_e32 v0, s0
2045; GPRIDX-NEXT:    v_mov_b32_e32 v1, s1
2046; GPRIDX-NEXT:    v_mov_b32_e32 v2, s2
2047; GPRIDX-NEXT:    v_mov_b32_e32 v3, s3
2048; GPRIDX-NEXT:    v_mov_b32_e32 v4, s4
2049; GPRIDX-NEXT:    v_mov_b32_e32 v5, s5
2050; GPRIDX-NEXT:    v_mov_b32_e32 v6, s6
2051; GPRIDX-NEXT:    v_mov_b32_e32 v7, s7
2052; GPRIDX-NEXT:    ; return to shader part epilog
2053;
2054; GFX10-LABEL: dyn_insertelement_v8f32_s_s_s_add_1:
2055; GFX10:       ; %bb.0: ; %entry
2056; GFX10-NEXT:    s_mov_b32 s1, s3
2057; GFX10-NEXT:    s_mov_b32 m0, s11
2058; GFX10-NEXT:    s_mov_b32 s0, s2
2059; GFX10-NEXT:    s_mov_b32 s2, s4
2060; GFX10-NEXT:    s_mov_b32 s3, s5
2061; GFX10-NEXT:    s_mov_b32 s4, s6
2062; GFX10-NEXT:    s_mov_b32 s5, s7
2063; GFX10-NEXT:    s_mov_b32 s6, s8
2064; GFX10-NEXT:    s_mov_b32 s7, s9
2065; GFX10-NEXT:    s_movreld_b32 s1, s10
2066; GFX10-NEXT:    v_mov_b32_e32 v0, s0
2067; GFX10-NEXT:    v_mov_b32_e32 v1, s1
2068; GFX10-NEXT:    v_mov_b32_e32 v2, s2
2069; GFX10-NEXT:    v_mov_b32_e32 v3, s3
2070; GFX10-NEXT:    v_mov_b32_e32 v4, s4
2071; GFX10-NEXT:    v_mov_b32_e32 v5, s5
2072; GFX10-NEXT:    v_mov_b32_e32 v6, s6
2073; GFX10-NEXT:    v_mov_b32_e32 v7, s7
2074; GFX10-NEXT:    ; return to shader part epilog
2075;
2076; GFX11-LABEL: dyn_insertelement_v8f32_s_s_s_add_1:
2077; GFX11:       ; %bb.0: ; %entry
2078; GFX11-NEXT:    s_mov_b32 s1, s3
2079; GFX11-NEXT:    s_mov_b32 m0, s11
2080; GFX11-NEXT:    s_mov_b32 s0, s2
2081; GFX11-NEXT:    s_mov_b32 s2, s4
2082; GFX11-NEXT:    s_mov_b32 s3, s5
2083; GFX11-NEXT:    s_mov_b32 s4, s6
2084; GFX11-NEXT:    s_mov_b32 s5, s7
2085; GFX11-NEXT:    s_mov_b32 s6, s8
2086; GFX11-NEXT:    s_mov_b32 s7, s9
2087; GFX11-NEXT:    s_movreld_b32 s1, s10
2088; GFX11-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
2089; GFX11-NEXT:    v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
2090; GFX11-NEXT:    v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v5, s5
2091; GFX11-NEXT:    v_dual_mov_b32 v6, s6 :: v_dual_mov_b32 v7, s7
2092; GFX11-NEXT:    ; return to shader part epilog
2093entry:
2094  %idx.add = add i32 %idx, 1
2095  %insert = insertelement <8 x float> %vec, float %val, i32 %idx.add
2096  ret <8 x float> %insert
2097}
2098
; Dynamic insertelement into <8 x float> (all operands inreg) with the index
; offset by a constant 7, the last element index of the vector. gfx900 expects
; an explicit s_add_i32 followed by eight s_cmp_eq_u32/s_cselect_b32 pairs.
; gfx1010 and gfx1100 expect no add instruction: m0 is loaded from the
; unmodified index and the s_movreld_b32 destination base is s7, so the +7
; appears to be folded into the base register. On every target the result is
; then copied into v0..v7.
2099define amdgpu_ps <8 x float> @dyn_insertelement_v8f32_s_s_s_add_7(<8 x float> inreg %vec, float inreg %val, i32 inreg %idx) {
2100; GPRIDX-LABEL: dyn_insertelement_v8f32_s_s_s_add_7:
2101; GPRIDX:       ; %bb.0: ; %entry
2102; GPRIDX-NEXT:    s_add_i32 s11, s11, 7
2103; GPRIDX-NEXT:    s_cmp_eq_u32 s11, 0
2104; GPRIDX-NEXT:    s_cselect_b32 s0, s10, s2
2105; GPRIDX-NEXT:    s_cmp_eq_u32 s11, 1
2106; GPRIDX-NEXT:    s_cselect_b32 s1, s10, s3
2107; GPRIDX-NEXT:    s_cmp_eq_u32 s11, 2
2108; GPRIDX-NEXT:    s_cselect_b32 s2, s10, s4
2109; GPRIDX-NEXT:    s_cmp_eq_u32 s11, 3
2110; GPRIDX-NEXT:    s_cselect_b32 s3, s10, s5
2111; GPRIDX-NEXT:    s_cmp_eq_u32 s11, 4
2112; GPRIDX-NEXT:    s_cselect_b32 s4, s10, s6
2113; GPRIDX-NEXT:    s_cmp_eq_u32 s11, 5
2114; GPRIDX-NEXT:    s_cselect_b32 s5, s10, s7
2115; GPRIDX-NEXT:    s_cmp_eq_u32 s11, 6
2116; GPRIDX-NEXT:    s_cselect_b32 s6, s10, s8
2117; GPRIDX-NEXT:    s_cmp_eq_u32 s11, 7
2118; GPRIDX-NEXT:    s_cselect_b32 s7, s10, s9
2119; GPRIDX-NEXT:    v_mov_b32_e32 v0, s0
2120; GPRIDX-NEXT:    v_mov_b32_e32 v1, s1
2121; GPRIDX-NEXT:    v_mov_b32_e32 v2, s2
2122; GPRIDX-NEXT:    v_mov_b32_e32 v3, s3
2123; GPRIDX-NEXT:    v_mov_b32_e32 v4, s4
2124; GPRIDX-NEXT:    v_mov_b32_e32 v5, s5
2125; GPRIDX-NEXT:    v_mov_b32_e32 v6, s6
2126; GPRIDX-NEXT:    v_mov_b32_e32 v7, s7
2127; GPRIDX-NEXT:    ; return to shader part epilog
2128;
2129; GFX10-LABEL: dyn_insertelement_v8f32_s_s_s_add_7:
2130; GFX10:       ; %bb.0: ; %entry
2131; GFX10-NEXT:    s_mov_b32 s1, s3
2132; GFX10-NEXT:    s_mov_b32 s3, s5
2133; GFX10-NEXT:    s_mov_b32 s5, s7
2134; GFX10-NEXT:    s_mov_b32 s7, s9
2135; GFX10-NEXT:    s_mov_b32 m0, s11
2136; GFX10-NEXT:    s_mov_b32 s0, s2
2137; GFX10-NEXT:    s_mov_b32 s2, s4
2138; GFX10-NEXT:    s_mov_b32 s4, s6
2139; GFX10-NEXT:    s_mov_b32 s6, s8
2140; GFX10-NEXT:    s_movreld_b32 s7, s10
2141; GFX10-NEXT:    v_mov_b32_e32 v0, s0
2142; GFX10-NEXT:    v_mov_b32_e32 v1, s1
2143; GFX10-NEXT:    v_mov_b32_e32 v2, s2
2144; GFX10-NEXT:    v_mov_b32_e32 v3, s3
2145; GFX10-NEXT:    v_mov_b32_e32 v4, s4
2146; GFX10-NEXT:    v_mov_b32_e32 v5, s5
2147; GFX10-NEXT:    v_mov_b32_e32 v6, s6
2148; GFX10-NEXT:    v_mov_b32_e32 v7, s7
2149; GFX10-NEXT:    ; return to shader part epilog
2150;
2151; GFX11-LABEL: dyn_insertelement_v8f32_s_s_s_add_7:
2152; GFX11:       ; %bb.0: ; %entry
2153; GFX11-NEXT:    s_mov_b32 s1, s3
2154; GFX11-NEXT:    s_mov_b32 s3, s5
2155; GFX11-NEXT:    s_mov_b32 s5, s7
2156; GFX11-NEXT:    s_mov_b32 s7, s9
2157; GFX11-NEXT:    s_mov_b32 m0, s11
2158; GFX11-NEXT:    s_mov_b32 s0, s2
2159; GFX11-NEXT:    s_mov_b32 s2, s4
2160; GFX11-NEXT:    s_mov_b32 s4, s6
2161; GFX11-NEXT:    s_mov_b32 s6, s8
2162; GFX11-NEXT:    s_movreld_b32 s7, s10
2163; GFX11-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
2164; GFX11-NEXT:    v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
2165; GFX11-NEXT:    v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v5, s5
2166; GFX11-NEXT:    v_dual_mov_b32 v6, s6 :: v_dual_mov_b32 v7, s7
2167; GFX11-NEXT:    ; return to shader part epilog
2168entry:
2169  %idx.add = add i32 %idx, 7
2170  %insert = insertelement <8 x float> %vec, float %val, i32 %idx.add
2171  ret <8 x float> %insert
2172}
2173
; Dynamic insertelement into <8 x float> with the vector, value and index all
; passed as non-inreg arguments and the index offset by a constant 1. Every
; target expects a vector add of the constant followed by eight
; v_cmp_eq_u32/v_cndmask_b32 pairs, one per element; no indexed move is
; expected. gfx900 uses v_add_u32 and vcc, gfx1010/gfx1100 use v_add_nc_u32
; and vcc_lo.
2174define amdgpu_ps <8 x float> @dyn_insertelement_v8f32_v_v_v_add_1(<8 x float> %vec, float %val, i32 %idx) {
2175; GPRIDX-LABEL: dyn_insertelement_v8f32_v_v_v_add_1:
2176; GPRIDX:       ; %bb.0: ; %entry
2177; GPRIDX-NEXT:    v_add_u32_e32 v9, 1, v9
2178; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v9
2179; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc
2180; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v9
2181; GPRIDX-NEXT:    v_cndmask_b32_e32 v1, v1, v8, vcc
2182; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v9
2183; GPRIDX-NEXT:    v_cndmask_b32_e32 v2, v2, v8, vcc
2184; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v9
2185; GPRIDX-NEXT:    v_cndmask_b32_e32 v3, v3, v8, vcc
2186; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v9
2187; GPRIDX-NEXT:    v_cndmask_b32_e32 v4, v4, v8, vcc
2188; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v9
2189; GPRIDX-NEXT:    v_cndmask_b32_e32 v5, v5, v8, vcc
2190; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v9
2191; GPRIDX-NEXT:    v_cndmask_b32_e32 v6, v6, v8, vcc
2192; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 7, v9
2193; GPRIDX-NEXT:    v_cndmask_b32_e32 v7, v7, v8, vcc
2194; GPRIDX-NEXT:    ; return to shader part epilog
2195;
2196; GFX10PLUS-LABEL: dyn_insertelement_v8f32_v_v_v_add_1:
2197; GFX10PLUS:       ; %bb.0: ; %entry
2198; GFX10PLUS-NEXT:    v_add_nc_u32_e32 v9, 1, v9
2199; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v9
2200; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc_lo
2201; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v9
2202; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v1, v1, v8, vcc_lo
2203; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v9
2204; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v2, v2, v8, vcc_lo
2205; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v9
2206; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v3, v3, v8, vcc_lo
2207; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v9
2208; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v4, v4, v8, vcc_lo
2209; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v9
2210; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v5, v5, v8, vcc_lo
2211; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v9
2212; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v6, v6, v8, vcc_lo
2213; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v9
2214; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v7, v7, v8, vcc_lo
2215; GFX10PLUS-NEXT:    ; return to shader part epilog
2216entry:
2217  %idx.add = add i32 %idx, 1
2218  %insert = insertelement <8 x float> %vec, float %val, i32 %idx.add
2219  ret <8 x float> %insert
2220}
2221
; Dynamic insertelement into <8 x float> with the vector, value and index all
; passed as non-inreg arguments and the index offset by a constant 7. Every
; target expects a vector add of the constant followed by eight
; v_cmp_eq_u32/v_cndmask_b32 pairs, one per element; no indexed move is
; expected. gfx900 uses v_add_u32 and vcc, gfx1010/gfx1100 use v_add_nc_u32
; and vcc_lo.
2222define amdgpu_ps <8 x float> @dyn_insertelement_v8f32_v_v_v_add_7(<8 x float> %vec, float %val, i32 %idx) {
2223; GPRIDX-LABEL: dyn_insertelement_v8f32_v_v_v_add_7:
2224; GPRIDX:       ; %bb.0: ; %entry
2225; GPRIDX-NEXT:    v_add_u32_e32 v9, 7, v9
2226; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v9
2227; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc
2228; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v9
2229; GPRIDX-NEXT:    v_cndmask_b32_e32 v1, v1, v8, vcc
2230; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v9
2231; GPRIDX-NEXT:    v_cndmask_b32_e32 v2, v2, v8, vcc
2232; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v9
2233; GPRIDX-NEXT:    v_cndmask_b32_e32 v3, v3, v8, vcc
2234; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v9
2235; GPRIDX-NEXT:    v_cndmask_b32_e32 v4, v4, v8, vcc
2236; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v9
2237; GPRIDX-NEXT:    v_cndmask_b32_e32 v5, v5, v8, vcc
2238; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v9
2239; GPRIDX-NEXT:    v_cndmask_b32_e32 v6, v6, v8, vcc
2240; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 7, v9
2241; GPRIDX-NEXT:    v_cndmask_b32_e32 v7, v7, v8, vcc
2242; GPRIDX-NEXT:    ; return to shader part epilog
2243;
2244; GFX10PLUS-LABEL: dyn_insertelement_v8f32_v_v_v_add_7:
2245; GFX10PLUS:       ; %bb.0: ; %entry
2246; GFX10PLUS-NEXT:    v_add_nc_u32_e32 v9, 7, v9
2247; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v9
2248; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc_lo
2249; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v9
2250; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v1, v1, v8, vcc_lo
2251; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v9
2252; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v2, v2, v8, vcc_lo
2253; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v9
2254; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v3, v3, v8, vcc_lo
2255; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v9
2256; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v4, v4, v8, vcc_lo
2257; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v9
2258; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v5, v5, v8, vcc_lo
2259; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v9
2260; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v6, v6, v8, vcc_lo
2261; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v9
2262; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v7, v7, v8, vcc_lo
2263; GFX10PLUS-NEXT:    ; return to shader part epilog
2264entry:
2265  %idx.add = add i32 %idx, 7
2266  %insert = insertelement <8 x float> %vec, float %val, i32 %idx.add
2267  ret <8 x float> %insert
2268}
2269
; Dynamic insertelement into <8 x double> (all operands inreg) with the index
; offset by a constant 1. The function returns void; the result is split into
; four <2 x double> pieces with shufflevector and each piece is written with a
; volatile store (presumably so that every element of the result stays live).
; Every target expects no separate add instruction: m0 is loaded from the
; unmodified index and s_movreld_b64 uses s[2:3] as its destination base.
; gfx900 additionally expects an s_nop 0 between the m0 write and the movreld.
2270define amdgpu_ps void @dyn_insertelement_v8f64_s_s_s_add_1(<8 x double> inreg %vec, double inreg %val, i32 inreg %idx) {
2271; GPRIDX-LABEL: dyn_insertelement_v8f64_s_s_s_add_1:
2272; GPRIDX:       ; %bb.0: ; %entry
2273; GPRIDX-NEXT:    s_mov_b32 s0, s2
2274; GPRIDX-NEXT:    s_mov_b32 s1, s3
2275; GPRIDX-NEXT:    s_mov_b32 s2, s4
2276; GPRIDX-NEXT:    s_mov_b32 s3, s5
2277; GPRIDX-NEXT:    s_mov_b32 s4, s6
2278; GPRIDX-NEXT:    s_mov_b32 s5, s7
2279; GPRIDX-NEXT:    s_mov_b32 s6, s8
2280; GPRIDX-NEXT:    s_mov_b32 s7, s9
2281; GPRIDX-NEXT:    s_mov_b32 s8, s10
2282; GPRIDX-NEXT:    s_mov_b32 s9, s11
2283; GPRIDX-NEXT:    s_mov_b32 s10, s12
2284; GPRIDX-NEXT:    s_mov_b32 s11, s13
2285; GPRIDX-NEXT:    s_mov_b32 s12, s14
2286; GPRIDX-NEXT:    s_mov_b32 s13, s15
2287; GPRIDX-NEXT:    s_mov_b32 s14, s16
2288; GPRIDX-NEXT:    s_mov_b32 s15, s17
2289; GPRIDX-NEXT:    s_mov_b32 m0, s20
2290; GPRIDX-NEXT:    s_nop 0
2291; GPRIDX-NEXT:    s_movreld_b64 s[2:3], s[18:19]
2292; GPRIDX-NEXT:    v_mov_b32_e32 v0, s0
2293; GPRIDX-NEXT:    v_mov_b32_e32 v1, s1
2294; GPRIDX-NEXT:    v_mov_b32_e32 v2, s2
2295; GPRIDX-NEXT:    v_mov_b32_e32 v3, s3
2296; GPRIDX-NEXT:    global_store_dwordx4 v[0:1], v[0:3], off
2297; GPRIDX-NEXT:    s_waitcnt vmcnt(0)
2298; GPRIDX-NEXT:    v_mov_b32_e32 v0, s4
2299; GPRIDX-NEXT:    v_mov_b32_e32 v1, s5
2300; GPRIDX-NEXT:    v_mov_b32_e32 v2, s6
2301; GPRIDX-NEXT:    v_mov_b32_e32 v3, s7
2302; GPRIDX-NEXT:    global_store_dwordx4 v[0:1], v[0:3], off
2303; GPRIDX-NEXT:    s_waitcnt vmcnt(0)
2304; GPRIDX-NEXT:    v_mov_b32_e32 v0, s8
2305; GPRIDX-NEXT:    v_mov_b32_e32 v1, s9
2306; GPRIDX-NEXT:    v_mov_b32_e32 v2, s10
2307; GPRIDX-NEXT:    v_mov_b32_e32 v3, s11
2308; GPRIDX-NEXT:    global_store_dwordx4 v[0:1], v[0:3], off
2309; GPRIDX-NEXT:    s_waitcnt vmcnt(0)
2310; GPRIDX-NEXT:    v_mov_b32_e32 v0, s12
2311; GPRIDX-NEXT:    v_mov_b32_e32 v1, s13
2312; GPRIDX-NEXT:    v_mov_b32_e32 v2, s14
2313; GPRIDX-NEXT:    v_mov_b32_e32 v3, s15
2314; GPRIDX-NEXT:    global_store_dwordx4 v[0:1], v[0:3], off
2315; GPRIDX-NEXT:    s_waitcnt vmcnt(0)
2316; GPRIDX-NEXT:    s_endpgm
2317;
2318; GFX10-LABEL: dyn_insertelement_v8f64_s_s_s_add_1:
2319; GFX10:       ; %bb.0: ; %entry
2320; GFX10-NEXT:    s_mov_b32 s0, s2
2321; GFX10-NEXT:    s_mov_b32 s1, s3
2322; GFX10-NEXT:    s_mov_b32 s2, s4
2323; GFX10-NEXT:    s_mov_b32 s3, s5
2324; GFX10-NEXT:    s_mov_b32 m0, s20
2325; GFX10-NEXT:    s_mov_b32 s4, s6
2326; GFX10-NEXT:    s_mov_b32 s5, s7
2327; GFX10-NEXT:    s_mov_b32 s6, s8
2328; GFX10-NEXT:    s_mov_b32 s7, s9
2329; GFX10-NEXT:    s_mov_b32 s8, s10
2330; GFX10-NEXT:    s_mov_b32 s9, s11
2331; GFX10-NEXT:    s_mov_b32 s10, s12
2332; GFX10-NEXT:    s_mov_b32 s11, s13
2333; GFX10-NEXT:    s_mov_b32 s12, s14
2334; GFX10-NEXT:    s_mov_b32 s13, s15
2335; GFX10-NEXT:    s_mov_b32 s14, s16
2336; GFX10-NEXT:    s_mov_b32 s15, s17
2337; GFX10-NEXT:    s_movreld_b64 s[2:3], s[18:19]
2338; GFX10-NEXT:    v_mov_b32_e32 v0, s0
2339; GFX10-NEXT:    v_mov_b32_e32 v1, s1
2340; GFX10-NEXT:    v_mov_b32_e32 v2, s2
2341; GFX10-NEXT:    v_mov_b32_e32 v3, s3
2342; GFX10-NEXT:    v_mov_b32_e32 v4, s4
2343; GFX10-NEXT:    v_mov_b32_e32 v5, s5
2344; GFX10-NEXT:    v_mov_b32_e32 v6, s6
2345; GFX10-NEXT:    v_mov_b32_e32 v7, s7
2346; GFX10-NEXT:    v_mov_b32_e32 v8, s8
2347; GFX10-NEXT:    v_mov_b32_e32 v9, s9
2348; GFX10-NEXT:    v_mov_b32_e32 v10, s10
2349; GFX10-NEXT:    v_mov_b32_e32 v11, s11
2350; GFX10-NEXT:    v_mov_b32_e32 v12, s12
2351; GFX10-NEXT:    v_mov_b32_e32 v13, s13
2352; GFX10-NEXT:    v_mov_b32_e32 v14, s14
2353; GFX10-NEXT:    v_mov_b32_e32 v15, s15
2354; GFX10-NEXT:    global_store_dwordx4 v[0:1], v[0:3], off
2355; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
2356; GFX10-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off
2357; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
2358; GFX10-NEXT:    global_store_dwordx4 v[0:1], v[8:11], off
2359; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
2360; GFX10-NEXT:    global_store_dwordx4 v[0:1], v[12:15], off
2361; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
2362; GFX10-NEXT:    s_endpgm
2363;
2364; GFX11-LABEL: dyn_insertelement_v8f64_s_s_s_add_1:
2365; GFX11:       ; %bb.0: ; %entry
2366; GFX11-NEXT:    s_mov_b32 s0, s2
2367; GFX11-NEXT:    s_mov_b32 s1, s3
2368; GFX11-NEXT:    s_mov_b32 s2, s4
2369; GFX11-NEXT:    s_mov_b32 s3, s5
2370; GFX11-NEXT:    s_mov_b32 m0, s20
2371; GFX11-NEXT:    s_mov_b32 s4, s6
2372; GFX11-NEXT:    s_mov_b32 s5, s7
2373; GFX11-NEXT:    s_mov_b32 s6, s8
2374; GFX11-NEXT:    s_mov_b32 s7, s9
2375; GFX11-NEXT:    s_mov_b32 s8, s10
2376; GFX11-NEXT:    s_mov_b32 s9, s11
2377; GFX11-NEXT:    s_mov_b32 s10, s12
2378; GFX11-NEXT:    s_mov_b32 s11, s13
2379; GFX11-NEXT:    s_mov_b32 s12, s14
2380; GFX11-NEXT:    s_mov_b32 s13, s15
2381; GFX11-NEXT:    s_mov_b32 s14, s16
2382; GFX11-NEXT:    s_mov_b32 s15, s17
2383; GFX11-NEXT:    s_movreld_b64 s[2:3], s[18:19]
2384; GFX11-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
2385; GFX11-NEXT:    v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
2386; GFX11-NEXT:    v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v5, s5
2387; GFX11-NEXT:    v_dual_mov_b32 v6, s6 :: v_dual_mov_b32 v7, s7
2388; GFX11-NEXT:    v_dual_mov_b32 v8, s8 :: v_dual_mov_b32 v9, s9
2389; GFX11-NEXT:    v_dual_mov_b32 v10, s10 :: v_dual_mov_b32 v11, s11
2390; GFX11-NEXT:    v_dual_mov_b32 v12, s12 :: v_dual_mov_b32 v13, s13
2391; GFX11-NEXT:    v_dual_mov_b32 v14, s14 :: v_dual_mov_b32 v15, s15
2392; GFX11-NEXT:    global_store_b128 v[0:1], v[0:3], off dlc
2393; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
2394; GFX11-NEXT:    global_store_b128 v[0:1], v[4:7], off dlc
2395; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
2396; GFX11-NEXT:    global_store_b128 v[0:1], v[8:11], off dlc
2397; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
2398; GFX11-NEXT:    global_store_b128 v[0:1], v[12:15], off dlc
2399; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
2400; GFX11-NEXT:    s_endpgm
2401entry:
2402  %idx.add = add i32 %idx, 1
2403  %insert = insertelement <8 x double> %vec, double %val, i32 %idx.add
2404  %vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 0, i32 1>
2405  %vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 2, i32 3>
2406  %vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 4, i32 5>
2407  %vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 6, i32 7>
2408  store volatile <2 x double> %vec.0, ptr addrspace(1) undef
2409  store volatile <2 x double> %vec.1, ptr addrspace(1) undef
2410  store volatile <2 x double> %vec.2, ptr addrspace(1) undef
2411  store volatile <2 x double> %vec.3, ptr addrspace(1) undef
2412  ret void
2413}
2414
; Dynamic insertelement into <8 x double> with the vector, value and index all
; passed as non-inreg arguments and the index offset by a constant 1. The
; function returns void; the result is split into four <2 x double> pieces
; with shufflevector and each piece is written with a volatile store.
; Every target expects a vector add of the constant followed by one
; v_cmp_eq_u32 per element and two conditional moves per element, one for
; each 32-bit register of the inserted value (v16 and v17). gfx1100 expects
; the final pair to be fused into a single v_dual_cndmask_b32.
2415define amdgpu_ps void @dyn_insertelement_v8f64_v_v_v_add_1(<8 x double> %vec, double %val, i32 %idx) {
2416; GPRIDX-LABEL: dyn_insertelement_v8f64_v_v_v_add_1:
2417; GPRIDX:       ; %bb.0: ; %entry
2418; GPRIDX-NEXT:    v_add_u32_e32 v18, 1, v18
2419; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v18
2420; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v16, vcc
2421; GPRIDX-NEXT:    v_cndmask_b32_e32 v1, v1, v17, vcc
2422; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v18
2423; GPRIDX-NEXT:    v_cndmask_b32_e32 v2, v2, v16, vcc
2424; GPRIDX-NEXT:    v_cndmask_b32_e32 v3, v3, v17, vcc
2425; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v18
2426; GPRIDX-NEXT:    v_cndmask_b32_e32 v4, v4, v16, vcc
2427; GPRIDX-NEXT:    v_cndmask_b32_e32 v5, v5, v17, vcc
2428; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v18
2429; GPRIDX-NEXT:    v_cndmask_b32_e32 v6, v6, v16, vcc
2430; GPRIDX-NEXT:    v_cndmask_b32_e32 v7, v7, v17, vcc
2431; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v18
2432; GPRIDX-NEXT:    v_cndmask_b32_e32 v8, v8, v16, vcc
2433; GPRIDX-NEXT:    v_cndmask_b32_e32 v9, v9, v17, vcc
2434; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v18
2435; GPRIDX-NEXT:    v_cndmask_b32_e32 v10, v10, v16, vcc
2436; GPRIDX-NEXT:    v_cndmask_b32_e32 v11, v11, v17, vcc
2437; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v18
2438; GPRIDX-NEXT:    v_cndmask_b32_e32 v12, v12, v16, vcc
2439; GPRIDX-NEXT:    v_cndmask_b32_e32 v13, v13, v17, vcc
2440; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 7, v18
2441; GPRIDX-NEXT:    v_cndmask_b32_e32 v14, v14, v16, vcc
2442; GPRIDX-NEXT:    v_cndmask_b32_e32 v15, v15, v17, vcc
2443; GPRIDX-NEXT:    global_store_dwordx4 v[0:1], v[0:3], off
2444; GPRIDX-NEXT:    s_waitcnt vmcnt(0)
2445; GPRIDX-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off
2446; GPRIDX-NEXT:    s_waitcnt vmcnt(0)
2447; GPRIDX-NEXT:    global_store_dwordx4 v[0:1], v[8:11], off
2448; GPRIDX-NEXT:    s_waitcnt vmcnt(0)
2449; GPRIDX-NEXT:    global_store_dwordx4 v[0:1], v[12:15], off
2450; GPRIDX-NEXT:    s_waitcnt vmcnt(0)
2451; GPRIDX-NEXT:    s_endpgm
2452;
2453; GFX10-LABEL: dyn_insertelement_v8f64_v_v_v_add_1:
2454; GFX10:       ; %bb.0: ; %entry
2455; GFX10-NEXT:    v_add_nc_u32_e32 v18, 1, v18
2456; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v18
2457; GFX10-NEXT:    v_cmp_eq_u32_e64 s0, 1, v18
2458; GFX10-NEXT:    v_cmp_eq_u32_e64 s1, 7, v18
2459; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v16, vcc_lo
2460; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v17, vcc_lo
2461; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v18
2462; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, v16, s0
2463; GFX10-NEXT:    v_cndmask_b32_e64 v3, v3, v17, s0
2464; GFX10-NEXT:    v_cmp_eq_u32_e64 s0, 3, v18
2465; GFX10-NEXT:    v_cndmask_b32_e64 v14, v14, v16, s1
2466; GFX10-NEXT:    v_cndmask_b32_e32 v4, v4, v16, vcc_lo
2467; GFX10-NEXT:    v_cndmask_b32_e32 v5, v5, v17, vcc_lo
2468; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v18
2469; GFX10-NEXT:    v_cndmask_b32_e64 v6, v6, v16, s0
2470; GFX10-NEXT:    v_cndmask_b32_e64 v7, v7, v17, s0
2471; GFX10-NEXT:    v_cmp_eq_u32_e64 s0, 5, v18
2472; GFX10-NEXT:    v_cndmask_b32_e64 v15, v15, v17, s1
2473; GFX10-NEXT:    v_cndmask_b32_e32 v8, v8, v16, vcc_lo
2474; GFX10-NEXT:    v_cndmask_b32_e32 v9, v9, v17, vcc_lo
2475; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v18
2476; GFX10-NEXT:    v_cndmask_b32_e64 v10, v10, v16, s0
2477; GFX10-NEXT:    v_cndmask_b32_e64 v11, v11, v17, s0
2478; GFX10-NEXT:    v_cndmask_b32_e32 v12, v12, v16, vcc_lo
2479; GFX10-NEXT:    v_cndmask_b32_e32 v13, v13, v17, vcc_lo
2480; GFX10-NEXT:    global_store_dwordx4 v[0:1], v[0:3], off
2481; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
2482; GFX10-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off
2483; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
2484; GFX10-NEXT:    global_store_dwordx4 v[0:1], v[8:11], off
2485; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
2486; GFX10-NEXT:    global_store_dwordx4 v[0:1], v[12:15], off
2487; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
2488; GFX10-NEXT:    s_endpgm
2489;
2490; GFX11-LABEL: dyn_insertelement_v8f64_v_v_v_add_1:
2491; GFX11:       ; %bb.0: ; %entry
2492; GFX11-NEXT:    v_add_nc_u32_e32 v18, 1, v18
2493; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v18
2494; GFX11-NEXT:    v_cndmask_b32_e32 v1, v1, v17, vcc_lo
2495; GFX11-NEXT:    v_cmp_eq_u32_e64 s0, 1, v18
2496; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v16, vcc_lo
2497; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v18
2498; GFX11-NEXT:    v_cmp_eq_u32_e64 s1, 7, v18
2499; GFX11-NEXT:    v_cndmask_b32_e64 v2, v2, v16, s0
2500; GFX11-NEXT:    v_cndmask_b32_e64 v3, v3, v17, s0
2501; GFX11-NEXT:    v_cndmask_b32_e32 v5, v5, v17, vcc_lo
2502; GFX11-NEXT:    v_cmp_eq_u32_e64 s0, 3, v18
2503; GFX11-NEXT:    v_cndmask_b32_e32 v4, v4, v16, vcc_lo
2504; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v18
2505; GFX11-NEXT:    v_cndmask_b32_e64 v14, v14, v16, s1
2506; GFX11-NEXT:    v_cndmask_b32_e64 v15, v15, v17, s1
2507; GFX11-NEXT:    v_cndmask_b32_e64 v6, v6, v16, s0
2508; GFX11-NEXT:    v_cndmask_b32_e64 v7, v7, v17, s0
2509; GFX11-NEXT:    v_cndmask_b32_e32 v9, v9, v17, vcc_lo
2510; GFX11-NEXT:    v_cmp_eq_u32_e64 s0, 5, v18
2511; GFX11-NEXT:    v_cndmask_b32_e32 v8, v8, v16, vcc_lo
2512; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v18
2513; GFX11-NEXT:    v_cndmask_b32_e64 v10, v10, v16, s0
2514; GFX11-NEXT:    v_cndmask_b32_e64 v11, v11, v17, s0
2515; GFX11-NEXT:    v_dual_cndmask_b32 v13, v13, v17 :: v_dual_cndmask_b32 v12, v12, v16
2516; GFX11-NEXT:    global_store_b128 v[0:1], v[0:3], off dlc
2517; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
2518; GFX11-NEXT:    global_store_b128 v[0:1], v[4:7], off dlc
2519; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
2520; GFX11-NEXT:    global_store_b128 v[0:1], v[8:11], off dlc
2521; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
2522; GFX11-NEXT:    global_store_b128 v[0:1], v[12:15], off dlc
2523; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
2524; GFX11-NEXT:    s_endpgm
2525entry:
2526  %idx.add = add i32 %idx, 1
2527  %insert = insertelement <8 x double> %vec, double %val, i32 %idx.add
2528  %vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 0, i32 1>
2529  %vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 2, i32 3>
2530  %vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 4, i32 5>
2531  %vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 6, i32 7>
2532  store volatile <2 x double> %vec.0, ptr addrspace(1) undef
2533  store volatile <2 x double> %vec.1, ptr addrspace(1) undef
2534  store volatile <2 x double> %vec.2, ptr addrspace(1) undef
2535  store volatile <2 x double> %vec.3, ptr addrspace(1) undef
2536  ret void
2537}
2538
; Dynamic insertelement into <9 x float> where the vector and the index are
; inreg but the inserted value is a non-inreg argument. Every target expects
; the nine vector elements to be copied into v0..v8 and the inserted value to
; be saved in v9 before the indexed write. gfx900 then expects a v_mov_b32
; bracketed by s_set_gpr_idx_on/off; gfx1010 and gfx1100 expect m0 to be
; loaded from the index followed by v_movreld_b32.
2539define amdgpu_ps <9 x float> @dyn_insertelement_v9f32_s_v_s(<9 x float> inreg %vec, float %val, i32 inreg %idx) {
2540; GPRIDX-LABEL: dyn_insertelement_v9f32_s_v_s:
2541; GPRIDX:       ; %bb.0: ; %entry
2542; GPRIDX-NEXT:    s_mov_b32 s0, s2
2543; GPRIDX-NEXT:    s_mov_b32 s1, s3
2544; GPRIDX-NEXT:    s_mov_b32 s2, s4
2545; GPRIDX-NEXT:    s_mov_b32 s3, s5
2546; GPRIDX-NEXT:    s_mov_b32 s4, s6
2547; GPRIDX-NEXT:    s_mov_b32 s5, s7
2548; GPRIDX-NEXT:    s_mov_b32 s6, s8
2549; GPRIDX-NEXT:    s_mov_b32 s7, s9
2550; GPRIDX-NEXT:    s_mov_b32 s8, s10
2551; GPRIDX-NEXT:    v_mov_b32_e32 v9, v0
2552; GPRIDX-NEXT:    v_mov_b32_e32 v0, s0
2553; GPRIDX-NEXT:    v_mov_b32_e32 v1, s1
2554; GPRIDX-NEXT:    v_mov_b32_e32 v2, s2
2555; GPRIDX-NEXT:    v_mov_b32_e32 v3, s3
2556; GPRIDX-NEXT:    v_mov_b32_e32 v4, s4
2557; GPRIDX-NEXT:    v_mov_b32_e32 v5, s5
2558; GPRIDX-NEXT:    v_mov_b32_e32 v6, s6
2559; GPRIDX-NEXT:    v_mov_b32_e32 v7, s7
2560; GPRIDX-NEXT:    v_mov_b32_e32 v8, s8
2561; GPRIDX-NEXT:    s_set_gpr_idx_on s11, gpr_idx(DST)
2562; GPRIDX-NEXT:    v_mov_b32_e32 v0, v9
2563; GPRIDX-NEXT:    s_set_gpr_idx_off
2564; GPRIDX-NEXT:    ; return to shader part epilog
2565;
2566; GFX10-LABEL: dyn_insertelement_v9f32_s_v_s:
2567; GFX10:       ; %bb.0: ; %entry
2568; GFX10-NEXT:    s_mov_b32 s0, s2
2569; GFX10-NEXT:    s_mov_b32 s1, s3
2570; GFX10-NEXT:    s_mov_b32 s2, s4
2571; GFX10-NEXT:    s_mov_b32 s3, s5
2572; GFX10-NEXT:    s_mov_b32 s4, s6
2573; GFX10-NEXT:    s_mov_b32 s5, s7
2574; GFX10-NEXT:    s_mov_b32 s6, s8
2575; GFX10-NEXT:    s_mov_b32 s7, s9
2576; GFX10-NEXT:    s_mov_b32 s8, s10
2577; GFX10-NEXT:    v_mov_b32_e32 v9, v0
2578; GFX10-NEXT:    v_mov_b32_e32 v0, s0
2579; GFX10-NEXT:    s_mov_b32 m0, s11
2580; GFX10-NEXT:    v_mov_b32_e32 v1, s1
2581; GFX10-NEXT:    v_mov_b32_e32 v2, s2
2582; GFX10-NEXT:    v_mov_b32_e32 v3, s3
2583; GFX10-NEXT:    v_mov_b32_e32 v4, s4
2584; GFX10-NEXT:    v_mov_b32_e32 v5, s5
2585; GFX10-NEXT:    v_mov_b32_e32 v6, s6
2586; GFX10-NEXT:    v_mov_b32_e32 v7, s7
2587; GFX10-NEXT:    v_mov_b32_e32 v8, s8
2588; GFX10-NEXT:    v_movreld_b32_e32 v0, v9
2589; GFX10-NEXT:    ; return to shader part epilog
2590;
2591; GFX11-LABEL: dyn_insertelement_v9f32_s_v_s:
2592; GFX11:       ; %bb.0: ; %entry
2593; GFX11-NEXT:    s_mov_b32 s0, s2
2594; GFX11-NEXT:    s_mov_b32 s1, s3
2595; GFX11-NEXT:    s_mov_b32 s2, s4
2596; GFX11-NEXT:    s_mov_b32 s3, s5
2597; GFX11-NEXT:    s_mov_b32 s4, s6
2598; GFX11-NEXT:    s_mov_b32 s5, s7
2599; GFX11-NEXT:    s_mov_b32 s6, s8
2600; GFX11-NEXT:    s_mov_b32 s7, s9
2601; GFX11-NEXT:    s_mov_b32 s8, s10
2602; GFX11-NEXT:    v_dual_mov_b32 v9, v0 :: v_dual_mov_b32 v0, s0
2603; GFX11-NEXT:    s_mov_b32 m0, s11
2604; GFX11-NEXT:    v_dual_mov_b32 v1, s1 :: v_dual_mov_b32 v2, s2
2605; GFX11-NEXT:    v_dual_mov_b32 v3, s3 :: v_dual_mov_b32 v4, s4
2606; GFX11-NEXT:    v_dual_mov_b32 v5, s5 :: v_dual_mov_b32 v6, s6
2607; GFX11-NEXT:    v_dual_mov_b32 v7, s7 :: v_dual_mov_b32 v8, s8
2608; GFX11-NEXT:    v_movreld_b32_e32 v0, v9
2609; GFX11-NEXT:    ; return to shader part epilog
2610entry:
2611  %insert = insertelement <9 x float> %vec, float %val, i32 %idx
2612  ret <9 x float> %insert
2613}
2614
; Dynamic insertelement on <9 x float>: %vec is inreg, %val and %idx are not.
; The IR body inserts %val into %vec at the runtime index %idx and returns the
; resulting vector.
; All three check prefixes below expect one v_cmp_eq_u32 / v_cndmask_b32 pair
; per element (constant indices 0-8 compared against v1), choosing between the
; incoming element (s2-s10) and v0. The GPRIDX checks first copy each SGPR
; into a VGPR with v_mov_b32, while the GFX10 and GFX11 checks pass the SGPR
; directly to v_cndmask_b32. The results for elements 0 and 1 are built in
; scratch VGPRs (v10, v9) and moved into v0/v1 only at the end, presumably
; because v0 and v1 are still being read as inputs up to that point.
2615define amdgpu_ps <9 x float> @dyn_insertelement_v9f32_s_v_v(<9 x float> inreg %vec, float %val, i32 %idx) {
2616; GPRIDX-LABEL: dyn_insertelement_v9f32_s_v_v:
2617; GPRIDX:       ; %bb.0: ; %entry
2618; GPRIDX-NEXT:    v_mov_b32_e32 v2, s2
2619; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
2620; GPRIDX-NEXT:    v_mov_b32_e32 v3, s3
2621; GPRIDX-NEXT:    v_cndmask_b32_e32 v10, v2, v0, vcc
2622; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v1
2623; GPRIDX-NEXT:    v_mov_b32_e32 v4, s4
2624; GPRIDX-NEXT:    v_cndmask_b32_e32 v9, v3, v0, vcc
2625; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v1
2626; GPRIDX-NEXT:    v_mov_b32_e32 v5, s5
2627; GPRIDX-NEXT:    v_cndmask_b32_e32 v2, v4, v0, vcc
2628; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v1
2629; GPRIDX-NEXT:    v_mov_b32_e32 v6, s6
2630; GPRIDX-NEXT:    v_cndmask_b32_e32 v3, v5, v0, vcc
2631; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v1
2632; GPRIDX-NEXT:    v_mov_b32_e32 v7, s7
2633; GPRIDX-NEXT:    v_cndmask_b32_e32 v4, v6, v0, vcc
2634; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v1
2635; GPRIDX-NEXT:    v_mov_b32_e32 v8, s8
2636; GPRIDX-NEXT:    v_cndmask_b32_e32 v5, v7, v0, vcc
2637; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v1
2638; GPRIDX-NEXT:    v_mov_b32_e32 v11, s9
2639; GPRIDX-NEXT:    v_cndmask_b32_e32 v6, v8, v0, vcc
2640; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 7, v1
2641; GPRIDX-NEXT:    v_mov_b32_e32 v12, s10
2642; GPRIDX-NEXT:    v_cndmask_b32_e32 v7, v11, v0, vcc
2643; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 8, v1
2644; GPRIDX-NEXT:    v_cndmask_b32_e32 v8, v12, v0, vcc
2645; GPRIDX-NEXT:    v_mov_b32_e32 v0, v10
2646; GPRIDX-NEXT:    v_mov_b32_e32 v1, v9
2647; GPRIDX-NEXT:    ; return to shader part epilog
2648;
2649; GFX10-LABEL: dyn_insertelement_v9f32_s_v_v:
2650; GFX10:       ; %bb.0: ; %entry
2651; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
2652; GFX10-NEXT:    v_cndmask_b32_e32 v10, s2, v0, vcc_lo
2653; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v1
2654; GFX10-NEXT:    v_cndmask_b32_e32 v9, s3, v0, vcc_lo
2655; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v1
2656; GFX10-NEXT:    v_cndmask_b32_e32 v2, s4, v0, vcc_lo
2657; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v1
2658; GFX10-NEXT:    v_cndmask_b32_e32 v3, s5, v0, vcc_lo
2659; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v1
2660; GFX10-NEXT:    v_cndmask_b32_e32 v4, s6, v0, vcc_lo
2661; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v1
2662; GFX10-NEXT:    v_cndmask_b32_e32 v5, s7, v0, vcc_lo
2663; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v1
2664; GFX10-NEXT:    v_cndmask_b32_e32 v6, s8, v0, vcc_lo
2665; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v1
2666; GFX10-NEXT:    v_cndmask_b32_e32 v7, s9, v0, vcc_lo
2667; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 8, v1
2668; GFX10-NEXT:    v_mov_b32_e32 v1, v9
2669; GFX10-NEXT:    v_cndmask_b32_e32 v8, s10, v0, vcc_lo
2670; GFX10-NEXT:    v_mov_b32_e32 v0, v10
2671; GFX10-NEXT:    ; return to shader part epilog
2672;
2673; GFX11-LABEL: dyn_insertelement_v9f32_s_v_v:
2674; GFX11:       ; %bb.0: ; %entry
2675; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
2676; GFX11-NEXT:    v_cndmask_b32_e32 v10, s2, v0, vcc_lo
2677; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v1
2678; GFX11-NEXT:    v_cndmask_b32_e32 v9, s3, v0, vcc_lo
2679; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v1
2680; GFX11-NEXT:    v_cndmask_b32_e32 v2, s4, v0, vcc_lo
2681; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v1
2682; GFX11-NEXT:    v_cndmask_b32_e32 v3, s5, v0, vcc_lo
2683; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v1
2684; GFX11-NEXT:    v_cndmask_b32_e32 v4, s6, v0, vcc_lo
2685; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v1
2686; GFX11-NEXT:    v_cndmask_b32_e32 v5, s7, v0, vcc_lo
2687; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v1
2688; GFX11-NEXT:    v_cndmask_b32_e32 v6, s8, v0, vcc_lo
2689; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v1
2690; GFX11-NEXT:    v_cndmask_b32_e32 v7, s9, v0, vcc_lo
2691; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 8, v1
2692; GFX11-NEXT:    v_dual_mov_b32 v1, v9 :: v_dual_cndmask_b32 v8, s10, v0
2693; GFX11-NEXT:    v_mov_b32_e32 v0, v10
2694; GFX11-NEXT:    ; return to shader part epilog
2695entry:
2696  %insert = insertelement <9 x float> %vec, float %val, i32 %idx
2697  ret <9 x float> %insert
2698}
2699
; Dynamic insertelement on <9 x float> with only %idx marked inreg.
; The IR body inserts %val into %vec at the runtime index %idx and returns the
; resulting vector.
; No per-element compare/select sequence is expected here. The GPRIDX checks
; expect a single v_mov_b32 of v9 into v0 bracketed by
; s_set_gpr_idx_on s2, gpr_idx(DST) and s_set_gpr_idx_off; the shared
; GFX10PLUS checks expect s2 copied into m0 followed by v_movreld_b32 v0, v9.
2700define amdgpu_ps <9 x float> @dyn_insertelement_v9f32_v_v_s(<9 x float> %vec, float %val, i32 inreg %idx) {
2701; GPRIDX-LABEL: dyn_insertelement_v9f32_v_v_s:
2702; GPRIDX:       ; %bb.0: ; %entry
2703; GPRIDX-NEXT:    s_set_gpr_idx_on s2, gpr_idx(DST)
2704; GPRIDX-NEXT:    v_mov_b32_e32 v0, v9
2705; GPRIDX-NEXT:    s_set_gpr_idx_off
2706; GPRIDX-NEXT:    ; return to shader part epilog
2707;
2708; GFX10PLUS-LABEL: dyn_insertelement_v9f32_v_v_s:
2709; GFX10PLUS:       ; %bb.0: ; %entry
2710; GFX10PLUS-NEXT:    s_mov_b32 m0, s2
2711; GFX10PLUS-NEXT:    v_movreld_b32_e32 v0, v9
2712; GFX10PLUS-NEXT:    ; return to shader part epilog
2713entry:
2714  %insert = insertelement <9 x float> %vec, float %val, i32 %idx
2715  ret <9 x float> %insert
2716}
2717
; Dynamic insertelement on <9 x float> with no inreg arguments.
; The IR body inserts %val into %vec at the runtime index %idx and returns the
; resulting vector.
; Both check prefixes expect one v_cmp_eq_u32 / v_cndmask_b32 pair per element
; (constant indices 0-8 compared against v10), each updating v0-v8 in place
; with v9 as the other select operand. The GPRIDX checks use vcc and the
; GFX10PLUS checks use vcc_lo as the condition register.
2718define amdgpu_ps <9 x float> @dyn_insertelement_v9f32_v_v_v(<9 x float> %vec, float %val, i32 %idx) {
2719; GPRIDX-LABEL: dyn_insertelement_v9f32_v_v_v:
2720; GPRIDX:       ; %bb.0: ; %entry
2721; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v10
2722; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v9, vcc
2723; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v10
2724; GPRIDX-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc
2725; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v10
2726; GPRIDX-NEXT:    v_cndmask_b32_e32 v2, v2, v9, vcc
2727; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v10
2728; GPRIDX-NEXT:    v_cndmask_b32_e32 v3, v3, v9, vcc
2729; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v10
2730; GPRIDX-NEXT:    v_cndmask_b32_e32 v4, v4, v9, vcc
2731; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v10
2732; GPRIDX-NEXT:    v_cndmask_b32_e32 v5, v5, v9, vcc
2733; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v10
2734; GPRIDX-NEXT:    v_cndmask_b32_e32 v6, v6, v9, vcc
2735; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 7, v10
2736; GPRIDX-NEXT:    v_cndmask_b32_e32 v7, v7, v9, vcc
2737; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 8, v10
2738; GPRIDX-NEXT:    v_cndmask_b32_e32 v8, v8, v9, vcc
2739; GPRIDX-NEXT:    ; return to shader part epilog
2740;
2741; GFX10PLUS-LABEL: dyn_insertelement_v9f32_v_v_v:
2742; GFX10PLUS:       ; %bb.0: ; %entry
2743; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v10
2744; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v9, vcc_lo
2745; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v10
2746; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc_lo
2747; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v10
2748; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v2, v2, v9, vcc_lo
2749; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v10
2750; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v3, v3, v9, vcc_lo
2751; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v10
2752; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v4, v4, v9, vcc_lo
2753; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v10
2754; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v5, v5, v9, vcc_lo
2755; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v10
2756; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v6, v6, v9, vcc_lo
2757; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v10
2758; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v7, v7, v9, vcc_lo
2759; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 8, v10
2760; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v8, v8, v9, vcc_lo
2761; GFX10PLUS-NEXT:    ; return to shader part epilog
2762entry:
2763  %insert = insertelement <9 x float> %vec, float %val, i32 %idx
2764  ret <9 x float> %insert
2765}
2766
; Dynamic insertelement on <10 x float>: %vec and %idx are inreg, %val is not.
; The IR body inserts %val into %vec at the runtime index %idx and returns the
; resulting vector.
; Every check prefix expects the same overall shape: s2-s11 are shifted down
; into s0-s9 with s_mov_b32, v0 is saved to v10, s0-s9 are copied into v0-v9,
; and a single indexed move with destination v0 and source v10 follows. The
; GPRIDX checks index through s_set_gpr_idx_on s12, gpr_idx(DST); the GFX10
; and GFX11 checks load m0 from s12 and use v_movreld_b32. The GFX11 checks
; additionally pair the SGPR-to-VGPR copies into v_dual_mov_b32 instructions.
2767define amdgpu_ps <10 x float> @dyn_insertelement_v10f32_s_v_s(<10 x float> inreg %vec, float %val, i32 inreg %idx) {
2768; GPRIDX-LABEL: dyn_insertelement_v10f32_s_v_s:
2769; GPRIDX:       ; %bb.0: ; %entry
2770; GPRIDX-NEXT:    s_mov_b32 s0, s2
2771; GPRIDX-NEXT:    s_mov_b32 s1, s3
2772; GPRIDX-NEXT:    s_mov_b32 s2, s4
2773; GPRIDX-NEXT:    s_mov_b32 s3, s5
2774; GPRIDX-NEXT:    s_mov_b32 s4, s6
2775; GPRIDX-NEXT:    s_mov_b32 s5, s7
2776; GPRIDX-NEXT:    s_mov_b32 s6, s8
2777; GPRIDX-NEXT:    s_mov_b32 s7, s9
2778; GPRIDX-NEXT:    s_mov_b32 s8, s10
2779; GPRIDX-NEXT:    s_mov_b32 s9, s11
2780; GPRIDX-NEXT:    v_mov_b32_e32 v10, v0
2781; GPRIDX-NEXT:    v_mov_b32_e32 v0, s0
2782; GPRIDX-NEXT:    v_mov_b32_e32 v1, s1
2783; GPRIDX-NEXT:    v_mov_b32_e32 v2, s2
2784; GPRIDX-NEXT:    v_mov_b32_e32 v3, s3
2785; GPRIDX-NEXT:    v_mov_b32_e32 v4, s4
2786; GPRIDX-NEXT:    v_mov_b32_e32 v5, s5
2787; GPRIDX-NEXT:    v_mov_b32_e32 v6, s6
2788; GPRIDX-NEXT:    v_mov_b32_e32 v7, s7
2789; GPRIDX-NEXT:    v_mov_b32_e32 v8, s8
2790; GPRIDX-NEXT:    v_mov_b32_e32 v9, s9
2791; GPRIDX-NEXT:    s_set_gpr_idx_on s12, gpr_idx(DST)
2792; GPRIDX-NEXT:    v_mov_b32_e32 v0, v10
2793; GPRIDX-NEXT:    s_set_gpr_idx_off
2794; GPRIDX-NEXT:    ; return to shader part epilog
2795;
2796; GFX10-LABEL: dyn_insertelement_v10f32_s_v_s:
2797; GFX10:       ; %bb.0: ; %entry
2798; GFX10-NEXT:    s_mov_b32 s0, s2
2799; GFX10-NEXT:    s_mov_b32 s1, s3
2800; GFX10-NEXT:    s_mov_b32 s2, s4
2801; GFX10-NEXT:    s_mov_b32 s3, s5
2802; GFX10-NEXT:    s_mov_b32 s4, s6
2803; GFX10-NEXT:    s_mov_b32 s5, s7
2804; GFX10-NEXT:    s_mov_b32 s6, s8
2805; GFX10-NEXT:    s_mov_b32 s7, s9
2806; GFX10-NEXT:    s_mov_b32 s8, s10
2807; GFX10-NEXT:    s_mov_b32 s9, s11
2808; GFX10-NEXT:    v_mov_b32_e32 v10, v0
2809; GFX10-NEXT:    v_mov_b32_e32 v0, s0
2810; GFX10-NEXT:    s_mov_b32 m0, s12
2811; GFX10-NEXT:    v_mov_b32_e32 v1, s1
2812; GFX10-NEXT:    v_mov_b32_e32 v2, s2
2813; GFX10-NEXT:    v_mov_b32_e32 v3, s3
2814; GFX10-NEXT:    v_mov_b32_e32 v4, s4
2815; GFX10-NEXT:    v_mov_b32_e32 v5, s5
2816; GFX10-NEXT:    v_mov_b32_e32 v6, s6
2817; GFX10-NEXT:    v_mov_b32_e32 v7, s7
2818; GFX10-NEXT:    v_mov_b32_e32 v8, s8
2819; GFX10-NEXT:    v_mov_b32_e32 v9, s9
2820; GFX10-NEXT:    v_movreld_b32_e32 v0, v10
2821; GFX10-NEXT:    ; return to shader part epilog
2822;
2823; GFX11-LABEL: dyn_insertelement_v10f32_s_v_s:
2824; GFX11:       ; %bb.0: ; %entry
2825; GFX11-NEXT:    s_mov_b32 s0, s2
2826; GFX11-NEXT:    s_mov_b32 s1, s3
2827; GFX11-NEXT:    s_mov_b32 s2, s4
2828; GFX11-NEXT:    s_mov_b32 s3, s5
2829; GFX11-NEXT:    s_mov_b32 s4, s6
2830; GFX11-NEXT:    s_mov_b32 s5, s7
2831; GFX11-NEXT:    s_mov_b32 s6, s8
2832; GFX11-NEXT:    s_mov_b32 s7, s9
2833; GFX11-NEXT:    s_mov_b32 s8, s10
2834; GFX11-NEXT:    s_mov_b32 s9, s11
2835; GFX11-NEXT:    v_mov_b32_e32 v10, v0
2836; GFX11-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v3, s3
2837; GFX11-NEXT:    s_mov_b32 m0, s12
2838; GFX11-NEXT:    v_dual_mov_b32 v1, s1 :: v_dual_mov_b32 v2, s2
2839; GFX11-NEXT:    v_dual_mov_b32 v5, s5 :: v_dual_mov_b32 v4, s4
2840; GFX11-NEXT:    v_dual_mov_b32 v7, s7 :: v_dual_mov_b32 v6, s6
2841; GFX11-NEXT:    v_dual_mov_b32 v9, s9 :: v_dual_mov_b32 v8, s8
2842; GFX11-NEXT:    v_movreld_b32_e32 v0, v10
2843; GFX11-NEXT:    ; return to shader part epilog
2844entry:
2845  %insert = insertelement <10 x float> %vec, float %val, i32 %idx
2846  ret <10 x float> %insert
2847}
2848
; Dynamic insertelement on <10 x float>: %vec is inreg, %val and %idx are not.
; The IR body inserts %val into %vec at the runtime index %idx and returns the
; resulting vector.
; All three check prefixes expect one v_cmp_eq_u32 / v_cndmask_b32 pair per
; element (constant indices 0-9 compared against v1), choosing between the
; incoming element (s2-s11) and v0. The GPRIDX checks first copy each SGPR
; into a VGPR with v_mov_b32, while the GFX10 and GFX11 checks pass the SGPR
; directly to v_cndmask_b32. The results for elements 0 and 1 are built in
; scratch VGPRs (v10, v11) and moved into v0/v1 only at the end, presumably
; because v0 and v1 are still being read as inputs up to that point.
2849define amdgpu_ps <10 x float> @dyn_insertelement_v10f32_s_v_v(<10 x float> inreg %vec, float %val, i32 %idx) {
2850; GPRIDX-LABEL: dyn_insertelement_v10f32_s_v_v:
2851; GPRIDX:       ; %bb.0: ; %entry
2852; GPRIDX-NEXT:    v_mov_b32_e32 v2, s2
2853; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
2854; GPRIDX-NEXT:    v_mov_b32_e32 v3, s3
2855; GPRIDX-NEXT:    v_cndmask_b32_e32 v10, v2, v0, vcc
2856; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v1
2857; GPRIDX-NEXT:    v_mov_b32_e32 v4, s4
2858; GPRIDX-NEXT:    v_cndmask_b32_e32 v11, v3, v0, vcc
2859; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v1
2860; GPRIDX-NEXT:    v_mov_b32_e32 v5, s5
2861; GPRIDX-NEXT:    v_cndmask_b32_e32 v2, v4, v0, vcc
2862; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v1
2863; GPRIDX-NEXT:    v_mov_b32_e32 v6, s6
2864; GPRIDX-NEXT:    v_cndmask_b32_e32 v3, v5, v0, vcc
2865; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v1
2866; GPRIDX-NEXT:    v_mov_b32_e32 v7, s7
2867; GPRIDX-NEXT:    v_cndmask_b32_e32 v4, v6, v0, vcc
2868; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v1
2869; GPRIDX-NEXT:    v_mov_b32_e32 v8, s8
2870; GPRIDX-NEXT:    v_cndmask_b32_e32 v5, v7, v0, vcc
2871; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v1
2872; GPRIDX-NEXT:    v_mov_b32_e32 v9, s9
2873; GPRIDX-NEXT:    v_cndmask_b32_e32 v6, v8, v0, vcc
2874; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 7, v1
2875; GPRIDX-NEXT:    v_mov_b32_e32 v12, s10
2876; GPRIDX-NEXT:    v_cndmask_b32_e32 v7, v9, v0, vcc
2877; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 8, v1
2878; GPRIDX-NEXT:    v_mov_b32_e32 v13, s11
2879; GPRIDX-NEXT:    v_cndmask_b32_e32 v8, v12, v0, vcc
2880; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 9, v1
2881; GPRIDX-NEXT:    v_cndmask_b32_e32 v9, v13, v0, vcc
2882; GPRIDX-NEXT:    v_mov_b32_e32 v0, v10
2883; GPRIDX-NEXT:    v_mov_b32_e32 v1, v11
2884; GPRIDX-NEXT:    ; return to shader part epilog
2885;
2886; GFX10-LABEL: dyn_insertelement_v10f32_s_v_v:
2887; GFX10:       ; %bb.0: ; %entry
2888; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
2889; GFX10-NEXT:    v_cndmask_b32_e32 v10, s2, v0, vcc_lo
2890; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v1
2891; GFX10-NEXT:    v_cndmask_b32_e32 v11, s3, v0, vcc_lo
2892; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v1
2893; GFX10-NEXT:    v_cndmask_b32_e32 v2, s4, v0, vcc_lo
2894; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v1
2895; GFX10-NEXT:    v_cndmask_b32_e32 v3, s5, v0, vcc_lo
2896; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v1
2897; GFX10-NEXT:    v_cndmask_b32_e32 v4, s6, v0, vcc_lo
2898; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v1
2899; GFX10-NEXT:    v_cndmask_b32_e32 v5, s7, v0, vcc_lo
2900; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v1
2901; GFX10-NEXT:    v_cndmask_b32_e32 v6, s8, v0, vcc_lo
2902; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v1
2903; GFX10-NEXT:    v_cndmask_b32_e32 v7, s9, v0, vcc_lo
2904; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 8, v1
2905; GFX10-NEXT:    v_cndmask_b32_e32 v8, s10, v0, vcc_lo
2906; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 9, v1
2907; GFX10-NEXT:    v_mov_b32_e32 v1, v11
2908; GFX10-NEXT:    v_cndmask_b32_e32 v9, s11, v0, vcc_lo
2909; GFX10-NEXT:    v_mov_b32_e32 v0, v10
2910; GFX10-NEXT:    ; return to shader part epilog
2911;
2912; GFX11-LABEL: dyn_insertelement_v10f32_s_v_v:
2913; GFX11:       ; %bb.0: ; %entry
2914; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
2915; GFX11-NEXT:    v_cndmask_b32_e32 v10, s2, v0, vcc_lo
2916; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v1
2917; GFX11-NEXT:    v_cndmask_b32_e32 v11, s3, v0, vcc_lo
2918; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v1
2919; GFX11-NEXT:    v_cndmask_b32_e32 v2, s4, v0, vcc_lo
2920; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v1
2921; GFX11-NEXT:    v_cndmask_b32_e32 v3, s5, v0, vcc_lo
2922; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v1
2923; GFX11-NEXT:    v_cndmask_b32_e32 v4, s6, v0, vcc_lo
2924; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v1
2925; GFX11-NEXT:    v_cndmask_b32_e32 v5, s7, v0, vcc_lo
2926; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v1
2927; GFX11-NEXT:    v_cndmask_b32_e32 v6, s8, v0, vcc_lo
2928; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v1
2929; GFX11-NEXT:    v_cndmask_b32_e32 v7, s9, v0, vcc_lo
2930; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 8, v1
2931; GFX11-NEXT:    v_cndmask_b32_e32 v8, s10, v0, vcc_lo
2932; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 9, v1
2933; GFX11-NEXT:    v_mov_b32_e32 v1, v11
2934; GFX11-NEXT:    v_dual_cndmask_b32 v9, s11, v0 :: v_dual_mov_b32 v0, v10
2935; GFX11-NEXT:    ; return to shader part epilog
2936entry:
2937  %insert = insertelement <10 x float> %vec, float %val, i32 %idx
2938  ret <10 x float> %insert
2939}
2940
; Dynamic insertelement on <10 x float> with only %idx marked inreg.
; The IR body inserts %val into %vec at the runtime index %idx and returns the
; resulting vector.
; No per-element compare/select sequence is expected here. The GPRIDX checks
; expect a single v_mov_b32 of v10 into v0 bracketed by
; s_set_gpr_idx_on s2, gpr_idx(DST) and s_set_gpr_idx_off; the shared
; GFX10PLUS checks expect s2 copied into m0 followed by v_movreld_b32 v0, v10.
2941define amdgpu_ps <10 x float> @dyn_insertelement_v10f32_v_v_s(<10 x float> %vec, float %val, i32 inreg %idx) {
2942; GPRIDX-LABEL: dyn_insertelement_v10f32_v_v_s:
2943; GPRIDX:       ; %bb.0: ; %entry
2944; GPRIDX-NEXT:    s_set_gpr_idx_on s2, gpr_idx(DST)
2945; GPRIDX-NEXT:    v_mov_b32_e32 v0, v10
2946; GPRIDX-NEXT:    s_set_gpr_idx_off
2947; GPRIDX-NEXT:    ; return to shader part epilog
2948;
2949; GFX10PLUS-LABEL: dyn_insertelement_v10f32_v_v_s:
2950; GFX10PLUS:       ; %bb.0: ; %entry
2951; GFX10PLUS-NEXT:    s_mov_b32 m0, s2
2952; GFX10PLUS-NEXT:    v_movreld_b32_e32 v0, v10
2953; GFX10PLUS-NEXT:    ; return to shader part epilog
2954entry:
2955  %insert = insertelement <10 x float> %vec, float %val, i32 %idx
2956  ret <10 x float> %insert
2957}
2958
; Dynamic insertelement on <10 x float> with no inreg arguments.
; The IR body inserts %val into %vec at the runtime index %idx and returns the
; resulting vector.
; Both check prefixes expect one v_cmp_eq_u32 / v_cndmask_b32 pair per element
; (constant indices 0-9 compared against v11), each updating v0-v9 in place
; with v10 as the other select operand. The GPRIDX checks use vcc and the
; GFX10PLUS checks use vcc_lo as the condition register.
2959define amdgpu_ps <10 x float> @dyn_insertelement_v10f32_v_v_v(<10 x float> %vec, float %val, i32 %idx) {
2960; GPRIDX-LABEL: dyn_insertelement_v10f32_v_v_v:
2961; GPRIDX:       ; %bb.0: ; %entry
2962; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v11
2963; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc
2964; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v11
2965; GPRIDX-NEXT:    v_cndmask_b32_e32 v1, v1, v10, vcc
2966; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v11
2967; GPRIDX-NEXT:    v_cndmask_b32_e32 v2, v2, v10, vcc
2968; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v11
2969; GPRIDX-NEXT:    v_cndmask_b32_e32 v3, v3, v10, vcc
2970; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v11
2971; GPRIDX-NEXT:    v_cndmask_b32_e32 v4, v4, v10, vcc
2972; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v11
2973; GPRIDX-NEXT:    v_cndmask_b32_e32 v5, v5, v10, vcc
2974; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v11
2975; GPRIDX-NEXT:    v_cndmask_b32_e32 v6, v6, v10, vcc
2976; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 7, v11
2977; GPRIDX-NEXT:    v_cndmask_b32_e32 v7, v7, v10, vcc
2978; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 8, v11
2979; GPRIDX-NEXT:    v_cndmask_b32_e32 v8, v8, v10, vcc
2980; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 9, v11
2981; GPRIDX-NEXT:    v_cndmask_b32_e32 v9, v9, v10, vcc
2982; GPRIDX-NEXT:    ; return to shader part epilog
2983;
2984; GFX10PLUS-LABEL: dyn_insertelement_v10f32_v_v_v:
2985; GFX10PLUS:       ; %bb.0: ; %entry
2986; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v11
2987; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc_lo
2988; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v11
2989; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v1, v1, v10, vcc_lo
2990; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v11
2991; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v2, v2, v10, vcc_lo
2992; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v11
2993; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v3, v3, v10, vcc_lo
2994; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v11
2995; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v4, v4, v10, vcc_lo
2996; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v11
2997; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v5, v5, v10, vcc_lo
2998; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v11
2999; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v6, v6, v10, vcc_lo
3000; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v11
3001; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v7, v7, v10, vcc_lo
3002; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 8, v11
3003; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v8, v8, v10, vcc_lo
3004; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 9, v11
3005; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v9, v9, v10, vcc_lo
3006; GFX10PLUS-NEXT:    ; return to shader part epilog
3007entry:
3008  %insert = insertelement <10 x float> %vec, float %val, i32 %idx
3009  ret <10 x float> %insert
3010}
3011
; Dynamic insertelement on <11 x float>: %vec and %idx are inreg, %val is not.
; The IR body inserts %val into %vec at the runtime index %idx and returns the
; resulting vector.
; Every check prefix expects the same overall shape: s2-s12 are shifted down
; into s0-s10 with s_mov_b32, v0 is saved to v11, s0-s10 are copied into
; v0-v10, and a single indexed move with destination v0 and source v11
; follows. The GPRIDX checks index through s_set_gpr_idx_on s13, gpr_idx(DST);
; the GFX10 and GFX11 checks load m0 from s13 and use v_movreld_b32. The GFX11
; checks additionally pair the VGPR moves (including the save of v0) into
; v_dual_mov_b32 instructions.
3012define amdgpu_ps <11 x float> @dyn_insertelement_v11f32_s_v_s(<11 x float> inreg %vec, float %val, i32 inreg %idx) {
3013; GPRIDX-LABEL: dyn_insertelement_v11f32_s_v_s:
3014; GPRIDX:       ; %bb.0: ; %entry
3015; GPRIDX-NEXT:    s_mov_b32 s0, s2
3016; GPRIDX-NEXT:    s_mov_b32 s1, s3
3017; GPRIDX-NEXT:    s_mov_b32 s2, s4
3018; GPRIDX-NEXT:    s_mov_b32 s3, s5
3019; GPRIDX-NEXT:    s_mov_b32 s4, s6
3020; GPRIDX-NEXT:    s_mov_b32 s5, s7
3021; GPRIDX-NEXT:    s_mov_b32 s6, s8
3022; GPRIDX-NEXT:    s_mov_b32 s7, s9
3023; GPRIDX-NEXT:    s_mov_b32 s8, s10
3024; GPRIDX-NEXT:    s_mov_b32 s9, s11
3025; GPRIDX-NEXT:    s_mov_b32 s10, s12
3026; GPRIDX-NEXT:    v_mov_b32_e32 v11, v0
3027; GPRIDX-NEXT:    v_mov_b32_e32 v0, s0
3028; GPRIDX-NEXT:    v_mov_b32_e32 v1, s1
3029; GPRIDX-NEXT:    v_mov_b32_e32 v2, s2
3030; GPRIDX-NEXT:    v_mov_b32_e32 v3, s3
3031; GPRIDX-NEXT:    v_mov_b32_e32 v4, s4
3032; GPRIDX-NEXT:    v_mov_b32_e32 v5, s5
3033; GPRIDX-NEXT:    v_mov_b32_e32 v6, s6
3034; GPRIDX-NEXT:    v_mov_b32_e32 v7, s7
3035; GPRIDX-NEXT:    v_mov_b32_e32 v8, s8
3036; GPRIDX-NEXT:    v_mov_b32_e32 v9, s9
3037; GPRIDX-NEXT:    v_mov_b32_e32 v10, s10
3038; GPRIDX-NEXT:    s_set_gpr_idx_on s13, gpr_idx(DST)
3039; GPRIDX-NEXT:    v_mov_b32_e32 v0, v11
3040; GPRIDX-NEXT:    s_set_gpr_idx_off
3041; GPRIDX-NEXT:    ; return to shader part epilog
3042;
3043; GFX10-LABEL: dyn_insertelement_v11f32_s_v_s:
3044; GFX10:       ; %bb.0: ; %entry
3045; GFX10-NEXT:    s_mov_b32 s0, s2
3046; GFX10-NEXT:    s_mov_b32 s1, s3
3047; GFX10-NEXT:    s_mov_b32 s2, s4
3048; GFX10-NEXT:    s_mov_b32 s3, s5
3049; GFX10-NEXT:    s_mov_b32 s4, s6
3050; GFX10-NEXT:    s_mov_b32 s5, s7
3051; GFX10-NEXT:    s_mov_b32 s6, s8
3052; GFX10-NEXT:    s_mov_b32 s7, s9
3053; GFX10-NEXT:    s_mov_b32 s8, s10
3054; GFX10-NEXT:    s_mov_b32 s9, s11
3055; GFX10-NEXT:    s_mov_b32 s10, s12
3056; GFX10-NEXT:    v_mov_b32_e32 v11, v0
3057; GFX10-NEXT:    v_mov_b32_e32 v0, s0
3058; GFX10-NEXT:    s_mov_b32 m0, s13
3059; GFX10-NEXT:    v_mov_b32_e32 v1, s1
3060; GFX10-NEXT:    v_mov_b32_e32 v2, s2
3061; GFX10-NEXT:    v_mov_b32_e32 v3, s3
3062; GFX10-NEXT:    v_mov_b32_e32 v4, s4
3063; GFX10-NEXT:    v_mov_b32_e32 v5, s5
3064; GFX10-NEXT:    v_mov_b32_e32 v6, s6
3065; GFX10-NEXT:    v_mov_b32_e32 v7, s7
3066; GFX10-NEXT:    v_mov_b32_e32 v8, s8
3067; GFX10-NEXT:    v_mov_b32_e32 v9, s9
3068; GFX10-NEXT:    v_mov_b32_e32 v10, s10
3069; GFX10-NEXT:    v_movreld_b32_e32 v0, v11
3070; GFX10-NEXT:    ; return to shader part epilog
3071;
3072; GFX11-LABEL: dyn_insertelement_v11f32_s_v_s:
3073; GFX11:       ; %bb.0: ; %entry
3074; GFX11-NEXT:    s_mov_b32 s0, s2
3075; GFX11-NEXT:    s_mov_b32 s1, s3
3076; GFX11-NEXT:    s_mov_b32 s2, s4
3077; GFX11-NEXT:    s_mov_b32 s3, s5
3078; GFX11-NEXT:    s_mov_b32 s4, s6
3079; GFX11-NEXT:    s_mov_b32 s5, s7
3080; GFX11-NEXT:    s_mov_b32 s6, s8
3081; GFX11-NEXT:    s_mov_b32 s7, s9
3082; GFX11-NEXT:    s_mov_b32 s8, s10
3083; GFX11-NEXT:    s_mov_b32 s9, s11
3084; GFX11-NEXT:    s_mov_b32 s10, s12
3085; GFX11-NEXT:    v_dual_mov_b32 v11, v0 :: v_dual_mov_b32 v0, s0
3086; GFX11-NEXT:    s_mov_b32 m0, s13
3087; GFX11-NEXT:    v_dual_mov_b32 v1, s1 :: v_dual_mov_b32 v2, s2
3088; GFX11-NEXT:    v_dual_mov_b32 v3, s3 :: v_dual_mov_b32 v4, s4
3089; GFX11-NEXT:    v_dual_mov_b32 v5, s5 :: v_dual_mov_b32 v6, s6
3090; GFX11-NEXT:    v_dual_mov_b32 v7, s7 :: v_dual_mov_b32 v8, s8
3091; GFX11-NEXT:    v_dual_mov_b32 v9, s9 :: v_dual_mov_b32 v10, s10
3092; GFX11-NEXT:    v_movreld_b32_e32 v0, v11
3093; GFX11-NEXT:    ; return to shader part epilog
3094entry:
3095  %insert = insertelement <11 x float> %vec, float %val, i32 %idx
3096  ret <11 x float> %insert
3097}
3098
; Dynamic insertelement on <11 x float>: %vec is inreg, %val and %idx are not.
; The IR body inserts %val into %vec at the runtime index %idx and returns the
; resulting vector.
; All three check prefixes expect one v_cmp_eq_u32 / v_cndmask_b32 pair per
; element (constant indices 0-10 compared against v1), choosing between the
; incoming element (s2-s12) and v0. The GPRIDX checks first copy each SGPR
; into a VGPR with v_mov_b32, while the GFX10 and GFX11 checks pass the SGPR
; directly to v_cndmask_b32. The results for elements 0 and 1 are built in
; scratch VGPRs (v12, v11) and moved into v0/v1 only at the end, presumably
; because v0 and v1 are still being read as inputs up to that point.
3099define amdgpu_ps <11 x float> @dyn_insertelement_v11f32_s_v_v(<11 x float> inreg %vec, float %val, i32 %idx) {
3100; GPRIDX-LABEL: dyn_insertelement_v11f32_s_v_v:
3101; GPRIDX:       ; %bb.0: ; %entry
3102; GPRIDX-NEXT:    v_mov_b32_e32 v2, s2
3103; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
3104; GPRIDX-NEXT:    v_mov_b32_e32 v3, s3
3105; GPRIDX-NEXT:    v_cndmask_b32_e32 v12, v2, v0, vcc
3106; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v1
3107; GPRIDX-NEXT:    v_mov_b32_e32 v4, s4
3108; GPRIDX-NEXT:    v_cndmask_b32_e32 v11, v3, v0, vcc
3109; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v1
3110; GPRIDX-NEXT:    v_mov_b32_e32 v5, s5
3111; GPRIDX-NEXT:    v_cndmask_b32_e32 v2, v4, v0, vcc
3112; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v1
3113; GPRIDX-NEXT:    v_mov_b32_e32 v6, s6
3114; GPRIDX-NEXT:    v_cndmask_b32_e32 v3, v5, v0, vcc
3115; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v1
3116; GPRIDX-NEXT:    v_mov_b32_e32 v7, s7
3117; GPRIDX-NEXT:    v_cndmask_b32_e32 v4, v6, v0, vcc
3118; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v1
3119; GPRIDX-NEXT:    v_mov_b32_e32 v8, s8
3120; GPRIDX-NEXT:    v_cndmask_b32_e32 v5, v7, v0, vcc
3121; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v1
3122; GPRIDX-NEXT:    v_mov_b32_e32 v9, s9
3123; GPRIDX-NEXT:    v_cndmask_b32_e32 v6, v8, v0, vcc
3124; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 7, v1
3125; GPRIDX-NEXT:    v_mov_b32_e32 v10, s10
3126; GPRIDX-NEXT:    v_cndmask_b32_e32 v7, v9, v0, vcc
3127; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 8, v1
3128; GPRIDX-NEXT:    v_mov_b32_e32 v13, s11
3129; GPRIDX-NEXT:    v_cndmask_b32_e32 v8, v10, v0, vcc
3130; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 9, v1
3131; GPRIDX-NEXT:    v_mov_b32_e32 v14, s12
3132; GPRIDX-NEXT:    v_cndmask_b32_e32 v9, v13, v0, vcc
3133; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 10, v1
3134; GPRIDX-NEXT:    v_cndmask_b32_e32 v10, v14, v0, vcc
3135; GPRIDX-NEXT:    v_mov_b32_e32 v0, v12
3136; GPRIDX-NEXT:    v_mov_b32_e32 v1, v11
3137; GPRIDX-NEXT:    ; return to shader part epilog
3138;
3139; GFX10-LABEL: dyn_insertelement_v11f32_s_v_v:
3140; GFX10:       ; %bb.0: ; %entry
3141; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
3142; GFX10-NEXT:    v_cndmask_b32_e32 v12, s2, v0, vcc_lo
3143; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v1
3144; GFX10-NEXT:    v_cndmask_b32_e32 v11, s3, v0, vcc_lo
3145; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v1
3146; GFX10-NEXT:    v_cndmask_b32_e32 v2, s4, v0, vcc_lo
3147; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v1
3148; GFX10-NEXT:    v_cndmask_b32_e32 v3, s5, v0, vcc_lo
3149; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v1
3150; GFX10-NEXT:    v_cndmask_b32_e32 v4, s6, v0, vcc_lo
3151; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v1
3152; GFX10-NEXT:    v_cndmask_b32_e32 v5, s7, v0, vcc_lo
3153; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v1
3154; GFX10-NEXT:    v_cndmask_b32_e32 v6, s8, v0, vcc_lo
3155; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v1
3156; GFX10-NEXT:    v_cndmask_b32_e32 v7, s9, v0, vcc_lo
3157; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 8, v1
3158; GFX10-NEXT:    v_cndmask_b32_e32 v8, s10, v0, vcc_lo
3159; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 9, v1
3160; GFX10-NEXT:    v_cndmask_b32_e32 v9, s11, v0, vcc_lo
3161; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 10, v1
3162; GFX10-NEXT:    v_mov_b32_e32 v1, v11
3163; GFX10-NEXT:    v_cndmask_b32_e32 v10, s12, v0, vcc_lo
3164; GFX10-NEXT:    v_mov_b32_e32 v0, v12
3165; GFX10-NEXT:    ; return to shader part epilog
3166;
3167; GFX11-LABEL: dyn_insertelement_v11f32_s_v_v:
3168; GFX11:       ; %bb.0: ; %entry
3169; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
3170; GFX11-NEXT:    v_cndmask_b32_e32 v12, s2, v0, vcc_lo
3171; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v1
3172; GFX11-NEXT:    v_cndmask_b32_e32 v11, s3, v0, vcc_lo
3173; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v1
3174; GFX11-NEXT:    v_cndmask_b32_e32 v2, s4, v0, vcc_lo
3175; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v1
3176; GFX11-NEXT:    v_cndmask_b32_e32 v3, s5, v0, vcc_lo
3177; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v1
3178; GFX11-NEXT:    v_cndmask_b32_e32 v4, s6, v0, vcc_lo
3179; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v1
3180; GFX11-NEXT:    v_cndmask_b32_e32 v5, s7, v0, vcc_lo
3181; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v1
3182; GFX11-NEXT:    v_cndmask_b32_e32 v6, s8, v0, vcc_lo
3183; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v1
3184; GFX11-NEXT:    v_cndmask_b32_e32 v7, s9, v0, vcc_lo
3185; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 8, v1
3186; GFX11-NEXT:    v_cndmask_b32_e32 v8, s10, v0, vcc_lo
3187; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 9, v1
3188; GFX11-NEXT:    v_cndmask_b32_e32 v9, s11, v0, vcc_lo
3189; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 10, v1
3190; GFX11-NEXT:    v_dual_mov_b32 v1, v11 :: v_dual_cndmask_b32 v10, s12, v0
3191; GFX11-NEXT:    v_mov_b32_e32 v0, v12
3192; GFX11-NEXT:    ; return to shader part epilog
3193entry:
3194  %insert = insertelement <11 x float> %vec, float %val, i32 %idx
3195  ret <11 x float> %insert
3196}
3197
; Dynamic insertelement on <11 x float> with only %idx marked inreg.
; The IR body inserts %val into %vec at the runtime index %idx and returns the
; resulting vector.
; No per-element compare/select sequence is expected here. The GPRIDX checks
; expect a single v_mov_b32 of v11 into v0 bracketed by
; s_set_gpr_idx_on s2, gpr_idx(DST) and s_set_gpr_idx_off; the shared
; GFX10PLUS checks expect s2 copied into m0 followed by v_movreld_b32 v0, v11.
3198define amdgpu_ps <11 x float> @dyn_insertelement_v11f32_v_v_s(<11 x float> %vec, float %val, i32 inreg %idx) {
3199; GPRIDX-LABEL: dyn_insertelement_v11f32_v_v_s:
3200; GPRIDX:       ; %bb.0: ; %entry
3201; GPRIDX-NEXT:    s_set_gpr_idx_on s2, gpr_idx(DST)
3202; GPRIDX-NEXT:    v_mov_b32_e32 v0, v11
3203; GPRIDX-NEXT:    s_set_gpr_idx_off
3204; GPRIDX-NEXT:    ; return to shader part epilog
3205;
3206; GFX10PLUS-LABEL: dyn_insertelement_v11f32_v_v_s:
3207; GFX10PLUS:       ; %bb.0: ; %entry
3208; GFX10PLUS-NEXT:    s_mov_b32 m0, s2
3209; GFX10PLUS-NEXT:    v_movreld_b32_e32 v0, v11
3210; GFX10PLUS-NEXT:    ; return to shader part epilog
3211entry:
3212  %insert = insertelement <11 x float> %vec, float %val, i32 %idx
3213  ret <11 x float> %insert
3214}
3215
; Dynamic insertelement on <11 x float> with no inreg arguments.
; The IR body inserts %val into %vec at the runtime index %idx and returns the
; resulting vector.
; Both check prefixes expect one v_cmp_eq_u32 / v_cndmask_b32 pair per element
; (constant indices 0-10 compared against v12), each updating v0-v10 in place
; with v11 as the other select operand. The GPRIDX checks use vcc and the
; GFX10PLUS checks use vcc_lo as the condition register.
3216define amdgpu_ps <11 x float> @dyn_insertelement_v11f32_v_v_v(<11 x float> %vec, float %val, i32 %idx) {
3217; GPRIDX-LABEL: dyn_insertelement_v11f32_v_v_v:
3218; GPRIDX:       ; %bb.0: ; %entry
3219; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v12
3220; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v11, vcc
3221; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v12
3222; GPRIDX-NEXT:    v_cndmask_b32_e32 v1, v1, v11, vcc
3223; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v12
3224; GPRIDX-NEXT:    v_cndmask_b32_e32 v2, v2, v11, vcc
3225; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v12
3226; GPRIDX-NEXT:    v_cndmask_b32_e32 v3, v3, v11, vcc
3227; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v12
3228; GPRIDX-NEXT:    v_cndmask_b32_e32 v4, v4, v11, vcc
3229; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v12
3230; GPRIDX-NEXT:    v_cndmask_b32_e32 v5, v5, v11, vcc
3231; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v12
3232; GPRIDX-NEXT:    v_cndmask_b32_e32 v6, v6, v11, vcc
3233; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 7, v12
3234; GPRIDX-NEXT:    v_cndmask_b32_e32 v7, v7, v11, vcc
3235; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 8, v12
3236; GPRIDX-NEXT:    v_cndmask_b32_e32 v8, v8, v11, vcc
3237; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 9, v12
3238; GPRIDX-NEXT:    v_cndmask_b32_e32 v9, v9, v11, vcc
3239; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 10, v12
3240; GPRIDX-NEXT:    v_cndmask_b32_e32 v10, v10, v11, vcc
3241; GPRIDX-NEXT:    ; return to shader part epilog
3242;
3243; GFX10PLUS-LABEL: dyn_insertelement_v11f32_v_v_v:
3244; GFX10PLUS:       ; %bb.0: ; %entry
3245; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v12
3246; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v11, vcc_lo
3247; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v12
3248; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v1, v1, v11, vcc_lo
3249; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v12
3250; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v2, v2, v11, vcc_lo
3251; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v12
3252; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v3, v3, v11, vcc_lo
3253; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v12
3254; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v4, v4, v11, vcc_lo
3255; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v12
3256; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v5, v5, v11, vcc_lo
3257; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v12
3258; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v6, v6, v11, vcc_lo
3259; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v12
3260; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v7, v7, v11, vcc_lo
3261; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 8, v12
3262; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v8, v8, v11, vcc_lo
3263; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 9, v12
3264; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v9, v9, v11, vcc_lo
3265; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 10, v12
3266; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v10, v10, v11, vcc_lo
3267; GFX10PLUS-NEXT:    ; return to shader part epilog
3268entry:
3269  %insert = insertelement <11 x float> %vec, float %val, i32 %idx
3270  ret <11 x float> %insert
3271}
3272
; Dynamic insertelement into <12 x float> where %vec and %idx are inreg and
; %val is not. Per the checks below, every run first shifts the vector from
; s2-s13 down to s0-s11, saves %val from v0 into v12 and copies s0-s11 into
; v0-v11. The gfx900 run then performs the indexed write with
; s_set_gpr_idx_on s14 (DST mode) around a v_mov_b32; the gfx1010 and gfx1100
; runs load m0 from s14 and use v_movreld_b32 instead.
3273define amdgpu_ps <12 x float> @dyn_insertelement_v12f32_s_v_s(<12 x float> inreg %vec, float %val, i32 inreg %idx) {
3274; GPRIDX-LABEL: dyn_insertelement_v12f32_s_v_s:
3275; GPRIDX:       ; %bb.0: ; %entry
3276; GPRIDX-NEXT:    s_mov_b32 s0, s2
3277; GPRIDX-NEXT:    s_mov_b32 s1, s3
3278; GPRIDX-NEXT:    s_mov_b32 s2, s4
3279; GPRIDX-NEXT:    s_mov_b32 s3, s5
3280; GPRIDX-NEXT:    s_mov_b32 s4, s6
3281; GPRIDX-NEXT:    s_mov_b32 s5, s7
3282; GPRIDX-NEXT:    s_mov_b32 s6, s8
3283; GPRIDX-NEXT:    s_mov_b32 s7, s9
3284; GPRIDX-NEXT:    s_mov_b32 s8, s10
3285; GPRIDX-NEXT:    s_mov_b32 s9, s11
3286; GPRIDX-NEXT:    s_mov_b32 s10, s12
3287; GPRIDX-NEXT:    s_mov_b32 s11, s13
3288; GPRIDX-NEXT:    v_mov_b32_e32 v12, v0
3289; GPRIDX-NEXT:    v_mov_b32_e32 v0, s0
3290; GPRIDX-NEXT:    v_mov_b32_e32 v1, s1
3291; GPRIDX-NEXT:    v_mov_b32_e32 v2, s2
3292; GPRIDX-NEXT:    v_mov_b32_e32 v3, s3
3293; GPRIDX-NEXT:    v_mov_b32_e32 v4, s4
3294; GPRIDX-NEXT:    v_mov_b32_e32 v5, s5
3295; GPRIDX-NEXT:    v_mov_b32_e32 v6, s6
3296; GPRIDX-NEXT:    v_mov_b32_e32 v7, s7
3297; GPRIDX-NEXT:    v_mov_b32_e32 v8, s8
3298; GPRIDX-NEXT:    v_mov_b32_e32 v9, s9
3299; GPRIDX-NEXT:    v_mov_b32_e32 v10, s10
3300; GPRIDX-NEXT:    v_mov_b32_e32 v11, s11
3301; GPRIDX-NEXT:    s_set_gpr_idx_on s14, gpr_idx(DST)
3302; GPRIDX-NEXT:    v_mov_b32_e32 v0, v12
3303; GPRIDX-NEXT:    s_set_gpr_idx_off
3304; GPRIDX-NEXT:    ; return to shader part epilog
3305;
3306; GFX10-LABEL: dyn_insertelement_v12f32_s_v_s:
3307; GFX10:       ; %bb.0: ; %entry
3308; GFX10-NEXT:    s_mov_b32 s0, s2
3309; GFX10-NEXT:    s_mov_b32 s1, s3
3310; GFX10-NEXT:    s_mov_b32 s2, s4
3311; GFX10-NEXT:    s_mov_b32 s3, s5
3312; GFX10-NEXT:    s_mov_b32 s4, s6
3313; GFX10-NEXT:    s_mov_b32 s5, s7
3314; GFX10-NEXT:    s_mov_b32 s6, s8
3315; GFX10-NEXT:    s_mov_b32 s7, s9
3316; GFX10-NEXT:    s_mov_b32 s8, s10
3317; GFX10-NEXT:    s_mov_b32 s9, s11
3318; GFX10-NEXT:    s_mov_b32 s10, s12
3319; GFX10-NEXT:    s_mov_b32 s11, s13
3320; GFX10-NEXT:    v_mov_b32_e32 v12, v0
3321; GFX10-NEXT:    v_mov_b32_e32 v0, s0
3322; GFX10-NEXT:    s_mov_b32 m0, s14
3323; GFX10-NEXT:    v_mov_b32_e32 v1, s1
3324; GFX10-NEXT:    v_mov_b32_e32 v2, s2
3325; GFX10-NEXT:    v_mov_b32_e32 v3, s3
3326; GFX10-NEXT:    v_mov_b32_e32 v4, s4
3327; GFX10-NEXT:    v_mov_b32_e32 v5, s5
3328; GFX10-NEXT:    v_mov_b32_e32 v6, s6
3329; GFX10-NEXT:    v_mov_b32_e32 v7, s7
3330; GFX10-NEXT:    v_mov_b32_e32 v8, s8
3331; GFX10-NEXT:    v_mov_b32_e32 v9, s9
3332; GFX10-NEXT:    v_mov_b32_e32 v10, s10
3333; GFX10-NEXT:    v_mov_b32_e32 v11, s11
3334; GFX10-NEXT:    v_movreld_b32_e32 v0, v12
3335; GFX10-NEXT:    ; return to shader part epilog
3336;
3337; GFX11-LABEL: dyn_insertelement_v12f32_s_v_s:
3338; GFX11:       ; %bb.0: ; %entry
3339; GFX11-NEXT:    s_mov_b32 s0, s2
3340; GFX11-NEXT:    s_mov_b32 s1, s3
3341; GFX11-NEXT:    s_mov_b32 s2, s4
3342; GFX11-NEXT:    s_mov_b32 s3, s5
3343; GFX11-NEXT:    s_mov_b32 s4, s6
3344; GFX11-NEXT:    s_mov_b32 s5, s7
3345; GFX11-NEXT:    s_mov_b32 s6, s8
3346; GFX11-NEXT:    s_mov_b32 s7, s9
3347; GFX11-NEXT:    s_mov_b32 s8, s10
3348; GFX11-NEXT:    s_mov_b32 s9, s11
3349; GFX11-NEXT:    s_mov_b32 s10, s12
3350; GFX11-NEXT:    s_mov_b32 s11, s13
3351; GFX11-NEXT:    v_mov_b32_e32 v12, v0
3352; GFX11-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v3, s3
3353; GFX11-NEXT:    s_mov_b32 m0, s14
3354; GFX11-NEXT:    v_dual_mov_b32 v1, s1 :: v_dual_mov_b32 v2, s2
3355; GFX11-NEXT:    v_dual_mov_b32 v5, s5 :: v_dual_mov_b32 v4, s4
3356; GFX11-NEXT:    v_dual_mov_b32 v7, s7 :: v_dual_mov_b32 v6, s6
3357; GFX11-NEXT:    v_dual_mov_b32 v9, s9 :: v_dual_mov_b32 v8, s8
3358; GFX11-NEXT:    v_dual_mov_b32 v11, s11 :: v_dual_mov_b32 v10, s10
3359; GFX11-NEXT:    v_movreld_b32_e32 v0, v12
3360; GFX11-NEXT:    ; return to shader part epilog
3361entry:
3362  %insert = insertelement <12 x float> %vec, float %val, i32 %idx
3363  ret <12 x float> %insert
3364}
3365
; Dynamic insertelement into <12 x float> where only %vec is inreg; %val and
; %idx are not. All three runs expect a chain of twelve v_cmp_eq_u32 /
; v_cndmask_b32 pairs that compare the index in v1 against 0..11 and select
; %val (v0) for the matching element; no indexed register write is used.
; The gfx900 checks first copy each of s2-s13 into a VGPR, while the gfx1010
; and gfx1100 checks pass the SGPR directly to v_cndmask_b32.
; Elements 0 and 1 are produced in v12/v13 and moved into v0/v1 at the end,
; presumably because v0 and v1 are still read as %val and %idx by the chain.
3366define amdgpu_ps <12 x float> @dyn_insertelement_v12f32_s_v_v(<12 x float> inreg %vec, float %val, i32 %idx) {
3367; GPRIDX-LABEL: dyn_insertelement_v12f32_s_v_v:
3368; GPRIDX:       ; %bb.0: ; %entry
3369; GPRIDX-NEXT:    v_mov_b32_e32 v2, s2
3370; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
3371; GPRIDX-NEXT:    v_mov_b32_e32 v3, s3
3372; GPRIDX-NEXT:    v_cndmask_b32_e32 v12, v2, v0, vcc
3373; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v1
3374; GPRIDX-NEXT:    v_mov_b32_e32 v4, s4
3375; GPRIDX-NEXT:    v_cndmask_b32_e32 v13, v3, v0, vcc
3376; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v1
3377; GPRIDX-NEXT:    v_mov_b32_e32 v5, s5
3378; GPRIDX-NEXT:    v_cndmask_b32_e32 v2, v4, v0, vcc
3379; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v1
3380; GPRIDX-NEXT:    v_mov_b32_e32 v6, s6
3381; GPRIDX-NEXT:    v_cndmask_b32_e32 v3, v5, v0, vcc
3382; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v1
3383; GPRIDX-NEXT:    v_mov_b32_e32 v7, s7
3384; GPRIDX-NEXT:    v_cndmask_b32_e32 v4, v6, v0, vcc
3385; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v1
3386; GPRIDX-NEXT:    v_mov_b32_e32 v8, s8
3387; GPRIDX-NEXT:    v_cndmask_b32_e32 v5, v7, v0, vcc
3388; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v1
3389; GPRIDX-NEXT:    v_mov_b32_e32 v9, s9
3390; GPRIDX-NEXT:    v_cndmask_b32_e32 v6, v8, v0, vcc
3391; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 7, v1
3392; GPRIDX-NEXT:    v_mov_b32_e32 v10, s10
3393; GPRIDX-NEXT:    v_cndmask_b32_e32 v7, v9, v0, vcc
3394; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 8, v1
3395; GPRIDX-NEXT:    v_mov_b32_e32 v11, s11
3396; GPRIDX-NEXT:    v_cndmask_b32_e32 v8, v10, v0, vcc
3397; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 9, v1
3398; GPRIDX-NEXT:    v_mov_b32_e32 v14, s12
3399; GPRIDX-NEXT:    v_cndmask_b32_e32 v9, v11, v0, vcc
3400; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 10, v1
3401; GPRIDX-NEXT:    v_mov_b32_e32 v15, s13
3402; GPRIDX-NEXT:    v_cndmask_b32_e32 v10, v14, v0, vcc
3403; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 11, v1
3404; GPRIDX-NEXT:    v_cndmask_b32_e32 v11, v15, v0, vcc
3405; GPRIDX-NEXT:    v_mov_b32_e32 v0, v12
3406; GPRIDX-NEXT:    v_mov_b32_e32 v1, v13
3407; GPRIDX-NEXT:    ; return to shader part epilog
3408;
3409; GFX10-LABEL: dyn_insertelement_v12f32_s_v_v:
3410; GFX10:       ; %bb.0: ; %entry
3411; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
3412; GFX10-NEXT:    v_cndmask_b32_e32 v12, s2, v0, vcc_lo
3413; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v1
3414; GFX10-NEXT:    v_cndmask_b32_e32 v13, s3, v0, vcc_lo
3415; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v1
3416; GFX10-NEXT:    v_cndmask_b32_e32 v2, s4, v0, vcc_lo
3417; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v1
3418; GFX10-NEXT:    v_cndmask_b32_e32 v3, s5, v0, vcc_lo
3419; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v1
3420; GFX10-NEXT:    v_cndmask_b32_e32 v4, s6, v0, vcc_lo
3421; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v1
3422; GFX10-NEXT:    v_cndmask_b32_e32 v5, s7, v0, vcc_lo
3423; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v1
3424; GFX10-NEXT:    v_cndmask_b32_e32 v6, s8, v0, vcc_lo
3425; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v1
3426; GFX10-NEXT:    v_cndmask_b32_e32 v7, s9, v0, vcc_lo
3427; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 8, v1
3428; GFX10-NEXT:    v_cndmask_b32_e32 v8, s10, v0, vcc_lo
3429; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 9, v1
3430; GFX10-NEXT:    v_cndmask_b32_e32 v9, s11, v0, vcc_lo
3431; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 10, v1
3432; GFX10-NEXT:    v_cndmask_b32_e32 v10, s12, v0, vcc_lo
3433; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 11, v1
3434; GFX10-NEXT:    v_mov_b32_e32 v1, v13
3435; GFX10-NEXT:    v_cndmask_b32_e32 v11, s13, v0, vcc_lo
3436; GFX10-NEXT:    v_mov_b32_e32 v0, v12
3437; GFX10-NEXT:    ; return to shader part epilog
3438;
3439; GFX11-LABEL: dyn_insertelement_v12f32_s_v_v:
3440; GFX11:       ; %bb.0: ; %entry
3441; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
3442; GFX11-NEXT:    v_cndmask_b32_e32 v12, s2, v0, vcc_lo
3443; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v1
3444; GFX11-NEXT:    v_cndmask_b32_e32 v13, s3, v0, vcc_lo
3445; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v1
3446; GFX11-NEXT:    v_cndmask_b32_e32 v2, s4, v0, vcc_lo
3447; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v1
3448; GFX11-NEXT:    v_cndmask_b32_e32 v3, s5, v0, vcc_lo
3449; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v1
3450; GFX11-NEXT:    v_cndmask_b32_e32 v4, s6, v0, vcc_lo
3451; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v1
3452; GFX11-NEXT:    v_cndmask_b32_e32 v5, s7, v0, vcc_lo
3453; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v1
3454; GFX11-NEXT:    v_cndmask_b32_e32 v6, s8, v0, vcc_lo
3455; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v1
3456; GFX11-NEXT:    v_cndmask_b32_e32 v7, s9, v0, vcc_lo
3457; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 8, v1
3458; GFX11-NEXT:    v_cndmask_b32_e32 v8, s10, v0, vcc_lo
3459; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 9, v1
3460; GFX11-NEXT:    v_cndmask_b32_e32 v9, s11, v0, vcc_lo
3461; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 10, v1
3462; GFX11-NEXT:    v_cndmask_b32_e32 v10, s12, v0, vcc_lo
3463; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 11, v1
3464; GFX11-NEXT:    v_mov_b32_e32 v1, v13
3465; GFX11-NEXT:    v_dual_cndmask_b32 v11, s13, v0 :: v_dual_mov_b32 v0, v12
3466; GFX11-NEXT:    ; return to shader part epilog
3467entry:
3468  %insert = insertelement <12 x float> %vec, float %val, i32 %idx
3469  ret <12 x float> %insert
3470}
3471
; Dynamic insertelement into <12 x float> where only %idx is inreg; %vec and
; %val are not. The vector is updated in place in v0-v11 with %val in v12 and
; the index in s2. The gfx900 checks wrap a single v_mov_b32 in
; s_set_gpr_idx_on s2 (DST mode) / s_set_gpr_idx_off; the checks shared by the
; gfx1010 and gfx1100 runs load m0 from s2 and use v_movreld_b32.
3472define amdgpu_ps <12 x float> @dyn_insertelement_v12f32_v_v_s(<12 x float> %vec, float %val, i32 inreg %idx) {
3473; GPRIDX-LABEL: dyn_insertelement_v12f32_v_v_s:
3474; GPRIDX:       ; %bb.0: ; %entry
3475; GPRIDX-NEXT:    s_set_gpr_idx_on s2, gpr_idx(DST)
3476; GPRIDX-NEXT:    v_mov_b32_e32 v0, v12
3477; GPRIDX-NEXT:    s_set_gpr_idx_off
3478; GPRIDX-NEXT:    ; return to shader part epilog
3479;
3480; GFX10PLUS-LABEL: dyn_insertelement_v12f32_v_v_s:
3481; GFX10PLUS:       ; %bb.0: ; %entry
3482; GFX10PLUS-NEXT:    s_mov_b32 m0, s2
3483; GFX10PLUS-NEXT:    v_movreld_b32_e32 v0, v12
3484; GFX10PLUS-NEXT:    ; return to shader part epilog
3485entry:
3486  %insert = insertelement <12 x float> %vec, float %val, i32 %idx
3487  ret <12 x float> %insert
3488}
3489
; Dynamic insertelement into <12 x float> where no argument is inreg.
; All runs expect twelve v_cmp_eq_u32 / v_cndmask_b32 pairs operating in place
; on v0-v11: the index in v13 is compared against 0..11 and %val (v12) is
; selected into the matching element. The gfx900 checks use vcc and the checks
; shared by the gfx1010 and gfx1100 runs use vcc_lo.
3490define amdgpu_ps <12 x float> @dyn_insertelement_v12f32_v_v_v(<12 x float> %vec, float %val, i32 %idx) {
3491; GPRIDX-LABEL: dyn_insertelement_v12f32_v_v_v:
3492; GPRIDX:       ; %bb.0: ; %entry
3493; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v13
3494; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v12, vcc
3495; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v13
3496; GPRIDX-NEXT:    v_cndmask_b32_e32 v1, v1, v12, vcc
3497; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v13
3498; GPRIDX-NEXT:    v_cndmask_b32_e32 v2, v2, v12, vcc
3499; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v13
3500; GPRIDX-NEXT:    v_cndmask_b32_e32 v3, v3, v12, vcc
3501; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v13
3502; GPRIDX-NEXT:    v_cndmask_b32_e32 v4, v4, v12, vcc
3503; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v13
3504; GPRIDX-NEXT:    v_cndmask_b32_e32 v5, v5, v12, vcc
3505; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v13
3506; GPRIDX-NEXT:    v_cndmask_b32_e32 v6, v6, v12, vcc
3507; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 7, v13
3508; GPRIDX-NEXT:    v_cndmask_b32_e32 v7, v7, v12, vcc
3509; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 8, v13
3510; GPRIDX-NEXT:    v_cndmask_b32_e32 v8, v8, v12, vcc
3511; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 9, v13
3512; GPRIDX-NEXT:    v_cndmask_b32_e32 v9, v9, v12, vcc
3513; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 10, v13
3514; GPRIDX-NEXT:    v_cndmask_b32_e32 v10, v10, v12, vcc
3515; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 11, v13
3516; GPRIDX-NEXT:    v_cndmask_b32_e32 v11, v11, v12, vcc
3517; GPRIDX-NEXT:    ; return to shader part epilog
3518;
3519; GFX10PLUS-LABEL: dyn_insertelement_v12f32_v_v_v:
3520; GFX10PLUS:       ; %bb.0: ; %entry
3521; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v13
3522; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v12, vcc_lo
3523; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v13
3524; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v1, v1, v12, vcc_lo
3525; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v13
3526; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v2, v2, v12, vcc_lo
3527; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v13
3528; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v3, v3, v12, vcc_lo
3529; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v13
3530; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v4, v4, v12, vcc_lo
3531; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v13
3532; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v5, v5, v12, vcc_lo
3533; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v13
3534; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v6, v6, v12, vcc_lo
3535; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v13
3536; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v7, v7, v12, vcc_lo
3537; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 8, v13
3538; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v8, v8, v12, vcc_lo
3539; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 9, v13
3540; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v9, v9, v12, vcc_lo
3541; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 10, v13
3542; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v10, v10, v12, vcc_lo
3543; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 11, v13
3544; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v11, v11, v12, vcc_lo
3545; GFX10PLUS-NEXT:    ; return to shader part epilog
3546entry:
3547  %insert = insertelement <12 x float> %vec, float %val, i32 %idx
3548  ret <12 x float> %insert
3549}
3550
; Dynamic insertelement into <16 x i32> where %vec, %val and %idx are all
; inreg. Every run shifts the vector from s2-s17 down to s0-s15, loads m0 from
; the index in s19 and writes %val (s18) with s_movreld_b32 s0, s18; no VGPR
; instructions are expected.
; The gfx900 checks place an s_nop 0 between the m0 write and s_movreld_b32;
; the checks shared by the gfx1010 and gfx1100 runs have no s_nop and expect
; the m0 write right after the first copy.
3551define amdgpu_ps <16 x i32> @dyn_insertelement_v16i32_s_s_s(<16 x i32> inreg %vec, i32 inreg %val, i32 inreg %idx) {
3552; GPRIDX-LABEL: dyn_insertelement_v16i32_s_s_s:
3553; GPRIDX:       ; %bb.0: ; %entry
3554; GPRIDX-NEXT:    s_mov_b32 s0, s2
3555; GPRIDX-NEXT:    s_mov_b32 s1, s3
3556; GPRIDX-NEXT:    s_mov_b32 s2, s4
3557; GPRIDX-NEXT:    s_mov_b32 s3, s5
3558; GPRIDX-NEXT:    s_mov_b32 s4, s6
3559; GPRIDX-NEXT:    s_mov_b32 s5, s7
3560; GPRIDX-NEXT:    s_mov_b32 s6, s8
3561; GPRIDX-NEXT:    s_mov_b32 s7, s9
3562; GPRIDX-NEXT:    s_mov_b32 s8, s10
3563; GPRIDX-NEXT:    s_mov_b32 s9, s11
3564; GPRIDX-NEXT:    s_mov_b32 s10, s12
3565; GPRIDX-NEXT:    s_mov_b32 s11, s13
3566; GPRIDX-NEXT:    s_mov_b32 s12, s14
3567; GPRIDX-NEXT:    s_mov_b32 s13, s15
3568; GPRIDX-NEXT:    s_mov_b32 s14, s16
3569; GPRIDX-NEXT:    s_mov_b32 s15, s17
3570; GPRIDX-NEXT:    s_mov_b32 m0, s19
3571; GPRIDX-NEXT:    s_nop 0
3572; GPRIDX-NEXT:    s_movreld_b32 s0, s18
3573; GPRIDX-NEXT:    ; return to shader part epilog
3574;
3575; GFX10PLUS-LABEL: dyn_insertelement_v16i32_s_s_s:
3576; GFX10PLUS:       ; %bb.0: ; %entry
3577; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
3578; GFX10PLUS-NEXT:    s_mov_b32 m0, s19
3579; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
3580; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
3581; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
3582; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
3583; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
3584; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
3585; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
3586; GFX10PLUS-NEXT:    s_mov_b32 s8, s10
3587; GFX10PLUS-NEXT:    s_mov_b32 s9, s11
3588; GFX10PLUS-NEXT:    s_mov_b32 s10, s12
3589; GFX10PLUS-NEXT:    s_mov_b32 s11, s13
3590; GFX10PLUS-NEXT:    s_mov_b32 s12, s14
3591; GFX10PLUS-NEXT:    s_mov_b32 s13, s15
3592; GFX10PLUS-NEXT:    s_mov_b32 s14, s16
3593; GFX10PLUS-NEXT:    s_mov_b32 s15, s17
3594; GFX10PLUS-NEXT:    s_movreld_b32 s0, s18
3595; GFX10PLUS-NEXT:    ; return to shader part epilog
3596entry:
3597  %insert = insertelement <16 x i32> %vec, i32 %val, i32 %idx
3598  ret <16 x i32> %insert
3599}
3600
; Dynamic insertelement into <16 x float> where %vec, %val and %idx are all
; inreg. Every run shifts the vector from s2-s17 down to s0-s15, loads m0 from
; the index in s19 and writes %val (s18) with s_movreld_b32 s0, s18, then
; copies s0-s15 into v0-v15 before the return.
; The gfx900 checks have an s_nop 0 between the m0 write and s_movreld_b32;
; the gfx1100 checks pair the final copies into v_dual_mov_b32 instructions.
3601define amdgpu_ps <16 x float> @dyn_insertelement_v16f32_s_s_s(<16 x float> inreg %vec, float inreg %val, i32 inreg %idx) {
3602; GPRIDX-LABEL: dyn_insertelement_v16f32_s_s_s:
3603; GPRIDX:       ; %bb.0: ; %entry
3604; GPRIDX-NEXT:    s_mov_b32 s0, s2
3605; GPRIDX-NEXT:    s_mov_b32 s1, s3
3606; GPRIDX-NEXT:    s_mov_b32 s2, s4
3607; GPRIDX-NEXT:    s_mov_b32 s3, s5
3608; GPRIDX-NEXT:    s_mov_b32 s4, s6
3609; GPRIDX-NEXT:    s_mov_b32 s5, s7
3610; GPRIDX-NEXT:    s_mov_b32 s6, s8
3611; GPRIDX-NEXT:    s_mov_b32 s7, s9
3612; GPRIDX-NEXT:    s_mov_b32 s8, s10
3613; GPRIDX-NEXT:    s_mov_b32 s9, s11
3614; GPRIDX-NEXT:    s_mov_b32 s10, s12
3615; GPRIDX-NEXT:    s_mov_b32 s11, s13
3616; GPRIDX-NEXT:    s_mov_b32 s12, s14
3617; GPRIDX-NEXT:    s_mov_b32 s13, s15
3618; GPRIDX-NEXT:    s_mov_b32 s14, s16
3619; GPRIDX-NEXT:    s_mov_b32 s15, s17
3620; GPRIDX-NEXT:    s_mov_b32 m0, s19
3621; GPRIDX-NEXT:    s_nop 0
3622; GPRIDX-NEXT:    s_movreld_b32 s0, s18
3623; GPRIDX-NEXT:    v_mov_b32_e32 v0, s0
3624; GPRIDX-NEXT:    v_mov_b32_e32 v1, s1
3625; GPRIDX-NEXT:    v_mov_b32_e32 v2, s2
3626; GPRIDX-NEXT:    v_mov_b32_e32 v3, s3
3627; GPRIDX-NEXT:    v_mov_b32_e32 v4, s4
3628; GPRIDX-NEXT:    v_mov_b32_e32 v5, s5
3629; GPRIDX-NEXT:    v_mov_b32_e32 v6, s6
3630; GPRIDX-NEXT:    v_mov_b32_e32 v7, s7
3631; GPRIDX-NEXT:    v_mov_b32_e32 v8, s8
3632; GPRIDX-NEXT:    v_mov_b32_e32 v9, s9
3633; GPRIDX-NEXT:    v_mov_b32_e32 v10, s10
3634; GPRIDX-NEXT:    v_mov_b32_e32 v11, s11
3635; GPRIDX-NEXT:    v_mov_b32_e32 v12, s12
3636; GPRIDX-NEXT:    v_mov_b32_e32 v13, s13
3637; GPRIDX-NEXT:    v_mov_b32_e32 v14, s14
3638; GPRIDX-NEXT:    v_mov_b32_e32 v15, s15
3639; GPRIDX-NEXT:    ; return to shader part epilog
3640;
3641; GFX10-LABEL: dyn_insertelement_v16f32_s_s_s:
3642; GFX10:       ; %bb.0: ; %entry
3643; GFX10-NEXT:    s_mov_b32 s0, s2
3644; GFX10-NEXT:    s_mov_b32 m0, s19
3645; GFX10-NEXT:    s_mov_b32 s1, s3
3646; GFX10-NEXT:    s_mov_b32 s2, s4
3647; GFX10-NEXT:    s_mov_b32 s3, s5
3648; GFX10-NEXT:    s_mov_b32 s4, s6
3649; GFX10-NEXT:    s_mov_b32 s5, s7
3650; GFX10-NEXT:    s_mov_b32 s6, s8
3651; GFX10-NEXT:    s_mov_b32 s7, s9
3652; GFX10-NEXT:    s_mov_b32 s8, s10
3653; GFX10-NEXT:    s_mov_b32 s9, s11
3654; GFX10-NEXT:    s_mov_b32 s10, s12
3655; GFX10-NEXT:    s_mov_b32 s11, s13
3656; GFX10-NEXT:    s_mov_b32 s12, s14
3657; GFX10-NEXT:    s_mov_b32 s13, s15
3658; GFX10-NEXT:    s_mov_b32 s14, s16
3659; GFX10-NEXT:    s_mov_b32 s15, s17
3660; GFX10-NEXT:    s_movreld_b32 s0, s18
3661; GFX10-NEXT:    v_mov_b32_e32 v0, s0
3662; GFX10-NEXT:    v_mov_b32_e32 v1, s1
3663; GFX10-NEXT:    v_mov_b32_e32 v2, s2
3664; GFX10-NEXT:    v_mov_b32_e32 v3, s3
3665; GFX10-NEXT:    v_mov_b32_e32 v4, s4
3666; GFX10-NEXT:    v_mov_b32_e32 v5, s5
3667; GFX10-NEXT:    v_mov_b32_e32 v6, s6
3668; GFX10-NEXT:    v_mov_b32_e32 v7, s7
3669; GFX10-NEXT:    v_mov_b32_e32 v8, s8
3670; GFX10-NEXT:    v_mov_b32_e32 v9, s9
3671; GFX10-NEXT:    v_mov_b32_e32 v10, s10
3672; GFX10-NEXT:    v_mov_b32_e32 v11, s11
3673; GFX10-NEXT:    v_mov_b32_e32 v12, s12
3674; GFX10-NEXT:    v_mov_b32_e32 v13, s13
3675; GFX10-NEXT:    v_mov_b32_e32 v14, s14
3676; GFX10-NEXT:    v_mov_b32_e32 v15, s15
3677; GFX10-NEXT:    ; return to shader part epilog
3678;
3679; GFX11-LABEL: dyn_insertelement_v16f32_s_s_s:
3680; GFX11:       ; %bb.0: ; %entry
3681; GFX11-NEXT:    s_mov_b32 s0, s2
3682; GFX11-NEXT:    s_mov_b32 m0, s19
3683; GFX11-NEXT:    s_mov_b32 s1, s3
3684; GFX11-NEXT:    s_mov_b32 s2, s4
3685; GFX11-NEXT:    s_mov_b32 s3, s5
3686; GFX11-NEXT:    s_mov_b32 s4, s6
3687; GFX11-NEXT:    s_mov_b32 s5, s7
3688; GFX11-NEXT:    s_mov_b32 s6, s8
3689; GFX11-NEXT:    s_mov_b32 s7, s9
3690; GFX11-NEXT:    s_mov_b32 s8, s10
3691; GFX11-NEXT:    s_mov_b32 s9, s11
3692; GFX11-NEXT:    s_mov_b32 s10, s12
3693; GFX11-NEXT:    s_mov_b32 s11, s13
3694; GFX11-NEXT:    s_mov_b32 s12, s14
3695; GFX11-NEXT:    s_mov_b32 s13, s15
3696; GFX11-NEXT:    s_mov_b32 s14, s16
3697; GFX11-NEXT:    s_mov_b32 s15, s17
3698; GFX11-NEXT:    s_movreld_b32 s0, s18
3699; GFX11-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
3700; GFX11-NEXT:    v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
3701; GFX11-NEXT:    v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v5, s5
3702; GFX11-NEXT:    v_dual_mov_b32 v6, s6 :: v_dual_mov_b32 v7, s7
3703; GFX11-NEXT:    v_dual_mov_b32 v8, s8 :: v_dual_mov_b32 v9, s9
3704; GFX11-NEXT:    v_dual_mov_b32 v10, s10 :: v_dual_mov_b32 v11, s11
3705; GFX11-NEXT:    v_dual_mov_b32 v12, s12 :: v_dual_mov_b32 v13, s13
3706; GFX11-NEXT:    v_dual_mov_b32 v14, s14 :: v_dual_mov_b32 v15, s15
3707; GFX11-NEXT:    ; return to shader part epilog
3708entry:
3709  %insert = insertelement <16 x float> %vec, float %val, i32 %idx
3710  ret <16 x float> %insert
3711}
3712
; Dynamic insertelement into <32 x float> where %vec, %val and %idx are all
; inreg. Every run shifts the vector from s2-s33 down to s0-s31 (the checks
; expect the s31 copy before the s30 copy), loads m0 from the index in s35 and
; writes %val (s34) with s_movreld_b32 s0, s34, then copies s0-s31 into v0-v31
; before the return.
; The gfx900 checks have an s_nop 0 between the m0 write and s_movreld_b32;
; the gfx1100 checks pair the final copies into v_dual_mov_b32 instructions.
3713define amdgpu_ps <32 x float> @dyn_insertelement_v32f32_s_s_s(<32 x float> inreg %vec, float inreg %val, i32 inreg %idx) {
3714; GPRIDX-LABEL: dyn_insertelement_v32f32_s_s_s:
3715; GPRIDX:       ; %bb.0: ; %entry
3716; GPRIDX-NEXT:    s_mov_b32 s0, s2
3717; GPRIDX-NEXT:    s_mov_b32 s1, s3
3718; GPRIDX-NEXT:    s_mov_b32 s2, s4
3719; GPRIDX-NEXT:    s_mov_b32 s3, s5
3720; GPRIDX-NEXT:    s_mov_b32 s4, s6
3721; GPRIDX-NEXT:    s_mov_b32 s5, s7
3722; GPRIDX-NEXT:    s_mov_b32 s6, s8
3723; GPRIDX-NEXT:    s_mov_b32 s7, s9
3724; GPRIDX-NEXT:    s_mov_b32 s8, s10
3725; GPRIDX-NEXT:    s_mov_b32 s9, s11
3726; GPRIDX-NEXT:    s_mov_b32 s10, s12
3727; GPRIDX-NEXT:    s_mov_b32 s11, s13
3728; GPRIDX-NEXT:    s_mov_b32 s12, s14
3729; GPRIDX-NEXT:    s_mov_b32 s13, s15
3730; GPRIDX-NEXT:    s_mov_b32 s14, s16
3731; GPRIDX-NEXT:    s_mov_b32 s15, s17
3732; GPRIDX-NEXT:    s_mov_b32 s16, s18
3733; GPRIDX-NEXT:    s_mov_b32 s17, s19
3734; GPRIDX-NEXT:    s_mov_b32 s18, s20
3735; GPRIDX-NEXT:    s_mov_b32 s19, s21
3736; GPRIDX-NEXT:    s_mov_b32 s20, s22
3737; GPRIDX-NEXT:    s_mov_b32 s21, s23
3738; GPRIDX-NEXT:    s_mov_b32 s22, s24
3739; GPRIDX-NEXT:    s_mov_b32 s23, s25
3740; GPRIDX-NEXT:    s_mov_b32 s24, s26
3741; GPRIDX-NEXT:    s_mov_b32 s25, s27
3742; GPRIDX-NEXT:    s_mov_b32 s26, s28
3743; GPRIDX-NEXT:    s_mov_b32 s27, s29
3744; GPRIDX-NEXT:    s_mov_b32 s28, s30
3745; GPRIDX-NEXT:    s_mov_b32 s29, s31
3746; GPRIDX-NEXT:    s_mov_b32 s31, s33
3747; GPRIDX-NEXT:    s_mov_b32 s30, s32
3748; GPRIDX-NEXT:    s_mov_b32 m0, s35
3749; GPRIDX-NEXT:    s_nop 0
3750; GPRIDX-NEXT:    s_movreld_b32 s0, s34
3751; GPRIDX-NEXT:    v_mov_b32_e32 v0, s0
3752; GPRIDX-NEXT:    v_mov_b32_e32 v1, s1
3753; GPRIDX-NEXT:    v_mov_b32_e32 v2, s2
3754; GPRIDX-NEXT:    v_mov_b32_e32 v3, s3
3755; GPRIDX-NEXT:    v_mov_b32_e32 v4, s4
3756; GPRIDX-NEXT:    v_mov_b32_e32 v5, s5
3757; GPRIDX-NEXT:    v_mov_b32_e32 v6, s6
3758; GPRIDX-NEXT:    v_mov_b32_e32 v7, s7
3759; GPRIDX-NEXT:    v_mov_b32_e32 v8, s8
3760; GPRIDX-NEXT:    v_mov_b32_e32 v9, s9
3761; GPRIDX-NEXT:    v_mov_b32_e32 v10, s10
3762; GPRIDX-NEXT:    v_mov_b32_e32 v11, s11
3763; GPRIDX-NEXT:    v_mov_b32_e32 v12, s12
3764; GPRIDX-NEXT:    v_mov_b32_e32 v13, s13
3765; GPRIDX-NEXT:    v_mov_b32_e32 v14, s14
3766; GPRIDX-NEXT:    v_mov_b32_e32 v15, s15
3767; GPRIDX-NEXT:    v_mov_b32_e32 v16, s16
3768; GPRIDX-NEXT:    v_mov_b32_e32 v17, s17
3769; GPRIDX-NEXT:    v_mov_b32_e32 v18, s18
3770; GPRIDX-NEXT:    v_mov_b32_e32 v19, s19
3771; GPRIDX-NEXT:    v_mov_b32_e32 v20, s20
3772; GPRIDX-NEXT:    v_mov_b32_e32 v21, s21
3773; GPRIDX-NEXT:    v_mov_b32_e32 v22, s22
3774; GPRIDX-NEXT:    v_mov_b32_e32 v23, s23
3775; GPRIDX-NEXT:    v_mov_b32_e32 v24, s24
3776; GPRIDX-NEXT:    v_mov_b32_e32 v25, s25
3777; GPRIDX-NEXT:    v_mov_b32_e32 v26, s26
3778; GPRIDX-NEXT:    v_mov_b32_e32 v27, s27
3779; GPRIDX-NEXT:    v_mov_b32_e32 v28, s28
3780; GPRIDX-NEXT:    v_mov_b32_e32 v29, s29
3781; GPRIDX-NEXT:    v_mov_b32_e32 v30, s30
3782; GPRIDX-NEXT:    v_mov_b32_e32 v31, s31
3783; GPRIDX-NEXT:    ; return to shader part epilog
3784;
3785; GFX10-LABEL: dyn_insertelement_v32f32_s_s_s:
3786; GFX10:       ; %bb.0: ; %entry
3787; GFX10-NEXT:    s_mov_b32 s0, s2
3788; GFX10-NEXT:    s_mov_b32 m0, s35
3789; GFX10-NEXT:    s_mov_b32 s1, s3
3790; GFX10-NEXT:    s_mov_b32 s2, s4
3791; GFX10-NEXT:    s_mov_b32 s3, s5
3792; GFX10-NEXT:    s_mov_b32 s4, s6
3793; GFX10-NEXT:    s_mov_b32 s5, s7
3794; GFX10-NEXT:    s_mov_b32 s6, s8
3795; GFX10-NEXT:    s_mov_b32 s7, s9
3796; GFX10-NEXT:    s_mov_b32 s8, s10
3797; GFX10-NEXT:    s_mov_b32 s9, s11
3798; GFX10-NEXT:    s_mov_b32 s10, s12
3799; GFX10-NEXT:    s_mov_b32 s11, s13
3800; GFX10-NEXT:    s_mov_b32 s12, s14
3801; GFX10-NEXT:    s_mov_b32 s13, s15
3802; GFX10-NEXT:    s_mov_b32 s14, s16
3803; GFX10-NEXT:    s_mov_b32 s15, s17
3804; GFX10-NEXT:    s_mov_b32 s16, s18
3805; GFX10-NEXT:    s_mov_b32 s17, s19
3806; GFX10-NEXT:    s_mov_b32 s18, s20
3807; GFX10-NEXT:    s_mov_b32 s19, s21
3808; GFX10-NEXT:    s_mov_b32 s20, s22
3809; GFX10-NEXT:    s_mov_b32 s21, s23
3810; GFX10-NEXT:    s_mov_b32 s22, s24
3811; GFX10-NEXT:    s_mov_b32 s23, s25
3812; GFX10-NEXT:    s_mov_b32 s24, s26
3813; GFX10-NEXT:    s_mov_b32 s25, s27
3814; GFX10-NEXT:    s_mov_b32 s26, s28
3815; GFX10-NEXT:    s_mov_b32 s27, s29
3816; GFX10-NEXT:    s_mov_b32 s28, s30
3817; GFX10-NEXT:    s_mov_b32 s29, s31
3818; GFX10-NEXT:    s_mov_b32 s31, s33
3819; GFX10-NEXT:    s_mov_b32 s30, s32
3820; GFX10-NEXT:    s_movreld_b32 s0, s34
3821; GFX10-NEXT:    v_mov_b32_e32 v0, s0
3822; GFX10-NEXT:    v_mov_b32_e32 v1, s1
3823; GFX10-NEXT:    v_mov_b32_e32 v2, s2
3824; GFX10-NEXT:    v_mov_b32_e32 v3, s3
3825; GFX10-NEXT:    v_mov_b32_e32 v4, s4
3826; GFX10-NEXT:    v_mov_b32_e32 v5, s5
3827; GFX10-NEXT:    v_mov_b32_e32 v6, s6
3828; GFX10-NEXT:    v_mov_b32_e32 v7, s7
3829; GFX10-NEXT:    v_mov_b32_e32 v8, s8
3830; GFX10-NEXT:    v_mov_b32_e32 v9, s9
3831; GFX10-NEXT:    v_mov_b32_e32 v10, s10
3832; GFX10-NEXT:    v_mov_b32_e32 v11, s11
3833; GFX10-NEXT:    v_mov_b32_e32 v12, s12
3834; GFX10-NEXT:    v_mov_b32_e32 v13, s13
3835; GFX10-NEXT:    v_mov_b32_e32 v14, s14
3836; GFX10-NEXT:    v_mov_b32_e32 v15, s15
3837; GFX10-NEXT:    v_mov_b32_e32 v16, s16
3838; GFX10-NEXT:    v_mov_b32_e32 v17, s17
3839; GFX10-NEXT:    v_mov_b32_e32 v18, s18
3840; GFX10-NEXT:    v_mov_b32_e32 v19, s19
3841; GFX10-NEXT:    v_mov_b32_e32 v20, s20
3842; GFX10-NEXT:    v_mov_b32_e32 v21, s21
3843; GFX10-NEXT:    v_mov_b32_e32 v22, s22
3844; GFX10-NEXT:    v_mov_b32_e32 v23, s23
3845; GFX10-NEXT:    v_mov_b32_e32 v24, s24
3846; GFX10-NEXT:    v_mov_b32_e32 v25, s25
3847; GFX10-NEXT:    v_mov_b32_e32 v26, s26
3848; GFX10-NEXT:    v_mov_b32_e32 v27, s27
3849; GFX10-NEXT:    v_mov_b32_e32 v28, s28
3850; GFX10-NEXT:    v_mov_b32_e32 v29, s29
3851; GFX10-NEXT:    v_mov_b32_e32 v30, s30
3852; GFX10-NEXT:    v_mov_b32_e32 v31, s31
3853; GFX10-NEXT:    ; return to shader part epilog
3854;
3855; GFX11-LABEL: dyn_insertelement_v32f32_s_s_s:
3856; GFX11:       ; %bb.0: ; %entry
3857; GFX11-NEXT:    s_mov_b32 s0, s2
3858; GFX11-NEXT:    s_mov_b32 m0, s35
3859; GFX11-NEXT:    s_mov_b32 s1, s3
3860; GFX11-NEXT:    s_mov_b32 s2, s4
3861; GFX11-NEXT:    s_mov_b32 s3, s5
3862; GFX11-NEXT:    s_mov_b32 s4, s6
3863; GFX11-NEXT:    s_mov_b32 s5, s7
3864; GFX11-NEXT:    s_mov_b32 s6, s8
3865; GFX11-NEXT:    s_mov_b32 s7, s9
3866; GFX11-NEXT:    s_mov_b32 s8, s10
3867; GFX11-NEXT:    s_mov_b32 s9, s11
3868; GFX11-NEXT:    s_mov_b32 s10, s12
3869; GFX11-NEXT:    s_mov_b32 s11, s13
3870; GFX11-NEXT:    s_mov_b32 s12, s14
3871; GFX11-NEXT:    s_mov_b32 s13, s15
3872; GFX11-NEXT:    s_mov_b32 s14, s16
3873; GFX11-NEXT:    s_mov_b32 s15, s17
3874; GFX11-NEXT:    s_mov_b32 s16, s18
3875; GFX11-NEXT:    s_mov_b32 s17, s19
3876; GFX11-NEXT:    s_mov_b32 s18, s20
3877; GFX11-NEXT:    s_mov_b32 s19, s21
3878; GFX11-NEXT:    s_mov_b32 s20, s22
3879; GFX11-NEXT:    s_mov_b32 s21, s23
3880; GFX11-NEXT:    s_mov_b32 s22, s24
3881; GFX11-NEXT:    s_mov_b32 s23, s25
3882; GFX11-NEXT:    s_mov_b32 s24, s26
3883; GFX11-NEXT:    s_mov_b32 s25, s27
3884; GFX11-NEXT:    s_mov_b32 s26, s28
3885; GFX11-NEXT:    s_mov_b32 s27, s29
3886; GFX11-NEXT:    s_mov_b32 s28, s30
3887; GFX11-NEXT:    s_mov_b32 s29, s31
3888; GFX11-NEXT:    s_mov_b32 s31, s33
3889; GFX11-NEXT:    s_mov_b32 s30, s32
3890; GFX11-NEXT:    s_movreld_b32 s0, s34
3891; GFX11-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
3892; GFX11-NEXT:    v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
3893; GFX11-NEXT:    v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v5, s5
3894; GFX11-NEXT:    v_dual_mov_b32 v6, s6 :: v_dual_mov_b32 v7, s7
3895; GFX11-NEXT:    v_dual_mov_b32 v8, s8 :: v_dual_mov_b32 v9, s9
3896; GFX11-NEXT:    v_dual_mov_b32 v10, s10 :: v_dual_mov_b32 v11, s11
3897; GFX11-NEXT:    v_dual_mov_b32 v12, s12 :: v_dual_mov_b32 v13, s13
3898; GFX11-NEXT:    v_dual_mov_b32 v14, s14 :: v_dual_mov_b32 v15, s15
3899; GFX11-NEXT:    v_dual_mov_b32 v16, s16 :: v_dual_mov_b32 v17, s17
3900; GFX11-NEXT:    v_dual_mov_b32 v18, s18 :: v_dual_mov_b32 v19, s19
3901; GFX11-NEXT:    v_dual_mov_b32 v20, s20 :: v_dual_mov_b32 v21, s21
3902; GFX11-NEXT:    v_dual_mov_b32 v22, s22 :: v_dual_mov_b32 v23, s23
3903; GFX11-NEXT:    v_dual_mov_b32 v24, s24 :: v_dual_mov_b32 v25, s25
3904; GFX11-NEXT:    v_dual_mov_b32 v26, s26 :: v_dual_mov_b32 v27, s27
3905; GFX11-NEXT:    v_dual_mov_b32 v28, s28 :: v_dual_mov_b32 v29, s29
3906; GFX11-NEXT:    v_dual_mov_b32 v30, s30 :: v_dual_mov_b32 v31, s31
3907; GFX11-NEXT:    ; return to shader part epilog
3908entry:
3909  %insert = insertelement <32 x float> %vec, float %val, i32 %idx
3910  ret <32 x float> %insert
3911}
3912
; Dynamic insertelement into <16 x i64> where %vec, %val and %idx are all
; inreg. Every run shifts the 32 dwords of the vector from s2-s33 down to
; s0-s31 (the checks expect the s31 copy before the s30 copy), loads m0 from
; the index in s36 and writes the 64-bit %val with
; s_movreld_b64 s[0:1], s[34:35]; no VGPR instructions are expected.
; The gfx900 checks have an s_nop 0 between the m0 write and s_movreld_b64;
; the checks shared by the gfx1010 and gfx1100 runs do not.
3913define amdgpu_ps <16 x i64> @dyn_insertelement_v16i64_s_s_s(<16 x i64> inreg %vec, i64 inreg %val, i32 inreg %idx) {
3914; GPRIDX-LABEL: dyn_insertelement_v16i64_s_s_s:
3915; GPRIDX:       ; %bb.0: ; %entry
3916; GPRIDX-NEXT:    s_mov_b32 s0, s2
3917; GPRIDX-NEXT:    s_mov_b32 s1, s3
3918; GPRIDX-NEXT:    s_mov_b32 s2, s4
3919; GPRIDX-NEXT:    s_mov_b32 s3, s5
3920; GPRIDX-NEXT:    s_mov_b32 s4, s6
3921; GPRIDX-NEXT:    s_mov_b32 s5, s7
3922; GPRIDX-NEXT:    s_mov_b32 s6, s8
3923; GPRIDX-NEXT:    s_mov_b32 s7, s9
3924; GPRIDX-NEXT:    s_mov_b32 s8, s10
3925; GPRIDX-NEXT:    s_mov_b32 s9, s11
3926; GPRIDX-NEXT:    s_mov_b32 s10, s12
3927; GPRIDX-NEXT:    s_mov_b32 s11, s13
3928; GPRIDX-NEXT:    s_mov_b32 s12, s14
3929; GPRIDX-NEXT:    s_mov_b32 s13, s15
3930; GPRIDX-NEXT:    s_mov_b32 s14, s16
3931; GPRIDX-NEXT:    s_mov_b32 s15, s17
3932; GPRIDX-NEXT:    s_mov_b32 s16, s18
3933; GPRIDX-NEXT:    s_mov_b32 s17, s19
3934; GPRIDX-NEXT:    s_mov_b32 s18, s20
3935; GPRIDX-NEXT:    s_mov_b32 s19, s21
3936; GPRIDX-NEXT:    s_mov_b32 s20, s22
3937; GPRIDX-NEXT:    s_mov_b32 s21, s23
3938; GPRIDX-NEXT:    s_mov_b32 s22, s24
3939; GPRIDX-NEXT:    s_mov_b32 s23, s25
3940; GPRIDX-NEXT:    s_mov_b32 s24, s26
3941; GPRIDX-NEXT:    s_mov_b32 s25, s27
3942; GPRIDX-NEXT:    s_mov_b32 s26, s28
3943; GPRIDX-NEXT:    s_mov_b32 s27, s29
3944; GPRIDX-NEXT:    s_mov_b32 s28, s30
3945; GPRIDX-NEXT:    s_mov_b32 s29, s31
3946; GPRIDX-NEXT:    s_mov_b32 s31, s33
3947; GPRIDX-NEXT:    s_mov_b32 s30, s32
3948; GPRIDX-NEXT:    s_mov_b32 m0, s36
3949; GPRIDX-NEXT:    s_nop 0
3950; GPRIDX-NEXT:    s_movreld_b64 s[0:1], s[34:35]
3951; GPRIDX-NEXT:    ; return to shader part epilog
3952;
3953; GFX10PLUS-LABEL: dyn_insertelement_v16i64_s_s_s:
3954; GFX10PLUS:       ; %bb.0: ; %entry
3955; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
3956; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
3957; GFX10PLUS-NEXT:    s_mov_b32 m0, s36
3958; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
3959; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
3960; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
3961; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
3962; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
3963; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
3964; GFX10PLUS-NEXT:    s_mov_b32 s8, s10
3965; GFX10PLUS-NEXT:    s_mov_b32 s9, s11
3966; GFX10PLUS-NEXT:    s_mov_b32 s10, s12
3967; GFX10PLUS-NEXT:    s_mov_b32 s11, s13
3968; GFX10PLUS-NEXT:    s_mov_b32 s12, s14
3969; GFX10PLUS-NEXT:    s_mov_b32 s13, s15
3970; GFX10PLUS-NEXT:    s_mov_b32 s14, s16
3971; GFX10PLUS-NEXT:    s_mov_b32 s15, s17
3972; GFX10PLUS-NEXT:    s_mov_b32 s16, s18
3973; GFX10PLUS-NEXT:    s_mov_b32 s17, s19
3974; GFX10PLUS-NEXT:    s_mov_b32 s18, s20
3975; GFX10PLUS-NEXT:    s_mov_b32 s19, s21
3976; GFX10PLUS-NEXT:    s_mov_b32 s20, s22
3977; GFX10PLUS-NEXT:    s_mov_b32 s21, s23
3978; GFX10PLUS-NEXT:    s_mov_b32 s22, s24
3979; GFX10PLUS-NEXT:    s_mov_b32 s23, s25
3980; GFX10PLUS-NEXT:    s_mov_b32 s24, s26
3981; GFX10PLUS-NEXT:    s_mov_b32 s25, s27
3982; GFX10PLUS-NEXT:    s_mov_b32 s26, s28
3983; GFX10PLUS-NEXT:    s_mov_b32 s27, s29
3984; GFX10PLUS-NEXT:    s_mov_b32 s28, s30
3985; GFX10PLUS-NEXT:    s_mov_b32 s29, s31
3986; GFX10PLUS-NEXT:    s_mov_b32 s31, s33
3987; GFX10PLUS-NEXT:    s_mov_b32 s30, s32
3988; GFX10PLUS-NEXT:    s_movreld_b64 s[0:1], s[34:35]
3989; GFX10PLUS-NEXT:    ; return to shader part epilog
3990entry:
3991  %insert = insertelement <16 x i64> %vec, i64 %val, i32 %idx
3992  ret <16 x i64> %insert
3993}
3994
; Dynamic insertelement of a double into <16 x double> where the vector, the
; value and the index are all passed inreg.
;
; In both check groups the expected code copies the vector from s2..s33 down to
; s0..s31, loads m0 from the index in s36 and performs the whole 64-bit insert
; with a single s_movreld_b64 from s[34:35]. Only the gfx900 output has an
; s_nop 0 between the m0 write and the s_movreld_b64; the gfx1010/gfx1100
; output instead has the m0 write scheduled near the top of the copy sequence.
;
; The assertions below are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
3995define amdgpu_ps <16 x double> @dyn_insertelement_v16f64_s_s_s(<16 x double> inreg %vec, double inreg %val, i32 inreg %idx) {
3996; GPRIDX-LABEL: dyn_insertelement_v16f64_s_s_s:
3997; GPRIDX:       ; %bb.0: ; %entry
3998; GPRIDX-NEXT:    s_mov_b32 s0, s2
3999; GPRIDX-NEXT:    s_mov_b32 s1, s3
4000; GPRIDX-NEXT:    s_mov_b32 s2, s4
4001; GPRIDX-NEXT:    s_mov_b32 s3, s5
4002; GPRIDX-NEXT:    s_mov_b32 s4, s6
4003; GPRIDX-NEXT:    s_mov_b32 s5, s7
4004; GPRIDX-NEXT:    s_mov_b32 s6, s8
4005; GPRIDX-NEXT:    s_mov_b32 s7, s9
4006; GPRIDX-NEXT:    s_mov_b32 s8, s10
4007; GPRIDX-NEXT:    s_mov_b32 s9, s11
4008; GPRIDX-NEXT:    s_mov_b32 s10, s12
4009; GPRIDX-NEXT:    s_mov_b32 s11, s13
4010; GPRIDX-NEXT:    s_mov_b32 s12, s14
4011; GPRIDX-NEXT:    s_mov_b32 s13, s15
4012; GPRIDX-NEXT:    s_mov_b32 s14, s16
4013; GPRIDX-NEXT:    s_mov_b32 s15, s17
4014; GPRIDX-NEXT:    s_mov_b32 s16, s18
4015; GPRIDX-NEXT:    s_mov_b32 s17, s19
4016; GPRIDX-NEXT:    s_mov_b32 s18, s20
4017; GPRIDX-NEXT:    s_mov_b32 s19, s21
4018; GPRIDX-NEXT:    s_mov_b32 s20, s22
4019; GPRIDX-NEXT:    s_mov_b32 s21, s23
4020; GPRIDX-NEXT:    s_mov_b32 s22, s24
4021; GPRIDX-NEXT:    s_mov_b32 s23, s25
4022; GPRIDX-NEXT:    s_mov_b32 s24, s26
4023; GPRIDX-NEXT:    s_mov_b32 s25, s27
4024; GPRIDX-NEXT:    s_mov_b32 s26, s28
4025; GPRIDX-NEXT:    s_mov_b32 s27, s29
4026; GPRIDX-NEXT:    s_mov_b32 s28, s30
4027; GPRIDX-NEXT:    s_mov_b32 s29, s31
4028; GPRIDX-NEXT:    s_mov_b32 s31, s33
4029; GPRIDX-NEXT:    s_mov_b32 s30, s32
4030; GPRIDX-NEXT:    s_mov_b32 m0, s36
4031; GPRIDX-NEXT:    s_nop 0
4032; GPRIDX-NEXT:    s_movreld_b64 s[0:1], s[34:35]
4033; GPRIDX-NEXT:    ; return to shader part epilog
4034;
4035; GFX10PLUS-LABEL: dyn_insertelement_v16f64_s_s_s:
4036; GFX10PLUS:       ; %bb.0: ; %entry
4037; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
4038; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
4039; GFX10PLUS-NEXT:    s_mov_b32 m0, s36
4040; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
4041; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
4042; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
4043; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
4044; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
4045; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
4046; GFX10PLUS-NEXT:    s_mov_b32 s8, s10
4047; GFX10PLUS-NEXT:    s_mov_b32 s9, s11
4048; GFX10PLUS-NEXT:    s_mov_b32 s10, s12
4049; GFX10PLUS-NEXT:    s_mov_b32 s11, s13
4050; GFX10PLUS-NEXT:    s_mov_b32 s12, s14
4051; GFX10PLUS-NEXT:    s_mov_b32 s13, s15
4052; GFX10PLUS-NEXT:    s_mov_b32 s14, s16
4053; GFX10PLUS-NEXT:    s_mov_b32 s15, s17
4054; GFX10PLUS-NEXT:    s_mov_b32 s16, s18
4055; GFX10PLUS-NEXT:    s_mov_b32 s17, s19
4056; GFX10PLUS-NEXT:    s_mov_b32 s18, s20
4057; GFX10PLUS-NEXT:    s_mov_b32 s19, s21
4058; GFX10PLUS-NEXT:    s_mov_b32 s20, s22
4059; GFX10PLUS-NEXT:    s_mov_b32 s21, s23
4060; GFX10PLUS-NEXT:    s_mov_b32 s22, s24
4061; GFX10PLUS-NEXT:    s_mov_b32 s23, s25
4062; GFX10PLUS-NEXT:    s_mov_b32 s24, s26
4063; GFX10PLUS-NEXT:    s_mov_b32 s25, s27
4064; GFX10PLUS-NEXT:    s_mov_b32 s26, s28
4065; GFX10PLUS-NEXT:    s_mov_b32 s27, s29
4066; GFX10PLUS-NEXT:    s_mov_b32 s28, s30
4067; GFX10PLUS-NEXT:    s_mov_b32 s29, s31
4068; GFX10PLUS-NEXT:    s_mov_b32 s31, s33
4069; GFX10PLUS-NEXT:    s_mov_b32 s30, s32
4070; GFX10PLUS-NEXT:    s_movreld_b64 s[0:1], s[34:35]
4071; GFX10PLUS-NEXT:    ; return to shader part epilog
4072entry:
4073  %insert = insertelement <16 x double> %vec, double %val, i32 %idx
4074  ret <16 x double> %insert
4075}
4076
; Dynamic insertelement of an i32 into <16 x i32> where the vector and the
; index are passed inreg but the inserted value is not (the expected code reads
; it from v0).
;
; Expected shape for all three targets: the vector is copied from s2..s17 down
; to s0..s15 and then into v1..v16, the indexed write targets v1 with the index
; taken from s18, and the 16 result lanes are read back into s0..s15 with
; v_readfirstlane_b32. The indexed write itself differs per target:
;   - gfx900 brackets a plain v_mov_b32 with s_set_gpr_idx_on/off (DST mode);
;   - gfx1010 and gfx1100 load m0 from s18 and use v_movreld_b32;
;   - gfx1100 additionally pairs the SGPR-to-VGPR copies as v_dual_mov_b32.
;
; The assertions below are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
4077define amdgpu_ps <16 x i32> @dyn_insertelement_v16i32_s_v_s(<16 x i32> inreg %vec, i32 %val, i32 inreg %idx) {
4078; GPRIDX-LABEL: dyn_insertelement_v16i32_s_v_s:
4079; GPRIDX:       ; %bb.0: ; %entry
4080; GPRIDX-NEXT:    s_mov_b32 s1, s3
4081; GPRIDX-NEXT:    s_mov_b32 s3, s5
4082; GPRIDX-NEXT:    s_mov_b32 s5, s7
4083; GPRIDX-NEXT:    s_mov_b32 s7, s9
4084; GPRIDX-NEXT:    s_mov_b32 s9, s11
4085; GPRIDX-NEXT:    s_mov_b32 s11, s13
4086; GPRIDX-NEXT:    s_mov_b32 s13, s15
4087; GPRIDX-NEXT:    s_mov_b32 s15, s17
4088; GPRIDX-NEXT:    s_mov_b32 s0, s2
4089; GPRIDX-NEXT:    s_mov_b32 s2, s4
4090; GPRIDX-NEXT:    s_mov_b32 s4, s6
4091; GPRIDX-NEXT:    s_mov_b32 s6, s8
4092; GPRIDX-NEXT:    s_mov_b32 s8, s10
4093; GPRIDX-NEXT:    s_mov_b32 s10, s12
4094; GPRIDX-NEXT:    s_mov_b32 s12, s14
4095; GPRIDX-NEXT:    s_mov_b32 s14, s16
4096; GPRIDX-NEXT:    v_mov_b32_e32 v16, s15
4097; GPRIDX-NEXT:    v_mov_b32_e32 v15, s14
4098; GPRIDX-NEXT:    v_mov_b32_e32 v14, s13
4099; GPRIDX-NEXT:    v_mov_b32_e32 v13, s12
4100; GPRIDX-NEXT:    v_mov_b32_e32 v12, s11
4101; GPRIDX-NEXT:    v_mov_b32_e32 v11, s10
4102; GPRIDX-NEXT:    v_mov_b32_e32 v10, s9
4103; GPRIDX-NEXT:    v_mov_b32_e32 v9, s8
4104; GPRIDX-NEXT:    v_mov_b32_e32 v8, s7
4105; GPRIDX-NEXT:    v_mov_b32_e32 v7, s6
4106; GPRIDX-NEXT:    v_mov_b32_e32 v6, s5
4107; GPRIDX-NEXT:    v_mov_b32_e32 v5, s4
4108; GPRIDX-NEXT:    v_mov_b32_e32 v4, s3
4109; GPRIDX-NEXT:    v_mov_b32_e32 v3, s2
4110; GPRIDX-NEXT:    v_mov_b32_e32 v2, s1
4111; GPRIDX-NEXT:    v_mov_b32_e32 v1, s0
4112; GPRIDX-NEXT:    s_set_gpr_idx_on s18, gpr_idx(DST)
4113; GPRIDX-NEXT:    v_mov_b32_e32 v1, v0
4114; GPRIDX-NEXT:    s_set_gpr_idx_off
4115; GPRIDX-NEXT:    v_readfirstlane_b32 s0, v1
4116; GPRIDX-NEXT:    v_readfirstlane_b32 s1, v2
4117; GPRIDX-NEXT:    v_readfirstlane_b32 s2, v3
4118; GPRIDX-NEXT:    v_readfirstlane_b32 s3, v4
4119; GPRIDX-NEXT:    v_readfirstlane_b32 s4, v5
4120; GPRIDX-NEXT:    v_readfirstlane_b32 s5, v6
4121; GPRIDX-NEXT:    v_readfirstlane_b32 s6, v7
4122; GPRIDX-NEXT:    v_readfirstlane_b32 s7, v8
4123; GPRIDX-NEXT:    v_readfirstlane_b32 s8, v9
4124; GPRIDX-NEXT:    v_readfirstlane_b32 s9, v10
4125; GPRIDX-NEXT:    v_readfirstlane_b32 s10, v11
4126; GPRIDX-NEXT:    v_readfirstlane_b32 s11, v12
4127; GPRIDX-NEXT:    v_readfirstlane_b32 s12, v13
4128; GPRIDX-NEXT:    v_readfirstlane_b32 s13, v14
4129; GPRIDX-NEXT:    v_readfirstlane_b32 s14, v15
4130; GPRIDX-NEXT:    v_readfirstlane_b32 s15, v16
4131; GPRIDX-NEXT:    ; return to shader part epilog
4132;
4133; GFX10-LABEL: dyn_insertelement_v16i32_s_v_s:
4134; GFX10:       ; %bb.0: ; %entry
4135; GFX10-NEXT:    s_mov_b32 s1, s3
4136; GFX10-NEXT:    s_mov_b32 s3, s5
4137; GFX10-NEXT:    s_mov_b32 s5, s7
4138; GFX10-NEXT:    s_mov_b32 s7, s9
4139; GFX10-NEXT:    s_mov_b32 s9, s11
4140; GFX10-NEXT:    s_mov_b32 s11, s13
4141; GFX10-NEXT:    s_mov_b32 s13, s15
4142; GFX10-NEXT:    s_mov_b32 s15, s17
4143; GFX10-NEXT:    s_mov_b32 s0, s2
4144; GFX10-NEXT:    s_mov_b32 s2, s4
4145; GFX10-NEXT:    s_mov_b32 s4, s6
4146; GFX10-NEXT:    s_mov_b32 s6, s8
4147; GFX10-NEXT:    s_mov_b32 s8, s10
4148; GFX10-NEXT:    s_mov_b32 s10, s12
4149; GFX10-NEXT:    s_mov_b32 s12, s14
4150; GFX10-NEXT:    s_mov_b32 s14, s16
4151; GFX10-NEXT:    v_mov_b32_e32 v16, s15
4152; GFX10-NEXT:    v_mov_b32_e32 v1, s0
4153; GFX10-NEXT:    s_mov_b32 m0, s18
4154; GFX10-NEXT:    v_mov_b32_e32 v15, s14
4155; GFX10-NEXT:    v_mov_b32_e32 v14, s13
4156; GFX10-NEXT:    v_mov_b32_e32 v13, s12
4157; GFX10-NEXT:    v_mov_b32_e32 v12, s11
4158; GFX10-NEXT:    v_mov_b32_e32 v11, s10
4159; GFX10-NEXT:    v_mov_b32_e32 v10, s9
4160; GFX10-NEXT:    v_mov_b32_e32 v9, s8
4161; GFX10-NEXT:    v_mov_b32_e32 v8, s7
4162; GFX10-NEXT:    v_mov_b32_e32 v7, s6
4163; GFX10-NEXT:    v_mov_b32_e32 v6, s5
4164; GFX10-NEXT:    v_mov_b32_e32 v5, s4
4165; GFX10-NEXT:    v_mov_b32_e32 v4, s3
4166; GFX10-NEXT:    v_mov_b32_e32 v3, s2
4167; GFX10-NEXT:    v_mov_b32_e32 v2, s1
4168; GFX10-NEXT:    v_movreld_b32_e32 v1, v0
4169; GFX10-NEXT:    v_readfirstlane_b32 s0, v1
4170; GFX10-NEXT:    v_readfirstlane_b32 s1, v2
4171; GFX10-NEXT:    v_readfirstlane_b32 s2, v3
4172; GFX10-NEXT:    v_readfirstlane_b32 s3, v4
4173; GFX10-NEXT:    v_readfirstlane_b32 s4, v5
4174; GFX10-NEXT:    v_readfirstlane_b32 s5, v6
4175; GFX10-NEXT:    v_readfirstlane_b32 s6, v7
4176; GFX10-NEXT:    v_readfirstlane_b32 s7, v8
4177; GFX10-NEXT:    v_readfirstlane_b32 s8, v9
4178; GFX10-NEXT:    v_readfirstlane_b32 s9, v10
4179; GFX10-NEXT:    v_readfirstlane_b32 s10, v11
4180; GFX10-NEXT:    v_readfirstlane_b32 s11, v12
4181; GFX10-NEXT:    v_readfirstlane_b32 s12, v13
4182; GFX10-NEXT:    v_readfirstlane_b32 s13, v14
4183; GFX10-NEXT:    v_readfirstlane_b32 s14, v15
4184; GFX10-NEXT:    v_readfirstlane_b32 s15, v16
4185; GFX10-NEXT:    ; return to shader part epilog
4186;
4187; GFX11-LABEL: dyn_insertelement_v16i32_s_v_s:
4188; GFX11:       ; %bb.0: ; %entry
4189; GFX11-NEXT:    s_mov_b32 s1, s3
4190; GFX11-NEXT:    s_mov_b32 s3, s5
4191; GFX11-NEXT:    s_mov_b32 s5, s7
4192; GFX11-NEXT:    s_mov_b32 s7, s9
4193; GFX11-NEXT:    s_mov_b32 s9, s11
4194; GFX11-NEXT:    s_mov_b32 s11, s13
4195; GFX11-NEXT:    s_mov_b32 s13, s15
4196; GFX11-NEXT:    s_mov_b32 s15, s17
4197; GFX11-NEXT:    s_mov_b32 s0, s2
4198; GFX11-NEXT:    s_mov_b32 s2, s4
4199; GFX11-NEXT:    s_mov_b32 s4, s6
4200; GFX11-NEXT:    s_mov_b32 s6, s8
4201; GFX11-NEXT:    s_mov_b32 s8, s10
4202; GFX11-NEXT:    s_mov_b32 s10, s12
4203; GFX11-NEXT:    s_mov_b32 s12, s14
4204; GFX11-NEXT:    s_mov_b32 s14, s16
4205; GFX11-NEXT:    v_dual_mov_b32 v16, s15 :: v_dual_mov_b32 v15, s14
4206; GFX11-NEXT:    v_dual_mov_b32 v2, s1 :: v_dual_mov_b32 v1, s0
4207; GFX11-NEXT:    s_mov_b32 m0, s18
4208; GFX11-NEXT:    v_dual_mov_b32 v14, s13 :: v_dual_mov_b32 v13, s12
4209; GFX11-NEXT:    v_dual_mov_b32 v12, s11 :: v_dual_mov_b32 v11, s10
4210; GFX11-NEXT:    v_dual_mov_b32 v10, s9 :: v_dual_mov_b32 v9, s8
4211; GFX11-NEXT:    v_dual_mov_b32 v8, s7 :: v_dual_mov_b32 v7, s6
4212; GFX11-NEXT:    v_dual_mov_b32 v6, s5 :: v_dual_mov_b32 v5, s4
4213; GFX11-NEXT:    v_dual_mov_b32 v4, s3 :: v_dual_mov_b32 v3, s2
4214; GFX11-NEXT:    v_movreld_b32_e32 v1, v0
4215; GFX11-NEXT:    v_readfirstlane_b32 s0, v1
4216; GFX11-NEXT:    v_readfirstlane_b32 s1, v2
4217; GFX11-NEXT:    v_readfirstlane_b32 s2, v3
4218; GFX11-NEXT:    v_readfirstlane_b32 s3, v4
4219; GFX11-NEXT:    v_readfirstlane_b32 s4, v5
4220; GFX11-NEXT:    v_readfirstlane_b32 s5, v6
4221; GFX11-NEXT:    v_readfirstlane_b32 s6, v7
4222; GFX11-NEXT:    v_readfirstlane_b32 s7, v8
4223; GFX11-NEXT:    v_readfirstlane_b32 s8, v9
4224; GFX11-NEXT:    v_readfirstlane_b32 s9, v10
4225; GFX11-NEXT:    v_readfirstlane_b32 s10, v11
4226; GFX11-NEXT:    v_readfirstlane_b32 s11, v12
4227; GFX11-NEXT:    v_readfirstlane_b32 s12, v13
4228; GFX11-NEXT:    v_readfirstlane_b32 s13, v14
4229; GFX11-NEXT:    v_readfirstlane_b32 s14, v15
4230; GFX11-NEXT:    v_readfirstlane_b32 s15, v16
4231; GFX11-NEXT:    ; return to shader part epilog
4232entry:
4233  %insert = insertelement <16 x i32> %vec, i32 %val, i32 %idx
4234  ret <16 x i32> %insert
4235}
4236
; Dynamic insertelement of a float into <16 x float> where the vector and the
; index are passed inreg but the inserted value is not (it arrives in v0).
;
; Expected shape for all three targets: the vector is copied from s2..s17 down
; to s0..s15, the incoming value is first saved from v0 into v16 so that
; v0..v15 can receive the vector, and the indexed write targets v0 with the
; index taken from s18. No v_readfirstlane_b32 copies follow; the sequence ends
; with the result in v0..v15. The indexed write itself differs per target:
;   - gfx900 brackets a plain v_mov_b32 with s_set_gpr_idx_on/off (DST mode);
;   - gfx1010 and gfx1100 load m0 from s18 and use v_movreld_b32;
;   - gfx1100 additionally pairs the SGPR-to-VGPR copies as v_dual_mov_b32.
;
; The assertions below are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
4237define amdgpu_ps <16 x float> @dyn_insertelement_v16f32_s_v_s(<16 x float> inreg %vec, float %val, i32 inreg %idx) {
4238; GPRIDX-LABEL: dyn_insertelement_v16f32_s_v_s:
4239; GPRIDX:       ; %bb.0: ; %entry
4240; GPRIDX-NEXT:    s_mov_b32 s0, s2
4241; GPRIDX-NEXT:    s_mov_b32 s1, s3
4242; GPRIDX-NEXT:    s_mov_b32 s2, s4
4243; GPRIDX-NEXT:    s_mov_b32 s3, s5
4244; GPRIDX-NEXT:    s_mov_b32 s4, s6
4245; GPRIDX-NEXT:    s_mov_b32 s5, s7
4246; GPRIDX-NEXT:    s_mov_b32 s6, s8
4247; GPRIDX-NEXT:    s_mov_b32 s7, s9
4248; GPRIDX-NEXT:    s_mov_b32 s8, s10
4249; GPRIDX-NEXT:    s_mov_b32 s9, s11
4250; GPRIDX-NEXT:    s_mov_b32 s10, s12
4251; GPRIDX-NEXT:    s_mov_b32 s11, s13
4252; GPRIDX-NEXT:    s_mov_b32 s12, s14
4253; GPRIDX-NEXT:    s_mov_b32 s13, s15
4254; GPRIDX-NEXT:    s_mov_b32 s14, s16
4255; GPRIDX-NEXT:    s_mov_b32 s15, s17
4256; GPRIDX-NEXT:    v_mov_b32_e32 v16, v0
4257; GPRIDX-NEXT:    v_mov_b32_e32 v0, s0
4258; GPRIDX-NEXT:    v_mov_b32_e32 v1, s1
4259; GPRIDX-NEXT:    v_mov_b32_e32 v2, s2
4260; GPRIDX-NEXT:    v_mov_b32_e32 v3, s3
4261; GPRIDX-NEXT:    v_mov_b32_e32 v4, s4
4262; GPRIDX-NEXT:    v_mov_b32_e32 v5, s5
4263; GPRIDX-NEXT:    v_mov_b32_e32 v6, s6
4264; GPRIDX-NEXT:    v_mov_b32_e32 v7, s7
4265; GPRIDX-NEXT:    v_mov_b32_e32 v8, s8
4266; GPRIDX-NEXT:    v_mov_b32_e32 v9, s9
4267; GPRIDX-NEXT:    v_mov_b32_e32 v10, s10
4268; GPRIDX-NEXT:    v_mov_b32_e32 v11, s11
4269; GPRIDX-NEXT:    v_mov_b32_e32 v12, s12
4270; GPRIDX-NEXT:    v_mov_b32_e32 v13, s13
4271; GPRIDX-NEXT:    v_mov_b32_e32 v14, s14
4272; GPRIDX-NEXT:    v_mov_b32_e32 v15, s15
4273; GPRIDX-NEXT:    s_set_gpr_idx_on s18, gpr_idx(DST)
4274; GPRIDX-NEXT:    v_mov_b32_e32 v0, v16
4275; GPRIDX-NEXT:    s_set_gpr_idx_off
4276; GPRIDX-NEXT:    ; return to shader part epilog
4277;
4278; GFX10-LABEL: dyn_insertelement_v16f32_s_v_s:
4279; GFX10:       ; %bb.0: ; %entry
4280; GFX10-NEXT:    s_mov_b32 s0, s2
4281; GFX10-NEXT:    s_mov_b32 s1, s3
4282; GFX10-NEXT:    s_mov_b32 s2, s4
4283; GFX10-NEXT:    s_mov_b32 s3, s5
4284; GFX10-NEXT:    s_mov_b32 s4, s6
4285; GFX10-NEXT:    s_mov_b32 s5, s7
4286; GFX10-NEXT:    s_mov_b32 s6, s8
4287; GFX10-NEXT:    s_mov_b32 s7, s9
4288; GFX10-NEXT:    s_mov_b32 s8, s10
4289; GFX10-NEXT:    s_mov_b32 s9, s11
4290; GFX10-NEXT:    s_mov_b32 s10, s12
4291; GFX10-NEXT:    s_mov_b32 s11, s13
4292; GFX10-NEXT:    s_mov_b32 s12, s14
4293; GFX10-NEXT:    s_mov_b32 s13, s15
4294; GFX10-NEXT:    s_mov_b32 s14, s16
4295; GFX10-NEXT:    s_mov_b32 s15, s17
4296; GFX10-NEXT:    v_mov_b32_e32 v16, v0
4297; GFX10-NEXT:    v_mov_b32_e32 v0, s0
4298; GFX10-NEXT:    s_mov_b32 m0, s18
4299; GFX10-NEXT:    v_mov_b32_e32 v1, s1
4300; GFX10-NEXT:    v_mov_b32_e32 v2, s2
4301; GFX10-NEXT:    v_mov_b32_e32 v3, s3
4302; GFX10-NEXT:    v_mov_b32_e32 v4, s4
4303; GFX10-NEXT:    v_mov_b32_e32 v5, s5
4304; GFX10-NEXT:    v_mov_b32_e32 v6, s6
4305; GFX10-NEXT:    v_mov_b32_e32 v7, s7
4306; GFX10-NEXT:    v_mov_b32_e32 v8, s8
4307; GFX10-NEXT:    v_mov_b32_e32 v9, s9
4308; GFX10-NEXT:    v_mov_b32_e32 v10, s10
4309; GFX10-NEXT:    v_mov_b32_e32 v11, s11
4310; GFX10-NEXT:    v_mov_b32_e32 v12, s12
4311; GFX10-NEXT:    v_mov_b32_e32 v13, s13
4312; GFX10-NEXT:    v_mov_b32_e32 v14, s14
4313; GFX10-NEXT:    v_mov_b32_e32 v15, s15
4314; GFX10-NEXT:    v_movreld_b32_e32 v0, v16
4315; GFX10-NEXT:    ; return to shader part epilog
4316;
4317; GFX11-LABEL: dyn_insertelement_v16f32_s_v_s:
4318; GFX11:       ; %bb.0: ; %entry
4319; GFX11-NEXT:    s_mov_b32 s0, s2
4320; GFX11-NEXT:    s_mov_b32 s1, s3
4321; GFX11-NEXT:    s_mov_b32 s2, s4
4322; GFX11-NEXT:    s_mov_b32 s3, s5
4323; GFX11-NEXT:    s_mov_b32 s4, s6
4324; GFX11-NEXT:    s_mov_b32 s5, s7
4325; GFX11-NEXT:    s_mov_b32 s6, s8
4326; GFX11-NEXT:    s_mov_b32 s7, s9
4327; GFX11-NEXT:    s_mov_b32 s8, s10
4328; GFX11-NEXT:    s_mov_b32 s9, s11
4329; GFX11-NEXT:    s_mov_b32 s10, s12
4330; GFX11-NEXT:    s_mov_b32 s11, s13
4331; GFX11-NEXT:    s_mov_b32 s12, s14
4332; GFX11-NEXT:    s_mov_b32 s13, s15
4333; GFX11-NEXT:    s_mov_b32 s14, s16
4334; GFX11-NEXT:    s_mov_b32 s15, s17
4335; GFX11-NEXT:    v_mov_b32_e32 v16, v0
4336; GFX11-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v3, s3
4337; GFX11-NEXT:    s_mov_b32 m0, s18
4338; GFX11-NEXT:    v_dual_mov_b32 v1, s1 :: v_dual_mov_b32 v2, s2
4339; GFX11-NEXT:    v_dual_mov_b32 v5, s5 :: v_dual_mov_b32 v4, s4
4340; GFX11-NEXT:    v_dual_mov_b32 v7, s7 :: v_dual_mov_b32 v6, s6
4341; GFX11-NEXT:    v_dual_mov_b32 v9, s9 :: v_dual_mov_b32 v8, s8
4342; GFX11-NEXT:    v_dual_mov_b32 v11, s11 :: v_dual_mov_b32 v10, s10
4343; GFX11-NEXT:    v_dual_mov_b32 v13, s13 :: v_dual_mov_b32 v12, s12
4344; GFX11-NEXT:    v_dual_mov_b32 v15, s15 :: v_dual_mov_b32 v14, s14
4345; GFX11-NEXT:    v_movreld_b32_e32 v0, v16
4346; GFX11-NEXT:    ; return to shader part epilog
4347entry:
4348  %insert = insertelement <16 x float> %vec, float %val, i32 %idx
4349  ret <16 x float> %insert
4350}
4351
; Dynamic insertelement of a float into <32 x float> where the vector and the
; index are passed inreg but the inserted value is not (it arrives in v0).
;
; Expected shape for all three targets: the vector is copied from s2..s33 down
; to s0..s31, the incoming value is first saved from v0 into v32 so that
; v0..v31 can receive the vector, and the indexed write targets v0 with the
; index taken from s34. No v_readfirstlane_b32 copies follow; the sequence ends
; with the result in v0..v31. The indexed write itself differs per target:
;   - gfx900 brackets a plain v_mov_b32 with s_set_gpr_idx_on/off (DST mode);
;   - gfx1010 and gfx1100 load m0 from s34 and use v_movreld_b32;
;   - gfx1100 additionally pairs the SGPR-to-VGPR copies as v_dual_mov_b32.
;
; The assertions below are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
4352define amdgpu_ps <32 x float> @dyn_insertelement_v32f32_s_v_s(<32 x float> inreg %vec, float %val, i32 inreg %idx) {
4353; GPRIDX-LABEL: dyn_insertelement_v32f32_s_v_s:
4354; GPRIDX:       ; %bb.0: ; %entry
4355; GPRIDX-NEXT:    s_mov_b32 s0, s2
4356; GPRIDX-NEXT:    s_mov_b32 s1, s3
4357; GPRIDX-NEXT:    s_mov_b32 s2, s4
4358; GPRIDX-NEXT:    s_mov_b32 s3, s5
4359; GPRIDX-NEXT:    s_mov_b32 s4, s6
4360; GPRIDX-NEXT:    s_mov_b32 s5, s7
4361; GPRIDX-NEXT:    s_mov_b32 s6, s8
4362; GPRIDX-NEXT:    s_mov_b32 s7, s9
4363; GPRIDX-NEXT:    s_mov_b32 s8, s10
4364; GPRIDX-NEXT:    s_mov_b32 s9, s11
4365; GPRIDX-NEXT:    s_mov_b32 s10, s12
4366; GPRIDX-NEXT:    s_mov_b32 s11, s13
4367; GPRIDX-NEXT:    s_mov_b32 s12, s14
4368; GPRIDX-NEXT:    s_mov_b32 s13, s15
4369; GPRIDX-NEXT:    s_mov_b32 s14, s16
4370; GPRIDX-NEXT:    s_mov_b32 s15, s17
4371; GPRIDX-NEXT:    s_mov_b32 s16, s18
4372; GPRIDX-NEXT:    s_mov_b32 s17, s19
4373; GPRIDX-NEXT:    s_mov_b32 s18, s20
4374; GPRIDX-NEXT:    s_mov_b32 s19, s21
4375; GPRIDX-NEXT:    s_mov_b32 s20, s22
4376; GPRIDX-NEXT:    s_mov_b32 s21, s23
4377; GPRIDX-NEXT:    s_mov_b32 s22, s24
4378; GPRIDX-NEXT:    s_mov_b32 s23, s25
4379; GPRIDX-NEXT:    s_mov_b32 s24, s26
4380; GPRIDX-NEXT:    s_mov_b32 s25, s27
4381; GPRIDX-NEXT:    s_mov_b32 s26, s28
4382; GPRIDX-NEXT:    s_mov_b32 s27, s29
4383; GPRIDX-NEXT:    s_mov_b32 s28, s30
4384; GPRIDX-NEXT:    s_mov_b32 s29, s31
4385; GPRIDX-NEXT:    s_mov_b32 s31, s33
4386; GPRIDX-NEXT:    s_mov_b32 s30, s32
4387; GPRIDX-NEXT:    v_mov_b32_e32 v32, v0
4388; GPRIDX-NEXT:    v_mov_b32_e32 v0, s0
4389; GPRIDX-NEXT:    v_mov_b32_e32 v1, s1
4390; GPRIDX-NEXT:    v_mov_b32_e32 v2, s2
4391; GPRIDX-NEXT:    v_mov_b32_e32 v3, s3
4392; GPRIDX-NEXT:    v_mov_b32_e32 v4, s4
4393; GPRIDX-NEXT:    v_mov_b32_e32 v5, s5
4394; GPRIDX-NEXT:    v_mov_b32_e32 v6, s6
4395; GPRIDX-NEXT:    v_mov_b32_e32 v7, s7
4396; GPRIDX-NEXT:    v_mov_b32_e32 v8, s8
4397; GPRIDX-NEXT:    v_mov_b32_e32 v9, s9
4398; GPRIDX-NEXT:    v_mov_b32_e32 v10, s10
4399; GPRIDX-NEXT:    v_mov_b32_e32 v11, s11
4400; GPRIDX-NEXT:    v_mov_b32_e32 v12, s12
4401; GPRIDX-NEXT:    v_mov_b32_e32 v13, s13
4402; GPRIDX-NEXT:    v_mov_b32_e32 v14, s14
4403; GPRIDX-NEXT:    v_mov_b32_e32 v15, s15
4404; GPRIDX-NEXT:    v_mov_b32_e32 v16, s16
4405; GPRIDX-NEXT:    v_mov_b32_e32 v17, s17
4406; GPRIDX-NEXT:    v_mov_b32_e32 v18, s18
4407; GPRIDX-NEXT:    v_mov_b32_e32 v19, s19
4408; GPRIDX-NEXT:    v_mov_b32_e32 v20, s20
4409; GPRIDX-NEXT:    v_mov_b32_e32 v21, s21
4410; GPRIDX-NEXT:    v_mov_b32_e32 v22, s22
4411; GPRIDX-NEXT:    v_mov_b32_e32 v23, s23
4412; GPRIDX-NEXT:    v_mov_b32_e32 v24, s24
4413; GPRIDX-NEXT:    v_mov_b32_e32 v25, s25
4414; GPRIDX-NEXT:    v_mov_b32_e32 v26, s26
4415; GPRIDX-NEXT:    v_mov_b32_e32 v27, s27
4416; GPRIDX-NEXT:    v_mov_b32_e32 v28, s28
4417; GPRIDX-NEXT:    v_mov_b32_e32 v29, s29
4418; GPRIDX-NEXT:    v_mov_b32_e32 v30, s30
4419; GPRIDX-NEXT:    v_mov_b32_e32 v31, s31
4420; GPRIDX-NEXT:    s_set_gpr_idx_on s34, gpr_idx(DST)
4421; GPRIDX-NEXT:    v_mov_b32_e32 v0, v32
4422; GPRIDX-NEXT:    s_set_gpr_idx_off
4423; GPRIDX-NEXT:    ; return to shader part epilog
4424;
4425; GFX10-LABEL: dyn_insertelement_v32f32_s_v_s:
4426; GFX10:       ; %bb.0: ; %entry
4427; GFX10-NEXT:    s_mov_b32 s0, s2
4428; GFX10-NEXT:    s_mov_b32 s1, s3
4429; GFX10-NEXT:    s_mov_b32 s2, s4
4430; GFX10-NEXT:    s_mov_b32 s3, s5
4431; GFX10-NEXT:    s_mov_b32 s4, s6
4432; GFX10-NEXT:    s_mov_b32 s5, s7
4433; GFX10-NEXT:    s_mov_b32 s6, s8
4434; GFX10-NEXT:    s_mov_b32 s7, s9
4435; GFX10-NEXT:    s_mov_b32 s8, s10
4436; GFX10-NEXT:    s_mov_b32 s9, s11
4437; GFX10-NEXT:    s_mov_b32 s10, s12
4438; GFX10-NEXT:    s_mov_b32 s11, s13
4439; GFX10-NEXT:    s_mov_b32 s12, s14
4440; GFX10-NEXT:    s_mov_b32 s13, s15
4441; GFX10-NEXT:    s_mov_b32 s14, s16
4442; GFX10-NEXT:    s_mov_b32 s15, s17
4443; GFX10-NEXT:    s_mov_b32 s16, s18
4444; GFX10-NEXT:    s_mov_b32 s17, s19
4445; GFX10-NEXT:    s_mov_b32 s18, s20
4446; GFX10-NEXT:    s_mov_b32 s19, s21
4447; GFX10-NEXT:    s_mov_b32 s20, s22
4448; GFX10-NEXT:    s_mov_b32 s21, s23
4449; GFX10-NEXT:    s_mov_b32 s22, s24
4450; GFX10-NEXT:    s_mov_b32 s23, s25
4451; GFX10-NEXT:    s_mov_b32 s24, s26
4452; GFX10-NEXT:    s_mov_b32 s25, s27
4453; GFX10-NEXT:    s_mov_b32 s26, s28
4454; GFX10-NEXT:    s_mov_b32 s27, s29
4455; GFX10-NEXT:    s_mov_b32 s28, s30
4456; GFX10-NEXT:    s_mov_b32 s29, s31
4457; GFX10-NEXT:    s_mov_b32 s31, s33
4458; GFX10-NEXT:    s_mov_b32 s30, s32
4459; GFX10-NEXT:    v_mov_b32_e32 v32, v0
4460; GFX10-NEXT:    v_mov_b32_e32 v0, s0
4461; GFX10-NEXT:    s_mov_b32 m0, s34
4462; GFX10-NEXT:    v_mov_b32_e32 v1, s1
4463; GFX10-NEXT:    v_mov_b32_e32 v2, s2
4464; GFX10-NEXT:    v_mov_b32_e32 v3, s3
4465; GFX10-NEXT:    v_mov_b32_e32 v4, s4
4466; GFX10-NEXT:    v_mov_b32_e32 v5, s5
4467; GFX10-NEXT:    v_mov_b32_e32 v6, s6
4468; GFX10-NEXT:    v_mov_b32_e32 v7, s7
4469; GFX10-NEXT:    v_mov_b32_e32 v8, s8
4470; GFX10-NEXT:    v_mov_b32_e32 v9, s9
4471; GFX10-NEXT:    v_mov_b32_e32 v10, s10
4472; GFX10-NEXT:    v_mov_b32_e32 v11, s11
4473; GFX10-NEXT:    v_mov_b32_e32 v12, s12
4474; GFX10-NEXT:    v_mov_b32_e32 v13, s13
4475; GFX10-NEXT:    v_mov_b32_e32 v14, s14
4476; GFX10-NEXT:    v_mov_b32_e32 v15, s15
4477; GFX10-NEXT:    v_mov_b32_e32 v16, s16
4478; GFX10-NEXT:    v_mov_b32_e32 v17, s17
4479; GFX10-NEXT:    v_mov_b32_e32 v18, s18
4480; GFX10-NEXT:    v_mov_b32_e32 v19, s19
4481; GFX10-NEXT:    v_mov_b32_e32 v20, s20
4482; GFX10-NEXT:    v_mov_b32_e32 v21, s21
4483; GFX10-NEXT:    v_mov_b32_e32 v22, s22
4484; GFX10-NEXT:    v_mov_b32_e32 v23, s23
4485; GFX10-NEXT:    v_mov_b32_e32 v24, s24
4486; GFX10-NEXT:    v_mov_b32_e32 v25, s25
4487; GFX10-NEXT:    v_mov_b32_e32 v26, s26
4488; GFX10-NEXT:    v_mov_b32_e32 v27, s27
4489; GFX10-NEXT:    v_mov_b32_e32 v28, s28
4490; GFX10-NEXT:    v_mov_b32_e32 v29, s29
4491; GFX10-NEXT:    v_mov_b32_e32 v30, s30
4492; GFX10-NEXT:    v_mov_b32_e32 v31, s31
4493; GFX10-NEXT:    v_movreld_b32_e32 v0, v32
4494; GFX10-NEXT:    ; return to shader part epilog
4495;
4496; GFX11-LABEL: dyn_insertelement_v32f32_s_v_s:
4497; GFX11:       ; %bb.0: ; %entry
4498; GFX11-NEXT:    s_mov_b32 s0, s2
4499; GFX11-NEXT:    s_mov_b32 s1, s3
4500; GFX11-NEXT:    s_mov_b32 s2, s4
4501; GFX11-NEXT:    s_mov_b32 s3, s5
4502; GFX11-NEXT:    s_mov_b32 s4, s6
4503; GFX11-NEXT:    s_mov_b32 s5, s7
4504; GFX11-NEXT:    s_mov_b32 s6, s8
4505; GFX11-NEXT:    s_mov_b32 s7, s9
4506; GFX11-NEXT:    s_mov_b32 s8, s10
4507; GFX11-NEXT:    s_mov_b32 s9, s11
4508; GFX11-NEXT:    s_mov_b32 s10, s12
4509; GFX11-NEXT:    s_mov_b32 s11, s13
4510; GFX11-NEXT:    s_mov_b32 s12, s14
4511; GFX11-NEXT:    s_mov_b32 s13, s15
4512; GFX11-NEXT:    s_mov_b32 s14, s16
4513; GFX11-NEXT:    s_mov_b32 s15, s17
4514; GFX11-NEXT:    s_mov_b32 s16, s18
4515; GFX11-NEXT:    s_mov_b32 s17, s19
4516; GFX11-NEXT:    s_mov_b32 s18, s20
4517; GFX11-NEXT:    s_mov_b32 s19, s21
4518; GFX11-NEXT:    s_mov_b32 s20, s22
4519; GFX11-NEXT:    s_mov_b32 s21, s23
4520; GFX11-NEXT:    s_mov_b32 s22, s24
4521; GFX11-NEXT:    s_mov_b32 s23, s25
4522; GFX11-NEXT:    s_mov_b32 s24, s26
4523; GFX11-NEXT:    s_mov_b32 s25, s27
4524; GFX11-NEXT:    s_mov_b32 s26, s28
4525; GFX11-NEXT:    s_mov_b32 s27, s29
4526; GFX11-NEXT:    s_mov_b32 s28, s30
4527; GFX11-NEXT:    s_mov_b32 s29, s31
4528; GFX11-NEXT:    s_mov_b32 s31, s33
4529; GFX11-NEXT:    s_mov_b32 s30, s32
4530; GFX11-NEXT:    v_mov_b32_e32 v32, v0
4531; GFX11-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v3, s3
4532; GFX11-NEXT:    s_mov_b32 m0, s34
4533; GFX11-NEXT:    v_dual_mov_b32 v1, s1 :: v_dual_mov_b32 v2, s2
4534; GFX11-NEXT:    v_dual_mov_b32 v5, s5 :: v_dual_mov_b32 v4, s4
4535; GFX11-NEXT:    v_dual_mov_b32 v7, s7 :: v_dual_mov_b32 v6, s6
4536; GFX11-NEXT:    v_dual_mov_b32 v9, s9 :: v_dual_mov_b32 v8, s8
4537; GFX11-NEXT:    v_dual_mov_b32 v11, s11 :: v_dual_mov_b32 v10, s10
4538; GFX11-NEXT:    v_dual_mov_b32 v13, s13 :: v_dual_mov_b32 v12, s12
4539; GFX11-NEXT:    v_dual_mov_b32 v15, s15 :: v_dual_mov_b32 v14, s14
4540; GFX11-NEXT:    v_dual_mov_b32 v17, s17 :: v_dual_mov_b32 v16, s16
4541; GFX11-NEXT:    v_dual_mov_b32 v19, s19 :: v_dual_mov_b32 v18, s18
4542; GFX11-NEXT:    v_dual_mov_b32 v21, s21 :: v_dual_mov_b32 v20, s20
4543; GFX11-NEXT:    v_dual_mov_b32 v23, s23 :: v_dual_mov_b32 v22, s22
4544; GFX11-NEXT:    v_dual_mov_b32 v25, s25 :: v_dual_mov_b32 v24, s24
4545; GFX11-NEXT:    v_dual_mov_b32 v27, s27 :: v_dual_mov_b32 v26, s26
4546; GFX11-NEXT:    v_dual_mov_b32 v29, s29 :: v_dual_mov_b32 v28, s28
4547; GFX11-NEXT:    v_dual_mov_b32 v31, s31 :: v_dual_mov_b32 v30, s30
4548; GFX11-NEXT:    v_movreld_b32_e32 v0, v32
4549; GFX11-NEXT:    ; return to shader part epilog
4550entry:
4551  %insert = insertelement <32 x float> %vec, float %val, i32 %idx
4552  ret <32 x float> %insert
4553}
4554
4555define amdgpu_ps <16 x i64> @dyn_insertelement_v16i64_s_v_s(<16 x i64> inreg %vec, i64 %val, i32 inreg %idx) {
4556; GPRIDX-LABEL: dyn_insertelement_v16i64_s_v_s:
4557; GPRIDX:       ; %bb.0: ; %entry
4558; GPRIDX-NEXT:    s_mov_b32 s1, s3
4559; GPRIDX-NEXT:    s_mov_b32 s3, s5
4560; GPRIDX-NEXT:    s_mov_b32 s5, s7
4561; GPRIDX-NEXT:    s_mov_b32 s7, s9
4562; GPRIDX-NEXT:    s_mov_b32 s9, s11
4563; GPRIDX-NEXT:    s_mov_b32 s11, s13
4564; GPRIDX-NEXT:    s_mov_b32 s13, s15
4565; GPRIDX-NEXT:    s_mov_b32 s15, s17
4566; GPRIDX-NEXT:    s_mov_b32 s17, s19
4567; GPRIDX-NEXT:    s_mov_b32 s19, s21
4568; GPRIDX-NEXT:    s_mov_b32 s21, s23
4569; GPRIDX-NEXT:    s_mov_b32 s23, s25
4570; GPRIDX-NEXT:    s_mov_b32 s25, s27
4571; GPRIDX-NEXT:    s_mov_b32 s27, s29
4572; GPRIDX-NEXT:    s_mov_b32 s29, s31
4573; GPRIDX-NEXT:    s_mov_b32 s31, s33
4574; GPRIDX-NEXT:    s_mov_b32 s0, s2
4575; GPRIDX-NEXT:    s_mov_b32 s2, s4
4576; GPRIDX-NEXT:    s_mov_b32 s4, s6
4577; GPRIDX-NEXT:    s_mov_b32 s6, s8
4578; GPRIDX-NEXT:    s_mov_b32 s8, s10
4579; GPRIDX-NEXT:    s_mov_b32 s10, s12
4580; GPRIDX-NEXT:    s_mov_b32 s12, s14
4581; GPRIDX-NEXT:    s_mov_b32 s14, s16
4582; GPRIDX-NEXT:    s_mov_b32 s16, s18
4583; GPRIDX-NEXT:    s_mov_b32 s18, s20
4584; GPRIDX-NEXT:    s_mov_b32 s20, s22
4585; GPRIDX-NEXT:    s_mov_b32 s22, s24
4586; GPRIDX-NEXT:    s_mov_b32 s24, s26
4587; GPRIDX-NEXT:    s_mov_b32 s26, s28
4588; GPRIDX-NEXT:    s_mov_b32 s28, s30
4589; GPRIDX-NEXT:    s_mov_b32 s30, s32
4590; GPRIDX-NEXT:    v_mov_b32_e32 v33, s31
4591; GPRIDX-NEXT:    s_lshl_b32 s33, s34, 1
4592; GPRIDX-NEXT:    v_mov_b32_e32 v32, s30
4593; GPRIDX-NEXT:    v_mov_b32_e32 v31, s29
4594; GPRIDX-NEXT:    v_mov_b32_e32 v30, s28
4595; GPRIDX-NEXT:    v_mov_b32_e32 v29, s27
4596; GPRIDX-NEXT:    v_mov_b32_e32 v28, s26
4597; GPRIDX-NEXT:    v_mov_b32_e32 v27, s25
4598; GPRIDX-NEXT:    v_mov_b32_e32 v26, s24
4599; GPRIDX-NEXT:    v_mov_b32_e32 v25, s23
4600; GPRIDX-NEXT:    v_mov_b32_e32 v24, s22
4601; GPRIDX-NEXT:    v_mov_b32_e32 v23, s21
4602; GPRIDX-NEXT:    v_mov_b32_e32 v22, s20
4603; GPRIDX-NEXT:    v_mov_b32_e32 v21, s19
4604; GPRIDX-NEXT:    v_mov_b32_e32 v20, s18
4605; GPRIDX-NEXT:    v_mov_b32_e32 v19, s17
4606; GPRIDX-NEXT:    v_mov_b32_e32 v18, s16
4607; GPRIDX-NEXT:    v_mov_b32_e32 v17, s15
4608; GPRIDX-NEXT:    v_mov_b32_e32 v16, s14
4609; GPRIDX-NEXT:    v_mov_b32_e32 v15, s13
4610; GPRIDX-NEXT:    v_mov_b32_e32 v14, s12
4611; GPRIDX-NEXT:    v_mov_b32_e32 v13, s11
4612; GPRIDX-NEXT:    v_mov_b32_e32 v12, s10
4613; GPRIDX-NEXT:    v_mov_b32_e32 v11, s9
4614; GPRIDX-NEXT:    v_mov_b32_e32 v10, s8
4615; GPRIDX-NEXT:    v_mov_b32_e32 v9, s7
4616; GPRIDX-NEXT:    v_mov_b32_e32 v8, s6
4617; GPRIDX-NEXT:    v_mov_b32_e32 v7, s5
4618; GPRIDX-NEXT:    v_mov_b32_e32 v6, s4
4619; GPRIDX-NEXT:    v_mov_b32_e32 v5, s3
4620; GPRIDX-NEXT:    v_mov_b32_e32 v4, s2
4621; GPRIDX-NEXT:    v_mov_b32_e32 v3, s1
4622; GPRIDX-NEXT:    v_mov_b32_e32 v2, s0
4623; GPRIDX-NEXT:    s_set_gpr_idx_on s33, gpr_idx(DST)
4624; GPRIDX-NEXT:    v_mov_b32_e32 v2, v0
4625; GPRIDX-NEXT:    v_mov_b32_e32 v3, v1
4626; GPRIDX-NEXT:    s_set_gpr_idx_off
4627; GPRIDX-NEXT:    v_readfirstlane_b32 s0, v2
4628; GPRIDX-NEXT:    v_readfirstlane_b32 s1, v3
4629; GPRIDX-NEXT:    v_readfirstlane_b32 s2, v4
4630; GPRIDX-NEXT:    v_readfirstlane_b32 s3, v5
4631; GPRIDX-NEXT:    v_readfirstlane_b32 s4, v6
4632; GPRIDX-NEXT:    v_readfirstlane_b32 s5, v7
4633; GPRIDX-NEXT:    v_readfirstlane_b32 s6, v8
4634; GPRIDX-NEXT:    v_readfirstlane_b32 s7, v9
4635; GPRIDX-NEXT:    v_readfirstlane_b32 s8, v10
4636; GPRIDX-NEXT:    v_readfirstlane_b32 s9, v11
4637; GPRIDX-NEXT:    v_readfirstlane_b32 s10, v12
4638; GPRIDX-NEXT:    v_readfirstlane_b32 s11, v13
4639; GPRIDX-NEXT:    v_readfirstlane_b32 s12, v14
4640; GPRIDX-NEXT:    v_readfirstlane_b32 s13, v15
4641; GPRIDX-NEXT:    v_readfirstlane_b32 s14, v16
4642; GPRIDX-NEXT:    v_readfirstlane_b32 s15, v17
4643; GPRIDX-NEXT:    v_readfirstlane_b32 s16, v18
4644; GPRIDX-NEXT:    v_readfirstlane_b32 s17, v19
4645; GPRIDX-NEXT:    v_readfirstlane_b32 s18, v20
4646; GPRIDX-NEXT:    v_readfirstlane_b32 s19, v21
4647; GPRIDX-NEXT:    v_readfirstlane_b32 s20, v22
4648; GPRIDX-NEXT:    v_readfirstlane_b32 s21, v23
4649; GPRIDX-NEXT:    v_readfirstlane_b32 s22, v24
4650; GPRIDX-NEXT:    v_readfirstlane_b32 s23, v25
4651; GPRIDX-NEXT:    v_readfirstlane_b32 s24, v26
4652; GPRIDX-NEXT:    v_readfirstlane_b32 s25, v27
4653; GPRIDX-NEXT:    v_readfirstlane_b32 s26, v28
4654; GPRIDX-NEXT:    v_readfirstlane_b32 s27, v29
4655; GPRIDX-NEXT:    v_readfirstlane_b32 s28, v30
4656; GPRIDX-NEXT:    v_readfirstlane_b32 s29, v31
4657; GPRIDX-NEXT:    v_readfirstlane_b32 s30, v32
4658; GPRIDX-NEXT:    v_readfirstlane_b32 s31, v33
4659; GPRIDX-NEXT:    ; return to shader part epilog
4660;
4661; GFX10-LABEL: dyn_insertelement_v16i64_s_v_s:
4662; GFX10:       ; %bb.0: ; %entry
4663; GFX10-NEXT:    s_mov_b32 s1, s3
4664; GFX10-NEXT:    s_mov_b32 s3, s5
4665; GFX10-NEXT:    s_mov_b32 s5, s7
4666; GFX10-NEXT:    s_mov_b32 s7, s9
4667; GFX10-NEXT:    s_mov_b32 s9, s11
4668; GFX10-NEXT:    s_mov_b32 s11, s13
4669; GFX10-NEXT:    s_mov_b32 s13, s15
4670; GFX10-NEXT:    s_mov_b32 s15, s17
4671; GFX10-NEXT:    s_mov_b32 s17, s19
4672; GFX10-NEXT:    s_mov_b32 s19, s21
4673; GFX10-NEXT:    s_mov_b32 s21, s23
4674; GFX10-NEXT:    s_mov_b32 s23, s25
4675; GFX10-NEXT:    s_mov_b32 s25, s27
4676; GFX10-NEXT:    s_mov_b32 s27, s29
4677; GFX10-NEXT:    s_mov_b32 s29, s31
4678; GFX10-NEXT:    s_mov_b32 s31, s33
4679; GFX10-NEXT:    s_mov_b32 s0, s2
4680; GFX10-NEXT:    s_mov_b32 s2, s4
4681; GFX10-NEXT:    s_mov_b32 s4, s6
4682; GFX10-NEXT:    s_mov_b32 s6, s8
4683; GFX10-NEXT:    s_mov_b32 s8, s10
4684; GFX10-NEXT:    s_mov_b32 s10, s12
4685; GFX10-NEXT:    s_mov_b32 s12, s14
4686; GFX10-NEXT:    s_mov_b32 s14, s16
4687; GFX10-NEXT:    s_mov_b32 s16, s18
4688; GFX10-NEXT:    s_mov_b32 s18, s20
4689; GFX10-NEXT:    s_mov_b32 s20, s22
4690; GFX10-NEXT:    s_mov_b32 s22, s24
4691; GFX10-NEXT:    s_mov_b32 s24, s26
4692; GFX10-NEXT:    s_mov_b32 s26, s28
4693; GFX10-NEXT:    s_mov_b32 s28, s30
4694; GFX10-NEXT:    s_mov_b32 s30, s32
4695; GFX10-NEXT:    v_mov_b32_e32 v33, s31
4696; GFX10-NEXT:    v_mov_b32_e32 v2, s0
4697; GFX10-NEXT:    s_lshl_b32 m0, s34, 1
4698; GFX10-NEXT:    v_mov_b32_e32 v32, s30
4699; GFX10-NEXT:    v_mov_b32_e32 v31, s29
4700; GFX10-NEXT:    v_mov_b32_e32 v30, s28
4701; GFX10-NEXT:    v_mov_b32_e32 v29, s27
4702; GFX10-NEXT:    v_mov_b32_e32 v28, s26
4703; GFX10-NEXT:    v_mov_b32_e32 v27, s25
4704; GFX10-NEXT:    v_mov_b32_e32 v26, s24
4705; GFX10-NEXT:    v_mov_b32_e32 v25, s23
4706; GFX10-NEXT:    v_mov_b32_e32 v24, s22
4707; GFX10-NEXT:    v_mov_b32_e32 v23, s21
4708; GFX10-NEXT:    v_mov_b32_e32 v22, s20
4709; GFX10-NEXT:    v_mov_b32_e32 v21, s19
4710; GFX10-NEXT:    v_mov_b32_e32 v20, s18
4711; GFX10-NEXT:    v_mov_b32_e32 v19, s17
4712; GFX10-NEXT:    v_mov_b32_e32 v18, s16
4713; GFX10-NEXT:    v_mov_b32_e32 v17, s15
4714; GFX10-NEXT:    v_mov_b32_e32 v16, s14
4715; GFX10-NEXT:    v_mov_b32_e32 v15, s13
4716; GFX10-NEXT:    v_mov_b32_e32 v14, s12
4717; GFX10-NEXT:    v_mov_b32_e32 v13, s11
4718; GFX10-NEXT:    v_mov_b32_e32 v12, s10
4719; GFX10-NEXT:    v_mov_b32_e32 v11, s9
4720; GFX10-NEXT:    v_mov_b32_e32 v10, s8
4721; GFX10-NEXT:    v_mov_b32_e32 v9, s7
4722; GFX10-NEXT:    v_mov_b32_e32 v8, s6
4723; GFX10-NEXT:    v_mov_b32_e32 v7, s5
4724; GFX10-NEXT:    v_mov_b32_e32 v6, s4
4725; GFX10-NEXT:    v_mov_b32_e32 v5, s3
4726; GFX10-NEXT:    v_mov_b32_e32 v4, s2
4727; GFX10-NEXT:    v_mov_b32_e32 v3, s1
4728; GFX10-NEXT:    v_movreld_b32_e32 v2, v0
4729; GFX10-NEXT:    v_movreld_b32_e32 v3, v1
4730; GFX10-NEXT:    v_readfirstlane_b32 s0, v2
4731; GFX10-NEXT:    v_readfirstlane_b32 s1, v3
4732; GFX10-NEXT:    v_readfirstlane_b32 s2, v4
4733; GFX10-NEXT:    v_readfirstlane_b32 s3, v5
4734; GFX10-NEXT:    v_readfirstlane_b32 s4, v6
4735; GFX10-NEXT:    v_readfirstlane_b32 s5, v7
4736; GFX10-NEXT:    v_readfirstlane_b32 s6, v8
4737; GFX10-NEXT:    v_readfirstlane_b32 s7, v9
4738; GFX10-NEXT:    v_readfirstlane_b32 s8, v10
4739; GFX10-NEXT:    v_readfirstlane_b32 s9, v11
4740; GFX10-NEXT:    v_readfirstlane_b32 s10, v12
4741; GFX10-NEXT:    v_readfirstlane_b32 s11, v13
4742; GFX10-NEXT:    v_readfirstlane_b32 s12, v14
4743; GFX10-NEXT:    v_readfirstlane_b32 s13, v15
4744; GFX10-NEXT:    v_readfirstlane_b32 s14, v16
4745; GFX10-NEXT:    v_readfirstlane_b32 s15, v17
4746; GFX10-NEXT:    v_readfirstlane_b32 s16, v18
4747; GFX10-NEXT:    v_readfirstlane_b32 s17, v19
4748; GFX10-NEXT:    v_readfirstlane_b32 s18, v20
4749; GFX10-NEXT:    v_readfirstlane_b32 s19, v21
4750; GFX10-NEXT:    v_readfirstlane_b32 s20, v22
4751; GFX10-NEXT:    v_readfirstlane_b32 s21, v23
4752; GFX10-NEXT:    v_readfirstlane_b32 s22, v24
4753; GFX10-NEXT:    v_readfirstlane_b32 s23, v25
4754; GFX10-NEXT:    v_readfirstlane_b32 s24, v26
4755; GFX10-NEXT:    v_readfirstlane_b32 s25, v27
4756; GFX10-NEXT:    v_readfirstlane_b32 s26, v28
4757; GFX10-NEXT:    v_readfirstlane_b32 s27, v29
4758; GFX10-NEXT:    v_readfirstlane_b32 s28, v30
4759; GFX10-NEXT:    v_readfirstlane_b32 s29, v31
4760; GFX10-NEXT:    v_readfirstlane_b32 s30, v32
4761; GFX10-NEXT:    v_readfirstlane_b32 s31, v33
4762; GFX10-NEXT:    ; return to shader part epilog
4763;
4764; GFX11-LABEL: dyn_insertelement_v16i64_s_v_s:
4765; GFX11:       ; %bb.0: ; %entry
4766; GFX11-NEXT:    s_mov_b32 s1, s3
4767; GFX11-NEXT:    s_mov_b32 s3, s5
4768; GFX11-NEXT:    s_mov_b32 s5, s7
4769; GFX11-NEXT:    s_mov_b32 s7, s9
4770; GFX11-NEXT:    s_mov_b32 s9, s11
4771; GFX11-NEXT:    s_mov_b32 s11, s13
4772; GFX11-NEXT:    s_mov_b32 s13, s15
4773; GFX11-NEXT:    s_mov_b32 s15, s17
4774; GFX11-NEXT:    s_mov_b32 s17, s19
4775; GFX11-NEXT:    s_mov_b32 s19, s21
4776; GFX11-NEXT:    s_mov_b32 s21, s23
4777; GFX11-NEXT:    s_mov_b32 s23, s25
4778; GFX11-NEXT:    s_mov_b32 s25, s27
4779; GFX11-NEXT:    s_mov_b32 s27, s29
4780; GFX11-NEXT:    s_mov_b32 s29, s31
4781; GFX11-NEXT:    s_mov_b32 s31, s33
4782; GFX11-NEXT:    s_mov_b32 s0, s2
4783; GFX11-NEXT:    s_mov_b32 s2, s4
4784; GFX11-NEXT:    s_mov_b32 s4, s6
4785; GFX11-NEXT:    s_mov_b32 s6, s8
4786; GFX11-NEXT:    s_mov_b32 s8, s10
4787; GFX11-NEXT:    s_mov_b32 s10, s12
4788; GFX11-NEXT:    s_mov_b32 s12, s14
4789; GFX11-NEXT:    s_mov_b32 s14, s16
4790; GFX11-NEXT:    s_mov_b32 s16, s18
4791; GFX11-NEXT:    s_mov_b32 s18, s20
4792; GFX11-NEXT:    s_mov_b32 s20, s22
4793; GFX11-NEXT:    s_mov_b32 s22, s24
4794; GFX11-NEXT:    s_mov_b32 s24, s26
4795; GFX11-NEXT:    s_mov_b32 s26, s28
4796; GFX11-NEXT:    s_mov_b32 s28, s30
4797; GFX11-NEXT:    s_mov_b32 s30, s32
4798; GFX11-NEXT:    v_dual_mov_b32 v33, s31 :: v_dual_mov_b32 v32, s30
4799; GFX11-NEXT:    v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
4800; GFX11-NEXT:    s_lshl_b32 m0, s34, 1
4801; GFX11-NEXT:    v_dual_mov_b32 v31, s29 :: v_dual_mov_b32 v30, s28
4802; GFX11-NEXT:    v_dual_mov_b32 v29, s27 :: v_dual_mov_b32 v28, s26
4803; GFX11-NEXT:    v_dual_mov_b32 v27, s25 :: v_dual_mov_b32 v26, s24
4804; GFX11-NEXT:    v_dual_mov_b32 v25, s23 :: v_dual_mov_b32 v24, s22
4805; GFX11-NEXT:    v_dual_mov_b32 v23, s21 :: v_dual_mov_b32 v22, s20
4806; GFX11-NEXT:    v_dual_mov_b32 v21, s19 :: v_dual_mov_b32 v20, s18
4807; GFX11-NEXT:    v_dual_mov_b32 v19, s17 :: v_dual_mov_b32 v18, s16
4808; GFX11-NEXT:    v_dual_mov_b32 v17, s15 :: v_dual_mov_b32 v16, s14
4809; GFX11-NEXT:    v_dual_mov_b32 v15, s13 :: v_dual_mov_b32 v14, s12
4810; GFX11-NEXT:    v_dual_mov_b32 v13, s11 :: v_dual_mov_b32 v12, s10
4811; GFX11-NEXT:    v_dual_mov_b32 v11, s9 :: v_dual_mov_b32 v10, s8
4812; GFX11-NEXT:    v_dual_mov_b32 v9, s7 :: v_dual_mov_b32 v8, s6
4813; GFX11-NEXT:    v_dual_mov_b32 v7, s5 :: v_dual_mov_b32 v6, s4
4814; GFX11-NEXT:    v_dual_mov_b32 v5, s3 :: v_dual_mov_b32 v4, s2
4815; GFX11-NEXT:    v_movreld_b32_e32 v2, v0
4816; GFX11-NEXT:    v_movreld_b32_e32 v3, v1
4817; GFX11-NEXT:    v_readfirstlane_b32 s0, v2
4818; GFX11-NEXT:    v_readfirstlane_b32 s1, v3
4819; GFX11-NEXT:    v_readfirstlane_b32 s2, v4
4820; GFX11-NEXT:    v_readfirstlane_b32 s3, v5
4821; GFX11-NEXT:    v_readfirstlane_b32 s4, v6
4822; GFX11-NEXT:    v_readfirstlane_b32 s5, v7
4823; GFX11-NEXT:    v_readfirstlane_b32 s6, v8
4824; GFX11-NEXT:    v_readfirstlane_b32 s7, v9
4825; GFX11-NEXT:    v_readfirstlane_b32 s8, v10
4826; GFX11-NEXT:    v_readfirstlane_b32 s9, v11
4827; GFX11-NEXT:    v_readfirstlane_b32 s10, v12
4828; GFX11-NEXT:    v_readfirstlane_b32 s11, v13
4829; GFX11-NEXT:    v_readfirstlane_b32 s12, v14
4830; GFX11-NEXT:    v_readfirstlane_b32 s13, v15
4831; GFX11-NEXT:    v_readfirstlane_b32 s14, v16
4832; GFX11-NEXT:    v_readfirstlane_b32 s15, v17
4833; GFX11-NEXT:    v_readfirstlane_b32 s16, v18
4834; GFX11-NEXT:    v_readfirstlane_b32 s17, v19
4835; GFX11-NEXT:    v_readfirstlane_b32 s18, v20
4836; GFX11-NEXT:    v_readfirstlane_b32 s19, v21
4837; GFX11-NEXT:    v_readfirstlane_b32 s20, v22
4838; GFX11-NEXT:    v_readfirstlane_b32 s21, v23
4839; GFX11-NEXT:    v_readfirstlane_b32 s22, v24
4840; GFX11-NEXT:    v_readfirstlane_b32 s23, v25
4841; GFX11-NEXT:    v_readfirstlane_b32 s24, v26
4842; GFX11-NEXT:    v_readfirstlane_b32 s25, v27
4843; GFX11-NEXT:    v_readfirstlane_b32 s26, v28
4844; GFX11-NEXT:    v_readfirstlane_b32 s27, v29
4845; GFX11-NEXT:    v_readfirstlane_b32 s28, v30
4846; GFX11-NEXT:    v_readfirstlane_b32 s29, v31
4847; GFX11-NEXT:    v_readfirstlane_b32 s30, v32
4848; GFX11-NEXT:    v_readfirstlane_b32 s31, v33
4849; GFX11-NEXT:    ; return to shader part epilog
4850entry:
4851  %insert = insertelement <16 x i64> %vec, i64 %val, i32 %idx
4852  ret <16 x i64> %insert
4853}
4854
; Dynamic-index insertelement into <16 x double>. %vec and %idx are inreg, %val is not.
; In the expected output below, %val is read from v0/v1 and the index from s34.
; All three check prefixes copy the vector into v2..v33, shift the index left by one
; (presumably because each double occupies two 32-bit registers), write v0 and v1 with
; two indexed 32-bit moves, and return the result through v_readfirstlane_b32 into s0..s31.
; The GPRIDX output brackets plain v_mov_b32 with s_set_gpr_idx_on/off; the GFX10 and
; GFX11 output puts the shifted index in m0 and uses v_movreld_b32.
; The assertions are autogenerated (see the NOTE at the top of the file); regenerate them
; instead of editing by hand.
4855define amdgpu_ps <16 x double> @dyn_insertelement_v16f64_s_v_s(<16 x double> inreg %vec, double %val, i32 inreg %idx) {
4856; GPRIDX-LABEL: dyn_insertelement_v16f64_s_v_s:
4857; GPRIDX:       ; %bb.0: ; %entry
4858; GPRIDX-NEXT:    s_mov_b32 s1, s3
4859; GPRIDX-NEXT:    s_mov_b32 s3, s5
4860; GPRIDX-NEXT:    s_mov_b32 s5, s7
4861; GPRIDX-NEXT:    s_mov_b32 s7, s9
4862; GPRIDX-NEXT:    s_mov_b32 s9, s11
4863; GPRIDX-NEXT:    s_mov_b32 s11, s13
4864; GPRIDX-NEXT:    s_mov_b32 s13, s15
4865; GPRIDX-NEXT:    s_mov_b32 s15, s17
4866; GPRIDX-NEXT:    s_mov_b32 s17, s19
4867; GPRIDX-NEXT:    s_mov_b32 s19, s21
4868; GPRIDX-NEXT:    s_mov_b32 s21, s23
4869; GPRIDX-NEXT:    s_mov_b32 s23, s25
4870; GPRIDX-NEXT:    s_mov_b32 s25, s27
4871; GPRIDX-NEXT:    s_mov_b32 s27, s29
4872; GPRIDX-NEXT:    s_mov_b32 s29, s31
4873; GPRIDX-NEXT:    s_mov_b32 s31, s33
4874; GPRIDX-NEXT:    s_mov_b32 s0, s2
4875; GPRIDX-NEXT:    s_mov_b32 s2, s4
4876; GPRIDX-NEXT:    s_mov_b32 s4, s6
4877; GPRIDX-NEXT:    s_mov_b32 s6, s8
4878; GPRIDX-NEXT:    s_mov_b32 s8, s10
4879; GPRIDX-NEXT:    s_mov_b32 s10, s12
4880; GPRIDX-NEXT:    s_mov_b32 s12, s14
4881; GPRIDX-NEXT:    s_mov_b32 s14, s16
4882; GPRIDX-NEXT:    s_mov_b32 s16, s18
4883; GPRIDX-NEXT:    s_mov_b32 s18, s20
4884; GPRIDX-NEXT:    s_mov_b32 s20, s22
4885; GPRIDX-NEXT:    s_mov_b32 s22, s24
4886; GPRIDX-NEXT:    s_mov_b32 s24, s26
4887; GPRIDX-NEXT:    s_mov_b32 s26, s28
4888; GPRIDX-NEXT:    s_mov_b32 s28, s30
4889; GPRIDX-NEXT:    s_mov_b32 s30, s32
4890; GPRIDX-NEXT:    v_mov_b32_e32 v33, s31
4891; GPRIDX-NEXT:    s_lshl_b32 s33, s34, 1
4892; GPRIDX-NEXT:    v_mov_b32_e32 v32, s30
4893; GPRIDX-NEXT:    v_mov_b32_e32 v31, s29
4894; GPRIDX-NEXT:    v_mov_b32_e32 v30, s28
4895; GPRIDX-NEXT:    v_mov_b32_e32 v29, s27
4896; GPRIDX-NEXT:    v_mov_b32_e32 v28, s26
4897; GPRIDX-NEXT:    v_mov_b32_e32 v27, s25
4898; GPRIDX-NEXT:    v_mov_b32_e32 v26, s24
4899; GPRIDX-NEXT:    v_mov_b32_e32 v25, s23
4900; GPRIDX-NEXT:    v_mov_b32_e32 v24, s22
4901; GPRIDX-NEXT:    v_mov_b32_e32 v23, s21
4902; GPRIDX-NEXT:    v_mov_b32_e32 v22, s20
4903; GPRIDX-NEXT:    v_mov_b32_e32 v21, s19
4904; GPRIDX-NEXT:    v_mov_b32_e32 v20, s18
4905; GPRIDX-NEXT:    v_mov_b32_e32 v19, s17
4906; GPRIDX-NEXT:    v_mov_b32_e32 v18, s16
4907; GPRIDX-NEXT:    v_mov_b32_e32 v17, s15
4908; GPRIDX-NEXT:    v_mov_b32_e32 v16, s14
4909; GPRIDX-NEXT:    v_mov_b32_e32 v15, s13
4910; GPRIDX-NEXT:    v_mov_b32_e32 v14, s12
4911; GPRIDX-NEXT:    v_mov_b32_e32 v13, s11
4912; GPRIDX-NEXT:    v_mov_b32_e32 v12, s10
4913; GPRIDX-NEXT:    v_mov_b32_e32 v11, s9
4914; GPRIDX-NEXT:    v_mov_b32_e32 v10, s8
4915; GPRIDX-NEXT:    v_mov_b32_e32 v9, s7
4916; GPRIDX-NEXT:    v_mov_b32_e32 v8, s6
4917; GPRIDX-NEXT:    v_mov_b32_e32 v7, s5
4918; GPRIDX-NEXT:    v_mov_b32_e32 v6, s4
4919; GPRIDX-NEXT:    v_mov_b32_e32 v5, s3
4920; GPRIDX-NEXT:    v_mov_b32_e32 v4, s2
4921; GPRIDX-NEXT:    v_mov_b32_e32 v3, s1
4922; GPRIDX-NEXT:    v_mov_b32_e32 v2, s0
4923; GPRIDX-NEXT:    s_set_gpr_idx_on s33, gpr_idx(DST)
4924; GPRIDX-NEXT:    v_mov_b32_e32 v2, v0
4925; GPRIDX-NEXT:    v_mov_b32_e32 v3, v1
4926; GPRIDX-NEXT:    s_set_gpr_idx_off
4927; GPRIDX-NEXT:    v_readfirstlane_b32 s0, v2
4928; GPRIDX-NEXT:    v_readfirstlane_b32 s1, v3
4929; GPRIDX-NEXT:    v_readfirstlane_b32 s2, v4
4930; GPRIDX-NEXT:    v_readfirstlane_b32 s3, v5
4931; GPRIDX-NEXT:    v_readfirstlane_b32 s4, v6
4932; GPRIDX-NEXT:    v_readfirstlane_b32 s5, v7
4933; GPRIDX-NEXT:    v_readfirstlane_b32 s6, v8
4934; GPRIDX-NEXT:    v_readfirstlane_b32 s7, v9
4935; GPRIDX-NEXT:    v_readfirstlane_b32 s8, v10
4936; GPRIDX-NEXT:    v_readfirstlane_b32 s9, v11
4937; GPRIDX-NEXT:    v_readfirstlane_b32 s10, v12
4938; GPRIDX-NEXT:    v_readfirstlane_b32 s11, v13
4939; GPRIDX-NEXT:    v_readfirstlane_b32 s12, v14
4940; GPRIDX-NEXT:    v_readfirstlane_b32 s13, v15
4941; GPRIDX-NEXT:    v_readfirstlane_b32 s14, v16
4942; GPRIDX-NEXT:    v_readfirstlane_b32 s15, v17
4943; GPRIDX-NEXT:    v_readfirstlane_b32 s16, v18
4944; GPRIDX-NEXT:    v_readfirstlane_b32 s17, v19
4945; GPRIDX-NEXT:    v_readfirstlane_b32 s18, v20
4946; GPRIDX-NEXT:    v_readfirstlane_b32 s19, v21
4947; GPRIDX-NEXT:    v_readfirstlane_b32 s20, v22
4948; GPRIDX-NEXT:    v_readfirstlane_b32 s21, v23
4949; GPRIDX-NEXT:    v_readfirstlane_b32 s22, v24
4950; GPRIDX-NEXT:    v_readfirstlane_b32 s23, v25
4951; GPRIDX-NEXT:    v_readfirstlane_b32 s24, v26
4952; GPRIDX-NEXT:    v_readfirstlane_b32 s25, v27
4953; GPRIDX-NEXT:    v_readfirstlane_b32 s26, v28
4954; GPRIDX-NEXT:    v_readfirstlane_b32 s27, v29
4955; GPRIDX-NEXT:    v_readfirstlane_b32 s28, v30
4956; GPRIDX-NEXT:    v_readfirstlane_b32 s29, v31
4957; GPRIDX-NEXT:    v_readfirstlane_b32 s30, v32
4958; GPRIDX-NEXT:    v_readfirstlane_b32 s31, v33
4959; GPRIDX-NEXT:    ; return to shader part epilog
4960;
4961; GFX10-LABEL: dyn_insertelement_v16f64_s_v_s:
4962; GFX10:       ; %bb.0: ; %entry
4963; GFX10-NEXT:    s_mov_b32 s1, s3
4964; GFX10-NEXT:    s_mov_b32 s3, s5
4965; GFX10-NEXT:    s_mov_b32 s5, s7
4966; GFX10-NEXT:    s_mov_b32 s7, s9
4967; GFX10-NEXT:    s_mov_b32 s9, s11
4968; GFX10-NEXT:    s_mov_b32 s11, s13
4969; GFX10-NEXT:    s_mov_b32 s13, s15
4970; GFX10-NEXT:    s_mov_b32 s15, s17
4971; GFX10-NEXT:    s_mov_b32 s17, s19
4972; GFX10-NEXT:    s_mov_b32 s19, s21
4973; GFX10-NEXT:    s_mov_b32 s21, s23
4974; GFX10-NEXT:    s_mov_b32 s23, s25
4975; GFX10-NEXT:    s_mov_b32 s25, s27
4976; GFX10-NEXT:    s_mov_b32 s27, s29
4977; GFX10-NEXT:    s_mov_b32 s29, s31
4978; GFX10-NEXT:    s_mov_b32 s31, s33
4979; GFX10-NEXT:    s_mov_b32 s0, s2
4980; GFX10-NEXT:    s_mov_b32 s2, s4
4981; GFX10-NEXT:    s_mov_b32 s4, s6
4982; GFX10-NEXT:    s_mov_b32 s6, s8
4983; GFX10-NEXT:    s_mov_b32 s8, s10
4984; GFX10-NEXT:    s_mov_b32 s10, s12
4985; GFX10-NEXT:    s_mov_b32 s12, s14
4986; GFX10-NEXT:    s_mov_b32 s14, s16
4987; GFX10-NEXT:    s_mov_b32 s16, s18
4988; GFX10-NEXT:    s_mov_b32 s18, s20
4989; GFX10-NEXT:    s_mov_b32 s20, s22
4990; GFX10-NEXT:    s_mov_b32 s22, s24
4991; GFX10-NEXT:    s_mov_b32 s24, s26
4992; GFX10-NEXT:    s_mov_b32 s26, s28
4993; GFX10-NEXT:    s_mov_b32 s28, s30
4994; GFX10-NEXT:    s_mov_b32 s30, s32
4995; GFX10-NEXT:    v_mov_b32_e32 v33, s31
4996; GFX10-NEXT:    v_mov_b32_e32 v2, s0
4997; GFX10-NEXT:    s_lshl_b32 m0, s34, 1
4998; GFX10-NEXT:    v_mov_b32_e32 v32, s30
4999; GFX10-NEXT:    v_mov_b32_e32 v31, s29
5000; GFX10-NEXT:    v_mov_b32_e32 v30, s28
5001; GFX10-NEXT:    v_mov_b32_e32 v29, s27
5002; GFX10-NEXT:    v_mov_b32_e32 v28, s26
5003; GFX10-NEXT:    v_mov_b32_e32 v27, s25
5004; GFX10-NEXT:    v_mov_b32_e32 v26, s24
5005; GFX10-NEXT:    v_mov_b32_e32 v25, s23
5006; GFX10-NEXT:    v_mov_b32_e32 v24, s22
5007; GFX10-NEXT:    v_mov_b32_e32 v23, s21
5008; GFX10-NEXT:    v_mov_b32_e32 v22, s20
5009; GFX10-NEXT:    v_mov_b32_e32 v21, s19
5010; GFX10-NEXT:    v_mov_b32_e32 v20, s18
5011; GFX10-NEXT:    v_mov_b32_e32 v19, s17
5012; GFX10-NEXT:    v_mov_b32_e32 v18, s16
5013; GFX10-NEXT:    v_mov_b32_e32 v17, s15
5014; GFX10-NEXT:    v_mov_b32_e32 v16, s14
5015; GFX10-NEXT:    v_mov_b32_e32 v15, s13
5016; GFX10-NEXT:    v_mov_b32_e32 v14, s12
5017; GFX10-NEXT:    v_mov_b32_e32 v13, s11
5018; GFX10-NEXT:    v_mov_b32_e32 v12, s10
5019; GFX10-NEXT:    v_mov_b32_e32 v11, s9
5020; GFX10-NEXT:    v_mov_b32_e32 v10, s8
5021; GFX10-NEXT:    v_mov_b32_e32 v9, s7
5022; GFX10-NEXT:    v_mov_b32_e32 v8, s6
5023; GFX10-NEXT:    v_mov_b32_e32 v7, s5
5024; GFX10-NEXT:    v_mov_b32_e32 v6, s4
5025; GFX10-NEXT:    v_mov_b32_e32 v5, s3
5026; GFX10-NEXT:    v_mov_b32_e32 v4, s2
5027; GFX10-NEXT:    v_mov_b32_e32 v3, s1
5028; GFX10-NEXT:    v_movreld_b32_e32 v2, v0
5029; GFX10-NEXT:    v_movreld_b32_e32 v3, v1
5030; GFX10-NEXT:    v_readfirstlane_b32 s0, v2
5031; GFX10-NEXT:    v_readfirstlane_b32 s1, v3
5032; GFX10-NEXT:    v_readfirstlane_b32 s2, v4
5033; GFX10-NEXT:    v_readfirstlane_b32 s3, v5
5034; GFX10-NEXT:    v_readfirstlane_b32 s4, v6
5035; GFX10-NEXT:    v_readfirstlane_b32 s5, v7
5036; GFX10-NEXT:    v_readfirstlane_b32 s6, v8
5037; GFX10-NEXT:    v_readfirstlane_b32 s7, v9
5038; GFX10-NEXT:    v_readfirstlane_b32 s8, v10
5039; GFX10-NEXT:    v_readfirstlane_b32 s9, v11
5040; GFX10-NEXT:    v_readfirstlane_b32 s10, v12
5041; GFX10-NEXT:    v_readfirstlane_b32 s11, v13
5042; GFX10-NEXT:    v_readfirstlane_b32 s12, v14
5043; GFX10-NEXT:    v_readfirstlane_b32 s13, v15
5044; GFX10-NEXT:    v_readfirstlane_b32 s14, v16
5045; GFX10-NEXT:    v_readfirstlane_b32 s15, v17
5046; GFX10-NEXT:    v_readfirstlane_b32 s16, v18
5047; GFX10-NEXT:    v_readfirstlane_b32 s17, v19
5048; GFX10-NEXT:    v_readfirstlane_b32 s18, v20
5049; GFX10-NEXT:    v_readfirstlane_b32 s19, v21
5050; GFX10-NEXT:    v_readfirstlane_b32 s20, v22
5051; GFX10-NEXT:    v_readfirstlane_b32 s21, v23
5052; GFX10-NEXT:    v_readfirstlane_b32 s22, v24
5053; GFX10-NEXT:    v_readfirstlane_b32 s23, v25
5054; GFX10-NEXT:    v_readfirstlane_b32 s24, v26
5055; GFX10-NEXT:    v_readfirstlane_b32 s25, v27
5056; GFX10-NEXT:    v_readfirstlane_b32 s26, v28
5057; GFX10-NEXT:    v_readfirstlane_b32 s27, v29
5058; GFX10-NEXT:    v_readfirstlane_b32 s28, v30
5059; GFX10-NEXT:    v_readfirstlane_b32 s29, v31
5060; GFX10-NEXT:    v_readfirstlane_b32 s30, v32
5061; GFX10-NEXT:    v_readfirstlane_b32 s31, v33
5062; GFX10-NEXT:    ; return to shader part epilog
5063;
5064; GFX11-LABEL: dyn_insertelement_v16f64_s_v_s:
5065; GFX11:       ; %bb.0: ; %entry
5066; GFX11-NEXT:    s_mov_b32 s1, s3
5067; GFX11-NEXT:    s_mov_b32 s3, s5
5068; GFX11-NEXT:    s_mov_b32 s5, s7
5069; GFX11-NEXT:    s_mov_b32 s7, s9
5070; GFX11-NEXT:    s_mov_b32 s9, s11
5071; GFX11-NEXT:    s_mov_b32 s11, s13
5072; GFX11-NEXT:    s_mov_b32 s13, s15
5073; GFX11-NEXT:    s_mov_b32 s15, s17
5074; GFX11-NEXT:    s_mov_b32 s17, s19
5075; GFX11-NEXT:    s_mov_b32 s19, s21
5076; GFX11-NEXT:    s_mov_b32 s21, s23
5077; GFX11-NEXT:    s_mov_b32 s23, s25
5078; GFX11-NEXT:    s_mov_b32 s25, s27
5079; GFX11-NEXT:    s_mov_b32 s27, s29
5080; GFX11-NEXT:    s_mov_b32 s29, s31
5081; GFX11-NEXT:    s_mov_b32 s31, s33
5082; GFX11-NEXT:    s_mov_b32 s0, s2
5083; GFX11-NEXT:    s_mov_b32 s2, s4
5084; GFX11-NEXT:    s_mov_b32 s4, s6
5085; GFX11-NEXT:    s_mov_b32 s6, s8
5086; GFX11-NEXT:    s_mov_b32 s8, s10
5087; GFX11-NEXT:    s_mov_b32 s10, s12
5088; GFX11-NEXT:    s_mov_b32 s12, s14
5089; GFX11-NEXT:    s_mov_b32 s14, s16
5090; GFX11-NEXT:    s_mov_b32 s16, s18
5091; GFX11-NEXT:    s_mov_b32 s18, s20
5092; GFX11-NEXT:    s_mov_b32 s20, s22
5093; GFX11-NEXT:    s_mov_b32 s22, s24
5094; GFX11-NEXT:    s_mov_b32 s24, s26
5095; GFX11-NEXT:    s_mov_b32 s26, s28
5096; GFX11-NEXT:    s_mov_b32 s28, s30
5097; GFX11-NEXT:    s_mov_b32 s30, s32
5098; GFX11-NEXT:    v_dual_mov_b32 v33, s31 :: v_dual_mov_b32 v32, s30
5099; GFX11-NEXT:    v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
5100; GFX11-NEXT:    s_lshl_b32 m0, s34, 1
5101; GFX11-NEXT:    v_dual_mov_b32 v31, s29 :: v_dual_mov_b32 v30, s28
5102; GFX11-NEXT:    v_dual_mov_b32 v29, s27 :: v_dual_mov_b32 v28, s26
5103; GFX11-NEXT:    v_dual_mov_b32 v27, s25 :: v_dual_mov_b32 v26, s24
5104; GFX11-NEXT:    v_dual_mov_b32 v25, s23 :: v_dual_mov_b32 v24, s22
5105; GFX11-NEXT:    v_dual_mov_b32 v23, s21 :: v_dual_mov_b32 v22, s20
5106; GFX11-NEXT:    v_dual_mov_b32 v21, s19 :: v_dual_mov_b32 v20, s18
5107; GFX11-NEXT:    v_dual_mov_b32 v19, s17 :: v_dual_mov_b32 v18, s16
5108; GFX11-NEXT:    v_dual_mov_b32 v17, s15 :: v_dual_mov_b32 v16, s14
5109; GFX11-NEXT:    v_dual_mov_b32 v15, s13 :: v_dual_mov_b32 v14, s12
5110; GFX11-NEXT:    v_dual_mov_b32 v13, s11 :: v_dual_mov_b32 v12, s10
5111; GFX11-NEXT:    v_dual_mov_b32 v11, s9 :: v_dual_mov_b32 v10, s8
5112; GFX11-NEXT:    v_dual_mov_b32 v9, s7 :: v_dual_mov_b32 v8, s6
5113; GFX11-NEXT:    v_dual_mov_b32 v7, s5 :: v_dual_mov_b32 v6, s4
5114; GFX11-NEXT:    v_dual_mov_b32 v5, s3 :: v_dual_mov_b32 v4, s2
5115; GFX11-NEXT:    v_movreld_b32_e32 v2, v0
5116; GFX11-NEXT:    v_movreld_b32_e32 v3, v1
5117; GFX11-NEXT:    v_readfirstlane_b32 s0, v2
5118; GFX11-NEXT:    v_readfirstlane_b32 s1, v3
5119; GFX11-NEXT:    v_readfirstlane_b32 s2, v4
5120; GFX11-NEXT:    v_readfirstlane_b32 s3, v5
5121; GFX11-NEXT:    v_readfirstlane_b32 s4, v6
5122; GFX11-NEXT:    v_readfirstlane_b32 s5, v7
5123; GFX11-NEXT:    v_readfirstlane_b32 s6, v8
5124; GFX11-NEXT:    v_readfirstlane_b32 s7, v9
5125; GFX11-NEXT:    v_readfirstlane_b32 s8, v10
5126; GFX11-NEXT:    v_readfirstlane_b32 s9, v11
5127; GFX11-NEXT:    v_readfirstlane_b32 s10, v12
5128; GFX11-NEXT:    v_readfirstlane_b32 s11, v13
5129; GFX11-NEXT:    v_readfirstlane_b32 s12, v14
5130; GFX11-NEXT:    v_readfirstlane_b32 s13, v15
5131; GFX11-NEXT:    v_readfirstlane_b32 s14, v16
5132; GFX11-NEXT:    v_readfirstlane_b32 s15, v17
5133; GFX11-NEXT:    v_readfirstlane_b32 s16, v18
5134; GFX11-NEXT:    v_readfirstlane_b32 s17, v19
5135; GFX11-NEXT:    v_readfirstlane_b32 s18, v20
5136; GFX11-NEXT:    v_readfirstlane_b32 s19, v21
5137; GFX11-NEXT:    v_readfirstlane_b32 s20, v22
5138; GFX11-NEXT:    v_readfirstlane_b32 s21, v23
5139; GFX11-NEXT:    v_readfirstlane_b32 s22, v24
5140; GFX11-NEXT:    v_readfirstlane_b32 s23, v25
5141; GFX11-NEXT:    v_readfirstlane_b32 s24, v26
5142; GFX11-NEXT:    v_readfirstlane_b32 s25, v27
5143; GFX11-NEXT:    v_readfirstlane_b32 s26, v28
5144; GFX11-NEXT:    v_readfirstlane_b32 s27, v29
5145; GFX11-NEXT:    v_readfirstlane_b32 s28, v30
5146; GFX11-NEXT:    v_readfirstlane_b32 s29, v31
5147; GFX11-NEXT:    v_readfirstlane_b32 s30, v32
5148; GFX11-NEXT:    v_readfirstlane_b32 s31, v33
5149; GFX11-NEXT:    ; return to shader part epilog
5150entry:
5151  %insert = insertelement <16 x double> %vec, double %val, i32 %idx
5152  ret <16 x double> %insert
5153}
5154
; Dynamic-index insertelement into <7 x i32> with %vec, %val and %idx all inreg.
; The expected output uses no indexed move. For each element 0..6 it compares the
; index (s10) with the element number and uses s_cselect_b32 to pick either the new
; value (s9) or the old element. The GPRIDX and GFX10PLUS expectations are the same
; instruction sequence.
5155define amdgpu_ps <7 x i32> @dyn_insertelement_v7i32_s_s_s(<7 x i32> inreg %vec, i32 inreg %val, i32 inreg %idx) {
5156; GPRIDX-LABEL: dyn_insertelement_v7i32_s_s_s:
5157; GPRIDX:       ; %bb.0: ; %entry
5158; GPRIDX-NEXT:    s_cmp_eq_u32 s10, 0
5159; GPRIDX-NEXT:    s_cselect_b32 s0, s9, s2
5160; GPRIDX-NEXT:    s_cmp_eq_u32 s10, 1
5161; GPRIDX-NEXT:    s_cselect_b32 s1, s9, s3
5162; GPRIDX-NEXT:    s_cmp_eq_u32 s10, 2
5163; GPRIDX-NEXT:    s_cselect_b32 s2, s9, s4
5164; GPRIDX-NEXT:    s_cmp_eq_u32 s10, 3
5165; GPRIDX-NEXT:    s_cselect_b32 s3, s9, s5
5166; GPRIDX-NEXT:    s_cmp_eq_u32 s10, 4
5167; GPRIDX-NEXT:    s_cselect_b32 s4, s9, s6
5168; GPRIDX-NEXT:    s_cmp_eq_u32 s10, 5
5169; GPRIDX-NEXT:    s_cselect_b32 s5, s9, s7
5170; GPRIDX-NEXT:    s_cmp_eq_u32 s10, 6
5171; GPRIDX-NEXT:    s_cselect_b32 s6, s9, s8
5172; GPRIDX-NEXT:    ; return to shader part epilog
5173;
5174; GFX10PLUS-LABEL: dyn_insertelement_v7i32_s_s_s:
5175; GFX10PLUS:       ; %bb.0: ; %entry
5176; GFX10PLUS-NEXT:    s_cmp_eq_u32 s10, 0
5177; GFX10PLUS-NEXT:    s_cselect_b32 s0, s9, s2
5178; GFX10PLUS-NEXT:    s_cmp_eq_u32 s10, 1
5179; GFX10PLUS-NEXT:    s_cselect_b32 s1, s9, s3
5180; GFX10PLUS-NEXT:    s_cmp_eq_u32 s10, 2
5181; GFX10PLUS-NEXT:    s_cselect_b32 s2, s9, s4
5182; GFX10PLUS-NEXT:    s_cmp_eq_u32 s10, 3
5183; GFX10PLUS-NEXT:    s_cselect_b32 s3, s9, s5
5184; GFX10PLUS-NEXT:    s_cmp_eq_u32 s10, 4
5185; GFX10PLUS-NEXT:    s_cselect_b32 s4, s9, s6
5186; GFX10PLUS-NEXT:    s_cmp_eq_u32 s10, 5
5187; GFX10PLUS-NEXT:    s_cselect_b32 s5, s9, s7
5188; GFX10PLUS-NEXT:    s_cmp_eq_u32 s10, 6
5189; GFX10PLUS-NEXT:    s_cselect_b32 s6, s9, s8
5190; GFX10PLUS-NEXT:    ; return to shader part epilog
5191entry:
5192  %insert = insertelement <7 x i32> %vec, i32 %val, i32 %idx
5193  ret <7 x i32> %insert
5194}
5195
; Dynamic-index insertelement into <7 x ptr addrspace(3)> with %vec, %val and %idx all inreg.
; The expected instructions match the <7 x i32> all-inreg test: one s_cmp_eq_u32 on the
; index (s10) plus one s_cselect_b32 per element, choosing between the new pointer (s9)
; and the existing element.
5196define amdgpu_ps <7 x ptr addrspace(3)> @dyn_insertelement_v7p3i8_s_s_s(<7 x ptr addrspace(3)> inreg %vec, ptr addrspace(3) inreg %val, i32 inreg %idx) {
5197; GPRIDX-LABEL: dyn_insertelement_v7p3i8_s_s_s:
5198; GPRIDX:       ; %bb.0: ; %entry
5199; GPRIDX-NEXT:    s_cmp_eq_u32 s10, 0
5200; GPRIDX-NEXT:    s_cselect_b32 s0, s9, s2
5201; GPRIDX-NEXT:    s_cmp_eq_u32 s10, 1
5202; GPRIDX-NEXT:    s_cselect_b32 s1, s9, s3
5203; GPRIDX-NEXT:    s_cmp_eq_u32 s10, 2
5204; GPRIDX-NEXT:    s_cselect_b32 s2, s9, s4
5205; GPRIDX-NEXT:    s_cmp_eq_u32 s10, 3
5206; GPRIDX-NEXT:    s_cselect_b32 s3, s9, s5
5207; GPRIDX-NEXT:    s_cmp_eq_u32 s10, 4
5208; GPRIDX-NEXT:    s_cselect_b32 s4, s9, s6
5209; GPRIDX-NEXT:    s_cmp_eq_u32 s10, 5
5210; GPRIDX-NEXT:    s_cselect_b32 s5, s9, s7
5211; GPRIDX-NEXT:    s_cmp_eq_u32 s10, 6
5212; GPRIDX-NEXT:    s_cselect_b32 s6, s9, s8
5213; GPRIDX-NEXT:    ; return to shader part epilog
5214;
5215; GFX10PLUS-LABEL: dyn_insertelement_v7p3i8_s_s_s:
5216; GFX10PLUS:       ; %bb.0: ; %entry
5217; GFX10PLUS-NEXT:    s_cmp_eq_u32 s10, 0
5218; GFX10PLUS-NEXT:    s_cselect_b32 s0, s9, s2
5219; GFX10PLUS-NEXT:    s_cmp_eq_u32 s10, 1
5220; GFX10PLUS-NEXT:    s_cselect_b32 s1, s9, s3
5221; GFX10PLUS-NEXT:    s_cmp_eq_u32 s10, 2
5222; GFX10PLUS-NEXT:    s_cselect_b32 s2, s9, s4
5223; GFX10PLUS-NEXT:    s_cmp_eq_u32 s10, 3
5224; GFX10PLUS-NEXT:    s_cselect_b32 s3, s9, s5
5225; GFX10PLUS-NEXT:    s_cmp_eq_u32 s10, 4
5226; GFX10PLUS-NEXT:    s_cselect_b32 s4, s9, s6
5227; GFX10PLUS-NEXT:    s_cmp_eq_u32 s10, 5
5228; GFX10PLUS-NEXT:    s_cselect_b32 s5, s9, s7
5229; GFX10PLUS-NEXT:    s_cmp_eq_u32 s10, 6
5230; GFX10PLUS-NEXT:    s_cselect_b32 s6, s9, s8
5231; GFX10PLUS-NEXT:    ; return to shader part epilog
5232entry:
5233  %insert = insertelement <7 x ptr addrspace(3)> %vec, ptr addrspace(3) %val, i32 %idx
5234  ret <7 x ptr addrspace(3)> %insert
5235}
5236
; Dynamic-index insertelement into <7 x float>. %vec and %idx are inreg, %val is not.
; The expected output compares the index (s9) with 0..6 and uses v_cndmask_b32 to pick
; either %val (v0) or the old element. The GPRIDX output first copies each old element
; from its SGPR into a VGPR, while the GFX10PLUS output passes the SGPR directly to
; v_cndmask_b32. Element 0 is built in v7 and moved into v0 last; v0 is read as %val
; by every select before that.
5237define amdgpu_ps <7 x float> @dyn_insertelement_v7f32_s_v_s(<7 x float> inreg %vec, float %val, i32 inreg %idx) {
5238; GPRIDX-LABEL: dyn_insertelement_v7f32_s_v_s:
5239; GPRIDX:       ; %bb.0: ; %entry
5240; GPRIDX-NEXT:    v_mov_b32_e32 v1, s2
5241; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s9, 0
5242; GPRIDX-NEXT:    v_mov_b32_e32 v2, s3
5243; GPRIDX-NEXT:    v_cndmask_b32_e32 v7, v1, v0, vcc
5244; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s9, 1
5245; GPRIDX-NEXT:    v_mov_b32_e32 v3, s4
5246; GPRIDX-NEXT:    v_cndmask_b32_e32 v1, v2, v0, vcc
5247; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s9, 2
5248; GPRIDX-NEXT:    v_mov_b32_e32 v4, s5
5249; GPRIDX-NEXT:    v_cndmask_b32_e32 v2, v3, v0, vcc
5250; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s9, 3
5251; GPRIDX-NEXT:    v_mov_b32_e32 v5, s6
5252; GPRIDX-NEXT:    v_cndmask_b32_e32 v3, v4, v0, vcc
5253; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s9, 4
5254; GPRIDX-NEXT:    v_mov_b32_e32 v6, s7
5255; GPRIDX-NEXT:    v_cndmask_b32_e32 v4, v5, v0, vcc
5256; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s9, 5
5257; GPRIDX-NEXT:    v_mov_b32_e32 v8, s8
5258; GPRIDX-NEXT:    v_cndmask_b32_e32 v5, v6, v0, vcc
5259; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s9, 6
5260; GPRIDX-NEXT:    v_cndmask_b32_e32 v6, v8, v0, vcc
5261; GPRIDX-NEXT:    v_mov_b32_e32 v0, v7
5262; GPRIDX-NEXT:    ; return to shader part epilog
5263;
5264; GFX10PLUS-LABEL: dyn_insertelement_v7f32_s_v_s:
5265; GFX10PLUS:       ; %bb.0: ; %entry
5266; GFX10PLUS-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s9, 0
5267; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v7, s2, v0, vcc_lo
5268; GFX10PLUS-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s9, 1
5269; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v1, s3, v0, vcc_lo
5270; GFX10PLUS-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s9, 2
5271; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v2, s4, v0, vcc_lo
5272; GFX10PLUS-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s9, 3
5273; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v3, s5, v0, vcc_lo
5274; GFX10PLUS-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s9, 4
5275; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v4, s6, v0, vcc_lo
5276; GFX10PLUS-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s9, 5
5277; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v5, s7, v0, vcc_lo
5278; GFX10PLUS-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s9, 6
5279; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v6, s8, v0, vcc_lo
5280; GFX10PLUS-NEXT:    v_mov_b32_e32 v0, v7
5281; GFX10PLUS-NEXT:    ; return to shader part epilog
5282entry:
5283  %insert = insertelement <7 x float> %vec, float %val, i32 %idx
5284  ret <7 x float> %insert
5285}
5286
; Dynamic-index insertelement into <7 x float>. Only %vec is inreg; %val and %idx are not.
; The expected output compares the index (v1) with 0..6 and uses v_cndmask_b32 to pick
; either %val (v0) or the old element. Elements 0 and 1 are built in v8 and v7 and only
; moved into v0/v1 near the end; v0 and v1 are read as %val and %idx by the compares
; and selects before that.
; The GFX10 and GFX11 expectations differ only in the tail: the GFX11 output folds the
; v1 move and the final select into one v_dual instruction.
5287define amdgpu_ps <7 x float> @dyn_insertelement_v7f32_s_v_v(<7 x float> inreg %vec, float %val, i32 %idx) {
5288; GPRIDX-LABEL: dyn_insertelement_v7f32_s_v_v:
5289; GPRIDX:       ; %bb.0: ; %entry
5290; GPRIDX-NEXT:    v_mov_b32_e32 v2, s2
5291; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
5292; GPRIDX-NEXT:    v_mov_b32_e32 v3, s3
5293; GPRIDX-NEXT:    v_cndmask_b32_e32 v8, v2, v0, vcc
5294; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v1
5295; GPRIDX-NEXT:    v_mov_b32_e32 v4, s4
5296; GPRIDX-NEXT:    v_cndmask_b32_e32 v7, v3, v0, vcc
5297; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v1
5298; GPRIDX-NEXT:    v_mov_b32_e32 v5, s5
5299; GPRIDX-NEXT:    v_cndmask_b32_e32 v2, v4, v0, vcc
5300; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v1
5301; GPRIDX-NEXT:    v_mov_b32_e32 v6, s6
5302; GPRIDX-NEXT:    v_cndmask_b32_e32 v3, v5, v0, vcc
5303; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v1
5304; GPRIDX-NEXT:    v_mov_b32_e32 v9, s7
5305; GPRIDX-NEXT:    v_cndmask_b32_e32 v4, v6, v0, vcc
5306; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v1
5307; GPRIDX-NEXT:    v_mov_b32_e32 v10, s8
5308; GPRIDX-NEXT:    v_cndmask_b32_e32 v5, v9, v0, vcc
5309; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v1
5310; GPRIDX-NEXT:    v_cndmask_b32_e32 v6, v10, v0, vcc
5311; GPRIDX-NEXT:    v_mov_b32_e32 v0, v8
5312; GPRIDX-NEXT:    v_mov_b32_e32 v1, v7
5313; GPRIDX-NEXT:    ; return to shader part epilog
5314;
5315; GFX10-LABEL: dyn_insertelement_v7f32_s_v_v:
5316; GFX10:       ; %bb.0: ; %entry
5317; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
5318; GFX10-NEXT:    v_cndmask_b32_e32 v8, s2, v0, vcc_lo
5319; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v1
5320; GFX10-NEXT:    v_cndmask_b32_e32 v7, s3, v0, vcc_lo
5321; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v1
5322; GFX10-NEXT:    v_cndmask_b32_e32 v2, s4, v0, vcc_lo
5323; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v1
5324; GFX10-NEXT:    v_cndmask_b32_e32 v3, s5, v0, vcc_lo
5325; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v1
5326; GFX10-NEXT:    v_cndmask_b32_e32 v4, s6, v0, vcc_lo
5327; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v1
5328; GFX10-NEXT:    v_cndmask_b32_e32 v5, s7, v0, vcc_lo
5329; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v1
5330; GFX10-NEXT:    v_mov_b32_e32 v1, v7
5331; GFX10-NEXT:    v_cndmask_b32_e32 v6, s8, v0, vcc_lo
5332; GFX10-NEXT:    v_mov_b32_e32 v0, v8
5333; GFX10-NEXT:    ; return to shader part epilog
5334;
5335; GFX11-LABEL: dyn_insertelement_v7f32_s_v_v:
5336; GFX11:       ; %bb.0: ; %entry
5337; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
5338; GFX11-NEXT:    v_cndmask_b32_e32 v8, s2, v0, vcc_lo
5339; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v1
5340; GFX11-NEXT:    v_cndmask_b32_e32 v7, s3, v0, vcc_lo
5341; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v1
5342; GFX11-NEXT:    v_cndmask_b32_e32 v2, s4, v0, vcc_lo
5343; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v1
5344; GFX11-NEXT:    v_cndmask_b32_e32 v3, s5, v0, vcc_lo
5345; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v1
5346; GFX11-NEXT:    v_cndmask_b32_e32 v4, s6, v0, vcc_lo
5347; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v1
5348; GFX11-NEXT:    v_cndmask_b32_e32 v5, s7, v0, vcc_lo
5349; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v1
5350; GFX11-NEXT:    v_dual_mov_b32 v1, v7 :: v_dual_cndmask_b32 v6, s8, v0
5351; GFX11-NEXT:    v_mov_b32_e32 v0, v8
5352; GFX11-NEXT:    ; return to shader part epilog
5353entry:
5354  %insert = insertelement <7 x float> %vec, float %val, i32 %idx
5355  ret <7 x float> %insert
5356}
5357
; Dynamic-index insertelement into <7 x float>. Only %idx is inreg; %vec and %val are not.
; The expected output updates v0..v6 in place: for each element it compares the index
; (s2) with the element number and uses v_cndmask_b32 to pick either %val (v7) or the
; old element. No copies or trailing moves are expected.
5358define amdgpu_ps <7 x float> @dyn_insertelement_v7f32_v_v_s(<7 x float> %vec, float %val, i32 inreg %idx) {
5359; GPRIDX-LABEL: dyn_insertelement_v7f32_v_v_s:
5360; GPRIDX:       ; %bb.0: ; %entry
5361; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 0
5362; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc
5363; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 1
5364; GPRIDX-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
5365; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 2
5366; GPRIDX-NEXT:    v_cndmask_b32_e32 v2, v2, v7, vcc
5367; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 3
5368; GPRIDX-NEXT:    v_cndmask_b32_e32 v3, v3, v7, vcc
5369; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 4
5370; GPRIDX-NEXT:    v_cndmask_b32_e32 v4, v4, v7, vcc
5371; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 5
5372; GPRIDX-NEXT:    v_cndmask_b32_e32 v5, v5, v7, vcc
5373; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 6
5374; GPRIDX-NEXT:    v_cndmask_b32_e32 v6, v6, v7, vcc
5375; GPRIDX-NEXT:    ; return to shader part epilog
5376;
5377; GFX10PLUS-LABEL: dyn_insertelement_v7f32_v_v_s:
5378; GFX10PLUS:       ; %bb.0: ; %entry
5379; GFX10PLUS-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s2, 0
5380; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc_lo
5381; GFX10PLUS-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s2, 1
5382; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc_lo
5383; GFX10PLUS-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s2, 2
5384; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v2, v2, v7, vcc_lo
5385; GFX10PLUS-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s2, 3
5386; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v3, v3, v7, vcc_lo
5387; GFX10PLUS-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s2, 4
5388; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v4, v4, v7, vcc_lo
5389; GFX10PLUS-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s2, 5
5390; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v5, v5, v7, vcc_lo
5391; GFX10PLUS-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s2, 6
5392; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v6, v6, v7, vcc_lo
5393; GFX10PLUS-NEXT:    ; return to shader part epilog
5394entry:
5395  %insert = insertelement <7 x float> %vec, float %val, i32 %idx
5396  ret <7 x float> %insert
5397}
5398
; Dynamic-index insertelement into <7 x float> with no inreg operands.
; The expected output updates v0..v6 in place: for each element it compares the index
; (v8) with the element number and uses v_cndmask_b32 to pick either %val (v7) or the
; old element.
5399define amdgpu_ps <7 x float> @dyn_insertelement_v7f32_v_v_v(<7 x float> %vec, float %val, i32 %idx) {
5400; GPRIDX-LABEL: dyn_insertelement_v7f32_v_v_v:
5401; GPRIDX:       ; %bb.0: ; %entry
5402; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v8
5403; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc
5404; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v8
5405; GPRIDX-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
5406; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v8
5407; GPRIDX-NEXT:    v_cndmask_b32_e32 v2, v2, v7, vcc
5408; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v8
5409; GPRIDX-NEXT:    v_cndmask_b32_e32 v3, v3, v7, vcc
5410; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v8
5411; GPRIDX-NEXT:    v_cndmask_b32_e32 v4, v4, v7, vcc
5412; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v8
5413; GPRIDX-NEXT:    v_cndmask_b32_e32 v5, v5, v7, vcc
5414; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v8
5415; GPRIDX-NEXT:    v_cndmask_b32_e32 v6, v6, v7, vcc
5416; GPRIDX-NEXT:    ; return to shader part epilog
5417;
5418; GFX10PLUS-LABEL: dyn_insertelement_v7f32_v_v_v:
5419; GFX10PLUS:       ; %bb.0: ; %entry
5420; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v8
5421; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc_lo
5422; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v8
5423; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc_lo
5424; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v8
5425; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v2, v2, v7, vcc_lo
5426; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v8
5427; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v3, v3, v7, vcc_lo
5428; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v8
5429; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v4, v4, v7, vcc_lo
5430; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v8
5431; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v5, v5, v7, vcc_lo
5432; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v8
5433; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v6, v6, v7, vcc_lo
5434; GFX10PLUS-NEXT:    ; return to shader part epilog
5435entry:
5436  %insert = insertelement <7 x float> %vec, float %val, i32 %idx
5437  ret <7 x float> %insert
5438}
5439
; Dynamic insertelement into <7 x double> with the vector, the value and the
; index all marked inreg. In the expected code the vector arrives in s2-s15,
; %val in s[16:17] and %idx in s18.
; Expected lowering on all checked targets: move the vector down to s0-s13
; with s_mov_b32, copy the index into m0, then issue a single
; "s_movreld_b64 s[0:1], s[16:17]". The gfx900 checks additionally expect an
; "s_nop 0" between the m0 write and the s_movreld_b64; the gfx1010/gfx1100
; checks do not, and they place the m0 write earlier in the copy sequence.
; The check lines are autogenerated (see the NOTE at the top of this file);
; regenerate them with utils/update_llc_test_checks.py rather than editing
; them by hand.
5440define amdgpu_ps <7 x double> @dyn_insertelement_v7f64_s_s_s(<7 x double> inreg %vec, double inreg %val, i32 inreg %idx) {
5441; GPRIDX-LABEL: dyn_insertelement_v7f64_s_s_s:
5442; GPRIDX:       ; %bb.0: ; %entry
5443; GPRIDX-NEXT:    s_mov_b32 s0, s2
5444; GPRIDX-NEXT:    s_mov_b32 s1, s3
5445; GPRIDX-NEXT:    s_mov_b32 s2, s4
5446; GPRIDX-NEXT:    s_mov_b32 s3, s5
5447; GPRIDX-NEXT:    s_mov_b32 s4, s6
5448; GPRIDX-NEXT:    s_mov_b32 s5, s7
5449; GPRIDX-NEXT:    s_mov_b32 s6, s8
5450; GPRIDX-NEXT:    s_mov_b32 s7, s9
5451; GPRIDX-NEXT:    s_mov_b32 s8, s10
5452; GPRIDX-NEXT:    s_mov_b32 s9, s11
5453; GPRIDX-NEXT:    s_mov_b32 s10, s12
5454; GPRIDX-NEXT:    s_mov_b32 s11, s13
5455; GPRIDX-NEXT:    s_mov_b32 s12, s14
5456; GPRIDX-NEXT:    s_mov_b32 s13, s15
5457; GPRIDX-NEXT:    s_mov_b32 m0, s18
5458; GPRIDX-NEXT:    s_nop 0
5459; GPRIDX-NEXT:    s_movreld_b64 s[0:1], s[16:17]
5460; GPRIDX-NEXT:    ; return to shader part epilog
5461;
5462; GFX10PLUS-LABEL: dyn_insertelement_v7f64_s_s_s:
5463; GFX10PLUS:       ; %bb.0: ; %entry
5464; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
5465; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
5466; GFX10PLUS-NEXT:    s_mov_b32 m0, s18
5467; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
5468; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
5469; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
5470; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
5471; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
5472; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
5473; GFX10PLUS-NEXT:    s_mov_b32 s8, s10
5474; GFX10PLUS-NEXT:    s_mov_b32 s9, s11
5475; GFX10PLUS-NEXT:    s_mov_b32 s10, s12
5476; GFX10PLUS-NEXT:    s_mov_b32 s11, s13
5477; GFX10PLUS-NEXT:    s_mov_b32 s12, s14
5478; GFX10PLUS-NEXT:    s_mov_b32 s13, s15
5479; GFX10PLUS-NEXT:    s_movreld_b64 s[0:1], s[16:17]
5480; GFX10PLUS-NEXT:    ; return to shader part epilog
5481entry:
5482  %insert = insertelement <7 x double> %vec, double %val, i32 %idx
5483  ret <7 x double> %insert
5484}
5485
; Dynamic insertelement into <7 x double> with an inreg vector, a non-inreg
; value and an inreg index. In the expected code the vector arrives in
; s2-s15, %val in v0/v1 and %idx in s16.
; Expected lowering:
;   1. move the vector down to s0-s13, then copy it into VGPRs starting at v2
;      (the checks list writes to v2 through v17);
;   2. shift the index left by one (s_lshl_b32 ..., 1), presumably because
;      each double spans two 32-bit registers;
;   3. write the two halves of %val with an indexed move: gfx900 brackets two
;      v_mov_b32 with s_set_gpr_idx_on/s_set_gpr_idx_off, while gfx1010 and
;      gfx1100 put the shifted index in m0 and use two v_movreld_b32;
;   4. read v2-v15 back into s0-s13 with v_readfirstlane_b32.
; The gfx1100 checks pair the VGPR copies into v_dual_mov_b32 instructions.
; The check lines are autogenerated (see the NOTE at the top of this file);
; regenerate them with utils/update_llc_test_checks.py rather than editing
; them by hand.
5486define amdgpu_ps <7 x double> @dyn_insertelement_v7f64_s_v_s(<7 x double> inreg %vec, double %val, i32 inreg %idx) {
5487; GPRIDX-LABEL: dyn_insertelement_v7f64_s_v_s:
5488; GPRIDX:       ; %bb.0: ; %entry
5489; GPRIDX-NEXT:    s_mov_b32 s0, s2
5490; GPRIDX-NEXT:    s_mov_b32 s1, s3
5491; GPRIDX-NEXT:    s_mov_b32 s2, s4
5492; GPRIDX-NEXT:    s_mov_b32 s3, s5
5493; GPRIDX-NEXT:    s_mov_b32 s4, s6
5494; GPRIDX-NEXT:    s_mov_b32 s5, s7
5495; GPRIDX-NEXT:    s_mov_b32 s6, s8
5496; GPRIDX-NEXT:    s_mov_b32 s7, s9
5497; GPRIDX-NEXT:    s_mov_b32 s8, s10
5498; GPRIDX-NEXT:    s_mov_b32 s9, s11
5499; GPRIDX-NEXT:    s_mov_b32 s10, s12
5500; GPRIDX-NEXT:    s_mov_b32 s11, s13
5501; GPRIDX-NEXT:    s_mov_b32 s12, s14
5502; GPRIDX-NEXT:    s_mov_b32 s13, s15
5503; GPRIDX-NEXT:    v_mov_b32_e32 v17, s15
5504; GPRIDX-NEXT:    v_mov_b32_e32 v16, s14
5505; GPRIDX-NEXT:    v_mov_b32_e32 v15, s13
5506; GPRIDX-NEXT:    v_mov_b32_e32 v14, s12
5507; GPRIDX-NEXT:    v_mov_b32_e32 v13, s11
5508; GPRIDX-NEXT:    v_mov_b32_e32 v12, s10
5509; GPRIDX-NEXT:    v_mov_b32_e32 v11, s9
5510; GPRIDX-NEXT:    v_mov_b32_e32 v10, s8
5511; GPRIDX-NEXT:    v_mov_b32_e32 v9, s7
5512; GPRIDX-NEXT:    v_mov_b32_e32 v8, s6
5513; GPRIDX-NEXT:    v_mov_b32_e32 v7, s5
5514; GPRIDX-NEXT:    v_mov_b32_e32 v6, s4
5515; GPRIDX-NEXT:    v_mov_b32_e32 v5, s3
5516; GPRIDX-NEXT:    v_mov_b32_e32 v4, s2
5517; GPRIDX-NEXT:    v_mov_b32_e32 v3, s1
5518; GPRIDX-NEXT:    v_mov_b32_e32 v2, s0
5519; GPRIDX-NEXT:    s_lshl_b32 s0, s16, 1
5520; GPRIDX-NEXT:    s_set_gpr_idx_on s0, gpr_idx(DST)
5521; GPRIDX-NEXT:    v_mov_b32_e32 v2, v0
5522; GPRIDX-NEXT:    v_mov_b32_e32 v3, v1
5523; GPRIDX-NEXT:    s_set_gpr_idx_off
5524; GPRIDX-NEXT:    v_readfirstlane_b32 s0, v2
5525; GPRIDX-NEXT:    v_readfirstlane_b32 s1, v3
5526; GPRIDX-NEXT:    v_readfirstlane_b32 s2, v4
5527; GPRIDX-NEXT:    v_readfirstlane_b32 s3, v5
5528; GPRIDX-NEXT:    v_readfirstlane_b32 s4, v6
5529; GPRIDX-NEXT:    v_readfirstlane_b32 s5, v7
5530; GPRIDX-NEXT:    v_readfirstlane_b32 s6, v8
5531; GPRIDX-NEXT:    v_readfirstlane_b32 s7, v9
5532; GPRIDX-NEXT:    v_readfirstlane_b32 s8, v10
5533; GPRIDX-NEXT:    v_readfirstlane_b32 s9, v11
5534; GPRIDX-NEXT:    v_readfirstlane_b32 s10, v12
5535; GPRIDX-NEXT:    v_readfirstlane_b32 s11, v13
5536; GPRIDX-NEXT:    v_readfirstlane_b32 s12, v14
5537; GPRIDX-NEXT:    v_readfirstlane_b32 s13, v15
5538; GPRIDX-NEXT:    ; return to shader part epilog
5539;
5540; GFX10-LABEL: dyn_insertelement_v7f64_s_v_s:
5541; GFX10:       ; %bb.0: ; %entry
5542; GFX10-NEXT:    s_mov_b32 s0, s2
5543; GFX10-NEXT:    s_mov_b32 s1, s3
5544; GFX10-NEXT:    s_mov_b32 s2, s4
5545; GFX10-NEXT:    s_mov_b32 s3, s5
5546; GFX10-NEXT:    s_mov_b32 s4, s6
5547; GFX10-NEXT:    s_mov_b32 s5, s7
5548; GFX10-NEXT:    s_mov_b32 s6, s8
5549; GFX10-NEXT:    s_mov_b32 s7, s9
5550; GFX10-NEXT:    s_mov_b32 s8, s10
5551; GFX10-NEXT:    s_mov_b32 s9, s11
5552; GFX10-NEXT:    s_mov_b32 s10, s12
5553; GFX10-NEXT:    s_mov_b32 s11, s13
5554; GFX10-NEXT:    s_mov_b32 s12, s14
5555; GFX10-NEXT:    s_mov_b32 s13, s15
5556; GFX10-NEXT:    v_mov_b32_e32 v17, s15
5557; GFX10-NEXT:    v_mov_b32_e32 v2, s0
5558; GFX10-NEXT:    s_lshl_b32 m0, s16, 1
5559; GFX10-NEXT:    v_mov_b32_e32 v16, s14
5560; GFX10-NEXT:    v_mov_b32_e32 v15, s13
5561; GFX10-NEXT:    v_mov_b32_e32 v14, s12
5562; GFX10-NEXT:    v_mov_b32_e32 v13, s11
5563; GFX10-NEXT:    v_mov_b32_e32 v12, s10
5564; GFX10-NEXT:    v_mov_b32_e32 v11, s9
5565; GFX10-NEXT:    v_mov_b32_e32 v10, s8
5566; GFX10-NEXT:    v_mov_b32_e32 v9, s7
5567; GFX10-NEXT:    v_mov_b32_e32 v8, s6
5568; GFX10-NEXT:    v_mov_b32_e32 v7, s5
5569; GFX10-NEXT:    v_mov_b32_e32 v6, s4
5570; GFX10-NEXT:    v_mov_b32_e32 v5, s3
5571; GFX10-NEXT:    v_mov_b32_e32 v4, s2
5572; GFX10-NEXT:    v_mov_b32_e32 v3, s1
5573; GFX10-NEXT:    v_movreld_b32_e32 v2, v0
5574; GFX10-NEXT:    v_movreld_b32_e32 v3, v1
5575; GFX10-NEXT:    v_readfirstlane_b32 s0, v2
5576; GFX10-NEXT:    v_readfirstlane_b32 s1, v3
5577; GFX10-NEXT:    v_readfirstlane_b32 s2, v4
5578; GFX10-NEXT:    v_readfirstlane_b32 s3, v5
5579; GFX10-NEXT:    v_readfirstlane_b32 s4, v6
5580; GFX10-NEXT:    v_readfirstlane_b32 s5, v7
5581; GFX10-NEXT:    v_readfirstlane_b32 s6, v8
5582; GFX10-NEXT:    v_readfirstlane_b32 s7, v9
5583; GFX10-NEXT:    v_readfirstlane_b32 s8, v10
5584; GFX10-NEXT:    v_readfirstlane_b32 s9, v11
5585; GFX10-NEXT:    v_readfirstlane_b32 s10, v12
5586; GFX10-NEXT:    v_readfirstlane_b32 s11, v13
5587; GFX10-NEXT:    v_readfirstlane_b32 s12, v14
5588; GFX10-NEXT:    v_readfirstlane_b32 s13, v15
5589; GFX10-NEXT:    ; return to shader part epilog
5590;
5591; GFX11-LABEL: dyn_insertelement_v7f64_s_v_s:
5592; GFX11:       ; %bb.0: ; %entry
5593; GFX11-NEXT:    s_mov_b32 s0, s2
5594; GFX11-NEXT:    s_mov_b32 s1, s3
5595; GFX11-NEXT:    s_mov_b32 s2, s4
5596; GFX11-NEXT:    s_mov_b32 s3, s5
5597; GFX11-NEXT:    s_mov_b32 s4, s6
5598; GFX11-NEXT:    s_mov_b32 s5, s7
5599; GFX11-NEXT:    s_mov_b32 s6, s8
5600; GFX11-NEXT:    s_mov_b32 s7, s9
5601; GFX11-NEXT:    s_mov_b32 s8, s10
5602; GFX11-NEXT:    s_mov_b32 s9, s11
5603; GFX11-NEXT:    s_mov_b32 s10, s12
5604; GFX11-NEXT:    s_mov_b32 s11, s13
5605; GFX11-NEXT:    s_mov_b32 s12, s14
5606; GFX11-NEXT:    s_mov_b32 s13, s15
5607; GFX11-NEXT:    v_dual_mov_b32 v17, s15 :: v_dual_mov_b32 v16, s14
5608; GFX11-NEXT:    v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
5609; GFX11-NEXT:    s_lshl_b32 m0, s16, 1
5610; GFX11-NEXT:    v_dual_mov_b32 v15, s13 :: v_dual_mov_b32 v14, s12
5611; GFX11-NEXT:    v_dual_mov_b32 v13, s11 :: v_dual_mov_b32 v12, s10
5612; GFX11-NEXT:    v_dual_mov_b32 v11, s9 :: v_dual_mov_b32 v10, s8
5613; GFX11-NEXT:    v_dual_mov_b32 v9, s7 :: v_dual_mov_b32 v8, s6
5614; GFX11-NEXT:    v_dual_mov_b32 v7, s5 :: v_dual_mov_b32 v6, s4
5615; GFX11-NEXT:    v_dual_mov_b32 v5, s3 :: v_dual_mov_b32 v4, s2
5616; GFX11-NEXT:    v_movreld_b32_e32 v2, v0
5617; GFX11-NEXT:    v_movreld_b32_e32 v3, v1
5618; GFX11-NEXT:    v_readfirstlane_b32 s0, v2
5619; GFX11-NEXT:    v_readfirstlane_b32 s1, v3
5620; GFX11-NEXT:    v_readfirstlane_b32 s2, v4
5621; GFX11-NEXT:    v_readfirstlane_b32 s3, v5
5622; GFX11-NEXT:    v_readfirstlane_b32 s4, v6
5623; GFX11-NEXT:    v_readfirstlane_b32 s5, v7
5624; GFX11-NEXT:    v_readfirstlane_b32 s6, v8
5625; GFX11-NEXT:    v_readfirstlane_b32 s7, v9
5626; GFX11-NEXT:    v_readfirstlane_b32 s8, v10
5627; GFX11-NEXT:    v_readfirstlane_b32 s9, v11
5628; GFX11-NEXT:    v_readfirstlane_b32 s10, v12
5629; GFX11-NEXT:    v_readfirstlane_b32 s11, v13
5630; GFX11-NEXT:    v_readfirstlane_b32 s12, v14
5631; GFX11-NEXT:    v_readfirstlane_b32 s13, v15
5632; GFX11-NEXT:    ; return to shader part epilog
5633entry:
5634  %insert = insertelement <7 x double> %vec, double %val, i32 %idx
5635  ret <7 x double> %insert
5636}
5637
; Dynamic insertelement into <7 x double> with an inreg vector and a
; non-inreg value and index. In the expected code the vector arrives in
; s2-s15, %val in v0/v1 and %idx in v2.
; Expected lowering:
;   1. move the vector down to s0-s13, then copy it into VGPRs starting at v3
;      (the checks list writes to v3 through v18);
;   2. compare the index against each element number 0..6 with v_cmp_eq_u32;
;   3. for every element, two v_cndmask_b32 choose between the old low/high
;      dword and v0/v1;
;   4. read the selected dwords back into s0-s13 with v_readfirstlane_b32.
; gfx900 keeps the compare results in vcc and 64-bit SGPR pairs; gfx1010 and
; gfx1100 use vcc_lo, s0 and s1. The gfx1100 checks additionally fuse the
; vcc_lo selects and the VGPR copies into v_dual_* instructions.
; The check lines are autogenerated (see the NOTE at the top of this file);
; regenerate them with utils/update_llc_test_checks.py rather than editing
; them by hand.
5638define amdgpu_ps <7 x double> @dyn_insertelement_v7f64_s_v_v(<7 x double> inreg %vec, double %val, i32 %idx) {
5639; GPRIDX-LABEL: dyn_insertelement_v7f64_s_v_v:
5640; GPRIDX:       ; %bb.0: ; %entry
5641; GPRIDX-NEXT:    s_mov_b32 s0, s2
5642; GPRIDX-NEXT:    s_mov_b32 s1, s3
5643; GPRIDX-NEXT:    s_mov_b32 s2, s4
5644; GPRIDX-NEXT:    s_mov_b32 s3, s5
5645; GPRIDX-NEXT:    s_mov_b32 s4, s6
5646; GPRIDX-NEXT:    s_mov_b32 s5, s7
5647; GPRIDX-NEXT:    s_mov_b32 s6, s8
5648; GPRIDX-NEXT:    s_mov_b32 s7, s9
5649; GPRIDX-NEXT:    s_mov_b32 s8, s10
5650; GPRIDX-NEXT:    s_mov_b32 s9, s11
5651; GPRIDX-NEXT:    s_mov_b32 s10, s12
5652; GPRIDX-NEXT:    s_mov_b32 s11, s13
5653; GPRIDX-NEXT:    s_mov_b32 s12, s14
5654; GPRIDX-NEXT:    s_mov_b32 s13, s15
5655; GPRIDX-NEXT:    v_mov_b32_e32 v18, s15
5656; GPRIDX-NEXT:    v_mov_b32_e32 v17, s14
5657; GPRIDX-NEXT:    v_mov_b32_e32 v16, s13
5658; GPRIDX-NEXT:    v_mov_b32_e32 v15, s12
5659; GPRIDX-NEXT:    v_mov_b32_e32 v14, s11
5660; GPRIDX-NEXT:    v_mov_b32_e32 v13, s10
5661; GPRIDX-NEXT:    v_mov_b32_e32 v12, s9
5662; GPRIDX-NEXT:    v_mov_b32_e32 v11, s8
5663; GPRIDX-NEXT:    v_mov_b32_e32 v10, s7
5664; GPRIDX-NEXT:    v_mov_b32_e32 v9, s6
5665; GPRIDX-NEXT:    v_mov_b32_e32 v8, s5
5666; GPRIDX-NEXT:    v_mov_b32_e32 v7, s4
5667; GPRIDX-NEXT:    v_mov_b32_e32 v6, s3
5668; GPRIDX-NEXT:    v_mov_b32_e32 v5, s2
5669; GPRIDX-NEXT:    v_mov_b32_e32 v4, s1
5670; GPRIDX-NEXT:    v_mov_b32_e32 v3, s0
5671; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v2
5672; GPRIDX-NEXT:    v_cmp_eq_u32_e64 s[0:1], 2, v2
5673; GPRIDX-NEXT:    v_cmp_eq_u32_e64 s[2:3], 3, v2
5674; GPRIDX-NEXT:    v_cmp_eq_u32_e64 s[4:5], 4, v2
5675; GPRIDX-NEXT:    v_cmp_eq_u32_e64 s[6:7], 5, v2
5676; GPRIDX-NEXT:    v_cmp_eq_u32_e64 s[8:9], 6, v2
5677; GPRIDX-NEXT:    v_cmp_eq_u32_e64 s[10:11], 1, v2
5678; GPRIDX-NEXT:    v_cndmask_b32_e32 v3, v3, v0, vcc
5679; GPRIDX-NEXT:    v_cndmask_b32_e64 v2, v5, v0, s[10:11]
5680; GPRIDX-NEXT:    v_cndmask_b32_e64 v5, v7, v0, s[0:1]
5681; GPRIDX-NEXT:    v_cndmask_b32_e64 v7, v9, v0, s[2:3]
5682; GPRIDX-NEXT:    v_cndmask_b32_e64 v9, v11, v0, s[4:5]
5683; GPRIDX-NEXT:    v_cndmask_b32_e64 v11, v13, v0, s[6:7]
5684; GPRIDX-NEXT:    v_cndmask_b32_e64 v0, v15, v0, s[8:9]
5685; GPRIDX-NEXT:    v_cndmask_b32_e32 v4, v4, v1, vcc
5686; GPRIDX-NEXT:    v_cndmask_b32_e64 v6, v6, v1, s[10:11]
5687; GPRIDX-NEXT:    v_cndmask_b32_e64 v8, v8, v1, s[0:1]
5688; GPRIDX-NEXT:    v_cndmask_b32_e64 v10, v10, v1, s[2:3]
5689; GPRIDX-NEXT:    v_cndmask_b32_e64 v12, v12, v1, s[4:5]
5690; GPRIDX-NEXT:    v_cndmask_b32_e64 v13, v14, v1, s[6:7]
5691; GPRIDX-NEXT:    v_cndmask_b32_e64 v1, v16, v1, s[8:9]
5692; GPRIDX-NEXT:    v_readfirstlane_b32 s0, v3
5693; GPRIDX-NEXT:    v_readfirstlane_b32 s1, v4
5694; GPRIDX-NEXT:    v_readfirstlane_b32 s2, v2
5695; GPRIDX-NEXT:    v_readfirstlane_b32 s3, v6
5696; GPRIDX-NEXT:    v_readfirstlane_b32 s4, v5
5697; GPRIDX-NEXT:    v_readfirstlane_b32 s5, v8
5698; GPRIDX-NEXT:    v_readfirstlane_b32 s6, v7
5699; GPRIDX-NEXT:    v_readfirstlane_b32 s7, v10
5700; GPRIDX-NEXT:    v_readfirstlane_b32 s8, v9
5701; GPRIDX-NEXT:    v_readfirstlane_b32 s9, v12
5702; GPRIDX-NEXT:    v_readfirstlane_b32 s10, v11
5703; GPRIDX-NEXT:    v_readfirstlane_b32 s11, v13
5704; GPRIDX-NEXT:    v_readfirstlane_b32 s12, v0
5705; GPRIDX-NEXT:    v_readfirstlane_b32 s13, v1
5706; GPRIDX-NEXT:    ; return to shader part epilog
5707;
5708; GFX10-LABEL: dyn_insertelement_v7f64_s_v_v:
5709; GFX10:       ; %bb.0: ; %entry
5710; GFX10-NEXT:    s_mov_b32 s0, s2
5711; GFX10-NEXT:    s_mov_b32 s1, s3
5712; GFX10-NEXT:    s_mov_b32 s2, s4
5713; GFX10-NEXT:    s_mov_b32 s3, s5
5714; GFX10-NEXT:    s_mov_b32 s4, s6
5715; GFX10-NEXT:    s_mov_b32 s5, s7
5716; GFX10-NEXT:    s_mov_b32 s6, s8
5717; GFX10-NEXT:    s_mov_b32 s7, s9
5718; GFX10-NEXT:    s_mov_b32 s8, s10
5719; GFX10-NEXT:    s_mov_b32 s9, s11
5720; GFX10-NEXT:    s_mov_b32 s10, s12
5721; GFX10-NEXT:    s_mov_b32 s11, s13
5722; GFX10-NEXT:    s_mov_b32 s12, s14
5723; GFX10-NEXT:    s_mov_b32 s13, s15
5724; GFX10-NEXT:    v_mov_b32_e32 v18, s15
5725; GFX10-NEXT:    v_mov_b32_e32 v17, s14
5726; GFX10-NEXT:    v_mov_b32_e32 v16, s13
5727; GFX10-NEXT:    v_mov_b32_e32 v15, s12
5728; GFX10-NEXT:    v_mov_b32_e32 v14, s11
5729; GFX10-NEXT:    v_mov_b32_e32 v13, s10
5730; GFX10-NEXT:    v_mov_b32_e32 v12, s9
5731; GFX10-NEXT:    v_mov_b32_e32 v11, s8
5732; GFX10-NEXT:    v_mov_b32_e32 v10, s7
5733; GFX10-NEXT:    v_mov_b32_e32 v9, s6
5734; GFX10-NEXT:    v_mov_b32_e32 v8, s5
5735; GFX10-NEXT:    v_mov_b32_e32 v7, s4
5736; GFX10-NEXT:    v_mov_b32_e32 v6, s3
5737; GFX10-NEXT:    v_mov_b32_e32 v5, s2
5738; GFX10-NEXT:    v_mov_b32_e32 v4, s1
5739; GFX10-NEXT:    v_mov_b32_e32 v3, s0
5740; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v2
5741; GFX10-NEXT:    v_cmp_eq_u32_e64 s0, 1, v2
5742; GFX10-NEXT:    v_cmp_eq_u32_e64 s1, 6, v2
5743; GFX10-NEXT:    v_cndmask_b32_e32 v3, v3, v0, vcc_lo
5744; GFX10-NEXT:    v_cndmask_b32_e32 v4, v4, v1, vcc_lo
5745; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v2
5746; GFX10-NEXT:    v_cndmask_b32_e64 v5, v5, v0, s0
5747; GFX10-NEXT:    v_cndmask_b32_e64 v6, v6, v1, s0
5748; GFX10-NEXT:    v_cmp_eq_u32_e64 s0, 3, v2
5749; GFX10-NEXT:    v_cndmask_b32_e32 v7, v7, v0, vcc_lo
5750; GFX10-NEXT:    v_cndmask_b32_e32 v8, v8, v1, vcc_lo
5751; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v2
5752; GFX10-NEXT:    v_cndmask_b32_e64 v9, v9, v0, s0
5753; GFX10-NEXT:    v_cndmask_b32_e64 v10, v10, v1, s0
5754; GFX10-NEXT:    v_cmp_eq_u32_e64 s0, 5, v2
5755; GFX10-NEXT:    v_readfirstlane_b32 s2, v5
5756; GFX10-NEXT:    v_cndmask_b32_e32 v11, v11, v0, vcc_lo
5757; GFX10-NEXT:    v_cndmask_b32_e32 v2, v12, v1, vcc_lo
5758; GFX10-NEXT:    v_readfirstlane_b32 s3, v6
5759; GFX10-NEXT:    v_cndmask_b32_e64 v12, v13, v0, s0
5760; GFX10-NEXT:    v_cndmask_b32_e64 v13, v14, v1, s0
5761; GFX10-NEXT:    v_cndmask_b32_e64 v0, v15, v0, s1
5762; GFX10-NEXT:    v_cndmask_b32_e64 v1, v16, v1, s1
5763; GFX10-NEXT:    v_readfirstlane_b32 s0, v3
5764; GFX10-NEXT:    v_readfirstlane_b32 s1, v4
5765; GFX10-NEXT:    v_readfirstlane_b32 s4, v7
5766; GFX10-NEXT:    v_readfirstlane_b32 s5, v8
5767; GFX10-NEXT:    v_readfirstlane_b32 s6, v9
5768; GFX10-NEXT:    v_readfirstlane_b32 s7, v10
5769; GFX10-NEXT:    v_readfirstlane_b32 s8, v11
5770; GFX10-NEXT:    v_readfirstlane_b32 s9, v2
5771; GFX10-NEXT:    v_readfirstlane_b32 s10, v12
5772; GFX10-NEXT:    v_readfirstlane_b32 s11, v13
5773; GFX10-NEXT:    v_readfirstlane_b32 s12, v0
5774; GFX10-NEXT:    v_readfirstlane_b32 s13, v1
5775; GFX10-NEXT:    ; return to shader part epilog
5776;
5777; GFX11-LABEL: dyn_insertelement_v7f64_s_v_v:
5778; GFX11:       ; %bb.0: ; %entry
5779; GFX11-NEXT:    s_mov_b32 s0, s2
5780; GFX11-NEXT:    s_mov_b32 s1, s3
5781; GFX11-NEXT:    s_mov_b32 s2, s4
5782; GFX11-NEXT:    s_mov_b32 s3, s5
5783; GFX11-NEXT:    s_mov_b32 s4, s6
5784; GFX11-NEXT:    s_mov_b32 s5, s7
5785; GFX11-NEXT:    s_mov_b32 s6, s8
5786; GFX11-NEXT:    s_mov_b32 s7, s9
5787; GFX11-NEXT:    s_mov_b32 s8, s10
5788; GFX11-NEXT:    s_mov_b32 s9, s11
5789; GFX11-NEXT:    s_mov_b32 s10, s12
5790; GFX11-NEXT:    s_mov_b32 s11, s13
5791; GFX11-NEXT:    s_mov_b32 s12, s14
5792; GFX11-NEXT:    s_mov_b32 s13, s15
5793; GFX11-NEXT:    v_dual_mov_b32 v18, s15 :: v_dual_mov_b32 v17, s14
5794; GFX11-NEXT:    v_dual_mov_b32 v16, s13 :: v_dual_mov_b32 v15, s12
5795; GFX11-NEXT:    v_dual_mov_b32 v14, s11 :: v_dual_mov_b32 v13, s10
5796; GFX11-NEXT:    v_dual_mov_b32 v12, s9 :: v_dual_mov_b32 v11, s8
5797; GFX11-NEXT:    v_dual_mov_b32 v10, s7 :: v_dual_mov_b32 v9, s6
5798; GFX11-NEXT:    v_dual_mov_b32 v8, s5 :: v_dual_mov_b32 v7, s4
5799; GFX11-NEXT:    v_dual_mov_b32 v6, s3 :: v_dual_mov_b32 v5, s2
5800; GFX11-NEXT:    v_dual_mov_b32 v4, s1 :: v_dual_mov_b32 v3, s0
5801; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v2
5802; GFX11-NEXT:    v_cmp_eq_u32_e64 s0, 1, v2
5803; GFX11-NEXT:    v_cmp_eq_u32_e64 s1, 6, v2
5804; GFX11-NEXT:    v_dual_cndmask_b32 v3, v3, v0 :: v_dual_cndmask_b32 v4, v4, v1
5805; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v2
5806; GFX11-NEXT:    v_cndmask_b32_e64 v5, v5, v0, s0
5807; GFX11-NEXT:    v_cndmask_b32_e64 v6, v6, v1, s0
5808; GFX11-NEXT:    v_cmp_eq_u32_e64 s0, 3, v2
5809; GFX11-NEXT:    v_dual_cndmask_b32 v7, v7, v0 :: v_dual_cndmask_b32 v8, v8, v1
5810; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v2
5811; GFX11-NEXT:    v_cndmask_b32_e64 v9, v9, v0, s0
5812; GFX11-NEXT:    v_cndmask_b32_e64 v10, v10, v1, s0
5813; GFX11-NEXT:    v_cmp_eq_u32_e64 s0, 5, v2
5814; GFX11-NEXT:    v_readfirstlane_b32 s2, v5
5815; GFX11-NEXT:    v_dual_cndmask_b32 v11, v11, v0 :: v_dual_cndmask_b32 v2, v12, v1
5816; GFX11-NEXT:    v_readfirstlane_b32 s3, v6
5817; GFX11-NEXT:    v_cndmask_b32_e64 v12, v13, v0, s0
5818; GFX11-NEXT:    v_cndmask_b32_e64 v13, v14, v1, s0
5819; GFX11-NEXT:    v_cndmask_b32_e64 v0, v15, v0, s1
5820; GFX11-NEXT:    v_cndmask_b32_e64 v1, v16, v1, s1
5821; GFX11-NEXT:    v_readfirstlane_b32 s0, v3
5822; GFX11-NEXT:    v_readfirstlane_b32 s1, v4
5823; GFX11-NEXT:    v_readfirstlane_b32 s4, v7
5824; GFX11-NEXT:    v_readfirstlane_b32 s5, v8
5825; GFX11-NEXT:    v_readfirstlane_b32 s6, v9
5826; GFX11-NEXT:    v_readfirstlane_b32 s7, v10
5827; GFX11-NEXT:    v_readfirstlane_b32 s8, v11
5828; GFX11-NEXT:    v_readfirstlane_b32 s9, v2
5829; GFX11-NEXT:    v_readfirstlane_b32 s10, v12
5830; GFX11-NEXT:    v_readfirstlane_b32 s11, v13
5831; GFX11-NEXT:    v_readfirstlane_b32 s12, v0
5832; GFX11-NEXT:    v_readfirstlane_b32 s13, v1
5833; GFX11-NEXT:    ; return to shader part epilog
5834entry:
5835  %insert = insertelement <7 x double> %vec, double %val, i32 %idx
5836  ret <7 x double> %insert
5837}
5838
; Dynamic insertelement into <7 x double> with a non-inreg vector and value
; and an inreg index. In the expected code the vector occupies v0-v13, %val
; is v14/v15 and %idx is s2.
; Expected lowering:
;   1. copy the high half of %val from v15 to v16;
;   2. shift the index left by one (s_lshl_b32 ..., 1), presumably because
;      each double spans two 32-bit registers;
;   3. write v14 and v16 with an indexed move based at v0/v1: gfx900 brackets
;      two v_mov_b32 with s_set_gpr_idx_on/s_set_gpr_idx_off, while gfx1010
;      and gfx1100 put the shifted index in m0 and use two v_movreld_b32;
;   4. read v0-v13 into s0-s13 with v_readfirstlane_b32.
; The check lines are autogenerated (see the NOTE at the top of this file);
; regenerate them with utils/update_llc_test_checks.py rather than editing
; them by hand.
5839define amdgpu_ps <7 x double> @dyn_insertelement_v7f64_v_v_s(<7 x double> %vec, double %val, i32 inreg %idx) {
5840; GPRIDX-LABEL: dyn_insertelement_v7f64_v_v_s:
5841; GPRIDX:       ; %bb.0: ; %entry
5842; GPRIDX-NEXT:    s_lshl_b32 s0, s2, 1
5843; GPRIDX-NEXT:    v_mov_b32_e32 v16, v15
5844; GPRIDX-NEXT:    s_set_gpr_idx_on s0, gpr_idx(DST)
5845; GPRIDX-NEXT:    v_mov_b32_e32 v0, v14
5846; GPRIDX-NEXT:    v_mov_b32_e32 v1, v16
5847; GPRIDX-NEXT:    s_set_gpr_idx_off
5848; GPRIDX-NEXT:    v_readfirstlane_b32 s0, v0
5849; GPRIDX-NEXT:    v_readfirstlane_b32 s1, v1
5850; GPRIDX-NEXT:    v_readfirstlane_b32 s2, v2
5851; GPRIDX-NEXT:    v_readfirstlane_b32 s3, v3
5852; GPRIDX-NEXT:    v_readfirstlane_b32 s4, v4
5853; GPRIDX-NEXT:    v_readfirstlane_b32 s5, v5
5854; GPRIDX-NEXT:    v_readfirstlane_b32 s6, v6
5855; GPRIDX-NEXT:    v_readfirstlane_b32 s7, v7
5856; GPRIDX-NEXT:    v_readfirstlane_b32 s8, v8
5857; GPRIDX-NEXT:    v_readfirstlane_b32 s9, v9
5858; GPRIDX-NEXT:    v_readfirstlane_b32 s10, v10
5859; GPRIDX-NEXT:    v_readfirstlane_b32 s11, v11
5860; GPRIDX-NEXT:    v_readfirstlane_b32 s12, v12
5861; GPRIDX-NEXT:    v_readfirstlane_b32 s13, v13
5862; GPRIDX-NEXT:    ; return to shader part epilog
5863;
5864; GFX10PLUS-LABEL: dyn_insertelement_v7f64_v_v_s:
5865; GFX10PLUS:       ; %bb.0: ; %entry
5866; GFX10PLUS-NEXT:    v_mov_b32_e32 v16, v15
5867; GFX10PLUS-NEXT:    s_lshl_b32 m0, s2, 1
5868; GFX10PLUS-NEXT:    v_movreld_b32_e32 v0, v14
5869; GFX10PLUS-NEXT:    v_movreld_b32_e32 v1, v16
5870; GFX10PLUS-NEXT:    v_readfirstlane_b32 s0, v0
5871; GFX10PLUS-NEXT:    v_readfirstlane_b32 s1, v1
5872; GFX10PLUS-NEXT:    v_readfirstlane_b32 s2, v2
5873; GFX10PLUS-NEXT:    v_readfirstlane_b32 s3, v3
5874; GFX10PLUS-NEXT:    v_readfirstlane_b32 s4, v4
5875; GFX10PLUS-NEXT:    v_readfirstlane_b32 s5, v5
5876; GFX10PLUS-NEXT:    v_readfirstlane_b32 s6, v6
5877; GFX10PLUS-NEXT:    v_readfirstlane_b32 s7, v7
5878; GFX10PLUS-NEXT:    v_readfirstlane_b32 s8, v8
5879; GFX10PLUS-NEXT:    v_readfirstlane_b32 s9, v9
5880; GFX10PLUS-NEXT:    v_readfirstlane_b32 s10, v10
5881; GFX10PLUS-NEXT:    v_readfirstlane_b32 s11, v11
5882; GFX10PLUS-NEXT:    v_readfirstlane_b32 s12, v12
5883; GFX10PLUS-NEXT:    v_readfirstlane_b32 s13, v13
5884; GFX10PLUS-NEXT:    ; return to shader part epilog
5885entry:
5886  %insert = insertelement <7 x double> %vec, double %val, i32 %idx
5887  ret <7 x double> %insert
5888}
5889
; Dynamic insertelement into <7 x double> where the vector, the value and the
; index are all non-inreg arguments. In the expected code the vector occupies
; v0-v13, %val is v14/v15 and %idx is v16.
; Expected lowering: compare the index against each element number 0..6 with
; v_cmp_eq_u32, and for every element use two v_cndmask_b32 to choose between
; the old low/high dword and v14/v15; finally read v0-v13 into s0-s13 with
; v_readfirstlane_b32.
; gfx900 reuses vcc for every compare. gfx1010 and gfx1100 spread the compare
; results over vcc_lo, s0 and s1 and interleave the selects; the gfx1100
; checks additionally fuse the vcc_lo selects into v_dual_cndmask_b32.
; The check lines are autogenerated (see the NOTE at the top of this file);
; regenerate them with utils/update_llc_test_checks.py rather than editing
; them by hand.
5890define amdgpu_ps <7 x double> @dyn_insertelement_v7f64_v_v_v(<7 x double> %vec, double %val, i32 %idx) {
5891; GPRIDX-LABEL: dyn_insertelement_v7f64_v_v_v:
5892; GPRIDX:       ; %bb.0: ; %entry
5893; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v16
5894; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v14, vcc
5895; GPRIDX-NEXT:    v_cndmask_b32_e32 v1, v1, v15, vcc
5896; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v16
5897; GPRIDX-NEXT:    v_cndmask_b32_e32 v2, v2, v14, vcc
5898; GPRIDX-NEXT:    v_cndmask_b32_e32 v3, v3, v15, vcc
5899; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v16
5900; GPRIDX-NEXT:    v_cndmask_b32_e32 v4, v4, v14, vcc
5901; GPRIDX-NEXT:    v_cndmask_b32_e32 v5, v5, v15, vcc
5902; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v16
5903; GPRIDX-NEXT:    v_cndmask_b32_e32 v6, v6, v14, vcc
5904; GPRIDX-NEXT:    v_cndmask_b32_e32 v7, v7, v15, vcc
5905; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v16
5906; GPRIDX-NEXT:    v_cndmask_b32_e32 v8, v8, v14, vcc
5907; GPRIDX-NEXT:    v_cndmask_b32_e32 v9, v9, v15, vcc
5908; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v16
5909; GPRIDX-NEXT:    v_cndmask_b32_e32 v10, v10, v14, vcc
5910; GPRIDX-NEXT:    v_cndmask_b32_e32 v11, v11, v15, vcc
5911; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v16
5912; GPRIDX-NEXT:    v_cndmask_b32_e32 v12, v12, v14, vcc
5913; GPRIDX-NEXT:    v_cndmask_b32_e32 v13, v13, v15, vcc
5914; GPRIDX-NEXT:    v_readfirstlane_b32 s0, v0
5915; GPRIDX-NEXT:    v_readfirstlane_b32 s1, v1
5916; GPRIDX-NEXT:    v_readfirstlane_b32 s2, v2
5917; GPRIDX-NEXT:    v_readfirstlane_b32 s3, v3
5918; GPRIDX-NEXT:    v_readfirstlane_b32 s4, v4
5919; GPRIDX-NEXT:    v_readfirstlane_b32 s5, v5
5920; GPRIDX-NEXT:    v_readfirstlane_b32 s6, v6
5921; GPRIDX-NEXT:    v_readfirstlane_b32 s7, v7
5922; GPRIDX-NEXT:    v_readfirstlane_b32 s8, v8
5923; GPRIDX-NEXT:    v_readfirstlane_b32 s9, v9
5924; GPRIDX-NEXT:    v_readfirstlane_b32 s10, v10
5925; GPRIDX-NEXT:    v_readfirstlane_b32 s11, v11
5926; GPRIDX-NEXT:    v_readfirstlane_b32 s12, v12
5927; GPRIDX-NEXT:    v_readfirstlane_b32 s13, v13
5928; GPRIDX-NEXT:    ; return to shader part epilog
5929;
5930; GFX10-LABEL: dyn_insertelement_v7f64_v_v_v:
5931; GFX10:       ; %bb.0: ; %entry
5932; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v16
5933; GFX10-NEXT:    v_cmp_eq_u32_e64 s0, 1, v16
5934; GFX10-NEXT:    v_cmp_eq_u32_e64 s1, 6, v16
5935; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v14, vcc_lo
5936; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v15, vcc_lo
5937; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v16
5938; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, v14, s0
5939; GFX10-NEXT:    v_cndmask_b32_e64 v3, v3, v15, s0
5940; GFX10-NEXT:    v_cmp_eq_u32_e64 s0, 3, v16
5941; GFX10-NEXT:    v_cndmask_b32_e64 v12, v12, v14, s1
5942; GFX10-NEXT:    v_cndmask_b32_e32 v4, v4, v14, vcc_lo
5943; GFX10-NEXT:    v_cndmask_b32_e32 v5, v5, v15, vcc_lo
5944; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v16
5945; GFX10-NEXT:    v_cndmask_b32_e64 v6, v6, v14, s0
5946; GFX10-NEXT:    v_cndmask_b32_e64 v7, v7, v15, s0
5947; GFX10-NEXT:    v_cmp_eq_u32_e64 s0, 5, v16
5948; GFX10-NEXT:    v_cndmask_b32_e64 v13, v13, v15, s1
5949; GFX10-NEXT:    v_cndmask_b32_e32 v8, v8, v14, vcc_lo
5950; GFX10-NEXT:    v_cndmask_b32_e32 v9, v9, v15, vcc_lo
5951; GFX10-NEXT:    v_readfirstlane_b32 s1, v1
5952; GFX10-NEXT:    v_cndmask_b32_e64 v10, v10, v14, s0
5953; GFX10-NEXT:    v_cndmask_b32_e64 v11, v11, v15, s0
5954; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
5955; GFX10-NEXT:    v_readfirstlane_b32 s2, v2
5956; GFX10-NEXT:    v_readfirstlane_b32 s3, v3
5957; GFX10-NEXT:    v_readfirstlane_b32 s4, v4
5958; GFX10-NEXT:    v_readfirstlane_b32 s5, v5
5959; GFX10-NEXT:    v_readfirstlane_b32 s6, v6
5960; GFX10-NEXT:    v_readfirstlane_b32 s7, v7
5961; GFX10-NEXT:    v_readfirstlane_b32 s8, v8
5962; GFX10-NEXT:    v_readfirstlane_b32 s9, v9
5963; GFX10-NEXT:    v_readfirstlane_b32 s10, v10
5964; GFX10-NEXT:    v_readfirstlane_b32 s11, v11
5965; GFX10-NEXT:    v_readfirstlane_b32 s12, v12
5966; GFX10-NEXT:    v_readfirstlane_b32 s13, v13
5967; GFX10-NEXT:    ; return to shader part epilog
5968;
5969; GFX11-LABEL: dyn_insertelement_v7f64_v_v_v:
5970; GFX11:       ; %bb.0: ; %entry
5971; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v16
5972; GFX11-NEXT:    v_cmp_eq_u32_e64 s0, 1, v16
5973; GFX11-NEXT:    v_cmp_eq_u32_e64 s1, 6, v16
5974; GFX11-NEXT:    v_dual_cndmask_b32 v0, v0, v14 :: v_dual_cndmask_b32 v1, v1, v15
5975; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v16
5976; GFX11-NEXT:    v_cndmask_b32_e64 v2, v2, v14, s0
5977; GFX11-NEXT:    v_cndmask_b32_e64 v3, v3, v15, s0
5978; GFX11-NEXT:    v_cmp_eq_u32_e64 s0, 3, v16
5979; GFX11-NEXT:    v_cndmask_b32_e64 v12, v12, v14, s1
5980; GFX11-NEXT:    v_dual_cndmask_b32 v4, v4, v14 :: v_dual_cndmask_b32 v5, v5, v15
5981; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v16
5982; GFX11-NEXT:    v_cndmask_b32_e64 v6, v6, v14, s0
5983; GFX11-NEXT:    v_cndmask_b32_e64 v7, v7, v15, s0
5984; GFX11-NEXT:    v_cmp_eq_u32_e64 s0, 5, v16
5985; GFX11-NEXT:    v_cndmask_b32_e64 v13, v13, v15, s1
5986; GFX11-NEXT:    v_dual_cndmask_b32 v8, v8, v14 :: v_dual_cndmask_b32 v9, v9, v15
5987; GFX11-NEXT:    v_readfirstlane_b32 s1, v1
5988; GFX11-NEXT:    v_cndmask_b32_e64 v10, v10, v14, s0
5989; GFX11-NEXT:    v_cndmask_b32_e64 v11, v11, v15, s0
5990; GFX11-NEXT:    v_readfirstlane_b32 s0, v0
5991; GFX11-NEXT:    v_readfirstlane_b32 s2, v2
5992; GFX11-NEXT:    v_readfirstlane_b32 s3, v3
5993; GFX11-NEXT:    v_readfirstlane_b32 s4, v4
5994; GFX11-NEXT:    v_readfirstlane_b32 s5, v5
5995; GFX11-NEXT:    v_readfirstlane_b32 s6, v6
5996; GFX11-NEXT:    v_readfirstlane_b32 s7, v7
5997; GFX11-NEXT:    v_readfirstlane_b32 s8, v8
5998; GFX11-NEXT:    v_readfirstlane_b32 s9, v9
5999; GFX11-NEXT:    v_readfirstlane_b32 s10, v10
6000; GFX11-NEXT:    v_readfirstlane_b32 s11, v11
6001; GFX11-NEXT:    v_readfirstlane_b32 s12, v12
6002; GFX11-NEXT:    v_readfirstlane_b32 s13, v13
6003; GFX11-NEXT:    ; return to shader part epilog
6004entry:
6005  %insert = insertelement <7 x double> %vec, double %val, i32 %idx
6006  ret <7 x double> %insert
6007}
6008
; Dynamic insertelement into <5 x double> with the vector, the value and the
; index all marked inreg. In the expected code the vector arrives in s2-s11,
; %val in s[12:13] and %idx in s14.
; Expected lowering, identical for every checked target: for each element
; number 0..4, an s_cmp_eq_u32 of the index against that number followed by
; an s_cselect_b64 that takes s[12:13] or the old element. Results land in
; s[0:1] through s[8:9], i.e. each element moves down by two registers.
; The check lines are autogenerated (see the NOTE at the top of this file);
; regenerate them with utils/update_llc_test_checks.py rather than editing
; them by hand.
6009define amdgpu_ps <5 x double> @dyn_insertelement_v5f64_s_s_s(<5 x double> inreg %vec, double inreg %val, i32 inreg %idx) {
6010; GPRIDX-LABEL: dyn_insertelement_v5f64_s_s_s:
6011; GPRIDX:       ; %bb.0: ; %entry
6012; GPRIDX-NEXT:    s_cmp_eq_u32 s14, 0
6013; GPRIDX-NEXT:    s_cselect_b64 s[0:1], s[12:13], s[2:3]
6014; GPRIDX-NEXT:    s_cmp_eq_u32 s14, 1
6015; GPRIDX-NEXT:    s_cselect_b64 s[2:3], s[12:13], s[4:5]
6016; GPRIDX-NEXT:    s_cmp_eq_u32 s14, 2
6017; GPRIDX-NEXT:    s_cselect_b64 s[4:5], s[12:13], s[6:7]
6018; GPRIDX-NEXT:    s_cmp_eq_u32 s14, 3
6019; GPRIDX-NEXT:    s_cselect_b64 s[6:7], s[12:13], s[8:9]
6020; GPRIDX-NEXT:    s_cmp_eq_u32 s14, 4
6021; GPRIDX-NEXT:    s_cselect_b64 s[8:9], s[12:13], s[10:11]
6022; GPRIDX-NEXT:    ; return to shader part epilog
6023;
6024; GFX10PLUS-LABEL: dyn_insertelement_v5f64_s_s_s:
6025; GFX10PLUS:       ; %bb.0: ; %entry
6026; GFX10PLUS-NEXT:    s_cmp_eq_u32 s14, 0
6027; GFX10PLUS-NEXT:    s_cselect_b64 s[0:1], s[12:13], s[2:3]
6028; GFX10PLUS-NEXT:    s_cmp_eq_u32 s14, 1
6029; GFX10PLUS-NEXT:    s_cselect_b64 s[2:3], s[12:13], s[4:5]
6030; GFX10PLUS-NEXT:    s_cmp_eq_u32 s14, 2
6031; GFX10PLUS-NEXT:    s_cselect_b64 s[4:5], s[12:13], s[6:7]
6032; GFX10PLUS-NEXT:    s_cmp_eq_u32 s14, 3
6033; GFX10PLUS-NEXT:    s_cselect_b64 s[6:7], s[12:13], s[8:9]
6034; GFX10PLUS-NEXT:    s_cmp_eq_u32 s14, 4
6035; GFX10PLUS-NEXT:    s_cselect_b64 s[8:9], s[12:13], s[10:11]
6036; GFX10PLUS-NEXT:    ; return to shader part epilog
6037entry:
6038  %insert = insertelement <5 x double> %vec, double %val, i32 %idx
6039  ret <5 x double> %insert
6040}
6041
6042define amdgpu_ps <5 x double> @dyn_insertelement_v5f64_s_v_s(<5 x double> inreg %vec, double %val, i32 inreg %idx) {
6043; GPRIDX-LABEL: dyn_insertelement_v5f64_s_v_s:
6044; GPRIDX:       ; %bb.0: ; %entry
6045; GPRIDX-NEXT:    s_mov_b32 s1, s3
6046; GPRIDX-NEXT:    s_mov_b32 s3, s5
6047; GPRIDX-NEXT:    s_mov_b32 s5, s7
6048; GPRIDX-NEXT:    s_mov_b32 s7, s9
6049; GPRIDX-NEXT:    s_mov_b32 s9, s11
6050; GPRIDX-NEXT:    s_mov_b32 s0, s2
6051; GPRIDX-NEXT:    s_mov_b32 s2, s4
6052; GPRIDX-NEXT:    s_mov_b32 s4, s6
6053; GPRIDX-NEXT:    s_mov_b32 s6, s8
6054; GPRIDX-NEXT:    s_mov_b32 s8, s10
6055; GPRIDX-NEXT:    v_mov_b32_e32 v11, s9
6056; GPRIDX-NEXT:    v_mov_b32_e32 v3, s1
6057; GPRIDX-NEXT:    v_mov_b32_e32 v2, s0
6058; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s12, 0
6059; GPRIDX-NEXT:    v_mov_b32_e32 v5, s3
6060; GPRIDX-NEXT:    v_mov_b32_e32 v4, s2
6061; GPRIDX-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
6062; GPRIDX-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
6063; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s12, 1
6064; GPRIDX-NEXT:    v_mov_b32_e32 v7, s5
6065; GPRIDX-NEXT:    v_mov_b32_e32 v6, s4
6066; GPRIDX-NEXT:    v_cndmask_b32_e32 v4, v4, v0, vcc
6067; GPRIDX-NEXT:    v_cndmask_b32_e32 v5, v5, v1, vcc
6068; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s12, 2
6069; GPRIDX-NEXT:    v_mov_b32_e32 v9, s7
6070; GPRIDX-NEXT:    v_mov_b32_e32 v8, s6
6071; GPRIDX-NEXT:    v_cndmask_b32_e32 v6, v6, v0, vcc
6072; GPRIDX-NEXT:    v_cndmask_b32_e32 v7, v7, v1, vcc
6073; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s12, 3
6074; GPRIDX-NEXT:    v_mov_b32_e32 v10, s8
6075; GPRIDX-NEXT:    v_cndmask_b32_e32 v8, v8, v0, vcc
6076; GPRIDX-NEXT:    v_cndmask_b32_e32 v9, v9, v1, vcc
6077; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s12, 4
6078; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v10, v0, vcc
6079; GPRIDX-NEXT:    v_cndmask_b32_e32 v1, v11, v1, vcc
6080; GPRIDX-NEXT:    v_readfirstlane_b32 s0, v2
6081; GPRIDX-NEXT:    v_readfirstlane_b32 s1, v3
6082; GPRIDX-NEXT:    v_readfirstlane_b32 s2, v4
6083; GPRIDX-NEXT:    v_readfirstlane_b32 s3, v5
6084; GPRIDX-NEXT:    v_readfirstlane_b32 s4, v6
6085; GPRIDX-NEXT:    v_readfirstlane_b32 s5, v7
6086; GPRIDX-NEXT:    v_readfirstlane_b32 s6, v8
6087; GPRIDX-NEXT:    v_readfirstlane_b32 s7, v9
6088; GPRIDX-NEXT:    v_readfirstlane_b32 s8, v0
6089; GPRIDX-NEXT:    v_readfirstlane_b32 s9, v1
6090; GPRIDX-NEXT:    ; return to shader part epilog
6091;
6092; GFX10-LABEL: dyn_insertelement_v5f64_s_v_s:
6093; GFX10:       ; %bb.0: ; %entry
6094; GFX10-NEXT:    s_mov_b32 s1, s3
6095; GFX10-NEXT:    s_mov_b32 s3, s5
6096; GFX10-NEXT:    s_mov_b32 s5, s7
6097; GFX10-NEXT:    s_mov_b32 s7, s9
6098; GFX10-NEXT:    s_mov_b32 s9, s11
6099; GFX10-NEXT:    s_mov_b32 s0, s2
6100; GFX10-NEXT:    s_mov_b32 s2, s4
6101; GFX10-NEXT:    s_mov_b32 s4, s6
6102; GFX10-NEXT:    s_mov_b32 s6, s8
6103; GFX10-NEXT:    s_mov_b32 s8, s10
6104; GFX10-NEXT:    v_mov_b32_e32 v11, s9
6105; GFX10-NEXT:    v_mov_b32_e32 v10, s8
6106; GFX10-NEXT:    v_mov_b32_e32 v9, s7
6107; GFX10-NEXT:    v_mov_b32_e32 v8, s6
6108; GFX10-NEXT:    v_mov_b32_e32 v7, s5
6109; GFX10-NEXT:    v_mov_b32_e32 v6, s4
6110; GFX10-NEXT:    v_mov_b32_e32 v5, s3
6111; GFX10-NEXT:    v_mov_b32_e32 v4, s2
6112; GFX10-NEXT:    v_mov_b32_e32 v3, s1
6113; GFX10-NEXT:    v_mov_b32_e32 v2, s0
6114; GFX10-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s12, 0
6115; GFX10-NEXT:    v_cmp_eq_u32_e64 s0, s12, 1
6116; GFX10-NEXT:    v_cmp_eq_u32_e64 s1, s12, 4
6117; GFX10-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc_lo
6118; GFX10-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc_lo
6119; GFX10-NEXT:    v_cndmask_b32_e64 v4, v4, v0, s0
6120; GFX10-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s12, 2
6121; GFX10-NEXT:    v_cndmask_b32_e64 v5, v5, v1, s0
6122; GFX10-NEXT:    v_cmp_eq_u32_e64 s0, s12, 3
6123; GFX10-NEXT:    v_readfirstlane_b32 s2, v4
6124; GFX10-NEXT:    v_cndmask_b32_e32 v6, v6, v0, vcc_lo
6125; GFX10-NEXT:    v_cndmask_b32_e32 v7, v7, v1, vcc_lo
6126; GFX10-NEXT:    v_cndmask_b32_e64 v8, v8, v0, s0
6127; GFX10-NEXT:    v_cndmask_b32_e64 v9, v9, v1, s0
6128; GFX10-NEXT:    v_cndmask_b32_e64 v0, v10, v0, s1
6129; GFX10-NEXT:    v_cndmask_b32_e64 v1, v11, v1, s1
6130; GFX10-NEXT:    v_readfirstlane_b32 s0, v2
6131; GFX10-NEXT:    v_readfirstlane_b32 s1, v3
6132; GFX10-NEXT:    v_readfirstlane_b32 s3, v5
6133; GFX10-NEXT:    v_readfirstlane_b32 s4, v6
6134; GFX10-NEXT:    v_readfirstlane_b32 s5, v7
6135; GFX10-NEXT:    v_readfirstlane_b32 s6, v8
6136; GFX10-NEXT:    v_readfirstlane_b32 s7, v9
6137; GFX10-NEXT:    v_readfirstlane_b32 s8, v0
6138; GFX10-NEXT:    v_readfirstlane_b32 s9, v1
6139; GFX10-NEXT:    ; return to shader part epilog
6140;
6141; GFX11-LABEL: dyn_insertelement_v5f64_s_v_s:
6142; GFX11:       ; %bb.0: ; %entry
6143; GFX11-NEXT:    s_mov_b32 s1, s3
6144; GFX11-NEXT:    s_mov_b32 s3, s5
6145; GFX11-NEXT:    s_mov_b32 s5, s7
6146; GFX11-NEXT:    s_mov_b32 s7, s9
6147; GFX11-NEXT:    s_mov_b32 s9, s11
6148; GFX11-NEXT:    s_mov_b32 s0, s2
6149; GFX11-NEXT:    s_mov_b32 s2, s4
6150; GFX11-NEXT:    s_mov_b32 s4, s6
6151; GFX11-NEXT:    s_mov_b32 s6, s8
6152; GFX11-NEXT:    s_mov_b32 s8, s10
6153; GFX11-NEXT:    v_dual_mov_b32 v11, s9 :: v_dual_mov_b32 v10, s8
6154; GFX11-NEXT:    v_dual_mov_b32 v9, s7 :: v_dual_mov_b32 v8, s6
6155; GFX11-NEXT:    v_dual_mov_b32 v7, s5 :: v_dual_mov_b32 v6, s4
6156; GFX11-NEXT:    v_dual_mov_b32 v5, s3 :: v_dual_mov_b32 v4, s2
6157; GFX11-NEXT:    v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
6158; GFX11-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s12, 0
6159; GFX11-NEXT:    v_cmp_eq_u32_e64 s0, s12, 1
6160; GFX11-NEXT:    v_cmp_eq_u32_e64 s1, s12, 4
6161; GFX11-NEXT:    v_dual_cndmask_b32 v2, v2, v0 :: v_dual_cndmask_b32 v3, v3, v1
6162; GFX11-NEXT:    v_cndmask_b32_e64 v4, v4, v0, s0
6163; GFX11-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s12, 2
6164; GFX11-NEXT:    v_cndmask_b32_e64 v5, v5, v1, s0
6165; GFX11-NEXT:    v_cmp_eq_u32_e64 s0, s12, 3
6166; GFX11-NEXT:    v_readfirstlane_b32 s2, v4
6167; GFX11-NEXT:    v_dual_cndmask_b32 v6, v6, v0 :: v_dual_cndmask_b32 v7, v7, v1
6168; GFX11-NEXT:    v_cndmask_b32_e64 v8, v8, v0, s0
6169; GFX11-NEXT:    v_cndmask_b32_e64 v9, v9, v1, s0
6170; GFX11-NEXT:    v_cndmask_b32_e64 v0, v10, v0, s1
6171; GFX11-NEXT:    v_cndmask_b32_e64 v1, v11, v1, s1
6172; GFX11-NEXT:    v_readfirstlane_b32 s0, v2
6173; GFX11-NEXT:    v_readfirstlane_b32 s1, v3
6174; GFX11-NEXT:    v_readfirstlane_b32 s3, v5
6175; GFX11-NEXT:    v_readfirstlane_b32 s4, v6
6176; GFX11-NEXT:    v_readfirstlane_b32 s5, v7
6177; GFX11-NEXT:    v_readfirstlane_b32 s6, v8
6178; GFX11-NEXT:    v_readfirstlane_b32 s7, v9
6179; GFX11-NEXT:    v_readfirstlane_b32 s8, v0
6180; GFX11-NEXT:    v_readfirstlane_b32 s9, v1
6181; GFX11-NEXT:    ; return to shader part epilog
6182entry:
6183  %insert = insertelement <5 x double> %vec, double %val, i32 %idx
6184  ret <5 x double> %insert
6185}
6186
; Dynamic insertelement into <5 x double> where only %vec is marked inreg;
; %val and %idx are ordinary arguments. The "_s_v_v" suffix appears to track
; inreg ('s') versus non-inreg ('v') in vec/val/idx order.
;
; In the expected output the vector arrives in s2-s11, is shifted down to
; s0-s9 and then copied into VGPRs; the value is in v0/v1 and the index in v2.
; The index is compared against each of 0..4, and both 32-bit halves of the
; matching element are replaced through v_cndmask_b32. The ten result dwords
; are handed back in s0-s9 via v_readfirstlane_b32.
;
; The gfx1010/gfx1100 expectations hold compare results in single SGPRs
; (vcc_lo, s0, s1) where the gfx900 expectations use vcc and s[0:1]; the
; gfx1100 expectations additionally pair some moves and selects into v_dual_*
; instructions.
;
; The assertions below are autogenerated (see the NOTE on the first line of
; this file); regenerate them with utils/update_llc_test_checks.py rather than
; editing them by hand.
6187define amdgpu_ps <5 x double> @dyn_insertelement_v5f64_s_v_v(<5 x double> inreg %vec, double %val, i32 %idx) {
6188; GPRIDX-LABEL: dyn_insertelement_v5f64_s_v_v:
6189; GPRIDX:       ; %bb.0: ; %entry
6190; GPRIDX-NEXT:    s_mov_b32 s1, s3
6191; GPRIDX-NEXT:    s_mov_b32 s3, s5
6192; GPRIDX-NEXT:    s_mov_b32 s5, s7
6193; GPRIDX-NEXT:    s_mov_b32 s7, s9
6194; GPRIDX-NEXT:    s_mov_b32 s9, s11
6195; GPRIDX-NEXT:    s_mov_b32 s0, s2
6196; GPRIDX-NEXT:    s_mov_b32 s2, s4
6197; GPRIDX-NEXT:    s_mov_b32 s4, s6
6198; GPRIDX-NEXT:    s_mov_b32 s6, s8
6199; GPRIDX-NEXT:    s_mov_b32 s8, s10
6200; GPRIDX-NEXT:    v_mov_b32_e32 v12, s9
6201; GPRIDX-NEXT:    v_mov_b32_e32 v4, s1
6202; GPRIDX-NEXT:    v_mov_b32_e32 v3, s0
6203; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v2
6204; GPRIDX-NEXT:    v_mov_b32_e32 v6, s3
6205; GPRIDX-NEXT:    v_mov_b32_e32 v5, s2
6206; GPRIDX-NEXT:    v_cndmask_b32_e32 v3, v3, v0, vcc
6207; GPRIDX-NEXT:    v_cndmask_b32_e32 v4, v4, v1, vcc
6208; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v2
6209; GPRIDX-NEXT:    v_mov_b32_e32 v8, s5
6210; GPRIDX-NEXT:    v_mov_b32_e32 v7, s4
6211; GPRIDX-NEXT:    v_cndmask_b32_e32 v5, v5, v0, vcc
6212; GPRIDX-NEXT:    v_cndmask_b32_e32 v6, v6, v1, vcc
6213; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v2
6214; GPRIDX-NEXT:    v_mov_b32_e32 v11, s8
6215; GPRIDX-NEXT:    v_mov_b32_e32 v10, s7
6216; GPRIDX-NEXT:    v_mov_b32_e32 v9, s6
6217; GPRIDX-NEXT:    v_cndmask_b32_e32 v7, v7, v0, vcc
6218; GPRIDX-NEXT:    v_cndmask_b32_e32 v8, v8, v1, vcc
6219; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v2
6220; GPRIDX-NEXT:    v_cmp_eq_u32_e64 s[0:1], 4, v2
6221; GPRIDX-NEXT:    v_cndmask_b32_e32 v9, v9, v0, vcc
6222; GPRIDX-NEXT:    v_cndmask_b32_e32 v2, v10, v1, vcc
6223; GPRIDX-NEXT:    v_cndmask_b32_e64 v0, v11, v0, s[0:1]
6224; GPRIDX-NEXT:    v_cndmask_b32_e64 v1, v12, v1, s[0:1]
6225; GPRIDX-NEXT:    v_readfirstlane_b32 s0, v3
6226; GPRIDX-NEXT:    v_readfirstlane_b32 s1, v4
6227; GPRIDX-NEXT:    v_readfirstlane_b32 s2, v5
6228; GPRIDX-NEXT:    v_readfirstlane_b32 s3, v6
6229; GPRIDX-NEXT:    v_readfirstlane_b32 s4, v7
6230; GPRIDX-NEXT:    v_readfirstlane_b32 s5, v8
6231; GPRIDX-NEXT:    v_readfirstlane_b32 s6, v9
6232; GPRIDX-NEXT:    v_readfirstlane_b32 s7, v2
6233; GPRIDX-NEXT:    v_readfirstlane_b32 s8, v0
6234; GPRIDX-NEXT:    v_readfirstlane_b32 s9, v1
6235; GPRIDX-NEXT:    ; return to shader part epilog
6236;
6237; GFX10-LABEL: dyn_insertelement_v5f64_s_v_v:
6238; GFX10:       ; %bb.0: ; %entry
6239; GFX10-NEXT:    s_mov_b32 s1, s3
6240; GFX10-NEXT:    s_mov_b32 s3, s5
6241; GFX10-NEXT:    s_mov_b32 s5, s7
6242; GFX10-NEXT:    s_mov_b32 s7, s9
6243; GFX10-NEXT:    s_mov_b32 s9, s11
6244; GFX10-NEXT:    s_mov_b32 s0, s2
6245; GFX10-NEXT:    s_mov_b32 s2, s4
6246; GFX10-NEXT:    s_mov_b32 s4, s6
6247; GFX10-NEXT:    s_mov_b32 s6, s8
6248; GFX10-NEXT:    s_mov_b32 s8, s10
6249; GFX10-NEXT:    v_mov_b32_e32 v12, s9
6250; GFX10-NEXT:    v_mov_b32_e32 v11, s8
6251; GFX10-NEXT:    v_mov_b32_e32 v10, s7
6252; GFX10-NEXT:    v_mov_b32_e32 v9, s6
6253; GFX10-NEXT:    v_mov_b32_e32 v8, s5
6254; GFX10-NEXT:    v_mov_b32_e32 v7, s4
6255; GFX10-NEXT:    v_mov_b32_e32 v6, s3
6256; GFX10-NEXT:    v_mov_b32_e32 v5, s2
6257; GFX10-NEXT:    v_mov_b32_e32 v4, s1
6258; GFX10-NEXT:    v_mov_b32_e32 v3, s0
6259; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v2
6260; GFX10-NEXT:    v_cmp_eq_u32_e64 s0, 1, v2
6261; GFX10-NEXT:    v_cmp_eq_u32_e64 s1, 4, v2
6262; GFX10-NEXT:    v_cndmask_b32_e32 v3, v3, v0, vcc_lo
6263; GFX10-NEXT:    v_cndmask_b32_e32 v4, v4, v1, vcc_lo
6264; GFX10-NEXT:    v_cndmask_b32_e64 v5, v5, v0, s0
6265; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v2
6266; GFX10-NEXT:    v_cndmask_b32_e64 v6, v6, v1, s0
6267; GFX10-NEXT:    v_cmp_eq_u32_e64 s0, 3, v2
6268; GFX10-NEXT:    v_readfirstlane_b32 s2, v5
6269; GFX10-NEXT:    v_cndmask_b32_e32 v7, v7, v0, vcc_lo
6270; GFX10-NEXT:    v_cndmask_b32_e32 v2, v8, v1, vcc_lo
6271; GFX10-NEXT:    v_cndmask_b32_e64 v8, v9, v0, s0
6272; GFX10-NEXT:    v_cndmask_b32_e64 v9, v10, v1, s0
6273; GFX10-NEXT:    v_cndmask_b32_e64 v0, v11, v0, s1
6274; GFX10-NEXT:    v_cndmask_b32_e64 v1, v12, v1, s1
6275; GFX10-NEXT:    v_readfirstlane_b32 s0, v3
6276; GFX10-NEXT:    v_readfirstlane_b32 s1, v4
6277; GFX10-NEXT:    v_readfirstlane_b32 s3, v6
6278; GFX10-NEXT:    v_readfirstlane_b32 s4, v7
6279; GFX10-NEXT:    v_readfirstlane_b32 s5, v2
6280; GFX10-NEXT:    v_readfirstlane_b32 s6, v8
6281; GFX10-NEXT:    v_readfirstlane_b32 s7, v9
6282; GFX10-NEXT:    v_readfirstlane_b32 s8, v0
6283; GFX10-NEXT:    v_readfirstlane_b32 s9, v1
6284; GFX10-NEXT:    ; return to shader part epilog
6285;
6286; GFX11-LABEL: dyn_insertelement_v5f64_s_v_v:
6287; GFX11:       ; %bb.0: ; %entry
6288; GFX11-NEXT:    s_mov_b32 s1, s3
6289; GFX11-NEXT:    s_mov_b32 s3, s5
6290; GFX11-NEXT:    s_mov_b32 s5, s7
6291; GFX11-NEXT:    s_mov_b32 s7, s9
6292; GFX11-NEXT:    s_mov_b32 s9, s11
6293; GFX11-NEXT:    s_mov_b32 s0, s2
6294; GFX11-NEXT:    s_mov_b32 s2, s4
6295; GFX11-NEXT:    s_mov_b32 s4, s6
6296; GFX11-NEXT:    s_mov_b32 s6, s8
6297; GFX11-NEXT:    s_mov_b32 s8, s10
6298; GFX11-NEXT:    v_dual_mov_b32 v12, s9 :: v_dual_mov_b32 v11, s8
6299; GFX11-NEXT:    v_dual_mov_b32 v10, s7 :: v_dual_mov_b32 v9, s6
6300; GFX11-NEXT:    v_dual_mov_b32 v8, s5 :: v_dual_mov_b32 v7, s4
6301; GFX11-NEXT:    v_dual_mov_b32 v6, s3 :: v_dual_mov_b32 v5, s2
6302; GFX11-NEXT:    v_dual_mov_b32 v4, s1 :: v_dual_mov_b32 v3, s0
6303; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v2
6304; GFX11-NEXT:    v_cmp_eq_u32_e64 s0, 1, v2
6305; GFX11-NEXT:    v_cmp_eq_u32_e64 s1, 4, v2
6306; GFX11-NEXT:    v_dual_cndmask_b32 v3, v3, v0 :: v_dual_cndmask_b32 v4, v4, v1
6307; GFX11-NEXT:    v_cndmask_b32_e64 v5, v5, v0, s0
6308; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v2
6309; GFX11-NEXT:    v_cndmask_b32_e64 v6, v6, v1, s0
6310; GFX11-NEXT:    v_cmp_eq_u32_e64 s0, 3, v2
6311; GFX11-NEXT:    v_readfirstlane_b32 s2, v5
6312; GFX11-NEXT:    v_dual_cndmask_b32 v7, v7, v0 :: v_dual_cndmask_b32 v2, v8, v1
6313; GFX11-NEXT:    v_cndmask_b32_e64 v8, v9, v0, s0
6314; GFX11-NEXT:    v_cndmask_b32_e64 v9, v10, v1, s0
6315; GFX11-NEXT:    v_cndmask_b32_e64 v0, v11, v0, s1
6316; GFX11-NEXT:    v_cndmask_b32_e64 v1, v12, v1, s1
6317; GFX11-NEXT:    v_readfirstlane_b32 s0, v3
6318; GFX11-NEXT:    v_readfirstlane_b32 s1, v4
6319; GFX11-NEXT:    v_readfirstlane_b32 s3, v6
6320; GFX11-NEXT:    v_readfirstlane_b32 s4, v7
6321; GFX11-NEXT:    v_readfirstlane_b32 s5, v2
6322; GFX11-NEXT:    v_readfirstlane_b32 s6, v8
6323; GFX11-NEXT:    v_readfirstlane_b32 s7, v9
6324; GFX11-NEXT:    v_readfirstlane_b32 s8, v0
6325; GFX11-NEXT:    v_readfirstlane_b32 s9, v1
6326; GFX11-NEXT:    ; return to shader part epilog
6327entry:
6328  %insert = insertelement <5 x double> %vec, double %val, i32 %idx
6329  ret <5 x double> %insert
6330}
6331
; Dynamic insertelement into <5 x double> where only %idx is marked inreg;
; %vec and %val are ordinary arguments. The "_v_v_s" suffix appears to track
; inreg ('s') versus non-inreg ('v') in vec/val/idx order.
;
; In the expected output the vector is in v0-v9, the value in v10/v11 and the
; index in s2. The index is compared against each of 0..4, and both 32-bit
; halves of the matching element are overwritten in place through
; v_cndmask_b32. The ten result dwords are handed back in s0-s9 via
; v_readfirstlane_b32.
;
; The gfx1010/gfx1100 expectations hold compare results in single SGPRs
; (vcc_lo, s0, s1) where the gfx900 expectations reuse vcc for every compare;
; the gfx1100 expectations additionally pair the vcc_lo-based selects into
; v_dual_cndmask_b32.
;
; The assertions below are autogenerated (see the NOTE on the first line of
; this file); regenerate them with utils/update_llc_test_checks.py rather than
; editing them by hand.
6332define amdgpu_ps <5 x double> @dyn_insertelement_v5f64_v_v_s(<5 x double> %vec, double %val, i32 inreg %idx) {
6333; GPRIDX-LABEL: dyn_insertelement_v5f64_v_v_s:
6334; GPRIDX:       ; %bb.0: ; %entry
6335; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 0
6336; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc
6337; GPRIDX-NEXT:    v_cndmask_b32_e32 v1, v1, v11, vcc
6338; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 1
6339; GPRIDX-NEXT:    v_cndmask_b32_e32 v2, v2, v10, vcc
6340; GPRIDX-NEXT:    v_cndmask_b32_e32 v3, v3, v11, vcc
6341; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 2
6342; GPRIDX-NEXT:    v_cndmask_b32_e32 v4, v4, v10, vcc
6343; GPRIDX-NEXT:    v_cndmask_b32_e32 v5, v5, v11, vcc
6344; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 3
6345; GPRIDX-NEXT:    v_cndmask_b32_e32 v6, v6, v10, vcc
6346; GPRIDX-NEXT:    v_cndmask_b32_e32 v7, v7, v11, vcc
6347; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 4
6348; GPRIDX-NEXT:    v_cndmask_b32_e32 v8, v8, v10, vcc
6349; GPRIDX-NEXT:    v_cndmask_b32_e32 v9, v9, v11, vcc
6350; GPRIDX-NEXT:    v_readfirstlane_b32 s0, v0
6351; GPRIDX-NEXT:    v_readfirstlane_b32 s1, v1
6352; GPRIDX-NEXT:    v_readfirstlane_b32 s2, v2
6353; GPRIDX-NEXT:    v_readfirstlane_b32 s3, v3
6354; GPRIDX-NEXT:    v_readfirstlane_b32 s4, v4
6355; GPRIDX-NEXT:    v_readfirstlane_b32 s5, v5
6356; GPRIDX-NEXT:    v_readfirstlane_b32 s6, v6
6357; GPRIDX-NEXT:    v_readfirstlane_b32 s7, v7
6358; GPRIDX-NEXT:    v_readfirstlane_b32 s8, v8
6359; GPRIDX-NEXT:    v_readfirstlane_b32 s9, v9
6360; GPRIDX-NEXT:    ; return to shader part epilog
6361;
6362; GFX10-LABEL: dyn_insertelement_v5f64_v_v_s:
6363; GFX10:       ; %bb.0: ; %entry
6364; GFX10-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s2, 0
6365; GFX10-NEXT:    v_cmp_eq_u32_e64 s0, s2, 1
6366; GFX10-NEXT:    v_cmp_eq_u32_e64 s1, s2, 4
6367; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc_lo
6368; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v11, vcc_lo
6369; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, v10, s0
6370; GFX10-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s2, 2
6371; GFX10-NEXT:    v_cndmask_b32_e64 v3, v3, v11, s0
6372; GFX10-NEXT:    v_cmp_eq_u32_e64 s0, s2, 3
6373; GFX10-NEXT:    v_cndmask_b32_e64 v8, v8, v10, s1
6374; GFX10-NEXT:    v_cndmask_b32_e64 v9, v9, v11, s1
6375; GFX10-NEXT:    v_cndmask_b32_e32 v4, v4, v10, vcc_lo
6376; GFX10-NEXT:    v_cndmask_b32_e32 v5, v5, v11, vcc_lo
6377; GFX10-NEXT:    v_cndmask_b32_e64 v6, v6, v10, s0
6378; GFX10-NEXT:    v_cndmask_b32_e64 v7, v7, v11, s0
6379; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
6380; GFX10-NEXT:    v_readfirstlane_b32 s1, v1
6381; GFX10-NEXT:    v_readfirstlane_b32 s2, v2
6382; GFX10-NEXT:    v_readfirstlane_b32 s3, v3
6383; GFX10-NEXT:    v_readfirstlane_b32 s4, v4
6384; GFX10-NEXT:    v_readfirstlane_b32 s5, v5
6385; GFX10-NEXT:    v_readfirstlane_b32 s6, v6
6386; GFX10-NEXT:    v_readfirstlane_b32 s7, v7
6387; GFX10-NEXT:    v_readfirstlane_b32 s8, v8
6388; GFX10-NEXT:    v_readfirstlane_b32 s9, v9
6389; GFX10-NEXT:    ; return to shader part epilog
6390;
6391; GFX11-LABEL: dyn_insertelement_v5f64_v_v_s:
6392; GFX11:       ; %bb.0: ; %entry
6393; GFX11-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s2, 0
6394; GFX11-NEXT:    v_cmp_eq_u32_e64 s0, s2, 1
6395; GFX11-NEXT:    v_cmp_eq_u32_e64 s1, s2, 4
6396; GFX11-NEXT:    v_dual_cndmask_b32 v0, v0, v10 :: v_dual_cndmask_b32 v1, v1, v11
6397; GFX11-NEXT:    v_cndmask_b32_e64 v2, v2, v10, s0
6398; GFX11-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s2, 2
6399; GFX11-NEXT:    v_cndmask_b32_e64 v3, v3, v11, s0
6400; GFX11-NEXT:    v_cmp_eq_u32_e64 s0, s2, 3
6401; GFX11-NEXT:    v_cndmask_b32_e64 v8, v8, v10, s1
6402; GFX11-NEXT:    v_cndmask_b32_e64 v9, v9, v11, s1
6403; GFX11-NEXT:    v_dual_cndmask_b32 v4, v4, v10 :: v_dual_cndmask_b32 v5, v5, v11
6404; GFX11-NEXT:    v_cndmask_b32_e64 v6, v6, v10, s0
6405; GFX11-NEXT:    v_cndmask_b32_e64 v7, v7, v11, s0
6406; GFX11-NEXT:    v_readfirstlane_b32 s0, v0
6407; GFX11-NEXT:    v_readfirstlane_b32 s1, v1
6408; GFX11-NEXT:    v_readfirstlane_b32 s2, v2
6409; GFX11-NEXT:    v_readfirstlane_b32 s3, v3
6410; GFX11-NEXT:    v_readfirstlane_b32 s4, v4
6411; GFX11-NEXT:    v_readfirstlane_b32 s5, v5
6412; GFX11-NEXT:    v_readfirstlane_b32 s6, v6
6413; GFX11-NEXT:    v_readfirstlane_b32 s7, v7
6414; GFX11-NEXT:    v_readfirstlane_b32 s8, v8
6415; GFX11-NEXT:    v_readfirstlane_b32 s9, v9
6416; GFX11-NEXT:    ; return to shader part epilog
6417entry:
6418  %insert = insertelement <5 x double> %vec, double %val, i32 %idx
6419  ret <5 x double> %insert
6420}
6421
; Dynamic insertelement into <5 x double> where none of %vec, %val or %idx is
; marked inreg. The "_v_v_v" suffix appears to track inreg ('s') versus
; non-inreg ('v') in vec/val/idx order.
;
; In the expected output the vector is in v0-v9, the value in v10/v11 and the
; index in v12. The index is compared against each of 0..4, and both 32-bit
; halves of the matching element are overwritten in place through
; v_cndmask_b32. The ten result dwords are handed back in s0-s9 via
; v_readfirstlane_b32.
;
; The gfx1010/gfx1100 expectations hold compare results in single SGPRs
; (vcc_lo, s0, s1) where the gfx900 expectations reuse vcc for every compare;
; the gfx1100 expectations additionally pair the vcc_lo-based selects into
; v_dual_cndmask_b32.
;
; The assertions below are autogenerated (see the NOTE on the first line of
; this file); regenerate them with utils/update_llc_test_checks.py rather than
; editing them by hand.
6422define amdgpu_ps <5 x double> @dyn_insertelement_v5f64_v_v_v(<5 x double> %vec, double %val, i32 %idx) {
6423; GPRIDX-LABEL: dyn_insertelement_v5f64_v_v_v:
6424; GPRIDX:       ; %bb.0: ; %entry
6425; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v12
6426; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc
6427; GPRIDX-NEXT:    v_cndmask_b32_e32 v1, v1, v11, vcc
6428; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v12
6429; GPRIDX-NEXT:    v_cndmask_b32_e32 v2, v2, v10, vcc
6430; GPRIDX-NEXT:    v_cndmask_b32_e32 v3, v3, v11, vcc
6431; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v12
6432; GPRIDX-NEXT:    v_cndmask_b32_e32 v4, v4, v10, vcc
6433; GPRIDX-NEXT:    v_cndmask_b32_e32 v5, v5, v11, vcc
6434; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v12
6435; GPRIDX-NEXT:    v_cndmask_b32_e32 v6, v6, v10, vcc
6436; GPRIDX-NEXT:    v_cndmask_b32_e32 v7, v7, v11, vcc
6437; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v12
6438; GPRIDX-NEXT:    v_cndmask_b32_e32 v8, v8, v10, vcc
6439; GPRIDX-NEXT:    v_cndmask_b32_e32 v9, v9, v11, vcc
6440; GPRIDX-NEXT:    v_readfirstlane_b32 s0, v0
6441; GPRIDX-NEXT:    v_readfirstlane_b32 s1, v1
6442; GPRIDX-NEXT:    v_readfirstlane_b32 s2, v2
6443; GPRIDX-NEXT:    v_readfirstlane_b32 s3, v3
6444; GPRIDX-NEXT:    v_readfirstlane_b32 s4, v4
6445; GPRIDX-NEXT:    v_readfirstlane_b32 s5, v5
6446; GPRIDX-NEXT:    v_readfirstlane_b32 s6, v6
6447; GPRIDX-NEXT:    v_readfirstlane_b32 s7, v7
6448; GPRIDX-NEXT:    v_readfirstlane_b32 s8, v8
6449; GPRIDX-NEXT:    v_readfirstlane_b32 s9, v9
6450; GPRIDX-NEXT:    ; return to shader part epilog
6451;
6452; GFX10-LABEL: dyn_insertelement_v5f64_v_v_v:
6453; GFX10:       ; %bb.0: ; %entry
6454; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v12
6455; GFX10-NEXT:    v_cmp_eq_u32_e64 s0, 1, v12
6456; GFX10-NEXT:    v_cmp_eq_u32_e64 s1, 4, v12
6457; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc_lo
6458; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v11, vcc_lo
6459; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, v10, s0
6460; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v12
6461; GFX10-NEXT:    v_cndmask_b32_e64 v3, v3, v11, s0
6462; GFX10-NEXT:    v_cmp_eq_u32_e64 s0, 3, v12
6463; GFX10-NEXT:    v_cndmask_b32_e64 v8, v8, v10, s1
6464; GFX10-NEXT:    v_cndmask_b32_e64 v9, v9, v11, s1
6465; GFX10-NEXT:    v_cndmask_b32_e32 v4, v4, v10, vcc_lo
6466; GFX10-NEXT:    v_cndmask_b32_e32 v5, v5, v11, vcc_lo
6467; GFX10-NEXT:    v_cndmask_b32_e64 v6, v6, v10, s0
6468; GFX10-NEXT:    v_cndmask_b32_e64 v7, v7, v11, s0
6469; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
6470; GFX10-NEXT:    v_readfirstlane_b32 s1, v1
6471; GFX10-NEXT:    v_readfirstlane_b32 s2, v2
6472; GFX10-NEXT:    v_readfirstlane_b32 s3, v3
6473; GFX10-NEXT:    v_readfirstlane_b32 s4, v4
6474; GFX10-NEXT:    v_readfirstlane_b32 s5, v5
6475; GFX10-NEXT:    v_readfirstlane_b32 s6, v6
6476; GFX10-NEXT:    v_readfirstlane_b32 s7, v7
6477; GFX10-NEXT:    v_readfirstlane_b32 s8, v8
6478; GFX10-NEXT:    v_readfirstlane_b32 s9, v9
6479; GFX10-NEXT:    ; return to shader part epilog
6480;
6481; GFX11-LABEL: dyn_insertelement_v5f64_v_v_v:
6482; GFX11:       ; %bb.0: ; %entry
6483; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v12
6484; GFX11-NEXT:    v_cmp_eq_u32_e64 s0, 1, v12
6485; GFX11-NEXT:    v_cmp_eq_u32_e64 s1, 4, v12
6486; GFX11-NEXT:    v_dual_cndmask_b32 v0, v0, v10 :: v_dual_cndmask_b32 v1, v1, v11
6487; GFX11-NEXT:    v_cndmask_b32_e64 v2, v2, v10, s0
6488; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v12
6489; GFX11-NEXT:    v_cndmask_b32_e64 v3, v3, v11, s0
6490; GFX11-NEXT:    v_cmp_eq_u32_e64 s0, 3, v12
6491; GFX11-NEXT:    v_cndmask_b32_e64 v8, v8, v10, s1
6492; GFX11-NEXT:    v_cndmask_b32_e64 v9, v9, v11, s1
6493; GFX11-NEXT:    v_dual_cndmask_b32 v4, v4, v10 :: v_dual_cndmask_b32 v5, v5, v11
6494; GFX11-NEXT:    v_cndmask_b32_e64 v6, v6, v10, s0
6495; GFX11-NEXT:    v_cndmask_b32_e64 v7, v7, v11, s0
6496; GFX11-NEXT:    v_readfirstlane_b32 s0, v0
6497; GFX11-NEXT:    v_readfirstlane_b32 s1, v1
6498; GFX11-NEXT:    v_readfirstlane_b32 s2, v2
6499; GFX11-NEXT:    v_readfirstlane_b32 s3, v3
6500; GFX11-NEXT:    v_readfirstlane_b32 s4, v4
6501; GFX11-NEXT:    v_readfirstlane_b32 s5, v5
6502; GFX11-NEXT:    v_readfirstlane_b32 s6, v6
6503; GFX11-NEXT:    v_readfirstlane_b32 s7, v7
6504; GFX11-NEXT:    v_readfirstlane_b32 s8, v8
6505; GFX11-NEXT:    v_readfirstlane_b32 s9, v9
6506; GFX11-NEXT:    ; return to shader part epilog
6507entry:
6508  %insert = insertelement <5 x double> %vec, double %val, i32 %idx
6509  ret <5 x double> %insert
6510}
6511