xref: /llvm-project/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.i16.ll (revision fd3eaf76ba3392a4406247d996e757ef49f7a8b2)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s
3; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX8 %s
4; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX7 %s
5; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10 %s
6; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11 %s
7
; Dynamic extract from a <4 x i16> loaded through an inreg addrspace(4)
; pointer, indexed by an inreg i32. In every expected sequence below the index
; (s4) is masked with 3 and shifted left by 1, a single 16-bit load is issued
; relative to the pointer in s[2:3], and the loaded value is copied to s0 with
; v_readfirstlane_b32.
8define amdgpu_ps i16 @extractelement_sgpr_v4i16_sgpr_idx(ptr addrspace(4) inreg %ptr, i32 inreg %idx) {
9; GFX9-LABEL: extractelement_sgpr_v4i16_sgpr_idx:
10; GFX9:       ; %bb.0:
11; GFX9-NEXT:    s_and_b32 s0, s4, 3
12; GFX9-NEXT:    s_lshl_b32 s0, s0, 1
13; GFX9-NEXT:    v_mov_b32_e32 v0, s0
14; GFX9-NEXT:    global_load_ushort v0, v0, s[2:3]
15; GFX9-NEXT:    s_waitcnt vmcnt(0)
16; GFX9-NEXT:    v_readfirstlane_b32 s0, v0
17; GFX9-NEXT:    ; return to shader part epilog
18;
19; GFX8-LABEL: extractelement_sgpr_v4i16_sgpr_idx:
20; GFX8:       ; %bb.0:
21; GFX8-NEXT:    s_and_b32 s0, s4, 3
22; GFX8-NEXT:    s_lshl_b32 s0, s0, 1
23; GFX8-NEXT:    s_add_u32 s0, s2, s0
24; GFX8-NEXT:    s_addc_u32 s1, s3, 0
25; GFX8-NEXT:    v_mov_b32_e32 v0, s0
26; GFX8-NEXT:    v_mov_b32_e32 v1, s1
27; GFX8-NEXT:    flat_load_ushort v0, v[0:1]
28; GFX8-NEXT:    s_waitcnt vmcnt(0)
29; GFX8-NEXT:    v_readfirstlane_b32 s0, v0
30; GFX8-NEXT:    ; return to shader part epilog
31;
32; GFX7-LABEL: extractelement_sgpr_v4i16_sgpr_idx:
33; GFX7:       ; %bb.0:
34; GFX7-NEXT:    s_mov_b32 s0, s2
35; GFX7-NEXT:    s_and_b32 s2, s4, 3
36; GFX7-NEXT:    s_lshl_b32 s4, s2, 1
37; GFX7-NEXT:    s_mov_b32 s5, 0
38; GFX7-NEXT:    v_mov_b32_e32 v0, s4
39; GFX7-NEXT:    s_mov_b32 s1, s3
40; GFX7-NEXT:    s_mov_b32 s3, 0xf000
41; GFX7-NEXT:    s_mov_b32 s2, s5
42; GFX7-NEXT:    v_mov_b32_e32 v1, s5
43; GFX7-NEXT:    buffer_load_ushort v0, v[0:1], s[0:3], 0 addr64
44; GFX7-NEXT:    s_waitcnt vmcnt(0)
45; GFX7-NEXT:    v_readfirstlane_b32 s0, v0
46; GFX7-NEXT:    ; return to shader part epilog
47;
48; GFX10-LABEL: extractelement_sgpr_v4i16_sgpr_idx:
49; GFX10:       ; %bb.0:
50; GFX10-NEXT:    s_and_b32 s0, s4, 3
51; GFX10-NEXT:    s_lshl_b32 s0, s0, 1
52; GFX10-NEXT:    v_mov_b32_e32 v0, s0
53; GFX10-NEXT:    global_load_ushort v0, v0, s[2:3]
54; GFX10-NEXT:    s_waitcnt vmcnt(0)
55; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
56; GFX10-NEXT:    ; return to shader part epilog
57;
58; GFX11-LABEL: extractelement_sgpr_v4i16_sgpr_idx:
59; GFX11:       ; %bb.0:
60; GFX11-NEXT:    s_and_b32 s0, s4, 3
61; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
62; GFX11-NEXT:    s_lshl_b32 s0, s0, 1
63; GFX11-NEXT:    v_mov_b32_e32 v0, s0
64; GFX11-NEXT:    global_load_u16 v0, v0, s[2:3]
65; GFX11-NEXT:    s_waitcnt vmcnt(0)
66; GFX11-NEXT:    v_readfirstlane_b32 s0, v0
67; GFX11-NEXT:    ; return to shader part epilog
68  %vector = load <4 x i16>, ptr addrspace(4) %ptr
69  %element = extractelement <4 x i16> %vector, i32 %idx
70  ret i16 %element
71}
72
; Dynamic extract from a <4 x i16> loaded through a non-inreg addrspace(1)
; pointer (v[0:1] in the expected output), indexed by an inreg i32 (s2).
; The index is masked with 3 and shifted left by 1. The GFX9, GFX8, GFX10 and
; GFX11 sequences add that offset to v[0:1] with a 64-bit carry chain before a
; single 16-bit load; the GFX7 sequence instead leaves v[0:1] untouched and puts
; the offset in s[0:1] of the s[0:3] operand of an addr64 buffer load.
73define amdgpu_ps i16 @extractelement_vgpr_v4i16_sgpr_idx(ptr addrspace(1) %ptr, i32 inreg %idx) {
74; GFX9-LABEL: extractelement_vgpr_v4i16_sgpr_idx:
75; GFX9:       ; %bb.0:
76; GFX9-NEXT:    s_and_b32 s0, s2, 3
77; GFX9-NEXT:    s_mov_b32 s1, 0
78; GFX9-NEXT:    s_lshl_b32 s0, s0, 1
79; GFX9-NEXT:    v_mov_b32_e32 v3, s1
80; GFX9-NEXT:    v_mov_b32_e32 v2, s0
81; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
82; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
83; GFX9-NEXT:    global_load_ushort v0, v[0:1], off
84; GFX9-NEXT:    s_waitcnt vmcnt(0)
85; GFX9-NEXT:    v_readfirstlane_b32 s0, v0
86; GFX9-NEXT:    ; return to shader part epilog
87;
88; GFX8-LABEL: extractelement_vgpr_v4i16_sgpr_idx:
89; GFX8:       ; %bb.0:
90; GFX8-NEXT:    s_and_b32 s0, s2, 3
91; GFX8-NEXT:    s_mov_b32 s1, 0
92; GFX8-NEXT:    s_lshl_b32 s0, s0, 1
93; GFX8-NEXT:    v_mov_b32_e32 v3, s1
94; GFX8-NEXT:    v_mov_b32_e32 v2, s0
95; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
96; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, v1, v3, vcc
97; GFX8-NEXT:    flat_load_ushort v0, v[0:1]
98; GFX8-NEXT:    s_waitcnt vmcnt(0)
99; GFX8-NEXT:    v_readfirstlane_b32 s0, v0
100; GFX8-NEXT:    ; return to shader part epilog
101;
102; GFX7-LABEL: extractelement_vgpr_v4i16_sgpr_idx:
103; GFX7:       ; %bb.0:
104; GFX7-NEXT:    s_and_b32 s0, s2, 3
105; GFX7-NEXT:    s_mov_b32 s1, 0
106; GFX7-NEXT:    s_lshl_b32 s0, s0, 1
107; GFX7-NEXT:    s_mov_b32 s3, 0xf000
108; GFX7-NEXT:    s_mov_b32 s2, s1
109; GFX7-NEXT:    buffer_load_ushort v0, v[0:1], s[0:3], 0 addr64
110; GFX7-NEXT:    s_waitcnt vmcnt(0)
111; GFX7-NEXT:    v_readfirstlane_b32 s0, v0
112; GFX7-NEXT:    ; return to shader part epilog
113;
114; GFX10-LABEL: extractelement_vgpr_v4i16_sgpr_idx:
115; GFX10:       ; %bb.0:
116; GFX10-NEXT:    s_and_b32 s0, s2, 3
117; GFX10-NEXT:    s_mov_b32 s1, 0
118; GFX10-NEXT:    s_lshl_b32 s0, s0, 1
119; GFX10-NEXT:    v_mov_b32_e32 v3, s1
120; GFX10-NEXT:    v_mov_b32_e32 v2, s0
121; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
122; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
123; GFX10-NEXT:    global_load_ushort v0, v[0:1], off
124; GFX10-NEXT:    s_waitcnt vmcnt(0)
125; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
126; GFX10-NEXT:    ; return to shader part epilog
127;
128; GFX11-LABEL: extractelement_vgpr_v4i16_sgpr_idx:
129; GFX11:       ; %bb.0:
130; GFX11-NEXT:    s_and_b32 s0, s2, 3
131; GFX11-NEXT:    s_mov_b32 s1, 0
132; GFX11-NEXT:    s_lshl_b32 s0, s0, 1
133; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
134; GFX11-NEXT:    v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
135; GFX11-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
136; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
137; GFX11-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
138; GFX11-NEXT:    global_load_u16 v0, v[0:1], off
139; GFX11-NEXT:    s_waitcnt vmcnt(0)
140; GFX11-NEXT:    v_readfirstlane_b32 s0, v0
141; GFX11-NEXT:    ; return to shader part epilog
142  %vector = load <4 x i16>, ptr addrspace(1) %ptr
143  %element = extractelement <4 x i16> %vector, i32 %idx
144  ret i16 %element
145}
146
; Dynamic extract from a <4 x i16> loaded through an addrspace(1) pointer, with
; both the pointer and the index passed as ordinary (non-inreg) arguments and
; no amdgpu_ps calling convention. The expected sequences mask the index (v2)
; with 3, shift it left by 1, add it to the pointer in v[0:1], issue a single
; 16-bit load into v0 and return with s_setpc_b64 s[30:31].
147define i16 @extractelement_vgpr_v4i16_vgpr_idx(ptr addrspace(1) %ptr, i32 %idx) {
148; GFX9-LABEL: extractelement_vgpr_v4i16_vgpr_idx:
149; GFX9:       ; %bb.0:
150; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
151; GFX9-NEXT:    v_and_b32_e32 v2, 3, v2
152; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 1, v2
153; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
154; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
155; GFX9-NEXT:    global_load_ushort v0, v[0:1], off
156; GFX9-NEXT:    s_waitcnt vmcnt(0)
157; GFX9-NEXT:    s_setpc_b64 s[30:31]
158;
159; GFX8-LABEL: extractelement_vgpr_v4i16_vgpr_idx:
160; GFX8:       ; %bb.0:
161; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
162; GFX8-NEXT:    v_and_b32_e32 v2, 3, v2
163; GFX8-NEXT:    v_lshlrev_b32_e32 v2, 1, v2
164; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
165; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
166; GFX8-NEXT:    flat_load_ushort v0, v[0:1]
167; GFX8-NEXT:    s_waitcnt vmcnt(0)
168; GFX8-NEXT:    s_setpc_b64 s[30:31]
169;
170; GFX7-LABEL: extractelement_vgpr_v4i16_vgpr_idx:
171; GFX7:       ; %bb.0:
172; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
173; GFX7-NEXT:    v_and_b32_e32 v2, 3, v2
174; GFX7-NEXT:    v_lshlrev_b32_e32 v2, 1, v2
175; GFX7-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
176; GFX7-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
177; GFX7-NEXT:    s_mov_b32 s6, 0
178; GFX7-NEXT:    s_mov_b32 s7, 0xf000
179; GFX7-NEXT:    s_mov_b64 s[4:5], 0
180; GFX7-NEXT:    buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64
181; GFX7-NEXT:    s_waitcnt vmcnt(0)
182; GFX7-NEXT:    s_setpc_b64 s[30:31]
183;
184; GFX10-LABEL: extractelement_vgpr_v4i16_vgpr_idx:
185; GFX10:       ; %bb.0:
186; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
187; GFX10-NEXT:    v_and_b32_e32 v2, 3, v2
188; GFX10-NEXT:    v_lshlrev_b32_e32 v2, 1, v2
189; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
190; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
191; GFX10-NEXT:    global_load_ushort v0, v[0:1], off
192; GFX10-NEXT:    s_waitcnt vmcnt(0)
193; GFX10-NEXT:    s_setpc_b64 s[30:31]
194;
195; GFX11-LABEL: extractelement_vgpr_v4i16_vgpr_idx:
196; GFX11:       ; %bb.0:
197; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
198; GFX11-NEXT:    v_and_b32_e32 v2, 3, v2
199; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
200; GFX11-NEXT:    v_lshlrev_b32_e32 v2, 1, v2
201; GFX11-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
202; GFX11-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
203; GFX11-NEXT:    global_load_u16 v0, v[0:1], off
204; GFX11-NEXT:    s_waitcnt vmcnt(0)
205; GFX11-NEXT:    s_setpc_b64 s[30:31]
206  %vector = load <4 x i16>, ptr addrspace(1) %ptr
207  %element = extractelement <4 x i16> %vector, i32 %idx
208  ret i16 %element
209}
210
; Dynamic extract from a <4 x i16> loaded through an inreg addrspace(4) pointer
; (s[2:3] in the expected output), indexed by a non-inreg i32 (v0).
; The index is masked with 3 and shifted left by 1. The GFX9, GFX10 and GFX11
; sequences pass it as the vector offset of a load based on s[2:3]; the GFX8
; sequence copies the pointer into v[0:1] and adds the offset; the GFX7
; sequence uses an addr64 buffer load with s[2:3] moved into s[0:1].
211define amdgpu_ps i16 @extractelement_sgpr_v4i16_vgpr_idx(ptr addrspace(4) inreg %ptr, i32 %idx) {
212; GFX9-LABEL: extractelement_sgpr_v4i16_vgpr_idx:
213; GFX9:       ; %bb.0:
214; GFX9-NEXT:    v_and_b32_e32 v0, 3, v0
215; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
216; GFX9-NEXT:    global_load_ushort v0, v0, s[2:3]
217; GFX9-NEXT:    s_waitcnt vmcnt(0)
218; GFX9-NEXT:    v_readfirstlane_b32 s0, v0
219; GFX9-NEXT:    ; return to shader part epilog
220;
221; GFX8-LABEL: extractelement_sgpr_v4i16_vgpr_idx:
222; GFX8:       ; %bb.0:
223; GFX8-NEXT:    v_and_b32_e32 v0, 3, v0
224; GFX8-NEXT:    v_lshlrev_b32_e32 v2, 1, v0
225; GFX8-NEXT:    v_mov_b32_e32 v0, s2
226; GFX8-NEXT:    v_mov_b32_e32 v1, s3
227; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
228; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
229; GFX8-NEXT:    flat_load_ushort v0, v[0:1]
230; GFX8-NEXT:    s_waitcnt vmcnt(0)
231; GFX8-NEXT:    v_readfirstlane_b32 s0, v0
232; GFX8-NEXT:    ; return to shader part epilog
233;
234; GFX7-LABEL: extractelement_sgpr_v4i16_vgpr_idx:
235; GFX7:       ; %bb.0:
236; GFX7-NEXT:    v_and_b32_e32 v0, 3, v0
237; GFX7-NEXT:    s_mov_b32 s0, s2
238; GFX7-NEXT:    s_mov_b32 s1, s3
239; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
240; GFX7-NEXT:    v_mov_b32_e32 v1, 0
241; GFX7-NEXT:    s_mov_b32 s2, 0
242; GFX7-NEXT:    s_mov_b32 s3, 0xf000
243; GFX7-NEXT:    buffer_load_ushort v0, v[0:1], s[0:3], 0 addr64
244; GFX7-NEXT:    s_waitcnt vmcnt(0)
245; GFX7-NEXT:    v_readfirstlane_b32 s0, v0
246; GFX7-NEXT:    ; return to shader part epilog
247;
248; GFX10-LABEL: extractelement_sgpr_v4i16_vgpr_idx:
249; GFX10:       ; %bb.0:
250; GFX10-NEXT:    v_and_b32_e32 v0, 3, v0
251; GFX10-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
252; GFX10-NEXT:    global_load_ushort v0, v0, s[2:3]
253; GFX10-NEXT:    s_waitcnt vmcnt(0)
254; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
255; GFX10-NEXT:    ; return to shader part epilog
256;
257; GFX11-LABEL: extractelement_sgpr_v4i16_vgpr_idx:
258; GFX11:       ; %bb.0:
259; GFX11-NEXT:    v_and_b32_e32 v0, 3, v0
260; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
261; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
262; GFX11-NEXT:    global_load_u16 v0, v0, s[2:3]
263; GFX11-NEXT:    s_waitcnt vmcnt(0)
264; GFX11-NEXT:    v_readfirstlane_b32 s0, v0
265; GFX11-NEXT:    ; return to shader part epilog
266  %vector = load <4 x i16>, ptr addrspace(4) %ptr
267  %element = extractelement <4 x i16> %vector, i32 %idx
268  ret i16 %element
269}
270
; Constant-index extract of element 0 from a <4 x i16> loaded through an inreg
; addrspace(4) pointer. Every expected sequence is a single 16-bit load from
; the pointer in s[2:3] with no offset, followed by v_readfirstlane_b32 into s0.
271define amdgpu_ps i16 @extractelement_sgpr_v4i16_idx0(ptr addrspace(4) inreg %ptr) {
272; GFX9-LABEL: extractelement_sgpr_v4i16_idx0:
273; GFX9:       ; %bb.0:
274; GFX9-NEXT:    v_mov_b32_e32 v0, 0
275; GFX9-NEXT:    global_load_ushort v0, v0, s[2:3]
276; GFX9-NEXT:    s_waitcnt vmcnt(0)
277; GFX9-NEXT:    v_readfirstlane_b32 s0, v0
278; GFX9-NEXT:    ; return to shader part epilog
279;
280; GFX8-LABEL: extractelement_sgpr_v4i16_idx0:
281; GFX8:       ; %bb.0:
282; GFX8-NEXT:    v_mov_b32_e32 v0, s2
283; GFX8-NEXT:    v_mov_b32_e32 v1, s3
284; GFX8-NEXT:    flat_load_ushort v0, v[0:1]
285; GFX8-NEXT:    s_waitcnt vmcnt(0)
286; GFX8-NEXT:    v_readfirstlane_b32 s0, v0
287; GFX8-NEXT:    ; return to shader part epilog
288;
289; GFX7-LABEL: extractelement_sgpr_v4i16_idx0:
290; GFX7:       ; %bb.0:
291; GFX7-NEXT:    s_mov_b32 s0, s2
292; GFX7-NEXT:    s_mov_b32 s1, s3
293; GFX7-NEXT:    s_mov_b32 s2, -1
294; GFX7-NEXT:    s_mov_b32 s3, 0xf000
295; GFX7-NEXT:    buffer_load_ushort v0, off, s[0:3], 0
296; GFX7-NEXT:    s_waitcnt vmcnt(0)
297; GFX7-NEXT:    v_readfirstlane_b32 s0, v0
298; GFX7-NEXT:    ; return to shader part epilog
299;
300; GFX10-LABEL: extractelement_sgpr_v4i16_idx0:
301; GFX10:       ; %bb.0:
302; GFX10-NEXT:    v_mov_b32_e32 v0, 0
303; GFX10-NEXT:    global_load_ushort v0, v0, s[2:3]
304; GFX10-NEXT:    s_waitcnt vmcnt(0)
305; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
306; GFX10-NEXT:    ; return to shader part epilog
307;
308; GFX11-LABEL: extractelement_sgpr_v4i16_idx0:
309; GFX11:       ; %bb.0:
310; GFX11-NEXT:    v_mov_b32_e32 v0, 0
311; GFX11-NEXT:    global_load_u16 v0, v0, s[2:3]
312; GFX11-NEXT:    s_waitcnt vmcnt(0)
313; GFX11-NEXT:    v_readfirstlane_b32 s0, v0
314; GFX11-NEXT:    ; return to shader part epilog
315  %vector = load <4 x i16>, ptr addrspace(4) %ptr
316  %element = extractelement <4 x i16> %vector, i32 0
317  ret i16 %element
318}
319
; Constant-index extract of element 1 from a <4 x i16> loaded through an inreg
; addrspace(4) pointer. The expected sequences do a single 16-bit load at byte
; offset 2 from s[2:3]. The GFX9, GFX7, GFX10 and GFX11 loads carry offset:2,
; while the GFX8 sequence adds 2 to the pointer with s_add_u32/s_addc_u32.
320define amdgpu_ps i16 @extractelement_sgpr_v4i16_idx1(ptr addrspace(4) inreg %ptr) {
321; GFX9-LABEL: extractelement_sgpr_v4i16_idx1:
322; GFX9:       ; %bb.0:
323; GFX9-NEXT:    v_mov_b32_e32 v0, 0
324; GFX9-NEXT:    global_load_ushort v0, v0, s[2:3] offset:2
325; GFX9-NEXT:    s_waitcnt vmcnt(0)
326; GFX9-NEXT:    v_readfirstlane_b32 s0, v0
327; GFX9-NEXT:    ; return to shader part epilog
328;
329; GFX8-LABEL: extractelement_sgpr_v4i16_idx1:
330; GFX8:       ; %bb.0:
331; GFX8-NEXT:    s_add_u32 s0, s2, 2
332; GFX8-NEXT:    s_addc_u32 s1, s3, 0
333; GFX8-NEXT:    v_mov_b32_e32 v0, s0
334; GFX8-NEXT:    v_mov_b32_e32 v1, s1
335; GFX8-NEXT:    flat_load_ushort v0, v[0:1]
336; GFX8-NEXT:    s_waitcnt vmcnt(0)
337; GFX8-NEXT:    v_readfirstlane_b32 s0, v0
338; GFX8-NEXT:    ; return to shader part epilog
339;
340; GFX7-LABEL: extractelement_sgpr_v4i16_idx1:
341; GFX7:       ; %bb.0:
342; GFX7-NEXT:    s_mov_b32 s0, s2
343; GFX7-NEXT:    s_mov_b32 s1, s3
344; GFX7-NEXT:    s_mov_b32 s2, -1
345; GFX7-NEXT:    s_mov_b32 s3, 0xf000
346; GFX7-NEXT:    buffer_load_ushort v0, off, s[0:3], 0 offset:2
347; GFX7-NEXT:    s_waitcnt vmcnt(0)
348; GFX7-NEXT:    v_readfirstlane_b32 s0, v0
349; GFX7-NEXT:    ; return to shader part epilog
350;
351; GFX10-LABEL: extractelement_sgpr_v4i16_idx1:
352; GFX10:       ; %bb.0:
353; GFX10-NEXT:    v_mov_b32_e32 v0, 0
354; GFX10-NEXT:    global_load_ushort v0, v0, s[2:3] offset:2
355; GFX10-NEXT:    s_waitcnt vmcnt(0)
356; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
357; GFX10-NEXT:    ; return to shader part epilog
358;
359; GFX11-LABEL: extractelement_sgpr_v4i16_idx1:
360; GFX11:       ; %bb.0:
361; GFX11-NEXT:    v_mov_b32_e32 v0, 0
362; GFX11-NEXT:    global_load_u16 v0, v0, s[2:3] offset:2
363; GFX11-NEXT:    s_waitcnt vmcnt(0)
364; GFX11-NEXT:    v_readfirstlane_b32 s0, v0
365; GFX11-NEXT:    ; return to shader part epilog
366  %vector = load <4 x i16>, ptr addrspace(4) %ptr
367  %element = extractelement <4 x i16> %vector, i32 1
368  ret i16 %element
369}
370
; Constant-index extract of element 2 from a <4 x i16> loaded through an inreg
; addrspace(4) pointer. The expected sequences do a single 16-bit load at byte
; offset 4 from s[2:3]. The GFX9, GFX7, GFX10 and GFX11 loads carry offset:4,
; while the GFX8 sequence adds 4 to the pointer with s_add_u32/s_addc_u32.
371define amdgpu_ps i16 @extractelement_sgpr_v4i16_idx2(ptr addrspace(4) inreg %ptr) {
372; GFX9-LABEL: extractelement_sgpr_v4i16_idx2:
373; GFX9:       ; %bb.0:
374; GFX9-NEXT:    v_mov_b32_e32 v0, 0
375; GFX9-NEXT:    global_load_ushort v0, v0, s[2:3] offset:4
376; GFX9-NEXT:    s_waitcnt vmcnt(0)
377; GFX9-NEXT:    v_readfirstlane_b32 s0, v0
378; GFX9-NEXT:    ; return to shader part epilog
379;
380; GFX8-LABEL: extractelement_sgpr_v4i16_idx2:
381; GFX8:       ; %bb.0:
382; GFX8-NEXT:    s_add_u32 s0, s2, 4
383; GFX8-NEXT:    s_addc_u32 s1, s3, 0
384; GFX8-NEXT:    v_mov_b32_e32 v0, s0
385; GFX8-NEXT:    v_mov_b32_e32 v1, s1
386; GFX8-NEXT:    flat_load_ushort v0, v[0:1]
387; GFX8-NEXT:    s_waitcnt vmcnt(0)
388; GFX8-NEXT:    v_readfirstlane_b32 s0, v0
389; GFX8-NEXT:    ; return to shader part epilog
390;
391; GFX7-LABEL: extractelement_sgpr_v4i16_idx2:
392; GFX7:       ; %bb.0:
393; GFX7-NEXT:    s_mov_b32 s0, s2
394; GFX7-NEXT:    s_mov_b32 s1, s3
395; GFX7-NEXT:    s_mov_b32 s2, -1
396; GFX7-NEXT:    s_mov_b32 s3, 0xf000
397; GFX7-NEXT:    buffer_load_ushort v0, off, s[0:3], 0 offset:4
398; GFX7-NEXT:    s_waitcnt vmcnt(0)
399; GFX7-NEXT:    v_readfirstlane_b32 s0, v0
400; GFX7-NEXT:    ; return to shader part epilog
401;
402; GFX10-LABEL: extractelement_sgpr_v4i16_idx2:
403; GFX10:       ; %bb.0:
404; GFX10-NEXT:    v_mov_b32_e32 v0, 0
405; GFX10-NEXT:    global_load_ushort v0, v0, s[2:3] offset:4
406; GFX10-NEXT:    s_waitcnt vmcnt(0)
407; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
408; GFX10-NEXT:    ; return to shader part epilog
409;
410; GFX11-LABEL: extractelement_sgpr_v4i16_idx2:
411; GFX11:       ; %bb.0:
412; GFX11-NEXT:    v_mov_b32_e32 v0, 0
413; GFX11-NEXT:    global_load_u16 v0, v0, s[2:3] offset:4
414; GFX11-NEXT:    s_waitcnt vmcnt(0)
415; GFX11-NEXT:    v_readfirstlane_b32 s0, v0
416; GFX11-NEXT:    ; return to shader part epilog
417  %vector = load <4 x i16>, ptr addrspace(4) %ptr
418  %element = extractelement <4 x i16> %vector, i32 2
419  ret i16 %element
420}
421
; Constant-index extract of element 3 from a <4 x i16> loaded through an inreg
; addrspace(4) pointer. The expected sequences do a single 16-bit load at byte
; offset 6 from s[2:3]. The GFX9, GFX7, GFX10 and GFX11 loads carry offset:6,
; while the GFX8 sequence adds 6 to the pointer with s_add_u32/s_addc_u32.
422define amdgpu_ps i16 @extractelement_sgpr_v4i16_idx3(ptr addrspace(4) inreg %ptr) {
423; GFX9-LABEL: extractelement_sgpr_v4i16_idx3:
424; GFX9:       ; %bb.0:
425; GFX9-NEXT:    v_mov_b32_e32 v0, 0
426; GFX9-NEXT:    global_load_ushort v0, v0, s[2:3] offset:6
427; GFX9-NEXT:    s_waitcnt vmcnt(0)
428; GFX9-NEXT:    v_readfirstlane_b32 s0, v0
429; GFX9-NEXT:    ; return to shader part epilog
430;
431; GFX8-LABEL: extractelement_sgpr_v4i16_idx3:
432; GFX8:       ; %bb.0:
433; GFX8-NEXT:    s_add_u32 s0, s2, 6
434; GFX8-NEXT:    s_addc_u32 s1, s3, 0
435; GFX8-NEXT:    v_mov_b32_e32 v0, s0
436; GFX8-NEXT:    v_mov_b32_e32 v1, s1
437; GFX8-NEXT:    flat_load_ushort v0, v[0:1]
438; GFX8-NEXT:    s_waitcnt vmcnt(0)
439; GFX8-NEXT:    v_readfirstlane_b32 s0, v0
440; GFX8-NEXT:    ; return to shader part epilog
441;
442; GFX7-LABEL: extractelement_sgpr_v4i16_idx3:
443; GFX7:       ; %bb.0:
444; GFX7-NEXT:    s_mov_b32 s0, s2
445; GFX7-NEXT:    s_mov_b32 s1, s3
446; GFX7-NEXT:    s_mov_b32 s2, -1
447; GFX7-NEXT:    s_mov_b32 s3, 0xf000
448; GFX7-NEXT:    buffer_load_ushort v0, off, s[0:3], 0 offset:6
449; GFX7-NEXT:    s_waitcnt vmcnt(0)
450; GFX7-NEXT:    v_readfirstlane_b32 s0, v0
451; GFX7-NEXT:    ; return to shader part epilog
452;
453; GFX10-LABEL: extractelement_sgpr_v4i16_idx3:
454; GFX10:       ; %bb.0:
455; GFX10-NEXT:    v_mov_b32_e32 v0, 0
456; GFX10-NEXT:    global_load_ushort v0, v0, s[2:3] offset:6
457; GFX10-NEXT:    s_waitcnt vmcnt(0)
458; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
459; GFX10-NEXT:    ; return to shader part epilog
460;
461; GFX11-LABEL: extractelement_sgpr_v4i16_idx3:
462; GFX11:       ; %bb.0:
463; GFX11-NEXT:    v_mov_b32_e32 v0, 0
464; GFX11-NEXT:    global_load_u16 v0, v0, s[2:3] offset:6
465; GFX11-NEXT:    s_waitcnt vmcnt(0)
466; GFX11-NEXT:    v_readfirstlane_b32 s0, v0
467; GFX11-NEXT:    ; return to shader part epilog
468  %vector = load <4 x i16>, ptr addrspace(4) %ptr
469  %element = extractelement <4 x i16> %vector, i32 3
470  ret i16 %element
471}
472
; Constant-index extract of element 0 from a <4 x i16> loaded through a
; non-inreg addrspace(1) pointer, without the amdgpu_ps calling convention.
; Every expected sequence is a single 16-bit load from v[0:1] with no offset,
; leaving the result in v0 and returning with s_setpc_b64 s[30:31].
473define i16 @extractelement_vgpr_v4i16_idx0(ptr addrspace(1) %ptr) {
474; GFX9-LABEL: extractelement_vgpr_v4i16_idx0:
475; GFX9:       ; %bb.0:
476; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
477; GFX9-NEXT:    global_load_ushort v0, v[0:1], off
478; GFX9-NEXT:    s_waitcnt vmcnt(0)
479; GFX9-NEXT:    s_setpc_b64 s[30:31]
480;
481; GFX8-LABEL: extractelement_vgpr_v4i16_idx0:
482; GFX8:       ; %bb.0:
483; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
484; GFX8-NEXT:    flat_load_ushort v0, v[0:1]
485; GFX8-NEXT:    s_waitcnt vmcnt(0)
486; GFX8-NEXT:    s_setpc_b64 s[30:31]
487;
488; GFX7-LABEL: extractelement_vgpr_v4i16_idx0:
489; GFX7:       ; %bb.0:
490; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
491; GFX7-NEXT:    s_mov_b32 s6, 0
492; GFX7-NEXT:    s_mov_b32 s7, 0xf000
493; GFX7-NEXT:    s_mov_b64 s[4:5], 0
494; GFX7-NEXT:    buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64
495; GFX7-NEXT:    s_waitcnt vmcnt(0)
496; GFX7-NEXT:    s_setpc_b64 s[30:31]
497;
498; GFX10-LABEL: extractelement_vgpr_v4i16_idx0:
499; GFX10:       ; %bb.0:
500; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
501; GFX10-NEXT:    global_load_ushort v0, v[0:1], off
502; GFX10-NEXT:    s_waitcnt vmcnt(0)
503; GFX10-NEXT:    s_setpc_b64 s[30:31]
504;
505; GFX11-LABEL: extractelement_vgpr_v4i16_idx0:
506; GFX11:       ; %bb.0:
507; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
508; GFX11-NEXT:    global_load_u16 v0, v[0:1], off
509; GFX11-NEXT:    s_waitcnt vmcnt(0)
510; GFX11-NEXT:    s_setpc_b64 s[30:31]
511  %vector = load <4 x i16>, ptr addrspace(1) %ptr
512  %element = extractelement <4 x i16> %vector, i32 0
513  ret i16 %element
514}
515
; Constant-index extract of element 1 from a <4 x i16> loaded through a
; non-inreg addrspace(1) pointer, without the amdgpu_ps calling convention.
; The expected sequences do a single 16-bit load at byte offset 2 from v[0:1].
; The GFX9, GFX7, GFX10 and GFX11 loads carry offset:2, while the GFX8
; sequence adds 2 to the pointer with v_add_u32/v_addc_u32 first.
516define i16 @extractelement_vgpr_v4i16_idx1(ptr addrspace(1) %ptr) {
517; GFX9-LABEL: extractelement_vgpr_v4i16_idx1:
518; GFX9:       ; %bb.0:
519; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
520; GFX9-NEXT:    global_load_ushort v0, v[0:1], off offset:2
521; GFX9-NEXT:    s_waitcnt vmcnt(0)
522; GFX9-NEXT:    s_setpc_b64 s[30:31]
523;
524; GFX8-LABEL: extractelement_vgpr_v4i16_idx1:
525; GFX8:       ; %bb.0:
526; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
527; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 2, v0
528; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
529; GFX8-NEXT:    flat_load_ushort v0, v[0:1]
530; GFX8-NEXT:    s_waitcnt vmcnt(0)
531; GFX8-NEXT:    s_setpc_b64 s[30:31]
532;
533; GFX7-LABEL: extractelement_vgpr_v4i16_idx1:
534; GFX7:       ; %bb.0:
535; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
536; GFX7-NEXT:    s_mov_b32 s6, 0
537; GFX7-NEXT:    s_mov_b32 s7, 0xf000
538; GFX7-NEXT:    s_mov_b64 s[4:5], 0
539; GFX7-NEXT:    buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64 offset:2
540; GFX7-NEXT:    s_waitcnt vmcnt(0)
541; GFX7-NEXT:    s_setpc_b64 s[30:31]
542;
543; GFX10-LABEL: extractelement_vgpr_v4i16_idx1:
544; GFX10:       ; %bb.0:
545; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
546; GFX10-NEXT:    global_load_ushort v0, v[0:1], off offset:2
547; GFX10-NEXT:    s_waitcnt vmcnt(0)
548; GFX10-NEXT:    s_setpc_b64 s[30:31]
549;
550; GFX11-LABEL: extractelement_vgpr_v4i16_idx1:
551; GFX11:       ; %bb.0:
552; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
553; GFX11-NEXT:    global_load_u16 v0, v[0:1], off offset:2
554; GFX11-NEXT:    s_waitcnt vmcnt(0)
555; GFX11-NEXT:    s_setpc_b64 s[30:31]
556  %vector = load <4 x i16>, ptr addrspace(1) %ptr
557  %element = extractelement <4 x i16> %vector, i32 1
558  ret i16 %element
559}
560
; Constant-index extract of element 2 from a <4 x i16> loaded through a
; non-inreg addrspace(1) pointer, without the amdgpu_ps calling convention.
; The expected sequences do a single 16-bit load at byte offset 4 from v[0:1].
; The GFX9, GFX7, GFX10 and GFX11 loads carry offset:4, while the GFX8
; sequence adds 4 to the pointer with v_add_u32/v_addc_u32 first.
561define i16 @extractelement_vgpr_v4i16_idx2(ptr addrspace(1) %ptr) {
562; GFX9-LABEL: extractelement_vgpr_v4i16_idx2:
563; GFX9:       ; %bb.0:
564; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
565; GFX9-NEXT:    global_load_ushort v0, v[0:1], off offset:4
566; GFX9-NEXT:    s_waitcnt vmcnt(0)
567; GFX9-NEXT:    s_setpc_b64 s[30:31]
568;
569; GFX8-LABEL: extractelement_vgpr_v4i16_idx2:
570; GFX8:       ; %bb.0:
571; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
572; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 4, v0
573; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
574; GFX8-NEXT:    flat_load_ushort v0, v[0:1]
575; GFX8-NEXT:    s_waitcnt vmcnt(0)
576; GFX8-NEXT:    s_setpc_b64 s[30:31]
577;
578; GFX7-LABEL: extractelement_vgpr_v4i16_idx2:
579; GFX7:       ; %bb.0:
580; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
581; GFX7-NEXT:    s_mov_b32 s6, 0
582; GFX7-NEXT:    s_mov_b32 s7, 0xf000
583; GFX7-NEXT:    s_mov_b64 s[4:5], 0
584; GFX7-NEXT:    buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64 offset:4
585; GFX7-NEXT:    s_waitcnt vmcnt(0)
586; GFX7-NEXT:    s_setpc_b64 s[30:31]
587;
588; GFX10-LABEL: extractelement_vgpr_v4i16_idx2:
589; GFX10:       ; %bb.0:
590; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
591; GFX10-NEXT:    global_load_ushort v0, v[0:1], off offset:4
592; GFX10-NEXT:    s_waitcnt vmcnt(0)
593; GFX10-NEXT:    s_setpc_b64 s[30:31]
594;
595; GFX11-LABEL: extractelement_vgpr_v4i16_idx2:
596; GFX11:       ; %bb.0:
597; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
598; GFX11-NEXT:    global_load_u16 v0, v[0:1], off offset:4
599; GFX11-NEXT:    s_waitcnt vmcnt(0)
600; GFX11-NEXT:    s_setpc_b64 s[30:31]
601  %vector = load <4 x i16>, ptr addrspace(1) %ptr
602  %element = extractelement <4 x i16> %vector, i32 2
603  ret i16 %element
604}
605
; Constant-index extract of element 3 from a <4 x i16> loaded through a
; non-inreg addrspace(1) pointer, without the amdgpu_ps calling convention.
; The expected sequences do a single 16-bit load at byte offset 6 from v[0:1].
; The GFX9, GFX7, GFX10 and GFX11 loads carry offset:6, while the GFX8
; sequence adds 6 to the pointer with v_add_u32/v_addc_u32 first.
606define i16 @extractelement_vgpr_v4i16_idx3(ptr addrspace(1) %ptr) {
607; GFX9-LABEL: extractelement_vgpr_v4i16_idx3:
608; GFX9:       ; %bb.0:
609; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
610; GFX9-NEXT:    global_load_ushort v0, v[0:1], off offset:6
611; GFX9-NEXT:    s_waitcnt vmcnt(0)
612; GFX9-NEXT:    s_setpc_b64 s[30:31]
613;
614; GFX8-LABEL: extractelement_vgpr_v4i16_idx3:
615; GFX8:       ; %bb.0:
616; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
617; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 6, v0
618; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
619; GFX8-NEXT:    flat_load_ushort v0, v[0:1]
620; GFX8-NEXT:    s_waitcnt vmcnt(0)
621; GFX8-NEXT:    s_setpc_b64 s[30:31]
622;
623; GFX7-LABEL: extractelement_vgpr_v4i16_idx3:
624; GFX7:       ; %bb.0:
625; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
626; GFX7-NEXT:    s_mov_b32 s6, 0
627; GFX7-NEXT:    s_mov_b32 s7, 0xf000
628; GFX7-NEXT:    s_mov_b64 s[4:5], 0
629; GFX7-NEXT:    buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64 offset:6
630; GFX7-NEXT:    s_waitcnt vmcnt(0)
631; GFX7-NEXT:    s_setpc_b64 s[30:31]
632;
633; GFX10-LABEL: extractelement_vgpr_v4i16_idx3:
634; GFX10:       ; %bb.0:
635; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
636; GFX10-NEXT:    global_load_ushort v0, v[0:1], off offset:6
637; GFX10-NEXT:    s_waitcnt vmcnt(0)
638; GFX10-NEXT:    s_setpc_b64 s[30:31]
639;
640; GFX11-LABEL: extractelement_vgpr_v4i16_idx3:
641; GFX11:       ; %bb.0:
642; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
643; GFX11-NEXT:    global_load_u16 v0, v[0:1], off offset:6
644; GFX11-NEXT:    s_waitcnt vmcnt(0)
645; GFX11-NEXT:    s_setpc_b64 s[30:31]
646  %vector = load <4 x i16>, ptr addrspace(1) %ptr
647  %element = extractelement <4 x i16> %vector, i32 3
648  ret i16 %element
649}
650
; <8 x i16> counterpart of the inreg-pointer / inreg-index dynamic extract:
; the vector is loaded through an inreg addrspace(4) pointer and indexed by an
; inreg i32. In every expected sequence below the index (s4) is masked with 7
; and shifted left by 1, a single 16-bit load is issued relative to the pointer
; in s[2:3], and the loaded value is copied to s0 with v_readfirstlane_b32.
651define amdgpu_ps i16 @extractelement_sgpr_v8i16_sgpr_idx(ptr addrspace(4) inreg %ptr, i32 inreg %idx) {
652; GFX9-LABEL: extractelement_sgpr_v8i16_sgpr_idx:
653; GFX9:       ; %bb.0:
654; GFX9-NEXT:    s_and_b32 s0, s4, 7
655; GFX9-NEXT:    s_lshl_b32 s0, s0, 1
656; GFX9-NEXT:    v_mov_b32_e32 v0, s0
657; GFX9-NEXT:    global_load_ushort v0, v0, s[2:3]
658; GFX9-NEXT:    s_waitcnt vmcnt(0)
659; GFX9-NEXT:    v_readfirstlane_b32 s0, v0
660; GFX9-NEXT:    ; return to shader part epilog
661;
662; GFX8-LABEL: extractelement_sgpr_v8i16_sgpr_idx:
663; GFX8:       ; %bb.0:
664; GFX8-NEXT:    s_and_b32 s0, s4, 7
665; GFX8-NEXT:    s_lshl_b32 s0, s0, 1
666; GFX8-NEXT:    s_add_u32 s0, s2, s0
667; GFX8-NEXT:    s_addc_u32 s1, s3, 0
668; GFX8-NEXT:    v_mov_b32_e32 v0, s0
669; GFX8-NEXT:    v_mov_b32_e32 v1, s1
670; GFX8-NEXT:    flat_load_ushort v0, v[0:1]
671; GFX8-NEXT:    s_waitcnt vmcnt(0)
672; GFX8-NEXT:    v_readfirstlane_b32 s0, v0
673; GFX8-NEXT:    ; return to shader part epilog
674;
675; GFX7-LABEL: extractelement_sgpr_v8i16_sgpr_idx:
676; GFX7:       ; %bb.0:
677; GFX7-NEXT:    s_mov_b32 s0, s2
678; GFX7-NEXT:    s_and_b32 s2, s4, 7
679; GFX7-NEXT:    s_lshl_b32 s4, s2, 1
680; GFX7-NEXT:    s_mov_b32 s5, 0
681; GFX7-NEXT:    v_mov_b32_e32 v0, s4
682; GFX7-NEXT:    s_mov_b32 s1, s3
683; GFX7-NEXT:    s_mov_b32 s3, 0xf000
684; GFX7-NEXT:    s_mov_b32 s2, s5
685; GFX7-NEXT:    v_mov_b32_e32 v1, s5
686; GFX7-NEXT:    buffer_load_ushort v0, v[0:1], s[0:3], 0 addr64
687; GFX7-NEXT:    s_waitcnt vmcnt(0)
688; GFX7-NEXT:    v_readfirstlane_b32 s0, v0
689; GFX7-NEXT:    ; return to shader part epilog
690;
691; GFX10-LABEL: extractelement_sgpr_v8i16_sgpr_idx:
692; GFX10:       ; %bb.0:
693; GFX10-NEXT:    s_and_b32 s0, s4, 7
694; GFX10-NEXT:    s_lshl_b32 s0, s0, 1
695; GFX10-NEXT:    v_mov_b32_e32 v0, s0
696; GFX10-NEXT:    global_load_ushort v0, v0, s[2:3]
697; GFX10-NEXT:    s_waitcnt vmcnt(0)
698; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
699; GFX10-NEXT:    ; return to shader part epilog
700;
701; GFX11-LABEL: extractelement_sgpr_v8i16_sgpr_idx:
702; GFX11:       ; %bb.0:
703; GFX11-NEXT:    s_and_b32 s0, s4, 7
704; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
705; GFX11-NEXT:    s_lshl_b32 s0, s0, 1
706; GFX11-NEXT:    v_mov_b32_e32 v0, s0
707; GFX11-NEXT:    global_load_u16 v0, v0, s[2:3]
708; GFX11-NEXT:    s_waitcnt vmcnt(0)
709; GFX11-NEXT:    v_readfirstlane_b32 s0, v0
710; GFX11-NEXT:    ; return to shader part epilog
711  %vector = load <8 x i16>, ptr addrspace(4) %ptr
712  %element = extractelement <8 x i16> %vector, i32 %idx
713  ret i16 %element
714}
715
; <8 x i16> counterpart of the non-inreg-pointer / inreg-index dynamic extract:
; the vector is loaded through an addrspace(1) pointer (v[0:1] in the expected
; output) and indexed by an inreg i32 (s2). The index is masked with 7 and
; shifted left by 1. The GFX9, GFX8, GFX10 and GFX11 sequences add that offset
; to v[0:1] with a 64-bit carry chain before a single 16-bit load; the GFX7
; sequence instead leaves v[0:1] untouched and puts the offset in s[0:1] of the
; s[0:3] operand of an addr64 buffer load.
716define amdgpu_ps i16 @extractelement_vgpr_v8i16_sgpr_idx(ptr addrspace(1) %ptr, i32 inreg %idx) {
717; GFX9-LABEL: extractelement_vgpr_v8i16_sgpr_idx:
718; GFX9:       ; %bb.0:
719; GFX9-NEXT:    s_and_b32 s0, s2, 7
720; GFX9-NEXT:    s_mov_b32 s1, 0
721; GFX9-NEXT:    s_lshl_b32 s0, s0, 1
722; GFX9-NEXT:    v_mov_b32_e32 v3, s1
723; GFX9-NEXT:    v_mov_b32_e32 v2, s0
724; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
725; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
726; GFX9-NEXT:    global_load_ushort v0, v[0:1], off
727; GFX9-NEXT:    s_waitcnt vmcnt(0)
728; GFX9-NEXT:    v_readfirstlane_b32 s0, v0
729; GFX9-NEXT:    ; return to shader part epilog
730;
731; GFX8-LABEL: extractelement_vgpr_v8i16_sgpr_idx:
732; GFX8:       ; %bb.0:
733; GFX8-NEXT:    s_and_b32 s0, s2, 7
734; GFX8-NEXT:    s_mov_b32 s1, 0
735; GFX8-NEXT:    s_lshl_b32 s0, s0, 1
736; GFX8-NEXT:    v_mov_b32_e32 v3, s1
737; GFX8-NEXT:    v_mov_b32_e32 v2, s0
738; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
739; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, v1, v3, vcc
740; GFX8-NEXT:    flat_load_ushort v0, v[0:1]
741; GFX8-NEXT:    s_waitcnt vmcnt(0)
742; GFX8-NEXT:    v_readfirstlane_b32 s0, v0
743; GFX8-NEXT:    ; return to shader part epilog
744;
745; GFX7-LABEL: extractelement_vgpr_v8i16_sgpr_idx:
746; GFX7:       ; %bb.0:
747; GFX7-NEXT:    s_and_b32 s0, s2, 7
748; GFX7-NEXT:    s_mov_b32 s1, 0
749; GFX7-NEXT:    s_lshl_b32 s0, s0, 1
750; GFX7-NEXT:    s_mov_b32 s3, 0xf000
751; GFX7-NEXT:    s_mov_b32 s2, s1
752; GFX7-NEXT:    buffer_load_ushort v0, v[0:1], s[0:3], 0 addr64
753; GFX7-NEXT:    s_waitcnt vmcnt(0)
754; GFX7-NEXT:    v_readfirstlane_b32 s0, v0
755; GFX7-NEXT:    ; return to shader part epilog
756;
757; GFX10-LABEL: extractelement_vgpr_v8i16_sgpr_idx:
758; GFX10:       ; %bb.0:
759; GFX10-NEXT:    s_and_b32 s0, s2, 7
760; GFX10-NEXT:    s_mov_b32 s1, 0
761; GFX10-NEXT:    s_lshl_b32 s0, s0, 1
762; GFX10-NEXT:    v_mov_b32_e32 v3, s1
763; GFX10-NEXT:    v_mov_b32_e32 v2, s0
764; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
765; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
766; GFX10-NEXT:    global_load_ushort v0, v[0:1], off
767; GFX10-NEXT:    s_waitcnt vmcnt(0)
768; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
769; GFX10-NEXT:    ; return to shader part epilog
770;
771; GFX11-LABEL: extractelement_vgpr_v8i16_sgpr_idx:
772; GFX11:       ; %bb.0:
773; GFX11-NEXT:    s_and_b32 s0, s2, 7
774; GFX11-NEXT:    s_mov_b32 s1, 0
775; GFX11-NEXT:    s_lshl_b32 s0, s0, 1
776; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
777; GFX11-NEXT:    v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
778; GFX11-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
779; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
780; GFX11-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
781; GFX11-NEXT:    global_load_u16 v0, v[0:1], off
782; GFX11-NEXT:    s_waitcnt vmcnt(0)
783; GFX11-NEXT:    v_readfirstlane_b32 s0, v0
784; GFX11-NEXT:    ; return to shader part epilog
785  %vector = load <8 x i16>, ptr addrspace(1) %ptr
786  %element = extractelement <8 x i16> %vector, i32 %idx
787  ret i16 %element
788}
789
; Dynamic-index extract from a <8 x i16> loaded through an addrspace(1) pointer
; held in v[0:1], with the index in v2. On every run line the checks expect the
; index to be masked with 7, shifted left by 1 and added to the 64-bit address,
; followed by a single 16-bit load rather than a load of the whole vector.
790define i16 @extractelement_vgpr_v8i16_vgpr_idx(ptr addrspace(1) %ptr, i32 %idx) {
791; GFX9-LABEL: extractelement_vgpr_v8i16_vgpr_idx:
792; GFX9:       ; %bb.0:
793; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
794; GFX9-NEXT:    v_and_b32_e32 v2, 7, v2
795; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 1, v2
796; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
797; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
798; GFX9-NEXT:    global_load_ushort v0, v[0:1], off
799; GFX9-NEXT:    s_waitcnt vmcnt(0)
800; GFX9-NEXT:    s_setpc_b64 s[30:31]
801;
802; GFX8-LABEL: extractelement_vgpr_v8i16_vgpr_idx:
803; GFX8:       ; %bb.0:
804; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
805; GFX8-NEXT:    v_and_b32_e32 v2, 7, v2
806; GFX8-NEXT:    v_lshlrev_b32_e32 v2, 1, v2
807; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
808; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
809; GFX8-NEXT:    flat_load_ushort v0, v[0:1]
810; GFX8-NEXT:    s_waitcnt vmcnt(0)
811; GFX8-NEXT:    s_setpc_b64 s[30:31]
812;
813; GFX7-LABEL: extractelement_vgpr_v8i16_vgpr_idx:
814; GFX7:       ; %bb.0:
815; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
816; GFX7-NEXT:    v_and_b32_e32 v2, 7, v2
817; GFX7-NEXT:    v_lshlrev_b32_e32 v2, 1, v2
818; GFX7-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
819; GFX7-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
820; GFX7-NEXT:    s_mov_b32 s6, 0
821; GFX7-NEXT:    s_mov_b32 s7, 0xf000
822; GFX7-NEXT:    s_mov_b64 s[4:5], 0
823; GFX7-NEXT:    buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64
824; GFX7-NEXT:    s_waitcnt vmcnt(0)
825; GFX7-NEXT:    s_setpc_b64 s[30:31]
826;
827; GFX10-LABEL: extractelement_vgpr_v8i16_vgpr_idx:
828; GFX10:       ; %bb.0:
829; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
830; GFX10-NEXT:    v_and_b32_e32 v2, 7, v2
831; GFX10-NEXT:    v_lshlrev_b32_e32 v2, 1, v2
832; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
833; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
834; GFX10-NEXT:    global_load_ushort v0, v[0:1], off
835; GFX10-NEXT:    s_waitcnt vmcnt(0)
836; GFX10-NEXT:    s_setpc_b64 s[30:31]
837;
838; GFX11-LABEL: extractelement_vgpr_v8i16_vgpr_idx:
839; GFX11:       ; %bb.0:
840; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
841; GFX11-NEXT:    v_and_b32_e32 v2, 7, v2
842; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
843; GFX11-NEXT:    v_lshlrev_b32_e32 v2, 1, v2
844; GFX11-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
845; GFX11-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
846; GFX11-NEXT:    global_load_u16 v0, v[0:1], off
847; GFX11-NEXT:    s_waitcnt vmcnt(0)
848; GFX11-NEXT:    s_setpc_b64 s[30:31]
849  %vector = load <8 x i16>, ptr addrspace(1) %ptr
850  %element = extractelement <8 x i16> %vector, i32 %idx
851  ret i16 %element
852}
853
; Dynamic-index extract from a <8 x i16> loaded through an inreg addrspace(4)
; pointer (s[2:3] in the checks), with the index in v0. The checks expect the
; index to be masked with 7 and shifted left by 1, then used as the offset of a
; single 16-bit load; the loaded value is copied to s0 with v_readfirstlane_b32.
854define amdgpu_ps i16 @extractelement_sgpr_v8i16_vgpr_idx(ptr addrspace(4) inreg %ptr, i32 %idx) {
855; GFX9-LABEL: extractelement_sgpr_v8i16_vgpr_idx:
856; GFX9:       ; %bb.0:
857; GFX9-NEXT:    v_and_b32_e32 v0, 7, v0
858; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
859; GFX9-NEXT:    global_load_ushort v0, v0, s[2:3]
860; GFX9-NEXT:    s_waitcnt vmcnt(0)
861; GFX9-NEXT:    v_readfirstlane_b32 s0, v0
862; GFX9-NEXT:    ; return to shader part epilog
863;
864; GFX8-LABEL: extractelement_sgpr_v8i16_vgpr_idx:
865; GFX8:       ; %bb.0:
866; GFX8-NEXT:    v_and_b32_e32 v0, 7, v0
867; GFX8-NEXT:    v_lshlrev_b32_e32 v2, 1, v0
868; GFX8-NEXT:    v_mov_b32_e32 v0, s2
869; GFX8-NEXT:    v_mov_b32_e32 v1, s3
870; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
871; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
872; GFX8-NEXT:    flat_load_ushort v0, v[0:1]
873; GFX8-NEXT:    s_waitcnt vmcnt(0)
874; GFX8-NEXT:    v_readfirstlane_b32 s0, v0
875; GFX8-NEXT:    ; return to shader part epilog
876;
877; GFX7-LABEL: extractelement_sgpr_v8i16_vgpr_idx:
878; GFX7:       ; %bb.0:
879; GFX7-NEXT:    v_and_b32_e32 v0, 7, v0
880; GFX7-NEXT:    s_mov_b32 s0, s2
881; GFX7-NEXT:    s_mov_b32 s1, s3
882; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
883; GFX7-NEXT:    v_mov_b32_e32 v1, 0
884; GFX7-NEXT:    s_mov_b32 s2, 0
885; GFX7-NEXT:    s_mov_b32 s3, 0xf000
886; GFX7-NEXT:    buffer_load_ushort v0, v[0:1], s[0:3], 0 addr64
887; GFX7-NEXT:    s_waitcnt vmcnt(0)
888; GFX7-NEXT:    v_readfirstlane_b32 s0, v0
889; GFX7-NEXT:    ; return to shader part epilog
890;
891; GFX10-LABEL: extractelement_sgpr_v8i16_vgpr_idx:
892; GFX10:       ; %bb.0:
893; GFX10-NEXT:    v_and_b32_e32 v0, 7, v0
894; GFX10-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
895; GFX10-NEXT:    global_load_ushort v0, v0, s[2:3]
896; GFX10-NEXT:    s_waitcnt vmcnt(0)
897; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
898; GFX10-NEXT:    ; return to shader part epilog
899;
900; GFX11-LABEL: extractelement_sgpr_v8i16_vgpr_idx:
901; GFX11:       ; %bb.0:
902; GFX11-NEXT:    v_and_b32_e32 v0, 7, v0
903; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
904; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
905; GFX11-NEXT:    global_load_u16 v0, v0, s[2:3]
906; GFX11-NEXT:    s_waitcnt vmcnt(0)
907; GFX11-NEXT:    v_readfirstlane_b32 s0, v0
908; GFX11-NEXT:    ; return to shader part epilog
909  %vector = load <8 x i16>, ptr addrspace(4) %ptr
910  %element = extractelement <8 x i16> %vector, i32 %idx
911  ret i16 %element
912}
913
; Constant-index extract of element 0 from a <8 x i16> loaded through an inreg
; addrspace(4) pointer (s[2:3] in the checks). The checks expect a single 16-bit
; load at the base address with no offset, and the result copied to s0 with
; v_readfirstlane_b32.
914define amdgpu_ps i16 @extractelement_sgpr_v8i16_idx0(ptr addrspace(4) inreg %ptr) {
915; GFX9-LABEL: extractelement_sgpr_v8i16_idx0:
916; GFX9:       ; %bb.0:
917; GFX9-NEXT:    v_mov_b32_e32 v0, 0
918; GFX9-NEXT:    global_load_ushort v0, v0, s[2:3]
919; GFX9-NEXT:    s_waitcnt vmcnt(0)
920; GFX9-NEXT:    v_readfirstlane_b32 s0, v0
921; GFX9-NEXT:    ; return to shader part epilog
922;
923; GFX8-LABEL: extractelement_sgpr_v8i16_idx0:
924; GFX8:       ; %bb.0:
925; GFX8-NEXT:    v_mov_b32_e32 v0, s2
926; GFX8-NEXT:    v_mov_b32_e32 v1, s3
927; GFX8-NEXT:    flat_load_ushort v0, v[0:1]
928; GFX8-NEXT:    s_waitcnt vmcnt(0)
929; GFX8-NEXT:    v_readfirstlane_b32 s0, v0
930; GFX8-NEXT:    ; return to shader part epilog
931;
932; GFX7-LABEL: extractelement_sgpr_v8i16_idx0:
933; GFX7:       ; %bb.0:
934; GFX7-NEXT:    s_mov_b32 s0, s2
935; GFX7-NEXT:    s_mov_b32 s1, s3
936; GFX7-NEXT:    s_mov_b32 s2, -1
937; GFX7-NEXT:    s_mov_b32 s3, 0xf000
938; GFX7-NEXT:    buffer_load_ushort v0, off, s[0:3], 0
939; GFX7-NEXT:    s_waitcnt vmcnt(0)
940; GFX7-NEXT:    v_readfirstlane_b32 s0, v0
941; GFX7-NEXT:    ; return to shader part epilog
942;
943; GFX10-LABEL: extractelement_sgpr_v8i16_idx0:
944; GFX10:       ; %bb.0:
945; GFX10-NEXT:    v_mov_b32_e32 v0, 0
946; GFX10-NEXT:    global_load_ushort v0, v0, s[2:3]
947; GFX10-NEXT:    s_waitcnt vmcnt(0)
948; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
949; GFX10-NEXT:    ; return to shader part epilog
950;
951; GFX11-LABEL: extractelement_sgpr_v8i16_idx0:
952; GFX11:       ; %bb.0:
953; GFX11-NEXT:    v_mov_b32_e32 v0, 0
954; GFX11-NEXT:    global_load_u16 v0, v0, s[2:3]
955; GFX11-NEXT:    s_waitcnt vmcnt(0)
956; GFX11-NEXT:    v_readfirstlane_b32 s0, v0
957; GFX11-NEXT:    ; return to shader part epilog
958  %vector = load <8 x i16>, ptr addrspace(4) %ptr
959  %element = extractelement <8 x i16> %vector, i32 0
960  ret i16 %element
961}
962
; Constant-index extract of element 1 from a <8 x i16> loaded through an inreg
; addrspace(4) pointer (s[2:3] in the checks). The checks expect a single 16-bit
; load at byte offset 2. The GFX7, GFX9, GFX10 and GFX11 runs use the load's
; immediate offset; the GFX8 run adds 2 to the address with s_add_u32/s_addc_u32.
963define amdgpu_ps i16 @extractelement_sgpr_v8i16_idx1(ptr addrspace(4) inreg %ptr) {
964; GFX9-LABEL: extractelement_sgpr_v8i16_idx1:
965; GFX9:       ; %bb.0:
966; GFX9-NEXT:    v_mov_b32_e32 v0, 0
967; GFX9-NEXT:    global_load_ushort v0, v0, s[2:3] offset:2
968; GFX9-NEXT:    s_waitcnt vmcnt(0)
969; GFX9-NEXT:    v_readfirstlane_b32 s0, v0
970; GFX9-NEXT:    ; return to shader part epilog
971;
972; GFX8-LABEL: extractelement_sgpr_v8i16_idx1:
973; GFX8:       ; %bb.0:
974; GFX8-NEXT:    s_add_u32 s0, s2, 2
975; GFX8-NEXT:    s_addc_u32 s1, s3, 0
976; GFX8-NEXT:    v_mov_b32_e32 v0, s0
977; GFX8-NEXT:    v_mov_b32_e32 v1, s1
978; GFX8-NEXT:    flat_load_ushort v0, v[0:1]
979; GFX8-NEXT:    s_waitcnt vmcnt(0)
980; GFX8-NEXT:    v_readfirstlane_b32 s0, v0
981; GFX8-NEXT:    ; return to shader part epilog
982;
983; GFX7-LABEL: extractelement_sgpr_v8i16_idx1:
984; GFX7:       ; %bb.0:
985; GFX7-NEXT:    s_mov_b32 s0, s2
986; GFX7-NEXT:    s_mov_b32 s1, s3
987; GFX7-NEXT:    s_mov_b32 s2, -1
988; GFX7-NEXT:    s_mov_b32 s3, 0xf000
989; GFX7-NEXT:    buffer_load_ushort v0, off, s[0:3], 0 offset:2
990; GFX7-NEXT:    s_waitcnt vmcnt(0)
991; GFX7-NEXT:    v_readfirstlane_b32 s0, v0
992; GFX7-NEXT:    ; return to shader part epilog
993;
994; GFX10-LABEL: extractelement_sgpr_v8i16_idx1:
995; GFX10:       ; %bb.0:
996; GFX10-NEXT:    v_mov_b32_e32 v0, 0
997; GFX10-NEXT:    global_load_ushort v0, v0, s[2:3] offset:2
998; GFX10-NEXT:    s_waitcnt vmcnt(0)
999; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
1000; GFX10-NEXT:    ; return to shader part epilog
1001;
1002; GFX11-LABEL: extractelement_sgpr_v8i16_idx1:
1003; GFX11:       ; %bb.0:
1004; GFX11-NEXT:    v_mov_b32_e32 v0, 0
1005; GFX11-NEXT:    global_load_u16 v0, v0, s[2:3] offset:2
1006; GFX11-NEXT:    s_waitcnt vmcnt(0)
1007; GFX11-NEXT:    v_readfirstlane_b32 s0, v0
1008; GFX11-NEXT:    ; return to shader part epilog
1009  %vector = load <8 x i16>, ptr addrspace(4) %ptr
1010  %element = extractelement <8 x i16> %vector, i32 1
1011  ret i16 %element
1012}
1013
; Constant-index extract of element 2 from a <8 x i16> loaded through an inreg
; addrspace(4) pointer (s[2:3] in the checks). The checks expect a single 16-bit
; load at byte offset 4. The GFX7, GFX9, GFX10 and GFX11 runs use the load's
; immediate offset; the GFX8 run adds 4 to the address with s_add_u32/s_addc_u32.
1014define amdgpu_ps i16 @extractelement_sgpr_v8i16_idx2(ptr addrspace(4) inreg %ptr) {
1015; GFX9-LABEL: extractelement_sgpr_v8i16_idx2:
1016; GFX9:       ; %bb.0:
1017; GFX9-NEXT:    v_mov_b32_e32 v0, 0
1018; GFX9-NEXT:    global_load_ushort v0, v0, s[2:3] offset:4
1019; GFX9-NEXT:    s_waitcnt vmcnt(0)
1020; GFX9-NEXT:    v_readfirstlane_b32 s0, v0
1021; GFX9-NEXT:    ; return to shader part epilog
1022;
1023; GFX8-LABEL: extractelement_sgpr_v8i16_idx2:
1024; GFX8:       ; %bb.0:
1025; GFX8-NEXT:    s_add_u32 s0, s2, 4
1026; GFX8-NEXT:    s_addc_u32 s1, s3, 0
1027; GFX8-NEXT:    v_mov_b32_e32 v0, s0
1028; GFX8-NEXT:    v_mov_b32_e32 v1, s1
1029; GFX8-NEXT:    flat_load_ushort v0, v[0:1]
1030; GFX8-NEXT:    s_waitcnt vmcnt(0)
1031; GFX8-NEXT:    v_readfirstlane_b32 s0, v0
1032; GFX8-NEXT:    ; return to shader part epilog
1033;
1034; GFX7-LABEL: extractelement_sgpr_v8i16_idx2:
1035; GFX7:       ; %bb.0:
1036; GFX7-NEXT:    s_mov_b32 s0, s2
1037; GFX7-NEXT:    s_mov_b32 s1, s3
1038; GFX7-NEXT:    s_mov_b32 s2, -1
1039; GFX7-NEXT:    s_mov_b32 s3, 0xf000
1040; GFX7-NEXT:    buffer_load_ushort v0, off, s[0:3], 0 offset:4
1041; GFX7-NEXT:    s_waitcnt vmcnt(0)
1042; GFX7-NEXT:    v_readfirstlane_b32 s0, v0
1043; GFX7-NEXT:    ; return to shader part epilog
1044;
1045; GFX10-LABEL: extractelement_sgpr_v8i16_idx2:
1046; GFX10:       ; %bb.0:
1047; GFX10-NEXT:    v_mov_b32_e32 v0, 0
1048; GFX10-NEXT:    global_load_ushort v0, v0, s[2:3] offset:4
1049; GFX10-NEXT:    s_waitcnt vmcnt(0)
1050; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
1051; GFX10-NEXT:    ; return to shader part epilog
1052;
1053; GFX11-LABEL: extractelement_sgpr_v8i16_idx2:
1054; GFX11:       ; %bb.0:
1055; GFX11-NEXT:    v_mov_b32_e32 v0, 0
1056; GFX11-NEXT:    global_load_u16 v0, v0, s[2:3] offset:4
1057; GFX11-NEXT:    s_waitcnt vmcnt(0)
1058; GFX11-NEXT:    v_readfirstlane_b32 s0, v0
1059; GFX11-NEXT:    ; return to shader part epilog
1060  %vector = load <8 x i16>, ptr addrspace(4) %ptr
1061  %element = extractelement <8 x i16> %vector, i32 2
1062  ret i16 %element
1063}
1064
; Constant-index extract of element 3 from a <8 x i16> loaded through an inreg
; addrspace(4) pointer (s[2:3] in the checks). The checks expect a single 16-bit
; load at byte offset 6. The GFX7, GFX9, GFX10 and GFX11 runs use the load's
; immediate offset; the GFX8 run adds 6 to the address with s_add_u32/s_addc_u32.
1065define amdgpu_ps i16 @extractelement_sgpr_v8i16_idx3(ptr addrspace(4) inreg %ptr) {
1066; GFX9-LABEL: extractelement_sgpr_v8i16_idx3:
1067; GFX9:       ; %bb.0:
1068; GFX9-NEXT:    v_mov_b32_e32 v0, 0
1069; GFX9-NEXT:    global_load_ushort v0, v0, s[2:3] offset:6
1070; GFX9-NEXT:    s_waitcnt vmcnt(0)
1071; GFX9-NEXT:    v_readfirstlane_b32 s0, v0
1072; GFX9-NEXT:    ; return to shader part epilog
1073;
1074; GFX8-LABEL: extractelement_sgpr_v8i16_idx3:
1075; GFX8:       ; %bb.0:
1076; GFX8-NEXT:    s_add_u32 s0, s2, 6
1077; GFX8-NEXT:    s_addc_u32 s1, s3, 0
1078; GFX8-NEXT:    v_mov_b32_e32 v0, s0
1079; GFX8-NEXT:    v_mov_b32_e32 v1, s1
1080; GFX8-NEXT:    flat_load_ushort v0, v[0:1]
1081; GFX8-NEXT:    s_waitcnt vmcnt(0)
1082; GFX8-NEXT:    v_readfirstlane_b32 s0, v0
1083; GFX8-NEXT:    ; return to shader part epilog
1084;
1085; GFX7-LABEL: extractelement_sgpr_v8i16_idx3:
1086; GFX7:       ; %bb.0:
1087; GFX7-NEXT:    s_mov_b32 s0, s2
1088; GFX7-NEXT:    s_mov_b32 s1, s3
1089; GFX7-NEXT:    s_mov_b32 s2, -1
1090; GFX7-NEXT:    s_mov_b32 s3, 0xf000
1091; GFX7-NEXT:    buffer_load_ushort v0, off, s[0:3], 0 offset:6
1092; GFX7-NEXT:    s_waitcnt vmcnt(0)
1093; GFX7-NEXT:    v_readfirstlane_b32 s0, v0
1094; GFX7-NEXT:    ; return to shader part epilog
1095;
1096; GFX10-LABEL: extractelement_sgpr_v8i16_idx3:
1097; GFX10:       ; %bb.0:
1098; GFX10-NEXT:    v_mov_b32_e32 v0, 0
1099; GFX10-NEXT:    global_load_ushort v0, v0, s[2:3] offset:6
1100; GFX10-NEXT:    s_waitcnt vmcnt(0)
1101; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
1102; GFX10-NEXT:    ; return to shader part epilog
1103;
1104; GFX11-LABEL: extractelement_sgpr_v8i16_idx3:
1105; GFX11:       ; %bb.0:
1106; GFX11-NEXT:    v_mov_b32_e32 v0, 0
1107; GFX11-NEXT:    global_load_u16 v0, v0, s[2:3] offset:6
1108; GFX11-NEXT:    s_waitcnt vmcnt(0)
1109; GFX11-NEXT:    v_readfirstlane_b32 s0, v0
1110; GFX11-NEXT:    ; return to shader part epilog
1111  %vector = load <8 x i16>, ptr addrspace(4) %ptr
1112  %element = extractelement <8 x i16> %vector, i32 3
1113  ret i16 %element
1114}
1115
; Constant-index extract of element 4 from a <8 x i16> loaded through an inreg
; addrspace(4) pointer (s[2:3] in the checks). The checks expect a single 16-bit
; load at byte offset 8. The GFX7, GFX9, GFX10 and GFX11 runs use the load's
; immediate offset; the GFX8 run adds 8 to the address with s_add_u32/s_addc_u32.
1116define amdgpu_ps i16 @extractelement_sgpr_v8i16_idx4(ptr addrspace(4) inreg %ptr) {
1117; GFX9-LABEL: extractelement_sgpr_v8i16_idx4:
1118; GFX9:       ; %bb.0:
1119; GFX9-NEXT:    v_mov_b32_e32 v0, 0
1120; GFX9-NEXT:    global_load_ushort v0, v0, s[2:3] offset:8
1121; GFX9-NEXT:    s_waitcnt vmcnt(0)
1122; GFX9-NEXT:    v_readfirstlane_b32 s0, v0
1123; GFX9-NEXT:    ; return to shader part epilog
1124;
1125; GFX8-LABEL: extractelement_sgpr_v8i16_idx4:
1126; GFX8:       ; %bb.0:
1127; GFX8-NEXT:    s_add_u32 s0, s2, 8
1128; GFX8-NEXT:    s_addc_u32 s1, s3, 0
1129; GFX8-NEXT:    v_mov_b32_e32 v0, s0
1130; GFX8-NEXT:    v_mov_b32_e32 v1, s1
1131; GFX8-NEXT:    flat_load_ushort v0, v[0:1]
1132; GFX8-NEXT:    s_waitcnt vmcnt(0)
1133; GFX8-NEXT:    v_readfirstlane_b32 s0, v0
1134; GFX8-NEXT:    ; return to shader part epilog
1135;
1136; GFX7-LABEL: extractelement_sgpr_v8i16_idx4:
1137; GFX7:       ; %bb.0:
1138; GFX7-NEXT:    s_mov_b32 s0, s2
1139; GFX7-NEXT:    s_mov_b32 s1, s3
1140; GFX7-NEXT:    s_mov_b32 s2, -1
1141; GFX7-NEXT:    s_mov_b32 s3, 0xf000
1142; GFX7-NEXT:    buffer_load_ushort v0, off, s[0:3], 0 offset:8
1143; GFX7-NEXT:    s_waitcnt vmcnt(0)
1144; GFX7-NEXT:    v_readfirstlane_b32 s0, v0
1145; GFX7-NEXT:    ; return to shader part epilog
1146;
1147; GFX10-LABEL: extractelement_sgpr_v8i16_idx4:
1148; GFX10:       ; %bb.0:
1149; GFX10-NEXT:    v_mov_b32_e32 v0, 0
1150; GFX10-NEXT:    global_load_ushort v0, v0, s[2:3] offset:8
1151; GFX10-NEXT:    s_waitcnt vmcnt(0)
1152; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
1153; GFX10-NEXT:    ; return to shader part epilog
1154;
1155; GFX11-LABEL: extractelement_sgpr_v8i16_idx4:
1156; GFX11:       ; %bb.0:
1157; GFX11-NEXT:    v_mov_b32_e32 v0, 0
1158; GFX11-NEXT:    global_load_u16 v0, v0, s[2:3] offset:8
1159; GFX11-NEXT:    s_waitcnt vmcnt(0)
1160; GFX11-NEXT:    v_readfirstlane_b32 s0, v0
1161; GFX11-NEXT:    ; return to shader part epilog
1162  %vector = load <8 x i16>, ptr addrspace(4) %ptr
1163  %element = extractelement <8 x i16> %vector, i32 4
1164  ret i16 %element
1165}
1166
; Constant-index extract of element 5 from a <8 x i16> loaded through an inreg
; addrspace(4) pointer (s[2:3] in the checks). The checks expect a single 16-bit
; load at byte offset 10. The GFX7, GFX9, GFX10 and GFX11 runs use the load's
; immediate offset; the GFX8 run adds 10 to the address with s_add_u32/s_addc_u32.
1167define amdgpu_ps i16 @extractelement_sgpr_v8i16_idx5(ptr addrspace(4) inreg %ptr) {
1168; GFX9-LABEL: extractelement_sgpr_v8i16_idx5:
1169; GFX9:       ; %bb.0:
1170; GFX9-NEXT:    v_mov_b32_e32 v0, 0
1171; GFX9-NEXT:    global_load_ushort v0, v0, s[2:3] offset:10
1172; GFX9-NEXT:    s_waitcnt vmcnt(0)
1173; GFX9-NEXT:    v_readfirstlane_b32 s0, v0
1174; GFX9-NEXT:    ; return to shader part epilog
1175;
1176; GFX8-LABEL: extractelement_sgpr_v8i16_idx5:
1177; GFX8:       ; %bb.0:
1178; GFX8-NEXT:    s_add_u32 s0, s2, 10
1179; GFX8-NEXT:    s_addc_u32 s1, s3, 0
1180; GFX8-NEXT:    v_mov_b32_e32 v0, s0
1181; GFX8-NEXT:    v_mov_b32_e32 v1, s1
1182; GFX8-NEXT:    flat_load_ushort v0, v[0:1]
1183; GFX8-NEXT:    s_waitcnt vmcnt(0)
1184; GFX8-NEXT:    v_readfirstlane_b32 s0, v0
1185; GFX8-NEXT:    ; return to shader part epilog
1186;
1187; GFX7-LABEL: extractelement_sgpr_v8i16_idx5:
1188; GFX7:       ; %bb.0:
1189; GFX7-NEXT:    s_mov_b32 s0, s2
1190; GFX7-NEXT:    s_mov_b32 s1, s3
1191; GFX7-NEXT:    s_mov_b32 s2, -1
1192; GFX7-NEXT:    s_mov_b32 s3, 0xf000
1193; GFX7-NEXT:    buffer_load_ushort v0, off, s[0:3], 0 offset:10
1194; GFX7-NEXT:    s_waitcnt vmcnt(0)
1195; GFX7-NEXT:    v_readfirstlane_b32 s0, v0
1196; GFX7-NEXT:    ; return to shader part epilog
1197;
1198; GFX10-LABEL: extractelement_sgpr_v8i16_idx5:
1199; GFX10:       ; %bb.0:
1200; GFX10-NEXT:    v_mov_b32_e32 v0, 0
1201; GFX10-NEXT:    global_load_ushort v0, v0, s[2:3] offset:10
1202; GFX10-NEXT:    s_waitcnt vmcnt(0)
1203; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
1204; GFX10-NEXT:    ; return to shader part epilog
1205;
1206; GFX11-LABEL: extractelement_sgpr_v8i16_idx5:
1207; GFX11:       ; %bb.0:
1208; GFX11-NEXT:    v_mov_b32_e32 v0, 0
1209; GFX11-NEXT:    global_load_u16 v0, v0, s[2:3] offset:10
1210; GFX11-NEXT:    s_waitcnt vmcnt(0)
1211; GFX11-NEXT:    v_readfirstlane_b32 s0, v0
1212; GFX11-NEXT:    ; return to shader part epilog
1213  %vector = load <8 x i16>, ptr addrspace(4) %ptr
1214  %element = extractelement <8 x i16> %vector, i32 5
1215  ret i16 %element
1216}
1217
; Constant-index extract of element 6 from a <8 x i16> loaded through an inreg
; addrspace(4) pointer (s[2:3] in the checks). The checks expect a single 16-bit
; load at byte offset 12. The GFX7, GFX9, GFX10 and GFX11 runs use the load's
; immediate offset; the GFX8 run adds 12 to the address with s_add_u32/s_addc_u32.
1218define amdgpu_ps i16 @extractelement_sgpr_v8i16_idx6(ptr addrspace(4) inreg %ptr) {
1219; GFX9-LABEL: extractelement_sgpr_v8i16_idx6:
1220; GFX9:       ; %bb.0:
1221; GFX9-NEXT:    v_mov_b32_e32 v0, 0
1222; GFX9-NEXT:    global_load_ushort v0, v0, s[2:3] offset:12
1223; GFX9-NEXT:    s_waitcnt vmcnt(0)
1224; GFX9-NEXT:    v_readfirstlane_b32 s0, v0
1225; GFX9-NEXT:    ; return to shader part epilog
1226;
1227; GFX8-LABEL: extractelement_sgpr_v8i16_idx6:
1228; GFX8:       ; %bb.0:
1229; GFX8-NEXT:    s_add_u32 s0, s2, 12
1230; GFX8-NEXT:    s_addc_u32 s1, s3, 0
1231; GFX8-NEXT:    v_mov_b32_e32 v0, s0
1232; GFX8-NEXT:    v_mov_b32_e32 v1, s1
1233; GFX8-NEXT:    flat_load_ushort v0, v[0:1]
1234; GFX8-NEXT:    s_waitcnt vmcnt(0)
1235; GFX8-NEXT:    v_readfirstlane_b32 s0, v0
1236; GFX8-NEXT:    ; return to shader part epilog
1237;
1238; GFX7-LABEL: extractelement_sgpr_v8i16_idx6:
1239; GFX7:       ; %bb.0:
1240; GFX7-NEXT:    s_mov_b32 s0, s2
1241; GFX7-NEXT:    s_mov_b32 s1, s3
1242; GFX7-NEXT:    s_mov_b32 s2, -1
1243; GFX7-NEXT:    s_mov_b32 s3, 0xf000
1244; GFX7-NEXT:    buffer_load_ushort v0, off, s[0:3], 0 offset:12
1245; GFX7-NEXT:    s_waitcnt vmcnt(0)
1246; GFX7-NEXT:    v_readfirstlane_b32 s0, v0
1247; GFX7-NEXT:    ; return to shader part epilog
1248;
1249; GFX10-LABEL: extractelement_sgpr_v8i16_idx6:
1250; GFX10:       ; %bb.0:
1251; GFX10-NEXT:    v_mov_b32_e32 v0, 0
1252; GFX10-NEXT:    global_load_ushort v0, v0, s[2:3] offset:12
1253; GFX10-NEXT:    s_waitcnt vmcnt(0)
1254; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
1255; GFX10-NEXT:    ; return to shader part epilog
1256;
1257; GFX11-LABEL: extractelement_sgpr_v8i16_idx6:
1258; GFX11:       ; %bb.0:
1259; GFX11-NEXT:    v_mov_b32_e32 v0, 0
1260; GFX11-NEXT:    global_load_u16 v0, v0, s[2:3] offset:12
1261; GFX11-NEXT:    s_waitcnt vmcnt(0)
1262; GFX11-NEXT:    v_readfirstlane_b32 s0, v0
1263; GFX11-NEXT:    ; return to shader part epilog
1264  %vector = load <8 x i16>, ptr addrspace(4) %ptr
1265  %element = extractelement <8 x i16> %vector, i32 6
1266  ret i16 %element
1267}
1268
; Constant-index extract of element 7 (the last one) from a <8 x i16> loaded
; through an inreg addrspace(4) pointer (s[2:3] in the checks). The checks expect
; a single 16-bit load at byte offset 14. The GFX7, GFX9, GFX10 and GFX11 runs use
; the load's immediate offset; the GFX8 run adds 14 with s_add_u32/s_addc_u32.
1269define amdgpu_ps i16 @extractelement_sgpr_v8i16_idx7(ptr addrspace(4) inreg %ptr) {
1270; GFX9-LABEL: extractelement_sgpr_v8i16_idx7:
1271; GFX9:       ; %bb.0:
1272; GFX9-NEXT:    v_mov_b32_e32 v0, 0
1273; GFX9-NEXT:    global_load_ushort v0, v0, s[2:3] offset:14
1274; GFX9-NEXT:    s_waitcnt vmcnt(0)
1275; GFX9-NEXT:    v_readfirstlane_b32 s0, v0
1276; GFX9-NEXT:    ; return to shader part epilog
1277;
1278; GFX8-LABEL: extractelement_sgpr_v8i16_idx7:
1279; GFX8:       ; %bb.0:
1280; GFX8-NEXT:    s_add_u32 s0, s2, 14
1281; GFX8-NEXT:    s_addc_u32 s1, s3, 0
1282; GFX8-NEXT:    v_mov_b32_e32 v0, s0
1283; GFX8-NEXT:    v_mov_b32_e32 v1, s1
1284; GFX8-NEXT:    flat_load_ushort v0, v[0:1]
1285; GFX8-NEXT:    s_waitcnt vmcnt(0)
1286; GFX8-NEXT:    v_readfirstlane_b32 s0, v0
1287; GFX8-NEXT:    ; return to shader part epilog
1288;
1289; GFX7-LABEL: extractelement_sgpr_v8i16_idx7:
1290; GFX7:       ; %bb.0:
1291; GFX7-NEXT:    s_mov_b32 s0, s2
1292; GFX7-NEXT:    s_mov_b32 s1, s3
1293; GFX7-NEXT:    s_mov_b32 s2, -1
1294; GFX7-NEXT:    s_mov_b32 s3, 0xf000
1295; GFX7-NEXT:    buffer_load_ushort v0, off, s[0:3], 0 offset:14
1296; GFX7-NEXT:    s_waitcnt vmcnt(0)
1297; GFX7-NEXT:    v_readfirstlane_b32 s0, v0
1298; GFX7-NEXT:    ; return to shader part epilog
1299;
1300; GFX10-LABEL: extractelement_sgpr_v8i16_idx7:
1301; GFX10:       ; %bb.0:
1302; GFX10-NEXT:    v_mov_b32_e32 v0, 0
1303; GFX10-NEXT:    global_load_ushort v0, v0, s[2:3] offset:14
1304; GFX10-NEXT:    s_waitcnt vmcnt(0)
1305; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
1306; GFX10-NEXT:    ; return to shader part epilog
1307;
1308; GFX11-LABEL: extractelement_sgpr_v8i16_idx7:
1309; GFX11:       ; %bb.0:
1310; GFX11-NEXT:    v_mov_b32_e32 v0, 0
1311; GFX11-NEXT:    global_load_u16 v0, v0, s[2:3] offset:14
1312; GFX11-NEXT:    s_waitcnt vmcnt(0)
1313; GFX11-NEXT:    v_readfirstlane_b32 s0, v0
1314; GFX11-NEXT:    ; return to shader part epilog
1315  %vector = load <8 x i16>, ptr addrspace(4) %ptr
1316  %element = extractelement <8 x i16> %vector, i32 7
1317  ret i16 %element
1318}
1319
; Constant-index extract of element 0 from a <8 x i16> loaded through an
; addrspace(1) pointer held in v[0:1]. The checks expect a single 16-bit load
; straight from the incoming address, with no offset and no address arithmetic.
1320define i16 @extractelement_vgpr_v8i16_idx0(ptr addrspace(1) %ptr) {
1321; GFX9-LABEL: extractelement_vgpr_v8i16_idx0:
1322; GFX9:       ; %bb.0:
1323; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1324; GFX9-NEXT:    global_load_ushort v0, v[0:1], off
1325; GFX9-NEXT:    s_waitcnt vmcnt(0)
1326; GFX9-NEXT:    s_setpc_b64 s[30:31]
1327;
1328; GFX8-LABEL: extractelement_vgpr_v8i16_idx0:
1329; GFX8:       ; %bb.0:
1330; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1331; GFX8-NEXT:    flat_load_ushort v0, v[0:1]
1332; GFX8-NEXT:    s_waitcnt vmcnt(0)
1333; GFX8-NEXT:    s_setpc_b64 s[30:31]
1334;
1335; GFX7-LABEL: extractelement_vgpr_v8i16_idx0:
1336; GFX7:       ; %bb.0:
1337; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1338; GFX7-NEXT:    s_mov_b32 s6, 0
1339; GFX7-NEXT:    s_mov_b32 s7, 0xf000
1340; GFX7-NEXT:    s_mov_b64 s[4:5], 0
1341; GFX7-NEXT:    buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64
1342; GFX7-NEXT:    s_waitcnt vmcnt(0)
1343; GFX7-NEXT:    s_setpc_b64 s[30:31]
1344;
1345; GFX10-LABEL: extractelement_vgpr_v8i16_idx0:
1346; GFX10:       ; %bb.0:
1347; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1348; GFX10-NEXT:    global_load_ushort v0, v[0:1], off
1349; GFX10-NEXT:    s_waitcnt vmcnt(0)
1350; GFX10-NEXT:    s_setpc_b64 s[30:31]
1351;
1352; GFX11-LABEL: extractelement_vgpr_v8i16_idx0:
1353; GFX11:       ; %bb.0:
1354; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1355; GFX11-NEXT:    global_load_u16 v0, v[0:1], off
1356; GFX11-NEXT:    s_waitcnt vmcnt(0)
1357; GFX11-NEXT:    s_setpc_b64 s[30:31]
1358  %vector = load <8 x i16>, ptr addrspace(1) %ptr
1359  %element = extractelement <8 x i16> %vector, i32 0
1360  ret i16 %element
1361}
1362
; Constant-index extract of element 1 from a <8 x i16> loaded through an
; addrspace(1) pointer held in v[0:1]. The checks expect a single 16-bit load at
; byte offset 2. The GFX7, GFX9, GFX10 and GFX11 runs use the load's immediate
; offset; the GFX8 run adds 2 to the address with v_add_u32/v_addc_u32.
1363define i16 @extractelement_vgpr_v8i16_idx1(ptr addrspace(1) %ptr) {
1364; GFX9-LABEL: extractelement_vgpr_v8i16_idx1:
1365; GFX9:       ; %bb.0:
1366; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1367; GFX9-NEXT:    global_load_ushort v0, v[0:1], off offset:2
1368; GFX9-NEXT:    s_waitcnt vmcnt(0)
1369; GFX9-NEXT:    s_setpc_b64 s[30:31]
1370;
1371; GFX8-LABEL: extractelement_vgpr_v8i16_idx1:
1372; GFX8:       ; %bb.0:
1373; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1374; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 2, v0
1375; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
1376; GFX8-NEXT:    flat_load_ushort v0, v[0:1]
1377; GFX8-NEXT:    s_waitcnt vmcnt(0)
1378; GFX8-NEXT:    s_setpc_b64 s[30:31]
1379;
1380; GFX7-LABEL: extractelement_vgpr_v8i16_idx1:
1381; GFX7:       ; %bb.0:
1382; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1383; GFX7-NEXT:    s_mov_b32 s6, 0
1384; GFX7-NEXT:    s_mov_b32 s7, 0xf000
1385; GFX7-NEXT:    s_mov_b64 s[4:5], 0
1386; GFX7-NEXT:    buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64 offset:2
1387; GFX7-NEXT:    s_waitcnt vmcnt(0)
1388; GFX7-NEXT:    s_setpc_b64 s[30:31]
1389;
1390; GFX10-LABEL: extractelement_vgpr_v8i16_idx1:
1391; GFX10:       ; %bb.0:
1392; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1393; GFX10-NEXT:    global_load_ushort v0, v[0:1], off offset:2
1394; GFX10-NEXT:    s_waitcnt vmcnt(0)
1395; GFX10-NEXT:    s_setpc_b64 s[30:31]
1396;
1397; GFX11-LABEL: extractelement_vgpr_v8i16_idx1:
1398; GFX11:       ; %bb.0:
1399; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1400; GFX11-NEXT:    global_load_u16 v0, v[0:1], off offset:2
1401; GFX11-NEXT:    s_waitcnt vmcnt(0)
1402; GFX11-NEXT:    s_setpc_b64 s[30:31]
1403  %vector = load <8 x i16>, ptr addrspace(1) %ptr
1404  %element = extractelement <8 x i16> %vector, i32 1
1405  ret i16 %element
1406}
1407
; Constant-index extract of element 2 from a <8 x i16> loaded through an
; addrspace(1) pointer held in v[0:1]. The checks expect a single 16-bit load at
; byte offset 4. The GFX7, GFX9, GFX10 and GFX11 runs use the load's immediate
; offset; the GFX8 run adds 4 to the address with v_add_u32/v_addc_u32.
1408define i16 @extractelement_vgpr_v8i16_idx2(ptr addrspace(1) %ptr) {
1409; GFX9-LABEL: extractelement_vgpr_v8i16_idx2:
1410; GFX9:       ; %bb.0:
1411; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1412; GFX9-NEXT:    global_load_ushort v0, v[0:1], off offset:4
1413; GFX9-NEXT:    s_waitcnt vmcnt(0)
1414; GFX9-NEXT:    s_setpc_b64 s[30:31]
1415;
1416; GFX8-LABEL: extractelement_vgpr_v8i16_idx2:
1417; GFX8:       ; %bb.0:
1418; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1419; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 4, v0
1420; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
1421; GFX8-NEXT:    flat_load_ushort v0, v[0:1]
1422; GFX8-NEXT:    s_waitcnt vmcnt(0)
1423; GFX8-NEXT:    s_setpc_b64 s[30:31]
1424;
1425; GFX7-LABEL: extractelement_vgpr_v8i16_idx2:
1426; GFX7:       ; %bb.0:
1427; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1428; GFX7-NEXT:    s_mov_b32 s6, 0
1429; GFX7-NEXT:    s_mov_b32 s7, 0xf000
1430; GFX7-NEXT:    s_mov_b64 s[4:5], 0
1431; GFX7-NEXT:    buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64 offset:4
1432; GFX7-NEXT:    s_waitcnt vmcnt(0)
1433; GFX7-NEXT:    s_setpc_b64 s[30:31]
1434;
1435; GFX10-LABEL: extractelement_vgpr_v8i16_idx2:
1436; GFX10:       ; %bb.0:
1437; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1438; GFX10-NEXT:    global_load_ushort v0, v[0:1], off offset:4
1439; GFX10-NEXT:    s_waitcnt vmcnt(0)
1440; GFX10-NEXT:    s_setpc_b64 s[30:31]
1441;
1442; GFX11-LABEL: extractelement_vgpr_v8i16_idx2:
1443; GFX11:       ; %bb.0:
1444; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1445; GFX11-NEXT:    global_load_u16 v0, v[0:1], off offset:4
1446; GFX11-NEXT:    s_waitcnt vmcnt(0)
1447; GFX11-NEXT:    s_setpc_b64 s[30:31]
1448  %vector = load <8 x i16>, ptr addrspace(1) %ptr
1449  %element = extractelement <8 x i16> %vector, i32 2
1450  ret i16 %element
1451}
1452
; Constant-index extract of element 3 from a <8 x i16> loaded through an
; addrspace(1) pointer held in v[0:1]. The checks expect a single 16-bit load at
; byte offset 6. The GFX7, GFX9, GFX10 and GFX11 runs use the load's immediate
; offset; the GFX8 run adds 6 to the address with v_add_u32/v_addc_u32.
1453define i16 @extractelement_vgpr_v8i16_idx3(ptr addrspace(1) %ptr) {
1454; GFX9-LABEL: extractelement_vgpr_v8i16_idx3:
1455; GFX9:       ; %bb.0:
1456; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1457; GFX9-NEXT:    global_load_ushort v0, v[0:1], off offset:6
1458; GFX9-NEXT:    s_waitcnt vmcnt(0)
1459; GFX9-NEXT:    s_setpc_b64 s[30:31]
1460;
1461; GFX8-LABEL: extractelement_vgpr_v8i16_idx3:
1462; GFX8:       ; %bb.0:
1463; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1464; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 6, v0
1465; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
1466; GFX8-NEXT:    flat_load_ushort v0, v[0:1]
1467; GFX8-NEXT:    s_waitcnt vmcnt(0)
1468; GFX8-NEXT:    s_setpc_b64 s[30:31]
1469;
1470; GFX7-LABEL: extractelement_vgpr_v8i16_idx3:
1471; GFX7:       ; %bb.0:
1472; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1473; GFX7-NEXT:    s_mov_b32 s6, 0
1474; GFX7-NEXT:    s_mov_b32 s7, 0xf000
1475; GFX7-NEXT:    s_mov_b64 s[4:5], 0
1476; GFX7-NEXT:    buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64 offset:6
1477; GFX7-NEXT:    s_waitcnt vmcnt(0)
1478; GFX7-NEXT:    s_setpc_b64 s[30:31]
1479;
1480; GFX10-LABEL: extractelement_vgpr_v8i16_idx3:
1481; GFX10:       ; %bb.0:
1482; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1483; GFX10-NEXT:    global_load_ushort v0, v[0:1], off offset:6
1484; GFX10-NEXT:    s_waitcnt vmcnt(0)
1485; GFX10-NEXT:    s_setpc_b64 s[30:31]
1486;
1487; GFX11-LABEL: extractelement_vgpr_v8i16_idx3:
1488; GFX11:       ; %bb.0:
1489; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1490; GFX11-NEXT:    global_load_u16 v0, v[0:1], off offset:6
1491; GFX11-NEXT:    s_waitcnt vmcnt(0)
1492; GFX11-NEXT:    s_setpc_b64 s[30:31]
1493  %vector = load <8 x i16>, ptr addrspace(1) %ptr
1494  %element = extractelement <8 x i16> %vector, i32 3
1495  ret i16 %element
1496}
1497
; Constant-index extract of element 4 from a <8 x i16> loaded through an
; addrspace(1) pointer held in v[0:1]. The checks expect a single 16-bit load at
; byte offset 8. The GFX7, GFX9, GFX10 and GFX11 runs use the load's immediate
; offset; the GFX8 run adds 8 to the address with v_add_u32/v_addc_u32.
1498define i16 @extractelement_vgpr_v8i16_idx4(ptr addrspace(1) %ptr) {
1499; GFX9-LABEL: extractelement_vgpr_v8i16_idx4:
1500; GFX9:       ; %bb.0:
1501; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1502; GFX9-NEXT:    global_load_ushort v0, v[0:1], off offset:8
1503; GFX9-NEXT:    s_waitcnt vmcnt(0)
1504; GFX9-NEXT:    s_setpc_b64 s[30:31]
1505;
1506; GFX8-LABEL: extractelement_vgpr_v8i16_idx4:
1507; GFX8:       ; %bb.0:
1508; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1509; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 8, v0
1510; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
1511; GFX8-NEXT:    flat_load_ushort v0, v[0:1]
1512; GFX8-NEXT:    s_waitcnt vmcnt(0)
1513; GFX8-NEXT:    s_setpc_b64 s[30:31]
1514;
1515; GFX7-LABEL: extractelement_vgpr_v8i16_idx4:
1516; GFX7:       ; %bb.0:
1517; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1518; GFX7-NEXT:    s_mov_b32 s6, 0
1519; GFX7-NEXT:    s_mov_b32 s7, 0xf000
1520; GFX7-NEXT:    s_mov_b64 s[4:5], 0
1521; GFX7-NEXT:    buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64 offset:8
1522; GFX7-NEXT:    s_waitcnt vmcnt(0)
1523; GFX7-NEXT:    s_setpc_b64 s[30:31]
1524;
1525; GFX10-LABEL: extractelement_vgpr_v8i16_idx4:
1526; GFX10:       ; %bb.0:
1527; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1528; GFX10-NEXT:    global_load_ushort v0, v[0:1], off offset:8
1529; GFX10-NEXT:    s_waitcnt vmcnt(0)
1530; GFX10-NEXT:    s_setpc_b64 s[30:31]
1531;
1532; GFX11-LABEL: extractelement_vgpr_v8i16_idx4:
1533; GFX11:       ; %bb.0:
1534; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1535; GFX11-NEXT:    global_load_u16 v0, v[0:1], off offset:8
1536; GFX11-NEXT:    s_waitcnt vmcnt(0)
1537; GFX11-NEXT:    s_setpc_b64 s[30:31]
1538  %vector = load <8 x i16>, ptr addrspace(1) %ptr
1539  %element = extractelement <8 x i16> %vector, i32 4
1540  ret i16 %element
1541}
1542
; Constant-index extract of element 5 from a <8 x i16> loaded through an
; addrspace(1) pointer held in v[0:1]. The checks expect a single 16-bit load at
; byte offset 10. The GFX7, GFX9, GFX10 and GFX11 runs use the load's immediate
; offset; the GFX8 run adds 10 to the address with v_add_u32/v_addc_u32.
1543define i16 @extractelement_vgpr_v8i16_idx5(ptr addrspace(1) %ptr) {
1544; GFX9-LABEL: extractelement_vgpr_v8i16_idx5:
1545; GFX9:       ; %bb.0:
1546; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1547; GFX9-NEXT:    global_load_ushort v0, v[0:1], off offset:10
1548; GFX9-NEXT:    s_waitcnt vmcnt(0)
1549; GFX9-NEXT:    s_setpc_b64 s[30:31]
1550;
1551; GFX8-LABEL: extractelement_vgpr_v8i16_idx5:
1552; GFX8:       ; %bb.0:
1553; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1554; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 10, v0
1555; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
1556; GFX8-NEXT:    flat_load_ushort v0, v[0:1]
1557; GFX8-NEXT:    s_waitcnt vmcnt(0)
1558; GFX8-NEXT:    s_setpc_b64 s[30:31]
1559;
1560; GFX7-LABEL: extractelement_vgpr_v8i16_idx5:
1561; GFX7:       ; %bb.0:
1562; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1563; GFX7-NEXT:    s_mov_b32 s6, 0
1564; GFX7-NEXT:    s_mov_b32 s7, 0xf000
1565; GFX7-NEXT:    s_mov_b64 s[4:5], 0
1566; GFX7-NEXT:    buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64 offset:10
1567; GFX7-NEXT:    s_waitcnt vmcnt(0)
1568; GFX7-NEXT:    s_setpc_b64 s[30:31]
1569;
1570; GFX10-LABEL: extractelement_vgpr_v8i16_idx5:
1571; GFX10:       ; %bb.0:
1572; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1573; GFX10-NEXT:    global_load_ushort v0, v[0:1], off offset:10
1574; GFX10-NEXT:    s_waitcnt vmcnt(0)
1575; GFX10-NEXT:    s_setpc_b64 s[30:31]
1576;
1577; GFX11-LABEL: extractelement_vgpr_v8i16_idx5:
1578; GFX11:       ; %bb.0:
1579; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1580; GFX11-NEXT:    global_load_u16 v0, v[0:1], off offset:10
1581; GFX11-NEXT:    s_waitcnt vmcnt(0)
1582; GFX11-NEXT:    s_setpc_b64 s[30:31]
1583  %vector = load <8 x i16>, ptr addrspace(1) %ptr
1584  %element = extractelement <8 x i16> %vector, i32 5
1585  ret i16 %element
1586}
1587
; Constant-index extract of element 6 from an <8 x i16> that is loaded through
; a global (addrspace(1)) pointer passed in v[0:1]. The autogenerated checks
; below expect a single 16-bit load at byte offset 12 (element 6 * 2 bytes)
; rather than a load of the whole vector. The gfx900, gfx1010 and gfx1100 runs
; fold the offset into a global load, the hawaii run folds it into an addr64
; buffer load, and the fiji run adds it to the 64-bit address with an
; add / add-with-carry pair before a flat load.
1588define i16 @extractelement_vgpr_v8i16_idx6(ptr addrspace(1) %ptr) {
1589; GFX9-LABEL: extractelement_vgpr_v8i16_idx6:
1590; GFX9:       ; %bb.0:
1591; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1592; GFX9-NEXT:    global_load_ushort v0, v[0:1], off offset:12
1593; GFX9-NEXT:    s_waitcnt vmcnt(0)
1594; GFX9-NEXT:    s_setpc_b64 s[30:31]
1595;
1596; GFX8-LABEL: extractelement_vgpr_v8i16_idx6:
1597; GFX8:       ; %bb.0:
1598; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1599; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 12, v0
1600; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
1601; GFX8-NEXT:    flat_load_ushort v0, v[0:1]
1602; GFX8-NEXT:    s_waitcnt vmcnt(0)
1603; GFX8-NEXT:    s_setpc_b64 s[30:31]
1604;
1605; GFX7-LABEL: extractelement_vgpr_v8i16_idx6:
1606; GFX7:       ; %bb.0:
1607; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1608; GFX7-NEXT:    s_mov_b32 s6, 0
1609; GFX7-NEXT:    s_mov_b32 s7, 0xf000
1610; GFX7-NEXT:    s_mov_b64 s[4:5], 0
1611; GFX7-NEXT:    buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64 offset:12
1612; GFX7-NEXT:    s_waitcnt vmcnt(0)
1613; GFX7-NEXT:    s_setpc_b64 s[30:31]
1614;
1615; GFX10-LABEL: extractelement_vgpr_v8i16_idx6:
1616; GFX10:       ; %bb.0:
1617; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1618; GFX10-NEXT:    global_load_ushort v0, v[0:1], off offset:12
1619; GFX10-NEXT:    s_waitcnt vmcnt(0)
1620; GFX10-NEXT:    s_setpc_b64 s[30:31]
1621;
1622; GFX11-LABEL: extractelement_vgpr_v8i16_idx6:
1623; GFX11:       ; %bb.0:
1624; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1625; GFX11-NEXT:    global_load_u16 v0, v[0:1], off offset:12
1626; GFX11-NEXT:    s_waitcnt vmcnt(0)
1627; GFX11-NEXT:    s_setpc_b64 s[30:31]
; Only element 6 of the loaded vector reaches the return value.
1628  %vector = load <8 x i16>, ptr addrspace(1) %ptr
1629  %element = extractelement <8 x i16> %vector, i32 6
1630  ret i16 %element
1631}
1632
; Constant-index extract of element 7 (the last lane) from an <8 x i16> that is
; loaded through a global (addrspace(1)) pointer passed in v[0:1]. The
; autogenerated checks below expect a single 16-bit load at byte offset 14
; (element 7 * 2 bytes) rather than a load of the whole vector. The gfx900,
; gfx1010 and gfx1100 runs fold the offset into a global load, the hawaii run
; folds it into an addr64 buffer load, and the fiji run adds it to the 64-bit
; address with an add / add-with-carry pair before a flat load.
1633define i16 @extractelement_vgpr_v8i16_idx7(ptr addrspace(1) %ptr) {
1634; GFX9-LABEL: extractelement_vgpr_v8i16_idx7:
1635; GFX9:       ; %bb.0:
1636; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1637; GFX9-NEXT:    global_load_ushort v0, v[0:1], off offset:14
1638; GFX9-NEXT:    s_waitcnt vmcnt(0)
1639; GFX9-NEXT:    s_setpc_b64 s[30:31]
1640;
1641; GFX8-LABEL: extractelement_vgpr_v8i16_idx7:
1642; GFX8:       ; %bb.0:
1643; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1644; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 14, v0
1645; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
1646; GFX8-NEXT:    flat_load_ushort v0, v[0:1]
1647; GFX8-NEXT:    s_waitcnt vmcnt(0)
1648; GFX8-NEXT:    s_setpc_b64 s[30:31]
1649;
1650; GFX7-LABEL: extractelement_vgpr_v8i16_idx7:
1651; GFX7:       ; %bb.0:
1652; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1653; GFX7-NEXT:    s_mov_b32 s6, 0
1654; GFX7-NEXT:    s_mov_b32 s7, 0xf000
1655; GFX7-NEXT:    s_mov_b64 s[4:5], 0
1656; GFX7-NEXT:    buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64 offset:14
1657; GFX7-NEXT:    s_waitcnt vmcnt(0)
1658; GFX7-NEXT:    s_setpc_b64 s[30:31]
1659;
1660; GFX10-LABEL: extractelement_vgpr_v8i16_idx7:
1661; GFX10:       ; %bb.0:
1662; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1663; GFX10-NEXT:    global_load_ushort v0, v[0:1], off offset:14
1664; GFX10-NEXT:    s_waitcnt vmcnt(0)
1665; GFX10-NEXT:    s_setpc_b64 s[30:31]
1666;
1667; GFX11-LABEL: extractelement_vgpr_v8i16_idx7:
1668; GFX11:       ; %bb.0:
1669; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1670; GFX11-NEXT:    global_load_u16 v0, v[0:1], off offset:14
1671; GFX11-NEXT:    s_waitcnt vmcnt(0)
1672; GFX11-NEXT:    s_setpc_b64 s[30:31]
; Only element 7 of the loaded vector reaches the return value.
1673  %vector = load <8 x i16>, ptr addrspace(1) %ptr
1674  %element = extractelement <8 x i16> %vector, i32 7
1675  ret i16 %element
1676}
1677;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
1678; GCN: {{.*}}
1679