xref: /llvm-project/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.i8.ll (revision 41507fe595d0fa3d81e151d70431d51897f8d14d)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s
3; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX8 %s
4; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX7 %s
5; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10 %s
6; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11 %s
7
; Dynamic-index extractelement from a <4 x i8> loaded through an inreg
; addrspace(4) pointer; the index is also passed inreg (amdgpu_ps function).
; Every expected sequence below masks the index with 3 and performs a single
; unsigned-byte load at %ptr + (%idx & 3) instead of loading the whole vector,
; then moves the loaded byte into s0 with v_readfirstlane_b32.
8define amdgpu_ps i8 @extractelement_sgpr_v4i8_sgpr_idx(ptr addrspace(4) inreg %ptr, i32 inreg %idx) {
9; GFX9-LABEL: extractelement_sgpr_v4i8_sgpr_idx:
10; GFX9:       ; %bb.0:
11; GFX9-NEXT:    s_and_b32 s0, s4, 3
12; GFX9-NEXT:    s_ashr_i32 s1, s0, 31
13; GFX9-NEXT:    s_add_u32 s0, s2, s0
14; GFX9-NEXT:    s_addc_u32 s1, s3, s1
15; GFX9-NEXT:    v_mov_b32_e32 v0, 0
16; GFX9-NEXT:    global_load_ubyte v0, v0, s[0:1]
17; GFX9-NEXT:    s_waitcnt vmcnt(0)
18; GFX9-NEXT:    v_readfirstlane_b32 s0, v0
19; GFX9-NEXT:    ; return to shader part epilog
20;
21; GFX8-LABEL: extractelement_sgpr_v4i8_sgpr_idx:
22; GFX8:       ; %bb.0:
23; GFX8-NEXT:    s_and_b32 s0, s4, 3
24; GFX8-NEXT:    s_ashr_i32 s1, s0, 31
25; GFX8-NEXT:    s_add_u32 s0, s2, s0
26; GFX8-NEXT:    s_addc_u32 s1, s3, s1
27; GFX8-NEXT:    v_mov_b32_e32 v0, s0
28; GFX8-NEXT:    v_mov_b32_e32 v1, s1
29; GFX8-NEXT:    flat_load_ubyte v0, v[0:1]
30; GFX8-NEXT:    s_waitcnt vmcnt(0)
31; GFX8-NEXT:    v_readfirstlane_b32 s0, v0
32; GFX8-NEXT:    ; return to shader part epilog
33;
34; GFX7-LABEL: extractelement_sgpr_v4i8_sgpr_idx:
35; GFX7:       ; %bb.0:
36; GFX7-NEXT:    s_and_b32 s4, s4, 3
37; GFX7-NEXT:    s_ashr_i32 s5, s4, 31
38; GFX7-NEXT:    v_mov_b32_e32 v0, s4
39; GFX7-NEXT:    s_mov_b32 s0, s2
40; GFX7-NEXT:    s_mov_b32 s1, s3
41; GFX7-NEXT:    s_mov_b32 s2, 0
42; GFX7-NEXT:    s_mov_b32 s3, 0xf000
43; GFX7-NEXT:    v_mov_b32_e32 v1, s5
44; GFX7-NEXT:    buffer_load_ubyte v0, v[0:1], s[0:3], 0 addr64
45; GFX7-NEXT:    s_waitcnt vmcnt(0)
46; GFX7-NEXT:    v_readfirstlane_b32 s0, v0
47; GFX7-NEXT:    ; return to shader part epilog
48;
49; GFX10-LABEL: extractelement_sgpr_v4i8_sgpr_idx:
50; GFX10:       ; %bb.0:
51; GFX10-NEXT:    s_and_b32 s0, s4, 3
52; GFX10-NEXT:    v_mov_b32_e32 v0, 0
53; GFX10-NEXT:    s_ashr_i32 s1, s0, 31
54; GFX10-NEXT:    s_add_u32 s0, s2, s0
55; GFX10-NEXT:    s_addc_u32 s1, s3, s1
56; GFX10-NEXT:    global_load_ubyte v0, v0, s[0:1]
57; GFX10-NEXT:    s_waitcnt vmcnt(0)
58; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
59; GFX10-NEXT:    ; return to shader part epilog
60;
61; GFX11-LABEL: extractelement_sgpr_v4i8_sgpr_idx:
62; GFX11:       ; %bb.0:
63; GFX11-NEXT:    s_and_b32 s0, s4, 3
64; GFX11-NEXT:    v_mov_b32_e32 v0, 0
65; GFX11-NEXT:    s_ashr_i32 s1, s0, 31
66; GFX11-NEXT:    s_add_u32 s0, s2, s0
67; GFX11-NEXT:    s_addc_u32 s1, s3, s1
68; GFX11-NEXT:    global_load_u8 v0, v0, s[0:1]
69; GFX11-NEXT:    s_waitcnt vmcnt(0)
70; GFX11-NEXT:    v_readfirstlane_b32 s0, v0
71; GFX11-NEXT:    ; return to shader part epilog
72  %vector = load <4 x i8>, ptr addrspace(4) %ptr
73  %element = extractelement <4 x i8> %vector, i32 %idx
74  ret i8 %element
75}
76
; Dynamic-index extractelement from a <4 x i8> loaded through an addrspace(1)
; pointer that is not inreg (the checks read it from v[0:1]); the index is
; inreg and read from s2. The expected code masks the index with 3, widens it
; to 64 bits with an arithmetic shift by 31, and issues one unsigned-byte load.
; On the GFX9/GFX8/GFX10/GFX11 runs the widened index is copied to v2/v3 and
; added to the pointer; on the GFX7 run it is left in s[0:1] as the low half
; of the s[0:3] operand of buffer_load_ubyte while v[0:1] is the addr64 address.
77define amdgpu_ps i8 @extractelement_vgpr_v4i8_sgpr_idx(ptr addrspace(1) %ptr, i32 inreg %idx) {
78; GFX9-LABEL: extractelement_vgpr_v4i8_sgpr_idx:
79; GFX9:       ; %bb.0:
80; GFX9-NEXT:    s_and_b32 s0, s2, 3
81; GFX9-NEXT:    s_ashr_i32 s1, s0, 31
82; GFX9-NEXT:    v_mov_b32_e32 v3, s1
83; GFX9-NEXT:    v_mov_b32_e32 v2, s0
84; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
85; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
86; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off
87; GFX9-NEXT:    s_waitcnt vmcnt(0)
88; GFX9-NEXT:    v_readfirstlane_b32 s0, v0
89; GFX9-NEXT:    ; return to shader part epilog
90;
91; GFX8-LABEL: extractelement_vgpr_v4i8_sgpr_idx:
92; GFX8:       ; %bb.0:
93; GFX8-NEXT:    s_and_b32 s0, s2, 3
94; GFX8-NEXT:    s_ashr_i32 s1, s0, 31
95; GFX8-NEXT:    v_mov_b32_e32 v3, s1
96; GFX8-NEXT:    v_mov_b32_e32 v2, s0
97; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
98; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, v1, v3, vcc
99; GFX8-NEXT:    flat_load_ubyte v0, v[0:1]
100; GFX8-NEXT:    s_waitcnt vmcnt(0)
101; GFX8-NEXT:    v_readfirstlane_b32 s0, v0
102; GFX8-NEXT:    ; return to shader part epilog
103;
104; GFX7-LABEL: extractelement_vgpr_v4i8_sgpr_idx:
105; GFX7:       ; %bb.0:
106; GFX7-NEXT:    s_and_b32 s0, s2, 3
107; GFX7-NEXT:    s_ashr_i32 s1, s0, 31
108; GFX7-NEXT:    s_mov_b32 s2, 0
109; GFX7-NEXT:    s_mov_b32 s3, 0xf000
110; GFX7-NEXT:    buffer_load_ubyte v0, v[0:1], s[0:3], 0 addr64
111; GFX7-NEXT:    s_waitcnt vmcnt(0)
112; GFX7-NEXT:    v_readfirstlane_b32 s0, v0
113; GFX7-NEXT:    ; return to shader part epilog
114;
115; GFX10-LABEL: extractelement_vgpr_v4i8_sgpr_idx:
116; GFX10:       ; %bb.0:
117; GFX10-NEXT:    s_and_b32 s0, s2, 3
118; GFX10-NEXT:    s_ashr_i32 s1, s0, 31
119; GFX10-NEXT:    v_mov_b32_e32 v3, s1
120; GFX10-NEXT:    v_mov_b32_e32 v2, s0
121; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
122; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
123; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off
124; GFX10-NEXT:    s_waitcnt vmcnt(0)
125; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
126; GFX10-NEXT:    ; return to shader part epilog
127;
128; GFX11-LABEL: extractelement_vgpr_v4i8_sgpr_idx:
129; GFX11:       ; %bb.0:
130; GFX11-NEXT:    s_and_b32 s0, s2, 3
131; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
132; GFX11-NEXT:    s_ashr_i32 s1, s0, 31
133; GFX11-NEXT:    v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
134; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
135; GFX11-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
136; GFX11-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
137; GFX11-NEXT:    global_load_u8 v0, v[0:1], off
138; GFX11-NEXT:    s_waitcnt vmcnt(0)
139; GFX11-NEXT:    v_readfirstlane_b32 s0, v0
140; GFX11-NEXT:    ; return to shader part epilog
141  %vector = load <4 x i8>, ptr addrspace(1) %ptr
142  %element = extractelement <4 x i8> %vector, i32 %idx
143  ret i8 %element
144}
145
; Dynamic-index extractelement from a <4 x i8> loaded through an addrspace(1)
; pointer, declared without amdgpu_ps and without inreg arguments. The checks
; read the pointer from v[0:1] and the index from v2, mask the index with 3,
; widen it to 64 bits (arithmetic shift by 31 into v3), add it to the pointer
; and issue one unsigned-byte load into v0 before returning with
; s_setpc_b64 s[30:31].
146define i8 @extractelement_vgpr_v4i8_vgpr_idx(ptr addrspace(1) %ptr, i32 %idx) {
147; GFX9-LABEL: extractelement_vgpr_v4i8_vgpr_idx:
148; GFX9:       ; %bb.0:
149; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
150; GFX9-NEXT:    v_and_b32_e32 v2, 3, v2
151; GFX9-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
152; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
153; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
154; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off
155; GFX9-NEXT:    s_waitcnt vmcnt(0)
156; GFX9-NEXT:    s_setpc_b64 s[30:31]
157;
158; GFX8-LABEL: extractelement_vgpr_v4i8_vgpr_idx:
159; GFX8:       ; %bb.0:
160; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
161; GFX8-NEXT:    v_and_b32_e32 v2, 3, v2
162; GFX8-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
163; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
164; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, v1, v3, vcc
165; GFX8-NEXT:    flat_load_ubyte v0, v[0:1]
166; GFX8-NEXT:    s_waitcnt vmcnt(0)
167; GFX8-NEXT:    s_setpc_b64 s[30:31]
168;
169; GFX7-LABEL: extractelement_vgpr_v4i8_vgpr_idx:
170; GFX7:       ; %bb.0:
171; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
172; GFX7-NEXT:    v_and_b32_e32 v2, 3, v2
173; GFX7-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
174; GFX7-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
175; GFX7-NEXT:    v_addc_u32_e32 v1, vcc, v1, v3, vcc
176; GFX7-NEXT:    s_mov_b32 s6, 0
177; GFX7-NEXT:    s_mov_b32 s7, 0xf000
178; GFX7-NEXT:    s_mov_b64 s[4:5], 0
179; GFX7-NEXT:    buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64
180; GFX7-NEXT:    s_waitcnt vmcnt(0)
181; GFX7-NEXT:    s_setpc_b64 s[30:31]
182;
183; GFX10-LABEL: extractelement_vgpr_v4i8_vgpr_idx:
184; GFX10:       ; %bb.0:
185; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
186; GFX10-NEXT:    v_and_b32_e32 v2, 3, v2
187; GFX10-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
188; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
189; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
190; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off
191; GFX10-NEXT:    s_waitcnt vmcnt(0)
192; GFX10-NEXT:    s_setpc_b64 s[30:31]
193;
194; GFX11-LABEL: extractelement_vgpr_v4i8_vgpr_idx:
195; GFX11:       ; %bb.0:
196; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
197; GFX11-NEXT:    v_and_b32_e32 v2, 3, v2
198; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
199; GFX11-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
200; GFX11-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
201; GFX11-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
202; GFX11-NEXT:    global_load_u8 v0, v[0:1], off
203; GFX11-NEXT:    s_waitcnt vmcnt(0)
204; GFX11-NEXT:    s_setpc_b64 s[30:31]
205  %vector = load <4 x i8>, ptr addrspace(1) %ptr
206  %element = extractelement <4 x i8> %vector, i32 %idx
207  ret i8 %element
208}
209
; Dynamic-index extractelement from a <4 x i8> loaded through an inreg
; addrspace(4) pointer (read from s2/s3 in the checks) with a non-inreg index
; (read from v0). The expected code masks the index with 3, widens it to
; 64 bits and issues one unsigned-byte load, returning the byte in s0 through
; v_readfirstlane_b32. On the GFX9/GFX8/GFX10/GFX11 runs the pointer is copied
; into v[0:1] and the index added to it; on the GFX7 run the pointer is copied
; into s[0:1] of the s[0:3] operand and the widened index is the addr64
; address in v[0:1].
210define amdgpu_ps i8 @extractelement_sgpr_v4i8_vgpr_idx(ptr addrspace(4) inreg %ptr, i32 %idx) {
211; GFX9-LABEL: extractelement_sgpr_v4i8_vgpr_idx:
212; GFX9:       ; %bb.0:
213; GFX9-NEXT:    v_and_b32_e32 v2, 3, v0
214; GFX9-NEXT:    v_mov_b32_e32 v0, s2
215; GFX9-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
216; GFX9-NEXT:    v_mov_b32_e32 v1, s3
217; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
218; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
219; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off
220; GFX9-NEXT:    s_waitcnt vmcnt(0)
221; GFX9-NEXT:    v_readfirstlane_b32 s0, v0
222; GFX9-NEXT:    ; return to shader part epilog
223;
224; GFX8-LABEL: extractelement_sgpr_v4i8_vgpr_idx:
225; GFX8:       ; %bb.0:
226; GFX8-NEXT:    v_and_b32_e32 v2, 3, v0
227; GFX8-NEXT:    v_mov_b32_e32 v0, s2
228; GFX8-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
229; GFX8-NEXT:    v_mov_b32_e32 v1, s3
230; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
231; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, v1, v3, vcc
232; GFX8-NEXT:    flat_load_ubyte v0, v[0:1]
233; GFX8-NEXT:    s_waitcnt vmcnt(0)
234; GFX8-NEXT:    v_readfirstlane_b32 s0, v0
235; GFX8-NEXT:    ; return to shader part epilog
236;
237; GFX7-LABEL: extractelement_sgpr_v4i8_vgpr_idx:
238; GFX7:       ; %bb.0:
239; GFX7-NEXT:    v_and_b32_e32 v0, 3, v0
240; GFX7-NEXT:    s_mov_b32 s0, s2
241; GFX7-NEXT:    s_mov_b32 s1, s3
242; GFX7-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
243; GFX7-NEXT:    s_mov_b32 s2, 0
244; GFX7-NEXT:    s_mov_b32 s3, 0xf000
245; GFX7-NEXT:    buffer_load_ubyte v0, v[0:1], s[0:3], 0 addr64
246; GFX7-NEXT:    s_waitcnt vmcnt(0)
247; GFX7-NEXT:    v_readfirstlane_b32 s0, v0
248; GFX7-NEXT:    ; return to shader part epilog
249;
250; GFX10-LABEL: extractelement_sgpr_v4i8_vgpr_idx:
251; GFX10:       ; %bb.0:
252; GFX10-NEXT:    v_and_b32_e32 v2, 3, v0
253; GFX10-NEXT:    v_mov_b32_e32 v0, s2
254; GFX10-NEXT:    v_mov_b32_e32 v1, s3
255; GFX10-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
256; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
257; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
258; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off
259; GFX10-NEXT:    s_waitcnt vmcnt(0)
260; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
261; GFX10-NEXT:    ; return to shader part epilog
262;
263; GFX11-LABEL: extractelement_sgpr_v4i8_vgpr_idx:
264; GFX11:       ; %bb.0:
265; GFX11-NEXT:    v_and_b32_e32 v2, 3, v0
266; GFX11-NEXT:    v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
267; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
268; GFX11-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
269; GFX11-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
270; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
271; GFX11-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
272; GFX11-NEXT:    global_load_u8 v0, v[0:1], off
273; GFX11-NEXT:    s_waitcnt vmcnt(0)
274; GFX11-NEXT:    v_readfirstlane_b32 s0, v0
275; GFX11-NEXT:    ; return to shader part epilog
276  %vector = load <4 x i8>, ptr addrspace(4) %ptr
277  %element = extractelement <4 x i8> %vector, i32 %idx
278  ret i8 %element
279}
280
; Constant index 0 on a <4 x i8> loaded through an inreg addrspace(4) pointer.
; The checks expect a single unsigned-byte load directly from the pointer held
; in s[2:3] with no offset and no index arithmetic, and the byte returned in
; s0 through v_readfirstlane_b32.
281define amdgpu_ps i8 @extractelement_sgpr_v4i8_idx0(ptr addrspace(4) inreg %ptr) {
282; GFX9-LABEL: extractelement_sgpr_v4i8_idx0:
283; GFX9:       ; %bb.0:
284; GFX9-NEXT:    v_mov_b32_e32 v0, 0
285; GFX9-NEXT:    global_load_ubyte v0, v0, s[2:3]
286; GFX9-NEXT:    s_waitcnt vmcnt(0)
287; GFX9-NEXT:    v_readfirstlane_b32 s0, v0
288; GFX9-NEXT:    ; return to shader part epilog
289;
290; GFX8-LABEL: extractelement_sgpr_v4i8_idx0:
291; GFX8:       ; %bb.0:
292; GFX8-NEXT:    v_mov_b32_e32 v0, s2
293; GFX8-NEXT:    v_mov_b32_e32 v1, s3
294; GFX8-NEXT:    flat_load_ubyte v0, v[0:1]
295; GFX8-NEXT:    s_waitcnt vmcnt(0)
296; GFX8-NEXT:    v_readfirstlane_b32 s0, v0
297; GFX8-NEXT:    ; return to shader part epilog
298;
299; GFX7-LABEL: extractelement_sgpr_v4i8_idx0:
300; GFX7:       ; %bb.0:
301; GFX7-NEXT:    s_mov_b32 s0, s2
302; GFX7-NEXT:    s_mov_b32 s1, s3
303; GFX7-NEXT:    s_mov_b32 s2, -1
304; GFX7-NEXT:    s_mov_b32 s3, 0xf000
305; GFX7-NEXT:    buffer_load_ubyte v0, off, s[0:3], 0
306; GFX7-NEXT:    s_waitcnt vmcnt(0)
307; GFX7-NEXT:    v_readfirstlane_b32 s0, v0
308; GFX7-NEXT:    ; return to shader part epilog
309;
310; GFX10-LABEL: extractelement_sgpr_v4i8_idx0:
311; GFX10:       ; %bb.0:
312; GFX10-NEXT:    v_mov_b32_e32 v0, 0
313; GFX10-NEXT:    global_load_ubyte v0, v0, s[2:3]
314; GFX10-NEXT:    s_waitcnt vmcnt(0)
315; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
316; GFX10-NEXT:    ; return to shader part epilog
317;
318; GFX11-LABEL: extractelement_sgpr_v4i8_idx0:
319; GFX11:       ; %bb.0:
320; GFX11-NEXT:    v_mov_b32_e32 v0, 0
321; GFX11-NEXT:    global_load_u8 v0, v0, s[2:3]
322; GFX11-NEXT:    s_waitcnt vmcnt(0)
323; GFX11-NEXT:    v_readfirstlane_b32 s0, v0
324; GFX11-NEXT:    ; return to shader part epilog
325  %vector = load <4 x i8>, ptr addrspace(4) %ptr
326  %element = extractelement <4 x i8> %vector, i32 0
327  ret i8 %element
328}
329
; Constant index 1 on a <4 x i8> loaded through an inreg addrspace(4) pointer.
; The checks expect a single unsigned-byte load one byte past the pointer in
; s[2:3]. The GFX9/GFX10/GFX11 and GFX7 runs encode the 1 in the load's offset
; field; the GFX8 run adds 1 to the pointer with s_add_u32/s_addc_u32 before a
; flat_load_ubyte that carries no offset.
330define amdgpu_ps i8 @extractelement_sgpr_v4i8_idx1(ptr addrspace(4) inreg %ptr) {
331; GFX9-LABEL: extractelement_sgpr_v4i8_idx1:
332; GFX9:       ; %bb.0:
333; GFX9-NEXT:    v_mov_b32_e32 v0, 0
334; GFX9-NEXT:    global_load_ubyte v0, v0, s[2:3] offset:1
335; GFX9-NEXT:    s_waitcnt vmcnt(0)
336; GFX9-NEXT:    v_readfirstlane_b32 s0, v0
337; GFX9-NEXT:    ; return to shader part epilog
338;
339; GFX8-LABEL: extractelement_sgpr_v4i8_idx1:
340; GFX8:       ; %bb.0:
341; GFX8-NEXT:    s_add_u32 s0, s2, 1
342; GFX8-NEXT:    s_addc_u32 s1, s3, 0
343; GFX8-NEXT:    v_mov_b32_e32 v0, s0
344; GFX8-NEXT:    v_mov_b32_e32 v1, s1
345; GFX8-NEXT:    flat_load_ubyte v0, v[0:1]
346; GFX8-NEXT:    s_waitcnt vmcnt(0)
347; GFX8-NEXT:    v_readfirstlane_b32 s0, v0
348; GFX8-NEXT:    ; return to shader part epilog
349;
350; GFX7-LABEL: extractelement_sgpr_v4i8_idx1:
351; GFX7:       ; %bb.0:
352; GFX7-NEXT:    s_mov_b32 s0, s2
353; GFX7-NEXT:    s_mov_b32 s1, s3
354; GFX7-NEXT:    s_mov_b32 s2, -1
355; GFX7-NEXT:    s_mov_b32 s3, 0xf000
356; GFX7-NEXT:    buffer_load_ubyte v0, off, s[0:3], 0 offset:1
357; GFX7-NEXT:    s_waitcnt vmcnt(0)
358; GFX7-NEXT:    v_readfirstlane_b32 s0, v0
359; GFX7-NEXT:    ; return to shader part epilog
360;
361; GFX10-LABEL: extractelement_sgpr_v4i8_idx1:
362; GFX10:       ; %bb.0:
363; GFX10-NEXT:    v_mov_b32_e32 v0, 0
364; GFX10-NEXT:    global_load_ubyte v0, v0, s[2:3] offset:1
365; GFX10-NEXT:    s_waitcnt vmcnt(0)
366; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
367; GFX10-NEXT:    ; return to shader part epilog
368;
369; GFX11-LABEL: extractelement_sgpr_v4i8_idx1:
370; GFX11:       ; %bb.0:
371; GFX11-NEXT:    v_mov_b32_e32 v0, 0
372; GFX11-NEXT:    global_load_u8 v0, v0, s[2:3] offset:1
373; GFX11-NEXT:    s_waitcnt vmcnt(0)
374; GFX11-NEXT:    v_readfirstlane_b32 s0, v0
375; GFX11-NEXT:    ; return to shader part epilog
376  %vector = load <4 x i8>, ptr addrspace(4) %ptr
377  %element = extractelement <4 x i8> %vector, i32 1
378  ret i8 %element
379}
380
; Constant index 2 on a <4 x i8> loaded through an inreg addrspace(4) pointer.
; The checks expect a single unsigned-byte load two bytes past the pointer in
; s[2:3]. The GFX9/GFX10/GFX11 and GFX7 runs encode the 2 in the load's offset
; field; the GFX8 run adds 2 to the pointer with s_add_u32/s_addc_u32 before a
; flat_load_ubyte that carries no offset.
381define amdgpu_ps i8 @extractelement_sgpr_v4i8_idx2(ptr addrspace(4) inreg %ptr) {
382; GFX9-LABEL: extractelement_sgpr_v4i8_idx2:
383; GFX9:       ; %bb.0:
384; GFX9-NEXT:    v_mov_b32_e32 v0, 0
385; GFX9-NEXT:    global_load_ubyte v0, v0, s[2:3] offset:2
386; GFX9-NEXT:    s_waitcnt vmcnt(0)
387; GFX9-NEXT:    v_readfirstlane_b32 s0, v0
388; GFX9-NEXT:    ; return to shader part epilog
389;
390; GFX8-LABEL: extractelement_sgpr_v4i8_idx2:
391; GFX8:       ; %bb.0:
392; GFX8-NEXT:    s_add_u32 s0, s2, 2
393; GFX8-NEXT:    s_addc_u32 s1, s3, 0
394; GFX8-NEXT:    v_mov_b32_e32 v0, s0
395; GFX8-NEXT:    v_mov_b32_e32 v1, s1
396; GFX8-NEXT:    flat_load_ubyte v0, v[0:1]
397; GFX8-NEXT:    s_waitcnt vmcnt(0)
398; GFX8-NEXT:    v_readfirstlane_b32 s0, v0
399; GFX8-NEXT:    ; return to shader part epilog
400;
401; GFX7-LABEL: extractelement_sgpr_v4i8_idx2:
402; GFX7:       ; %bb.0:
403; GFX7-NEXT:    s_mov_b32 s0, s2
404; GFX7-NEXT:    s_mov_b32 s1, s3
405; GFX7-NEXT:    s_mov_b32 s2, -1
406; GFX7-NEXT:    s_mov_b32 s3, 0xf000
407; GFX7-NEXT:    buffer_load_ubyte v0, off, s[0:3], 0 offset:2
408; GFX7-NEXT:    s_waitcnt vmcnt(0)
409; GFX7-NEXT:    v_readfirstlane_b32 s0, v0
410; GFX7-NEXT:    ; return to shader part epilog
411;
412; GFX10-LABEL: extractelement_sgpr_v4i8_idx2:
413; GFX10:       ; %bb.0:
414; GFX10-NEXT:    v_mov_b32_e32 v0, 0
415; GFX10-NEXT:    global_load_ubyte v0, v0, s[2:3] offset:2
416; GFX10-NEXT:    s_waitcnt vmcnt(0)
417; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
418; GFX10-NEXT:    ; return to shader part epilog
419;
420; GFX11-LABEL: extractelement_sgpr_v4i8_idx2:
421; GFX11:       ; %bb.0:
422; GFX11-NEXT:    v_mov_b32_e32 v0, 0
423; GFX11-NEXT:    global_load_u8 v0, v0, s[2:3] offset:2
424; GFX11-NEXT:    s_waitcnt vmcnt(0)
425; GFX11-NEXT:    v_readfirstlane_b32 s0, v0
426; GFX11-NEXT:    ; return to shader part epilog
427  %vector = load <4 x i8>, ptr addrspace(4) %ptr
428  %element = extractelement <4 x i8> %vector, i32 2
429  ret i8 %element
430}
431
; Constant index 3 (the last lane) on a <4 x i8> loaded through an inreg
; addrspace(4) pointer. The checks expect a single unsigned-byte load three
; bytes past the pointer in s[2:3]. The GFX9/GFX10/GFX11 and GFX7 runs encode
; the 3 in the load's offset field; the GFX8 run adds 3 to the pointer with
; s_add_u32/s_addc_u32 before a flat_load_ubyte that carries no offset.
432define amdgpu_ps i8 @extractelement_sgpr_v4i8_idx3(ptr addrspace(4) inreg %ptr) {
433; GFX9-LABEL: extractelement_sgpr_v4i8_idx3:
434; GFX9:       ; %bb.0:
435; GFX9-NEXT:    v_mov_b32_e32 v0, 0
436; GFX9-NEXT:    global_load_ubyte v0, v0, s[2:3] offset:3
437; GFX9-NEXT:    s_waitcnt vmcnt(0)
438; GFX9-NEXT:    v_readfirstlane_b32 s0, v0
439; GFX9-NEXT:    ; return to shader part epilog
440;
441; GFX8-LABEL: extractelement_sgpr_v4i8_idx3:
442; GFX8:       ; %bb.0:
443; GFX8-NEXT:    s_add_u32 s0, s2, 3
444; GFX8-NEXT:    s_addc_u32 s1, s3, 0
445; GFX8-NEXT:    v_mov_b32_e32 v0, s0
446; GFX8-NEXT:    v_mov_b32_e32 v1, s1
447; GFX8-NEXT:    flat_load_ubyte v0, v[0:1]
448; GFX8-NEXT:    s_waitcnt vmcnt(0)
449; GFX8-NEXT:    v_readfirstlane_b32 s0, v0
450; GFX8-NEXT:    ; return to shader part epilog
451;
452; GFX7-LABEL: extractelement_sgpr_v4i8_idx3:
453; GFX7:       ; %bb.0:
454; GFX7-NEXT:    s_mov_b32 s0, s2
455; GFX7-NEXT:    s_mov_b32 s1, s3
456; GFX7-NEXT:    s_mov_b32 s2, -1
457; GFX7-NEXT:    s_mov_b32 s3, 0xf000
458; GFX7-NEXT:    buffer_load_ubyte v0, off, s[0:3], 0 offset:3
459; GFX7-NEXT:    s_waitcnt vmcnt(0)
460; GFX7-NEXT:    v_readfirstlane_b32 s0, v0
461; GFX7-NEXT:    ; return to shader part epilog
462;
463; GFX10-LABEL: extractelement_sgpr_v4i8_idx3:
464; GFX10:       ; %bb.0:
465; GFX10-NEXT:    v_mov_b32_e32 v0, 0
466; GFX10-NEXT:    global_load_ubyte v0, v0, s[2:3] offset:3
467; GFX10-NEXT:    s_waitcnt vmcnt(0)
468; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
469; GFX10-NEXT:    ; return to shader part epilog
470;
471; GFX11-LABEL: extractelement_sgpr_v4i8_idx3:
472; GFX11:       ; %bb.0:
473; GFX11-NEXT:    v_mov_b32_e32 v0, 0
474; GFX11-NEXT:    global_load_u8 v0, v0, s[2:3] offset:3
475; GFX11-NEXT:    s_waitcnt vmcnt(0)
476; GFX11-NEXT:    v_readfirstlane_b32 s0, v0
477; GFX11-NEXT:    ; return to shader part epilog
478  %vector = load <4 x i8>, ptr addrspace(4) %ptr
479  %element = extractelement <4 x i8> %vector, i32 3
480  ret i8 %element
481}
482
; Constant index 0 on a <4 x i8> loaded through a non-inreg addrspace(1)
; pointer in a function without amdgpu_ps. The checks expect a single
; unsigned-byte load straight from the pointer in v[0:1] with no offset, the
; result left in v0, and a return through s_setpc_b64 s[30:31].
483define i8 @extractelement_vgpr_v4i8_idx0(ptr addrspace(1) %ptr) {
484; GFX9-LABEL: extractelement_vgpr_v4i8_idx0:
485; GFX9:       ; %bb.0:
486; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
487; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off
488; GFX9-NEXT:    s_waitcnt vmcnt(0)
489; GFX9-NEXT:    s_setpc_b64 s[30:31]
490;
491; GFX8-LABEL: extractelement_vgpr_v4i8_idx0:
492; GFX8:       ; %bb.0:
493; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
494; GFX8-NEXT:    flat_load_ubyte v0, v[0:1]
495; GFX8-NEXT:    s_waitcnt vmcnt(0)
496; GFX8-NEXT:    s_setpc_b64 s[30:31]
497;
498; GFX7-LABEL: extractelement_vgpr_v4i8_idx0:
499; GFX7:       ; %bb.0:
500; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
501; GFX7-NEXT:    s_mov_b32 s6, 0
502; GFX7-NEXT:    s_mov_b32 s7, 0xf000
503; GFX7-NEXT:    s_mov_b64 s[4:5], 0
504; GFX7-NEXT:    buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64
505; GFX7-NEXT:    s_waitcnt vmcnt(0)
506; GFX7-NEXT:    s_setpc_b64 s[30:31]
507;
508; GFX10-LABEL: extractelement_vgpr_v4i8_idx0:
509; GFX10:       ; %bb.0:
510; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
511; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off
512; GFX10-NEXT:    s_waitcnt vmcnt(0)
513; GFX10-NEXT:    s_setpc_b64 s[30:31]
514;
515; GFX11-LABEL: extractelement_vgpr_v4i8_idx0:
516; GFX11:       ; %bb.0:
517; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
518; GFX11-NEXT:    global_load_u8 v0, v[0:1], off
519; GFX11-NEXT:    s_waitcnt vmcnt(0)
520; GFX11-NEXT:    s_setpc_b64 s[30:31]
521  %vector = load <4 x i8>, ptr addrspace(1) %ptr
522  %element = extractelement <4 x i8> %vector, i32 0
523  ret i8 %element
524}
525
; Constant index 1 on a <4 x i8> loaded through a non-inreg addrspace(1)
; pointer. The checks expect a single unsigned-byte load one byte past the
; pointer in v[0:1]. The GFX9/GFX10/GFX11 and GFX7 runs encode the 1 in the
; load's offset field; the GFX8 run adds 1 to the pointer with
; v_add_u32/v_addc_u32 before a flat_load_ubyte that carries no offset.
526define i8 @extractelement_vgpr_v4i8_idx1(ptr addrspace(1) %ptr) {
527; GFX9-LABEL: extractelement_vgpr_v4i8_idx1:
528; GFX9:       ; %bb.0:
529; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
530; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:1
531; GFX9-NEXT:    s_waitcnt vmcnt(0)
532; GFX9-NEXT:    s_setpc_b64 s[30:31]
533;
534; GFX8-LABEL: extractelement_vgpr_v4i8_idx1:
535; GFX8:       ; %bb.0:
536; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
537; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 1, v0
538; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
539; GFX8-NEXT:    flat_load_ubyte v0, v[0:1]
540; GFX8-NEXT:    s_waitcnt vmcnt(0)
541; GFX8-NEXT:    s_setpc_b64 s[30:31]
542;
543; GFX7-LABEL: extractelement_vgpr_v4i8_idx1:
544; GFX7:       ; %bb.0:
545; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
546; GFX7-NEXT:    s_mov_b32 s6, 0
547; GFX7-NEXT:    s_mov_b32 s7, 0xf000
548; GFX7-NEXT:    s_mov_b64 s[4:5], 0
549; GFX7-NEXT:    buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 offset:1
550; GFX7-NEXT:    s_waitcnt vmcnt(0)
551; GFX7-NEXT:    s_setpc_b64 s[30:31]
552;
553; GFX10-LABEL: extractelement_vgpr_v4i8_idx1:
554; GFX10:       ; %bb.0:
555; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
556; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:1
557; GFX10-NEXT:    s_waitcnt vmcnt(0)
558; GFX10-NEXT:    s_setpc_b64 s[30:31]
559;
560; GFX11-LABEL: extractelement_vgpr_v4i8_idx1:
561; GFX11:       ; %bb.0:
562; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
563; GFX11-NEXT:    global_load_u8 v0, v[0:1], off offset:1
564; GFX11-NEXT:    s_waitcnt vmcnt(0)
565; GFX11-NEXT:    s_setpc_b64 s[30:31]
566  %vector = load <4 x i8>, ptr addrspace(1) %ptr
567  %element = extractelement <4 x i8> %vector, i32 1
568  ret i8 %element
569}
570
; Constant index 2 on a <4 x i8> loaded through a non-inreg addrspace(1)
; pointer. The checks expect a single unsigned-byte load two bytes past the
; pointer in v[0:1]. The GFX9/GFX10/GFX11 and GFX7 runs encode the 2 in the
; load's offset field; the GFX8 run adds 2 to the pointer with
; v_add_u32/v_addc_u32 before a flat_load_ubyte that carries no offset.
571define i8 @extractelement_vgpr_v4i8_idx2(ptr addrspace(1) %ptr) {
572; GFX9-LABEL: extractelement_vgpr_v4i8_idx2:
573; GFX9:       ; %bb.0:
574; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
575; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:2
576; GFX9-NEXT:    s_waitcnt vmcnt(0)
577; GFX9-NEXT:    s_setpc_b64 s[30:31]
578;
579; GFX8-LABEL: extractelement_vgpr_v4i8_idx2:
580; GFX8:       ; %bb.0:
581; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
582; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 2, v0
583; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
584; GFX8-NEXT:    flat_load_ubyte v0, v[0:1]
585; GFX8-NEXT:    s_waitcnt vmcnt(0)
586; GFX8-NEXT:    s_setpc_b64 s[30:31]
587;
588; GFX7-LABEL: extractelement_vgpr_v4i8_idx2:
589; GFX7:       ; %bb.0:
590; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
591; GFX7-NEXT:    s_mov_b32 s6, 0
592; GFX7-NEXT:    s_mov_b32 s7, 0xf000
593; GFX7-NEXT:    s_mov_b64 s[4:5], 0
594; GFX7-NEXT:    buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 offset:2
595; GFX7-NEXT:    s_waitcnt vmcnt(0)
596; GFX7-NEXT:    s_setpc_b64 s[30:31]
597;
598; GFX10-LABEL: extractelement_vgpr_v4i8_idx2:
599; GFX10:       ; %bb.0:
600; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
601; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:2
602; GFX10-NEXT:    s_waitcnt vmcnt(0)
603; GFX10-NEXT:    s_setpc_b64 s[30:31]
604;
605; GFX11-LABEL: extractelement_vgpr_v4i8_idx2:
606; GFX11:       ; %bb.0:
607; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
608; GFX11-NEXT:    global_load_u8 v0, v[0:1], off offset:2
609; GFX11-NEXT:    s_waitcnt vmcnt(0)
610; GFX11-NEXT:    s_setpc_b64 s[30:31]
611  %vector = load <4 x i8>, ptr addrspace(1) %ptr
612  %element = extractelement <4 x i8> %vector, i32 2
613  ret i8 %element
614}
615
; Constant index 3 (the last lane) on a <4 x i8> loaded through a non-inreg
; addrspace(1) pointer. The checks expect a single unsigned-byte load three
; bytes past the pointer in v[0:1]. The GFX9/GFX10/GFX11 and GFX7 runs encode
; the 3 in the load's offset field; the GFX8 run adds 3 to the pointer with
; v_add_u32/v_addc_u32 before a flat_load_ubyte that carries no offset.
616define i8 @extractelement_vgpr_v4i8_idx3(ptr addrspace(1) %ptr) {
617; GFX9-LABEL: extractelement_vgpr_v4i8_idx3:
618; GFX9:       ; %bb.0:
619; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
620; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:3
621; GFX9-NEXT:    s_waitcnt vmcnt(0)
622; GFX9-NEXT:    s_setpc_b64 s[30:31]
623;
624; GFX8-LABEL: extractelement_vgpr_v4i8_idx3:
625; GFX8:       ; %bb.0:
626; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
627; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 3, v0
628; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
629; GFX8-NEXT:    flat_load_ubyte v0, v[0:1]
630; GFX8-NEXT:    s_waitcnt vmcnt(0)
631; GFX8-NEXT:    s_setpc_b64 s[30:31]
632;
633; GFX7-LABEL: extractelement_vgpr_v4i8_idx3:
634; GFX7:       ; %bb.0:
635; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
636; GFX7-NEXT:    s_mov_b32 s6, 0
637; GFX7-NEXT:    s_mov_b32 s7, 0xf000
638; GFX7-NEXT:    s_mov_b64 s[4:5], 0
639; GFX7-NEXT:    buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 offset:3
640; GFX7-NEXT:    s_waitcnt vmcnt(0)
641; GFX7-NEXT:    s_setpc_b64 s[30:31]
642;
643; GFX10-LABEL: extractelement_vgpr_v4i8_idx3:
644; GFX10:       ; %bb.0:
645; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
646; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:3
647; GFX10-NEXT:    s_waitcnt vmcnt(0)
648; GFX10-NEXT:    s_setpc_b64 s[30:31]
649;
650; GFX11-LABEL: extractelement_vgpr_v4i8_idx3:
651; GFX11:       ; %bb.0:
652; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
653; GFX11-NEXT:    global_load_u8 v0, v[0:1], off offset:3
654; GFX11-NEXT:    s_waitcnt vmcnt(0)
655; GFX11-NEXT:    s_setpc_b64 s[30:31]
656  %vector = load <4 x i8>, ptr addrspace(1) %ptr
657  %element = extractelement <4 x i8> %vector, i32 3
658  ret i8 %element
659}
660
; Eight-element variant of the inreg-pointer / inreg-index case: dynamic-index
; extractelement from a <8 x i8> loaded through an addrspace(4) pointer.
; The expected sequences match the <4 x i8> shape except that the index is
; masked with 7: one unsigned-byte load at %ptr + (%idx & 7), with the result
; moved into s0 by v_readfirstlane_b32.
661define amdgpu_ps i8 @extractelement_sgpr_v8i8_sgpr_idx(ptr addrspace(4) inreg %ptr, i32 inreg %idx) {
662; GFX9-LABEL: extractelement_sgpr_v8i8_sgpr_idx:
663; GFX9:       ; %bb.0:
664; GFX9-NEXT:    s_and_b32 s0, s4, 7
665; GFX9-NEXT:    s_ashr_i32 s1, s0, 31
666; GFX9-NEXT:    s_add_u32 s0, s2, s0
667; GFX9-NEXT:    s_addc_u32 s1, s3, s1
668; GFX9-NEXT:    v_mov_b32_e32 v0, 0
669; GFX9-NEXT:    global_load_ubyte v0, v0, s[0:1]
670; GFX9-NEXT:    s_waitcnt vmcnt(0)
671; GFX9-NEXT:    v_readfirstlane_b32 s0, v0
672; GFX9-NEXT:    ; return to shader part epilog
673;
674; GFX8-LABEL: extractelement_sgpr_v8i8_sgpr_idx:
675; GFX8:       ; %bb.0:
676; GFX8-NEXT:    s_and_b32 s0, s4, 7
677; GFX8-NEXT:    s_ashr_i32 s1, s0, 31
678; GFX8-NEXT:    s_add_u32 s0, s2, s0
679; GFX8-NEXT:    s_addc_u32 s1, s3, s1
680; GFX8-NEXT:    v_mov_b32_e32 v0, s0
681; GFX8-NEXT:    v_mov_b32_e32 v1, s1
682; GFX8-NEXT:    flat_load_ubyte v0, v[0:1]
683; GFX8-NEXT:    s_waitcnt vmcnt(0)
684; GFX8-NEXT:    v_readfirstlane_b32 s0, v0
685; GFX8-NEXT:    ; return to shader part epilog
686;
687; GFX7-LABEL: extractelement_sgpr_v8i8_sgpr_idx:
688; GFX7:       ; %bb.0:
689; GFX7-NEXT:    s_and_b32 s4, s4, 7
690; GFX7-NEXT:    s_ashr_i32 s5, s4, 31
691; GFX7-NEXT:    v_mov_b32_e32 v0, s4
692; GFX7-NEXT:    s_mov_b32 s0, s2
693; GFX7-NEXT:    s_mov_b32 s1, s3
694; GFX7-NEXT:    s_mov_b32 s2, 0
695; GFX7-NEXT:    s_mov_b32 s3, 0xf000
696; GFX7-NEXT:    v_mov_b32_e32 v1, s5
697; GFX7-NEXT:    buffer_load_ubyte v0, v[0:1], s[0:3], 0 addr64
698; GFX7-NEXT:    s_waitcnt vmcnt(0)
699; GFX7-NEXT:    v_readfirstlane_b32 s0, v0
700; GFX7-NEXT:    ; return to shader part epilog
701;
702; GFX10-LABEL: extractelement_sgpr_v8i8_sgpr_idx:
703; GFX10:       ; %bb.0:
704; GFX10-NEXT:    s_and_b32 s0, s4, 7
705; GFX10-NEXT:    v_mov_b32_e32 v0, 0
706; GFX10-NEXT:    s_ashr_i32 s1, s0, 31
707; GFX10-NEXT:    s_add_u32 s0, s2, s0
708; GFX10-NEXT:    s_addc_u32 s1, s3, s1
709; GFX10-NEXT:    global_load_ubyte v0, v0, s[0:1]
710; GFX10-NEXT:    s_waitcnt vmcnt(0)
711; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
712; GFX10-NEXT:    ; return to shader part epilog
713;
714; GFX11-LABEL: extractelement_sgpr_v8i8_sgpr_idx:
715; GFX11:       ; %bb.0:
716; GFX11-NEXT:    s_and_b32 s0, s4, 7
717; GFX11-NEXT:    v_mov_b32_e32 v0, 0
718; GFX11-NEXT:    s_ashr_i32 s1, s0, 31
719; GFX11-NEXT:    s_add_u32 s0, s2, s0
720; GFX11-NEXT:    s_addc_u32 s1, s3, s1
721; GFX11-NEXT:    global_load_u8 v0, v0, s[0:1]
722; GFX11-NEXT:    s_waitcnt vmcnt(0)
723; GFX11-NEXT:    v_readfirstlane_b32 s0, v0
724; GFX11-NEXT:    ; return to shader part epilog
725  %vector = load <8 x i8>, ptr addrspace(4) %ptr
726  %element = extractelement <8 x i8> %vector, i32 %idx
727  ret i8 %element
728}
729
; Eight-element variant of the non-inreg-pointer / inreg-index case:
; dynamic-index extractelement from a <8 x i8> loaded through an addrspace(1)
; pointer read from v[0:1], with the index read from s2. The expected
; sequences match the <4 x i8> shape except that the index is masked with 7
; before being widened to 64 bits and combined with the pointer for one
; unsigned-byte load.
730define amdgpu_ps i8 @extractelement_vgpr_v8i8_sgpr_idx(ptr addrspace(1) %ptr, i32 inreg %idx) {
731; GFX9-LABEL: extractelement_vgpr_v8i8_sgpr_idx:
732; GFX9:       ; %bb.0:
733; GFX9-NEXT:    s_and_b32 s0, s2, 7
734; GFX9-NEXT:    s_ashr_i32 s1, s0, 31
735; GFX9-NEXT:    v_mov_b32_e32 v3, s1
736; GFX9-NEXT:    v_mov_b32_e32 v2, s0
737; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
738; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
739; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off
740; GFX9-NEXT:    s_waitcnt vmcnt(0)
741; GFX9-NEXT:    v_readfirstlane_b32 s0, v0
742; GFX9-NEXT:    ; return to shader part epilog
743;
744; GFX8-LABEL: extractelement_vgpr_v8i8_sgpr_idx:
745; GFX8:       ; %bb.0:
746; GFX8-NEXT:    s_and_b32 s0, s2, 7
747; GFX8-NEXT:    s_ashr_i32 s1, s0, 31
748; GFX8-NEXT:    v_mov_b32_e32 v3, s1
749; GFX8-NEXT:    v_mov_b32_e32 v2, s0
750; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
751; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, v1, v3, vcc
752; GFX8-NEXT:    flat_load_ubyte v0, v[0:1]
753; GFX8-NEXT:    s_waitcnt vmcnt(0)
754; GFX8-NEXT:    v_readfirstlane_b32 s0, v0
755; GFX8-NEXT:    ; return to shader part epilog
756;
757; GFX7-LABEL: extractelement_vgpr_v8i8_sgpr_idx:
758; GFX7:       ; %bb.0:
759; GFX7-NEXT:    s_and_b32 s0, s2, 7
760; GFX7-NEXT:    s_ashr_i32 s1, s0, 31
761; GFX7-NEXT:    s_mov_b32 s2, 0
762; GFX7-NEXT:    s_mov_b32 s3, 0xf000
763; GFX7-NEXT:    buffer_load_ubyte v0, v[0:1], s[0:3], 0 addr64
764; GFX7-NEXT:    s_waitcnt vmcnt(0)
765; GFX7-NEXT:    v_readfirstlane_b32 s0, v0
766; GFX7-NEXT:    ; return to shader part epilog
767;
768; GFX10-LABEL: extractelement_vgpr_v8i8_sgpr_idx:
769; GFX10:       ; %bb.0:
770; GFX10-NEXT:    s_and_b32 s0, s2, 7
771; GFX10-NEXT:    s_ashr_i32 s1, s0, 31
772; GFX10-NEXT:    v_mov_b32_e32 v3, s1
773; GFX10-NEXT:    v_mov_b32_e32 v2, s0
774; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
775; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
776; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off
777; GFX10-NEXT:    s_waitcnt vmcnt(0)
778; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
779; GFX10-NEXT:    ; return to shader part epilog
780;
781; GFX11-LABEL: extractelement_vgpr_v8i8_sgpr_idx:
782; GFX11:       ; %bb.0:
783; GFX11-NEXT:    s_and_b32 s0, s2, 7
784; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
785; GFX11-NEXT:    s_ashr_i32 s1, s0, 31
786; GFX11-NEXT:    v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
787; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
788; GFX11-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
789; GFX11-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
790; GFX11-NEXT:    global_load_u8 v0, v[0:1], off
791; GFX11-NEXT:    s_waitcnt vmcnt(0)
792; GFX11-NEXT:    v_readfirstlane_b32 s0, v0
793; GFX11-NEXT:    ; return to shader part epilog
794  %vector = load <8 x i8>, ptr addrspace(1) %ptr
795  %element = extractelement <8 x i8> %vector, i32 %idx
796  ret i8 %element
797}
798
; Dynamic-index extract from an <8 x i8> loaded via an addrspace(1) pointer in
; a function with the default calling convention (it returns with
; s_setpc_b64 s[30:31]). Pointer and index are both plain arguments (v[0:1] and
; v2 in the assertions). The expected code masks the index with 7, sign-extends
; it, adds it to the pointer with a 64-bit VALU add, and performs a single
; unsigned byte load whose result is returned in v0.
define i8 @extractelement_vgpr_v8i8_vgpr_idx(ptr addrspace(1) %ptr, i32 %idx) {
; GFX9-LABEL: extractelement_vgpr_v8i8_vgpr_idx:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_and_b32_e32 v2, 7, v2
; GFX9-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: extractelement_vgpr_v8i8_vgpr_idx:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_and_b32_e32 v2, 7, v2
; GFX8-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, v1, v3, vcc
; GFX8-NEXT:    flat_load_ubyte v0, v[0:1]
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: extractelement_vgpr_v8i8_vgpr_idx:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    v_and_b32_e32 v2, 7, v2
; GFX7-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
; GFX7-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
; GFX7-NEXT:    v_addc_u32_e32 v1, vcc, v1, v3, vcc
; GFX7-NEXT:    s_mov_b32 s6, 0
; GFX7-NEXT:    s_mov_b32 s7, 0xf000
; GFX7-NEXT:    s_mov_b64 s[4:5], 0
; GFX7-NEXT:    buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: extractelement_vgpr_v8i8_vgpr_idx:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_and_b32_e32 v2, 7, v2
; GFX10-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: extractelement_vgpr_v8i8_vgpr_idx:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_and_b32_e32 v2, 7, v2
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX11-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
; GFX11-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
; GFX11-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
; GFX11-NEXT:    global_load_u8 v0, v[0:1], off
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %vector = load <8 x i8>, ptr addrspace(1) %ptr
  %element = extractelement <8 x i8> %vector, i32 %idx
  ret i8 %element
}
862
; Dynamic-index extract from an <8 x i8> loaded via an inreg addrspace(4)
; pointer (s[2:3] in the assertions) in an amdgpu_ps shader, with the index as
; a plain argument (v0). The expected code masks the index with 7, sign-extends
; it, and performs a single unsigned byte load at %ptr + index; the result is
; returned in s0 via v_readfirstlane_b32. The buffer_load_ubyte variant keeps
; the pointer in the resource base s[0:1] and uses the index as the VGPR
; address instead of copying the pointer into VGPRs.
define amdgpu_ps i8 @extractelement_sgpr_v8i8_vgpr_idx(ptr addrspace(4) inreg %ptr, i32 %idx) {
; GFX9-LABEL: extractelement_sgpr_v8i8_vgpr_idx:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_and_b32_e32 v2, 7, v0
; GFX9-NEXT:    v_mov_b32_e32 v0, s2
; GFX9-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
; GFX9-NEXT:    v_mov_b32_e32 v1, s3
; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_readfirstlane_b32 s0, v0
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: extractelement_sgpr_v8i8_vgpr_idx:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    v_and_b32_e32 v2, 7, v0
; GFX8-NEXT:    v_mov_b32_e32 v0, s2
; GFX8-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
; GFX8-NEXT:    v_mov_b32_e32 v1, s3
; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, v1, v3, vcc
; GFX8-NEXT:    flat_load_ubyte v0, v[0:1]
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    v_readfirstlane_b32 s0, v0
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX7-LABEL: extractelement_sgpr_v8i8_vgpr_idx:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    v_and_b32_e32 v0, 7, v0
; GFX7-NEXT:    s_mov_b32 s0, s2
; GFX7-NEXT:    s_mov_b32 s1, s3
; GFX7-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
; GFX7-NEXT:    s_mov_b32 s2, 0
; GFX7-NEXT:    s_mov_b32 s3, 0xf000
; GFX7-NEXT:    buffer_load_ubyte v0, v[0:1], s[0:3], 0 addr64
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    v_readfirstlane_b32 s0, v0
; GFX7-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: extractelement_sgpr_v8i8_vgpr_idx:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_and_b32_e32 v2, 7, v0
; GFX10-NEXT:    v_mov_b32_e32 v0, s2
; GFX10-NEXT:    v_mov_b32_e32 v1, s3
; GFX10-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: extractelement_sgpr_v8i8_vgpr_idx:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    v_and_b32_e32 v2, 7, v0
; GFX11-NEXT:    v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
; GFX11-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
; GFX11-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
; GFX11-NEXT:    global_load_u8 v0, v[0:1], off
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    v_readfirstlane_b32 s0, v0
; GFX11-NEXT:    ; return to shader part epilog
  %vector = load <8 x i8>, ptr addrspace(4) %ptr
  %element = extractelement <8 x i8> %vector, i32 %idx
  ret i8 %element
}
933
; Constant-index (0) extract from an <8 x i8> loaded via an inreg addrspace(4)
; pointer (s[2:3] in the assertions) in an amdgpu_ps shader. The expected code
; is a single unsigned byte load straight from the pointer with no offset,
; returned in s0 via v_readfirstlane_b32.
define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx0(ptr addrspace(4) inreg %ptr) {
; GFX9-LABEL: extractelement_sgpr_v8i8_idx0:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_mov_b32_e32 v0, 0
; GFX9-NEXT:    global_load_ubyte v0, v0, s[2:3]
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_readfirstlane_b32 s0, v0
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: extractelement_sgpr_v8i8_idx0:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    v_mov_b32_e32 v0, s2
; GFX8-NEXT:    v_mov_b32_e32 v1, s3
; GFX8-NEXT:    flat_load_ubyte v0, v[0:1]
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    v_readfirstlane_b32 s0, v0
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX7-LABEL: extractelement_sgpr_v8i8_idx0:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_mov_b32 s0, s2
; GFX7-NEXT:    s_mov_b32 s1, s3
; GFX7-NEXT:    s_mov_b32 s2, -1
; GFX7-NEXT:    s_mov_b32 s3, 0xf000
; GFX7-NEXT:    buffer_load_ubyte v0, off, s[0:3], 0
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    v_readfirstlane_b32 s0, v0
; GFX7-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: extractelement_sgpr_v8i8_idx0:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-NEXT:    global_load_ubyte v0, v0, s[2:3]
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: extractelement_sgpr_v8i8_idx0:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    v_mov_b32_e32 v0, 0
; GFX11-NEXT:    global_load_u8 v0, v0, s[2:3]
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    v_readfirstlane_b32 s0, v0
; GFX11-NEXT:    ; return to shader part epilog
  %vector = load <8 x i8>, ptr addrspace(4) %ptr
  %element = extractelement <8 x i8> %vector, i32 0
  ret i8 %element
}
982
; Constant-index (1) extract from an <8 x i8> loaded via an inreg addrspace(4)
; pointer (s[2:3] in the assertions) in an amdgpu_ps shader. The expected code
; is a single unsigned byte load at byte offset 1, returned in s0 via
; v_readfirstlane_b32. Most variants fold the offset into the load
; (offset:1); the flat_load_ubyte variant adds it with s_add_u32/s_addc_u32.
define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx1(ptr addrspace(4) inreg %ptr) {
; GFX9-LABEL: extractelement_sgpr_v8i8_idx1:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_mov_b32_e32 v0, 0
; GFX9-NEXT:    global_load_ubyte v0, v0, s[2:3] offset:1
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_readfirstlane_b32 s0, v0
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: extractelement_sgpr_v8i8_idx1:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_add_u32 s0, s2, 1
; GFX8-NEXT:    s_addc_u32 s1, s3, 0
; GFX8-NEXT:    v_mov_b32_e32 v0, s0
; GFX8-NEXT:    v_mov_b32_e32 v1, s1
; GFX8-NEXT:    flat_load_ubyte v0, v[0:1]
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    v_readfirstlane_b32 s0, v0
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX7-LABEL: extractelement_sgpr_v8i8_idx1:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_mov_b32 s0, s2
; GFX7-NEXT:    s_mov_b32 s1, s3
; GFX7-NEXT:    s_mov_b32 s2, -1
; GFX7-NEXT:    s_mov_b32 s3, 0xf000
; GFX7-NEXT:    buffer_load_ubyte v0, off, s[0:3], 0 offset:1
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    v_readfirstlane_b32 s0, v0
; GFX7-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: extractelement_sgpr_v8i8_idx1:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-NEXT:    global_load_ubyte v0, v0, s[2:3] offset:1
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: extractelement_sgpr_v8i8_idx1:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    v_mov_b32_e32 v0, 0
; GFX11-NEXT:    global_load_u8 v0, v0, s[2:3] offset:1
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    v_readfirstlane_b32 s0, v0
; GFX11-NEXT:    ; return to shader part epilog
  %vector = load <8 x i8>, ptr addrspace(4) %ptr
  %element = extractelement <8 x i8> %vector, i32 1
  ret i8 %element
}
1033
; Constant-index (2) extract from an <8 x i8> loaded via an inreg addrspace(4)
; pointer (s[2:3] in the assertions) in an amdgpu_ps shader. The expected code
; is a single unsigned byte load at byte offset 2, returned in s0 via
; v_readfirstlane_b32. Most variants fold the offset into the load
; (offset:2); the flat_load_ubyte variant adds it with s_add_u32/s_addc_u32.
define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx2(ptr addrspace(4) inreg %ptr) {
; GFX9-LABEL: extractelement_sgpr_v8i8_idx2:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_mov_b32_e32 v0, 0
; GFX9-NEXT:    global_load_ubyte v0, v0, s[2:3] offset:2
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_readfirstlane_b32 s0, v0
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: extractelement_sgpr_v8i8_idx2:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_add_u32 s0, s2, 2
; GFX8-NEXT:    s_addc_u32 s1, s3, 0
; GFX8-NEXT:    v_mov_b32_e32 v0, s0
; GFX8-NEXT:    v_mov_b32_e32 v1, s1
; GFX8-NEXT:    flat_load_ubyte v0, v[0:1]
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    v_readfirstlane_b32 s0, v0
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX7-LABEL: extractelement_sgpr_v8i8_idx2:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_mov_b32 s0, s2
; GFX7-NEXT:    s_mov_b32 s1, s3
; GFX7-NEXT:    s_mov_b32 s2, -1
; GFX7-NEXT:    s_mov_b32 s3, 0xf000
; GFX7-NEXT:    buffer_load_ubyte v0, off, s[0:3], 0 offset:2
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    v_readfirstlane_b32 s0, v0
; GFX7-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: extractelement_sgpr_v8i8_idx2:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-NEXT:    global_load_ubyte v0, v0, s[2:3] offset:2
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: extractelement_sgpr_v8i8_idx2:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    v_mov_b32_e32 v0, 0
; GFX11-NEXT:    global_load_u8 v0, v0, s[2:3] offset:2
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    v_readfirstlane_b32 s0, v0
; GFX11-NEXT:    ; return to shader part epilog
  %vector = load <8 x i8>, ptr addrspace(4) %ptr
  %element = extractelement <8 x i8> %vector, i32 2
  ret i8 %element
}
1084
; Constant-index (3) extract from an <8 x i8> loaded via an inreg addrspace(4)
; pointer (s[2:3] in the assertions) in an amdgpu_ps shader. The expected code
; is a single unsigned byte load at byte offset 3, returned in s0 via
; v_readfirstlane_b32. Most variants fold the offset into the load
; (offset:3); the flat_load_ubyte variant adds it with s_add_u32/s_addc_u32.
define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx3(ptr addrspace(4) inreg %ptr) {
; GFX9-LABEL: extractelement_sgpr_v8i8_idx3:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_mov_b32_e32 v0, 0
; GFX9-NEXT:    global_load_ubyte v0, v0, s[2:3] offset:3
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_readfirstlane_b32 s0, v0
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: extractelement_sgpr_v8i8_idx3:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_add_u32 s0, s2, 3
; GFX8-NEXT:    s_addc_u32 s1, s3, 0
; GFX8-NEXT:    v_mov_b32_e32 v0, s0
; GFX8-NEXT:    v_mov_b32_e32 v1, s1
; GFX8-NEXT:    flat_load_ubyte v0, v[0:1]
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    v_readfirstlane_b32 s0, v0
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX7-LABEL: extractelement_sgpr_v8i8_idx3:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_mov_b32 s0, s2
; GFX7-NEXT:    s_mov_b32 s1, s3
; GFX7-NEXT:    s_mov_b32 s2, -1
; GFX7-NEXT:    s_mov_b32 s3, 0xf000
; GFX7-NEXT:    buffer_load_ubyte v0, off, s[0:3], 0 offset:3
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    v_readfirstlane_b32 s0, v0
; GFX7-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: extractelement_sgpr_v8i8_idx3:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-NEXT:    global_load_ubyte v0, v0, s[2:3] offset:3
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: extractelement_sgpr_v8i8_idx3:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    v_mov_b32_e32 v0, 0
; GFX11-NEXT:    global_load_u8 v0, v0, s[2:3] offset:3
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    v_readfirstlane_b32 s0, v0
; GFX11-NEXT:    ; return to shader part epilog
  %vector = load <8 x i8>, ptr addrspace(4) %ptr
  %element = extractelement <8 x i8> %vector, i32 3
  ret i8 %element
}
1135
; Constant-index (4) extract from an <8 x i8> loaded via an inreg addrspace(4)
; pointer (s[2:3] in the assertions) in an amdgpu_ps shader. The expected code
; is a single unsigned byte load at byte offset 4, returned in s0 via
; v_readfirstlane_b32. Most variants fold the offset into the load
; (offset:4); the flat_load_ubyte variant adds it with s_add_u32/s_addc_u32.
define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx4(ptr addrspace(4) inreg %ptr) {
; GFX9-LABEL: extractelement_sgpr_v8i8_idx4:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_mov_b32_e32 v0, 0
; GFX9-NEXT:    global_load_ubyte v0, v0, s[2:3] offset:4
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_readfirstlane_b32 s0, v0
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: extractelement_sgpr_v8i8_idx4:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_add_u32 s0, s2, 4
; GFX8-NEXT:    s_addc_u32 s1, s3, 0
; GFX8-NEXT:    v_mov_b32_e32 v0, s0
; GFX8-NEXT:    v_mov_b32_e32 v1, s1
; GFX8-NEXT:    flat_load_ubyte v0, v[0:1]
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    v_readfirstlane_b32 s0, v0
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX7-LABEL: extractelement_sgpr_v8i8_idx4:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_mov_b32 s0, s2
; GFX7-NEXT:    s_mov_b32 s1, s3
; GFX7-NEXT:    s_mov_b32 s2, -1
; GFX7-NEXT:    s_mov_b32 s3, 0xf000
; GFX7-NEXT:    buffer_load_ubyte v0, off, s[0:3], 0 offset:4
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    v_readfirstlane_b32 s0, v0
; GFX7-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: extractelement_sgpr_v8i8_idx4:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-NEXT:    global_load_ubyte v0, v0, s[2:3] offset:4
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: extractelement_sgpr_v8i8_idx4:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    v_mov_b32_e32 v0, 0
; GFX11-NEXT:    global_load_u8 v0, v0, s[2:3] offset:4
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    v_readfirstlane_b32 s0, v0
; GFX11-NEXT:    ; return to shader part epilog
  %vector = load <8 x i8>, ptr addrspace(4) %ptr
  %element = extractelement <8 x i8> %vector, i32 4
  ret i8 %element
}
1186
; Constant-index (5) extract from an <8 x i8> loaded via an inreg addrspace(4)
; pointer (s[2:3] in the assertions) in an amdgpu_ps shader. The expected code
; is a single unsigned byte load at byte offset 5, returned in s0 via
; v_readfirstlane_b32. Most variants fold the offset into the load
; (offset:5); the flat_load_ubyte variant adds it with s_add_u32/s_addc_u32.
define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx5(ptr addrspace(4) inreg %ptr) {
; GFX9-LABEL: extractelement_sgpr_v8i8_idx5:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_mov_b32_e32 v0, 0
; GFX9-NEXT:    global_load_ubyte v0, v0, s[2:3] offset:5
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_readfirstlane_b32 s0, v0
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: extractelement_sgpr_v8i8_idx5:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_add_u32 s0, s2, 5
; GFX8-NEXT:    s_addc_u32 s1, s3, 0
; GFX8-NEXT:    v_mov_b32_e32 v0, s0
; GFX8-NEXT:    v_mov_b32_e32 v1, s1
; GFX8-NEXT:    flat_load_ubyte v0, v[0:1]
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    v_readfirstlane_b32 s0, v0
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX7-LABEL: extractelement_sgpr_v8i8_idx5:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_mov_b32 s0, s2
; GFX7-NEXT:    s_mov_b32 s1, s3
; GFX7-NEXT:    s_mov_b32 s2, -1
; GFX7-NEXT:    s_mov_b32 s3, 0xf000
; GFX7-NEXT:    buffer_load_ubyte v0, off, s[0:3], 0 offset:5
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    v_readfirstlane_b32 s0, v0
; GFX7-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: extractelement_sgpr_v8i8_idx5:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-NEXT:    global_load_ubyte v0, v0, s[2:3] offset:5
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: extractelement_sgpr_v8i8_idx5:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    v_mov_b32_e32 v0, 0
; GFX11-NEXT:    global_load_u8 v0, v0, s[2:3] offset:5
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    v_readfirstlane_b32 s0, v0
; GFX11-NEXT:    ; return to shader part epilog
  %vector = load <8 x i8>, ptr addrspace(4) %ptr
  %element = extractelement <8 x i8> %vector, i32 5
  ret i8 %element
}
1237
; Constant-index (6) extract from an <8 x i8> loaded via an inreg addrspace(4)
; pointer (s[2:3] in the assertions) in an amdgpu_ps shader. The expected code
; is a single unsigned byte load at byte offset 6, returned in s0 via
; v_readfirstlane_b32. Most variants fold the offset into the load
; (offset:6); the flat_load_ubyte variant adds it with s_add_u32/s_addc_u32.
define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx6(ptr addrspace(4) inreg %ptr) {
; GFX9-LABEL: extractelement_sgpr_v8i8_idx6:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_mov_b32_e32 v0, 0
; GFX9-NEXT:    global_load_ubyte v0, v0, s[2:3] offset:6
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_readfirstlane_b32 s0, v0
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: extractelement_sgpr_v8i8_idx6:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_add_u32 s0, s2, 6
; GFX8-NEXT:    s_addc_u32 s1, s3, 0
; GFX8-NEXT:    v_mov_b32_e32 v0, s0
; GFX8-NEXT:    v_mov_b32_e32 v1, s1
; GFX8-NEXT:    flat_load_ubyte v0, v[0:1]
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    v_readfirstlane_b32 s0, v0
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX7-LABEL: extractelement_sgpr_v8i8_idx6:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_mov_b32 s0, s2
; GFX7-NEXT:    s_mov_b32 s1, s3
; GFX7-NEXT:    s_mov_b32 s2, -1
; GFX7-NEXT:    s_mov_b32 s3, 0xf000
; GFX7-NEXT:    buffer_load_ubyte v0, off, s[0:3], 0 offset:6
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    v_readfirstlane_b32 s0, v0
; GFX7-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: extractelement_sgpr_v8i8_idx6:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-NEXT:    global_load_ubyte v0, v0, s[2:3] offset:6
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: extractelement_sgpr_v8i8_idx6:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    v_mov_b32_e32 v0, 0
; GFX11-NEXT:    global_load_u8 v0, v0, s[2:3] offset:6
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    v_readfirstlane_b32 s0, v0
; GFX11-NEXT:    ; return to shader part epilog
  %vector = load <8 x i8>, ptr addrspace(4) %ptr
  %element = extractelement <8 x i8> %vector, i32 6
  ret i8 %element
}
1288
; Constant-index (7) extract from an <8 x i8> loaded via an inreg addrspace(4)
; pointer (s[2:3] in the assertions) in an amdgpu_ps shader. The expected code
; is a single unsigned byte load at byte offset 7, returned in s0 via
; v_readfirstlane_b32. Most variants fold the offset into the load
; (offset:7); the flat_load_ubyte variant adds it with s_add_u32/s_addc_u32.
define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx7(ptr addrspace(4) inreg %ptr) {
; GFX9-LABEL: extractelement_sgpr_v8i8_idx7:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_mov_b32_e32 v0, 0
; GFX9-NEXT:    global_load_ubyte v0, v0, s[2:3] offset:7
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_readfirstlane_b32 s0, v0
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: extractelement_sgpr_v8i8_idx7:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_add_u32 s0, s2, 7
; GFX8-NEXT:    s_addc_u32 s1, s3, 0
; GFX8-NEXT:    v_mov_b32_e32 v0, s0
; GFX8-NEXT:    v_mov_b32_e32 v1, s1
; GFX8-NEXT:    flat_load_ubyte v0, v[0:1]
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    v_readfirstlane_b32 s0, v0
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX7-LABEL: extractelement_sgpr_v8i8_idx7:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_mov_b32 s0, s2
; GFX7-NEXT:    s_mov_b32 s1, s3
; GFX7-NEXT:    s_mov_b32 s2, -1
; GFX7-NEXT:    s_mov_b32 s3, 0xf000
; GFX7-NEXT:    buffer_load_ubyte v0, off, s[0:3], 0 offset:7
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    v_readfirstlane_b32 s0, v0
; GFX7-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: extractelement_sgpr_v8i8_idx7:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-NEXT:    global_load_ubyte v0, v0, s[2:3] offset:7
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: extractelement_sgpr_v8i8_idx7:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    v_mov_b32_e32 v0, 0
; GFX11-NEXT:    global_load_u8 v0, v0, s[2:3] offset:7
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    v_readfirstlane_b32 s0, v0
; GFX11-NEXT:    ; return to shader part epilog
  %vector = load <8 x i8>, ptr addrspace(4) %ptr
  %element = extractelement <8 x i8> %vector, i32 7
  ret i8 %element
}
1339
; Constant-index (0) extract from an <8 x i8> loaded via an addrspace(1)
; pointer (v[0:1] in the assertions) in a function with the default calling
; convention (it returns with s_setpc_b64 s[30:31]). The expected code is a
; single unsigned byte load straight from the pointer with no offset, with the
; result left in v0.
define i8 @extractelement_vgpr_v8i8_idx0(ptr addrspace(1) %ptr) {
; GFX9-LABEL: extractelement_vgpr_v8i8_idx0:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: extractelement_vgpr_v8i8_idx0:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    flat_load_ubyte v0, v[0:1]
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: extractelement_vgpr_v8i8_idx0:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    s_mov_b32 s6, 0
; GFX7-NEXT:    s_mov_b32 s7, 0xf000
; GFX7-NEXT:    s_mov_b64 s[4:5], 0
; GFX7-NEXT:    buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: extractelement_vgpr_v8i8_idx0:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: extractelement_vgpr_v8i8_idx0:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    global_load_u8 v0, v[0:1], off
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %vector = load <8 x i8>, ptr addrspace(1) %ptr
  %element = extractelement <8 x i8> %vector, i32 0
  ret i8 %element
}
1382
; Constant-index (1) extract from an <8 x i8> loaded via an addrspace(1)
; pointer (v[0:1] in the assertions) in a function with the default calling
; convention (it returns with s_setpc_b64 s[30:31]). The expected code is a
; single unsigned byte load at byte offset 1, with the result left in v0. Most
; variants fold the offset into the load (offset:1); the flat_load_ubyte
; variant adds it with v_add_u32/v_addc_u32 first.
define i8 @extractelement_vgpr_v8i8_idx1(ptr addrspace(1) %ptr) {
; GFX9-LABEL: extractelement_vgpr_v8i8_idx1:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:1
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: extractelement_vgpr_v8i8_idx1:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 1, v0
; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GFX8-NEXT:    flat_load_ubyte v0, v[0:1]
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: extractelement_vgpr_v8i8_idx1:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    s_mov_b32 s6, 0
; GFX7-NEXT:    s_mov_b32 s7, 0xf000
; GFX7-NEXT:    s_mov_b64 s[4:5], 0
; GFX7-NEXT:    buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 offset:1
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: extractelement_vgpr_v8i8_idx1:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:1
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: extractelement_vgpr_v8i8_idx1:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    global_load_u8 v0, v[0:1], off offset:1
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %vector = load <8 x i8>, ptr addrspace(1) %ptr
  %element = extractelement <8 x i8> %vector, i32 1
  ret i8 %element
}
1427
; Constant-index (2) extract from an <8 x i8> loaded via an addrspace(1)
; pointer (v[0:1] in the assertions) in a function with the default calling
; convention (it returns with s_setpc_b64 s[30:31]). The expected code is a
; single unsigned byte load at byte offset 2, with the result left in v0. Most
; variants fold the offset into the load (offset:2); the flat_load_ubyte
; variant adds it with v_add_u32/v_addc_u32 first.
define i8 @extractelement_vgpr_v8i8_idx2(ptr addrspace(1) %ptr) {
; GFX9-LABEL: extractelement_vgpr_v8i8_idx2:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:2
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: extractelement_vgpr_v8i8_idx2:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 2, v0
; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GFX8-NEXT:    flat_load_ubyte v0, v[0:1]
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: extractelement_vgpr_v8i8_idx2:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    s_mov_b32 s6, 0
; GFX7-NEXT:    s_mov_b32 s7, 0xf000
; GFX7-NEXT:    s_mov_b64 s[4:5], 0
; GFX7-NEXT:    buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 offset:2
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: extractelement_vgpr_v8i8_idx2:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:2
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: extractelement_vgpr_v8i8_idx2:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    global_load_u8 v0, v[0:1], off offset:2
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %vector = load <8 x i8>, ptr addrspace(1) %ptr
  %element = extractelement <8 x i8> %vector, i32 2
  ret i8 %element
}
1472
; Constant-index (3) extract from an <8 x i8> loaded via an addrspace(1)
; pointer (v[0:1] in the assertions) in a function with the default calling
; convention (it returns with s_setpc_b64 s[30:31]). The expected code is a
; single unsigned byte load at byte offset 3, with the result left in v0. Most
; variants fold the offset into the load (offset:3); the flat_load_ubyte
; variant adds it with v_add_u32/v_addc_u32 first.
define i8 @extractelement_vgpr_v8i8_idx3(ptr addrspace(1) %ptr) {
; GFX9-LABEL: extractelement_vgpr_v8i8_idx3:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:3
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: extractelement_vgpr_v8i8_idx3:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 3, v0
; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GFX8-NEXT:    flat_load_ubyte v0, v[0:1]
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: extractelement_vgpr_v8i8_idx3:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    s_mov_b32 s6, 0
; GFX7-NEXT:    s_mov_b32 s7, 0xf000
; GFX7-NEXT:    s_mov_b64 s[4:5], 0
; GFX7-NEXT:    buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 offset:3
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: extractelement_vgpr_v8i8_idx3:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:3
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: extractelement_vgpr_v8i8_idx3:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    global_load_u8 v0, v[0:1], off offset:3
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %vector = load <8 x i8>, ptr addrspace(1) %ptr
  %element = extractelement <8 x i8> %vector, i32 3
  ret i8 %element
}
1517
; Constant-index (4) extract from an <8 x i8> loaded via an addrspace(1)
; pointer (v[0:1] in the assertions) in a function with the default calling
; convention (it returns with s_setpc_b64 s[30:31]). The expected code is a
; single unsigned byte load at byte offset 4, with the result left in v0. Most
; variants fold the offset into the load (offset:4); the flat_load_ubyte
; variant adds it with v_add_u32/v_addc_u32 first.
define i8 @extractelement_vgpr_v8i8_idx4(ptr addrspace(1) %ptr) {
; GFX9-LABEL: extractelement_vgpr_v8i8_idx4:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:4
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: extractelement_vgpr_v8i8_idx4:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 4, v0
; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GFX8-NEXT:    flat_load_ubyte v0, v[0:1]
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: extractelement_vgpr_v8i8_idx4:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    s_mov_b32 s6, 0
; GFX7-NEXT:    s_mov_b32 s7, 0xf000
; GFX7-NEXT:    s_mov_b64 s[4:5], 0
; GFX7-NEXT:    buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 offset:4
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: extractelement_vgpr_v8i8_idx4:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:4
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: extractelement_vgpr_v8i8_idx4:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    global_load_u8 v0, v[0:1], off offset:4
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %vector = load <8 x i8>, ptr addrspace(1) %ptr
  %element = extractelement <8 x i8> %vector, i32 4
  ret i8 %element
}
1562
; Constant-index extract: loads <8 x i8> from %ptr (addrspace 1) and returns
; element 5. All check prefixes expect one single-byte load at byte offset 5;
; the GFX8 expectations materialize the offset with a 64-bit add before a flat
; load instead of using an immediate offset.
1563define i8 @extractelement_vgpr_v8i8_idx5(ptr addrspace(1) %ptr) {
1564; GFX9-LABEL: extractelement_vgpr_v8i8_idx5:
1565; GFX9:       ; %bb.0:
1566; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1567; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:5
1568; GFX9-NEXT:    s_waitcnt vmcnt(0)
1569; GFX9-NEXT:    s_setpc_b64 s[30:31]
1570;
1571; GFX8-LABEL: extractelement_vgpr_v8i8_idx5:
1572; GFX8:       ; %bb.0:
1573; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1574; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 5, v0
1575; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
1576; GFX8-NEXT:    flat_load_ubyte v0, v[0:1]
1577; GFX8-NEXT:    s_waitcnt vmcnt(0)
1578; GFX8-NEXT:    s_setpc_b64 s[30:31]
1579;
1580; GFX7-LABEL: extractelement_vgpr_v8i8_idx5:
1581; GFX7:       ; %bb.0:
1582; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1583; GFX7-NEXT:    s_mov_b32 s6, 0
1584; GFX7-NEXT:    s_mov_b32 s7, 0xf000
1585; GFX7-NEXT:    s_mov_b64 s[4:5], 0
1586; GFX7-NEXT:    buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 offset:5
1587; GFX7-NEXT:    s_waitcnt vmcnt(0)
1588; GFX7-NEXT:    s_setpc_b64 s[30:31]
1589;
1590; GFX10-LABEL: extractelement_vgpr_v8i8_idx5:
1591; GFX10:       ; %bb.0:
1592; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1593; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:5
1594; GFX10-NEXT:    s_waitcnt vmcnt(0)
1595; GFX10-NEXT:    s_setpc_b64 s[30:31]
1596;
1597; GFX11-LABEL: extractelement_vgpr_v8i8_idx5:
1598; GFX11:       ; %bb.0:
1599; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1600; GFX11-NEXT:    global_load_u8 v0, v[0:1], off offset:5
1601; GFX11-NEXT:    s_waitcnt vmcnt(0)
1602; GFX11-NEXT:    s_setpc_b64 s[30:31]
1603  %vector = load <8 x i8>, ptr addrspace(1) %ptr
1604  %element = extractelement <8 x i8> %vector, i32 5
1605  ret i8 %element
1606}
1607
; Constant-index extract: loads <8 x i8> from %ptr (addrspace 1) and returns
; element 6. All check prefixes expect one single-byte load at byte offset 6;
; the GFX8 expectations materialize the offset with a 64-bit add before a flat
; load instead of using an immediate offset.
1608define i8 @extractelement_vgpr_v8i8_idx6(ptr addrspace(1) %ptr) {
1609; GFX9-LABEL: extractelement_vgpr_v8i8_idx6:
1610; GFX9:       ; %bb.0:
1611; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1612; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:6
1613; GFX9-NEXT:    s_waitcnt vmcnt(0)
1614; GFX9-NEXT:    s_setpc_b64 s[30:31]
1615;
1616; GFX8-LABEL: extractelement_vgpr_v8i8_idx6:
1617; GFX8:       ; %bb.0:
1618; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1619; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 6, v0
1620; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
1621; GFX8-NEXT:    flat_load_ubyte v0, v[0:1]
1622; GFX8-NEXT:    s_waitcnt vmcnt(0)
1623; GFX8-NEXT:    s_setpc_b64 s[30:31]
1624;
1625; GFX7-LABEL: extractelement_vgpr_v8i8_idx6:
1626; GFX7:       ; %bb.0:
1627; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1628; GFX7-NEXT:    s_mov_b32 s6, 0
1629; GFX7-NEXT:    s_mov_b32 s7, 0xf000
1630; GFX7-NEXT:    s_mov_b64 s[4:5], 0
1631; GFX7-NEXT:    buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 offset:6
1632; GFX7-NEXT:    s_waitcnt vmcnt(0)
1633; GFX7-NEXT:    s_setpc_b64 s[30:31]
1634;
1635; GFX10-LABEL: extractelement_vgpr_v8i8_idx6:
1636; GFX10:       ; %bb.0:
1637; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1638; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:6
1639; GFX10-NEXT:    s_waitcnt vmcnt(0)
1640; GFX10-NEXT:    s_setpc_b64 s[30:31]
1641;
1642; GFX11-LABEL: extractelement_vgpr_v8i8_idx6:
1643; GFX11:       ; %bb.0:
1644; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1645; GFX11-NEXT:    global_load_u8 v0, v[0:1], off offset:6
1646; GFX11-NEXT:    s_waitcnt vmcnt(0)
1647; GFX11-NEXT:    s_setpc_b64 s[30:31]
1648  %vector = load <8 x i8>, ptr addrspace(1) %ptr
1649  %element = extractelement <8 x i8> %vector, i32 6
1650  ret i8 %element
1651}
1652
; Constant-index extract: loads <8 x i8> from %ptr (addrspace 1) and returns
; element 7, the last lane of the vector. All check prefixes expect one
; single-byte load at byte offset 7; the GFX8 expectations materialize the
; offset with a 64-bit add before a flat load instead of an immediate offset.
1653define i8 @extractelement_vgpr_v8i8_idx7(ptr addrspace(1) %ptr) {
1654; GFX9-LABEL: extractelement_vgpr_v8i8_idx7:
1655; GFX9:       ; %bb.0:
1656; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1657; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:7
1658; GFX9-NEXT:    s_waitcnt vmcnt(0)
1659; GFX9-NEXT:    s_setpc_b64 s[30:31]
1660;
1661; GFX8-LABEL: extractelement_vgpr_v8i8_idx7:
1662; GFX8:       ; %bb.0:
1663; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1664; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 7, v0
1665; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
1666; GFX8-NEXT:    flat_load_ubyte v0, v[0:1]
1667; GFX8-NEXT:    s_waitcnt vmcnt(0)
1668; GFX8-NEXT:    s_setpc_b64 s[30:31]
1669;
1670; GFX7-LABEL: extractelement_vgpr_v8i8_idx7:
1671; GFX7:       ; %bb.0:
1672; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1673; GFX7-NEXT:    s_mov_b32 s6, 0
1674; GFX7-NEXT:    s_mov_b32 s7, 0xf000
1675; GFX7-NEXT:    s_mov_b64 s[4:5], 0
1676; GFX7-NEXT:    buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 offset:7
1677; GFX7-NEXT:    s_waitcnt vmcnt(0)
1678; GFX7-NEXT:    s_setpc_b64 s[30:31]
1679;
1680; GFX10-LABEL: extractelement_vgpr_v8i8_idx7:
1681; GFX10:       ; %bb.0:
1682; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1683; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:7
1684; GFX10-NEXT:    s_waitcnt vmcnt(0)
1685; GFX10-NEXT:    s_setpc_b64 s[30:31]
1686;
1687; GFX11-LABEL: extractelement_vgpr_v8i8_idx7:
1688; GFX11:       ; %bb.0:
1689; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1690; GFX11-NEXT:    global_load_u8 v0, v[0:1], off offset:7
1691; GFX11-NEXT:    s_waitcnt vmcnt(0)
1692; GFX11-NEXT:    s_setpc_b64 s[30:31]
1693  %vector = load <8 x i8>, ptr addrspace(1) %ptr
1694  %element = extractelement <8 x i8> %vector, i32 7
1695  ret i8 %element
1696}
1697
; Dynamic-index extract in an amdgpu_ps function where both the addrspace(4)
; pointer and the index are passed inreg. The checks expect the index to be
; masked with 15, widened to 64 bits with an arithmetic shift by 31, and combined
; with the pointer (a scalar add/addc pair, or the addr64 buffer operands in the
; GFX7 expectations), then one single-byte load whose result is moved to s0 with
; v_readfirstlane_b32.
1698define amdgpu_ps i8 @extractelement_sgpr_v16i8_sgpr_idx(ptr addrspace(4) inreg %ptr, i32 inreg %idx) {
1699; GFX9-LABEL: extractelement_sgpr_v16i8_sgpr_idx:
1700; GFX9:       ; %bb.0:
1701; GFX9-NEXT:    s_and_b32 s0, s4, 15
1702; GFX9-NEXT:    s_ashr_i32 s1, s0, 31
1703; GFX9-NEXT:    s_add_u32 s0, s2, s0
1704; GFX9-NEXT:    s_addc_u32 s1, s3, s1
1705; GFX9-NEXT:    v_mov_b32_e32 v0, 0
1706; GFX9-NEXT:    global_load_ubyte v0, v0, s[0:1]
1707; GFX9-NEXT:    s_waitcnt vmcnt(0)
1708; GFX9-NEXT:    v_readfirstlane_b32 s0, v0
1709; GFX9-NEXT:    ; return to shader part epilog
1710;
1711; GFX8-LABEL: extractelement_sgpr_v16i8_sgpr_idx:
1712; GFX8:       ; %bb.0:
1713; GFX8-NEXT:    s_and_b32 s0, s4, 15
1714; GFX8-NEXT:    s_ashr_i32 s1, s0, 31
1715; GFX8-NEXT:    s_add_u32 s0, s2, s0
1716; GFX8-NEXT:    s_addc_u32 s1, s3, s1
1717; GFX8-NEXT:    v_mov_b32_e32 v0, s0
1718; GFX8-NEXT:    v_mov_b32_e32 v1, s1
1719; GFX8-NEXT:    flat_load_ubyte v0, v[0:1]
1720; GFX8-NEXT:    s_waitcnt vmcnt(0)
1721; GFX8-NEXT:    v_readfirstlane_b32 s0, v0
1722; GFX8-NEXT:    ; return to shader part epilog
1723;
1724; GFX7-LABEL: extractelement_sgpr_v16i8_sgpr_idx:
1725; GFX7:       ; %bb.0:
1726; GFX7-NEXT:    s_and_b32 s4, s4, 15
1727; GFX7-NEXT:    s_ashr_i32 s5, s4, 31
1728; GFX7-NEXT:    v_mov_b32_e32 v0, s4
1729; GFX7-NEXT:    s_mov_b32 s0, s2
1730; GFX7-NEXT:    s_mov_b32 s1, s3
1731; GFX7-NEXT:    s_mov_b32 s2, 0
1732; GFX7-NEXT:    s_mov_b32 s3, 0xf000
1733; GFX7-NEXT:    v_mov_b32_e32 v1, s5
1734; GFX7-NEXT:    buffer_load_ubyte v0, v[0:1], s[0:3], 0 addr64
1735; GFX7-NEXT:    s_waitcnt vmcnt(0)
1736; GFX7-NEXT:    v_readfirstlane_b32 s0, v0
1737; GFX7-NEXT:    ; return to shader part epilog
1738;
1739; GFX10-LABEL: extractelement_sgpr_v16i8_sgpr_idx:
1740; GFX10:       ; %bb.0:
1741; GFX10-NEXT:    s_and_b32 s0, s4, 15
1742; GFX10-NEXT:    v_mov_b32_e32 v0, 0
1743; GFX10-NEXT:    s_ashr_i32 s1, s0, 31
1744; GFX10-NEXT:    s_add_u32 s0, s2, s0
1745; GFX10-NEXT:    s_addc_u32 s1, s3, s1
1746; GFX10-NEXT:    global_load_ubyte v0, v0, s[0:1]
1747; GFX10-NEXT:    s_waitcnt vmcnt(0)
1748; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
1749; GFX10-NEXT:    ; return to shader part epilog
1750;
1751; GFX11-LABEL: extractelement_sgpr_v16i8_sgpr_idx:
1752; GFX11:       ; %bb.0:
1753; GFX11-NEXT:    s_and_b32 s0, s4, 15
1754; GFX11-NEXT:    v_mov_b32_e32 v0, 0
1755; GFX11-NEXT:    s_ashr_i32 s1, s0, 31
1756; GFX11-NEXT:    s_add_u32 s0, s2, s0
1757; GFX11-NEXT:    s_addc_u32 s1, s3, s1
1758; GFX11-NEXT:    global_load_u8 v0, v0, s[0:1]
1759; GFX11-NEXT:    s_waitcnt vmcnt(0)
1760; GFX11-NEXT:    v_readfirstlane_b32 s0, v0
1761; GFX11-NEXT:    ; return to shader part epilog
1762  %vector = load <16 x i8>, ptr addrspace(4) %ptr
1763  %element = extractelement <16 x i8> %vector, i32 %idx
1764  ret i8 %element
1765}
1766
; Dynamic-index extract in an amdgpu_ps function: the addrspace(1) pointer is a
; regular argument and only the index is passed inreg. The checks expect the
; index to be masked with 15 and widened to 64 bits on the scalar side, then
; added to the pointer with a vector add/add-with-carry pair (the GFX7
; expectations instead place it in the buffer descriptor base s[0:1]), followed
; by one single-byte load and a v_readfirstlane_b32 into s0.
1767define amdgpu_ps i8 @extractelement_vgpr_v16i8_sgpr_idx(ptr addrspace(1) %ptr, i32 inreg %idx) {
1768; GFX9-LABEL: extractelement_vgpr_v16i8_sgpr_idx:
1769; GFX9:       ; %bb.0:
1770; GFX9-NEXT:    s_and_b32 s0, s2, 15
1771; GFX9-NEXT:    s_ashr_i32 s1, s0, 31
1772; GFX9-NEXT:    v_mov_b32_e32 v3, s1
1773; GFX9-NEXT:    v_mov_b32_e32 v2, s0
1774; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
1775; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
1776; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off
1777; GFX9-NEXT:    s_waitcnt vmcnt(0)
1778; GFX9-NEXT:    v_readfirstlane_b32 s0, v0
1779; GFX9-NEXT:    ; return to shader part epilog
1780;
1781; GFX8-LABEL: extractelement_vgpr_v16i8_sgpr_idx:
1782; GFX8:       ; %bb.0:
1783; GFX8-NEXT:    s_and_b32 s0, s2, 15
1784; GFX8-NEXT:    s_ashr_i32 s1, s0, 31
1785; GFX8-NEXT:    v_mov_b32_e32 v3, s1
1786; GFX8-NEXT:    v_mov_b32_e32 v2, s0
1787; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
1788; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, v1, v3, vcc
1789; GFX8-NEXT:    flat_load_ubyte v0, v[0:1]
1790; GFX8-NEXT:    s_waitcnt vmcnt(0)
1791; GFX8-NEXT:    v_readfirstlane_b32 s0, v0
1792; GFX8-NEXT:    ; return to shader part epilog
1793;
1794; GFX7-LABEL: extractelement_vgpr_v16i8_sgpr_idx:
1795; GFX7:       ; %bb.0:
1796; GFX7-NEXT:    s_and_b32 s0, s2, 15
1797; GFX7-NEXT:    s_ashr_i32 s1, s0, 31
1798; GFX7-NEXT:    s_mov_b32 s2, 0
1799; GFX7-NEXT:    s_mov_b32 s3, 0xf000
1800; GFX7-NEXT:    buffer_load_ubyte v0, v[0:1], s[0:3], 0 addr64
1801; GFX7-NEXT:    s_waitcnt vmcnt(0)
1802; GFX7-NEXT:    v_readfirstlane_b32 s0, v0
1803; GFX7-NEXT:    ; return to shader part epilog
1804;
1805; GFX10-LABEL: extractelement_vgpr_v16i8_sgpr_idx:
1806; GFX10:       ; %bb.0:
1807; GFX10-NEXT:    s_and_b32 s0, s2, 15
1808; GFX10-NEXT:    s_ashr_i32 s1, s0, 31
1809; GFX10-NEXT:    v_mov_b32_e32 v3, s1
1810; GFX10-NEXT:    v_mov_b32_e32 v2, s0
1811; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
1812; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
1813; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off
1814; GFX10-NEXT:    s_waitcnt vmcnt(0)
1815; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
1816; GFX10-NEXT:    ; return to shader part epilog
1817;
1818; GFX11-LABEL: extractelement_vgpr_v16i8_sgpr_idx:
1819; GFX11:       ; %bb.0:
1820; GFX11-NEXT:    s_and_b32 s0, s2, 15
1821; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
1822; GFX11-NEXT:    s_ashr_i32 s1, s0, 31
1823; GFX11-NEXT:    v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
1824; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
1825; GFX11-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
1826; GFX11-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
1827; GFX11-NEXT:    global_load_u8 v0, v[0:1], off
1828; GFX11-NEXT:    s_waitcnt vmcnt(0)
1829; GFX11-NEXT:    v_readfirstlane_b32 s0, v0
1830; GFX11-NEXT:    ; return to shader part epilog
1831  %vector = load <16 x i8>, ptr addrspace(1) %ptr
1832  %element = extractelement <16 x i8> %vector, i32 %idx
1833  ret i8 %element
1834}
1835
; Dynamic-index extract with the default calling convention: neither the
; addrspace(1) pointer nor the index is marked inreg. The checks expect the
; index (v2) to be masked with 15, widened to 64 bits with an arithmetic shift
; by 31, added to the pointer in v[0:1], and then one single-byte load before
; returning through s_setpc_b64.
1836define i8 @extractelement_vgpr_v16i8_vgpr_idx(ptr addrspace(1) %ptr, i32 %idx) {
1837; GFX9-LABEL: extractelement_vgpr_v16i8_vgpr_idx:
1838; GFX9:       ; %bb.0:
1839; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1840; GFX9-NEXT:    v_and_b32_e32 v2, 15, v2
1841; GFX9-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
1842; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
1843; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
1844; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off
1845; GFX9-NEXT:    s_waitcnt vmcnt(0)
1846; GFX9-NEXT:    s_setpc_b64 s[30:31]
1847;
1848; GFX8-LABEL: extractelement_vgpr_v16i8_vgpr_idx:
1849; GFX8:       ; %bb.0:
1850; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1851; GFX8-NEXT:    v_and_b32_e32 v2, 15, v2
1852; GFX8-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
1853; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
1854; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, v1, v3, vcc
1855; GFX8-NEXT:    flat_load_ubyte v0, v[0:1]
1856; GFX8-NEXT:    s_waitcnt vmcnt(0)
1857; GFX8-NEXT:    s_setpc_b64 s[30:31]
1858;
1859; GFX7-LABEL: extractelement_vgpr_v16i8_vgpr_idx:
1860; GFX7:       ; %bb.0:
1861; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1862; GFX7-NEXT:    v_and_b32_e32 v2, 15, v2
1863; GFX7-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
1864; GFX7-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
1865; GFX7-NEXT:    v_addc_u32_e32 v1, vcc, v1, v3, vcc
1866; GFX7-NEXT:    s_mov_b32 s6, 0
1867; GFX7-NEXT:    s_mov_b32 s7, 0xf000
1868; GFX7-NEXT:    s_mov_b64 s[4:5], 0
1869; GFX7-NEXT:    buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64
1870; GFX7-NEXT:    s_waitcnt vmcnt(0)
1871; GFX7-NEXT:    s_setpc_b64 s[30:31]
1872;
1873; GFX10-LABEL: extractelement_vgpr_v16i8_vgpr_idx:
1874; GFX10:       ; %bb.0:
1875; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1876; GFX10-NEXT:    v_and_b32_e32 v2, 15, v2
1877; GFX10-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
1878; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
1879; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
1880; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off
1881; GFX10-NEXT:    s_waitcnt vmcnt(0)
1882; GFX10-NEXT:    s_setpc_b64 s[30:31]
1883;
1884; GFX11-LABEL: extractelement_vgpr_v16i8_vgpr_idx:
1885; GFX11:       ; %bb.0:
1886; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1887; GFX11-NEXT:    v_and_b32_e32 v2, 15, v2
1888; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
1889; GFX11-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
1890; GFX11-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
1891; GFX11-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
1892; GFX11-NEXT:    global_load_u8 v0, v[0:1], off
1893; GFX11-NEXT:    s_waitcnt vmcnt(0)
1894; GFX11-NEXT:    s_setpc_b64 s[30:31]
1895  %vector = load <16 x i8>, ptr addrspace(1) %ptr
1896  %element = extractelement <16 x i8> %vector, i32 %idx
1897  ret i8 %element
1898}
1899
; Dynamic-index extract in an amdgpu_ps function: the addrspace(4) pointer is
; passed inreg while the index is a regular argument. The checks expect the
; index (v0) to be masked with 15 and widened to 64 bits, the pointer in s[2:3]
; to be copied to vector registers and added to it (the GFX7 expectations
; instead move the pointer into the buffer descriptor base s[0:1]), followed by
; one single-byte load and a v_readfirstlane_b32 into s0.
1900define amdgpu_ps i8 @extractelement_sgpr_v16i8_vgpr_idx(ptr addrspace(4) inreg %ptr, i32 %idx) {
1901; GFX9-LABEL: extractelement_sgpr_v16i8_vgpr_idx:
1902; GFX9:       ; %bb.0:
1903; GFX9-NEXT:    v_and_b32_e32 v2, 15, v0
1904; GFX9-NEXT:    v_mov_b32_e32 v0, s2
1905; GFX9-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
1906; GFX9-NEXT:    v_mov_b32_e32 v1, s3
1907; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
1908; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
1909; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off
1910; GFX9-NEXT:    s_waitcnt vmcnt(0)
1911; GFX9-NEXT:    v_readfirstlane_b32 s0, v0
1912; GFX9-NEXT:    ; return to shader part epilog
1913;
1914; GFX8-LABEL: extractelement_sgpr_v16i8_vgpr_idx:
1915; GFX8:       ; %bb.0:
1916; GFX8-NEXT:    v_and_b32_e32 v2, 15, v0
1917; GFX8-NEXT:    v_mov_b32_e32 v0, s2
1918; GFX8-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
1919; GFX8-NEXT:    v_mov_b32_e32 v1, s3
1920; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
1921; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, v1, v3, vcc
1922; GFX8-NEXT:    flat_load_ubyte v0, v[0:1]
1923; GFX8-NEXT:    s_waitcnt vmcnt(0)
1924; GFX8-NEXT:    v_readfirstlane_b32 s0, v0
1925; GFX8-NEXT:    ; return to shader part epilog
1926;
1927; GFX7-LABEL: extractelement_sgpr_v16i8_vgpr_idx:
1928; GFX7:       ; %bb.0:
1929; GFX7-NEXT:    v_and_b32_e32 v0, 15, v0
1930; GFX7-NEXT:    s_mov_b32 s0, s2
1931; GFX7-NEXT:    s_mov_b32 s1, s3
1932; GFX7-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
1933; GFX7-NEXT:    s_mov_b32 s2, 0
1934; GFX7-NEXT:    s_mov_b32 s3, 0xf000
1935; GFX7-NEXT:    buffer_load_ubyte v0, v[0:1], s[0:3], 0 addr64
1936; GFX7-NEXT:    s_waitcnt vmcnt(0)
1937; GFX7-NEXT:    v_readfirstlane_b32 s0, v0
1938; GFX7-NEXT:    ; return to shader part epilog
1939;
1940; GFX10-LABEL: extractelement_sgpr_v16i8_vgpr_idx:
1941; GFX10:       ; %bb.0:
1942; GFX10-NEXT:    v_and_b32_e32 v2, 15, v0
1943; GFX10-NEXT:    v_mov_b32_e32 v0, s2
1944; GFX10-NEXT:    v_mov_b32_e32 v1, s3
1945; GFX10-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
1946; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
1947; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
1948; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off
1949; GFX10-NEXT:    s_waitcnt vmcnt(0)
1950; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
1951; GFX10-NEXT:    ; return to shader part epilog
1952;
1953; GFX11-LABEL: extractelement_sgpr_v16i8_vgpr_idx:
1954; GFX11:       ; %bb.0:
1955; GFX11-NEXT:    v_and_b32_e32 v2, 15, v0
1956; GFX11-NEXT:    v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
1957; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
1958; GFX11-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
1959; GFX11-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
1960; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
1961; GFX11-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
1962; GFX11-NEXT:    global_load_u8 v0, v[0:1], off
1963; GFX11-NEXT:    s_waitcnt vmcnt(0)
1964; GFX11-NEXT:    v_readfirstlane_b32 s0, v0
1965; GFX11-NEXT:    ; return to shader part epilog
1966  %vector = load <16 x i8>, ptr addrspace(4) %ptr
1967  %element = extractelement <16 x i8> %vector, i32 %idx
1968  ret i8 %element
1969}
1970
; Constant-index extract: loads <16 x i8> from %ptr (addrspace 1) and returns
; element 0. All check prefixes expect one single-byte load directly from the
; incoming address in v[0:1], with no offset operand and no address arithmetic.
1971define i8 @extractelement_vgpr_v16i8_idx0(ptr addrspace(1) %ptr) {
1972; GFX9-LABEL: extractelement_vgpr_v16i8_idx0:
1973; GFX9:       ; %bb.0:
1974; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1975; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off
1976; GFX9-NEXT:    s_waitcnt vmcnt(0)
1977; GFX9-NEXT:    s_setpc_b64 s[30:31]
1978;
1979; GFX8-LABEL: extractelement_vgpr_v16i8_idx0:
1980; GFX8:       ; %bb.0:
1981; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1982; GFX8-NEXT:    flat_load_ubyte v0, v[0:1]
1983; GFX8-NEXT:    s_waitcnt vmcnt(0)
1984; GFX8-NEXT:    s_setpc_b64 s[30:31]
1985;
1986; GFX7-LABEL: extractelement_vgpr_v16i8_idx0:
1987; GFX7:       ; %bb.0:
1988; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1989; GFX7-NEXT:    s_mov_b32 s6, 0
1990; GFX7-NEXT:    s_mov_b32 s7, 0xf000
1991; GFX7-NEXT:    s_mov_b64 s[4:5], 0
1992; GFX7-NEXT:    buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64
1993; GFX7-NEXT:    s_waitcnt vmcnt(0)
1994; GFX7-NEXT:    s_setpc_b64 s[30:31]
1995;
1996; GFX10-LABEL: extractelement_vgpr_v16i8_idx0:
1997; GFX10:       ; %bb.0:
1998; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1999; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off
2000; GFX10-NEXT:    s_waitcnt vmcnt(0)
2001; GFX10-NEXT:    s_setpc_b64 s[30:31]
2002;
2003; GFX11-LABEL: extractelement_vgpr_v16i8_idx0:
2004; GFX11:       ; %bb.0:
2005; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2006; GFX11-NEXT:    global_load_u8 v0, v[0:1], off
2007; GFX11-NEXT:    s_waitcnt vmcnt(0)
2008; GFX11-NEXT:    s_setpc_b64 s[30:31]
2009  %vector = load <16 x i8>, ptr addrspace(1) %ptr
2010  %element = extractelement <16 x i8> %vector, i32 0
2011  ret i8 %element
2012}
2013
; Constant-index extract: loads <16 x i8> from %ptr (addrspace 1) and returns
; element 1. All check prefixes expect one single-byte load at byte offset 1;
; the GFX8 expectations materialize the offset with a 64-bit add before a flat
; load instead of using an immediate offset.
2014define i8 @extractelement_vgpr_v16i8_idx1(ptr addrspace(1) %ptr) {
2015; GFX9-LABEL: extractelement_vgpr_v16i8_idx1:
2016; GFX9:       ; %bb.0:
2017; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2018; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:1
2019; GFX9-NEXT:    s_waitcnt vmcnt(0)
2020; GFX9-NEXT:    s_setpc_b64 s[30:31]
2021;
2022; GFX8-LABEL: extractelement_vgpr_v16i8_idx1:
2023; GFX8:       ; %bb.0:
2024; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2025; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 1, v0
2026; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
2027; GFX8-NEXT:    flat_load_ubyte v0, v[0:1]
2028; GFX8-NEXT:    s_waitcnt vmcnt(0)
2029; GFX8-NEXT:    s_setpc_b64 s[30:31]
2030;
2031; GFX7-LABEL: extractelement_vgpr_v16i8_idx1:
2032; GFX7:       ; %bb.0:
2033; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2034; GFX7-NEXT:    s_mov_b32 s6, 0
2035; GFX7-NEXT:    s_mov_b32 s7, 0xf000
2036; GFX7-NEXT:    s_mov_b64 s[4:5], 0
2037; GFX7-NEXT:    buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 offset:1
2038; GFX7-NEXT:    s_waitcnt vmcnt(0)
2039; GFX7-NEXT:    s_setpc_b64 s[30:31]
2040;
2041; GFX10-LABEL: extractelement_vgpr_v16i8_idx1:
2042; GFX10:       ; %bb.0:
2043; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2044; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:1
2045; GFX10-NEXT:    s_waitcnt vmcnt(0)
2046; GFX10-NEXT:    s_setpc_b64 s[30:31]
2047;
2048; GFX11-LABEL: extractelement_vgpr_v16i8_idx1:
2049; GFX11:       ; %bb.0:
2050; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2051; GFX11-NEXT:    global_load_u8 v0, v[0:1], off offset:1
2052; GFX11-NEXT:    s_waitcnt vmcnt(0)
2053; GFX11-NEXT:    s_setpc_b64 s[30:31]
2054  %vector = load <16 x i8>, ptr addrspace(1) %ptr
2055  %element = extractelement <16 x i8> %vector, i32 1
2056  ret i8 %element
2057}
2058
; Constant-index extract: loads <16 x i8> from %ptr (addrspace 1) and returns
; element 2. All check prefixes expect one single-byte load at byte offset 2;
; the GFX8 expectations materialize the offset with a 64-bit add before a flat
; load instead of using an immediate offset.
2059define i8 @extractelement_vgpr_v16i8_idx2(ptr addrspace(1) %ptr) {
2060; GFX9-LABEL: extractelement_vgpr_v16i8_idx2:
2061; GFX9:       ; %bb.0:
2062; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2063; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:2
2064; GFX9-NEXT:    s_waitcnt vmcnt(0)
2065; GFX9-NEXT:    s_setpc_b64 s[30:31]
2066;
2067; GFX8-LABEL: extractelement_vgpr_v16i8_idx2:
2068; GFX8:       ; %bb.0:
2069; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2070; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 2, v0
2071; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
2072; GFX8-NEXT:    flat_load_ubyte v0, v[0:1]
2073; GFX8-NEXT:    s_waitcnt vmcnt(0)
2074; GFX8-NEXT:    s_setpc_b64 s[30:31]
2075;
2076; GFX7-LABEL: extractelement_vgpr_v16i8_idx2:
2077; GFX7:       ; %bb.0:
2078; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2079; GFX7-NEXT:    s_mov_b32 s6, 0
2080; GFX7-NEXT:    s_mov_b32 s7, 0xf000
2081; GFX7-NEXT:    s_mov_b64 s[4:5], 0
2082; GFX7-NEXT:    buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 offset:2
2083; GFX7-NEXT:    s_waitcnt vmcnt(0)
2084; GFX7-NEXT:    s_setpc_b64 s[30:31]
2085;
2086; GFX10-LABEL: extractelement_vgpr_v16i8_idx2:
2087; GFX10:       ; %bb.0:
2088; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2089; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:2
2090; GFX10-NEXT:    s_waitcnt vmcnt(0)
2091; GFX10-NEXT:    s_setpc_b64 s[30:31]
2092;
2093; GFX11-LABEL: extractelement_vgpr_v16i8_idx2:
2094; GFX11:       ; %bb.0:
2095; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2096; GFX11-NEXT:    global_load_u8 v0, v[0:1], off offset:2
2097; GFX11-NEXT:    s_waitcnt vmcnt(0)
2098; GFX11-NEXT:    s_setpc_b64 s[30:31]
2099  %vector = load <16 x i8>, ptr addrspace(1) %ptr
2100  %element = extractelement <16 x i8> %vector, i32 2
2101  ret i8 %element
2102}
2103
; Constant-index extract: loads <16 x i8> from %ptr (addrspace 1) and returns
; element 3. All check prefixes expect one single-byte load at byte offset 3;
; the GFX8 expectations materialize the offset with a 64-bit add before a flat
; load instead of using an immediate offset.
2104define i8 @extractelement_vgpr_v16i8_idx3(ptr addrspace(1) %ptr) {
2105; GFX9-LABEL: extractelement_vgpr_v16i8_idx3:
2106; GFX9:       ; %bb.0:
2107; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2108; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:3
2109; GFX9-NEXT:    s_waitcnt vmcnt(0)
2110; GFX9-NEXT:    s_setpc_b64 s[30:31]
2111;
2112; GFX8-LABEL: extractelement_vgpr_v16i8_idx3:
2113; GFX8:       ; %bb.0:
2114; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2115; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 3, v0
2116; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
2117; GFX8-NEXT:    flat_load_ubyte v0, v[0:1]
2118; GFX8-NEXT:    s_waitcnt vmcnt(0)
2119; GFX8-NEXT:    s_setpc_b64 s[30:31]
2120;
2121; GFX7-LABEL: extractelement_vgpr_v16i8_idx3:
2122; GFX7:       ; %bb.0:
2123; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2124; GFX7-NEXT:    s_mov_b32 s6, 0
2125; GFX7-NEXT:    s_mov_b32 s7, 0xf000
2126; GFX7-NEXT:    s_mov_b64 s[4:5], 0
2127; GFX7-NEXT:    buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 offset:3
2128; GFX7-NEXT:    s_waitcnt vmcnt(0)
2129; GFX7-NEXT:    s_setpc_b64 s[30:31]
2130;
2131; GFX10-LABEL: extractelement_vgpr_v16i8_idx3:
2132; GFX10:       ; %bb.0:
2133; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2134; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:3
2135; GFX10-NEXT:    s_waitcnt vmcnt(0)
2136; GFX10-NEXT:    s_setpc_b64 s[30:31]
2137;
2138; GFX11-LABEL: extractelement_vgpr_v16i8_idx3:
2139; GFX11:       ; %bb.0:
2140; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2141; GFX11-NEXT:    global_load_u8 v0, v[0:1], off offset:3
2142; GFX11-NEXT:    s_waitcnt vmcnt(0)
2143; GFX11-NEXT:    s_setpc_b64 s[30:31]
2144  %vector = load <16 x i8>, ptr addrspace(1) %ptr
2145  %element = extractelement <16 x i8> %vector, i32 3
2146  ret i8 %element
2147}
2148
; Constant-index extract: loads <16 x i8> from %ptr (addrspace 1) and returns
; element 4. All check prefixes expect one single-byte load at byte offset 4;
; the GFX8 expectations materialize the offset with a 64-bit add before a flat
; load instead of using an immediate offset.
2149define i8 @extractelement_vgpr_v16i8_idx4(ptr addrspace(1) %ptr) {
2150; GFX9-LABEL: extractelement_vgpr_v16i8_idx4:
2151; GFX9:       ; %bb.0:
2152; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2153; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:4
2154; GFX9-NEXT:    s_waitcnt vmcnt(0)
2155; GFX9-NEXT:    s_setpc_b64 s[30:31]
2156;
2157; GFX8-LABEL: extractelement_vgpr_v16i8_idx4:
2158; GFX8:       ; %bb.0:
2159; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2160; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 4, v0
2161; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
2162; GFX8-NEXT:    flat_load_ubyte v0, v[0:1]
2163; GFX8-NEXT:    s_waitcnt vmcnt(0)
2164; GFX8-NEXT:    s_setpc_b64 s[30:31]
2165;
2166; GFX7-LABEL: extractelement_vgpr_v16i8_idx4:
2167; GFX7:       ; %bb.0:
2168; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2169; GFX7-NEXT:    s_mov_b32 s6, 0
2170; GFX7-NEXT:    s_mov_b32 s7, 0xf000
2171; GFX7-NEXT:    s_mov_b64 s[4:5], 0
2172; GFX7-NEXT:    buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 offset:4
2173; GFX7-NEXT:    s_waitcnt vmcnt(0)
2174; GFX7-NEXT:    s_setpc_b64 s[30:31]
2175;
2176; GFX10-LABEL: extractelement_vgpr_v16i8_idx4:
2177; GFX10:       ; %bb.0:
2178; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2179; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:4
2180; GFX10-NEXT:    s_waitcnt vmcnt(0)
2181; GFX10-NEXT:    s_setpc_b64 s[30:31]
2182;
2183; GFX11-LABEL: extractelement_vgpr_v16i8_idx4:
2184; GFX11:       ; %bb.0:
2185; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2186; GFX11-NEXT:    global_load_u8 v0, v[0:1], off offset:4
2187; GFX11-NEXT:    s_waitcnt vmcnt(0)
2188; GFX11-NEXT:    s_setpc_b64 s[30:31]
2189  %vector = load <16 x i8>, ptr addrspace(1) %ptr
2190  %element = extractelement <16 x i8> %vector, i32 4
2191  ret i8 %element
2192}
2193
; Constant-index extract: loads <16 x i8> from %ptr (addrspace 1) and returns
; element 5. All check prefixes expect one single-byte load at byte offset 5;
; the GFX8 expectations materialize the offset with a 64-bit add before a flat
; load instead of using an immediate offset.
2194define i8 @extractelement_vgpr_v16i8_idx5(ptr addrspace(1) %ptr) {
2195; GFX9-LABEL: extractelement_vgpr_v16i8_idx5:
2196; GFX9:       ; %bb.0:
2197; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2198; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:5
2199; GFX9-NEXT:    s_waitcnt vmcnt(0)
2200; GFX9-NEXT:    s_setpc_b64 s[30:31]
2201;
2202; GFX8-LABEL: extractelement_vgpr_v16i8_idx5:
2203; GFX8:       ; %bb.0:
2204; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2205; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 5, v0
2206; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
2207; GFX8-NEXT:    flat_load_ubyte v0, v[0:1]
2208; GFX8-NEXT:    s_waitcnt vmcnt(0)
2209; GFX8-NEXT:    s_setpc_b64 s[30:31]
2210;
2211; GFX7-LABEL: extractelement_vgpr_v16i8_idx5:
2212; GFX7:       ; %bb.0:
2213; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2214; GFX7-NEXT:    s_mov_b32 s6, 0
2215; GFX7-NEXT:    s_mov_b32 s7, 0xf000
2216; GFX7-NEXT:    s_mov_b64 s[4:5], 0
2217; GFX7-NEXT:    buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 offset:5
2218; GFX7-NEXT:    s_waitcnt vmcnt(0)
2219; GFX7-NEXT:    s_setpc_b64 s[30:31]
2220;
2221; GFX10-LABEL: extractelement_vgpr_v16i8_idx5:
2222; GFX10:       ; %bb.0:
2223; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2224; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:5
2225; GFX10-NEXT:    s_waitcnt vmcnt(0)
2226; GFX10-NEXT:    s_setpc_b64 s[30:31]
2227;
2228; GFX11-LABEL: extractelement_vgpr_v16i8_idx5:
2229; GFX11:       ; %bb.0:
2230; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2231; GFX11-NEXT:    global_load_u8 v0, v[0:1], off offset:5
2232; GFX11-NEXT:    s_waitcnt vmcnt(0)
2233; GFX11-NEXT:    s_setpc_b64 s[30:31]
2234  %vector = load <16 x i8>, ptr addrspace(1) %ptr
2235  %element = extractelement <16 x i8> %vector, i32 5
2236  ret i8 %element
2237}
2238
; Constant-index extract: loads <16 x i8> from %ptr (addrspace 1) and returns
; element 6. All check prefixes expect one single-byte load at byte offset 6;
; the GFX8 expectations materialize the offset with a 64-bit add before a flat
; load instead of using an immediate offset.
2239define i8 @extractelement_vgpr_v16i8_idx6(ptr addrspace(1) %ptr) {
2240; GFX9-LABEL: extractelement_vgpr_v16i8_idx6:
2241; GFX9:       ; %bb.0:
2242; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2243; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:6
2244; GFX9-NEXT:    s_waitcnt vmcnt(0)
2245; GFX9-NEXT:    s_setpc_b64 s[30:31]
2246;
2247; GFX8-LABEL: extractelement_vgpr_v16i8_idx6:
2248; GFX8:       ; %bb.0:
2249; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2250; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 6, v0
2251; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
2252; GFX8-NEXT:    flat_load_ubyte v0, v[0:1]
2253; GFX8-NEXT:    s_waitcnt vmcnt(0)
2254; GFX8-NEXT:    s_setpc_b64 s[30:31]
2255;
2256; GFX7-LABEL: extractelement_vgpr_v16i8_idx6:
2257; GFX7:       ; %bb.0:
2258; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2259; GFX7-NEXT:    s_mov_b32 s6, 0
2260; GFX7-NEXT:    s_mov_b32 s7, 0xf000
2261; GFX7-NEXT:    s_mov_b64 s[4:5], 0
2262; GFX7-NEXT:    buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 offset:6
2263; GFX7-NEXT:    s_waitcnt vmcnt(0)
2264; GFX7-NEXT:    s_setpc_b64 s[30:31]
2265;
2266; GFX10-LABEL: extractelement_vgpr_v16i8_idx6:
2267; GFX10:       ; %bb.0:
2268; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2269; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:6
2270; GFX10-NEXT:    s_waitcnt vmcnt(0)
2271; GFX10-NEXT:    s_setpc_b64 s[30:31]
2272;
2273; GFX11-LABEL: extractelement_vgpr_v16i8_idx6:
2274; GFX11:       ; %bb.0:
2275; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2276; GFX11-NEXT:    global_load_u8 v0, v[0:1], off offset:6
2277; GFX11-NEXT:    s_waitcnt vmcnt(0)
2278; GFX11-NEXT:    s_setpc_b64 s[30:31]
2279  %vector = load <16 x i8>, ptr addrspace(1) %ptr
2280  %element = extractelement <16 x i8> %vector, i32 6
2281  ret i8 %element
2282}
2283
; Constant-index extract: loads <16 x i8> from %ptr (addrspace 1) and returns
; element 7. All check prefixes expect one single-byte load at byte offset 7;
; the GFX8 expectations materialize the offset with a 64-bit add before a flat
; load instead of using an immediate offset.
2284define i8 @extractelement_vgpr_v16i8_idx7(ptr addrspace(1) %ptr) {
2285; GFX9-LABEL: extractelement_vgpr_v16i8_idx7:
2286; GFX9:       ; %bb.0:
2287; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2288; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:7
2289; GFX9-NEXT:    s_waitcnt vmcnt(0)
2290; GFX9-NEXT:    s_setpc_b64 s[30:31]
2291;
2292; GFX8-LABEL: extractelement_vgpr_v16i8_idx7:
2293; GFX8:       ; %bb.0:
2294; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2295; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 7, v0
2296; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
2297; GFX8-NEXT:    flat_load_ubyte v0, v[0:1]
2298; GFX8-NEXT:    s_waitcnt vmcnt(0)
2299; GFX8-NEXT:    s_setpc_b64 s[30:31]
2300;
2301; GFX7-LABEL: extractelement_vgpr_v16i8_idx7:
2302; GFX7:       ; %bb.0:
2303; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2304; GFX7-NEXT:    s_mov_b32 s6, 0
2305; GFX7-NEXT:    s_mov_b32 s7, 0xf000
2306; GFX7-NEXT:    s_mov_b64 s[4:5], 0
2307; GFX7-NEXT:    buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 offset:7
2308; GFX7-NEXT:    s_waitcnt vmcnt(0)
2309; GFX7-NEXT:    s_setpc_b64 s[30:31]
2310;
2311; GFX10-LABEL: extractelement_vgpr_v16i8_idx7:
2312; GFX10:       ; %bb.0:
2313; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2314; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:7
2315; GFX10-NEXT:    s_waitcnt vmcnt(0)
2316; GFX10-NEXT:    s_setpc_b64 s[30:31]
2317;
2318; GFX11-LABEL: extractelement_vgpr_v16i8_idx7:
2319; GFX11:       ; %bb.0:
2320; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2321; GFX11-NEXT:    global_load_u8 v0, v[0:1], off offset:7
2322; GFX11-NEXT:    s_waitcnt vmcnt(0)
2323; GFX11-NEXT:    s_setpc_b64 s[30:31]
2324  %vector = load <16 x i8>, ptr addrspace(1) %ptr
2325  %element = extractelement <16 x i8> %vector, i32 7
2326  ret i8 %element
2327}
2328
; Constant-index extractelement of lane 8 from a <16 x i8> vector loaded
; through the global (addrspace(1)) pointer %ptr; the extracted i8 is returned.
; The expected code below is a single unsigned byte load at byte offset 8 on
; every checked target. The gfx900, hawaii, gfx1010 and gfx1100 runs carry the
; 8 as an immediate offset on the load; the fiji run adds 8 to the 64-bit
; address (v_add_u32 / v_addc_u32) and then issues flat_load_ubyte.
; The assertion lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them with that script instead of editing them by hand.
2329define i8 @extractelement_vgpr_v16i8_idx8(ptr addrspace(1) %ptr) {
2330; GFX9-LABEL: extractelement_vgpr_v16i8_idx8:
2331; GFX9:       ; %bb.0:
2332; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2333; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:8
2334; GFX9-NEXT:    s_waitcnt vmcnt(0)
2335; GFX9-NEXT:    s_setpc_b64 s[30:31]
2336;
2337; GFX8-LABEL: extractelement_vgpr_v16i8_idx8:
2338; GFX8:       ; %bb.0:
2339; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2340; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 8, v0
2341; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
2342; GFX8-NEXT:    flat_load_ubyte v0, v[0:1]
2343; GFX8-NEXT:    s_waitcnt vmcnt(0)
2344; GFX8-NEXT:    s_setpc_b64 s[30:31]
2345;
2346; GFX7-LABEL: extractelement_vgpr_v16i8_idx8:
2347; GFX7:       ; %bb.0:
2348; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2349; GFX7-NEXT:    s_mov_b32 s6, 0
2350; GFX7-NEXT:    s_mov_b32 s7, 0xf000
2351; GFX7-NEXT:    s_mov_b64 s[4:5], 0
2352; GFX7-NEXT:    buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 offset:8
2353; GFX7-NEXT:    s_waitcnt vmcnt(0)
2354; GFX7-NEXT:    s_setpc_b64 s[30:31]
2355;
2356; GFX10-LABEL: extractelement_vgpr_v16i8_idx8:
2357; GFX10:       ; %bb.0:
2358; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2359; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:8
2360; GFX10-NEXT:    s_waitcnt vmcnt(0)
2361; GFX10-NEXT:    s_setpc_b64 s[30:31]
2362;
2363; GFX11-LABEL: extractelement_vgpr_v16i8_idx8:
2364; GFX11:       ; %bb.0:
2365; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2366; GFX11-NEXT:    global_load_u8 v0, v[0:1], off offset:8
2367; GFX11-NEXT:    s_waitcnt vmcnt(0)
2368; GFX11-NEXT:    s_setpc_b64 s[30:31]
2369  %vector = load <16 x i8>, ptr addrspace(1) %ptr
2370  %element = extractelement <16 x i8> %vector, i32 8
2371  ret i8 %element
2372}
2373
; Constant-index extractelement of lane 9 from a <16 x i8> vector loaded
; through the global (addrspace(1)) pointer %ptr; the extracted i8 is returned.
; The expected code below is a single unsigned byte load at byte offset 9 on
; every checked target. The gfx900, hawaii, gfx1010 and gfx1100 runs carry the
; 9 as an immediate offset on the load; the fiji run adds 9 to the 64-bit
; address (v_add_u32 / v_addc_u32) and then issues flat_load_ubyte.
; The assertion lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them with that script instead of editing them by hand.
2374define i8 @extractelement_vgpr_v16i8_idx9(ptr addrspace(1) %ptr) {
2375; GFX9-LABEL: extractelement_vgpr_v16i8_idx9:
2376; GFX9:       ; %bb.0:
2377; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2378; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:9
2379; GFX9-NEXT:    s_waitcnt vmcnt(0)
2380; GFX9-NEXT:    s_setpc_b64 s[30:31]
2381;
2382; GFX8-LABEL: extractelement_vgpr_v16i8_idx9:
2383; GFX8:       ; %bb.0:
2384; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2385; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 9, v0
2386; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
2387; GFX8-NEXT:    flat_load_ubyte v0, v[0:1]
2388; GFX8-NEXT:    s_waitcnt vmcnt(0)
2389; GFX8-NEXT:    s_setpc_b64 s[30:31]
2390;
2391; GFX7-LABEL: extractelement_vgpr_v16i8_idx9:
2392; GFX7:       ; %bb.0:
2393; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2394; GFX7-NEXT:    s_mov_b32 s6, 0
2395; GFX7-NEXT:    s_mov_b32 s7, 0xf000
2396; GFX7-NEXT:    s_mov_b64 s[4:5], 0
2397; GFX7-NEXT:    buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 offset:9
2398; GFX7-NEXT:    s_waitcnt vmcnt(0)
2399; GFX7-NEXT:    s_setpc_b64 s[30:31]
2400;
2401; GFX10-LABEL: extractelement_vgpr_v16i8_idx9:
2402; GFX10:       ; %bb.0:
2403; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2404; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:9
2405; GFX10-NEXT:    s_waitcnt vmcnt(0)
2406; GFX10-NEXT:    s_setpc_b64 s[30:31]
2407;
2408; GFX11-LABEL: extractelement_vgpr_v16i8_idx9:
2409; GFX11:       ; %bb.0:
2410; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2411; GFX11-NEXT:    global_load_u8 v0, v[0:1], off offset:9
2412; GFX11-NEXT:    s_waitcnt vmcnt(0)
2413; GFX11-NEXT:    s_setpc_b64 s[30:31]
2414  %vector = load <16 x i8>, ptr addrspace(1) %ptr
2415  %element = extractelement <16 x i8> %vector, i32 9
2416  ret i8 %element
2417}
2418
; Constant-index extractelement of lane 10 from a <16 x i8> vector loaded
; through the global (addrspace(1)) pointer %ptr; the extracted i8 is returned.
; The expected code below is a single unsigned byte load at byte offset 10 on
; every checked target. The gfx900, hawaii, gfx1010 and gfx1100 runs carry the
; 10 as an immediate offset on the load; the fiji run adds 10 to the 64-bit
; address (v_add_u32 / v_addc_u32) and then issues flat_load_ubyte.
; The assertion lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them with that script instead of editing them by hand.
2419define i8 @extractelement_vgpr_v16i8_idx10(ptr addrspace(1) %ptr) {
2420; GFX9-LABEL: extractelement_vgpr_v16i8_idx10:
2421; GFX9:       ; %bb.0:
2422; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2423; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:10
2424; GFX9-NEXT:    s_waitcnt vmcnt(0)
2425; GFX9-NEXT:    s_setpc_b64 s[30:31]
2426;
2427; GFX8-LABEL: extractelement_vgpr_v16i8_idx10:
2428; GFX8:       ; %bb.0:
2429; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2430; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 10, v0
2431; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
2432; GFX8-NEXT:    flat_load_ubyte v0, v[0:1]
2433; GFX8-NEXT:    s_waitcnt vmcnt(0)
2434; GFX8-NEXT:    s_setpc_b64 s[30:31]
2435;
2436; GFX7-LABEL: extractelement_vgpr_v16i8_idx10:
2437; GFX7:       ; %bb.0:
2438; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2439; GFX7-NEXT:    s_mov_b32 s6, 0
2440; GFX7-NEXT:    s_mov_b32 s7, 0xf000
2441; GFX7-NEXT:    s_mov_b64 s[4:5], 0
2442; GFX7-NEXT:    buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 offset:10
2443; GFX7-NEXT:    s_waitcnt vmcnt(0)
2444; GFX7-NEXT:    s_setpc_b64 s[30:31]
2445;
2446; GFX10-LABEL: extractelement_vgpr_v16i8_idx10:
2447; GFX10:       ; %bb.0:
2448; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2449; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:10
2450; GFX10-NEXT:    s_waitcnt vmcnt(0)
2451; GFX10-NEXT:    s_setpc_b64 s[30:31]
2452;
2453; GFX11-LABEL: extractelement_vgpr_v16i8_idx10:
2454; GFX11:       ; %bb.0:
2455; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2456; GFX11-NEXT:    global_load_u8 v0, v[0:1], off offset:10
2457; GFX11-NEXT:    s_waitcnt vmcnt(0)
2458; GFX11-NEXT:    s_setpc_b64 s[30:31]
2459  %vector = load <16 x i8>, ptr addrspace(1) %ptr
2460  %element = extractelement <16 x i8> %vector, i32 10
2461  ret i8 %element
2462}
2463
; Constant-index extractelement of lane 11 from a <16 x i8> vector loaded
; through the global (addrspace(1)) pointer %ptr; the extracted i8 is returned.
; The expected code below is a single unsigned byte load at byte offset 11 on
; every checked target. The gfx900, hawaii, gfx1010 and gfx1100 runs carry the
; 11 as an immediate offset on the load; the fiji run adds 11 to the 64-bit
; address (v_add_u32 / v_addc_u32) and then issues flat_load_ubyte.
; The assertion lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them with that script instead of editing them by hand.
2464define i8 @extractelement_vgpr_v16i8_idx11(ptr addrspace(1) %ptr) {
2465; GFX9-LABEL: extractelement_vgpr_v16i8_idx11:
2466; GFX9:       ; %bb.0:
2467; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2468; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:11
2469; GFX9-NEXT:    s_waitcnt vmcnt(0)
2470; GFX9-NEXT:    s_setpc_b64 s[30:31]
2471;
2472; GFX8-LABEL: extractelement_vgpr_v16i8_idx11:
2473; GFX8:       ; %bb.0:
2474; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2475; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 11, v0
2476; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
2477; GFX8-NEXT:    flat_load_ubyte v0, v[0:1]
2478; GFX8-NEXT:    s_waitcnt vmcnt(0)
2479; GFX8-NEXT:    s_setpc_b64 s[30:31]
2480;
2481; GFX7-LABEL: extractelement_vgpr_v16i8_idx11:
2482; GFX7:       ; %bb.0:
2483; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2484; GFX7-NEXT:    s_mov_b32 s6, 0
2485; GFX7-NEXT:    s_mov_b32 s7, 0xf000
2486; GFX7-NEXT:    s_mov_b64 s[4:5], 0
2487; GFX7-NEXT:    buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 offset:11
2488; GFX7-NEXT:    s_waitcnt vmcnt(0)
2489; GFX7-NEXT:    s_setpc_b64 s[30:31]
2490;
2491; GFX10-LABEL: extractelement_vgpr_v16i8_idx11:
2492; GFX10:       ; %bb.0:
2493; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2494; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:11
2495; GFX10-NEXT:    s_waitcnt vmcnt(0)
2496; GFX10-NEXT:    s_setpc_b64 s[30:31]
2497;
2498; GFX11-LABEL: extractelement_vgpr_v16i8_idx11:
2499; GFX11:       ; %bb.0:
2500; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2501; GFX11-NEXT:    global_load_u8 v0, v[0:1], off offset:11
2502; GFX11-NEXT:    s_waitcnt vmcnt(0)
2503; GFX11-NEXT:    s_setpc_b64 s[30:31]
2504  %vector = load <16 x i8>, ptr addrspace(1) %ptr
2505  %element = extractelement <16 x i8> %vector, i32 11
2506  ret i8 %element
2507}
2508
; Constant-index extractelement of lane 12 from a <16 x i8> vector loaded
; through the global (addrspace(1)) pointer %ptr; the extracted i8 is returned.
; The expected code below is a single unsigned byte load at byte offset 12 on
; every checked target. The gfx900, hawaii, gfx1010 and gfx1100 runs carry the
; 12 as an immediate offset on the load; the fiji run adds 12 to the 64-bit
; address (v_add_u32 / v_addc_u32) and then issues flat_load_ubyte.
; The assertion lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them with that script instead of editing them by hand.
2509define i8 @extractelement_vgpr_v16i8_idx12(ptr addrspace(1) %ptr) {
2510; GFX9-LABEL: extractelement_vgpr_v16i8_idx12:
2511; GFX9:       ; %bb.0:
2512; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2513; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:12
2514; GFX9-NEXT:    s_waitcnt vmcnt(0)
2515; GFX9-NEXT:    s_setpc_b64 s[30:31]
2516;
2517; GFX8-LABEL: extractelement_vgpr_v16i8_idx12:
2518; GFX8:       ; %bb.0:
2519; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2520; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 12, v0
2521; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
2522; GFX8-NEXT:    flat_load_ubyte v0, v[0:1]
2523; GFX8-NEXT:    s_waitcnt vmcnt(0)
2524; GFX8-NEXT:    s_setpc_b64 s[30:31]
2525;
2526; GFX7-LABEL: extractelement_vgpr_v16i8_idx12:
2527; GFX7:       ; %bb.0:
2528; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2529; GFX7-NEXT:    s_mov_b32 s6, 0
2530; GFX7-NEXT:    s_mov_b32 s7, 0xf000
2531; GFX7-NEXT:    s_mov_b64 s[4:5], 0
2532; GFX7-NEXT:    buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 offset:12
2533; GFX7-NEXT:    s_waitcnt vmcnt(0)
2534; GFX7-NEXT:    s_setpc_b64 s[30:31]
2535;
2536; GFX10-LABEL: extractelement_vgpr_v16i8_idx12:
2537; GFX10:       ; %bb.0:
2538; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2539; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:12
2540; GFX10-NEXT:    s_waitcnt vmcnt(0)
2541; GFX10-NEXT:    s_setpc_b64 s[30:31]
2542;
2543; GFX11-LABEL: extractelement_vgpr_v16i8_idx12:
2544; GFX11:       ; %bb.0:
2545; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2546; GFX11-NEXT:    global_load_u8 v0, v[0:1], off offset:12
2547; GFX11-NEXT:    s_waitcnt vmcnt(0)
2548; GFX11-NEXT:    s_setpc_b64 s[30:31]
2549  %vector = load <16 x i8>, ptr addrspace(1) %ptr
2550  %element = extractelement <16 x i8> %vector, i32 12
2551  ret i8 %element
2552}
2553
; Constant-index extractelement of lane 13 from a <16 x i8> vector loaded
; through the global (addrspace(1)) pointer %ptr; the extracted i8 is returned.
; The expected code below is a single unsigned byte load at byte offset 13 on
; every checked target. The gfx900, hawaii, gfx1010 and gfx1100 runs carry the
; 13 as an immediate offset on the load; the fiji run adds 13 to the 64-bit
; address (v_add_u32 / v_addc_u32) and then issues flat_load_ubyte.
; The assertion lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them with that script instead of editing them by hand.
2554define i8 @extractelement_vgpr_v16i8_idx13(ptr addrspace(1) %ptr) {
2555; GFX9-LABEL: extractelement_vgpr_v16i8_idx13:
2556; GFX9:       ; %bb.0:
2557; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2558; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:13
2559; GFX9-NEXT:    s_waitcnt vmcnt(0)
2560; GFX9-NEXT:    s_setpc_b64 s[30:31]
2561;
2562; GFX8-LABEL: extractelement_vgpr_v16i8_idx13:
2563; GFX8:       ; %bb.0:
2564; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2565; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 13, v0
2566; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
2567; GFX8-NEXT:    flat_load_ubyte v0, v[0:1]
2568; GFX8-NEXT:    s_waitcnt vmcnt(0)
2569; GFX8-NEXT:    s_setpc_b64 s[30:31]
2570;
2571; GFX7-LABEL: extractelement_vgpr_v16i8_idx13:
2572; GFX7:       ; %bb.0:
2573; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2574; GFX7-NEXT:    s_mov_b32 s6, 0
2575; GFX7-NEXT:    s_mov_b32 s7, 0xf000
2576; GFX7-NEXT:    s_mov_b64 s[4:5], 0
2577; GFX7-NEXT:    buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 offset:13
2578; GFX7-NEXT:    s_waitcnt vmcnt(0)
2579; GFX7-NEXT:    s_setpc_b64 s[30:31]
2580;
2581; GFX10-LABEL: extractelement_vgpr_v16i8_idx13:
2582; GFX10:       ; %bb.0:
2583; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2584; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:13
2585; GFX10-NEXT:    s_waitcnt vmcnt(0)
2586; GFX10-NEXT:    s_setpc_b64 s[30:31]
2587;
2588; GFX11-LABEL: extractelement_vgpr_v16i8_idx13:
2589; GFX11:       ; %bb.0:
2590; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2591; GFX11-NEXT:    global_load_u8 v0, v[0:1], off offset:13
2592; GFX11-NEXT:    s_waitcnt vmcnt(0)
2593; GFX11-NEXT:    s_setpc_b64 s[30:31]
2594  %vector = load <16 x i8>, ptr addrspace(1) %ptr
2595  %element = extractelement <16 x i8> %vector, i32 13
2596  ret i8 %element
2597}
2598
; Constant-index extractelement of lane 14 from a <16 x i8> vector loaded
; through the global (addrspace(1)) pointer %ptr; the extracted i8 is returned.
; The expected code below is a single unsigned byte load at byte offset 14 on
; every checked target. The gfx900, hawaii, gfx1010 and gfx1100 runs carry the
; 14 as an immediate offset on the load; the fiji run adds 14 to the 64-bit
; address (v_add_u32 / v_addc_u32) and then issues flat_load_ubyte.
; The assertion lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them with that script instead of editing them by hand.
2599define i8 @extractelement_vgpr_v16i8_idx14(ptr addrspace(1) %ptr) {
2600; GFX9-LABEL: extractelement_vgpr_v16i8_idx14:
2601; GFX9:       ; %bb.0:
2602; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2603; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:14
2604; GFX9-NEXT:    s_waitcnt vmcnt(0)
2605; GFX9-NEXT:    s_setpc_b64 s[30:31]
2606;
2607; GFX8-LABEL: extractelement_vgpr_v16i8_idx14:
2608; GFX8:       ; %bb.0:
2609; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2610; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 14, v0
2611; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
2612; GFX8-NEXT:    flat_load_ubyte v0, v[0:1]
2613; GFX8-NEXT:    s_waitcnt vmcnt(0)
2614; GFX8-NEXT:    s_setpc_b64 s[30:31]
2615;
2616; GFX7-LABEL: extractelement_vgpr_v16i8_idx14:
2617; GFX7:       ; %bb.0:
2618; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2619; GFX7-NEXT:    s_mov_b32 s6, 0
2620; GFX7-NEXT:    s_mov_b32 s7, 0xf000
2621; GFX7-NEXT:    s_mov_b64 s[4:5], 0
2622; GFX7-NEXT:    buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 offset:14
2623; GFX7-NEXT:    s_waitcnt vmcnt(0)
2624; GFX7-NEXT:    s_setpc_b64 s[30:31]
2625;
2626; GFX10-LABEL: extractelement_vgpr_v16i8_idx14:
2627; GFX10:       ; %bb.0:
2628; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2629; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:14
2630; GFX10-NEXT:    s_waitcnt vmcnt(0)
2631; GFX10-NEXT:    s_setpc_b64 s[30:31]
2632;
2633; GFX11-LABEL: extractelement_vgpr_v16i8_idx14:
2634; GFX11:       ; %bb.0:
2635; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2636; GFX11-NEXT:    global_load_u8 v0, v[0:1], off offset:14
2637; GFX11-NEXT:    s_waitcnt vmcnt(0)
2638; GFX11-NEXT:    s_setpc_b64 s[30:31]
2639  %vector = load <16 x i8>, ptr addrspace(1) %ptr
2640  %element = extractelement <16 x i8> %vector, i32 14
2641  ret i8 %element
2642}
2643
; Constant-index extractelement of lane 15 (the last lane) from a <16 x i8>
; vector loaded through the global (addrspace(1)) pointer %ptr; the extracted
; i8 is returned.
; The expected code below is a single unsigned byte load at byte offset 15 on
; every checked target. The gfx900, hawaii, gfx1010 and gfx1100 runs carry the
; 15 as an immediate offset on the load; the fiji run adds 15 to the 64-bit
; address (v_add_u32 / v_addc_u32) and then issues flat_load_ubyte.
; The assertion lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them with that script instead of editing them by hand.
2644define i8 @extractelement_vgpr_v16i8_idx15(ptr addrspace(1) %ptr) {
2645; GFX9-LABEL: extractelement_vgpr_v16i8_idx15:
2646; GFX9:       ; %bb.0:
2647; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2648; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:15
2649; GFX9-NEXT:    s_waitcnt vmcnt(0)
2650; GFX9-NEXT:    s_setpc_b64 s[30:31]
2651;
2652; GFX8-LABEL: extractelement_vgpr_v16i8_idx15:
2653; GFX8:       ; %bb.0:
2654; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2655; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 15, v0
2656; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
2657; GFX8-NEXT:    flat_load_ubyte v0, v[0:1]
2658; GFX8-NEXT:    s_waitcnt vmcnt(0)
2659; GFX8-NEXT:    s_setpc_b64 s[30:31]
2660;
2661; GFX7-LABEL: extractelement_vgpr_v16i8_idx15:
2662; GFX7:       ; %bb.0:
2663; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2664; GFX7-NEXT:    s_mov_b32 s6, 0
2665; GFX7-NEXT:    s_mov_b32 s7, 0xf000
2666; GFX7-NEXT:    s_mov_b64 s[4:5], 0
2667; GFX7-NEXT:    buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 offset:15
2668; GFX7-NEXT:    s_waitcnt vmcnt(0)
2669; GFX7-NEXT:    s_setpc_b64 s[30:31]
2670;
2671; GFX10-LABEL: extractelement_vgpr_v16i8_idx15:
2672; GFX10:       ; %bb.0:
2673; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2674; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:15
2675; GFX10-NEXT:    s_waitcnt vmcnt(0)
2676; GFX10-NEXT:    s_setpc_b64 s[30:31]
2677;
2678; GFX11-LABEL: extractelement_vgpr_v16i8_idx15:
2679; GFX11:       ; %bb.0:
2680; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2681; GFX11-NEXT:    global_load_u8 v0, v[0:1], off offset:15
2682; GFX11-NEXT:    s_waitcnt vmcnt(0)
2683; GFX11-NEXT:    s_setpc_b64 s[30:31]
2684  %vector = load <16 x i8>, ptr addrspace(1) %ptr
2685  %element = extractelement <16 x i8> %vector, i32 15
2686  ret i8 %element
2687}
2688;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
2689; GCN: {{.*}}
2690