xref: /llvm-project/llvm/test/CodeGen/AMDGPU/load-constant-i8.ll (revision 6206f5444fc0732e6495703c75a67f1f90f5b418)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GFX6-NOHSA %s
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GFX7-HSA %s
; RUN: llc -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GFX8-NOHSA %s
; RUN: llc -mtriple=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=EG %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX12 %s

; TODO: NOT AND
; Kernel under test - loads one i8 through %in, a pointer in addrspace(4) (the
; AMDGPU constant address space), and stores it unchanged through %out in
; addrspace(1). The amdgcn expectations below use a single unsigned byte load
; followed by a single byte store.
define amdgpu_kernel void @constant_load_i8(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
; GFX6-NOHSA-LABEL: constant_load_i8:
; GFX6-NOHSA:       ; %bb.0: ; %entry
; GFX6-NOHSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; GFX6-NOHSA-NEXT:    s_mov_b32 s7, 0xf000
; GFX6-NOHSA-NEXT:    s_mov_b32 s6, -1
; GFX6-NOHSA-NEXT:    s_mov_b32 s10, s6
; GFX6-NOHSA-NEXT:    s_mov_b32 s11, s7
; GFX6-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NOHSA-NEXT:    s_mov_b32 s8, s2
; GFX6-NOHSA-NEXT:    s_mov_b32 s9, s3
; GFX6-NOHSA-NEXT:    buffer_load_ubyte v0, off, s[8:11], 0
; GFX6-NOHSA-NEXT:    s_mov_b32 s4, s0
; GFX6-NOHSA-NEXT:    s_mov_b32 s5, s1
; GFX6-NOHSA-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NOHSA-NEXT:    buffer_store_byte v0, off, s[4:7], 0
; GFX6-NOHSA-NEXT:    s_endpgm
;
; GFX7-HSA-LABEL: constant_load_i8:
; GFX7-HSA:       ; %bb.0: ; %entry
; GFX7-HSA-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
; GFX7-HSA-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s2
; GFX7-HSA-NEXT:    v_mov_b32_e32 v1, s3
; GFX7-HSA-NEXT:    flat_load_ubyte v2, v[0:1]
; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s0
; GFX7-HSA-NEXT:    v_mov_b32_e32 v1, s1
; GFX7-HSA-NEXT:    s_waitcnt vmcnt(0)
; GFX7-HSA-NEXT:    flat_store_byte v[0:1], v2
; GFX7-HSA-NEXT:    s_endpgm
;
; GFX8-NOHSA-LABEL: constant_load_i8:
; GFX8-NOHSA:       ; %bb.0: ; %entry
; GFX8-NOHSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX8-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s2
; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s3
; GFX8-NOHSA-NEXT:    flat_load_ubyte v2, v[0:1]
; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s0
; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s1
; GFX8-NOHSA-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NOHSA-NEXT:    flat_store_byte v[0:1], v2
; GFX8-NOHSA-NEXT:    s_endpgm
;
; EG-LABEL: constant_load_i8:
; EG:       ; %bb.0: ; %entry
; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT:    TEX 0 @6
; EG-NEXT:    ALU 11, @9, KC0[CB0:0-32], KC1[]
; EG-NEXT:    MEM_RAT MSKOR T0.XW, T1.X
; EG-NEXT:    CF_END
; EG-NEXT:    PAD
; EG-NEXT:    Fetch clause starting at 6:
; EG-NEXT:     VTX_READ_8 T0.X, T0.X, 0, #1
; EG-NEXT:    ALU clause starting at 8:
; EG-NEXT:     MOV * T0.X, KC0[2].Z,
; EG-NEXT:    ALU clause starting at 9:
; EG-NEXT:     AND_INT T0.W, KC0[2].Y, literal.x,
; EG-NEXT:     AND_INT * T1.W, T0.X, literal.y,
; EG-NEXT:    3(4.203895e-45), 255(3.573311e-43)
; EG-NEXT:     LSHL * T0.W, PV.W, literal.x,
; EG-NEXT:    3(4.203895e-45), 0(0.000000e+00)
; EG-NEXT:     LSHL T0.X, T1.W, PV.W,
; EG-NEXT:     LSHL * T0.W, literal.x, PV.W,
; EG-NEXT:    255(3.573311e-43), 0(0.000000e+00)
; EG-NEXT:     MOV T0.Y, 0.0,
; EG-NEXT:     MOV * T0.Z, 0.0,
; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
;
; GFX12-LABEL: constant_load_i8:
; GFX12:       ; %bb.0: ; %entry
; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; GFX12-NEXT:    v_mov_b32_e32 v0, 0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    global_load_u8 v1, v0, s[2:3]
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    global_store_b8 v0, v1, s[0:1]
; GFX12-NEXT:    s_endpgm
entry:
  %ld = load i8, ptr addrspace(4) %in
  store i8 %ld, ptr addrspace(1) %out
  ret void
}
93
; Kernel under test - loads a <2 x i8> vector through the addrspace(4) pointer
; %in and stores it unchanged through the addrspace(1) pointer %out. The
; amdgcn expectations below treat the vector as one 16-bit value, using a
; single ushort/u16 load and a single short/b16 store.
define amdgpu_kernel void @constant_load_v2i8(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
; GFX6-NOHSA-LABEL: constant_load_v2i8:
; GFX6-NOHSA:       ; %bb.0: ; %entry
; GFX6-NOHSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; GFX6-NOHSA-NEXT:    s_mov_b32 s7, 0xf000
; GFX6-NOHSA-NEXT:    s_mov_b32 s6, -1
; GFX6-NOHSA-NEXT:    s_mov_b32 s10, s6
; GFX6-NOHSA-NEXT:    s_mov_b32 s11, s7
; GFX6-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NOHSA-NEXT:    s_mov_b32 s8, s2
; GFX6-NOHSA-NEXT:    s_mov_b32 s9, s3
; GFX6-NOHSA-NEXT:    buffer_load_ushort v0, off, s[8:11], 0
; GFX6-NOHSA-NEXT:    s_mov_b32 s4, s0
; GFX6-NOHSA-NEXT:    s_mov_b32 s5, s1
; GFX6-NOHSA-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NOHSA-NEXT:    buffer_store_short v0, off, s[4:7], 0
; GFX6-NOHSA-NEXT:    s_endpgm
;
; GFX7-HSA-LABEL: constant_load_v2i8:
; GFX7-HSA:       ; %bb.0: ; %entry
; GFX7-HSA-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
; GFX7-HSA-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s2
; GFX7-HSA-NEXT:    v_mov_b32_e32 v1, s3
; GFX7-HSA-NEXT:    flat_load_ushort v2, v[0:1]
; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s0
; GFX7-HSA-NEXT:    v_mov_b32_e32 v1, s1
; GFX7-HSA-NEXT:    s_waitcnt vmcnt(0)
; GFX7-HSA-NEXT:    flat_store_short v[0:1], v2
; GFX7-HSA-NEXT:    s_endpgm
;
; GFX8-NOHSA-LABEL: constant_load_v2i8:
; GFX8-NOHSA:       ; %bb.0: ; %entry
; GFX8-NOHSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX8-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s2
; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s3
; GFX8-NOHSA-NEXT:    flat_load_ushort v2, v[0:1]
; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s0
; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s1
; GFX8-NOHSA-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NOHSA-NEXT:    flat_store_short v[0:1], v2
; GFX8-NOHSA-NEXT:    s_endpgm
;
; EG-LABEL: constant_load_v2i8:
; EG:       ; %bb.0: ; %entry
; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT:    TEX 0 @6
; EG-NEXT:    ALU 11, @9, KC0[CB0:0-32], KC1[]
; EG-NEXT:    MEM_RAT MSKOR T0.XW, T1.X
; EG-NEXT:    CF_END
; EG-NEXT:    PAD
; EG-NEXT:    Fetch clause starting at 6:
; EG-NEXT:     VTX_READ_16 T0.X, T0.X, 0, #1
; EG-NEXT:    ALU clause starting at 8:
; EG-NEXT:     MOV * T0.X, KC0[2].Z,
; EG-NEXT:    ALU clause starting at 9:
; EG-NEXT:     AND_INT T0.W, KC0[2].Y, literal.x,
; EG-NEXT:     AND_INT * T1.W, T0.X, literal.y,
; EG-NEXT:    3(4.203895e-45), 65535(9.183409e-41)
; EG-NEXT:     LSHL * T0.W, PV.W, literal.x,
; EG-NEXT:    3(4.203895e-45), 0(0.000000e+00)
; EG-NEXT:     LSHL T0.X, T1.W, PV.W,
; EG-NEXT:     LSHL * T0.W, literal.x, PV.W,
; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
; EG-NEXT:     MOV T0.Y, 0.0,
; EG-NEXT:     MOV * T0.Z, 0.0,
; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
;
; GFX12-LABEL: constant_load_v2i8:
; GFX12:       ; %bb.0: ; %entry
; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; GFX12-NEXT:    v_mov_b32_e32 v0, 0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    global_load_u16 v1, v0, s[2:3]
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    global_store_b16 v0, v1, s[0:1]
; GFX12-NEXT:    s_endpgm
entry:
  %ld = load <2 x i8>, ptr addrspace(4) %in
  store <2 x i8> %ld, ptr addrspace(1) %out
  ret void
}
178
; Kernel under test - loads a <3 x i8> vector through the addrspace(4) pointer
; %in and stores it unchanged through the addrspace(1) pointer %out. The
; amdgcn expectations below fetch the vector with one scalar dword load and
; write it back in two pieces, a 16-bit store for the low half and a byte
; store at offset 2 for the third element.
define amdgpu_kernel void @constant_load_v3i8(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
; GFX6-NOHSA-LABEL: constant_load_v3i8:
; GFX6-NOHSA:       ; %bb.0: ; %entry
; GFX6-NOHSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; GFX6-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NOHSA-NEXT:    s_load_dword s4, s[2:3], 0x0
; GFX6-NOHSA-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NOHSA-NEXT:    s_lshr_b32 s5, s4, 16
; GFX6-NOHSA-NEXT:    s_mov_b32 s2, -1
; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s4
; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s5
; GFX6-NOHSA-NEXT:    buffer_store_byte v1, off, s[0:3], 0 offset:2
; GFX6-NOHSA-NEXT:    buffer_store_short v0, off, s[0:3], 0
; GFX6-NOHSA-NEXT:    s_endpgm
;
; GFX7-HSA-LABEL: constant_load_v3i8:
; GFX7-HSA:       ; %bb.0: ; %entry
; GFX7-HSA-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
; GFX7-HSA-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-HSA-NEXT:    s_load_dword s2, s[2:3], 0x0
; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s0
; GFX7-HSA-NEXT:    v_mov_b32_e32 v1, s1
; GFX7-HSA-NEXT:    s_add_u32 s0, s0, 2
; GFX7-HSA-NEXT:    s_addc_u32 s1, s1, 0
; GFX7-HSA-NEXT:    v_mov_b32_e32 v3, s1
; GFX7-HSA-NEXT:    v_mov_b32_e32 v2, s0
; GFX7-HSA-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-HSA-NEXT:    v_mov_b32_e32 v4, s2
; GFX7-HSA-NEXT:    s_lshr_b32 s0, s2, 16
; GFX7-HSA-NEXT:    flat_store_short v[0:1], v4
; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s0
; GFX7-HSA-NEXT:    flat_store_byte v[2:3], v0
; GFX7-HSA-NEXT:    s_endpgm
;
; GFX8-NOHSA-LABEL: constant_load_v3i8:
; GFX8-NOHSA:       ; %bb.0: ; %entry
; GFX8-NOHSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX8-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
; GFX8-NOHSA-NEXT:    s_load_dword s2, s[2:3], 0x0
; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s0
; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s1
; GFX8-NOHSA-NEXT:    s_add_u32 s0, s0, 2
; GFX8-NOHSA-NEXT:    s_addc_u32 s1, s1, 0
; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v3, s1
; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s0
; GFX8-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s2
; GFX8-NOHSA-NEXT:    s_lshr_b32 s0, s2, 16
; GFX8-NOHSA-NEXT:    flat_store_short v[0:1], v4
; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s0
; GFX8-NOHSA-NEXT:    flat_store_byte v[2:3], v0
; GFX8-NOHSA-NEXT:    s_endpgm
;
; EG-LABEL: constant_load_v3i8:
; EG:       ; %bb.0: ; %entry
; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT:    TEX 0 @6
; EG-NEXT:    ALU 27, @9, KC0[CB0:0-32], KC1[]
; EG-NEXT:    MEM_RAT MSKOR T6.XW, T8.X
; EG-NEXT:    MEM_RAT MSKOR T5.XW, T7.X
; EG-NEXT:    CF_END
; EG-NEXT:    Fetch clause starting at 6:
; EG-NEXT:     VTX_READ_32 T5.X, T5.X, 0, #1
; EG-NEXT:    ALU clause starting at 8:
; EG-NEXT:     MOV * T5.X, KC0[2].Z,
; EG-NEXT:    ALU clause starting at 9:
; EG-NEXT:     MOV * T2.X, T5.X,
; EG-NEXT:     MOV T0.Y, PV.X,
; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.x,
; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
; EG-NEXT:     AND_INT T1.W, PV.W, literal.x,
; EG-NEXT:     MOV * T2.W, literal.y,
; EG-NEXT:    3(4.203895e-45), 8(1.121039e-44)
; EG-NEXT:     BFE_UINT T2.W, T0.Y, literal.x, PS,
; EG-NEXT:     LSHL * T1.W, PV.W, literal.y,
; EG-NEXT:    16(2.242078e-44), 3(4.203895e-45)
; EG-NEXT:     LSHL T6.X, PV.W, PS,
; EG-NEXT:     LSHL * T6.W, literal.x, PS,
; EG-NEXT:    255(3.573311e-43), 0(0.000000e+00)
; EG-NEXT:     MOV T6.Y, 0.0,
; EG-NEXT:     AND_INT T1.W, KC0[2].Y, literal.x,
; EG-NEXT:     AND_INT * T2.W, T5.X, literal.y,
; EG-NEXT:    3(4.203895e-45), 65535(9.183409e-41)
; EG-NEXT:     LSHL * T1.W, PV.W, literal.x,
; EG-NEXT:    3(4.203895e-45), 0(0.000000e+00)
; EG-NEXT:     LSHL T5.X, T2.W, PV.W,
; EG-NEXT:     LSHL * T5.W, literal.x, PV.W,
; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
; EG-NEXT:     MOV T5.Y, 0.0,
; EG-NEXT:     MOV T6.Z, 0.0,
; EG-NEXT:     MOV * T5.Z, 0.0,
; EG-NEXT:     LSHR T7.X, KC0[2].Y, literal.x,
; EG-NEXT:     LSHR * T8.X, T0.W, literal.x,
; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
;
; GFX12-LABEL: constant_load_v3i8:
; GFX12:       ; %bb.0: ; %entry
; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    s_load_b32 s2, s[2:3], 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX12-NEXT:    s_clause 0x1
; GFX12-NEXT:    global_store_d16_hi_b8 v0, v1, s[0:1] offset:2
; GFX12-NEXT:    global_store_b16 v0, v1, s[0:1]
; GFX12-NEXT:    s_endpgm
entry:
  %ld = load <3 x i8>, ptr addrspace(4) %in
  store <3 x i8> %ld, ptr addrspace(1) %out
  ret void
}
291
; Kernel under test - loads a <4 x i8> vector through the addrspace(4) pointer
; %in and stores it unchanged through the addrspace(1) pointer %out. The
; amdgcn expectations below move the whole vector as one dword, using a
; scalar dword load and a single dword store.
define amdgpu_kernel void @constant_load_v4i8(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
; GFX6-NOHSA-LABEL: constant_load_v4i8:
; GFX6-NOHSA:       ; %bb.0: ; %entry
; GFX6-NOHSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; GFX6-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NOHSA-NEXT:    s_load_dword s4, s[2:3], 0x0
; GFX6-NOHSA-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NOHSA-NEXT:    s_mov_b32 s2, -1
; GFX6-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s4
; GFX6-NOHSA-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX6-NOHSA-NEXT:    s_endpgm
;
; GFX7-HSA-LABEL: constant_load_v4i8:
; GFX7-HSA:       ; %bb.0: ; %entry
; GFX7-HSA-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
; GFX7-HSA-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-HSA-NEXT:    s_load_dword s2, s[2:3], 0x0
; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s0
; GFX7-HSA-NEXT:    v_mov_b32_e32 v1, s1
; GFX7-HSA-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-HSA-NEXT:    v_mov_b32_e32 v2, s2
; GFX7-HSA-NEXT:    flat_store_dword v[0:1], v2
; GFX7-HSA-NEXT:    s_endpgm
;
; GFX8-NOHSA-LABEL: constant_load_v4i8:
; GFX8-NOHSA:       ; %bb.0: ; %entry
; GFX8-NOHSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX8-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
; GFX8-NOHSA-NEXT:    s_load_dword s2, s[2:3], 0x0
; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s0
; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s1
; GFX8-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s2
; GFX8-NOHSA-NEXT:    flat_store_dword v[0:1], v2
; GFX8-NOHSA-NEXT:    s_endpgm
;
; EG-LABEL: constant_load_v4i8:
; EG:       ; %bb.0: ; %entry
; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT:    TEX 0 @6
; EG-NEXT:    ALU 1, @9, KC0[CB0:0-32], KC1[]
; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
; EG-NEXT:    CF_END
; EG-NEXT:    PAD
; EG-NEXT:    Fetch clause starting at 6:
; EG-NEXT:     VTX_READ_32 T0.X, T0.X, 0, #1
; EG-NEXT:    ALU clause starting at 8:
; EG-NEXT:     MOV * T0.X, KC0[2].Z,
; EG-NEXT:    ALU clause starting at 9:
; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
;
; GFX12-LABEL: constant_load_v4i8:
; GFX12:       ; %bb.0: ; %entry
; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    s_load_b32 s2, s[2:3], 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX12-NEXT:    global_store_b32 v0, v1, s[0:1]
; GFX12-NEXT:    s_endpgm
entry:
  %ld = load <4 x i8>, ptr addrspace(4) %in
  store <4 x i8> %ld, ptr addrspace(1) %out
  ret void
}
359
; Kernel under test - loads an <8 x i8> vector through the addrspace(4)
; pointer %in and stores it unchanged through the addrspace(1) pointer %out.
; The amdgcn expectations below move the vector as one 64-bit value, using a
; scalar dwordx2/b64 load and a single dwordx2/b64 store.
define amdgpu_kernel void @constant_load_v8i8(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
; GFX6-NOHSA-LABEL: constant_load_v8i8:
; GFX6-NOHSA:       ; %bb.0: ; %entry
; GFX6-NOHSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; GFX6-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NOHSA-NEXT:    s_load_dwordx2 s[4:5], s[2:3], 0x0
; GFX6-NOHSA-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NOHSA-NEXT:    s_mov_b32 s2, -1
; GFX6-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s4
; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s5
; GFX6-NOHSA-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; GFX6-NOHSA-NEXT:    s_endpgm
;
; GFX7-HSA-LABEL: constant_load_v8i8:
; GFX7-HSA:       ; %bb.0: ; %entry
; GFX7-HSA-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
; GFX7-HSA-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-HSA-NEXT:    s_load_dwordx2 s[2:3], s[2:3], 0x0
; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s0
; GFX7-HSA-NEXT:    v_mov_b32_e32 v1, s1
; GFX7-HSA-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-HSA-NEXT:    v_mov_b32_e32 v2, s2
; GFX7-HSA-NEXT:    v_mov_b32_e32 v3, s3
; GFX7-HSA-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
; GFX7-HSA-NEXT:    s_endpgm
;
; GFX8-NOHSA-LABEL: constant_load_v8i8:
; GFX8-NOHSA:       ; %bb.0: ; %entry
; GFX8-NOHSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX8-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
; GFX8-NOHSA-NEXT:    s_load_dwordx2 s[2:3], s[2:3], 0x0
; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s0
; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s1
; GFX8-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s2
; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v3, s3
; GFX8-NOHSA-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
; GFX8-NOHSA-NEXT:    s_endpgm
;
; EG-LABEL: constant_load_v8i8:
; EG:       ; %bb.0: ; %entry
; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT:    TEX 0 @6
; EG-NEXT:    ALU 1, @9, KC0[CB0:0-32], KC1[]
; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
; EG-NEXT:    CF_END
; EG-NEXT:    PAD
; EG-NEXT:    Fetch clause starting at 6:
; EG-NEXT:     VTX_READ_64 T0.XY, T0.X, 0, #1
; EG-NEXT:    ALU clause starting at 8:
; EG-NEXT:     MOV * T0.X, KC0[2].Z,
; EG-NEXT:    ALU clause starting at 9:
; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
;
; GFX12-LABEL: constant_load_v8i8:
; GFX12:       ; %bb.0: ; %entry
; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    s_load_b64 s[2:3], s[2:3], 0x0
; GFX12-NEXT:    v_mov_b32_e32 v2, 0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX12-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
; GFX12-NEXT:    s_endpgm
entry:
  %ld = load <8 x i8>, ptr addrspace(4) %in
  store <8 x i8> %ld, ptr addrspace(1) %out
  ret void
}
431
; Kernel under test - loads a <16 x i8> vector through the addrspace(4)
; pointer %in and stores it unchanged through the addrspace(1) pointer %out.
; The amdgcn expectations below move the vector as one 128-bit value, using a
; scalar dwordx4/b128 load and a single dwordx4/b128 store.
define amdgpu_kernel void @constant_load_v16i8(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
; GFX6-NOHSA-LABEL: constant_load_v16i8:
; GFX6-NOHSA:       ; %bb.0: ; %entry
; GFX6-NOHSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; GFX6-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NOHSA-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x0
; GFX6-NOHSA-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NOHSA-NEXT:    s_mov_b32 s2, -1
; GFX6-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s4
; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s5
; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s6
; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s7
; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
; GFX6-NOHSA-NEXT:    s_endpgm
;
; GFX7-HSA-LABEL: constant_load_v16i8:
; GFX7-HSA:       ; %bb.0: ; %entry
; GFX7-HSA-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
; GFX7-HSA-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-HSA-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x0
; GFX7-HSA-NEXT:    v_mov_b32_e32 v4, s0
; GFX7-HSA-NEXT:    v_mov_b32_e32 v5, s1
; GFX7-HSA-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s4
; GFX7-HSA-NEXT:    v_mov_b32_e32 v1, s5
; GFX7-HSA-NEXT:    v_mov_b32_e32 v2, s6
; GFX7-HSA-NEXT:    v_mov_b32_e32 v3, s7
; GFX7-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
; GFX7-HSA-NEXT:    s_endpgm
;
; GFX8-NOHSA-LABEL: constant_load_v16i8:
; GFX8-NOHSA:       ; %bb.0: ; %entry
; GFX8-NOHSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX8-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
; GFX8-NOHSA-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x0
; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s0
; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s1
; GFX8-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s4
; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s5
; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s6
; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v3, s7
; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NOHSA-NEXT:    s_endpgm
;
; EG-LABEL: constant_load_v16i8:
; EG:       ; %bb.0: ; %entry
; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT:    TEX 0 @6
; EG-NEXT:    ALU 1, @9, KC0[CB0:0-32], KC1[]
; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T1.X, 1
; EG-NEXT:    CF_END
; EG-NEXT:    PAD
; EG-NEXT:    Fetch clause starting at 6:
; EG-NEXT:     VTX_READ_128 T0.XYZW, T0.X, 0, #1
; EG-NEXT:    ALU clause starting at 8:
; EG-NEXT:     MOV * T0.X, KC0[2].Z,
; EG-NEXT:    ALU clause starting at 9:
; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
;
; GFX12-LABEL: constant_load_v16i8:
; GFX12:       ; %bb.0: ; %entry
; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    s_load_b128 s[4:7], s[2:3], 0x0
; GFX12-NEXT:    v_mov_b32_e32 v4, 0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v3, s7
; GFX12-NEXT:    v_dual_mov_b32 v1, s5 :: v_dual_mov_b32 v2, s6
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[0:1]
; GFX12-NEXT:    s_endpgm
entry:
  %ld = load <16 x i8>, ptr addrspace(4) %in
  store <16 x i8> %ld, ptr addrspace(1) %out
  ret void
}
510
; Kernel under test - loads one i8 through the addrspace(4) pointer %in,
; zero-extends it to i32 and stores the result through the addrspace(1)
; pointer %out. The expectations below show no separate extension
; instruction, only an unsigned byte load (ubyte, u8 or VTX_READ_8) followed
; by a dword store. The gfx1200 run performs the byte load on the scalar
; unit with s_load_u8.
define amdgpu_kernel void @constant_zextload_i8_to_i32(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
; GFX6-NOHSA-LABEL: constant_zextload_i8_to_i32:
; GFX6-NOHSA:       ; %bb.0:
; GFX6-NOHSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; GFX6-NOHSA-NEXT:    s_mov_b32 s7, 0xf000
; GFX6-NOHSA-NEXT:    s_mov_b32 s6, -1
; GFX6-NOHSA-NEXT:    s_mov_b32 s10, s6
; GFX6-NOHSA-NEXT:    s_mov_b32 s11, s7
; GFX6-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NOHSA-NEXT:    s_mov_b32 s8, s2
; GFX6-NOHSA-NEXT:    s_mov_b32 s9, s3
; GFX6-NOHSA-NEXT:    buffer_load_ubyte v0, off, s[8:11], 0
; GFX6-NOHSA-NEXT:    s_mov_b32 s4, s0
; GFX6-NOHSA-NEXT:    s_mov_b32 s5, s1
; GFX6-NOHSA-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NOHSA-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GFX6-NOHSA-NEXT:    s_endpgm
;
; GFX7-HSA-LABEL: constant_zextload_i8_to_i32:
; GFX7-HSA:       ; %bb.0:
; GFX7-HSA-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
; GFX7-HSA-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s2
; GFX7-HSA-NEXT:    v_mov_b32_e32 v1, s3
; GFX7-HSA-NEXT:    flat_load_ubyte v2, v[0:1]
; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s0
; GFX7-HSA-NEXT:    v_mov_b32_e32 v1, s1
; GFX7-HSA-NEXT:    s_waitcnt vmcnt(0)
; GFX7-HSA-NEXT:    flat_store_dword v[0:1], v2
; GFX7-HSA-NEXT:    s_endpgm
;
; GFX8-NOHSA-LABEL: constant_zextload_i8_to_i32:
; GFX8-NOHSA:       ; %bb.0:
; GFX8-NOHSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX8-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s2
; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s3
; GFX8-NOHSA-NEXT:    flat_load_ubyte v2, v[0:1]
; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s0
; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s1
; GFX8-NOHSA-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NOHSA-NEXT:    flat_store_dword v[0:1], v2
; GFX8-NOHSA-NEXT:    s_endpgm
;
; EG-LABEL: constant_zextload_i8_to_i32:
; EG:       ; %bb.0:
; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT:    TEX 0 @6
; EG-NEXT:    ALU 1, @9, KC0[CB0:0-32], KC1[]
; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
; EG-NEXT:    CF_END
; EG-NEXT:    PAD
; EG-NEXT:    Fetch clause starting at 6:
; EG-NEXT:     VTX_READ_8 T0.X, T0.X, 0, #1
; EG-NEXT:    ALU clause starting at 8:
; EG-NEXT:     MOV * T0.X, KC0[2].Z,
; EG-NEXT:    ALU clause starting at 9:
; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
;
; GFX12-LABEL: constant_zextload_i8_to_i32:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    s_load_u8 s2, s[2:3], 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX12-NEXT:    global_store_b32 v0, v1, s[0:1]
; GFX12-NEXT:    s_endpgm
  %a = load i8, ptr addrspace(4) %in
  %ext = zext i8 %a to i32
  store i32 %ext, ptr addrspace(1) %out
  ret void
}
585
; Kernel under test - loads one i8 through the addrspace(4) pointer %in,
; sign-extends it to i32 and stores the result through the addrspace(1)
; pointer %out. The amdgcn expectations below use a signed byte load (sbyte
; or s_load_i8) with no separate extension instruction. The r600
; expectations pair VTX_READ_8 with a BFE_INT of width 8.
define amdgpu_kernel void @constant_sextload_i8_to_i32(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
; GFX6-NOHSA-LABEL: constant_sextload_i8_to_i32:
; GFX6-NOHSA:       ; %bb.0:
; GFX6-NOHSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; GFX6-NOHSA-NEXT:    s_mov_b32 s7, 0xf000
; GFX6-NOHSA-NEXT:    s_mov_b32 s6, -1
; GFX6-NOHSA-NEXT:    s_mov_b32 s10, s6
; GFX6-NOHSA-NEXT:    s_mov_b32 s11, s7
; GFX6-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NOHSA-NEXT:    s_mov_b32 s8, s2
; GFX6-NOHSA-NEXT:    s_mov_b32 s9, s3
; GFX6-NOHSA-NEXT:    buffer_load_sbyte v0, off, s[8:11], 0
; GFX6-NOHSA-NEXT:    s_mov_b32 s4, s0
; GFX6-NOHSA-NEXT:    s_mov_b32 s5, s1
; GFX6-NOHSA-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NOHSA-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GFX6-NOHSA-NEXT:    s_endpgm
;
; GFX7-HSA-LABEL: constant_sextload_i8_to_i32:
; GFX7-HSA:       ; %bb.0:
; GFX7-HSA-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
; GFX7-HSA-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s2
; GFX7-HSA-NEXT:    v_mov_b32_e32 v1, s3
; GFX7-HSA-NEXT:    flat_load_sbyte v2, v[0:1]
; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s0
; GFX7-HSA-NEXT:    v_mov_b32_e32 v1, s1
; GFX7-HSA-NEXT:    s_waitcnt vmcnt(0)
; GFX7-HSA-NEXT:    flat_store_dword v[0:1], v2
; GFX7-HSA-NEXT:    s_endpgm
;
; GFX8-NOHSA-LABEL: constant_sextload_i8_to_i32:
; GFX8-NOHSA:       ; %bb.0:
; GFX8-NOHSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX8-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s2
; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s3
; GFX8-NOHSA-NEXT:    flat_load_sbyte v2, v[0:1]
; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s0
; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s1
; GFX8-NOHSA-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NOHSA-NEXT:    flat_store_dword v[0:1], v2
; GFX8-NOHSA-NEXT:    s_endpgm
;
; EG-LABEL: constant_sextload_i8_to_i32:
; EG:       ; %bb.0:
; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT:    TEX 0 @6
; EG-NEXT:    ALU 2, @9, KC0[CB0:0-32], KC1[]
; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
; EG-NEXT:    CF_END
; EG-NEXT:    PAD
; EG-NEXT:    Fetch clause starting at 6:
; EG-NEXT:     VTX_READ_8 T0.X, T0.X, 0, #1
; EG-NEXT:    ALU clause starting at 8:
; EG-NEXT:     MOV * T0.X, KC0[2].Z,
; EG-NEXT:    ALU clause starting at 9:
; EG-NEXT:     BFE_INT T0.X, T0.X, 0.0, literal.x,
; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.y,
; EG-NEXT:    8(1.121039e-44), 2(2.802597e-45)
;
; GFX12-LABEL: constant_sextload_i8_to_i32:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    s_load_i8 s2, s[2:3], 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX12-NEXT:    global_store_b32 v0, v1, s[0:1]
; GFX12-NEXT:    s_endpgm
  %ld = load i8, ptr addrspace(4) %in
  %ext = sext i8 %ld to i32
  store i32 %ext, ptr addrspace(1) %out
  ret void
}
661
; Kernel under test - single-element vector form of the zero-extending load.
; It loads a <1 x i8> through the addrspace(4) pointer %in, zero-extends it
; to <1 x i32> and stores the result through the addrspace(1) pointer %out.
; The expectations below show one unsigned byte load followed by one dword
; store, with no separate extension instruction.
define amdgpu_kernel void @constant_zextload_v1i8_to_v1i32(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
; GFX6-NOHSA-LABEL: constant_zextload_v1i8_to_v1i32:
; GFX6-NOHSA:       ; %bb.0:
; GFX6-NOHSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; GFX6-NOHSA-NEXT:    s_mov_b32 s7, 0xf000
; GFX6-NOHSA-NEXT:    s_mov_b32 s6, -1
; GFX6-NOHSA-NEXT:    s_mov_b32 s10, s6
; GFX6-NOHSA-NEXT:    s_mov_b32 s11, s7
; GFX6-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NOHSA-NEXT:    s_mov_b32 s8, s2
; GFX6-NOHSA-NEXT:    s_mov_b32 s9, s3
; GFX6-NOHSA-NEXT:    buffer_load_ubyte v0, off, s[8:11], 0
; GFX6-NOHSA-NEXT:    s_mov_b32 s4, s0
; GFX6-NOHSA-NEXT:    s_mov_b32 s5, s1
; GFX6-NOHSA-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NOHSA-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GFX6-NOHSA-NEXT:    s_endpgm
;
; GFX7-HSA-LABEL: constant_zextload_v1i8_to_v1i32:
; GFX7-HSA:       ; %bb.0:
; GFX7-HSA-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
; GFX7-HSA-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s2
; GFX7-HSA-NEXT:    v_mov_b32_e32 v1, s3
; GFX7-HSA-NEXT:    flat_load_ubyte v2, v[0:1]
; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s0
; GFX7-HSA-NEXT:    v_mov_b32_e32 v1, s1
; GFX7-HSA-NEXT:    s_waitcnt vmcnt(0)
; GFX7-HSA-NEXT:    flat_store_dword v[0:1], v2
; GFX7-HSA-NEXT:    s_endpgm
;
; GFX8-NOHSA-LABEL: constant_zextload_v1i8_to_v1i32:
; GFX8-NOHSA:       ; %bb.0:
; GFX8-NOHSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX8-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s2
; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s3
; GFX8-NOHSA-NEXT:    flat_load_ubyte v2, v[0:1]
; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s0
; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s1
; GFX8-NOHSA-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NOHSA-NEXT:    flat_store_dword v[0:1], v2
; GFX8-NOHSA-NEXT:    s_endpgm
;
; EG-LABEL: constant_zextload_v1i8_to_v1i32:
; EG:       ; %bb.0:
; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT:    TEX 0 @6
; EG-NEXT:    ALU 1, @9, KC0[CB0:0-32], KC1[]
; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
; EG-NEXT:    CF_END
; EG-NEXT:    PAD
; EG-NEXT:    Fetch clause starting at 6:
; EG-NEXT:     VTX_READ_8 T0.X, T0.X, 0, #1
; EG-NEXT:    ALU clause starting at 8:
; EG-NEXT:     MOV * T0.X, KC0[2].Z,
; EG-NEXT:    ALU clause starting at 9:
; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
;
; GFX12-LABEL: constant_zextload_v1i8_to_v1i32:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    s_load_u8 s2, s[2:3], 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX12-NEXT:    global_store_b32 v0, v1, s[0:1]
; GFX12-NEXT:    s_endpgm
  %load = load <1 x i8>, ptr addrspace(4) %in
  %ext = zext <1 x i8> %load to <1 x i32>
  store <1 x i32> %ext, ptr addrspace(1) %out
  ret void
}
736
737define amdgpu_kernel void @constant_sextload_v1i8_to_v1i32(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
738; GFX6-NOHSA-LABEL: constant_sextload_v1i8_to_v1i32:
739; GFX6-NOHSA:       ; %bb.0:
740; GFX6-NOHSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
741; GFX6-NOHSA-NEXT:    s_mov_b32 s7, 0xf000
742; GFX6-NOHSA-NEXT:    s_mov_b32 s6, -1
743; GFX6-NOHSA-NEXT:    s_mov_b32 s10, s6
744; GFX6-NOHSA-NEXT:    s_mov_b32 s11, s7
745; GFX6-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
746; GFX6-NOHSA-NEXT:    s_mov_b32 s8, s2
747; GFX6-NOHSA-NEXT:    s_mov_b32 s9, s3
748; GFX6-NOHSA-NEXT:    buffer_load_sbyte v0, off, s[8:11], 0
749; GFX6-NOHSA-NEXT:    s_mov_b32 s4, s0
750; GFX6-NOHSA-NEXT:    s_mov_b32 s5, s1
751; GFX6-NOHSA-NEXT:    s_waitcnt vmcnt(0)
752; GFX6-NOHSA-NEXT:    buffer_store_dword v0, off, s[4:7], 0
753; GFX6-NOHSA-NEXT:    s_endpgm
754;
755; GFX7-HSA-LABEL: constant_sextload_v1i8_to_v1i32:
756; GFX7-HSA:       ; %bb.0:
757; GFX7-HSA-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
758; GFX7-HSA-NEXT:    s_waitcnt lgkmcnt(0)
759; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s2
760; GFX7-HSA-NEXT:    v_mov_b32_e32 v1, s3
761; GFX7-HSA-NEXT:    flat_load_sbyte v2, v[0:1]
762; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s0
763; GFX7-HSA-NEXT:    v_mov_b32_e32 v1, s1
764; GFX7-HSA-NEXT:    s_waitcnt vmcnt(0)
765; GFX7-HSA-NEXT:    flat_store_dword v[0:1], v2
766; GFX7-HSA-NEXT:    s_endpgm
767;
768; GFX8-NOHSA-LABEL: constant_sextload_v1i8_to_v1i32:
769; GFX8-NOHSA:       ; %bb.0:
770; GFX8-NOHSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
771; GFX8-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
772; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s2
773; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s3
774; GFX8-NOHSA-NEXT:    flat_load_sbyte v2, v[0:1]
775; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s0
776; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s1
777; GFX8-NOHSA-NEXT:    s_waitcnt vmcnt(0)
778; GFX8-NOHSA-NEXT:    flat_store_dword v[0:1], v2
779; GFX8-NOHSA-NEXT:    s_endpgm
780;
781; EG-LABEL: constant_sextload_v1i8_to_v1i32:
782; EG:       ; %bb.0:
783; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
784; EG-NEXT:    TEX 0 @6
785; EG-NEXT:    ALU 2, @9, KC0[CB0:0-32], KC1[]
786; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
787; EG-NEXT:    CF_END
788; EG-NEXT:    PAD
789; EG-NEXT:    Fetch clause starting at 6:
790; EG-NEXT:     VTX_READ_8 T0.X, T0.X, 0, #1
791; EG-NEXT:    ALU clause starting at 8:
792; EG-NEXT:     MOV * T0.X, KC0[2].Z,
793; EG-NEXT:    ALU clause starting at 9:
794; EG-NEXT:     BFE_INT T0.X, T0.X, 0.0, literal.x,
795; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.y,
796; EG-NEXT:    8(1.121039e-44), 2(2.802597e-45)
797;
798; GFX12-LABEL: constant_sextload_v1i8_to_v1i32:
799; GFX12:       ; %bb.0:
800; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
801; GFX12-NEXT:    s_wait_kmcnt 0x0
802; GFX12-NEXT:    s_load_i8 s2, s[2:3], 0x0
803; GFX12-NEXT:    s_wait_kmcnt 0x0
804; GFX12-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
805; GFX12-NEXT:    global_store_b32 v0, v1, s[0:1]
806; GFX12-NEXT:    s_endpgm
  ; Test body: load a <1 x i8> vector through %in (addrspace(4)), sign-extend
  ; it to <1 x i32>, and store the result through %out (addrspace(1)).
  ; The per-target assertions above are autogenerated (see the NOTE at the top
  ; of this file); regenerate them with the update script, do not hand-edit.
807  %load = load <1 x i8>, ptr addrspace(4) %in
808  %ext = sext <1 x i8> %load to <1 x i32>
809  store <1 x i32> %ext, ptr addrspace(1) %out
810  ret void
811}
812
813; TODO: This should use DST, but for some there are redundant MOVs
814define amdgpu_kernel void @constant_zextload_v2i8_to_v2i32(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
815; GFX6-NOHSA-LABEL: constant_zextload_v2i8_to_v2i32:
816; GFX6-NOHSA:       ; %bb.0:
817; GFX6-NOHSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
818; GFX6-NOHSA-NEXT:    s_mov_b32 s7, 0xf000
819; GFX6-NOHSA-NEXT:    s_mov_b32 s6, -1
820; GFX6-NOHSA-NEXT:    s_mov_b32 s10, s6
821; GFX6-NOHSA-NEXT:    s_mov_b32 s11, s7
822; GFX6-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
823; GFX6-NOHSA-NEXT:    s_mov_b32 s8, s2
824; GFX6-NOHSA-NEXT:    s_mov_b32 s9, s3
825; GFX6-NOHSA-NEXT:    buffer_load_ushort v0, off, s[8:11], 0
826; GFX6-NOHSA-NEXT:    s_mov_b32 s4, s0
827; GFX6-NOHSA-NEXT:    s_mov_b32 s5, s1
828; GFX6-NOHSA-NEXT:    s_waitcnt vmcnt(0)
829; GFX6-NOHSA-NEXT:    v_lshrrev_b32_e32 v1, 8, v0
830; GFX6-NOHSA-NEXT:    v_and_b32_e32 v0, 0xff, v0
831; GFX6-NOHSA-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
832; GFX6-NOHSA-NEXT:    s_endpgm
833;
834; GFX7-HSA-LABEL: constant_zextload_v2i8_to_v2i32:
835; GFX7-HSA:       ; %bb.0:
836; GFX7-HSA-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
837; GFX7-HSA-NEXT:    s_waitcnt lgkmcnt(0)
838; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s2
839; GFX7-HSA-NEXT:    v_mov_b32_e32 v1, s3
840; GFX7-HSA-NEXT:    flat_load_ushort v2, v[0:1]
841; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s0
842; GFX7-HSA-NEXT:    v_mov_b32_e32 v1, s1
843; GFX7-HSA-NEXT:    s_waitcnt vmcnt(0)
844; GFX7-HSA-NEXT:    v_lshrrev_b32_e32 v3, 8, v2
845; GFX7-HSA-NEXT:    v_and_b32_e32 v2, 0xff, v2
846; GFX7-HSA-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
847; GFX7-HSA-NEXT:    s_endpgm
848;
849; GFX8-NOHSA-LABEL: constant_zextload_v2i8_to_v2i32:
850; GFX8-NOHSA:       ; %bb.0:
851; GFX8-NOHSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
852; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v3, 8
853; GFX8-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
854; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s2
855; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s3
856; GFX8-NOHSA-NEXT:    flat_load_ushort v2, v[0:1]
857; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s0
858; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s1
859; GFX8-NOHSA-NEXT:    s_waitcnt vmcnt(0)
860; GFX8-NOHSA-NEXT:    v_lshrrev_b32_sdwa v3, v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
861; GFX8-NOHSA-NEXT:    v_and_b32_e32 v2, 0xff, v2
862; GFX8-NOHSA-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
863; GFX8-NOHSA-NEXT:    s_endpgm
864;
865; EG-LABEL: constant_zextload_v2i8_to_v2i32:
866; EG:       ; %bb.0:
867; EG-NEXT:    ALU 1, @8, KC0[CB0:0-32], KC1[]
868; EG-NEXT:    TEX 0 @6
869; EG-NEXT:    ALU 12, @10, KC0[CB0:0-32], KC1[]
870; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T4.XY, T5.X, 1
871; EG-NEXT:    CF_END
872; EG-NEXT:    PAD
873; EG-NEXT:    Fetch clause starting at 6:
874; EG-NEXT:     VTX_READ_16 T4.X, T4.X, 0, #1
875; EG-NEXT:    ALU clause starting at 8:
876; EG-NEXT:     MOV * T0.Y, T2.X,
877; EG-NEXT:     MOV * T4.X, KC0[2].Z,
878; EG-NEXT:    ALU clause starting at 10:
879; EG-NEXT:     AND_INT T0.W, T4.X, literal.x,
880; EG-NEXT:     AND_INT * T1.W, T0.Y, literal.y,
881; EG-NEXT:    65535(9.183409e-41), -65536(nan)
882; EG-NEXT:     OR_INT * T0.W, PS, PV.W,
883; EG-NEXT:     MOV * T2.X, PV.W,
884; EG-NEXT:     MOV T0.Y, PV.X,
885; EG-NEXT:     MOV * T1.W, literal.x,
886; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
887; EG-NEXT:     BFE_UINT * T4.Y, PV.Y, literal.x, PV.W,
888; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
889; EG-NEXT:     AND_INT T4.X, T0.W, literal.x,
890; EG-NEXT:     LSHR * T5.X, KC0[2].Y, literal.y,
891; EG-NEXT:    255(3.573311e-43), 2(2.802597e-45)
892;
893; GFX12-LABEL: constant_zextload_v2i8_to_v2i32:
894; GFX12:       ; %bb.0:
895; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
896; GFX12-NEXT:    v_mov_b32_e32 v2, 0
897; GFX12-NEXT:    s_wait_kmcnt 0x0
898; GFX12-NEXT:    global_load_u16 v0, v2, s[2:3]
899; GFX12-NEXT:    s_wait_loadcnt 0x0
900; GFX12-NEXT:    v_and_b32_e32 v1, 0xffff, v0
901; GFX12-NEXT:    v_and_b32_e32 v0, 0xff, v0
902; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2)
903; GFX12-NEXT:    v_lshrrev_b32_e32 v1, 8, v1
904; GFX12-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
905; GFX12-NEXT:    s_endpgm
  ; Test body: load a <2 x i8> vector through %in (addrspace(4)), zero-extend
  ; each element to i32, and store the <2 x i32> result through %out
  ; (addrspace(1)).
  ; The per-target assertions above are autogenerated (see the NOTE at the top
  ; of this file); regenerate them with the update script, do not hand-edit.
906  %load = load <2 x i8>, ptr addrspace(4) %in
907  %ext = zext <2 x i8> %load to <2 x i32>
908  store <2 x i32> %ext, ptr addrspace(1) %out
909  ret void
910}
911
912; TODO: These should use DST, but for some there are redundant MOVs
913define amdgpu_kernel void @constant_sextload_v2i8_to_v2i32(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
914; GFX6-NOHSA-LABEL: constant_sextload_v2i8_to_v2i32:
915; GFX6-NOHSA:       ; %bb.0:
916; GFX6-NOHSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
917; GFX6-NOHSA-NEXT:    s_mov_b32 s7, 0xf000
918; GFX6-NOHSA-NEXT:    s_mov_b32 s6, -1
919; GFX6-NOHSA-NEXT:    s_mov_b32 s10, s6
920; GFX6-NOHSA-NEXT:    s_mov_b32 s11, s7
921; GFX6-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
922; GFX6-NOHSA-NEXT:    s_mov_b32 s8, s2
923; GFX6-NOHSA-NEXT:    s_mov_b32 s9, s3
924; GFX6-NOHSA-NEXT:    buffer_load_ushort v0, off, s[8:11], 0
925; GFX6-NOHSA-NEXT:    s_mov_b32 s4, s0
926; GFX6-NOHSA-NEXT:    s_mov_b32 s5, s1
927; GFX6-NOHSA-NEXT:    s_waitcnt vmcnt(0)
928; GFX6-NOHSA-NEXT:    v_bfe_i32 v1, v0, 8, 8
929; GFX6-NOHSA-NEXT:    v_bfe_i32 v0, v0, 0, 8
930; GFX6-NOHSA-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
931; GFX6-NOHSA-NEXT:    s_endpgm
932;
933; GFX7-HSA-LABEL: constant_sextload_v2i8_to_v2i32:
934; GFX7-HSA:       ; %bb.0:
935; GFX7-HSA-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
936; GFX7-HSA-NEXT:    s_waitcnt lgkmcnt(0)
937; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s2
938; GFX7-HSA-NEXT:    v_mov_b32_e32 v1, s3
939; GFX7-HSA-NEXT:    flat_load_ushort v2, v[0:1]
940; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s0
941; GFX7-HSA-NEXT:    v_mov_b32_e32 v1, s1
942; GFX7-HSA-NEXT:    s_waitcnt vmcnt(0)
943; GFX7-HSA-NEXT:    v_bfe_i32 v3, v2, 8, 8
944; GFX7-HSA-NEXT:    v_bfe_i32 v2, v2, 0, 8
945; GFX7-HSA-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
946; GFX7-HSA-NEXT:    s_endpgm
947;
948; GFX8-NOHSA-LABEL: constant_sextload_v2i8_to_v2i32:
949; GFX8-NOHSA:       ; %bb.0:
950; GFX8-NOHSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
951; GFX8-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
952; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s2
953; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s3
954; GFX8-NOHSA-NEXT:    flat_load_ushort v2, v[0:1]
955; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s0
956; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s1
957; GFX8-NOHSA-NEXT:    s_waitcnt vmcnt(0)
958; GFX8-NOHSA-NEXT:    v_bfe_i32 v3, v2, 8, 8
959; GFX8-NOHSA-NEXT:    v_bfe_i32 v2, v2, 0, 8
960; GFX8-NOHSA-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
961; GFX8-NOHSA-NEXT:    s_endpgm
962;
963; EG-LABEL: constant_sextload_v2i8_to_v2i32:
964; EG:       ; %bb.0:
965; EG-NEXT:    ALU 1, @8, KC0[CB0:0-32], KC1[]
966; EG-NEXT:    TEX 0 @6
967; EG-NEXT:    ALU 11, @10, KC0[CB0:0-32], KC1[]
968; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T4.XY, T5.X, 1
969; EG-NEXT:    CF_END
970; EG-NEXT:    PAD
971; EG-NEXT:    Fetch clause starting at 6:
972; EG-NEXT:     VTX_READ_16 T4.X, T4.X, 0, #1
973; EG-NEXT:    ALU clause starting at 8:
974; EG-NEXT:     MOV * T0.Y, T2.X,
975; EG-NEXT:     MOV * T4.X, KC0[2].Z,
976; EG-NEXT:    ALU clause starting at 10:
977; EG-NEXT:     AND_INT T0.W, T4.X, literal.x,
978; EG-NEXT:     AND_INT * T1.W, T0.Y, literal.y,
979; EG-NEXT:    65535(9.183409e-41), -65536(nan)
980; EG-NEXT:     OR_INT * T0.W, PS, PV.W,
981; EG-NEXT:     MOV * T2.X, PV.W,
982; EG-NEXT:     MOV * T0.Y, PV.X,
983; EG-NEXT:     BFE_INT T4.X, T0.W, 0.0, literal.x,
984; EG-NEXT:     LSHR T0.W, PV.Y, literal.x,
985; EG-NEXT:     LSHR * T5.X, KC0[2].Y, literal.y,
986; EG-NEXT:    8(1.121039e-44), 2(2.802597e-45)
987; EG-NEXT:     BFE_INT * T4.Y, PV.W, 0.0, literal.x,
988; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
989;
990; GFX12-LABEL: constant_sextload_v2i8_to_v2i32:
991; GFX12:       ; %bb.0:
992; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
993; GFX12-NEXT:    s_wait_kmcnt 0x0
994; GFX12-NEXT:    s_load_u16 s2, s[2:3], 0x0
995; GFX12-NEXT:    s_wait_kmcnt 0x0
996; GFX12-NEXT:    s_sext_i32_i8 s3, s2
997; GFX12-NEXT:    s_bfe_i32 s2, s2, 0x80008
998; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
999; GFX12-NEXT:    v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s2
1000; GFX12-NEXT:    v_mov_b32_e32 v0, s3
1001; GFX12-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
1002; GFX12-NEXT:    s_endpgm
  ; Test body: load a <2 x i8> vector through %in (addrspace(4)), sign-extend
  ; each element to i32, and store the <2 x i32> result through %out
  ; (addrspace(1)).
  ; The per-target assertions above are autogenerated (see the NOTE at the top
  ; of this file); regenerate them with the update script, do not hand-edit.
1003  %load = load <2 x i8>, ptr addrspace(4) %in
1004  %ext = sext <2 x i8> %load to <2 x i32>
1005  store <2 x i32> %ext, ptr addrspace(1) %out
1006  ret void
1007}
1008
1009; TODO: These should use DST, but for some there are redundant MOVs
1010define amdgpu_kernel void @constant_zextload_v3i8_to_v3i32(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
1011; GFX6-NOHSA-LABEL: constant_zextload_v3i8_to_v3i32:
1012; GFX6-NOHSA:       ; %bb.0: ; %entry
1013; GFX6-NOHSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
1014; GFX6-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
1015; GFX6-NOHSA-NEXT:    s_load_dword s4, s[2:3], 0x0
1016; GFX6-NOHSA-NEXT:    s_mov_b32 s3, 0xf000
1017; GFX6-NOHSA-NEXT:    s_mov_b32 s2, -1
1018; GFX6-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
1019; GFX6-NOHSA-NEXT:    s_bfe_u32 s5, s4, 0x80008
1020; GFX6-NOHSA-NEXT:    s_bfe_u32 s6, s4, 0x80010
1021; GFX6-NOHSA-NEXT:    s_and_b32 s4, s4, 0xff
1022; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s6
1023; GFX6-NOHSA-NEXT:    buffer_store_dword v0, off, s[0:3], 0 offset:8
1024; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
1025; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s4
1026; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s5
1027; GFX6-NOHSA-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
1028; GFX6-NOHSA-NEXT:    s_endpgm
1029;
1030; GFX7-HSA-LABEL: constant_zextload_v3i8_to_v3i32:
1031; GFX7-HSA:       ; %bb.0: ; %entry
1032; GFX7-HSA-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
1033; GFX7-HSA-NEXT:    s_waitcnt lgkmcnt(0)
1034; GFX7-HSA-NEXT:    s_load_dword s2, s[2:3], 0x0
1035; GFX7-HSA-NEXT:    v_mov_b32_e32 v3, s0
1036; GFX7-HSA-NEXT:    v_mov_b32_e32 v4, s1
1037; GFX7-HSA-NEXT:    s_waitcnt lgkmcnt(0)
1038; GFX7-HSA-NEXT:    s_bfe_u32 s0, s2, 0x80008
1039; GFX7-HSA-NEXT:    s_and_b32 s1, s2, 0xff
1040; GFX7-HSA-NEXT:    s_bfe_u32 s2, s2, 0x80010
1041; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s1
1042; GFX7-HSA-NEXT:    v_mov_b32_e32 v1, s0
1043; GFX7-HSA-NEXT:    v_mov_b32_e32 v2, s2
1044; GFX7-HSA-NEXT:    flat_store_dwordx3 v[3:4], v[0:2]
1045; GFX7-HSA-NEXT:    s_endpgm
1046;
1047; GFX8-NOHSA-LABEL: constant_zextload_v3i8_to_v3i32:
1048; GFX8-NOHSA:       ; %bb.0: ; %entry
1049; GFX8-NOHSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
1050; GFX8-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
1051; GFX8-NOHSA-NEXT:    s_load_dword s2, s[2:3], 0x0
1052; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v3, s0
1053; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s1
1054; GFX8-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
1055; GFX8-NOHSA-NEXT:    s_bfe_u32 s0, s2, 0x80008
1056; GFX8-NOHSA-NEXT:    s_and_b32 s1, s2, 0xff
1057; GFX8-NOHSA-NEXT:    s_bfe_u32 s2, s2, 0x80010
1058; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s1
1059; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s0
1060; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s2
1061; GFX8-NOHSA-NEXT:    flat_store_dwordx3 v[3:4], v[0:2]
1062; GFX8-NOHSA-NEXT:    s_endpgm
1063;
1064; EG-LABEL: constant_zextload_v3i8_to_v3i32:
1065; EG:       ; %bb.0: ; %entry
1066; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
1067; EG-NEXT:    TEX 0 @6
1068; EG-NEXT:    ALU 11, @9, KC0[CB0:0-32], KC1[]
1069; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T4.X, T7.X, 0
1070; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T5.XY, T6.X, 1
1071; EG-NEXT:    CF_END
1072; EG-NEXT:    Fetch clause starting at 6:
1073; EG-NEXT:     VTX_READ_32 T4.X, T4.X, 0, #1
1074; EG-NEXT:    ALU clause starting at 8:
1075; EG-NEXT:     MOV * T4.X, KC0[2].Z,
1076; EG-NEXT:    ALU clause starting at 9:
1077; EG-NEXT:     MOV * T0.W, literal.x,
1078; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
1079; EG-NEXT:     BFE_UINT * T5.Y, T4.X, literal.x, PV.W,
1080; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
1081; EG-NEXT:     AND_INT T5.X, T4.X, literal.x,
1082; EG-NEXT:     LSHR * T6.X, KC0[2].Y, literal.y,
1083; EG-NEXT:    255(3.573311e-43), 2(2.802597e-45)
1084; EG-NEXT:     BFE_UINT T4.X, T4.X, literal.x, T0.W,
1085; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
1086; EG-NEXT:    16(2.242078e-44), 8(1.121039e-44)
1087; EG-NEXT:     LSHR * T7.X, PV.W, literal.x,
1088; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
1089;
1090; GFX12-LABEL: constant_zextload_v3i8_to_v3i32:
1091; GFX12:       ; %bb.0: ; %entry
1092; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
1093; GFX12-NEXT:    s_wait_kmcnt 0x0
1094; GFX12-NEXT:    s_load_b32 s2, s[2:3], 0x0
1095; GFX12-NEXT:    s_wait_kmcnt 0x0
1096; GFX12-NEXT:    s_bfe_u32 s3, s2, 0x80008
1097; GFX12-NEXT:    s_and_b32 s4, s2, 0xff
1098; GFX12-NEXT:    s_bfe_u32 s2, s2, 0x80010
1099; GFX12-NEXT:    v_dual_mov_b32 v3, 0 :: v_dual_mov_b32 v0, s4
1100; GFX12-NEXT:    v_dual_mov_b32 v1, s3 :: v_dual_mov_b32 v2, s2
1101; GFX12-NEXT:    global_store_b96 v3, v[0:2], s[0:1]
1102; GFX12-NEXT:    s_endpgm
1103entry:
  ; Test body: load a <3 x i8> vector (non-power-of-two element count) through
  ; %in (addrspace(4)), zero-extend each element to i32, and store the
  ; <3 x i32> result through %out (addrspace(1)).
  ; The "entry" label name is echoed in the autogenerated assertions above
  ; ("; %entry"), so renaming it requires regenerating them.
1104  %ld = load <3 x i8>, ptr addrspace(4) %in
1105  %ext = zext <3 x i8> %ld to <3 x i32>
1106  store <3 x i32> %ext, ptr addrspace(1) %out
1107  ret void
1108}
1109
1110; TODO: These should use DST, but for some there are redundant MOVs
1111define amdgpu_kernel void @constant_sextload_v3i8_to_v3i32(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
1112; GFX6-NOHSA-LABEL: constant_sextload_v3i8_to_v3i32:
1113; GFX6-NOHSA:       ; %bb.0: ; %entry
1114; GFX6-NOHSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
1115; GFX6-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
1116; GFX6-NOHSA-NEXT:    s_load_dword s4, s[2:3], 0x0
1117; GFX6-NOHSA-NEXT:    s_mov_b32 s3, 0xf000
1118; GFX6-NOHSA-NEXT:    s_mov_b32 s2, -1
1119; GFX6-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
1120; GFX6-NOHSA-NEXT:    s_bfe_i32 s5, s4, 0x80008
1121; GFX6-NOHSA-NEXT:    s_bfe_i32 s6, s4, 0x80010
1122; GFX6-NOHSA-NEXT:    s_sext_i32_i8 s4, s4
1123; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s6
1124; GFX6-NOHSA-NEXT:    buffer_store_dword v0, off, s[0:3], 0 offset:8
1125; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
1126; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s4
1127; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s5
1128; GFX6-NOHSA-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
1129; GFX6-NOHSA-NEXT:    s_endpgm
1130;
1131; GFX7-HSA-LABEL: constant_sextload_v3i8_to_v3i32:
1132; GFX7-HSA:       ; %bb.0: ; %entry
1133; GFX7-HSA-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
1134; GFX7-HSA-NEXT:    s_waitcnt lgkmcnt(0)
1135; GFX7-HSA-NEXT:    s_load_dword s2, s[2:3], 0x0
1136; GFX7-HSA-NEXT:    v_mov_b32_e32 v3, s0
1137; GFX7-HSA-NEXT:    v_mov_b32_e32 v4, s1
1138; GFX7-HSA-NEXT:    s_waitcnt lgkmcnt(0)
1139; GFX7-HSA-NEXT:    s_bfe_i32 s0, s2, 0x80010
1140; GFX7-HSA-NEXT:    s_bfe_i32 s1, s2, 0x80008
1141; GFX7-HSA-NEXT:    s_sext_i32_i8 s2, s2
1142; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s2
1143; GFX7-HSA-NEXT:    v_mov_b32_e32 v1, s1
1144; GFX7-HSA-NEXT:    v_mov_b32_e32 v2, s0
1145; GFX7-HSA-NEXT:    flat_store_dwordx3 v[3:4], v[0:2]
1146; GFX7-HSA-NEXT:    s_endpgm
1147;
1148; GFX8-NOHSA-LABEL: constant_sextload_v3i8_to_v3i32:
1149; GFX8-NOHSA:       ; %bb.0: ; %entry
1150; GFX8-NOHSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
1151; GFX8-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
1152; GFX8-NOHSA-NEXT:    s_load_dword s2, s[2:3], 0x0
1153; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v3, s0
1154; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s1
1155; GFX8-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
1156; GFX8-NOHSA-NEXT:    s_bfe_i32 s0, s2, 0x80010
1157; GFX8-NOHSA-NEXT:    s_bfe_i32 s1, s2, 0x80008
1158; GFX8-NOHSA-NEXT:    s_sext_i32_i8 s2, s2
1159; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s2
1160; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s1
1161; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s0
1162; GFX8-NOHSA-NEXT:    flat_store_dwordx3 v[3:4], v[0:2]
1163; GFX8-NOHSA-NEXT:    s_endpgm
1164;
1165; EG-LABEL: constant_sextload_v3i8_to_v3i32:
1166; EG:       ; %bb.0: ; %entry
1167; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
1168; EG-NEXT:    TEX 0 @6
1169; EG-NEXT:    ALU 11, @9, KC0[CB0:0-32], KC1[]
1170; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T6.X, T4.X, 0
1171; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T7.XY, T5.X, 1
1172; EG-NEXT:    CF_END
1173; EG-NEXT:    Fetch clause starting at 6:
1174; EG-NEXT:     VTX_READ_32 T4.X, T4.X, 0, #1
1175; EG-NEXT:    ALU clause starting at 8:
1176; EG-NEXT:     MOV * T4.X, KC0[2].Z,
1177; EG-NEXT:    ALU clause starting at 9:
1178; EG-NEXT:     LSHR T5.X, KC0[2].Y, literal.x,
1179; EG-NEXT:     LSHR * T0.W, T4.X, literal.y,
1180; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
1181; EG-NEXT:     BFE_INT * T6.X, PV.W, 0.0, literal.x,
1182; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
1183; EG-NEXT:     BFE_INT T7.X, T4.X, 0.0, literal.x,
1184; EG-NEXT:     LSHR T0.W, T4.X, literal.x,
1185; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.x,
1186; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
1187; EG-NEXT:     LSHR T4.X, PS, literal.x,
1188; EG-NEXT:     BFE_INT * T7.Y, PV.W, 0.0, literal.y,
1189; EG-NEXT:    2(2.802597e-45), 8(1.121039e-44)
1190;
1191; GFX12-LABEL: constant_sextload_v3i8_to_v3i32:
1192; GFX12:       ; %bb.0: ; %entry
1193; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
1194; GFX12-NEXT:    s_wait_kmcnt 0x0
1195; GFX12-NEXT:    s_load_b32 s2, s[2:3], 0x0
1196; GFX12-NEXT:    s_wait_kmcnt 0x0
1197; GFX12-NEXT:    s_bfe_i32 s3, s2, 0x80010
1198; GFX12-NEXT:    s_sext_i32_i8 s4, s2
1199; GFX12-NEXT:    s_bfe_i32 s2, s2, 0x80008
1200; GFX12-NEXT:    v_dual_mov_b32 v3, 0 :: v_dual_mov_b32 v0, s4
1201; GFX12-NEXT:    v_dual_mov_b32 v1, s2 :: v_dual_mov_b32 v2, s3
1202; GFX12-NEXT:    global_store_b96 v3, v[0:2], s[0:1]
1203; GFX12-NEXT:    s_endpgm
1204entry:
  ; Test body: load a <3 x i8> vector (non-power-of-two element count) through
  ; %in (addrspace(4)), sign-extend each element to i32, and store the
  ; <3 x i32> result through %out (addrspace(1)).
  ; The "entry" label name is echoed in the autogenerated assertions above
  ; ("; %entry"), so renaming it requires regenerating them.
1205  %ld = load <3 x i8>, ptr addrspace(4) %in
1206  %ext = sext <3 x i8> %ld to <3 x i32>
1207  store <3 x i32> %ext, ptr addrspace(1) %out
1208  ret void
1209}
1210
1211; TODO: These should use DST, but for some there are redundant MOVs
1212define amdgpu_kernel void @constant_zextload_v4i8_to_v4i32(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
1213; GFX6-NOHSA-LABEL: constant_zextload_v4i8_to_v4i32:
1214; GFX6-NOHSA:       ; %bb.0:
1215; GFX6-NOHSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
1216; GFX6-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
1217; GFX6-NOHSA-NEXT:    s_load_dword s2, s[2:3], 0x0
1218; GFX6-NOHSA-NEXT:    s_mov_b32 s3, 0xf000
1219; GFX6-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
1220; GFX6-NOHSA-NEXT:    s_lshr_b32 s4, s2, 24
1221; GFX6-NOHSA-NEXT:    s_bfe_u32 s5, s2, 0x80008
1222; GFX6-NOHSA-NEXT:    s_and_b32 s6, s2, 0xff
1223; GFX6-NOHSA-NEXT:    s_bfe_u32 s7, s2, 0x80010
1224; GFX6-NOHSA-NEXT:    s_mov_b32 s2, -1
1225; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s6
1226; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s5
1227; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s7
1228; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s4
1229; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
1230; GFX6-NOHSA-NEXT:    s_endpgm
1231;
1232; GFX7-HSA-LABEL: constant_zextload_v4i8_to_v4i32:
1233; GFX7-HSA:       ; %bb.0:
1234; GFX7-HSA-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
1235; GFX7-HSA-NEXT:    s_waitcnt lgkmcnt(0)
1236; GFX7-HSA-NEXT:    s_load_dword s2, s[2:3], 0x0
1237; GFX7-HSA-NEXT:    v_mov_b32_e32 v4, s0
1238; GFX7-HSA-NEXT:    v_mov_b32_e32 v5, s1
1239; GFX7-HSA-NEXT:    s_waitcnt lgkmcnt(0)
1240; GFX7-HSA-NEXT:    s_lshr_b32 s0, s2, 24
1241; GFX7-HSA-NEXT:    s_bfe_u32 s1, s2, 0x80008
1242; GFX7-HSA-NEXT:    s_and_b32 s3, s2, 0xff
1243; GFX7-HSA-NEXT:    s_bfe_u32 s2, s2, 0x80010
1244; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s3
1245; GFX7-HSA-NEXT:    v_mov_b32_e32 v1, s1
1246; GFX7-HSA-NEXT:    v_mov_b32_e32 v2, s2
1247; GFX7-HSA-NEXT:    v_mov_b32_e32 v3, s0
1248; GFX7-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
1249; GFX7-HSA-NEXT:    s_endpgm
1250;
1251; GFX8-NOHSA-LABEL: constant_zextload_v4i8_to_v4i32:
1252; GFX8-NOHSA:       ; %bb.0:
1253; GFX8-NOHSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
1254; GFX8-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
1255; GFX8-NOHSA-NEXT:    s_load_dword s2, s[2:3], 0x0
1256; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s0
1257; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s1
1258; GFX8-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
1259; GFX8-NOHSA-NEXT:    s_lshr_b32 s0, s2, 24
1260; GFX8-NOHSA-NEXT:    s_bfe_u32 s1, s2, 0x80008
1261; GFX8-NOHSA-NEXT:    s_and_b32 s3, s2, 0xff
1262; GFX8-NOHSA-NEXT:    s_bfe_u32 s2, s2, 0x80010
1263; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s3
1264; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s1
1265; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s2
1266; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v3, s0
1267; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
1268; GFX8-NOHSA-NEXT:    s_endpgm
1269;
1270; EG-LABEL: constant_zextload_v4i8_to_v4i32:
1271; EG:       ; %bb.0:
1272; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
1273; EG-NEXT:    TEX 0 @6
1274; EG-NEXT:    ALU 9, @9, KC0[CB0:0-32], KC1[]
1275; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T4.XYZW, T5.X, 1
1276; EG-NEXT:    CF_END
1277; EG-NEXT:    PAD
1278; EG-NEXT:    Fetch clause starting at 6:
1279; EG-NEXT:     VTX_READ_32 T4.X, T4.X, 0, #1
1280; EG-NEXT:    ALU clause starting at 8:
1281; EG-NEXT:     MOV * T4.X, KC0[2].Z,
1282; EG-NEXT:    ALU clause starting at 9:
1283; EG-NEXT:     MOV * T0.W, literal.x,
1284; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
1285; EG-NEXT:     BFE_UINT * T4.Z, T4.X, literal.x, PV.W,
1286; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
1287; EG-NEXT:     BFE_UINT T4.Y, T4.X, literal.x, T0.W,
1288; EG-NEXT:     LSHR * T4.W, T4.X, literal.y,
1289; EG-NEXT:    8(1.121039e-44), 24(3.363116e-44)
1290; EG-NEXT:     AND_INT T4.X, T4.X, literal.x,
1291; EG-NEXT:     LSHR * T5.X, KC0[2].Y, literal.y,
1292; EG-NEXT:    255(3.573311e-43), 2(2.802597e-45)
1293;
1294; GFX12-LABEL: constant_zextload_v4i8_to_v4i32:
1295; GFX12:       ; %bb.0:
1296; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
1297; GFX12-NEXT:    s_wait_kmcnt 0x0
1298; GFX12-NEXT:    s_load_b32 s2, s[2:3], 0x0
1299; GFX12-NEXT:    s_wait_kmcnt 0x0
1300; GFX12-NEXT:    s_lshr_b32 s3, s2, 24
1301; GFX12-NEXT:    s_bfe_u32 s4, s2, 0x80008
1302; GFX12-NEXT:    s_and_b32 s5, s2, 0xff
1303; GFX12-NEXT:    s_bfe_u32 s2, s2, 0x80010
1304; GFX12-NEXT:    v_dual_mov_b32 v4, 0 :: v_dual_mov_b32 v1, s4
1305; GFX12-NEXT:    v_dual_mov_b32 v0, s5 :: v_dual_mov_b32 v3, s3
1306; GFX12-NEXT:    v_mov_b32_e32 v2, s2
1307; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[0:1]
1308; GFX12-NEXT:    s_endpgm
  ; Test body: load a <4 x i8> vector through %in (addrspace(4)), zero-extend
  ; each element to i32, and store the <4 x i32> result through %out
  ; (addrspace(1)).
  ; The per-target assertions above are autogenerated (see the NOTE at the top
  ; of this file); regenerate them with the update script, do not hand-edit.
1309  %load = load <4 x i8>, ptr addrspace(4) %in
1310  %ext = zext <4 x i8> %load to <4 x i32>
1311  store <4 x i32> %ext, ptr addrspace(1) %out
1312  ret void
1313}
1314
1315; TODO: These should use DST, but for some there are redundant MOVs
1316define amdgpu_kernel void @constant_sextload_v4i8_to_v4i32(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
1317; GFX6-NOHSA-LABEL: constant_sextload_v4i8_to_v4i32:
1318; GFX6-NOHSA:       ; %bb.0:
1319; GFX6-NOHSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
1320; GFX6-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
1321; GFX6-NOHSA-NEXT:    s_load_dword s2, s[2:3], 0x0
1322; GFX6-NOHSA-NEXT:    s_mov_b32 s3, 0xf000
1323; GFX6-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
1324; GFX6-NOHSA-NEXT:    s_ashr_i32 s4, s2, 24
1325; GFX6-NOHSA-NEXT:    s_bfe_i32 s5, s2, 0x80010
1326; GFX6-NOHSA-NEXT:    s_bfe_i32 s6, s2, 0x80008
1327; GFX6-NOHSA-NEXT:    s_sext_i32_i8 s7, s2
1328; GFX6-NOHSA-NEXT:    s_mov_b32 s2, -1
1329; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s7
1330; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s6
1331; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s5
1332; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s4
1333; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
1334; GFX6-NOHSA-NEXT:    s_endpgm
1335;
1336; GFX7-HSA-LABEL: constant_sextload_v4i8_to_v4i32:
1337; GFX7-HSA:       ; %bb.0:
1338; GFX7-HSA-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
1339; GFX7-HSA-NEXT:    s_waitcnt lgkmcnt(0)
1340; GFX7-HSA-NEXT:    s_load_dword s2, s[2:3], 0x0
1341; GFX7-HSA-NEXT:    v_mov_b32_e32 v4, s0
1342; GFX7-HSA-NEXT:    v_mov_b32_e32 v5, s1
1343; GFX7-HSA-NEXT:    s_waitcnt lgkmcnt(0)
1344; GFX7-HSA-NEXT:    s_ashr_i32 s0, s2, 24
1345; GFX7-HSA-NEXT:    s_bfe_i32 s1, s2, 0x80010
1346; GFX7-HSA-NEXT:    s_bfe_i32 s3, s2, 0x80008
1347; GFX7-HSA-NEXT:    s_sext_i32_i8 s2, s2
1348; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s2
1349; GFX7-HSA-NEXT:    v_mov_b32_e32 v1, s3
1350; GFX7-HSA-NEXT:    v_mov_b32_e32 v2, s1
1351; GFX7-HSA-NEXT:    v_mov_b32_e32 v3, s0
1352; GFX7-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
1353; GFX7-HSA-NEXT:    s_endpgm
1354;
1355; GFX8-NOHSA-LABEL: constant_sextload_v4i8_to_v4i32:
1356; GFX8-NOHSA:       ; %bb.0:
1357; GFX8-NOHSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
1358; GFX8-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
1359; GFX8-NOHSA-NEXT:    s_load_dword s2, s[2:3], 0x0
1360; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s0
1361; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s1
1362; GFX8-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
1363; GFX8-NOHSA-NEXT:    s_ashr_i32 s0, s2, 24
1364; GFX8-NOHSA-NEXT:    s_bfe_i32 s1, s2, 0x80010
1365; GFX8-NOHSA-NEXT:    s_bfe_i32 s3, s2, 0x80008
1366; GFX8-NOHSA-NEXT:    s_sext_i32_i8 s2, s2
1367; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s2
1368; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s3
1369; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s1
1370; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v3, s0
1371; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
1372; GFX8-NOHSA-NEXT:    s_endpgm
1373;
1374; EG-LABEL: constant_sextload_v4i8_to_v4i32:
1375; EG:       ; %bb.0:
1376; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
1377; EG-NEXT:    TEX 0 @6
1378; EG-NEXT:    ALU 11, @9, KC0[CB0:0-32], KC1[]
1379; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T5.XYZW, T4.X, 1
1380; EG-NEXT:    CF_END
1381; EG-NEXT:    PAD
1382; EG-NEXT:    Fetch clause starting at 6:
1383; EG-NEXT:     VTX_READ_32 T4.X, T4.X, 0, #1
1384; EG-NEXT:    ALU clause starting at 8:
1385; EG-NEXT:     MOV * T4.X, KC0[2].Z,
1386; EG-NEXT:    ALU clause starting at 9:
1387; EG-NEXT:     BFE_INT T5.X, T4.X, 0.0, literal.x,
1388; EG-NEXT:     LSHR * T0.W, T4.X, literal.y,
1389; EG-NEXT:    8(1.121039e-44), 24(3.363116e-44)
1390; EG-NEXT:     BFE_INT T5.W, PV.W, 0.0, literal.x,
1391; EG-NEXT:     LSHR * T0.W, T4.X, literal.y,
1392; EG-NEXT:    8(1.121039e-44), 16(2.242078e-44)
1393; EG-NEXT:     BFE_INT T5.Z, PS, 0.0, literal.x,
1394; EG-NEXT:     LSHR * T0.W, T4.X, literal.x,
1395; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
1396; EG-NEXT:     LSHR T4.X, KC0[2].Y, literal.x,
1397; EG-NEXT:     BFE_INT * T5.Y, PV.W, 0.0, literal.y,
1398; EG-NEXT:    2(2.802597e-45), 8(1.121039e-44)
1399;
1400; GFX12-LABEL: constant_sextload_v4i8_to_v4i32:
1401; GFX12:       ; %bb.0:
1402; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
1403; GFX12-NEXT:    s_wait_kmcnt 0x0
1404; GFX12-NEXT:    s_load_b32 s2, s[2:3], 0x0
1405; GFX12-NEXT:    s_wait_kmcnt 0x0
1406; GFX12-NEXT:    s_ashr_i32 s3, s2, 24
1407; GFX12-NEXT:    s_bfe_i32 s4, s2, 0x80010
1408; GFX12-NEXT:    s_sext_i32_i8 s5, s2
1409; GFX12-NEXT:    s_bfe_i32 s2, s2, 0x80008
1410; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
1411; GFX12-NEXT:    v_dual_mov_b32 v4, 0 :: v_dual_mov_b32 v1, s2
1412; GFX12-NEXT:    v_dual_mov_b32 v0, s5 :: v_dual_mov_b32 v3, s3
1413; GFX12-NEXT:    v_mov_b32_e32 v2, s4
1414; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[0:1]
1415; GFX12-NEXT:    s_endpgm
  ; Test body: load a <4 x i8> vector through %in (addrspace(4)), sign-extend
  ; each element to i32, and store the <4 x i32> result through %out
  ; (addrspace(1)).
  ; The per-target assertions above are autogenerated (see the NOTE at the top
  ; of this file); regenerate them with the update script, do not hand-edit.
1416  %load = load <4 x i8>, ptr addrspace(4) %in
1417  %ext = sext <4 x i8> %load to <4 x i32>
1418  store <4 x i32> %ext, ptr addrspace(1) %out
1419  ret void
1420}
1421
1422; TODO: These should use DST, but for some there are redundant MOVs
; Codegen test: load <8 x i8> from %in (ptr addrspace(4)), zero-extend every
; element to i32, and store the <8 x i32> result to %out (ptr addrspace(1)).
; The assertions between the define line and the IR body are autogenerated
; (see the note on the first line of this file), one group per check prefix
; used by the llc run lines; regenerate them with the update script instead of
; editing them by hand.
; In the expected amdgcn output the source is fetched with one scalar 64-bit
; load, the bytes are separated with s_and_b32 / s_bfe_u32 / s_lshr_b32, and
; the result is written with two 128-bit stores (byte offsets 16 and 0).
1423define amdgpu_kernel void @constant_zextload_v8i8_to_v8i32(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
1424; GFX6-NOHSA-LABEL: constant_zextload_v8i8_to_v8i32:
1425; GFX6-NOHSA:       ; %bb.0:
1426; GFX6-NOHSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
1427; GFX6-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
1428; GFX6-NOHSA-NEXT:    s_load_dwordx2 s[4:5], s[2:3], 0x0
1429; GFX6-NOHSA-NEXT:    s_mov_b32 s3, 0xf000
1430; GFX6-NOHSA-NEXT:    s_mov_b32 s2, -1
1431; GFX6-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
1432; GFX6-NOHSA-NEXT:    s_lshr_b32 s6, s4, 24
1433; GFX6-NOHSA-NEXT:    s_bfe_u32 s7, s4, 0x80008
1434; GFX6-NOHSA-NEXT:    s_lshr_b32 s8, s5, 24
1435; GFX6-NOHSA-NEXT:    s_bfe_u32 s9, s5, 0x80008
1436; GFX6-NOHSA-NEXT:    s_and_b32 s10, s4, 0xff
1437; GFX6-NOHSA-NEXT:    s_and_b32 s11, s5, 0xff
1438; GFX6-NOHSA-NEXT:    s_bfe_u32 s5, s5, 0x80010
1439; GFX6-NOHSA-NEXT:    s_bfe_u32 s4, s4, 0x80010
1440; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s11
1441; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s9
1442; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s5
1443; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s8
1444; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
1445; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
1446; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s10
1447; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s7
1448; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s4
1449; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s6
1450; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
1451; GFX6-NOHSA-NEXT:    s_endpgm
1452;
1453; GFX7-HSA-LABEL: constant_zextload_v8i8_to_v8i32:
1454; GFX7-HSA:       ; %bb.0:
1455; GFX7-HSA-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
1456; GFX7-HSA-NEXT:    s_waitcnt lgkmcnt(0)
1457; GFX7-HSA-NEXT:    s_load_dwordx2 s[2:3], s[2:3], 0x0
1458; GFX7-HSA-NEXT:    s_waitcnt lgkmcnt(0)
1459; GFX7-HSA-NEXT:    s_lshr_b32 s4, s2, 24
1460; GFX7-HSA-NEXT:    s_bfe_u32 s5, s2, 0x80008
1461; GFX7-HSA-NEXT:    s_lshr_b32 s6, s3, 24
1462; GFX7-HSA-NEXT:    s_bfe_u32 s7, s3, 0x80008
1463; GFX7-HSA-NEXT:    s_and_b32 s8, s2, 0xff
1464; GFX7-HSA-NEXT:    s_bfe_u32 s9, s2, 0x80010
1465; GFX7-HSA-NEXT:    s_and_b32 s2, s3, 0xff
1466; GFX7-HSA-NEXT:    s_bfe_u32 s3, s3, 0x80010
1467; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s2
1468; GFX7-HSA-NEXT:    s_add_u32 s2, s0, 16
1469; GFX7-HSA-NEXT:    v_mov_b32_e32 v2, s3
1470; GFX7-HSA-NEXT:    s_addc_u32 s3, s1, 0
1471; GFX7-HSA-NEXT:    v_mov_b32_e32 v5, s3
1472; GFX7-HSA-NEXT:    v_mov_b32_e32 v1, s7
1473; GFX7-HSA-NEXT:    v_mov_b32_e32 v3, s6
1474; GFX7-HSA-NEXT:    v_mov_b32_e32 v4, s2
1475; GFX7-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
1476; GFX7-HSA-NEXT:    v_mov_b32_e32 v5, s1
1477; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s8
1478; GFX7-HSA-NEXT:    v_mov_b32_e32 v1, s5
1479; GFX7-HSA-NEXT:    v_mov_b32_e32 v2, s9
1480; GFX7-HSA-NEXT:    v_mov_b32_e32 v3, s4
1481; GFX7-HSA-NEXT:    v_mov_b32_e32 v4, s0
1482; GFX7-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
1483; GFX7-HSA-NEXT:    s_endpgm
1484;
1485; GFX8-NOHSA-LABEL: constant_zextload_v8i8_to_v8i32:
1486; GFX8-NOHSA:       ; %bb.0:
1487; GFX8-NOHSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
1488; GFX8-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
1489; GFX8-NOHSA-NEXT:    s_load_dwordx2 s[2:3], s[2:3], 0x0
1490; GFX8-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
1491; GFX8-NOHSA-NEXT:    s_lshr_b32 s4, s2, 24
1492; GFX8-NOHSA-NEXT:    s_bfe_u32 s5, s2, 0x80008
1493; GFX8-NOHSA-NEXT:    s_lshr_b32 s6, s3, 24
1494; GFX8-NOHSA-NEXT:    s_bfe_u32 s7, s3, 0x80008
1495; GFX8-NOHSA-NEXT:    s_and_b32 s8, s2, 0xff
1496; GFX8-NOHSA-NEXT:    s_bfe_u32 s9, s2, 0x80010
1497; GFX8-NOHSA-NEXT:    s_and_b32 s2, s3, 0xff
1498; GFX8-NOHSA-NEXT:    s_bfe_u32 s3, s3, 0x80010
1499; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s2
1500; GFX8-NOHSA-NEXT:    s_add_u32 s2, s0, 16
1501; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s3
1502; GFX8-NOHSA-NEXT:    s_addc_u32 s3, s1, 0
1503; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s3
1504; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s7
1505; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v3, s6
1506; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s2
1507; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
1508; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s1
1509; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s8
1510; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s5
1511; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s9
1512; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v3, s4
1513; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s0
1514; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
1515; GFX8-NOHSA-NEXT:    s_endpgm
1516;
1517; EG-LABEL: constant_zextload_v8i8_to_v8i32:
1518; EG:       ; %bb.0:
1519; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
1520; EG-NEXT:    TEX 0 @6
1521; EG-NEXT:    ALU 20, @9, KC0[CB0:0-32], KC1[]
1522; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T7.XYZW, T8.X, 0
1523; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T6.XYZW, T5.X, 1
1524; EG-NEXT:    CF_END
1525; EG-NEXT:    Fetch clause starting at 6:
1526; EG-NEXT:     VTX_READ_64 T5.XY, T5.X, 0, #1
1527; EG-NEXT:    ALU clause starting at 8:
1528; EG-NEXT:     MOV * T5.X, KC0[2].Z,
1529; EG-NEXT:    ALU clause starting at 9:
1530; EG-NEXT:     MOV * T0.W, literal.x,
1531; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
1532; EG-NEXT:     BFE_UINT * T6.Z, T5.X, literal.x, PV.W,
1533; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
1534; EG-NEXT:     BFE_UINT T6.Y, T5.X, literal.x, T0.W,
1535; EG-NEXT:     BFE_UINT T7.Z, T5.Y, literal.y, T0.W,
1536; EG-NEXT:     LSHR * T6.W, T5.X, literal.z,
1537; EG-NEXT:    8(1.121039e-44), 16(2.242078e-44)
1538; EG-NEXT:    24(3.363116e-44), 0(0.000000e+00)
1539; EG-NEXT:     AND_INT T6.X, T5.X, literal.x,
1540; EG-NEXT:     BFE_UINT T7.Y, T5.Y, literal.y, T0.W,
1541; EG-NEXT:     LSHR * T5.X, KC0[2].Y, literal.z,
1542; EG-NEXT:    255(3.573311e-43), 8(1.121039e-44)
1543; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
1544; EG-NEXT:     LSHR * T7.W, T5.Y, literal.x,
1545; EG-NEXT:    24(3.363116e-44), 0(0.000000e+00)
1546; EG-NEXT:     AND_INT T7.X, T5.Y, literal.x,
1547; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
1548; EG-NEXT:    255(3.573311e-43), 16(2.242078e-44)
1549; EG-NEXT:     LSHR * T8.X, PV.W, literal.x,
1550; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
1551;
1552; GFX12-LABEL: constant_zextload_v8i8_to_v8i32:
1553; GFX12:       ; %bb.0:
1554; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
1555; GFX12-NEXT:    s_wait_kmcnt 0x0
1556; GFX12-NEXT:    s_load_b64 s[2:3], s[2:3], 0x0
1557; GFX12-NEXT:    s_wait_kmcnt 0x0
1558; GFX12-NEXT:    s_lshr_b32 s6, s3, 24
1559; GFX12-NEXT:    s_bfe_u32 s7, s3, 0x80008
1560; GFX12-NEXT:    s_and_b32 s9, s3, 0xff
1561; GFX12-NEXT:    s_bfe_u32 s3, s3, 0x80010
1562; GFX12-NEXT:    s_lshr_b32 s4, s2, 24
1563; GFX12-NEXT:    s_bfe_u32 s5, s2, 0x80008
1564; GFX12-NEXT:    s_and_b32 s8, s2, 0xff
1565; GFX12-NEXT:    s_bfe_u32 s2, s2, 0x80010
1566; GFX12-NEXT:    v_dual_mov_b32 v8, 0 :: v_dual_mov_b32 v1, s7
1567; GFX12-NEXT:    v_dual_mov_b32 v0, s9 :: v_dual_mov_b32 v3, s6
1568; GFX12-NEXT:    v_dual_mov_b32 v2, s3 :: v_dual_mov_b32 v5, s5
1569; GFX12-NEXT:    v_dual_mov_b32 v4, s8 :: v_dual_mov_b32 v7, s4
1570; GFX12-NEXT:    v_mov_b32_e32 v6, s2
1571; GFX12-NEXT:    s_clause 0x1
1572; GFX12-NEXT:    global_store_b128 v8, v[0:3], s[0:1] offset:16
1573; GFX12-NEXT:    global_store_b128 v8, v[4:7], s[0:1]
1574; GFX12-NEXT:    s_endpgm
1575  %load = load <8 x i8>, ptr addrspace(4) %in
1576  %ext = zext <8 x i8> %load to <8 x i32>
1577  store <8 x i32> %ext, ptr addrspace(1) %out
1578  ret void
1579}
1580
1581; TODO: These should use DST, but for some there are redundant MOVs
; Codegen test: load <8 x i8> from %in (ptr addrspace(4)), sign-extend every
; element to i32, and store the <8 x i32> result to %out (ptr addrspace(1)).
; The assertions between the define line and the IR body are autogenerated
; (see the note on the first line of this file), one group per check prefix
; used by the llc run lines; regenerate them with the update script instead of
; editing them by hand.
; In the expected amdgcn output the source is fetched with one scalar 64-bit
; load, the bytes are sign-extended with s_sext_i32_i8 / s_bfe_i32 /
; s_ashr_i32, and the result is written with two 128-bit stores (byte offsets
; 16 and 0).
1582define amdgpu_kernel void @constant_sextload_v8i8_to_v8i32(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
1583; GFX6-NOHSA-LABEL: constant_sextload_v8i8_to_v8i32:
1584; GFX6-NOHSA:       ; %bb.0:
1585; GFX6-NOHSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
1586; GFX6-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
1587; GFX6-NOHSA-NEXT:    s_load_dwordx2 s[4:5], s[2:3], 0x0
1588; GFX6-NOHSA-NEXT:    s_mov_b32 s3, 0xf000
1589; GFX6-NOHSA-NEXT:    s_mov_b32 s2, -1
1590; GFX6-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
1591; GFX6-NOHSA-NEXT:    s_ashr_i32 s6, s4, 24
1592; GFX6-NOHSA-NEXT:    s_bfe_i32 s7, s4, 0x80010
1593; GFX6-NOHSA-NEXT:    s_bfe_i32 s8, s4, 0x80008
1594; GFX6-NOHSA-NEXT:    s_ashr_i32 s9, s5, 24
1595; GFX6-NOHSA-NEXT:    s_bfe_i32 s10, s5, 0x80010
1596; GFX6-NOHSA-NEXT:    s_bfe_i32 s11, s5, 0x80008
1597; GFX6-NOHSA-NEXT:    s_sext_i32_i8 s5, s5
1598; GFX6-NOHSA-NEXT:    s_sext_i32_i8 s4, s4
1599; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s5
1600; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s11
1601; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s10
1602; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s9
1603; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
1604; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
1605; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s4
1606; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s8
1607; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s7
1608; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s6
1609; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
1610; GFX6-NOHSA-NEXT:    s_endpgm
1611;
1612; GFX7-HSA-LABEL: constant_sextload_v8i8_to_v8i32:
1613; GFX7-HSA:       ; %bb.0:
1614; GFX7-HSA-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
1615; GFX7-HSA-NEXT:    s_waitcnt lgkmcnt(0)
1616; GFX7-HSA-NEXT:    s_load_dwordx2 s[2:3], s[2:3], 0x0
1617; GFX7-HSA-NEXT:    s_waitcnt lgkmcnt(0)
1618; GFX7-HSA-NEXT:    s_ashr_i32 s4, s2, 24
1619; GFX7-HSA-NEXT:    s_bfe_i32 s5, s2, 0x80010
1620; GFX7-HSA-NEXT:    s_bfe_i32 s6, s2, 0x80008
1621; GFX7-HSA-NEXT:    s_sext_i32_i8 s7, s2
1622; GFX7-HSA-NEXT:    s_ashr_i32 s2, s3, 24
1623; GFX7-HSA-NEXT:    s_bfe_i32 s8, s3, 0x80010
1624; GFX7-HSA-NEXT:    s_bfe_i32 s9, s3, 0x80008
1625; GFX7-HSA-NEXT:    s_sext_i32_i8 s3, s3
1626; GFX7-HSA-NEXT:    v_mov_b32_e32 v3, s2
1627; GFX7-HSA-NEXT:    s_add_u32 s2, s0, 16
1628; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s3
1629; GFX7-HSA-NEXT:    s_addc_u32 s3, s1, 0
1630; GFX7-HSA-NEXT:    v_mov_b32_e32 v5, s3
1631; GFX7-HSA-NEXT:    v_mov_b32_e32 v1, s9
1632; GFX7-HSA-NEXT:    v_mov_b32_e32 v2, s8
1633; GFX7-HSA-NEXT:    v_mov_b32_e32 v4, s2
1634; GFX7-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
1635; GFX7-HSA-NEXT:    v_mov_b32_e32 v5, s1
1636; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s7
1637; GFX7-HSA-NEXT:    v_mov_b32_e32 v1, s6
1638; GFX7-HSA-NEXT:    v_mov_b32_e32 v2, s5
1639; GFX7-HSA-NEXT:    v_mov_b32_e32 v3, s4
1640; GFX7-HSA-NEXT:    v_mov_b32_e32 v4, s0
1641; GFX7-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
1642; GFX7-HSA-NEXT:    s_endpgm
1643;
1644; GFX8-NOHSA-LABEL: constant_sextload_v8i8_to_v8i32:
1645; GFX8-NOHSA:       ; %bb.0:
1646; GFX8-NOHSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
1647; GFX8-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
1648; GFX8-NOHSA-NEXT:    s_load_dwordx2 s[2:3], s[2:3], 0x0
1649; GFX8-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
1650; GFX8-NOHSA-NEXT:    s_ashr_i32 s4, s2, 24
1651; GFX8-NOHSA-NEXT:    s_bfe_i32 s5, s2, 0x80010
1652; GFX8-NOHSA-NEXT:    s_bfe_i32 s6, s2, 0x80008
1653; GFX8-NOHSA-NEXT:    s_sext_i32_i8 s7, s2
1654; GFX8-NOHSA-NEXT:    s_ashr_i32 s2, s3, 24
1655; GFX8-NOHSA-NEXT:    s_bfe_i32 s8, s3, 0x80010
1656; GFX8-NOHSA-NEXT:    s_bfe_i32 s9, s3, 0x80008
1657; GFX8-NOHSA-NEXT:    s_sext_i32_i8 s3, s3
1658; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v3, s2
1659; GFX8-NOHSA-NEXT:    s_add_u32 s2, s0, 16
1660; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s3
1661; GFX8-NOHSA-NEXT:    s_addc_u32 s3, s1, 0
1662; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s3
1663; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s9
1664; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s8
1665; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s2
1666; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
1667; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s1
1668; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s7
1669; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s6
1670; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s5
1671; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v3, s4
1672; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s0
1673; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
1674; GFX8-NOHSA-NEXT:    s_endpgm
1675;
1676; EG-LABEL: constant_sextload_v8i8_to_v8i32:
1677; EG:       ; %bb.0:
1678; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
1679; EG-NEXT:    TEX 0 @6
1680; EG-NEXT:    ALU 23, @9, KC0[CB0:0-32], KC1[]
1681; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T7.XYZW, T8.X, 0
1682; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T6.XYZW, T5.X, 1
1683; EG-NEXT:    CF_END
1684; EG-NEXT:    Fetch clause starting at 6:
1685; EG-NEXT:     VTX_READ_64 T5.XY, T5.X, 0, #1
1686; EG-NEXT:    ALU clause starting at 8:
1687; EG-NEXT:     MOV * T5.X, KC0[2].Z,
1688; EG-NEXT:    ALU clause starting at 9:
1689; EG-NEXT:     BFE_INT T6.X, T5.X, 0.0, literal.x,
1690; EG-NEXT:     LSHR * T0.W, T5.X, literal.y,
1691; EG-NEXT:    8(1.121039e-44), 24(3.363116e-44)
1692; EG-NEXT:     BFE_INT T7.X, T5.Y, 0.0, literal.x,
1693; EG-NEXT:     LSHR T0.Z, T5.Y, literal.y,
1694; EG-NEXT:     BFE_INT T6.W, PV.W, 0.0, literal.x,
1695; EG-NEXT:     LSHR * T0.W, T5.X, literal.z,
1696; EG-NEXT:    8(1.121039e-44), 24(3.363116e-44)
1697; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
1698; EG-NEXT:     LSHR T0.Y, T5.Y, literal.x,
1699; EG-NEXT:     BFE_INT T6.Z, PS, 0.0, literal.y,
1700; EG-NEXT:     BFE_INT T7.W, PV.Z, 0.0, literal.y,
1701; EG-NEXT:     LSHR * T0.W, T5.X, literal.y,
1702; EG-NEXT:    16(2.242078e-44), 8(1.121039e-44)
1703; EG-NEXT:     LSHR T5.X, KC0[2].Y, literal.x,
1704; EG-NEXT:     BFE_INT T6.Y, PS, 0.0, literal.y,
1705; EG-NEXT:     BFE_INT T7.Z, PV.Y, 0.0, literal.y,
1706; EG-NEXT:     LSHR T0.W, T5.Y, literal.y,
1707; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.z,
1708; EG-NEXT:    2(2.802597e-45), 8(1.121039e-44)
1709; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
1710; EG-NEXT:     LSHR T8.X, PS, literal.x,
1711; EG-NEXT:     BFE_INT * T7.Y, PV.W, 0.0, literal.y,
1712; EG-NEXT:    2(2.802597e-45), 8(1.121039e-44)
1713;
1714; GFX12-LABEL: constant_sextload_v8i8_to_v8i32:
1715; GFX12:       ; %bb.0:
1716; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
1717; GFX12-NEXT:    s_wait_kmcnt 0x0
1718; GFX12-NEXT:    s_load_b64 s[2:3], s[2:3], 0x0
1719; GFX12-NEXT:    s_wait_kmcnt 0x0
1720; GFX12-NEXT:    s_ashr_i32 s7, s3, 24
1721; GFX12-NEXT:    s_bfe_i32 s8, s3, 0x80010
1722; GFX12-NEXT:    s_sext_i32_i8 s9, s3
1723; GFX12-NEXT:    s_bfe_i32 s3, s3, 0x80008
1724; GFX12-NEXT:    s_ashr_i32 s4, s2, 24
1725; GFX12-NEXT:    s_bfe_i32 s5, s2, 0x80010
1726; GFX12-NEXT:    s_bfe_i32 s6, s2, 0x80008
1727; GFX12-NEXT:    s_sext_i32_i8 s2, s2
1728; GFX12-NEXT:    v_dual_mov_b32 v8, 0 :: v_dual_mov_b32 v1, s3
1729; GFX12-NEXT:    v_dual_mov_b32 v0, s9 :: v_dual_mov_b32 v3, s7
1730; GFX12-NEXT:    v_dual_mov_b32 v2, s8 :: v_dual_mov_b32 v5, s6
1731; GFX12-NEXT:    v_dual_mov_b32 v4, s2 :: v_dual_mov_b32 v7, s4
1732; GFX12-NEXT:    v_mov_b32_e32 v6, s5
1733; GFX12-NEXT:    s_clause 0x1
1734; GFX12-NEXT:    global_store_b128 v8, v[0:3], s[0:1] offset:16
1735; GFX12-NEXT:    global_store_b128 v8, v[4:7], s[0:1]
1736; GFX12-NEXT:    s_endpgm
1737  %load = load <8 x i8>, ptr addrspace(4) %in
1738  %ext = sext <8 x i8> %load to <8 x i32>
1739  store <8 x i32> %ext, ptr addrspace(1) %out
1740  ret void
1741}
1742
1743; TODO: These should use DST, but for some there are redundant MOVs
; Codegen test: load <16 x i8> from %in (ptr addrspace(4)), zero-extend every
; element to i32, and store the <16 x i32> result to %out (ptr addrspace(1)).
; The assertions between the define line and the IR body are autogenerated
; (see the note on the first line of this file), one group per check prefix
; used by the llc run lines; regenerate them with the update script instead of
; editing them by hand.
; In the expected amdgcn output the source is fetched with one scalar 128-bit
; load, the bytes are separated with s_and_b32 / s_bfe_u32 / s_lshr_b32, and
; the result is written with four 128-bit stores (byte offsets 48, 32, 16
; and 0).
1744define amdgpu_kernel void @constant_zextload_v16i8_to_v16i32(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
1745; GFX6-NOHSA-LABEL: constant_zextload_v16i8_to_v16i32:
1746; GFX6-NOHSA:       ; %bb.0:
1747; GFX6-NOHSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
1748; GFX6-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
1749; GFX6-NOHSA-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x0
1750; GFX6-NOHSA-NEXT:    s_mov_b32 s3, 0xf000
1751; GFX6-NOHSA-NEXT:    s_mov_b32 s2, -1
1752; GFX6-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
1753; GFX6-NOHSA-NEXT:    s_lshr_b32 s8, s4, 24
1754; GFX6-NOHSA-NEXT:    s_bfe_u32 s9, s4, 0x80008
1755; GFX6-NOHSA-NEXT:    s_lshr_b32 s10, s5, 24
1756; GFX6-NOHSA-NEXT:    s_bfe_u32 s11, s5, 0x80008
1757; GFX6-NOHSA-NEXT:    s_lshr_b32 s12, s6, 24
1758; GFX6-NOHSA-NEXT:    s_bfe_u32 s13, s6, 0x80008
1759; GFX6-NOHSA-NEXT:    s_lshr_b32 s14, s7, 24
1760; GFX6-NOHSA-NEXT:    s_bfe_u32 s15, s7, 0x80008
1761; GFX6-NOHSA-NEXT:    s_and_b32 s16, s4, 0xff
1762; GFX6-NOHSA-NEXT:    s_bfe_u32 s4, s4, 0x80010
1763; GFX6-NOHSA-NEXT:    s_and_b32 s17, s5, 0xff
1764; GFX6-NOHSA-NEXT:    s_bfe_u32 s5, s5, 0x80010
1765; GFX6-NOHSA-NEXT:    s_and_b32 s18, s6, 0xff
1766; GFX6-NOHSA-NEXT:    s_and_b32 s19, s7, 0xff
1767; GFX6-NOHSA-NEXT:    s_bfe_u32 s7, s7, 0x80010
1768; GFX6-NOHSA-NEXT:    s_bfe_u32 s6, s6, 0x80010
1769; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s19
1770; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s15
1771; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s7
1772; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s14
1773; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
1774; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
1775; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s18
1776; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s13
1777; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s6
1778; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s12
1779; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
1780; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
1781; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s17
1782; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s11
1783; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s5
1784; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s10
1785; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
1786; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
1787; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s16
1788; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s9
1789; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s4
1790; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s8
1791; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
1792; GFX6-NOHSA-NEXT:    s_endpgm
1793;
1794; GFX7-HSA-LABEL: constant_zextload_v16i8_to_v16i32:
1795; GFX7-HSA:       ; %bb.0:
1796; GFX7-HSA-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
1797; GFX7-HSA-NEXT:    s_waitcnt lgkmcnt(0)
1798; GFX7-HSA-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x0
1799; GFX7-HSA-NEXT:    s_waitcnt lgkmcnt(0)
1800; GFX7-HSA-NEXT:    s_lshr_b32 s8, s4, 24
1801; GFX7-HSA-NEXT:    s_bfe_u32 s9, s4, 0x80008
1802; GFX7-HSA-NEXT:    s_lshr_b32 s10, s5, 24
1803; GFX7-HSA-NEXT:    s_bfe_u32 s11, s5, 0x80008
1804; GFX7-HSA-NEXT:    s_lshr_b32 s12, s6, 24
1805; GFX7-HSA-NEXT:    s_bfe_u32 s13, s6, 0x80008
1806; GFX7-HSA-NEXT:    s_lshr_b32 s2, s7, 24
1807; GFX7-HSA-NEXT:    s_bfe_u32 s3, s7, 0x80008
1808; GFX7-HSA-NEXT:    s_and_b32 s14, s4, 0xff
1809; GFX7-HSA-NEXT:    s_bfe_u32 s4, s4, 0x80010
1810; GFX7-HSA-NEXT:    s_and_b32 s15, s5, 0xff
1811; GFX7-HSA-NEXT:    s_bfe_u32 s5, s5, 0x80010
1812; GFX7-HSA-NEXT:    s_and_b32 s16, s6, 0xff
1813; GFX7-HSA-NEXT:    s_bfe_u32 s6, s6, 0x80010
1814; GFX7-HSA-NEXT:    s_and_b32 s17, s7, 0xff
1815; GFX7-HSA-NEXT:    s_bfe_u32 s7, s7, 0x80010
1816; GFX7-HSA-NEXT:    v_mov_b32_e32 v3, s2
1817; GFX7-HSA-NEXT:    s_add_u32 s2, s0, 48
1818; GFX7-HSA-NEXT:    v_mov_b32_e32 v1, s3
1819; GFX7-HSA-NEXT:    s_addc_u32 s3, s1, 0
1820; GFX7-HSA-NEXT:    v_mov_b32_e32 v5, s3
1821; GFX7-HSA-NEXT:    v_mov_b32_e32 v4, s2
1822; GFX7-HSA-NEXT:    s_add_u32 s2, s0, 32
1823; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s17
1824; GFX7-HSA-NEXT:    v_mov_b32_e32 v2, s7
1825; GFX7-HSA-NEXT:    s_addc_u32 s3, s1, 0
1826; GFX7-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
1827; GFX7-HSA-NEXT:    v_mov_b32_e32 v5, s3
1828; GFX7-HSA-NEXT:    v_mov_b32_e32 v4, s2
1829; GFX7-HSA-NEXT:    s_add_u32 s2, s0, 16
1830; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s16
1831; GFX7-HSA-NEXT:    v_mov_b32_e32 v1, s13
1832; GFX7-HSA-NEXT:    v_mov_b32_e32 v2, s6
1833; GFX7-HSA-NEXT:    v_mov_b32_e32 v3, s12
1834; GFX7-HSA-NEXT:    s_addc_u32 s3, s1, 0
1835; GFX7-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
1836; GFX7-HSA-NEXT:    v_mov_b32_e32 v5, s3
1837; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s15
1838; GFX7-HSA-NEXT:    v_mov_b32_e32 v1, s11
1839; GFX7-HSA-NEXT:    v_mov_b32_e32 v2, s5
1840; GFX7-HSA-NEXT:    v_mov_b32_e32 v3, s10
1841; GFX7-HSA-NEXT:    v_mov_b32_e32 v4, s2
1842; GFX7-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
1843; GFX7-HSA-NEXT:    v_mov_b32_e32 v5, s1
1844; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s14
1845; GFX7-HSA-NEXT:    v_mov_b32_e32 v1, s9
1846; GFX7-HSA-NEXT:    v_mov_b32_e32 v2, s4
1847; GFX7-HSA-NEXT:    v_mov_b32_e32 v3, s8
1848; GFX7-HSA-NEXT:    v_mov_b32_e32 v4, s0
1849; GFX7-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
1850; GFX7-HSA-NEXT:    s_endpgm
1851;
1852; GFX8-NOHSA-LABEL: constant_zextload_v16i8_to_v16i32:
1853; GFX8-NOHSA:       ; %bb.0:
1854; GFX8-NOHSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
1855; GFX8-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
1856; GFX8-NOHSA-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x0
1857; GFX8-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
1858; GFX8-NOHSA-NEXT:    s_lshr_b32 s8, s4, 24
1859; GFX8-NOHSA-NEXT:    s_bfe_u32 s9, s4, 0x80008
1860; GFX8-NOHSA-NEXT:    s_lshr_b32 s10, s5, 24
1861; GFX8-NOHSA-NEXT:    s_bfe_u32 s11, s5, 0x80008
1862; GFX8-NOHSA-NEXT:    s_lshr_b32 s12, s6, 24
1863; GFX8-NOHSA-NEXT:    s_bfe_u32 s13, s6, 0x80008
1864; GFX8-NOHSA-NEXT:    s_lshr_b32 s2, s7, 24
1865; GFX8-NOHSA-NEXT:    s_bfe_u32 s3, s7, 0x80008
1866; GFX8-NOHSA-NEXT:    s_and_b32 s14, s4, 0xff
1867; GFX8-NOHSA-NEXT:    s_bfe_u32 s4, s4, 0x80010
1868; GFX8-NOHSA-NEXT:    s_and_b32 s15, s5, 0xff
1869; GFX8-NOHSA-NEXT:    s_bfe_u32 s5, s5, 0x80010
1870; GFX8-NOHSA-NEXT:    s_and_b32 s16, s6, 0xff
1871; GFX8-NOHSA-NEXT:    s_bfe_u32 s6, s6, 0x80010
1872; GFX8-NOHSA-NEXT:    s_and_b32 s17, s7, 0xff
1873; GFX8-NOHSA-NEXT:    s_bfe_u32 s7, s7, 0x80010
1874; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v3, s2
1875; GFX8-NOHSA-NEXT:    s_add_u32 s2, s0, 48
1876; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s3
1877; GFX8-NOHSA-NEXT:    s_addc_u32 s3, s1, 0
1878; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s3
1879; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s2
1880; GFX8-NOHSA-NEXT:    s_add_u32 s2, s0, 32
1881; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s17
1882; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s7
1883; GFX8-NOHSA-NEXT:    s_addc_u32 s3, s1, 0
1884; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
1885; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s3
1886; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s2
1887; GFX8-NOHSA-NEXT:    s_add_u32 s2, s0, 16
1888; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s16
1889; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s13
1890; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s6
1891; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v3, s12
1892; GFX8-NOHSA-NEXT:    s_addc_u32 s3, s1, 0
1893; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
1894; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s3
1895; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s15
1896; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s11
1897; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s5
1898; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v3, s10
1899; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s2
1900; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
1901; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s1
1902; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s14
1903; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s9
1904; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s4
1905; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v3, s8
1906; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s0
1907; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
1908; GFX8-NOHSA-NEXT:    s_endpgm
1909;
1910; EG-LABEL: constant_zextload_v16i8_to_v16i32:
1911; EG:       ; %bb.0:
1912; EG-NEXT:    ALU 0, @10, KC0[CB0:0-32], KC1[]
1913; EG-NEXT:    TEX 0 @8
1914; EG-NEXT:    ALU 39, @11, KC0[CB0:0-32], KC1[]
1915; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T12.XYZW, T14.X, 0
1916; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T10.XYZW, T13.X, 0
1917; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T9.XYZW, T11.X, 0
1918; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T8.XYZW, T7.X, 1
1919; EG-NEXT:    CF_END
1920; EG-NEXT:    Fetch clause starting at 8:
1921; EG-NEXT:     VTX_READ_128 T7.XYZW, T7.X, 0, #1
1922; EG-NEXT:    ALU clause starting at 10:
1923; EG-NEXT:     MOV * T7.X, KC0[2].Z,
1924; EG-NEXT:    ALU clause starting at 11:
1925; EG-NEXT:     MOV * T0.W, literal.x,
1926; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
1927; EG-NEXT:     BFE_UINT * T8.Z, T7.X, literal.x, PV.W,
1928; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
1929; EG-NEXT:     BFE_UINT T8.Y, T7.X, literal.x, T0.W,
1930; EG-NEXT:     BFE_UINT T9.Z, T7.Y, literal.y, T0.W,
1931; EG-NEXT:     LSHR * T8.W, T7.X, literal.z,
1932; EG-NEXT:    8(1.121039e-44), 16(2.242078e-44)
1933; EG-NEXT:    24(3.363116e-44), 0(0.000000e+00)
1934; EG-NEXT:     AND_INT T8.X, T7.X, literal.x,
1935; EG-NEXT:     BFE_UINT T9.Y, T7.Y, literal.y, T0.W,
1936; EG-NEXT:     LSHR * T7.X, KC0[2].Y, literal.z,
1937; EG-NEXT:    255(3.573311e-43), 8(1.121039e-44)
1938; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
1939; EG-NEXT:     BFE_UINT T10.Z, T7.Z, literal.x, T0.W,
1940; EG-NEXT:     LSHR * T9.W, T7.Y, literal.y,
1941; EG-NEXT:    16(2.242078e-44), 24(3.363116e-44)
1942; EG-NEXT:     AND_INT T9.X, T7.Y, literal.x,
1943; EG-NEXT:     BFE_UINT T10.Y, T7.Z, literal.y, T0.W,
1944; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.z,
1945; EG-NEXT:    255(3.573311e-43), 8(1.121039e-44)
1946; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
1947; EG-NEXT:     LSHR T11.X, PV.W, literal.x,
1948; EG-NEXT:     BFE_UINT T12.Z, T7.W, literal.y, T0.W,
1949; EG-NEXT:     LSHR T10.W, T7.Z, literal.z,
1950; EG-NEXT:     AND_INT * T10.X, T7.Z, literal.w,
1951; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
1952; EG-NEXT:    24(3.363116e-44), 255(3.573311e-43)
1953; EG-NEXT:     BFE_UINT T12.Y, T7.W, literal.x, T0.W,
1954; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
1955; EG-NEXT:    8(1.121039e-44), 32(4.484155e-44)
1956; EG-NEXT:     LSHR T13.X, PV.W, literal.x,
1957; EG-NEXT:     LSHR T12.W, T7.W, literal.y,
1958; EG-NEXT:     AND_INT * T12.X, T7.W, literal.z,
1959; EG-NEXT:    2(2.802597e-45), 24(3.363116e-44)
1960; EG-NEXT:    255(3.573311e-43), 0(0.000000e+00)
1961; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.x,
1962; EG-NEXT:    48(6.726233e-44), 0(0.000000e+00)
1963; EG-NEXT:     LSHR * T14.X, PV.W, literal.x,
1964; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
1965;
1966; GFX12-LABEL: constant_zextload_v16i8_to_v16i32:
1967; GFX12:       ; %bb.0:
1968; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
1969; GFX12-NEXT:    s_wait_kmcnt 0x0
1970; GFX12-NEXT:    s_load_b128 s[4:7], s[2:3], 0x0
1971; GFX12-NEXT:    s_wait_kmcnt 0x0
1972; GFX12-NEXT:    s_lshr_b32 s12, s7, 24
1973; GFX12-NEXT:    s_bfe_u32 s13, s7, 0x80008
1974; GFX12-NEXT:    s_and_b32 s17, s7, 0xff
1975; GFX12-NEXT:    s_bfe_u32 s7, s7, 0x80010
1976; GFX12-NEXT:    s_lshr_b32 s10, s6, 24
1977; GFX12-NEXT:    s_bfe_u32 s11, s6, 0x80008
1978; GFX12-NEXT:    s_and_b32 s16, s6, 0xff
1979; GFX12-NEXT:    s_bfe_u32 s6, s6, 0x80010
1980; GFX12-NEXT:    v_dual_mov_b32 v16, 0 :: v_dual_mov_b32 v1, s13
1981; GFX12-NEXT:    s_lshr_b32 s8, s5, 24
1982; GFX12-NEXT:    s_bfe_u32 s9, s5, 0x80008
1983; GFX12-NEXT:    s_and_b32 s15, s5, 0xff
1984; GFX12-NEXT:    s_bfe_u32 s5, s5, 0x80010
1985; GFX12-NEXT:    v_dual_mov_b32 v0, s17 :: v_dual_mov_b32 v3, s12
1986; GFX12-NEXT:    v_dual_mov_b32 v2, s7 :: v_dual_mov_b32 v5, s11
1987; GFX12-NEXT:    s_lshr_b32 s2, s4, 24
1988; GFX12-NEXT:    s_bfe_u32 s3, s4, 0x80008
1989; GFX12-NEXT:    s_and_b32 s14, s4, 0xff
1990; GFX12-NEXT:    s_bfe_u32 s4, s4, 0x80010
1991; GFX12-NEXT:    v_dual_mov_b32 v4, s16 :: v_dual_mov_b32 v7, s10
1992; GFX12-NEXT:    v_dual_mov_b32 v6, s6 :: v_dual_mov_b32 v9, s9
1993; GFX12-NEXT:    v_dual_mov_b32 v8, s15 :: v_dual_mov_b32 v11, s8
1994; GFX12-NEXT:    v_dual_mov_b32 v10, s5 :: v_dual_mov_b32 v13, s3
1995; GFX12-NEXT:    v_dual_mov_b32 v12, s14 :: v_dual_mov_b32 v15, s2
1996; GFX12-NEXT:    v_mov_b32_e32 v14, s4
1997; GFX12-NEXT:    s_clause 0x3
1998; GFX12-NEXT:    global_store_b128 v16, v[0:3], s[0:1] offset:48
1999; GFX12-NEXT:    global_store_b128 v16, v[4:7], s[0:1] offset:32
2000; GFX12-NEXT:    global_store_b128 v16, v[8:11], s[0:1] offset:16
2001; GFX12-NEXT:    global_store_b128 v16, v[12:15], s[0:1]
2002; GFX12-NEXT:    s_endpgm
2003  %load = load <16 x i8>, ptr addrspace(4) %in
2004  %ext = zext <16 x i8> %load to <16 x i32>
2005  store <16 x i32> %ext, ptr addrspace(1) %out
2006  ret void
2007}
2008
2009; TODO: These should use DST, but for some there are redundant MOVs
2010define amdgpu_kernel void @constant_sextload_v16i8_to_v16i32(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
2011; GFX6-NOHSA-LABEL: constant_sextload_v16i8_to_v16i32:
2012; GFX6-NOHSA:       ; %bb.0:
2013; GFX6-NOHSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
2014; GFX6-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
2015; GFX6-NOHSA-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x0
2016; GFX6-NOHSA-NEXT:    s_mov_b32 s3, 0xf000
2017; GFX6-NOHSA-NEXT:    s_mov_b32 s2, -1
2018; GFX6-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
2019; GFX6-NOHSA-NEXT:    s_ashr_i32 s8, s4, 24
2020; GFX6-NOHSA-NEXT:    s_bfe_i32 s9, s4, 0x80010
2021; GFX6-NOHSA-NEXT:    s_bfe_i32 s10, s4, 0x80008
2022; GFX6-NOHSA-NEXT:    s_sext_i32_i8 s4, s4
2023; GFX6-NOHSA-NEXT:    s_ashr_i32 s11, s5, 24
2024; GFX6-NOHSA-NEXT:    s_bfe_i32 s12, s5, 0x80010
2025; GFX6-NOHSA-NEXT:    s_bfe_i32 s13, s5, 0x80008
2026; GFX6-NOHSA-NEXT:    s_sext_i32_i8 s5, s5
2027; GFX6-NOHSA-NEXT:    s_ashr_i32 s14, s6, 24
2028; GFX6-NOHSA-NEXT:    s_bfe_i32 s15, s6, 0x80010
2029; GFX6-NOHSA-NEXT:    s_bfe_i32 s16, s6, 0x80008
2030; GFX6-NOHSA-NEXT:    s_ashr_i32 s17, s7, 24
2031; GFX6-NOHSA-NEXT:    s_bfe_i32 s18, s7, 0x80010
2032; GFX6-NOHSA-NEXT:    s_bfe_i32 s19, s7, 0x80008
2033; GFX6-NOHSA-NEXT:    s_sext_i32_i8 s7, s7
2034; GFX6-NOHSA-NEXT:    s_sext_i32_i8 s6, s6
2035; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s7
2036; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s19
2037; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s18
2038; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s17
2039; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
2040; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
2041; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s6
2042; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s16
2043; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s15
2044; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s14
2045; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
2046; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
2047; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s5
2048; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s13
2049; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s12
2050; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s11
2051; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
2052; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
2053; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s4
2054; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s10
2055; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s9
2056; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s8
2057; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
2058; GFX6-NOHSA-NEXT:    s_endpgm
2059;
2060; GFX7-HSA-LABEL: constant_sextload_v16i8_to_v16i32:
2061; GFX7-HSA:       ; %bb.0:
2062; GFX7-HSA-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
2063; GFX7-HSA-NEXT:    s_waitcnt lgkmcnt(0)
2064; GFX7-HSA-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x0
2065; GFX7-HSA-NEXT:    s_waitcnt lgkmcnt(0)
2066; GFX7-HSA-NEXT:    s_ashr_i32 s8, s4, 24
2067; GFX7-HSA-NEXT:    s_bfe_i32 s9, s4, 0x80010
2068; GFX7-HSA-NEXT:    s_bfe_i32 s10, s4, 0x80008
2069; GFX7-HSA-NEXT:    s_ashr_i32 s11, s5, 24
2070; GFX7-HSA-NEXT:    s_bfe_i32 s12, s5, 0x80010
2071; GFX7-HSA-NEXT:    s_bfe_i32 s13, s5, 0x80008
2072; GFX7-HSA-NEXT:    s_ashr_i32 s14, s6, 24
2073; GFX7-HSA-NEXT:    s_bfe_i32 s15, s6, 0x80010
2074; GFX7-HSA-NEXT:    s_bfe_i32 s16, s6, 0x80008
2075; GFX7-HSA-NEXT:    s_ashr_i32 s2, s7, 24
2076; GFX7-HSA-NEXT:    s_bfe_i32 s3, s7, 0x80010
2077; GFX7-HSA-NEXT:    s_bfe_i32 s17, s7, 0x80008
2078; GFX7-HSA-NEXT:    v_mov_b32_e32 v3, s2
2079; GFX7-HSA-NEXT:    s_add_u32 s2, s0, 48
2080; GFX7-HSA-NEXT:    v_mov_b32_e32 v2, s3
2081; GFX7-HSA-NEXT:    s_addc_u32 s3, s1, 0
2082; GFX7-HSA-NEXT:    v_mov_b32_e32 v5, s3
2083; GFX7-HSA-NEXT:    s_sext_i32_i8 s7, s7
2084; GFX7-HSA-NEXT:    v_mov_b32_e32 v4, s2
2085; GFX7-HSA-NEXT:    s_add_u32 s2, s0, 32
2086; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s7
2087; GFX7-HSA-NEXT:    v_mov_b32_e32 v1, s17
2088; GFX7-HSA-NEXT:    s_addc_u32 s3, s1, 0
2089; GFX7-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
2090; GFX7-HSA-NEXT:    v_mov_b32_e32 v5, s3
2091; GFX7-HSA-NEXT:    s_sext_i32_i8 s6, s6
2092; GFX7-HSA-NEXT:    v_mov_b32_e32 v4, s2
2093; GFX7-HSA-NEXT:    s_add_u32 s2, s0, 16
2094; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s6
2095; GFX7-HSA-NEXT:    v_mov_b32_e32 v1, s16
2096; GFX7-HSA-NEXT:    v_mov_b32_e32 v2, s15
2097; GFX7-HSA-NEXT:    v_mov_b32_e32 v3, s14
2098; GFX7-HSA-NEXT:    s_addc_u32 s3, s1, 0
2099; GFX7-HSA-NEXT:    s_sext_i32_i8 s5, s5
2100; GFX7-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
2101; GFX7-HSA-NEXT:    v_mov_b32_e32 v5, s3
2102; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s5
2103; GFX7-HSA-NEXT:    v_mov_b32_e32 v1, s13
2104; GFX7-HSA-NEXT:    v_mov_b32_e32 v2, s12
2105; GFX7-HSA-NEXT:    v_mov_b32_e32 v3, s11
2106; GFX7-HSA-NEXT:    v_mov_b32_e32 v4, s2
2107; GFX7-HSA-NEXT:    s_sext_i32_i8 s4, s4
2108; GFX7-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
2109; GFX7-HSA-NEXT:    v_mov_b32_e32 v5, s1
2110; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s4
2111; GFX7-HSA-NEXT:    v_mov_b32_e32 v1, s10
2112; GFX7-HSA-NEXT:    v_mov_b32_e32 v2, s9
2113; GFX7-HSA-NEXT:    v_mov_b32_e32 v3, s8
2114; GFX7-HSA-NEXT:    v_mov_b32_e32 v4, s0
2115; GFX7-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
2116; GFX7-HSA-NEXT:    s_endpgm
2117;
2118; GFX8-NOHSA-LABEL: constant_sextload_v16i8_to_v16i32:
2119; GFX8-NOHSA:       ; %bb.0:
2120; GFX8-NOHSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
2121; GFX8-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
2122; GFX8-NOHSA-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x0
2123; GFX8-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
2124; GFX8-NOHSA-NEXT:    s_ashr_i32 s8, s4, 24
2125; GFX8-NOHSA-NEXT:    s_bfe_i32 s9, s4, 0x80010
2126; GFX8-NOHSA-NEXT:    s_bfe_i32 s10, s4, 0x80008
2127; GFX8-NOHSA-NEXT:    s_ashr_i32 s11, s5, 24
2128; GFX8-NOHSA-NEXT:    s_bfe_i32 s12, s5, 0x80010
2129; GFX8-NOHSA-NEXT:    s_bfe_i32 s13, s5, 0x80008
2130; GFX8-NOHSA-NEXT:    s_ashr_i32 s14, s6, 24
2131; GFX8-NOHSA-NEXT:    s_bfe_i32 s15, s6, 0x80010
2132; GFX8-NOHSA-NEXT:    s_bfe_i32 s16, s6, 0x80008
2133; GFX8-NOHSA-NEXT:    s_ashr_i32 s2, s7, 24
2134; GFX8-NOHSA-NEXT:    s_bfe_i32 s3, s7, 0x80010
2135; GFX8-NOHSA-NEXT:    s_bfe_i32 s17, s7, 0x80008
2136; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v3, s2
2137; GFX8-NOHSA-NEXT:    s_add_u32 s2, s0, 48
2138; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s3
2139; GFX8-NOHSA-NEXT:    s_addc_u32 s3, s1, 0
2140; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s3
2141; GFX8-NOHSA-NEXT:    s_sext_i32_i8 s7, s7
2142; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s2
2143; GFX8-NOHSA-NEXT:    s_add_u32 s2, s0, 32
2144; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s7
2145; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s17
2146; GFX8-NOHSA-NEXT:    s_addc_u32 s3, s1, 0
2147; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
2148; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s3
2149; GFX8-NOHSA-NEXT:    s_sext_i32_i8 s6, s6
2150; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s2
2151; GFX8-NOHSA-NEXT:    s_add_u32 s2, s0, 16
2152; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s6
2153; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s16
2154; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s15
2155; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v3, s14
2156; GFX8-NOHSA-NEXT:    s_addc_u32 s3, s1, 0
2157; GFX8-NOHSA-NEXT:    s_sext_i32_i8 s5, s5
2158; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
2159; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s3
2160; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s5
2161; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s13
2162; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s12
2163; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v3, s11
2164; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s2
2165; GFX8-NOHSA-NEXT:    s_sext_i32_i8 s4, s4
2166; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
2167; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s1
2168; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s4
2169; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s10
2170; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s9
2171; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v3, s8
2172; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s0
2173; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
2174; GFX8-NOHSA-NEXT:    s_endpgm
2175;
2176; EG-LABEL: constant_sextload_v16i8_to_v16i32:
2177; EG:       ; %bb.0:
2178; EG-NEXT:    ALU 0, @10, KC0[CB0:0-32], KC1[]
2179; EG-NEXT:    TEX 0 @8
2180; EG-NEXT:    ALU 47, @11, KC0[CB0:0-32], KC1[]
2181; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T12.XYZW, T14.X, 0
2182; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T11.XYZW, T13.X, 0
2183; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T10.XYZW, T7.X, 0
2184; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T9.XYZW, T8.X, 1
2185; EG-NEXT:    CF_END
2186; EG-NEXT:    Fetch clause starting at 8:
2187; EG-NEXT:     VTX_READ_128 T7.XYZW, T7.X, 0, #1
2188; EG-NEXT:    ALU clause starting at 10:
2189; EG-NEXT:     MOV * T7.X, KC0[2].Z,
2190; EG-NEXT:    ALU clause starting at 11:
2191; EG-NEXT:     LSHR T8.X, KC0[2].Y, literal.x,
2192; EG-NEXT:     LSHR T0.W, T7.W, literal.y,
2193; EG-NEXT:     LSHR * T1.W, T7.Z, literal.z,
2194; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
2195; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
2196; EG-NEXT:     BFE_INT T9.X, T7.X, 0.0, literal.x,
2197; EG-NEXT:     LSHR T0.Y, T7.W, literal.y,
2198; EG-NEXT:     LSHR T0.Z, T7.Z, literal.z,
2199; EG-NEXT:     LSHR T2.W, T7.Y, literal.x,
2200; EG-NEXT:     LSHR * T3.W, T7.X, literal.y,
2201; EG-NEXT:    8(1.121039e-44), 24(3.363116e-44)
2202; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
2203; EG-NEXT:     BFE_INT T10.X, T7.Y, 0.0, literal.x,
2204; EG-NEXT:     LSHR T1.Y, T7.Z, literal.y,
2205; EG-NEXT:     LSHR T1.Z, T7.Y, literal.y,
2206; EG-NEXT:     BFE_INT T9.W, PS, 0.0, literal.x,
2207; EG-NEXT:     LSHR * T3.W, T7.X, literal.z,
2208; EG-NEXT:    8(1.121039e-44), 24(3.363116e-44)
2209; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
2210; EG-NEXT:     BFE_INT T11.X, T7.Z, 0.0, literal.x,
2211; EG-NEXT:     LSHR T2.Y, T7.Y, literal.y,
2212; EG-NEXT:     BFE_INT T9.Z, PS, 0.0, literal.x,
2213; EG-NEXT:     BFE_INT T10.W, PV.Z, 0.0, literal.x,
2214; EG-NEXT:     LSHR * T3.W, T7.X, literal.x,
2215; EG-NEXT:    8(1.121039e-44), 16(2.242078e-44)
2216; EG-NEXT:     BFE_INT T12.X, T7.W, 0.0, literal.x,
2217; EG-NEXT:     BFE_INT T9.Y, PS, 0.0, literal.x,
2218; EG-NEXT:     BFE_INT T10.Z, PV.Y, 0.0, literal.x,
2219; EG-NEXT:     BFE_INT T11.W, T1.Y, 0.0, literal.x,
2220; EG-NEXT:     ADD_INT * T3.W, KC0[2].Y, literal.y,
2221; EG-NEXT:    8(1.121039e-44), 16(2.242078e-44)
2222; EG-NEXT:     LSHR T7.X, PS, literal.x,
2223; EG-NEXT:     BFE_INT T10.Y, T2.W, 0.0, literal.y,
2224; EG-NEXT:     BFE_INT T11.Z, T0.Z, 0.0, literal.y,
2225; EG-NEXT:     BFE_INT T12.W, T0.Y, 0.0, literal.y,
2226; EG-NEXT:     ADD_INT * T2.W, KC0[2].Y, literal.z,
2227; EG-NEXT:    2(2.802597e-45), 8(1.121039e-44)
2228; EG-NEXT:    32(4.484155e-44), 0(0.000000e+00)
2229; EG-NEXT:     LSHR T13.X, PS, literal.x,
2230; EG-NEXT:     BFE_INT T11.Y, T1.W, 0.0, literal.y,
2231; EG-NEXT:     BFE_INT T12.Z, T0.W, 0.0, literal.y, BS:VEC_120/SCL_212
2232; EG-NEXT:     LSHR T0.W, T7.W, literal.y, BS:VEC_201
2233; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.z,
2234; EG-NEXT:    2(2.802597e-45), 8(1.121039e-44)
2235; EG-NEXT:    48(6.726233e-44), 0(0.000000e+00)
2236; EG-NEXT:     LSHR T14.X, PS, literal.x,
2237; EG-NEXT:     BFE_INT * T12.Y, PV.W, 0.0, literal.y,
2238; EG-NEXT:    2(2.802597e-45), 8(1.121039e-44)
2239;
2240; GFX12-LABEL: constant_sextload_v16i8_to_v16i32:
2241; GFX12:       ; %bb.0:
2242; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
2243; GFX12-NEXT:    s_wait_kmcnt 0x0
2244; GFX12-NEXT:    s_load_b128 s[4:7], s[2:3], 0x0
2245; GFX12-NEXT:    s_wait_kmcnt 0x0
2246; GFX12-NEXT:    s_ashr_i32 s15, s7, 24
2247; GFX12-NEXT:    s_bfe_i32 s16, s7, 0x80010
2248; GFX12-NEXT:    s_sext_i32_i8 s17, s7
2249; GFX12-NEXT:    s_bfe_i32 s7, s7, 0x80008
2250; GFX12-NEXT:    s_ashr_i32 s12, s6, 24
2251; GFX12-NEXT:    s_bfe_i32 s13, s6, 0x80010
2252; GFX12-NEXT:    s_bfe_i32 s14, s6, 0x80008
2253; GFX12-NEXT:    s_sext_i32_i8 s6, s6
2254; GFX12-NEXT:    v_dual_mov_b32 v16, 0 :: v_dual_mov_b32 v1, s7
2255; GFX12-NEXT:    s_ashr_i32 s9, s5, 24
2256; GFX12-NEXT:    s_bfe_i32 s10, s5, 0x80010
2257; GFX12-NEXT:    s_bfe_i32 s11, s5, 0x80008
2258; GFX12-NEXT:    s_sext_i32_i8 s5, s5
2259; GFX12-NEXT:    v_dual_mov_b32 v0, s17 :: v_dual_mov_b32 v3, s15
2260; GFX12-NEXT:    v_dual_mov_b32 v2, s16 :: v_dual_mov_b32 v5, s14
2261; GFX12-NEXT:    s_ashr_i32 s2, s4, 24
2262; GFX12-NEXT:    s_bfe_i32 s3, s4, 0x80010
2263; GFX12-NEXT:    s_bfe_i32 s8, s4, 0x80008
2264; GFX12-NEXT:    s_sext_i32_i8 s4, s4
2265; GFX12-NEXT:    v_dual_mov_b32 v4, s6 :: v_dual_mov_b32 v7, s12
2266; GFX12-NEXT:    v_dual_mov_b32 v6, s13 :: v_dual_mov_b32 v9, s11
2267; GFX12-NEXT:    v_dual_mov_b32 v8, s5 :: v_dual_mov_b32 v11, s9
2268; GFX12-NEXT:    v_dual_mov_b32 v10, s10 :: v_dual_mov_b32 v13, s8
2269; GFX12-NEXT:    v_dual_mov_b32 v12, s4 :: v_dual_mov_b32 v15, s2
2270; GFX12-NEXT:    v_mov_b32_e32 v14, s3
2271; GFX12-NEXT:    s_clause 0x3
2272; GFX12-NEXT:    global_store_b128 v16, v[0:3], s[0:1] offset:48
2273; GFX12-NEXT:    global_store_b128 v16, v[4:7], s[0:1] offset:32
2274; GFX12-NEXT:    global_store_b128 v16, v[8:11], s[0:1] offset:16
2275; GFX12-NEXT:    global_store_b128 v16, v[12:15], s[0:1]
2276; GFX12-NEXT:    s_endpgm
2277  %load = load <16 x i8>, ptr addrspace(4) %in
2278  %ext = sext <16 x i8> %load to <16 x i32>
2279  store <16 x i32> %ext, ptr addrspace(1) %out
2280  ret void
2281}
2282
2283; TODO: These should use DST, but for some there are redundant MOVs
; Test: load a <32 x i8> vector through the addrspace(4) pointer %in,
; zero-extend every element to i32 and store the resulting <32 x i32>
; (128 bytes) through the addrspace(1) pointer %out.
;
; The check lines inside this function are autogenerated by
; utils/update_llc_test_checks.py (see the NOTE on the first line of the file);
; regenerate them with that script instead of editing them by hand. One group
; of checks exists per RUN line: GFX6-NOHSA, GFX7-HSA, GFX8-NOHSA, EG and GFX12.
;
; Shape of the expected code, as pinned by the checks below:
;  * GFX6/GFX7/GFX8 read the 32 source bytes with one s_load_dwordx8 and GFX12
;    with one s_load_b256; EG uses two VTX_READ_128 fetches.
;  * On the amdgcn targets each loaded dword is split into four zero-extended
;    bytes with scalar ops: s_and_b32 ..., 0xff for the first output element,
;    s_bfe_u32 with 0x80008 and 0x80010 for the second and third (8-bit field
;    at bit offset 8 / 16 per the S_BFE operand encoding), and
;    s_lshr_b32 ..., 24 for the fourth.
;  * The result is written with eight 16-byte stores covering byte offsets
;    112 down to 0 (buffer_store_dwordx4 on GFX6, flat_store_dwordx4 on
;    GFX7/GFX8, global_store_b128 on GFX12, MEM_RAT_CACHELESS STORE_RAW on EG).
;
; NOTE(review): attribute group #0 is defined outside this excerpt.
2284define amdgpu_kernel void @constant_zextload_v32i8_to_v32i32(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
2285; GFX6-NOHSA-LABEL: constant_zextload_v32i8_to_v32i32:
2286; GFX6-NOHSA:       ; %bb.0:
2287; GFX6-NOHSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
2288; GFX6-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
2289; GFX6-NOHSA-NEXT:    s_load_dwordx8 s[4:11], s[2:3], 0x0
2290; GFX6-NOHSA-NEXT:    s_mov_b32 s3, 0xf000
2291; GFX6-NOHSA-NEXT:    s_mov_b32 s2, -1
2292; GFX6-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
2293; GFX6-NOHSA-NEXT:    s_lshr_b32 s12, s4, 24
2294; GFX6-NOHSA-NEXT:    s_bfe_u32 s13, s4, 0x80008
2295; GFX6-NOHSA-NEXT:    s_lshr_b32 s14, s5, 24
2296; GFX6-NOHSA-NEXT:    s_bfe_u32 s15, s5, 0x80008
2297; GFX6-NOHSA-NEXT:    s_lshr_b32 s16, s6, 24
2298; GFX6-NOHSA-NEXT:    s_bfe_u32 s17, s6, 0x80008
2299; GFX6-NOHSA-NEXT:    s_lshr_b32 s18, s7, 24
2300; GFX6-NOHSA-NEXT:    s_bfe_u32 s19, s7, 0x80008
2301; GFX6-NOHSA-NEXT:    s_lshr_b32 s20, s8, 24
2302; GFX6-NOHSA-NEXT:    s_bfe_u32 s21, s8, 0x80008
2303; GFX6-NOHSA-NEXT:    s_lshr_b32 s22, s9, 24
2304; GFX6-NOHSA-NEXT:    s_bfe_u32 s23, s9, 0x80008
2305; GFX6-NOHSA-NEXT:    s_lshr_b32 s24, s10, 24
2306; GFX6-NOHSA-NEXT:    s_bfe_u32 s25, s10, 0x80008
2307; GFX6-NOHSA-NEXT:    s_lshr_b32 s26, s11, 24
2308; GFX6-NOHSA-NEXT:    s_bfe_u32 s27, s11, 0x80008
2309; GFX6-NOHSA-NEXT:    s_and_b32 s28, s4, 0xff
2310; GFX6-NOHSA-NEXT:    s_bfe_u32 s4, s4, 0x80010
2311; GFX6-NOHSA-NEXT:    s_and_b32 s29, s5, 0xff
2312; GFX6-NOHSA-NEXT:    s_bfe_u32 s5, s5, 0x80010
2313; GFX6-NOHSA-NEXT:    s_and_b32 s30, s6, 0xff
2314; GFX6-NOHSA-NEXT:    s_bfe_u32 s6, s6, 0x80010
2315; GFX6-NOHSA-NEXT:    s_and_b32 s31, s7, 0xff
2316; GFX6-NOHSA-NEXT:    s_bfe_u32 s7, s7, 0x80010
2317; GFX6-NOHSA-NEXT:    s_and_b32 s33, s8, 0xff
2318; GFX6-NOHSA-NEXT:    s_bfe_u32 s8, s8, 0x80010
2319; GFX6-NOHSA-NEXT:    s_and_b32 s34, s9, 0xff
2320; GFX6-NOHSA-NEXT:    s_bfe_u32 s9, s9, 0x80010
2321; GFX6-NOHSA-NEXT:    s_and_b32 s35, s10, 0xff
2322; GFX6-NOHSA-NEXT:    s_and_b32 s36, s11, 0xff
2323; GFX6-NOHSA-NEXT:    s_bfe_u32 s11, s11, 0x80010
2324; GFX6-NOHSA-NEXT:    s_bfe_u32 s10, s10, 0x80010
2325; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s36
2326; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s27
2327; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s11
2328; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s26
2329; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112
2330; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
2331; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s35
2332; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s25
2333; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s10
2334; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s24
2335; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96
2336; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
2337; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s34
2338; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s23
2339; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s9
2340; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s22
2341; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80
2342; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
2343; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s33
2344; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s21
2345; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s8
2346; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s20
2347; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64
2348; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
2349; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s31
2350; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s19
2351; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s7
2352; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s18
2353; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
2354; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
2355; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s30
2356; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s17
2357; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s6
2358; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s16
2359; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
2360; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
2361; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s29
2362; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s15
2363; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s5
2364; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s14
2365; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
2366; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
2367; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s28
2368; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s13
2369; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s4
2370; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s12
2371; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
2372; GFX6-NOHSA-NEXT:    s_endpgm
2373;
2374; GFX7-HSA-LABEL: constant_zextload_v32i8_to_v32i32:
2375; GFX7-HSA:       ; %bb.0:
2376; GFX7-HSA-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
2377; GFX7-HSA-NEXT:    s_waitcnt lgkmcnt(0)
2378; GFX7-HSA-NEXT:    s_load_dwordx8 s[4:11], s[2:3], 0x0
2379; GFX7-HSA-NEXT:    s_waitcnt lgkmcnt(0)
2380; GFX7-HSA-NEXT:    s_lshr_b32 s12, s4, 24
2381; GFX7-HSA-NEXT:    s_bfe_u32 s13, s4, 0x80008
2382; GFX7-HSA-NEXT:    s_lshr_b32 s14, s5, 24
2383; GFX7-HSA-NEXT:    s_bfe_u32 s15, s5, 0x80008
2384; GFX7-HSA-NEXT:    s_lshr_b32 s16, s6, 24
2385; GFX7-HSA-NEXT:    s_bfe_u32 s17, s6, 0x80008
2386; GFX7-HSA-NEXT:    s_lshr_b32 s18, s7, 24
2387; GFX7-HSA-NEXT:    s_bfe_u32 s19, s7, 0x80008
2388; GFX7-HSA-NEXT:    s_lshr_b32 s20, s8, 24
2389; GFX7-HSA-NEXT:    s_bfe_u32 s21, s8, 0x80008
2390; GFX7-HSA-NEXT:    s_lshr_b32 s22, s9, 24
2391; GFX7-HSA-NEXT:    s_bfe_u32 s23, s9, 0x80008
2392; GFX7-HSA-NEXT:    s_lshr_b32 s24, s10, 24
2393; GFX7-HSA-NEXT:    s_bfe_u32 s25, s10, 0x80008
2394; GFX7-HSA-NEXT:    s_lshr_b32 s26, s11, 24
2395; GFX7-HSA-NEXT:    s_bfe_u32 s27, s11, 0x80008
2396; GFX7-HSA-NEXT:    s_and_b32 s28, s4, 0xff
2397; GFX7-HSA-NEXT:    s_bfe_u32 s4, s4, 0x80010
2398; GFX7-HSA-NEXT:    s_and_b32 s29, s5, 0xff
2399; GFX7-HSA-NEXT:    s_bfe_u32 s5, s5, 0x80010
2400; GFX7-HSA-NEXT:    s_and_b32 s30, s6, 0xff
2401; GFX7-HSA-NEXT:    s_bfe_u32 s6, s6, 0x80010
2402; GFX7-HSA-NEXT:    s_and_b32 s31, s7, 0xff
2403; GFX7-HSA-NEXT:    s_bfe_u32 s7, s7, 0x80010
2404; GFX7-HSA-NEXT:    s_and_b32 s33, s8, 0xff
2405; GFX7-HSA-NEXT:    s_bfe_u32 s8, s8, 0x80010
2406; GFX7-HSA-NEXT:    s_and_b32 s34, s9, 0xff
2407; GFX7-HSA-NEXT:    s_bfe_u32 s9, s9, 0x80010
2408; GFX7-HSA-NEXT:    s_and_b32 s35, s10, 0xff
2409; GFX7-HSA-NEXT:    s_bfe_u32 s10, s10, 0x80010
2410; GFX7-HSA-NEXT:    s_and_b32 s36, s11, 0xff
2411; GFX7-HSA-NEXT:    s_bfe_u32 s11, s11, 0x80010
2412; GFX7-HSA-NEXT:    s_add_u32 s2, s0, 0x70
2413; GFX7-HSA-NEXT:    s_addc_u32 s3, s1, 0
2414; GFX7-HSA-NEXT:    v_mov_b32_e32 v9, s3
2415; GFX7-HSA-NEXT:    v_mov_b32_e32 v8, s2
2416; GFX7-HSA-NEXT:    s_add_u32 s2, s0, 0x60
2417; GFX7-HSA-NEXT:    s_addc_u32 s3, s1, 0
2418; GFX7-HSA-NEXT:    v_mov_b32_e32 v11, s3
2419; GFX7-HSA-NEXT:    v_mov_b32_e32 v10, s2
2420; GFX7-HSA-NEXT:    s_add_u32 s2, s0, 0x50
2421; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s36
2422; GFX7-HSA-NEXT:    v_mov_b32_e32 v1, s27
2423; GFX7-HSA-NEXT:    v_mov_b32_e32 v2, s11
2424; GFX7-HSA-NEXT:    v_mov_b32_e32 v3, s26
2425; GFX7-HSA-NEXT:    v_mov_b32_e32 v4, s35
2426; GFX7-HSA-NEXT:    v_mov_b32_e32 v5, s25
2427; GFX7-HSA-NEXT:    s_addc_u32 s3, s1, 0
2428; GFX7-HSA-NEXT:    v_mov_b32_e32 v6, s10
2429; GFX7-HSA-NEXT:    v_mov_b32_e32 v7, s24
2430; GFX7-HSA-NEXT:    flat_store_dwordx4 v[8:9], v[0:3]
2431; GFX7-HSA-NEXT:    flat_store_dwordx4 v[10:11], v[4:7]
2432; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s34
2433; GFX7-HSA-NEXT:    v_mov_b32_e32 v5, s3
2434; GFX7-HSA-NEXT:    v_mov_b32_e32 v4, s2
2435; GFX7-HSA-NEXT:    s_add_u32 s2, s0, 64
2436; GFX7-HSA-NEXT:    v_mov_b32_e32 v1, s23
2437; GFX7-HSA-NEXT:    v_mov_b32_e32 v2, s9
2438; GFX7-HSA-NEXT:    v_mov_b32_e32 v3, s22
2439; GFX7-HSA-NEXT:    s_addc_u32 s3, s1, 0
2440; GFX7-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
2441; GFX7-HSA-NEXT:    v_mov_b32_e32 v5, s3
2442; GFX7-HSA-NEXT:    v_mov_b32_e32 v4, s2
2443; GFX7-HSA-NEXT:    s_add_u32 s2, s0, 48
2444; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s33
2445; GFX7-HSA-NEXT:    v_mov_b32_e32 v1, s21
2446; GFX7-HSA-NEXT:    v_mov_b32_e32 v2, s8
2447; GFX7-HSA-NEXT:    v_mov_b32_e32 v3, s20
2448; GFX7-HSA-NEXT:    s_addc_u32 s3, s1, 0
2449; GFX7-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
2450; GFX7-HSA-NEXT:    v_mov_b32_e32 v5, s3
2451; GFX7-HSA-NEXT:    v_mov_b32_e32 v4, s2
2452; GFX7-HSA-NEXT:    s_add_u32 s2, s0, 32
2453; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s31
2454; GFX7-HSA-NEXT:    v_mov_b32_e32 v1, s19
2455; GFX7-HSA-NEXT:    v_mov_b32_e32 v2, s7
2456; GFX7-HSA-NEXT:    v_mov_b32_e32 v3, s18
2457; GFX7-HSA-NEXT:    s_addc_u32 s3, s1, 0
2458; GFX7-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
2459; GFX7-HSA-NEXT:    v_mov_b32_e32 v5, s3
2460; GFX7-HSA-NEXT:    v_mov_b32_e32 v4, s2
2461; GFX7-HSA-NEXT:    s_add_u32 s2, s0, 16
2462; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s30
2463; GFX7-HSA-NEXT:    v_mov_b32_e32 v1, s17
2464; GFX7-HSA-NEXT:    v_mov_b32_e32 v2, s6
2465; GFX7-HSA-NEXT:    v_mov_b32_e32 v3, s16
2466; GFX7-HSA-NEXT:    s_addc_u32 s3, s1, 0
2467; GFX7-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
2468; GFX7-HSA-NEXT:    v_mov_b32_e32 v5, s3
2469; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s29
2470; GFX7-HSA-NEXT:    v_mov_b32_e32 v1, s15
2471; GFX7-HSA-NEXT:    v_mov_b32_e32 v2, s5
2472; GFX7-HSA-NEXT:    v_mov_b32_e32 v3, s14
2473; GFX7-HSA-NEXT:    v_mov_b32_e32 v4, s2
2474; GFX7-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
2475; GFX7-HSA-NEXT:    v_mov_b32_e32 v5, s1
2476; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s28
2477; GFX7-HSA-NEXT:    v_mov_b32_e32 v1, s13
2478; GFX7-HSA-NEXT:    v_mov_b32_e32 v2, s4
2479; GFX7-HSA-NEXT:    v_mov_b32_e32 v3, s12
2480; GFX7-HSA-NEXT:    v_mov_b32_e32 v4, s0
2481; GFX7-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
2482; GFX7-HSA-NEXT:    s_endpgm
2483;
2484; GFX8-NOHSA-LABEL: constant_zextload_v32i8_to_v32i32:
2485; GFX8-NOHSA:       ; %bb.0:
2486; GFX8-NOHSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
2487; GFX8-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
2488; GFX8-NOHSA-NEXT:    s_load_dwordx8 s[4:11], s[2:3], 0x0
2489; GFX8-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
2490; GFX8-NOHSA-NEXT:    s_lshr_b32 s12, s4, 24
2491; GFX8-NOHSA-NEXT:    s_bfe_u32 s13, s4, 0x80008
2492; GFX8-NOHSA-NEXT:    s_lshr_b32 s14, s5, 24
2493; GFX8-NOHSA-NEXT:    s_bfe_u32 s15, s5, 0x80008
2494; GFX8-NOHSA-NEXT:    s_lshr_b32 s16, s6, 24
2495; GFX8-NOHSA-NEXT:    s_bfe_u32 s17, s6, 0x80008
2496; GFX8-NOHSA-NEXT:    s_lshr_b32 s18, s7, 24
2497; GFX8-NOHSA-NEXT:    s_bfe_u32 s19, s7, 0x80008
2498; GFX8-NOHSA-NEXT:    s_lshr_b32 s20, s8, 24
2499; GFX8-NOHSA-NEXT:    s_bfe_u32 s21, s8, 0x80008
2500; GFX8-NOHSA-NEXT:    s_lshr_b32 s22, s9, 24
2501; GFX8-NOHSA-NEXT:    s_bfe_u32 s23, s9, 0x80008
2502; GFX8-NOHSA-NEXT:    s_lshr_b32 s24, s10, 24
2503; GFX8-NOHSA-NEXT:    s_bfe_u32 s25, s10, 0x80008
2504; GFX8-NOHSA-NEXT:    s_lshr_b32 s2, s11, 24
2505; GFX8-NOHSA-NEXT:    s_bfe_u32 s3, s11, 0x80008
2506; GFX8-NOHSA-NEXT:    s_and_b32 s26, s4, 0xff
2507; GFX8-NOHSA-NEXT:    s_bfe_u32 s4, s4, 0x80010
2508; GFX8-NOHSA-NEXT:    s_and_b32 s27, s5, 0xff
2509; GFX8-NOHSA-NEXT:    s_bfe_u32 s5, s5, 0x80010
2510; GFX8-NOHSA-NEXT:    s_and_b32 s28, s6, 0xff
2511; GFX8-NOHSA-NEXT:    s_bfe_u32 s6, s6, 0x80010
2512; GFX8-NOHSA-NEXT:    s_and_b32 s29, s7, 0xff
2513; GFX8-NOHSA-NEXT:    s_bfe_u32 s7, s7, 0x80010
2514; GFX8-NOHSA-NEXT:    s_and_b32 s30, s8, 0xff
2515; GFX8-NOHSA-NEXT:    s_bfe_u32 s8, s8, 0x80010
2516; GFX8-NOHSA-NEXT:    s_and_b32 s31, s9, 0xff
2517; GFX8-NOHSA-NEXT:    s_bfe_u32 s9, s9, 0x80010
2518; GFX8-NOHSA-NEXT:    s_and_b32 s33, s10, 0xff
2519; GFX8-NOHSA-NEXT:    s_bfe_u32 s10, s10, 0x80010
2520; GFX8-NOHSA-NEXT:    s_and_b32 s34, s11, 0xff
2521; GFX8-NOHSA-NEXT:    s_bfe_u32 s11, s11, 0x80010
2522; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v3, s2
2523; GFX8-NOHSA-NEXT:    s_add_u32 s2, s0, 0x70
2524; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s3
2525; GFX8-NOHSA-NEXT:    s_addc_u32 s3, s1, 0
2526; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s3
2527; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s2
2528; GFX8-NOHSA-NEXT:    s_add_u32 s2, s0, 0x60
2529; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s34
2530; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s11
2531; GFX8-NOHSA-NEXT:    s_addc_u32 s3, s1, 0
2532; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
2533; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s3
2534; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s2
2535; GFX8-NOHSA-NEXT:    s_add_u32 s2, s0, 0x50
2536; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s33
2537; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s25
2538; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s10
2539; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v3, s24
2540; GFX8-NOHSA-NEXT:    s_addc_u32 s3, s1, 0
2541; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
2542; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s3
2543; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s2
2544; GFX8-NOHSA-NEXT:    s_add_u32 s2, s0, 64
2545; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s31
2546; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s23
2547; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s9
2548; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v3, s22
2549; GFX8-NOHSA-NEXT:    s_addc_u32 s3, s1, 0
2550; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
2551; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s3
2552; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s2
2553; GFX8-NOHSA-NEXT:    s_add_u32 s2, s0, 48
2554; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s30
2555; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s21
2556; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s8
2557; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v3, s20
2558; GFX8-NOHSA-NEXT:    s_addc_u32 s3, s1, 0
2559; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
2560; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s3
2561; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s2
2562; GFX8-NOHSA-NEXT:    s_add_u32 s2, s0, 32
2563; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s29
2564; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s19
2565; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s7
2566; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v3, s18
2567; GFX8-NOHSA-NEXT:    s_addc_u32 s3, s1, 0
2568; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
2569; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s3
2570; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s2
2571; GFX8-NOHSA-NEXT:    s_add_u32 s2, s0, 16
2572; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s28
2573; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s17
2574; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s6
2575; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v3, s16
2576; GFX8-NOHSA-NEXT:    s_addc_u32 s3, s1, 0
2577; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
2578; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s3
2579; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s27
2580; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s15
2581; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s5
2582; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v3, s14
2583; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s2
2584; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
2585; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s1
2586; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s26
2587; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s13
2588; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s4
2589; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v3, s12
2590; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s0
2591; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
2592; GFX8-NOHSA-NEXT:    s_endpgm
2593;
2594; EG-LABEL: constant_zextload_v32i8_to_v32i32:
2595; EG:       ; %bb.0:
2596; EG-NEXT:    ALU 0, @16, KC0[CB0:0-32], KC1[]
2597; EG-NEXT:    TEX 1 @12
2598; EG-NEXT:    ALU 75, @17, KC0[CB0:0-32], KC1[]
2599; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T24.XYZW, T26.X, 0
2600; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T22.XYZW, T25.X, 0
2601; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T21.XYZW, T23.X, 0
2602; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T19.XYZW, T12.X, 0
2603; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T17.XYZW, T20.X, 0
2604; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T15.XYZW, T18.X, 0
2605; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T14.XYZW, T16.X, 0
2606; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T13.XYZW, T11.X, 1
2607; EG-NEXT:    CF_END
2608; EG-NEXT:    Fetch clause starting at 12:
2609; EG-NEXT:     VTX_READ_128 T12.XYZW, T11.X, 16, #1
2610; EG-NEXT:     VTX_READ_128 T11.XYZW, T11.X, 0, #1
2611; EG-NEXT:    ALU clause starting at 16:
2612; EG-NEXT:     MOV * T11.X, KC0[2].Z,
2613; EG-NEXT:    ALU clause starting at 17:
2614; EG-NEXT:     MOV * T0.W, literal.x,
2615; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
2616; EG-NEXT:     BFE_UINT * T13.Z, T11.X, literal.x, PV.W,
2617; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
2618; EG-NEXT:     BFE_UINT T13.Y, T11.X, literal.x, T0.W,
2619; EG-NEXT:     BFE_UINT T14.Z, T11.Y, literal.y, T0.W,
2620; EG-NEXT:     LSHR * T13.W, T11.X, literal.z,
2621; EG-NEXT:    8(1.121039e-44), 16(2.242078e-44)
2622; EG-NEXT:    24(3.363116e-44), 0(0.000000e+00)
2623; EG-NEXT:     AND_INT T13.X, T11.X, literal.x,
2624; EG-NEXT:     BFE_UINT T14.Y, T11.Y, literal.y, T0.W,
2625; EG-NEXT:     LSHR * T11.X, KC0[2].Y, literal.z,
2626; EG-NEXT:    255(3.573311e-43), 8(1.121039e-44)
2627; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
2628; EG-NEXT:     BFE_UINT T15.Z, T11.Z, literal.x, T0.W,
2629; EG-NEXT:     LSHR * T14.W, T11.Y, literal.y,
2630; EG-NEXT:    16(2.242078e-44), 24(3.363116e-44)
2631; EG-NEXT:     AND_INT T14.X, T11.Y, literal.x,
2632; EG-NEXT:     BFE_UINT T15.Y, T11.Z, literal.y, T0.W,
2633; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.z,
2634; EG-NEXT:    255(3.573311e-43), 8(1.121039e-44)
2635; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
2636; EG-NEXT:     LSHR T16.X, PV.W, literal.x,
2637; EG-NEXT:     BFE_UINT T17.Z, T11.W, literal.y, T0.W,
2638; EG-NEXT:     LSHR T15.W, T11.Z, literal.z,
2639; EG-NEXT:     AND_INT * T15.X, T11.Z, literal.w,
2640; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
2641; EG-NEXT:    24(3.363116e-44), 255(3.573311e-43)
2642; EG-NEXT:     BFE_UINT T17.Y, T11.W, literal.x, T0.W,
2643; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.y,
2644; EG-NEXT:    8(1.121039e-44), 32(4.484155e-44)
2645; EG-NEXT:     LSHR T18.X, PV.W, literal.x,
2646; EG-NEXT:     BFE_UINT T19.Z, T12.X, literal.y, T0.W, BS:VEC_021/SCL_122
2647; EG-NEXT:     LSHR T17.W, T11.W, literal.z,
2648; EG-NEXT:     AND_INT * T17.X, T11.W, literal.w,
2649; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
2650; EG-NEXT:    24(3.363116e-44), 255(3.573311e-43)
2651; EG-NEXT:     BFE_UINT T19.Y, T12.X, literal.x, T0.W,
2652; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.y,
2653; EG-NEXT:    8(1.121039e-44), 48(6.726233e-44)
2654; EG-NEXT:     LSHR T20.X, PV.W, literal.x,
2655; EG-NEXT:     BFE_UINT T21.Z, T12.Y, literal.y, T0.W,
2656; EG-NEXT:     LSHR T19.W, T12.X, literal.z,
2657; EG-NEXT:     AND_INT * T19.X, T12.X, literal.w,
2658; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
2659; EG-NEXT:    24(3.363116e-44), 255(3.573311e-43)
2660; EG-NEXT:     BFE_UINT T21.Y, T12.Y, literal.x, T0.W,
2661; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.y,
2662; EG-NEXT:    8(1.121039e-44), 64(8.968310e-44)
2663; EG-NEXT:     LSHR T12.X, PV.W, literal.x,
2664; EG-NEXT:     BFE_UINT T22.Z, T12.Z, literal.y, T0.W,
2665; EG-NEXT:     LSHR T21.W, T12.Y, literal.z,
2666; EG-NEXT:     AND_INT * T21.X, T12.Y, literal.w,
2667; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
2668; EG-NEXT:    24(3.363116e-44), 255(3.573311e-43)
2669; EG-NEXT:     BFE_UINT T22.Y, T12.Z, literal.x, T0.W,
2670; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.y,
2671; EG-NEXT:    8(1.121039e-44), 80(1.121039e-43)
2672; EG-NEXT:     LSHR T23.X, PV.W, literal.x,
2673; EG-NEXT:     BFE_UINT T24.Z, T12.W, literal.y, T0.W,
2674; EG-NEXT:     LSHR T22.W, T12.Z, literal.z,
2675; EG-NEXT:     AND_INT * T22.X, T12.Z, literal.w,
2676; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
2677; EG-NEXT:    24(3.363116e-44), 255(3.573311e-43)
2678; EG-NEXT:     BFE_UINT T24.Y, T12.W, literal.x, T0.W,
2679; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
2680; EG-NEXT:    8(1.121039e-44), 96(1.345247e-43)
2681; EG-NEXT:     LSHR T25.X, PV.W, literal.x,
2682; EG-NEXT:     LSHR T24.W, T12.W, literal.y,
2683; EG-NEXT:     AND_INT * T24.X, T12.W, literal.z,
2684; EG-NEXT:    2(2.802597e-45), 24(3.363116e-44)
2685; EG-NEXT:    255(3.573311e-43), 0(0.000000e+00)
2686; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.x,
2687; EG-NEXT:    112(1.569454e-43), 0(0.000000e+00)
2688; EG-NEXT:     LSHR * T26.X, PV.W, literal.x,
2689; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
2690;
2691; GFX12-LABEL: constant_zextload_v32i8_to_v32i32:
2692; GFX12:       ; %bb.0:
2693; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
2694; GFX12-NEXT:    s_wait_kmcnt 0x0
2695; GFX12-NEXT:    s_load_b256 s[4:11], s[2:3], 0x0
2696; GFX12-NEXT:    s_wait_kmcnt 0x0
2697; GFX12-NEXT:    s_lshr_b32 s24, s11, 24
2698; GFX12-NEXT:    s_bfe_u32 s25, s11, 0x80008
2699; GFX12-NEXT:    s_and_b32 s34, s11, 0xff
2700; GFX12-NEXT:    s_bfe_u32 s11, s11, 0x80010
2701; GFX12-NEXT:    s_lshr_b32 s22, s10, 24
2702; GFX12-NEXT:    s_bfe_u32 s23, s10, 0x80008
2703; GFX12-NEXT:    s_and_b32 s33, s10, 0xff
2704; GFX12-NEXT:    s_bfe_u32 s10, s10, 0x80010
2705; GFX12-NEXT:    v_dual_mov_b32 v24, 0 :: v_dual_mov_b32 v1, s25
2706; GFX12-NEXT:    v_dual_mov_b32 v0, s34 :: v_dual_mov_b32 v3, s24
2707; GFX12-NEXT:    v_dual_mov_b32 v2, s11 :: v_dual_mov_b32 v5, s23
2708; GFX12-NEXT:    s_bfe_u32 s21, s9, 0x80008
2709; GFX12-NEXT:    v_dual_mov_b32 v4, s33 :: v_dual_mov_b32 v7, s22
2710; GFX12-NEXT:    v_dual_mov_b32 v6, s10 :: v_dual_mov_b32 v9, s21
2711; GFX12-NEXT:    s_lshr_b32 s20, s9, 24
2712; GFX12-NEXT:    s_and_b32 s31, s9, 0xff
2713; GFX12-NEXT:    s_bfe_u32 s9, s9, 0x80010
2714; GFX12-NEXT:    s_lshr_b32 s18, s8, 24
2715; GFX12-NEXT:    s_bfe_u32 s19, s8, 0x80008
2716; GFX12-NEXT:    s_and_b32 s30, s8, 0xff
2717; GFX12-NEXT:    s_bfe_u32 s8, s8, 0x80010
2718; GFX12-NEXT:    s_lshr_b32 s16, s7, 24
2719; GFX12-NEXT:    s_bfe_u32 s17, s7, 0x80008
2720; GFX12-NEXT:    s_and_b32 s29, s7, 0xff
2721; GFX12-NEXT:    s_bfe_u32 s7, s7, 0x80010
2722; GFX12-NEXT:    s_wait_alu 0xfffe
2723; GFX12-NEXT:    v_dual_mov_b32 v8, s31 :: v_dual_mov_b32 v11, s20
2724; GFX12-NEXT:    v_mov_b32_e32 v10, s9
2725; GFX12-NEXT:    s_lshr_b32 s14, s6, 24
2726; GFX12-NEXT:    s_bfe_u32 s15, s6, 0x80008
2727; GFX12-NEXT:    s_and_b32 s28, s6, 0xff
2728; GFX12-NEXT:    s_bfe_u32 s6, s6, 0x80010
2729; GFX12-NEXT:    s_clause 0x1
2730; GFX12-NEXT:    global_store_b128 v24, v[0:3], s[0:1] offset:112
2731; GFX12-NEXT:    global_store_b128 v24, v[4:7], s[0:1] offset:96
2732; GFX12-NEXT:    v_dual_mov_b32 v1, s19 :: v_dual_mov_b32 v0, s30
2733; GFX12-NEXT:    v_dual_mov_b32 v3, s18 :: v_dual_mov_b32 v2, s8
2734; GFX12-NEXT:    v_mov_b32_e32 v5, s17
2735; GFX12-NEXT:    s_lshr_b32 s12, s5, 24
2736; GFX12-NEXT:    s_bfe_u32 s13, s5, 0x80008
2737; GFX12-NEXT:    s_and_b32 s27, s5, 0xff
2738; GFX12-NEXT:    s_bfe_u32 s5, s5, 0x80010
2739; GFX12-NEXT:    v_dual_mov_b32 v4, s29 :: v_dual_mov_b32 v7, s16
2740; GFX12-NEXT:    v_dual_mov_b32 v6, s7 :: v_dual_mov_b32 v13, s15
2741; GFX12-NEXT:    s_lshr_b32 s2, s4, 24
2742; GFX12-NEXT:    s_bfe_u32 s3, s4, 0x80008
2743; GFX12-NEXT:    s_and_b32 s26, s4, 0xff
2744; GFX12-NEXT:    s_bfe_u32 s4, s4, 0x80010
2745; GFX12-NEXT:    v_dual_mov_b32 v12, s28 :: v_dual_mov_b32 v15, s14
2746; GFX12-NEXT:    v_dual_mov_b32 v14, s6 :: v_dual_mov_b32 v17, s13
2747; GFX12-NEXT:    v_dual_mov_b32 v16, s27 :: v_dual_mov_b32 v19, s12
2748; GFX12-NEXT:    v_dual_mov_b32 v18, s5 :: v_dual_mov_b32 v21, s3
2749; GFX12-NEXT:    v_dual_mov_b32 v20, s26 :: v_dual_mov_b32 v23, s2
2750; GFX12-NEXT:    v_mov_b32_e32 v22, s4
2751; GFX12-NEXT:    s_clause 0x5
2752; GFX12-NEXT:    global_store_b128 v24, v[8:11], s[0:1] offset:80
2753; GFX12-NEXT:    global_store_b128 v24, v[0:3], s[0:1] offset:64
2754; GFX12-NEXT:    global_store_b128 v24, v[4:7], s[0:1] offset:48
2755; GFX12-NEXT:    global_store_b128 v24, v[12:15], s[0:1] offset:32
2756; GFX12-NEXT:    global_store_b128 v24, v[16:19], s[0:1] offset:16
2757; GFX12-NEXT:    global_store_b128 v24, v[20:23], s[0:1]
2758; GFX12-NEXT:    s_endpgm
; IR under test: a single vector load, a zext of all 32 lanes, and a single
; vector store; everything above is the expected lowering per target.
2759  %load = load <32 x i8>, ptr addrspace(4) %in
2760  %ext = zext <32 x i8> %load to <32 x i32>
2761  store <32 x i32> %ext, ptr addrspace(1) %out
2762  ret void
2763}
2764
2765; TODO: These should use DST, but for some there are redundant MOVs
; Sign-extending load test: reads a <32 x i8> vector through %in
; (ptr addrspace(4)), sign-extends every element to i32, and stores the
; resulting <32 x i32> vector through %out (ptr addrspace(1)).
; The per-target assertion lines inside this function are autogenerated by
; utils/update_llc_test_checks.py (see the NOTE at the top of the file);
; regenerate them with that script instead of editing them by hand.
2766define amdgpu_kernel void @constant_sextload_v32i8_to_v32i32(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
2767; GFX6-NOHSA-LABEL: constant_sextload_v32i8_to_v32i32:
2768; GFX6-NOHSA:       ; %bb.0:
2769; GFX6-NOHSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
2770; GFX6-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
2771; GFX6-NOHSA-NEXT:    s_load_dwordx8 s[4:11], s[2:3], 0x0
2772; GFX6-NOHSA-NEXT:    s_mov_b32 s3, 0xf000
2773; GFX6-NOHSA-NEXT:    s_mov_b32 s2, -1
2774; GFX6-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
2775; GFX6-NOHSA-NEXT:    s_ashr_i32 s12, s4, 24
2776; GFX6-NOHSA-NEXT:    s_bfe_i32 s13, s4, 0x80010
2777; GFX6-NOHSA-NEXT:    s_bfe_i32 s14, s4, 0x80008
2778; GFX6-NOHSA-NEXT:    s_sext_i32_i8 s4, s4
2779; GFX6-NOHSA-NEXT:    s_ashr_i32 s15, s5, 24
2780; GFX6-NOHSA-NEXT:    s_bfe_i32 s16, s5, 0x80010
2781; GFX6-NOHSA-NEXT:    s_bfe_i32 s17, s5, 0x80008
2782; GFX6-NOHSA-NEXT:    s_sext_i32_i8 s5, s5
2783; GFX6-NOHSA-NEXT:    s_ashr_i32 s18, s6, 24
2784; GFX6-NOHSA-NEXT:    s_bfe_i32 s19, s6, 0x80010
2785; GFX6-NOHSA-NEXT:    s_bfe_i32 s20, s6, 0x80008
2786; GFX6-NOHSA-NEXT:    s_sext_i32_i8 s6, s6
2787; GFX6-NOHSA-NEXT:    s_ashr_i32 s21, s7, 24
2788; GFX6-NOHSA-NEXT:    s_bfe_i32 s22, s7, 0x80010
2789; GFX6-NOHSA-NEXT:    s_bfe_i32 s23, s7, 0x80008
2790; GFX6-NOHSA-NEXT:    s_sext_i32_i8 s7, s7
2791; GFX6-NOHSA-NEXT:    s_ashr_i32 s24, s8, 24
2792; GFX6-NOHSA-NEXT:    s_bfe_i32 s25, s8, 0x80010
2793; GFX6-NOHSA-NEXT:    s_bfe_i32 s26, s8, 0x80008
2794; GFX6-NOHSA-NEXT:    s_sext_i32_i8 s8, s8
2795; GFX6-NOHSA-NEXT:    s_ashr_i32 s27, s9, 24
2796; GFX6-NOHSA-NEXT:    s_bfe_i32 s28, s9, 0x80010
2797; GFX6-NOHSA-NEXT:    s_bfe_i32 s29, s9, 0x80008
2798; GFX6-NOHSA-NEXT:    s_sext_i32_i8 s9, s9
2799; GFX6-NOHSA-NEXT:    s_ashr_i32 s30, s10, 24
2800; GFX6-NOHSA-NEXT:    s_bfe_i32 s31, s10, 0x80010
2801; GFX6-NOHSA-NEXT:    s_bfe_i32 s33, s10, 0x80008
2802; GFX6-NOHSA-NEXT:    s_ashr_i32 s34, s11, 24
2803; GFX6-NOHSA-NEXT:    s_bfe_i32 s35, s11, 0x80010
2804; GFX6-NOHSA-NEXT:    s_bfe_i32 s36, s11, 0x80008
2805; GFX6-NOHSA-NEXT:    s_sext_i32_i8 s11, s11
2806; GFX6-NOHSA-NEXT:    s_sext_i32_i8 s10, s10
2807; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s11
2808; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s36
2809; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s35
2810; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s34
2811; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112
2812; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
2813; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s10
2814; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s33
2815; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s31
2816; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s30
2817; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96
2818; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
2819; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s9
2820; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s29
2821; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s28
2822; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s27
2823; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80
2824; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
2825; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s8
2826; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s26
2827; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s25
2828; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s24
2829; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64
2830; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
2831; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s7
2832; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s23
2833; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s22
2834; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s21
2835; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
2836; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
2837; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s6
2838; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s20
2839; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s19
2840; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s18
2841; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
2842; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
2843; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s5
2844; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s17
2845; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s16
2846; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s15
2847; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
2848; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
2849; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s4
2850; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s14
2851; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s13
2852; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s12
2853; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
2854; GFX6-NOHSA-NEXT:    s_endpgm
2855;
2856; GFX7-HSA-LABEL: constant_sextload_v32i8_to_v32i32:
2857; GFX7-HSA:       ; %bb.0:
2858; GFX7-HSA-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
2859; GFX7-HSA-NEXT:    s_waitcnt lgkmcnt(0)
2860; GFX7-HSA-NEXT:    s_load_dwordx8 s[4:11], s[2:3], 0x0
2861; GFX7-HSA-NEXT:    s_waitcnt lgkmcnt(0)
2862; GFX7-HSA-NEXT:    s_ashr_i32 s12, s4, 24
2863; GFX7-HSA-NEXT:    s_bfe_i32 s13, s4, 0x80010
2864; GFX7-HSA-NEXT:    s_bfe_i32 s14, s4, 0x80008
2865; GFX7-HSA-NEXT:    s_ashr_i32 s15, s5, 24
2866; GFX7-HSA-NEXT:    s_bfe_i32 s16, s5, 0x80010
2867; GFX7-HSA-NEXT:    s_bfe_i32 s17, s5, 0x80008
2868; GFX7-HSA-NEXT:    s_ashr_i32 s18, s6, 24
2869; GFX7-HSA-NEXT:    s_bfe_i32 s19, s6, 0x80010
2870; GFX7-HSA-NEXT:    s_bfe_i32 s20, s6, 0x80008
2871; GFX7-HSA-NEXT:    s_ashr_i32 s21, s7, 24
2872; GFX7-HSA-NEXT:    s_bfe_i32 s22, s7, 0x80010
2873; GFX7-HSA-NEXT:    s_bfe_i32 s23, s7, 0x80008
2874; GFX7-HSA-NEXT:    s_ashr_i32 s24, s8, 24
2875; GFX7-HSA-NEXT:    s_bfe_i32 s25, s8, 0x80010
2876; GFX7-HSA-NEXT:    s_bfe_i32 s26, s8, 0x80008
2877; GFX7-HSA-NEXT:    s_ashr_i32 s27, s9, 24
2878; GFX7-HSA-NEXT:    s_bfe_i32 s28, s9, 0x80010
2879; GFX7-HSA-NEXT:    s_bfe_i32 s29, s9, 0x80008
2880; GFX7-HSA-NEXT:    s_ashr_i32 s30, s10, 24
2881; GFX7-HSA-NEXT:    s_bfe_i32 s31, s10, 0x80010
2882; GFX7-HSA-NEXT:    s_bfe_i32 s33, s10, 0x80008
2883; GFX7-HSA-NEXT:    s_ashr_i32 s34, s11, 24
2884; GFX7-HSA-NEXT:    s_bfe_i32 s35, s11, 0x80010
2885; GFX7-HSA-NEXT:    s_bfe_i32 s36, s11, 0x80008
2886; GFX7-HSA-NEXT:    s_add_u32 s2, s0, 0x70
2887; GFX7-HSA-NEXT:    s_addc_u32 s3, s1, 0
2888; GFX7-HSA-NEXT:    v_mov_b32_e32 v9, s3
2889; GFX7-HSA-NEXT:    v_mov_b32_e32 v8, s2
2890; GFX7-HSA-NEXT:    s_add_u32 s2, s0, 0x60
2891; GFX7-HSA-NEXT:    s_addc_u32 s3, s1, 0
2892; GFX7-HSA-NEXT:    v_mov_b32_e32 v11, s3
2893; GFX7-HSA-NEXT:    s_sext_i32_i8 s10, s10
2894; GFX7-HSA-NEXT:    s_sext_i32_i8 s11, s11
2895; GFX7-HSA-NEXT:    v_mov_b32_e32 v10, s2
2896; GFX7-HSA-NEXT:    s_add_u32 s2, s0, 0x50
2897; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s11
2898; GFX7-HSA-NEXT:    v_mov_b32_e32 v1, s36
2899; GFX7-HSA-NEXT:    v_mov_b32_e32 v2, s35
2900; GFX7-HSA-NEXT:    v_mov_b32_e32 v3, s34
2901; GFX7-HSA-NEXT:    v_mov_b32_e32 v4, s10
2902; GFX7-HSA-NEXT:    v_mov_b32_e32 v5, s33
2903; GFX7-HSA-NEXT:    s_addc_u32 s3, s1, 0
2904; GFX7-HSA-NEXT:    v_mov_b32_e32 v6, s31
2905; GFX7-HSA-NEXT:    v_mov_b32_e32 v7, s30
2906; GFX7-HSA-NEXT:    flat_store_dwordx4 v[8:9], v[0:3]
2907; GFX7-HSA-NEXT:    flat_store_dwordx4 v[10:11], v[4:7]
2908; GFX7-HSA-NEXT:    s_sext_i32_i8 s9, s9
2909; GFX7-HSA-NEXT:    v_mov_b32_e32 v5, s3
2910; GFX7-HSA-NEXT:    v_mov_b32_e32 v4, s2
2911; GFX7-HSA-NEXT:    s_add_u32 s2, s0, 64
2912; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s9
2913; GFX7-HSA-NEXT:    v_mov_b32_e32 v1, s29
2914; GFX7-HSA-NEXT:    v_mov_b32_e32 v2, s28
2915; GFX7-HSA-NEXT:    v_mov_b32_e32 v3, s27
2916; GFX7-HSA-NEXT:    s_addc_u32 s3, s1, 0
2917; GFX7-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
2918; GFX7-HSA-NEXT:    v_mov_b32_e32 v5, s3
2919; GFX7-HSA-NEXT:    s_sext_i32_i8 s8, s8
2920; GFX7-HSA-NEXT:    v_mov_b32_e32 v4, s2
2921; GFX7-HSA-NEXT:    s_add_u32 s2, s0, 48
2922; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s8
2923; GFX7-HSA-NEXT:    v_mov_b32_e32 v1, s26
2924; GFX7-HSA-NEXT:    v_mov_b32_e32 v2, s25
2925; GFX7-HSA-NEXT:    v_mov_b32_e32 v3, s24
2926; GFX7-HSA-NEXT:    s_addc_u32 s3, s1, 0
2927; GFX7-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
2928; GFX7-HSA-NEXT:    v_mov_b32_e32 v5, s3
2929; GFX7-HSA-NEXT:    s_sext_i32_i8 s7, s7
2930; GFX7-HSA-NEXT:    v_mov_b32_e32 v4, s2
2931; GFX7-HSA-NEXT:    s_add_u32 s2, s0, 32
2932; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s7
2933; GFX7-HSA-NEXT:    v_mov_b32_e32 v1, s23
2934; GFX7-HSA-NEXT:    v_mov_b32_e32 v2, s22
2935; GFX7-HSA-NEXT:    v_mov_b32_e32 v3, s21
2936; GFX7-HSA-NEXT:    s_addc_u32 s3, s1, 0
2937; GFX7-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
2938; GFX7-HSA-NEXT:    v_mov_b32_e32 v5, s3
2939; GFX7-HSA-NEXT:    s_sext_i32_i8 s6, s6
2940; GFX7-HSA-NEXT:    v_mov_b32_e32 v4, s2
2941; GFX7-HSA-NEXT:    s_add_u32 s2, s0, 16
2942; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s6
2943; GFX7-HSA-NEXT:    v_mov_b32_e32 v1, s20
2944; GFX7-HSA-NEXT:    v_mov_b32_e32 v2, s19
2945; GFX7-HSA-NEXT:    v_mov_b32_e32 v3, s18
2946; GFX7-HSA-NEXT:    s_addc_u32 s3, s1, 0
2947; GFX7-HSA-NEXT:    s_sext_i32_i8 s5, s5
2948; GFX7-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
2949; GFX7-HSA-NEXT:    v_mov_b32_e32 v5, s3
2950; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s5
2951; GFX7-HSA-NEXT:    v_mov_b32_e32 v1, s17
2952; GFX7-HSA-NEXT:    v_mov_b32_e32 v2, s16
2953; GFX7-HSA-NEXT:    v_mov_b32_e32 v3, s15
2954; GFX7-HSA-NEXT:    v_mov_b32_e32 v4, s2
2955; GFX7-HSA-NEXT:    s_sext_i32_i8 s4, s4
2956; GFX7-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
2957; GFX7-HSA-NEXT:    v_mov_b32_e32 v5, s1
2958; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s4
2959; GFX7-HSA-NEXT:    v_mov_b32_e32 v1, s14
2960; GFX7-HSA-NEXT:    v_mov_b32_e32 v2, s13
2961; GFX7-HSA-NEXT:    v_mov_b32_e32 v3, s12
2962; GFX7-HSA-NEXT:    v_mov_b32_e32 v4, s0
2963; GFX7-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
2964; GFX7-HSA-NEXT:    s_endpgm
2965;
2966; GFX8-NOHSA-LABEL: constant_sextload_v32i8_to_v32i32:
2967; GFX8-NOHSA:       ; %bb.0:
2968; GFX8-NOHSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
2969; GFX8-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
2970; GFX8-NOHSA-NEXT:    s_load_dwordx8 s[4:11], s[2:3], 0x0
2971; GFX8-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
2972; GFX8-NOHSA-NEXT:    s_ashr_i32 s12, s4, 24
2973; GFX8-NOHSA-NEXT:    s_bfe_i32 s13, s4, 0x80010
2974; GFX8-NOHSA-NEXT:    s_bfe_i32 s14, s4, 0x80008
2975; GFX8-NOHSA-NEXT:    s_ashr_i32 s15, s5, 24
2976; GFX8-NOHSA-NEXT:    s_bfe_i32 s16, s5, 0x80010
2977; GFX8-NOHSA-NEXT:    s_bfe_i32 s17, s5, 0x80008
2978; GFX8-NOHSA-NEXT:    s_ashr_i32 s18, s6, 24
2979; GFX8-NOHSA-NEXT:    s_bfe_i32 s19, s6, 0x80010
2980; GFX8-NOHSA-NEXT:    s_bfe_i32 s20, s6, 0x80008
2981; GFX8-NOHSA-NEXT:    s_ashr_i32 s21, s7, 24
2982; GFX8-NOHSA-NEXT:    s_bfe_i32 s22, s7, 0x80010
2983; GFX8-NOHSA-NEXT:    s_bfe_i32 s23, s7, 0x80008
2984; GFX8-NOHSA-NEXT:    s_ashr_i32 s24, s8, 24
2985; GFX8-NOHSA-NEXT:    s_bfe_i32 s25, s8, 0x80010
2986; GFX8-NOHSA-NEXT:    s_bfe_i32 s26, s8, 0x80008
2987; GFX8-NOHSA-NEXT:    s_ashr_i32 s27, s9, 24
2988; GFX8-NOHSA-NEXT:    s_bfe_i32 s28, s9, 0x80010
2989; GFX8-NOHSA-NEXT:    s_bfe_i32 s29, s9, 0x80008
2990; GFX8-NOHSA-NEXT:    s_ashr_i32 s30, s10, 24
2991; GFX8-NOHSA-NEXT:    s_bfe_i32 s31, s10, 0x80010
2992; GFX8-NOHSA-NEXT:    s_bfe_i32 s33, s10, 0x80008
2993; GFX8-NOHSA-NEXT:    s_ashr_i32 s2, s11, 24
2994; GFX8-NOHSA-NEXT:    s_bfe_i32 s3, s11, 0x80010
2995; GFX8-NOHSA-NEXT:    s_bfe_i32 s34, s11, 0x80008
2996; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v3, s2
2997; GFX8-NOHSA-NEXT:    s_add_u32 s2, s0, 0x70
2998; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s3
2999; GFX8-NOHSA-NEXT:    s_addc_u32 s3, s1, 0
3000; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s3
3001; GFX8-NOHSA-NEXT:    s_sext_i32_i8 s11, s11
3002; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s2
3003; GFX8-NOHSA-NEXT:    s_add_u32 s2, s0, 0x60
3004; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s11
3005; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s34
3006; GFX8-NOHSA-NEXT:    s_addc_u32 s3, s1, 0
3007; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
3008; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s3
3009; GFX8-NOHSA-NEXT:    s_sext_i32_i8 s10, s10
3010; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s2
3011; GFX8-NOHSA-NEXT:    s_add_u32 s2, s0, 0x50
3012; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s10
3013; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s33
3014; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s31
3015; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v3, s30
3016; GFX8-NOHSA-NEXT:    s_addc_u32 s3, s1, 0
3017; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
3018; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s3
3019; GFX8-NOHSA-NEXT:    s_sext_i32_i8 s9, s9
3020; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s2
3021; GFX8-NOHSA-NEXT:    s_add_u32 s2, s0, 64
3022; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s9
3023; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s29
3024; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s28
3025; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v3, s27
3026; GFX8-NOHSA-NEXT:    s_addc_u32 s3, s1, 0
3027; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
3028; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s3
3029; GFX8-NOHSA-NEXT:    s_sext_i32_i8 s8, s8
3030; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s2
3031; GFX8-NOHSA-NEXT:    s_add_u32 s2, s0, 48
3032; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s8
3033; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s26
3034; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s25
3035; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v3, s24
3036; GFX8-NOHSA-NEXT:    s_addc_u32 s3, s1, 0
3037; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
3038; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s3
3039; GFX8-NOHSA-NEXT:    s_sext_i32_i8 s7, s7
3040; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s2
3041; GFX8-NOHSA-NEXT:    s_add_u32 s2, s0, 32
3042; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s7
3043; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s23
3044; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s22
3045; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v3, s21
3046; GFX8-NOHSA-NEXT:    s_addc_u32 s3, s1, 0
3047; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
3048; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s3
3049; GFX8-NOHSA-NEXT:    s_sext_i32_i8 s6, s6
3050; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s2
3051; GFX8-NOHSA-NEXT:    s_add_u32 s2, s0, 16
3052; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s6
3053; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s20
3054; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s19
3055; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v3, s18
3056; GFX8-NOHSA-NEXT:    s_addc_u32 s3, s1, 0
3057; GFX8-NOHSA-NEXT:    s_sext_i32_i8 s5, s5
3058; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
3059; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s3
3060; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s5
3061; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s17
3062; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s16
3063; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v3, s15
3064; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s2
3065; GFX8-NOHSA-NEXT:    s_sext_i32_i8 s4, s4
3066; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
3067; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s1
3068; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s4
3069; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s14
3070; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s13
3071; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v3, s12
3072; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s0
3073; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
3074; GFX8-NOHSA-NEXT:    s_endpgm
3075;
3076; EG-LABEL: constant_sextload_v32i8_to_v32i32:
3077; EG:       ; %bb.0:
3078; EG-NEXT:    ALU 0, @18, KC0[CB0:0-32], KC1[]
3079; EG-NEXT:    TEX 0 @14
3080; EG-NEXT:    ALU 18, @19, KC0[CB0:0-32], KC1[]
3081; EG-NEXT:    TEX 0 @16
3082; EG-NEXT:    ALU 75, @38, KC0[CB0:0-32], KC1[]
3083; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T24.XYZW, T26.X, 0
3084; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T23.XYZW, T25.X, 0
3085; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T11.XYZW, T12.X, 0
3086; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T22.XYZW, T17.X, 0
3087; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T21.XYZW, T16.X, 0
3088; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T20.XYZW, T15.X, 0
3089; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T19.XYZW, T14.X, 0
3090; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T18.XYZW, T13.X, 1
3091; EG-NEXT:    CF_END
3092; EG-NEXT:    Fetch clause starting at 14:
3093; EG-NEXT:     VTX_READ_128 T12.XYZW, T11.X, 16, #1
3094; EG-NEXT:    Fetch clause starting at 16:
3095; EG-NEXT:     VTX_READ_128 T11.XYZW, T11.X, 0, #1
3096; EG-NEXT:    ALU clause starting at 18:
3097; EG-NEXT:     MOV * T11.X, KC0[2].Z,
3098; EG-NEXT:    ALU clause starting at 19:
3099; EG-NEXT:     LSHR T13.X, KC0[2].Y, literal.x,
3100; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
3101; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
3102; EG-NEXT:     LSHR T14.X, PV.W, literal.x,
3103; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
3104; EG-NEXT:    2(2.802597e-45), 32(4.484155e-44)
3105; EG-NEXT:     LSHR T15.X, PV.W, literal.x,
3106; EG-NEXT:     LSHR T0.Z, T12.W, literal.y,
3107; EG-NEXT:     LSHR T0.W, T12.Z, literal.z,
3108; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.w,
3109; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
3110; EG-NEXT:    8(1.121039e-44), 48(6.726233e-44)
3111; EG-NEXT:     LSHR T16.X, PS, literal.x,
3112; EG-NEXT:     LSHR T0.Y, T12.W, literal.y,
3113; EG-NEXT:     LSHR T1.Z, T12.Z, literal.z,
3114; EG-NEXT:     LSHR T1.W, T12.Y, literal.w,
3115; EG-NEXT:     LSHR * T2.W, T12.Z, literal.y,
3116; EG-NEXT:    2(2.802597e-45), 24(3.363116e-44)
3117; EG-NEXT:    16(2.242078e-44), 8(1.121039e-44)
3118; EG-NEXT:    ALU clause starting at 38:
3119; EG-NEXT:     ADD_INT * T3.W, KC0[2].Y, literal.x,
3120; EG-NEXT:    64(8.968310e-44), 0(0.000000e+00)
3121; EG-NEXT:     LSHR T17.X, PV.W, literal.x,
3122; EG-NEXT:     LSHR T1.Y, T12.Y, literal.y,
3123; EG-NEXT:     LSHR T2.Z, T12.Y, literal.z,
3124; EG-NEXT:     LSHR T3.W, T12.X, literal.y,
3125; EG-NEXT:     LSHR * T4.W, T12.X, literal.z,
3126; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
3127; EG-NEXT:    24(3.363116e-44), 0(0.000000e+00)
3128; EG-NEXT:     BFE_INT T18.X, T11.X, 0.0, literal.x,
3129; EG-NEXT:     LSHR T2.Y, T11.W, literal.y,
3130; EG-NEXT:     LSHR T3.Z, T11.W, literal.z,
3131; EG-NEXT:     LSHR T5.W, T11.Z, literal.y,
3132; EG-NEXT:     LSHR * T6.W, T11.X, literal.z,
3133; EG-NEXT:    8(1.121039e-44), 16(2.242078e-44)
3134; EG-NEXT:    24(3.363116e-44), 0(0.000000e+00)
3135; EG-NEXT:     BFE_INT T19.X, T11.Y, 0.0, literal.x,
3136; EG-NEXT:     LSHR T3.Y, T11.Z, literal.y,
3137; EG-NEXT:     LSHR T4.Z, T11.Y, literal.y,
3138; EG-NEXT:     BFE_INT T18.W, PS, 0.0, literal.x,
3139; EG-NEXT:     LSHR * T6.W, T11.X, literal.z,
3140; EG-NEXT:    8(1.121039e-44), 24(3.363116e-44)
3141; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
3142; EG-NEXT:     BFE_INT T20.X, T11.Z, 0.0, literal.x,
3143; EG-NEXT:     LSHR T4.Y, T11.Y, literal.y,
3144; EG-NEXT:     BFE_INT T18.Z, PS, 0.0, literal.x,
3145; EG-NEXT:     BFE_INT T19.W, PV.Z, 0.0, literal.x,
3146; EG-NEXT:     LSHR * T6.W, T11.X, literal.x,
3147; EG-NEXT:    8(1.121039e-44), 16(2.242078e-44)
3148; EG-NEXT:     BFE_INT T21.X, T11.W, 0.0, literal.x,
3149; EG-NEXT:     BFE_INT T18.Y, PS, 0.0, literal.x,
3150; EG-NEXT:     BFE_INT T19.Z, PV.Y, 0.0, literal.x,
3151; EG-NEXT:     BFE_INT T20.W, T3.Y, 0.0, literal.x,
3152; EG-NEXT:     LSHR * T6.W, T11.Y, literal.x,
3153; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
3154; EG-NEXT:     BFE_INT T22.X, T12.X, 0.0, literal.x,
3155; EG-NEXT:     BFE_INT T19.Y, PS, 0.0, literal.x,
3156; EG-NEXT:     BFE_INT T20.Z, T5.W, 0.0, literal.x,
3157; EG-NEXT:     BFE_INT T21.W, T3.Z, 0.0, literal.x,
3158; EG-NEXT:     LSHR * T5.W, T11.Z, literal.x,
3159; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
3160; EG-NEXT:     BFE_INT T11.X, T12.Y, 0.0, literal.x,
3161; EG-NEXT:     BFE_INT T20.Y, PS, 0.0, literal.x,
3162; EG-NEXT:     BFE_INT T21.Z, T2.Y, 0.0, literal.x, BS:VEC_120/SCL_212
3163; EG-NEXT:     BFE_INT T22.W, T4.W, 0.0, literal.x,
3164; EG-NEXT:     LSHR * T4.W, T11.W, literal.x,
3165; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
3166; EG-NEXT:     BFE_INT T23.X, T12.Z, 0.0, literal.x,
3167; EG-NEXT:     BFE_INT T21.Y, PS, 0.0, literal.x,
3168; EG-NEXT:     BFE_INT T22.Z, T3.W, 0.0, literal.x,
3169; EG-NEXT:     BFE_INT T11.W, T2.Z, 0.0, literal.x, BS:VEC_120/SCL_212
3170; EG-NEXT:     LSHR * T3.W, T12.X, literal.x,
3171; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
3172; EG-NEXT:     BFE_INT T24.X, T12.W, 0.0, literal.x,
3173; EG-NEXT:     BFE_INT T22.Y, PS, 0.0, literal.x,
3174; EG-NEXT:     BFE_INT T11.Z, T1.Y, 0.0, literal.x,
3175; EG-NEXT:     BFE_INT T23.W, T2.W, 0.0, literal.x, BS:VEC_120/SCL_212
3176; EG-NEXT:     ADD_INT * T2.W, KC0[2].Y, literal.y,
3177; EG-NEXT:    8(1.121039e-44), 80(1.121039e-43)
3178; EG-NEXT:     LSHR T12.X, PS, literal.x,
3179; EG-NEXT:     BFE_INT T11.Y, T1.W, 0.0, literal.y,
3180; EG-NEXT:     BFE_INT T23.Z, T1.Z, 0.0, literal.y,
3181; EG-NEXT:     BFE_INT T24.W, T0.Y, 0.0, literal.y,
3182; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.z,
3183; EG-NEXT:    2(2.802597e-45), 8(1.121039e-44)
3184; EG-NEXT:    96(1.345247e-43), 0(0.000000e+00)
3185; EG-NEXT:     LSHR T25.X, PS, literal.x,
3186; EG-NEXT:     BFE_INT T23.Y, T0.W, 0.0, literal.y,
3187; EG-NEXT:     BFE_INT T24.Z, T0.Z, 0.0, literal.y,
3188; EG-NEXT:     LSHR T0.W, T12.W, literal.y, BS:VEC_120/SCL_212
3189; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.z,
3190; EG-NEXT:    2(2.802597e-45), 8(1.121039e-44)
3191; EG-NEXT:    112(1.569454e-43), 0(0.000000e+00)
3192; EG-NEXT:     LSHR T26.X, PS, literal.x,
3193; EG-NEXT:     BFE_INT * T24.Y, PV.W, 0.0, literal.y,
3194; EG-NEXT:    2(2.802597e-45), 8(1.121039e-44)
3195;
3196; GFX12-LABEL: constant_sextload_v32i8_to_v32i32:
3197; GFX12:       ; %bb.0:
3198; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
3199; GFX12-NEXT:    s_wait_kmcnt 0x0
3200; GFX12-NEXT:    s_load_b256 s[4:11], s[2:3], 0x0
3201; GFX12-NEXT:    s_wait_kmcnt 0x0
3202; GFX12-NEXT:    s_ashr_i32 s31, s11, 24
3203; GFX12-NEXT:    s_bfe_i32 s33, s11, 0x80010
3204; GFX12-NEXT:    s_sext_i32_i8 s34, s11
3205; GFX12-NEXT:    s_bfe_i32 s11, s11, 0x80008
3206; GFX12-NEXT:    s_ashr_i32 s28, s10, 24
3207; GFX12-NEXT:    s_bfe_i32 s29, s10, 0x80010
3208; GFX12-NEXT:    s_bfe_i32 s30, s10, 0x80008
3209; GFX12-NEXT:    s_sext_i32_i8 s10, s10
3210; GFX12-NEXT:    v_dual_mov_b32 v24, 0 :: v_dual_mov_b32 v1, s11
3211; GFX12-NEXT:    v_dual_mov_b32 v0, s34 :: v_dual_mov_b32 v3, s31
3212; GFX12-NEXT:    v_dual_mov_b32 v2, s33 :: v_dual_mov_b32 v5, s30
3213; GFX12-NEXT:    s_bfe_i32 s27, s9, 0x80008
3214; GFX12-NEXT:    v_dual_mov_b32 v4, s10 :: v_dual_mov_b32 v7, s28
3215; GFX12-NEXT:    v_dual_mov_b32 v6, s29 :: v_dual_mov_b32 v9, s27
3216; GFX12-NEXT:    s_ashr_i32 s25, s9, 24
3217; GFX12-NEXT:    s_bfe_i32 s26, s9, 0x80010
3218; GFX12-NEXT:    s_sext_i32_i8 s9, s9
3219; GFX12-NEXT:    s_ashr_i32 s22, s8, 24
3220; GFX12-NEXT:    s_bfe_i32 s23, s8, 0x80010
3221; GFX12-NEXT:    s_bfe_i32 s24, s8, 0x80008
3222; GFX12-NEXT:    s_sext_i32_i8 s8, s8
3223; GFX12-NEXT:    s_ashr_i32 s19, s7, 24
3224; GFX12-NEXT:    s_bfe_i32 s20, s7, 0x80010
3225; GFX12-NEXT:    s_bfe_i32 s21, s7, 0x80008
3226; GFX12-NEXT:    s_sext_i32_i8 s7, s7
3227; GFX12-NEXT:    v_dual_mov_b32 v8, s9 :: v_dual_mov_b32 v11, s25
3228; GFX12-NEXT:    s_wait_alu 0xfffe
3229; GFX12-NEXT:    v_mov_b32_e32 v10, s26
3230; GFX12-NEXT:    s_ashr_i32 s16, s6, 24
3231; GFX12-NEXT:    s_bfe_i32 s17, s6, 0x80010
3232; GFX12-NEXT:    s_bfe_i32 s18, s6, 0x80008
3233; GFX12-NEXT:    s_sext_i32_i8 s6, s6
3234; GFX12-NEXT:    s_clause 0x1
3235; GFX12-NEXT:    global_store_b128 v24, v[0:3], s[0:1] offset:112
3236; GFX12-NEXT:    global_store_b128 v24, v[4:7], s[0:1] offset:96
3237; GFX12-NEXT:    v_dual_mov_b32 v1, s24 :: v_dual_mov_b32 v0, s8
3238; GFX12-NEXT:    v_dual_mov_b32 v3, s22 :: v_dual_mov_b32 v2, s23
3239; GFX12-NEXT:    v_mov_b32_e32 v5, s21
3240; GFX12-NEXT:    s_ashr_i32 s13, s5, 24
3241; GFX12-NEXT:    s_bfe_i32 s14, s5, 0x80010
3242; GFX12-NEXT:    s_bfe_i32 s15, s5, 0x80008
3243; GFX12-NEXT:    s_sext_i32_i8 s5, s5
3244; GFX12-NEXT:    v_dual_mov_b32 v4, s7 :: v_dual_mov_b32 v7, s19
3245; GFX12-NEXT:    v_dual_mov_b32 v6, s20 :: v_dual_mov_b32 v13, s18
3246; GFX12-NEXT:    s_ashr_i32 s2, s4, 24
3247; GFX12-NEXT:    s_bfe_i32 s3, s4, 0x80010
3248; GFX12-NEXT:    s_bfe_i32 s12, s4, 0x80008
3249; GFX12-NEXT:    s_sext_i32_i8 s4, s4
3250; GFX12-NEXT:    v_dual_mov_b32 v12, s6 :: v_dual_mov_b32 v15, s16
3251; GFX12-NEXT:    v_dual_mov_b32 v14, s17 :: v_dual_mov_b32 v17, s15
3252; GFX12-NEXT:    v_dual_mov_b32 v16, s5 :: v_dual_mov_b32 v19, s13
3253; GFX12-NEXT:    v_dual_mov_b32 v18, s14 :: v_dual_mov_b32 v21, s12
3254; GFX12-NEXT:    v_dual_mov_b32 v20, s4 :: v_dual_mov_b32 v23, s2
3255; GFX12-NEXT:    v_mov_b32_e32 v22, s3
3256; GFX12-NEXT:    s_clause 0x5
3257; GFX12-NEXT:    global_store_b128 v24, v[8:11], s[0:1] offset:80
3258; GFX12-NEXT:    global_store_b128 v24, v[0:3], s[0:1] offset:64
3259; GFX12-NEXT:    global_store_b128 v24, v[4:7], s[0:1] offset:48
3260; GFX12-NEXT:    global_store_b128 v24, v[12:15], s[0:1] offset:32
3261; GFX12-NEXT:    global_store_b128 v24, v[16:19], s[0:1] offset:16
3262; GFX12-NEXT:    global_store_b128 v24, v[20:23], s[0:1]
3263; GFX12-NEXT:    s_endpgm
3264  %load = load <32 x i8>, ptr addrspace(4) %in
3265  %ext = sext <32 x i8> %load to <32 x i32>
3266  store <32 x i32> %ext, ptr addrspace(1) %out
3267  ret void
3268}
3269
3270define amdgpu_kernel void @constant_zextload_v64i8_to_v64i32(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
3271; GFX6-NOHSA-LABEL: constant_zextload_v64i8_to_v64i32:
3272; GFX6-NOHSA:       ; %bb.0:
3273; GFX6-NOHSA-NEXT:    s_load_dwordx4 s[16:19], s[4:5], 0x9
3274; GFX6-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
3275; GFX6-NOHSA-NEXT:    s_load_dwordx16 s[0:15], s[18:19], 0x0
3276; GFX6-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
3277; GFX6-NOHSA-NEXT:    s_lshr_b32 s18, s0, 24
3278; GFX6-NOHSA-NEXT:    s_bfe_u32 s19, s0, 0x80008
3279; GFX6-NOHSA-NEXT:    s_lshr_b32 s20, s1, 24
3280; GFX6-NOHSA-NEXT:    s_bfe_u32 s21, s1, 0x80008
3281; GFX6-NOHSA-NEXT:    s_lshr_b32 s22, s2, 24
3282; GFX6-NOHSA-NEXT:    s_bfe_u32 s23, s2, 0x80008
3283; GFX6-NOHSA-NEXT:    s_lshr_b32 s24, s3, 24
3284; GFX6-NOHSA-NEXT:    s_bfe_u32 s27, s3, 0x80008
3285; GFX6-NOHSA-NEXT:    s_lshr_b32 s28, s4, 24
3286; GFX6-NOHSA-NEXT:    s_bfe_u32 s29, s4, 0x80008
3287; GFX6-NOHSA-NEXT:    s_lshr_b32 s30, s5, 24
3288; GFX6-NOHSA-NEXT:    s_bfe_u32 s31, s5, 0x80008
3289; GFX6-NOHSA-NEXT:    s_lshr_b32 s33, s6, 24
3290; GFX6-NOHSA-NEXT:    s_bfe_u32 s34, s6, 0x80008
3291; GFX6-NOHSA-NEXT:    s_lshr_b32 s35, s7, 24
3292; GFX6-NOHSA-NEXT:    s_bfe_u32 s36, s7, 0x80008
3293; GFX6-NOHSA-NEXT:    s_lshr_b32 s37, s8, 24
3294; GFX6-NOHSA-NEXT:    s_bfe_u32 s38, s8, 0x80008
3295; GFX6-NOHSA-NEXT:    s_lshr_b32 s39, s9, 24
3296; GFX6-NOHSA-NEXT:    s_bfe_u32 s40, s9, 0x80008
3297; GFX6-NOHSA-NEXT:    s_lshr_b32 s41, s10, 24
3298; GFX6-NOHSA-NEXT:    s_bfe_u32 s42, s10, 0x80008
3299; GFX6-NOHSA-NEXT:    s_lshr_b32 s43, s11, 24
3300; GFX6-NOHSA-NEXT:    s_bfe_u32 s44, s11, 0x80008
3301; GFX6-NOHSA-NEXT:    s_lshr_b32 s45, s12, 24
3302; GFX6-NOHSA-NEXT:    s_bfe_u32 s46, s12, 0x80008
3303; GFX6-NOHSA-NEXT:    s_lshr_b32 s47, s13, 24
3304; GFX6-NOHSA-NEXT:    s_bfe_u32 s48, s13, 0x80008
3305; GFX6-NOHSA-NEXT:    s_lshr_b32 s49, s14, 24
3306; GFX6-NOHSA-NEXT:    s_bfe_u32 s50, s14, 0x80008
3307; GFX6-NOHSA-NEXT:    s_lshr_b32 s51, s15, 24
3308; GFX6-NOHSA-NEXT:    s_bfe_u32 s52, s15, 0x80008
3309; GFX6-NOHSA-NEXT:    s_and_b32 s26, s0, 0xff
3310; GFX6-NOHSA-NEXT:    s_bfe_u32 s25, s0, 0x80010
3311; GFX6-NOHSA-NEXT:    s_and_b32 s53, s1, 0xff
3312; GFX6-NOHSA-NEXT:    s_bfe_u32 s54, s1, 0x80010
3313; GFX6-NOHSA-NEXT:    s_and_b32 s55, s2, 0xff
3314; GFX6-NOHSA-NEXT:    s_bfe_u32 s56, s2, 0x80010
3315; GFX6-NOHSA-NEXT:    s_and_b32 s57, s3, 0xff
3316; GFX6-NOHSA-NEXT:    s_bfe_u32 s58, s3, 0x80010
3317; GFX6-NOHSA-NEXT:    s_and_b32 s59, s4, 0xff
3318; GFX6-NOHSA-NEXT:    s_bfe_u32 s4, s4, 0x80010
3319; GFX6-NOHSA-NEXT:    s_and_b32 s60, s5, 0xff
3320; GFX6-NOHSA-NEXT:    s_bfe_u32 s5, s5, 0x80010
3321; GFX6-NOHSA-NEXT:    s_and_b32 s61, s6, 0xff
3322; GFX6-NOHSA-NEXT:    s_bfe_u32 s6, s6, 0x80010
3323; GFX6-NOHSA-NEXT:    s_and_b32 s62, s7, 0xff
3324; GFX6-NOHSA-NEXT:    s_bfe_u32 s7, s7, 0x80010
3325; GFX6-NOHSA-NEXT:    s_and_b32 s63, s8, 0xff
3326; GFX6-NOHSA-NEXT:    s_bfe_u32 s8, s8, 0x80010
3327; GFX6-NOHSA-NEXT:    s_and_b32 s64, s9, 0xff
3328; GFX6-NOHSA-NEXT:    s_bfe_u32 s9, s9, 0x80010
3329; GFX6-NOHSA-NEXT:    s_and_b32 s65, s10, 0xff
3330; GFX6-NOHSA-NEXT:    s_and_b32 s66, s11, 0xff
3331; GFX6-NOHSA-NEXT:    s_bfe_u32 s11, s11, 0x80010
3332; GFX6-NOHSA-NEXT:    s_and_b32 s67, s12, 0xff
3333; GFX6-NOHSA-NEXT:    s_bfe_u32 s12, s12, 0x80010
3334; GFX6-NOHSA-NEXT:    s_and_b32 s68, s13, 0xff
3335; GFX6-NOHSA-NEXT:    s_bfe_u32 s13, s13, 0x80010
3336; GFX6-NOHSA-NEXT:    s_and_b32 s69, s14, 0xff
3337; GFX6-NOHSA-NEXT:    s_bfe_u32 s14, s14, 0x80010
3338; GFX6-NOHSA-NEXT:    s_and_b32 s70, s15, 0xff
3339; GFX6-NOHSA-NEXT:    s_bfe_u32 s15, s15, 0x80010
3340; GFX6-NOHSA-NEXT:    s_bfe_u32 s10, s10, 0x80010
3341; GFX6-NOHSA-NEXT:    s_mov_b32 s0, s16
3342; GFX6-NOHSA-NEXT:    s_mov_b32 s1, s17
3343; GFX6-NOHSA-NEXT:    s_mov_b32 s3, 0xf000
3344; GFX6-NOHSA-NEXT:    s_mov_b32 s2, -1
3345; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s70
3346; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s52
3347; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s15
3348; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s51
3349; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v4, s69
3350; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v5, s50
3351; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v6, s14
3352; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v7, s49
3353; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v8, s68
3354; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v9, s48
3355; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v10, s13
3356; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v11, s47
3357; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v12, s67
3358; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v13, s46
3359; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v14, s12
3360; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v15, s45
3361; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v16, s66
3362; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v17, s44
3363; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v18, s11
3364; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:240
3365; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
3366; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s65
3367; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v19, s43
3368; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s42
3369; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s10
3370; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s41
3371; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:224
3372; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:208
3373; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:192
3374; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:176
3375; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:160
3376; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
3377; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s64
3378; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s40
3379; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s9
3380; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s39
3381; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:144
3382; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
3383; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s63
3384; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s38
3385; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s8
3386; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s37
3387; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:128
3388; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
3389; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s62
3390; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s36
3391; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s7
3392; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s35
3393; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112
3394; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
3395; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s61
3396; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s34
3397; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s6
3398; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s33
3399; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96
3400; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
3401; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s60
3402; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s31
3403; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s5
3404; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s30
3405; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80
3406; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
3407; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s59
3408; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s29
3409; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s4
3410; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s28
3411; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64
3412; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
3413; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s57
3414; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s27
3415; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s58
3416; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s24
3417; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
3418; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
3419; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s55
3420; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s23
3421; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s56
3422; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s22
3423; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
3424; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
3425; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s53
3426; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s21
3427; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s54
3428; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s20
3429; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
3430; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
3431; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s26
3432; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s19
3433; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s25
3434; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s18
3435; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
3436; GFX6-NOHSA-NEXT:    s_endpgm
3437;
3438; GFX7-HSA-LABEL: constant_zextload_v64i8_to_v64i32:
3439; GFX7-HSA:       ; %bb.0:
3440; GFX7-HSA-NEXT:    s_load_dwordx4 s[16:19], s[8:9], 0x0
3441; GFX7-HSA-NEXT:    s_waitcnt lgkmcnt(0)
3442; GFX7-HSA-NEXT:    s_load_dwordx16 s[0:15], s[18:19], 0x0
3443; GFX7-HSA-NEXT:    s_waitcnt lgkmcnt(0)
3444; GFX7-HSA-NEXT:    s_lshr_b32 s18, s0, 24
3445; GFX7-HSA-NEXT:    s_bfe_u32 s19, s0, 0x80008
3446; GFX7-HSA-NEXT:    s_lshr_b32 s20, s1, 24
3447; GFX7-HSA-NEXT:    s_bfe_u32 s21, s1, 0x80008
3448; GFX7-HSA-NEXT:    s_lshr_b32 s22, s2, 24
3449; GFX7-HSA-NEXT:    s_bfe_u32 s23, s2, 0x80008
3450; GFX7-HSA-NEXT:    s_lshr_b32 s25, s3, 24
3451; GFX7-HSA-NEXT:    s_bfe_u32 s26, s3, 0x80008
3452; GFX7-HSA-NEXT:    s_lshr_b32 s28, s4, 24
3453; GFX7-HSA-NEXT:    s_bfe_u32 s29, s4, 0x80008
3454; GFX7-HSA-NEXT:    s_lshr_b32 s31, s5, 24
3455; GFX7-HSA-NEXT:    s_bfe_u32 s33, s5, 0x80008
3456; GFX7-HSA-NEXT:    s_lshr_b32 s35, s6, 24
3457; GFX7-HSA-NEXT:    s_bfe_u32 s37, s6, 0x80008
3458; GFX7-HSA-NEXT:    s_lshr_b32 s38, s7, 24
3459; GFX7-HSA-NEXT:    s_bfe_u32 s39, s7, 0x80008
3460; GFX7-HSA-NEXT:    s_lshr_b32 s41, s8, 24
3461; GFX7-HSA-NEXT:    s_bfe_u32 s43, s8, 0x80008
3462; GFX7-HSA-NEXT:    s_lshr_b32 s44, s9, 24
3463; GFX7-HSA-NEXT:    s_bfe_u32 s46, s9, 0x80008
3464; GFX7-HSA-NEXT:    s_lshr_b32 s47, s10, 24
3465; GFX7-HSA-NEXT:    s_bfe_u32 s48, s10, 0x80008
3466; GFX7-HSA-NEXT:    s_lshr_b32 s49, s11, 24
3467; GFX7-HSA-NEXT:    s_bfe_u32 s50, s11, 0x80008
3468; GFX7-HSA-NEXT:    s_lshr_b32 s51, s12, 24
3469; GFX7-HSA-NEXT:    s_bfe_u32 s52, s12, 0x80008
3470; GFX7-HSA-NEXT:    s_lshr_b32 s53, s13, 24
3471; GFX7-HSA-NEXT:    s_bfe_u32 s54, s13, 0x80008
3472; GFX7-HSA-NEXT:    s_lshr_b32 s55, s14, 24
3473; GFX7-HSA-NEXT:    s_bfe_u32 s56, s14, 0x80008
3474; GFX7-HSA-NEXT:    s_lshr_b32 s57, s15, 24
3475; GFX7-HSA-NEXT:    s_bfe_u32 s58, s15, 0x80008
3476; GFX7-HSA-NEXT:    s_and_b32 s24, s0, 0xff
3477; GFX7-HSA-NEXT:    s_bfe_u32 s0, s0, 0x80010
3478; GFX7-HSA-NEXT:    s_and_b32 s27, s1, 0xff
3479; GFX7-HSA-NEXT:    s_bfe_u32 s1, s1, 0x80010
3480; GFX7-HSA-NEXT:    s_and_b32 s30, s2, 0xff
3481; GFX7-HSA-NEXT:    s_bfe_u32 s2, s2, 0x80010
3482; GFX7-HSA-NEXT:    s_and_b32 s34, s3, 0xff
3483; GFX7-HSA-NEXT:    s_bfe_u32 s3, s3, 0x80010
3484; GFX7-HSA-NEXT:    s_and_b32 s36, s4, 0xff
3485; GFX7-HSA-NEXT:    s_bfe_u32 s4, s4, 0x80010
3486; GFX7-HSA-NEXT:    s_and_b32 s40, s5, 0xff
3487; GFX7-HSA-NEXT:    s_bfe_u32 s5, s5, 0x80010
3488; GFX7-HSA-NEXT:    s_and_b32 s42, s6, 0xff
3489; GFX7-HSA-NEXT:    s_bfe_u32 s6, s6, 0x80010
3490; GFX7-HSA-NEXT:    s_and_b32 s45, s7, 0xff
3491; GFX7-HSA-NEXT:    s_bfe_u32 s7, s7, 0x80010
3492; GFX7-HSA-NEXT:    s_and_b32 s59, s8, 0xff
3493; GFX7-HSA-NEXT:    s_bfe_u32 s60, s8, 0x80010
3494; GFX7-HSA-NEXT:    s_and_b32 s61, s9, 0xff
3495; GFX7-HSA-NEXT:    s_bfe_u32 s62, s9, 0x80010
3496; GFX7-HSA-NEXT:    s_and_b32 s63, s10, 0xff
3497; GFX7-HSA-NEXT:    s_bfe_u32 s10, s10, 0x80010
3498; GFX7-HSA-NEXT:    s_and_b32 s64, s11, 0xff
3499; GFX7-HSA-NEXT:    s_bfe_u32 s11, s11, 0x80010
3500; GFX7-HSA-NEXT:    s_and_b32 s65, s12, 0xff
3501; GFX7-HSA-NEXT:    s_bfe_u32 s12, s12, 0x80010
3502; GFX7-HSA-NEXT:    s_and_b32 s66, s13, 0xff
3503; GFX7-HSA-NEXT:    s_bfe_u32 s13, s13, 0x80010
3504; GFX7-HSA-NEXT:    s_and_b32 s67, s14, 0xff
3505; GFX7-HSA-NEXT:    s_bfe_u32 s14, s14, 0x80010
3506; GFX7-HSA-NEXT:    s_and_b32 s68, s15, 0xff
3507; GFX7-HSA-NEXT:    s_bfe_u32 s15, s15, 0x80010
3508; GFX7-HSA-NEXT:    s_add_u32 s8, s16, 0xf0
3509; GFX7-HSA-NEXT:    s_addc_u32 s9, s17, 0
3510; GFX7-HSA-NEXT:    v_mov_b32_e32 v20, s9
3511; GFX7-HSA-NEXT:    v_mov_b32_e32 v19, s8
3512; GFX7-HSA-NEXT:    s_add_u32 s8, s16, 0xe0
3513; GFX7-HSA-NEXT:    s_addc_u32 s9, s17, 0
3514; GFX7-HSA-NEXT:    v_mov_b32_e32 v22, s9
3515; GFX7-HSA-NEXT:    v_mov_b32_e32 v21, s8
3516; GFX7-HSA-NEXT:    s_add_u32 s8, s16, 0xd0
3517; GFX7-HSA-NEXT:    s_addc_u32 s9, s17, 0
3518; GFX7-HSA-NEXT:    v_mov_b32_e32 v24, s9
3519; GFX7-HSA-NEXT:    v_mov_b32_e32 v23, s8
3520; GFX7-HSA-NEXT:    s_add_u32 s8, s16, 0xc0
3521; GFX7-HSA-NEXT:    s_addc_u32 s9, s17, 0
3522; GFX7-HSA-NEXT:    v_mov_b32_e32 v26, s9
3523; GFX7-HSA-NEXT:    v_mov_b32_e32 v25, s8
3524; GFX7-HSA-NEXT:    s_add_u32 s8, s16, 0xb0
3525; GFX7-HSA-NEXT:    s_addc_u32 s9, s17, 0
3526; GFX7-HSA-NEXT:    v_mov_b32_e32 v28, s9
3527; GFX7-HSA-NEXT:    v_mov_b32_e32 v27, s8
3528; GFX7-HSA-NEXT:    s_add_u32 s8, s16, 0xa0
3529; GFX7-HSA-NEXT:    v_mov_b32_e32 v8, s66
3530; GFX7-HSA-NEXT:    v_mov_b32_e32 v9, s54
3531; GFX7-HSA-NEXT:    v_mov_b32_e32 v10, s13
3532; GFX7-HSA-NEXT:    v_mov_b32_e32 v11, s53
3533; GFX7-HSA-NEXT:    s_addc_u32 s9, s17, 0
3534; GFX7-HSA-NEXT:    flat_store_dwordx4 v[23:24], v[8:11]
3535; GFX7-HSA-NEXT:    v_mov_b32_e32 v12, s65
3536; GFX7-HSA-NEXT:    v_mov_b32_e32 v10, s9
3537; GFX7-HSA-NEXT:    v_mov_b32_e32 v9, s8
3538; GFX7-HSA-NEXT:    s_add_u32 s8, s16, 0x90
3539; GFX7-HSA-NEXT:    v_mov_b32_e32 v13, s52
3540; GFX7-HSA-NEXT:    v_mov_b32_e32 v14, s12
3541; GFX7-HSA-NEXT:    v_mov_b32_e32 v15, s51
3542; GFX7-HSA-NEXT:    s_addc_u32 s9, s17, 0
3543; GFX7-HSA-NEXT:    flat_store_dwordx4 v[25:26], v[12:15]
3544; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s68
3545; GFX7-HSA-NEXT:    v_mov_b32_e32 v13, s9
3546; GFX7-HSA-NEXT:    v_mov_b32_e32 v12, s8
3547; GFX7-HSA-NEXT:    s_add_u32 s8, s16, 0x80
3548; GFX7-HSA-NEXT:    s_addc_u32 s9, s17, 0
3549; GFX7-HSA-NEXT:    v_mov_b32_e32 v15, s9
3550; GFX7-HSA-NEXT:    v_mov_b32_e32 v1, s58
3551; GFX7-HSA-NEXT:    v_mov_b32_e32 v2, s15
3552; GFX7-HSA-NEXT:    v_mov_b32_e32 v3, s57
3553; GFX7-HSA-NEXT:    v_mov_b32_e32 v14, s8
3554; GFX7-HSA-NEXT:    s_add_u32 s8, s16, 0x70
3555; GFX7-HSA-NEXT:    flat_store_dwordx4 v[19:20], v[0:3]
3556; GFX7-HSA-NEXT:    s_addc_u32 s9, s17, 0
3557; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s63
3558; GFX7-HSA-NEXT:    v_mov_b32_e32 v1, s48
3559; GFX7-HSA-NEXT:    v_mov_b32_e32 v2, s10
3560; GFX7-HSA-NEXT:    v_mov_b32_e32 v3, s47
3561; GFX7-HSA-NEXT:    flat_store_dwordx4 v[9:10], v[0:3]
3562; GFX7-HSA-NEXT:    v_mov_b32_e32 v16, s64
3563; GFX7-HSA-NEXT:    v_mov_b32_e32 v2, s6
3564; GFX7-HSA-NEXT:    s_add_u32 s6, s16, 0x60
3565; GFX7-HSA-NEXT:    v_mov_b32_e32 v17, s50
3566; GFX7-HSA-NEXT:    v_mov_b32_e32 v18, s11
3567; GFX7-HSA-NEXT:    v_mov_b32_e32 v19, s49
3568; GFX7-HSA-NEXT:    v_mov_b32_e32 v10, s7
3569; GFX7-HSA-NEXT:    s_addc_u32 s7, s17, 0
3570; GFX7-HSA-NEXT:    v_mov_b32_e32 v4, s67
3571; GFX7-HSA-NEXT:    v_mov_b32_e32 v5, s56
3572; GFX7-HSA-NEXT:    v_mov_b32_e32 v6, s14
3573; GFX7-HSA-NEXT:    v_mov_b32_e32 v7, s55
3574; GFX7-HSA-NEXT:    flat_store_dwordx4 v[27:28], v[16:19]
3575; GFX7-HSA-NEXT:    v_mov_b32_e32 v20, s61
3576; GFX7-HSA-NEXT:    v_mov_b32_e32 v19, s7
3577; GFX7-HSA-NEXT:    flat_store_dwordx4 v[21:22], v[4:7]
3578; GFX7-HSA-NEXT:    v_mov_b32_e32 v21, s46
3579; GFX7-HSA-NEXT:    v_mov_b32_e32 v22, s62
3580; GFX7-HSA-NEXT:    v_mov_b32_e32 v4, s59
3581; GFX7-HSA-NEXT:    v_mov_b32_e32 v23, s44
3582; GFX7-HSA-NEXT:    v_mov_b32_e32 v5, s43
3583; GFX7-HSA-NEXT:    v_mov_b32_e32 v17, s9
3584; GFX7-HSA-NEXT:    v_mov_b32_e32 v18, s6
3585; GFX7-HSA-NEXT:    s_add_u32 s6, s16, 0x50
3586; GFX7-HSA-NEXT:    v_mov_b32_e32 v6, s60
3587; GFX7-HSA-NEXT:    v_mov_b32_e32 v8, s45
3588; GFX7-HSA-NEXT:    v_mov_b32_e32 v7, s41
3589; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s42
3590; GFX7-HSA-NEXT:    v_mov_b32_e32 v9, s39
3591; GFX7-HSA-NEXT:    v_mov_b32_e32 v11, s38
3592; GFX7-HSA-NEXT:    v_mov_b32_e32 v1, s37
3593; GFX7-HSA-NEXT:    v_mov_b32_e32 v16, s8
3594; GFX7-HSA-NEXT:    v_mov_b32_e32 v3, s35
3595; GFX7-HSA-NEXT:    flat_store_dwordx4 v[12:13], v[20:23]
3596; GFX7-HSA-NEXT:    flat_store_dwordx4 v[14:15], v[4:7]
3597; GFX7-HSA-NEXT:    flat_store_dwordx4 v[16:17], v[8:11]
3598; GFX7-HSA-NEXT:    flat_store_dwordx4 v[18:19], v[0:3]
3599; GFX7-HSA-NEXT:    s_addc_u32 s7, s17, 0
3600; GFX7-HSA-NEXT:    v_mov_b32_e32 v4, s6
3601; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s40
3602; GFX7-HSA-NEXT:    v_mov_b32_e32 v1, s33
3603; GFX7-HSA-NEXT:    v_mov_b32_e32 v2, s5
3604; GFX7-HSA-NEXT:    v_mov_b32_e32 v3, s31
3605; GFX7-HSA-NEXT:    v_mov_b32_e32 v5, s7
3606; GFX7-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
3607; GFX7-HSA-NEXT:    s_nop 0
3608; GFX7-HSA-NEXT:    v_mov_b32_e32 v2, s4
3609; GFX7-HSA-NEXT:    s_add_u32 s4, s16, 64
3610; GFX7-HSA-NEXT:    s_addc_u32 s5, s17, 0
3611; GFX7-HSA-NEXT:    v_mov_b32_e32 v4, s4
3612; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s36
3613; GFX7-HSA-NEXT:    v_mov_b32_e32 v1, s29
3614; GFX7-HSA-NEXT:    v_mov_b32_e32 v3, s28
3615; GFX7-HSA-NEXT:    v_mov_b32_e32 v5, s5
3616; GFX7-HSA-NEXT:    s_add_u32 s4, s16, 48
3617; GFX7-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
3618; GFX7-HSA-NEXT:    s_addc_u32 s5, s17, 0
3619; GFX7-HSA-NEXT:    v_mov_b32_e32 v4, s4
3620; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s34
3621; GFX7-HSA-NEXT:    v_mov_b32_e32 v1, s26
3622; GFX7-HSA-NEXT:    v_mov_b32_e32 v2, s3
3623; GFX7-HSA-NEXT:    v_mov_b32_e32 v3, s25
3624; GFX7-HSA-NEXT:    v_mov_b32_e32 v5, s5
3625; GFX7-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
3626; GFX7-HSA-NEXT:    s_nop 0
3627; GFX7-HSA-NEXT:    v_mov_b32_e32 v2, s2
3628; GFX7-HSA-NEXT:    s_add_u32 s2, s16, 32
3629; GFX7-HSA-NEXT:    s_addc_u32 s3, s17, 0
3630; GFX7-HSA-NEXT:    v_mov_b32_e32 v5, s3
3631; GFX7-HSA-NEXT:    v_mov_b32_e32 v4, s2
3632; GFX7-HSA-NEXT:    s_add_u32 s2, s16, 16
3633; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s30
3634; GFX7-HSA-NEXT:    v_mov_b32_e32 v1, s23
3635; GFX7-HSA-NEXT:    v_mov_b32_e32 v3, s22
3636; GFX7-HSA-NEXT:    s_addc_u32 s3, s17, 0
3637; GFX7-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
3638; GFX7-HSA-NEXT:    v_mov_b32_e32 v5, s3
3639; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s27
3640; GFX7-HSA-NEXT:    v_mov_b32_e32 v1, s21
3641; GFX7-HSA-NEXT:    v_mov_b32_e32 v2, s1
3642; GFX7-HSA-NEXT:    v_mov_b32_e32 v3, s20
3643; GFX7-HSA-NEXT:    v_mov_b32_e32 v4, s2
3644; GFX7-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
3645; GFX7-HSA-NEXT:    v_mov_b32_e32 v4, s16
3646; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s24
3647; GFX7-HSA-NEXT:    v_mov_b32_e32 v1, s19
3648; GFX7-HSA-NEXT:    v_mov_b32_e32 v2, s0
3649; GFX7-HSA-NEXT:    v_mov_b32_e32 v3, s18
3650; GFX7-HSA-NEXT:    v_mov_b32_e32 v5, s17
3651; GFX7-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
3652; GFX7-HSA-NEXT:    s_endpgm
3653;
3654; GFX8-NOHSA-LABEL: constant_zextload_v64i8_to_v64i32:
3655; GFX8-NOHSA:       ; %bb.0:
3656; GFX8-NOHSA-NEXT:    s_load_dwordx4 s[16:19], s[4:5], 0x24
3657; GFX8-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
3658; GFX8-NOHSA-NEXT:    s_load_dwordx16 s[0:15], s[18:19], 0x0
3659; GFX8-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
3660; GFX8-NOHSA-NEXT:    s_lshr_b32 s18, s0, 24
3661; GFX8-NOHSA-NEXT:    s_bfe_u32 s19, s0, 0x80008
3662; GFX8-NOHSA-NEXT:    s_lshr_b32 s20, s1, 24
3663; GFX8-NOHSA-NEXT:    s_bfe_u32 s21, s1, 0x80008
3664; GFX8-NOHSA-NEXT:    s_lshr_b32 s22, s2, 24
3665; GFX8-NOHSA-NEXT:    s_bfe_u32 s23, s2, 0x80008
3666; GFX8-NOHSA-NEXT:    s_lshr_b32 s25, s3, 24
3667; GFX8-NOHSA-NEXT:    s_bfe_u32 s26, s3, 0x80008
3668; GFX8-NOHSA-NEXT:    s_lshr_b32 s28, s4, 24
3669; GFX8-NOHSA-NEXT:    s_bfe_u32 s29, s4, 0x80008
3670; GFX8-NOHSA-NEXT:    s_lshr_b32 s31, s5, 24
3671; GFX8-NOHSA-NEXT:    s_bfe_u32 s33, s5, 0x80008
3672; GFX8-NOHSA-NEXT:    s_lshr_b32 s35, s6, 24
3673; GFX8-NOHSA-NEXT:    s_bfe_u32 s36, s6, 0x80008
3674; GFX8-NOHSA-NEXT:    s_lshr_b32 s37, s7, 24
3675; GFX8-NOHSA-NEXT:    s_bfe_u32 s38, s7, 0x80008
3676; GFX8-NOHSA-NEXT:    s_lshr_b32 s39, s8, 24
3677; GFX8-NOHSA-NEXT:    s_bfe_u32 s40, s8, 0x80008
3678; GFX8-NOHSA-NEXT:    s_lshr_b32 s41, s9, 24
3679; GFX8-NOHSA-NEXT:    s_bfe_u32 s42, s9, 0x80008
3680; GFX8-NOHSA-NEXT:    s_lshr_b32 s43, s10, 24
3681; GFX8-NOHSA-NEXT:    s_bfe_u32 s44, s10, 0x80008
3682; GFX8-NOHSA-NEXT:    s_lshr_b32 s45, s11, 24
3683; GFX8-NOHSA-NEXT:    s_bfe_u32 s46, s11, 0x80008
3684; GFX8-NOHSA-NEXT:    s_lshr_b32 s47, s12, 24
3685; GFX8-NOHSA-NEXT:    s_bfe_u32 s48, s12, 0x80008
3686; GFX8-NOHSA-NEXT:    s_lshr_b32 s49, s13, 24
3687; GFX8-NOHSA-NEXT:    s_bfe_u32 s50, s13, 0x80008
3688; GFX8-NOHSA-NEXT:    s_lshr_b32 s51, s14, 24
3689; GFX8-NOHSA-NEXT:    s_bfe_u32 s52, s14, 0x80008
3690; GFX8-NOHSA-NEXT:    s_lshr_b32 s53, s15, 24
3691; GFX8-NOHSA-NEXT:    s_bfe_u32 s54, s15, 0x80008
3692; GFX8-NOHSA-NEXT:    s_and_b32 s24, s0, 0xff
3693; GFX8-NOHSA-NEXT:    s_bfe_u32 s0, s0, 0x80010
3694; GFX8-NOHSA-NEXT:    s_and_b32 s27, s1, 0xff
3695; GFX8-NOHSA-NEXT:    s_bfe_u32 s1, s1, 0x80010
3696; GFX8-NOHSA-NEXT:    s_and_b32 s30, s2, 0xff
3697; GFX8-NOHSA-NEXT:    s_bfe_u32 s2, s2, 0x80010
3698; GFX8-NOHSA-NEXT:    s_and_b32 s34, s3, 0xff
3699; GFX8-NOHSA-NEXT:    s_bfe_u32 s3, s3, 0x80010
3700; GFX8-NOHSA-NEXT:    s_and_b32 s55, s4, 0xff
3701; GFX8-NOHSA-NEXT:    s_bfe_u32 s4, s4, 0x80010
3702; GFX8-NOHSA-NEXT:    s_and_b32 s56, s5, 0xff
3703; GFX8-NOHSA-NEXT:    s_bfe_u32 s5, s5, 0x80010
3704; GFX8-NOHSA-NEXT:    s_and_b32 s57, s6, 0xff
3705; GFX8-NOHSA-NEXT:    s_bfe_u32 s58, s6, 0x80010
3706; GFX8-NOHSA-NEXT:    s_and_b32 s59, s7, 0xff
3707; GFX8-NOHSA-NEXT:    s_bfe_u32 s60, s7, 0x80010
3708; GFX8-NOHSA-NEXT:    s_and_b32 s61, s8, 0xff
3709; GFX8-NOHSA-NEXT:    s_bfe_u32 s8, s8, 0x80010
3710; GFX8-NOHSA-NEXT:    s_and_b32 s62, s9, 0xff
3711; GFX8-NOHSA-NEXT:    s_bfe_u32 s9, s9, 0x80010
3712; GFX8-NOHSA-NEXT:    s_and_b32 s63, s10, 0xff
3713; GFX8-NOHSA-NEXT:    s_bfe_u32 s10, s10, 0x80010
3714; GFX8-NOHSA-NEXT:    s_and_b32 s64, s11, 0xff
3715; GFX8-NOHSA-NEXT:    s_bfe_u32 s11, s11, 0x80010
3716; GFX8-NOHSA-NEXT:    s_and_b32 s65, s12, 0xff
3717; GFX8-NOHSA-NEXT:    s_bfe_u32 s12, s12, 0x80010
3718; GFX8-NOHSA-NEXT:    s_and_b32 s66, s13, 0xff
3719; GFX8-NOHSA-NEXT:    s_bfe_u32 s13, s13, 0x80010
3720; GFX8-NOHSA-NEXT:    s_and_b32 s67, s14, 0xff
3721; GFX8-NOHSA-NEXT:    s_bfe_u32 s14, s14, 0x80010
3722; GFX8-NOHSA-NEXT:    s_and_b32 s6, s15, 0xff
3723; GFX8-NOHSA-NEXT:    s_bfe_u32 s7, s15, 0x80010
3724; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s6
3725; GFX8-NOHSA-NEXT:    s_add_u32 s6, s16, 0xf0
3726; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s7
3727; GFX8-NOHSA-NEXT:    s_addc_u32 s7, s17, 0
3728; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s6
3729; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s54
3730; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v3, s53
3731; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s7
3732; GFX8-NOHSA-NEXT:    s_add_u32 s6, s16, 0xe0
3733; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
3734; GFX8-NOHSA-NEXT:    s_addc_u32 s7, s17, 0
3735; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s6
3736; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s67
3737; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s52
3738; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s14
3739; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v3, s51
3740; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s7
3741; GFX8-NOHSA-NEXT:    s_add_u32 s6, s16, 0xd0
3742; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
3743; GFX8-NOHSA-NEXT:    s_addc_u32 s7, s17, 0
3744; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s6
3745; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s66
3746; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s50
3747; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s13
3748; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v3, s49
3749; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s7
3750; GFX8-NOHSA-NEXT:    s_add_u32 s6, s16, 0xc0
3751; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
3752; GFX8-NOHSA-NEXT:    s_addc_u32 s7, s17, 0
3753; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s6
3754; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s65
3755; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s48
3756; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s12
3757; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v3, s47
3758; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s7
3759; GFX8-NOHSA-NEXT:    s_add_u32 s6, s16, 0xb0
3760; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
3761; GFX8-NOHSA-NEXT:    s_addc_u32 s7, s17, 0
3762; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s6
3763; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s64
3764; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s46
3765; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s11
3766; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v3, s45
3767; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s7
3768; GFX8-NOHSA-NEXT:    s_add_u32 s6, s16, 0xa0
3769; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
3770; GFX8-NOHSA-NEXT:    s_addc_u32 s7, s17, 0
3771; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s6
3772; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s63
3773; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s44
3774; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s10
3775; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v3, s43
3776; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s7
3777; GFX8-NOHSA-NEXT:    s_add_u32 s6, s16, 0x90
3778; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
3779; GFX8-NOHSA-NEXT:    s_addc_u32 s7, s17, 0
3780; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s6
3781; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s62
3782; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s42
3783; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s9
3784; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v3, s41
3785; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s7
3786; GFX8-NOHSA-NEXT:    s_add_u32 s6, s16, 0x80
3787; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
3788; GFX8-NOHSA-NEXT:    s_addc_u32 s7, s17, 0
3789; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s6
3790; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s61
3791; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s40
3792; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s8
3793; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v3, s39
3794; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s7
3795; GFX8-NOHSA-NEXT:    s_add_u32 s6, s16, 0x70
3796; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
3797; GFX8-NOHSA-NEXT:    s_addc_u32 s7, s17, 0
3798; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s6
3799; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s59
3800; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s38
3801; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s60
3802; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v3, s37
3803; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s7
3804; GFX8-NOHSA-NEXT:    s_add_u32 s6, s16, 0x60
3805; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
3806; GFX8-NOHSA-NEXT:    s_addc_u32 s7, s17, 0
3807; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s6
3808; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s57
3809; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s36
3810; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s58
3811; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v3, s35
3812; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s7
3813; GFX8-NOHSA-NEXT:    s_add_u32 s6, s16, 0x50
3814; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
3815; GFX8-NOHSA-NEXT:    s_addc_u32 s7, s17, 0
3816; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s6
3817; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s56
3818; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s33
3819; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s5
3820; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v3, s31
3821; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s7
3822; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
3823; GFX8-NOHSA-NEXT:    s_nop 0
3824; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s4
3825; GFX8-NOHSA-NEXT:    s_add_u32 s4, s16, 64
3826; GFX8-NOHSA-NEXT:    s_addc_u32 s5, s17, 0
3827; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s4
3828; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s55
3829; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s29
3830; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v3, s28
3831; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s5
3832; GFX8-NOHSA-NEXT:    s_add_u32 s4, s16, 48
3833; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
3834; GFX8-NOHSA-NEXT:    s_addc_u32 s5, s17, 0
3835; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s4
3836; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s34
3837; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s26
3838; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s3
3839; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v3, s25
3840; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s5
3841; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
3842; GFX8-NOHSA-NEXT:    s_nop 0
3843; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s2
3844; GFX8-NOHSA-NEXT:    s_add_u32 s2, s16, 32
3845; GFX8-NOHSA-NEXT:    s_addc_u32 s3, s17, 0
3846; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s3
3847; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s2
3848; GFX8-NOHSA-NEXT:    s_add_u32 s2, s16, 16
3849; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s30
3850; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s23
3851; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v3, s22
3852; GFX8-NOHSA-NEXT:    s_addc_u32 s3, s17, 0
3853; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
3854; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s3
3855; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s27
3856; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s21
3857; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s1
3858; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v3, s20
3859; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s2
3860; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
3861; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s16
3862; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s24
3863; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s19
3864; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s0
3865; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v3, s18
3866; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s17
3867; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
3868; GFX8-NOHSA-NEXT:    s_endpgm
3869;
3870; EG-LABEL: constant_zextload_v64i8_to_v64i32:
3871; EG:       ; %bb.0:
3872; EG-NEXT:    ALU 0, @30, KC0[CB0:0-32], KC1[]
3873; EG-NEXT:    TEX 1 @22
3874; EG-NEXT:    ALU 59, @31, KC0[CB0:0-32], KC1[]
3875; EG-NEXT:    TEX 1 @26
3876; EG-NEXT:    ALU 88, @91, KC0[CB0:0-32], KC1[]
3877; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T48.XYZW, T50.X, 0
3878; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T46.XYZW, T49.X, 0
3879; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T45.XYZW, T47.X, 0
3880; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T43.XYZW, T32.X, 0
3881; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T41.XYZW, T44.X, 0
3882; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T39.XYZW, T42.X, 0
3883; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T38.XYZW, T40.X, 0
3884; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T36.XYZW, T33.X, 0
3885; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T34.XYZW, T37.X, 0
3886; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T21.XYZW, T35.X, 0
3887; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T30.XYZW, T31.X, 0
3888; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T28.XYZW, T22.X, 0
3889; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T26.XYZW, T29.X, 0
3890; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T24.XYZW, T27.X, 0
3891; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T20.XYZW, T25.X, 0
3892; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T19.XYZW, T23.X, 1
3893; EG-NEXT:    CF_END
3894; EG-NEXT:    Fetch clause starting at 22:
3895; EG-NEXT:     VTX_READ_128 T22.XYZW, T21.X, 16, #1
3896; EG-NEXT:     VTX_READ_128 T23.XYZW, T21.X, 0, #1
3897; EG-NEXT:    Fetch clause starting at 26:
3898; EG-NEXT:     VTX_READ_128 T32.XYZW, T21.X, 48, #1
3899; EG-NEXT:     VTX_READ_128 T33.XYZW, T21.X, 32, #1
3900; EG-NEXT:    ALU clause starting at 30:
3901; EG-NEXT:     MOV * T21.X, KC0[2].Z,
3902; EG-NEXT:    ALU clause starting at 31:
3903; EG-NEXT:     MOV * T0.W, literal.x,
3904; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
3905; EG-NEXT:     BFE_UINT * T19.Z, T23.X, literal.x, PV.W,
3906; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
3907; EG-NEXT:     BFE_UINT T19.Y, T23.X, literal.x, T0.W,
3908; EG-NEXT:     BFE_UINT T20.Z, T23.Y, literal.y, T0.W,
3909; EG-NEXT:     LSHR * T19.W, T23.X, literal.z,
3910; EG-NEXT:    8(1.121039e-44), 16(2.242078e-44)
3911; EG-NEXT:    24(3.363116e-44), 0(0.000000e+00)
3912; EG-NEXT:     AND_INT T19.X, T23.X, literal.x,
3913; EG-NEXT:     BFE_UINT T20.Y, T23.Y, literal.y, T0.W,
3914; EG-NEXT:     LSHR * T23.X, KC0[2].Y, literal.z,
3915; EG-NEXT:    255(3.573311e-43), 8(1.121039e-44)
3916; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
3917; EG-NEXT:     BFE_UINT T24.Z, T23.Z, literal.x, T0.W,
3918; EG-NEXT:     LSHR * T20.W, T23.Y, literal.y,
3919; EG-NEXT:    16(2.242078e-44), 24(3.363116e-44)
3920; EG-NEXT:     AND_INT T20.X, T23.Y, literal.x,
3921; EG-NEXT:     BFE_UINT T24.Y, T23.Z, literal.y, T0.W,
3922; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.z,
3923; EG-NEXT:    255(3.573311e-43), 8(1.121039e-44)
3924; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
3925; EG-NEXT:     LSHR T25.X, PV.W, literal.x,
3926; EG-NEXT:     BFE_UINT T26.Z, T23.W, literal.y, T0.W,
3927; EG-NEXT:     LSHR T24.W, T23.Z, literal.z,
3928; EG-NEXT:     AND_INT * T24.X, T23.Z, literal.w,
3929; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
3930; EG-NEXT:    24(3.363116e-44), 255(3.573311e-43)
3931; EG-NEXT:     BFE_UINT T26.Y, T23.W, literal.x, T0.W,
3932; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.y,
3933; EG-NEXT:    8(1.121039e-44), 32(4.484155e-44)
3934; EG-NEXT:     LSHR T27.X, PV.W, literal.x,
3935; EG-NEXT:     BFE_UINT T28.Z, T22.X, literal.y, T0.W, BS:VEC_021/SCL_122
3936; EG-NEXT:     LSHR T26.W, T23.W, literal.z,
3937; EG-NEXT:     AND_INT * T26.X, T23.W, literal.w,
3938; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
3939; EG-NEXT:    24(3.363116e-44), 255(3.573311e-43)
3940; EG-NEXT:     BFE_UINT T28.Y, T22.X, literal.x, T0.W,
3941; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.y,
3942; EG-NEXT:    8(1.121039e-44), 48(6.726233e-44)
3943; EG-NEXT:     LSHR T29.X, PV.W, literal.x,
3944; EG-NEXT:     BFE_UINT T30.Z, T22.Y, literal.y, T0.W,
3945; EG-NEXT:     LSHR T28.W, T22.X, literal.z,
3946; EG-NEXT:     AND_INT * T28.X, T22.X, literal.w,
3947; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
3948; EG-NEXT:    24(3.363116e-44), 255(3.573311e-43)
3949; EG-NEXT:     BFE_UINT T30.Y, T22.Y, literal.x, T0.W,
3950; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.y,
3951; EG-NEXT:    8(1.121039e-44), 64(8.968310e-44)
3952; EG-NEXT:     LSHR T22.X, PV.W, literal.x,
3953; EG-NEXT:     LSHR T30.W, T22.Y, literal.y,
3954; EG-NEXT:     AND_INT * T30.X, T22.Y, literal.z,
3955; EG-NEXT:    2(2.802597e-45), 24(3.363116e-44)
3956; EG-NEXT:    255(3.573311e-43), 0(0.000000e+00)
3957; EG-NEXT:     BFE_UINT T21.Z, T22.Z, literal.x, T0.W,
3958; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.y,
3959; EG-NEXT:    16(2.242078e-44), 80(1.121039e-43)
3960; EG-NEXT:     LSHR T31.X, PV.W, literal.x,
3961; EG-NEXT:     BFE_UINT * T21.Y, T22.Z, literal.y, T0.W,
3962; EG-NEXT:    2(2.802597e-45), 8(1.121039e-44)
3963; EG-NEXT:    ALU clause starting at 91:
3964; EG-NEXT:     BFE_UINT T34.Z, T22.W, literal.x, T0.W,
3965; EG-NEXT:     LSHR * T21.W, T22.Z, literal.y,
3966; EG-NEXT:    16(2.242078e-44), 24(3.363116e-44)
3967; EG-NEXT:     AND_INT T21.X, T22.Z, literal.x,
3968; EG-NEXT:     BFE_UINT T34.Y, T22.W, literal.y, T0.W,
3969; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.z,
3970; EG-NEXT:    255(3.573311e-43), 8(1.121039e-44)
3971; EG-NEXT:    96(1.345247e-43), 0(0.000000e+00)
3972; EG-NEXT:     LSHR T35.X, PV.W, literal.x,
3973; EG-NEXT:     BFE_UINT T36.Z, T33.X, literal.y, T0.W, BS:VEC_021/SCL_122
3974; EG-NEXT:     LSHR T34.W, T22.W, literal.z,
3975; EG-NEXT:     AND_INT * T34.X, T22.W, literal.w,
3976; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
3977; EG-NEXT:    24(3.363116e-44), 255(3.573311e-43)
3978; EG-NEXT:     BFE_UINT T36.Y, T33.X, literal.x, T0.W,
3979; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.y,
3980; EG-NEXT:    8(1.121039e-44), 112(1.569454e-43)
3981; EG-NEXT:     LSHR T37.X, PV.W, literal.x,
3982; EG-NEXT:     BFE_UINT T38.Z, T33.Y, literal.y, T0.W,
3983; EG-NEXT:     LSHR T36.W, T33.X, literal.z,
3984; EG-NEXT:     AND_INT * T36.X, T33.X, literal.w,
3985; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
3986; EG-NEXT:    24(3.363116e-44), 255(3.573311e-43)
3987; EG-NEXT:     BFE_UINT T38.Y, T33.Y, literal.x, T0.W,
3988; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.y,
3989; EG-NEXT:    8(1.121039e-44), 128(1.793662e-43)
3990; EG-NEXT:     LSHR T33.X, PV.W, literal.x,
3991; EG-NEXT:     BFE_UINT T39.Z, T33.Z, literal.y, T0.W,
3992; EG-NEXT:     LSHR T38.W, T33.Y, literal.z,
3993; EG-NEXT:     AND_INT * T38.X, T33.Y, literal.w,
3994; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
3995; EG-NEXT:    24(3.363116e-44), 255(3.573311e-43)
3996; EG-NEXT:     BFE_UINT T39.Y, T33.Z, literal.x, T0.W,
3997; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.y,
3998; EG-NEXT:    8(1.121039e-44), 144(2.017870e-43)
3999; EG-NEXT:     LSHR T40.X, PV.W, literal.x,
4000; EG-NEXT:     BFE_UINT T41.Z, T33.W, literal.y, T0.W,
4001; EG-NEXT:     LSHR T39.W, T33.Z, literal.z,
4002; EG-NEXT:     AND_INT * T39.X, T33.Z, literal.w,
4003; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
4004; EG-NEXT:    24(3.363116e-44), 255(3.573311e-43)
4005; EG-NEXT:     BFE_UINT T41.Y, T33.W, literal.x, T0.W,
4006; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.y,
4007; EG-NEXT:    8(1.121039e-44), 160(2.242078e-43)
4008; EG-NEXT:     LSHR T42.X, PV.W, literal.x,
4009; EG-NEXT:     BFE_UINT T43.Z, T32.X, literal.y, T0.W, BS:VEC_021/SCL_122
4010; EG-NEXT:     LSHR T41.W, T33.W, literal.z,
4011; EG-NEXT:     AND_INT * T41.X, T33.W, literal.w,
4012; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
4013; EG-NEXT:    24(3.363116e-44), 255(3.573311e-43)
4014; EG-NEXT:     BFE_UINT T43.Y, T32.X, literal.x, T0.W,
4015; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.y,
4016; EG-NEXT:    8(1.121039e-44), 176(2.466285e-43)
4017; EG-NEXT:     LSHR T44.X, PV.W, literal.x,
4018; EG-NEXT:     BFE_UINT T45.Z, T32.Y, literal.y, T0.W,
4019; EG-NEXT:     LSHR T43.W, T32.X, literal.z,
4020; EG-NEXT:     AND_INT * T43.X, T32.X, literal.w,
4021; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
4022; EG-NEXT:    24(3.363116e-44), 255(3.573311e-43)
4023; EG-NEXT:     BFE_UINT T45.Y, T32.Y, literal.x, T0.W,
4024; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.y,
4025; EG-NEXT:    8(1.121039e-44), 192(2.690493e-43)
4026; EG-NEXT:     LSHR T32.X, PV.W, literal.x,
4027; EG-NEXT:     BFE_UINT T46.Z, T32.Z, literal.y, T0.W,
4028; EG-NEXT:     LSHR T45.W, T32.Y, literal.z,
4029; EG-NEXT:     AND_INT * T45.X, T32.Y, literal.w,
4030; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
4031; EG-NEXT:    24(3.363116e-44), 255(3.573311e-43)
4032; EG-NEXT:     BFE_UINT T46.Y, T32.Z, literal.x, T0.W,
4033; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.y,
4034; EG-NEXT:    8(1.121039e-44), 208(2.914701e-43)
4035; EG-NEXT:     LSHR T47.X, PV.W, literal.x,
4036; EG-NEXT:     BFE_UINT T48.Z, T32.W, literal.y, T0.W,
4037; EG-NEXT:     LSHR T46.W, T32.Z, literal.z,
4038; EG-NEXT:     AND_INT * T46.X, T32.Z, literal.w,
4039; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
4040; EG-NEXT:    24(3.363116e-44), 255(3.573311e-43)
4041; EG-NEXT:     BFE_UINT T48.Y, T32.W, literal.x, T0.W,
4042; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
4043; EG-NEXT:    8(1.121039e-44), 224(3.138909e-43)
4044; EG-NEXT:     LSHR T49.X, PV.W, literal.x,
4045; EG-NEXT:     LSHR T48.W, T32.W, literal.y,
4046; EG-NEXT:     AND_INT * T48.X, T32.W, literal.z,
4047; EG-NEXT:    2(2.802597e-45), 24(3.363116e-44)
4048; EG-NEXT:    255(3.573311e-43), 0(0.000000e+00)
4049; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.x,
4050; EG-NEXT:    240(3.363116e-43), 0(0.000000e+00)
4051; EG-NEXT:     LSHR * T50.X, PV.W, literal.x,
4052; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
4053;
4054; GFX12-LABEL: constant_zextload_v64i8_to_v64i32:
4055; GFX12:       ; %bb.0:
4056; GFX12-NEXT:    s_load_b128 s[16:19], s[4:5], 0x24
4057; GFX12-NEXT:    s_wait_kmcnt 0x0
4058; GFX12-NEXT:    s_load_b512 s[0:15], s[18:19], 0x0
4059; GFX12-NEXT:    s_wait_kmcnt 0x0
4060; GFX12-NEXT:    s_lshr_b32 s49, s15, 24
4061; GFX12-NEXT:    s_bfe_u32 s50, s15, 0x80008
4062; GFX12-NEXT:    s_and_b32 s66, s15, 0xff
4063; GFX12-NEXT:    s_bfe_u32 s15, s15, 0x80010
4064; GFX12-NEXT:    s_lshr_b32 s47, s14, 24
4065; GFX12-NEXT:    s_bfe_u32 s48, s14, 0x80008
4066; GFX12-NEXT:    s_and_b32 s65, s14, 0xff
4067; GFX12-NEXT:    s_bfe_u32 s14, s14, 0x80010
4068; GFX12-NEXT:    v_dual_mov_b32 v24, 0 :: v_dual_mov_b32 v1, s50
4069; GFX12-NEXT:    s_lshr_b32 s45, s13, 24
4070; GFX12-NEXT:    s_bfe_u32 s46, s13, 0x80008
4071; GFX12-NEXT:    s_and_b32 s64, s13, 0xff
4072; GFX12-NEXT:    s_bfe_u32 s13, s13, 0x80010
4073; GFX12-NEXT:    v_dual_mov_b32 v0, s66 :: v_dual_mov_b32 v3, s49
4074; GFX12-NEXT:    v_dual_mov_b32 v2, s15 :: v_dual_mov_b32 v5, s48
4075; GFX12-NEXT:    s_lshr_b32 s43, s12, 24
4076; GFX12-NEXT:    s_bfe_u32 s44, s12, 0x80008
4077; GFX12-NEXT:    s_and_b32 s63, s12, 0xff
4078; GFX12-NEXT:    s_bfe_u32 s12, s12, 0x80010
4079; GFX12-NEXT:    v_dual_mov_b32 v4, s65 :: v_dual_mov_b32 v7, s47
4080; GFX12-NEXT:    v_dual_mov_b32 v6, s14 :: v_dual_mov_b32 v9, s46
4081; GFX12-NEXT:    v_dual_mov_b32 v8, s64 :: v_dual_mov_b32 v11, s45
4082; GFX12-NEXT:    v_dual_mov_b32 v10, s13 :: v_dual_mov_b32 v13, s44
4083; GFX12-NEXT:    s_lshr_b32 s41, s11, 24
4084; GFX12-NEXT:    s_bfe_u32 s42, s11, 0x80008
4085; GFX12-NEXT:    s_and_b32 s62, s11, 0xff
4086; GFX12-NEXT:    v_dual_mov_b32 v12, s63 :: v_dual_mov_b32 v15, s43
4087; GFX12-NEXT:    v_mov_b32_e32 v14, s12
4088; GFX12-NEXT:    s_bfe_u32 s11, s11, 0x80010
4089; GFX12-NEXT:    s_lshr_b32 s39, s10, 24
4090; GFX12-NEXT:    s_bfe_u32 s40, s10, 0x80008
4091; GFX12-NEXT:    s_and_b32 s61, s10, 0xff
4092; GFX12-NEXT:    s_bfe_u32 s10, s10, 0x80010
4093; GFX12-NEXT:    s_lshr_b32 s37, s9, 24
4094; GFX12-NEXT:    s_bfe_u32 s38, s9, 0x80008
4095; GFX12-NEXT:    s_and_b32 s60, s9, 0xff
4096; GFX12-NEXT:    s_bfe_u32 s9, s9, 0x80010
4097; GFX12-NEXT:    s_clause 0x3
4098; GFX12-NEXT:    global_store_b128 v24, v[0:3], s[16:17] offset:240
4099; GFX12-NEXT:    global_store_b128 v24, v[4:7], s[16:17] offset:224
4100; GFX12-NEXT:    global_store_b128 v24, v[8:11], s[16:17] offset:208
4101; GFX12-NEXT:    global_store_b128 v24, v[12:15], s[16:17] offset:192
4102; GFX12-NEXT:    v_dual_mov_b32 v1, s42 :: v_dual_mov_b32 v0, s62
4103; GFX12-NEXT:    v_dual_mov_b32 v3, s41 :: v_dual_mov_b32 v2, s11
4104; GFX12-NEXT:    v_mov_b32_e32 v5, s40
4105; GFX12-NEXT:    s_lshr_b32 s35, s8, 24
4106; GFX12-NEXT:    s_bfe_u32 s36, s8, 0x80008
4107; GFX12-NEXT:    s_and_b32 s59, s8, 0xff
4108; GFX12-NEXT:    s_bfe_u32 s8, s8, 0x80010
4109; GFX12-NEXT:    v_dual_mov_b32 v4, s61 :: v_dual_mov_b32 v7, s39
4110; GFX12-NEXT:    v_dual_mov_b32 v6, s10 :: v_dual_mov_b32 v9, s38
4111; GFX12-NEXT:    s_lshr_b32 s33, s7, 24
4112; GFX12-NEXT:    s_bfe_u32 s34, s7, 0x80008
4113; GFX12-NEXT:    s_and_b32 s58, s7, 0xff
4114; GFX12-NEXT:    s_bfe_u32 s7, s7, 0x80010
4115; GFX12-NEXT:    v_dual_mov_b32 v8, s60 :: v_dual_mov_b32 v11, s37
4116; GFX12-NEXT:    v_dual_mov_b32 v10, s9 :: v_dual_mov_b32 v13, s36
4117; GFX12-NEXT:    s_lshr_b32 s28, s5, 24
4118; GFX12-NEXT:    s_bfe_u32 s29, s5, 0x80008
4119; GFX12-NEXT:    s_lshr_b32 s30, s6, 24
4120; GFX12-NEXT:    s_bfe_u32 s31, s6, 0x80008
4121; GFX12-NEXT:    s_and_b32 s56, s5, 0xff
4122; GFX12-NEXT:    s_bfe_u32 s5, s5, 0x80010
4123; GFX12-NEXT:    s_and_b32 s57, s6, 0xff
4124; GFX12-NEXT:    s_bfe_u32 s6, s6, 0x80010
4125; GFX12-NEXT:    v_dual_mov_b32 v12, s59 :: v_dual_mov_b32 v15, s35
4126; GFX12-NEXT:    v_dual_mov_b32 v14, s8 :: v_dual_mov_b32 v17, s34
4127; GFX12-NEXT:    s_lshr_b32 s26, s4, 24
4128; GFX12-NEXT:    s_bfe_u32 s27, s4, 0x80008
4129; GFX12-NEXT:    s_and_b32 s55, s4, 0xff
4130; GFX12-NEXT:    s_bfe_u32 s4, s4, 0x80010
4131; GFX12-NEXT:    v_dual_mov_b32 v16, s58 :: v_dual_mov_b32 v19, s33
4132; GFX12-NEXT:    v_dual_mov_b32 v18, s7 :: v_dual_mov_b32 v21, s31
4133; GFX12-NEXT:    s_lshr_b32 s24, s3, 24
4134; GFX12-NEXT:    s_bfe_u32 s25, s3, 0x80008
4135; GFX12-NEXT:    s_and_b32 s54, s3, 0xff
4136; GFX12-NEXT:    s_bfe_u32 s3, s3, 0x80010
4137; GFX12-NEXT:    v_dual_mov_b32 v20, s57 :: v_dual_mov_b32 v23, s30
4138; GFX12-NEXT:    v_mov_b32_e32 v22, s6
4139; GFX12-NEXT:    s_clause 0x5
4140; GFX12-NEXT:    global_store_b128 v24, v[0:3], s[16:17] offset:176
4141; GFX12-NEXT:    global_store_b128 v24, v[4:7], s[16:17] offset:160
4142; GFX12-NEXT:    global_store_b128 v24, v[8:11], s[16:17] offset:144
4143; GFX12-NEXT:    global_store_b128 v24, v[12:15], s[16:17] offset:128
4144; GFX12-NEXT:    global_store_b128 v24, v[16:19], s[16:17] offset:112
4145; GFX12-NEXT:    global_store_b128 v24, v[20:23], s[16:17] offset:96
4146; GFX12-NEXT:    v_dual_mov_b32 v1, s29 :: v_dual_mov_b32 v0, s56
4147; GFX12-NEXT:    v_dual_mov_b32 v3, s28 :: v_dual_mov_b32 v2, s5
4148; GFX12-NEXT:    v_mov_b32_e32 v5, s27
4149; GFX12-NEXT:    s_lshr_b32 s22, s2, 24
4150; GFX12-NEXT:    s_bfe_u32 s23, s2, 0x80008
4151; GFX12-NEXT:    s_and_b32 s53, s2, 0xff
4152; GFX12-NEXT:    s_bfe_u32 s2, s2, 0x80010
4153; GFX12-NEXT:    v_dual_mov_b32 v4, s55 :: v_dual_mov_b32 v7, s26
4154; GFX12-NEXT:    v_dual_mov_b32 v6, s4 :: v_dual_mov_b32 v9, s25
4155; GFX12-NEXT:    s_lshr_b32 s20, s1, 24
4156; GFX12-NEXT:    s_bfe_u32 s21, s1, 0x80008
4157; GFX12-NEXT:    s_and_b32 s52, s1, 0xff
4158; GFX12-NEXT:    s_bfe_u32 s1, s1, 0x80010
4159; GFX12-NEXT:    v_dual_mov_b32 v8, s54 :: v_dual_mov_b32 v11, s24
4160; GFX12-NEXT:    v_dual_mov_b32 v10, s3 :: v_dual_mov_b32 v13, s23
4161; GFX12-NEXT:    s_lshr_b32 s18, s0, 24
4162; GFX12-NEXT:    s_bfe_u32 s19, s0, 0x80008
4163; GFX12-NEXT:    s_and_b32 s51, s0, 0xff
4164; GFX12-NEXT:    s_bfe_u32 s0, s0, 0x80010
4165; GFX12-NEXT:    v_dual_mov_b32 v12, s53 :: v_dual_mov_b32 v15, s22
4166; GFX12-NEXT:    v_dual_mov_b32 v14, s2 :: v_dual_mov_b32 v17, s21
4167; GFX12-NEXT:    v_dual_mov_b32 v16, s52 :: v_dual_mov_b32 v19, s20
4168; GFX12-NEXT:    v_dual_mov_b32 v18, s1 :: v_dual_mov_b32 v21, s19
4169; GFX12-NEXT:    s_wait_alu 0xfffe
4170; GFX12-NEXT:    v_dual_mov_b32 v20, s51 :: v_dual_mov_b32 v23, s18
4171; GFX12-NEXT:    v_mov_b32_e32 v22, s0
4172; GFX12-NEXT:    s_clause 0x5
4173; GFX12-NEXT:    global_store_b128 v24, v[0:3], s[16:17] offset:80
4174; GFX12-NEXT:    global_store_b128 v24, v[4:7], s[16:17] offset:64
4175; GFX12-NEXT:    global_store_b128 v24, v[8:11], s[16:17] offset:48
4176; GFX12-NEXT:    global_store_b128 v24, v[12:15], s[16:17] offset:32
4177; GFX12-NEXT:    global_store_b128 v24, v[16:19], s[16:17] offset:16
4178; GFX12-NEXT:    global_store_b128 v24, v[20:23], s[16:17]
4179; GFX12-NEXT:    s_endpgm
4180  %load = load <64 x i8>, ptr addrspace(4) %in
4181  %ext = zext <64 x i8> %load to <64 x i32>
4182  store <64 x i32> %ext, ptr addrspace(1) %out
4183  ret void
4184}
4185
4186define amdgpu_kernel void @constant_sextload_v64i8_to_v64i32(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
4187; GFX6-NOHSA-LABEL: constant_sextload_v64i8_to_v64i32:
4188; GFX6-NOHSA:       ; %bb.0:
4189; GFX6-NOHSA-NEXT:    s_load_dwordx4 s[16:19], s[4:5], 0x9
4190; GFX6-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
4191; GFX6-NOHSA-NEXT:    s_load_dwordx16 s[0:15], s[18:19], 0x0
4192; GFX6-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
4193; GFX6-NOHSA-NEXT:    s_ashr_i32 s18, s0, 24
4194; GFX6-NOHSA-NEXT:    s_bfe_i32 s19, s0, 0x80010
4195; GFX6-NOHSA-NEXT:    s_bfe_i32 s20, s0, 0x80008
4196; GFX6-NOHSA-NEXT:    s_sext_i32_i8 s21, s0
4197; GFX6-NOHSA-NEXT:    s_ashr_i32 s22, s1, 24
4198; GFX6-NOHSA-NEXT:    s_bfe_i32 s23, s1, 0x80010
4199; GFX6-NOHSA-NEXT:    s_bfe_i32 s24, s1, 0x80008
4200; GFX6-NOHSA-NEXT:    s_sext_i32_i8 s25, s1
4201; GFX6-NOHSA-NEXT:    s_ashr_i32 s26, s2, 24
4202; GFX6-NOHSA-NEXT:    s_bfe_i32 s27, s2, 0x80010
4203; GFX6-NOHSA-NEXT:    s_bfe_i32 s28, s2, 0x80008
4204; GFX6-NOHSA-NEXT:    s_sext_i32_i8 s29, s2
4205; GFX6-NOHSA-NEXT:    s_ashr_i32 s30, s3, 24
4206; GFX6-NOHSA-NEXT:    s_bfe_i32 s31, s3, 0x80010
4207; GFX6-NOHSA-NEXT:    s_bfe_i32 s33, s3, 0x80008
4208; GFX6-NOHSA-NEXT:    s_sext_i32_i8 s34, s3
4209; GFX6-NOHSA-NEXT:    s_ashr_i32 s35, s4, 24
4210; GFX6-NOHSA-NEXT:    s_bfe_i32 s36, s4, 0x80010
4211; GFX6-NOHSA-NEXT:    s_bfe_i32 s37, s4, 0x80008
4212; GFX6-NOHSA-NEXT:    s_sext_i32_i8 s4, s4
4213; GFX6-NOHSA-NEXT:    s_ashr_i32 s38, s5, 24
4214; GFX6-NOHSA-NEXT:    s_bfe_i32 s39, s5, 0x80010
4215; GFX6-NOHSA-NEXT:    s_bfe_i32 s40, s5, 0x80008
4216; GFX6-NOHSA-NEXT:    s_sext_i32_i8 s5, s5
4217; GFX6-NOHSA-NEXT:    s_ashr_i32 s41, s6, 24
4218; GFX6-NOHSA-NEXT:    s_bfe_i32 s42, s6, 0x80010
4219; GFX6-NOHSA-NEXT:    s_bfe_i32 s43, s6, 0x80008
4220; GFX6-NOHSA-NEXT:    s_sext_i32_i8 s6, s6
4221; GFX6-NOHSA-NEXT:    s_ashr_i32 s44, s7, 24
4222; GFX6-NOHSA-NEXT:    s_bfe_i32 s45, s7, 0x80010
4223; GFX6-NOHSA-NEXT:    s_bfe_i32 s46, s7, 0x80008
4224; GFX6-NOHSA-NEXT:    s_sext_i32_i8 s7, s7
4225; GFX6-NOHSA-NEXT:    s_ashr_i32 s47, s8, 24
4226; GFX6-NOHSA-NEXT:    s_bfe_i32 s48, s8, 0x80010
4227; GFX6-NOHSA-NEXT:    s_bfe_i32 s49, s8, 0x80008
4228; GFX6-NOHSA-NEXT:    s_sext_i32_i8 s8, s8
4229; GFX6-NOHSA-NEXT:    s_ashr_i32 s50, s9, 24
4230; GFX6-NOHSA-NEXT:    s_bfe_i32 s51, s9, 0x80010
4231; GFX6-NOHSA-NEXT:    s_bfe_i32 s52, s9, 0x80008
4232; GFX6-NOHSA-NEXT:    s_sext_i32_i8 s9, s9
4233; GFX6-NOHSA-NEXT:    s_ashr_i32 s53, s10, 24
4234; GFX6-NOHSA-NEXT:    s_bfe_i32 s54, s10, 0x80010
4235; GFX6-NOHSA-NEXT:    s_bfe_i32 s55, s10, 0x80008
4236; GFX6-NOHSA-NEXT:    s_sext_i32_i8 s10, s10
4237; GFX6-NOHSA-NEXT:    s_bfe_i32 s56, s11, 0x80010
4238; GFX6-NOHSA-NEXT:    s_bfe_i32 s57, s11, 0x80008
4239; GFX6-NOHSA-NEXT:    s_sext_i32_i8 s58, s11
4240; GFX6-NOHSA-NEXT:    s_ashr_i32 s59, s12, 24
4241; GFX6-NOHSA-NEXT:    s_bfe_i32 s60, s12, 0x80010
4242; GFX6-NOHSA-NEXT:    s_bfe_i32 s61, s12, 0x80008
4243; GFX6-NOHSA-NEXT:    s_sext_i32_i8 s12, s12
4244; GFX6-NOHSA-NEXT:    s_ashr_i32 s62, s13, 24
4245; GFX6-NOHSA-NEXT:    s_bfe_i32 s63, s13, 0x80010
4246; GFX6-NOHSA-NEXT:    s_bfe_i32 s64, s13, 0x80008
4247; GFX6-NOHSA-NEXT:    s_sext_i32_i8 s13, s13
4248; GFX6-NOHSA-NEXT:    s_ashr_i32 s65, s14, 24
4249; GFX6-NOHSA-NEXT:    s_bfe_i32 s66, s14, 0x80010
4250; GFX6-NOHSA-NEXT:    s_bfe_i32 s67, s14, 0x80008
4251; GFX6-NOHSA-NEXT:    s_sext_i32_i8 s14, s14
4252; GFX6-NOHSA-NEXT:    s_ashr_i32 s68, s15, 24
4253; GFX6-NOHSA-NEXT:    s_bfe_i32 s69, s15, 0x80010
4254; GFX6-NOHSA-NEXT:    s_bfe_i32 s70, s15, 0x80008
4255; GFX6-NOHSA-NEXT:    s_sext_i32_i8 s15, s15
4256; GFX6-NOHSA-NEXT:    s_ashr_i32 s11, s11, 24
4257; GFX6-NOHSA-NEXT:    s_mov_b32 s0, s16
4258; GFX6-NOHSA-NEXT:    s_mov_b32 s1, s17
4259; GFX6-NOHSA-NEXT:    s_mov_b32 s3, 0xf000
4260; GFX6-NOHSA-NEXT:    s_mov_b32 s2, -1
4261; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s15
4262; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s70
4263; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s69
4264; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s68
4265; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v4, s14
4266; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v5, s67
4267; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v6, s66
4268; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v7, s65
4269; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v8, s13
4270; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v9, s64
4271; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v10, s63
4272; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v11, s62
4273; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v12, s12
4274; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v13, s61
4275; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v14, s60
4276; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v15, s59
4277; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v16, s58
4278; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v17, s57
4279; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v18, s56
4280; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:240
4281; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
4282; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s10
4283; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v19, s11
4284; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s55
4285; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s54
4286; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s53
4287; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:224
4288; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:208
4289; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:192
4290; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:176
4291; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:160
4292; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
4293; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s9
4294; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s52
4295; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s51
4296; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s50
4297; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:144
4298; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
4299; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s8
4300; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s49
4301; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s48
4302; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s47
4303; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:128
4304; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
4305; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s7
4306; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s46
4307; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s45
4308; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s44
4309; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112
4310; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
4311; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s6
4312; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s43
4313; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s42
4314; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s41
4315; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96
4316; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
4317; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s5
4318; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s40
4319; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s39
4320; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s38
4321; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80
4322; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
4323; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s4
4324; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s37
4325; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s36
4326; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s35
4327; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64
4328; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
4329; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s34
4330; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s33
4331; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s31
4332; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s30
4333; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
4334; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
4335; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s29
4336; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s28
4337; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s27
4338; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s26
4339; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
4340; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
4341; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s25
4342; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s24
4343; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s23
4344; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s22
4345; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
4346; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
4347; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s21
4348; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s20
4349; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s19
4350; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s18
4351; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
4352; GFX6-NOHSA-NEXT:    s_endpgm
4353;
4354; GFX7-HSA-LABEL: constant_sextload_v64i8_to_v64i32:
4355; GFX7-HSA:       ; %bb.0:
4356; GFX7-HSA-NEXT:    s_load_dwordx4 s[16:19], s[8:9], 0x0
4357; GFX7-HSA-NEXT:    s_waitcnt lgkmcnt(0)
4358; GFX7-HSA-NEXT:    s_load_dwordx16 s[0:15], s[18:19], 0x0
4359; GFX7-HSA-NEXT:    s_waitcnt lgkmcnt(0)
4360; GFX7-HSA-NEXT:    s_ashr_i32 s18, s0, 24
4361; GFX7-HSA-NEXT:    s_bfe_i32 s19, s0, 0x80010
4362; GFX7-HSA-NEXT:    s_bfe_i32 s20, s0, 0x80008
4363; GFX7-HSA-NEXT:    s_ashr_i32 s21, s1, 24
4364; GFX7-HSA-NEXT:    s_bfe_i32 s22, s1, 0x80010
4365; GFX7-HSA-NEXT:    s_bfe_i32 s23, s1, 0x80008
4366; GFX7-HSA-NEXT:    s_ashr_i32 s24, s2, 24
4367; GFX7-HSA-NEXT:    s_bfe_i32 s25, s2, 0x80010
4368; GFX7-HSA-NEXT:    s_bfe_i32 s26, s2, 0x80008
4369; GFX7-HSA-NEXT:    s_ashr_i32 s27, s3, 24
4370; GFX7-HSA-NEXT:    s_bfe_i32 s28, s3, 0x80010
4371; GFX7-HSA-NEXT:    s_bfe_i32 s29, s3, 0x80008
4372; GFX7-HSA-NEXT:    s_ashr_i32 s30, s4, 24
4373; GFX7-HSA-NEXT:    s_bfe_i32 s31, s4, 0x80010
4374; GFX7-HSA-NEXT:    s_bfe_i32 s33, s4, 0x80008
4375; GFX7-HSA-NEXT:    s_ashr_i32 s34, s5, 24
4376; GFX7-HSA-NEXT:    s_bfe_i32 s35, s5, 0x80010
4377; GFX7-HSA-NEXT:    s_bfe_i32 s36, s5, 0x80008
4378; GFX7-HSA-NEXT:    s_ashr_i32 s37, s6, 24
4379; GFX7-HSA-NEXT:    s_bfe_i32 s38, s6, 0x80010
4380; GFX7-HSA-NEXT:    s_bfe_i32 s39, s6, 0x80008
4381; GFX7-HSA-NEXT:    s_sext_i32_i8 s40, s6
4382; GFX7-HSA-NEXT:    s_ashr_i32 s6, s7, 24
4383; GFX7-HSA-NEXT:    s_bfe_i32 s41, s7, 0x80010
4384; GFX7-HSA-NEXT:    s_bfe_i32 s42, s7, 0x80008
4385; GFX7-HSA-NEXT:    s_ashr_i32 s43, s8, 24
4386; GFX7-HSA-NEXT:    s_bfe_i32 s44, s8, 0x80010
4387; GFX7-HSA-NEXT:    s_bfe_i32 s45, s8, 0x80008
4388; GFX7-HSA-NEXT:    s_ashr_i32 s47, s9, 24
4389; GFX7-HSA-NEXT:    s_bfe_i32 s48, s9, 0x80010
4390; GFX7-HSA-NEXT:    s_bfe_i32 s49, s9, 0x80008
4391; GFX7-HSA-NEXT:    s_ashr_i32 s51, s10, 24
4392; GFX7-HSA-NEXT:    s_bfe_i32 s52, s10, 0x80010
4393; GFX7-HSA-NEXT:    s_bfe_i32 s53, s10, 0x80008
4394; GFX7-HSA-NEXT:    s_ashr_i32 s54, s11, 24
4395; GFX7-HSA-NEXT:    s_bfe_i32 s55, s11, 0x80010
4396; GFX7-HSA-NEXT:    s_bfe_i32 s56, s11, 0x80008
4397; GFX7-HSA-NEXT:    s_ashr_i32 s57, s12, 24
4398; GFX7-HSA-NEXT:    s_bfe_i32 s58, s12, 0x80010
4399; GFX7-HSA-NEXT:    s_bfe_i32 s59, s12, 0x80008
4400; GFX7-HSA-NEXT:    s_ashr_i32 s60, s13, 24
4401; GFX7-HSA-NEXT:    s_bfe_i32 s61, s13, 0x80010
4402; GFX7-HSA-NEXT:    s_bfe_i32 s62, s13, 0x80008
4403; GFX7-HSA-NEXT:    s_ashr_i32 s63, s14, 24
4404; GFX7-HSA-NEXT:    s_bfe_i32 s64, s14, 0x80010
4405; GFX7-HSA-NEXT:    s_bfe_i32 s65, s14, 0x80008
4406; GFX7-HSA-NEXT:    s_ashr_i32 s66, s15, 24
4407; GFX7-HSA-NEXT:    s_bfe_i32 s67, s15, 0x80010
4408; GFX7-HSA-NEXT:    s_bfe_i32 s68, s15, 0x80008
4409; GFX7-HSA-NEXT:    s_sext_i32_i8 s46, s8
4410; GFX7-HSA-NEXT:    s_add_u32 s8, s16, 0xf0
4411; GFX7-HSA-NEXT:    s_sext_i32_i8 s50, s9
4412; GFX7-HSA-NEXT:    s_addc_u32 s9, s17, 0
4413; GFX7-HSA-NEXT:    v_mov_b32_e32 v20, s9
4414; GFX7-HSA-NEXT:    v_mov_b32_e32 v19, s8
4415; GFX7-HSA-NEXT:    s_add_u32 s8, s16, 0xe0
4416; GFX7-HSA-NEXT:    s_addc_u32 s9, s17, 0
4417; GFX7-HSA-NEXT:    v_mov_b32_e32 v22, s9
4418; GFX7-HSA-NEXT:    v_mov_b32_e32 v21, s8
4419; GFX7-HSA-NEXT:    s_add_u32 s8, s16, 0xd0
4420; GFX7-HSA-NEXT:    s_addc_u32 s9, s17, 0
4421; GFX7-HSA-NEXT:    v_mov_b32_e32 v24, s9
4422; GFX7-HSA-NEXT:    v_mov_b32_e32 v23, s8
4423; GFX7-HSA-NEXT:    s_add_u32 s8, s16, 0xc0
4424; GFX7-HSA-NEXT:    s_addc_u32 s9, s17, 0
4425; GFX7-HSA-NEXT:    v_mov_b32_e32 v26, s9
4426; GFX7-HSA-NEXT:    v_mov_b32_e32 v25, s8
4427; GFX7-HSA-NEXT:    s_add_u32 s8, s16, 0xb0
4428; GFX7-HSA-NEXT:    s_addc_u32 s9, s17, 0
4429; GFX7-HSA-NEXT:    v_mov_b32_e32 v28, s9
4430; GFX7-HSA-NEXT:    s_sext_i32_i8 s13, s13
4431; GFX7-HSA-NEXT:    v_mov_b32_e32 v27, s8
4432; GFX7-HSA-NEXT:    s_add_u32 s8, s16, 0xa0
4433; GFX7-HSA-NEXT:    v_mov_b32_e32 v8, s13
4434; GFX7-HSA-NEXT:    v_mov_b32_e32 v9, s62
4435; GFX7-HSA-NEXT:    v_mov_b32_e32 v10, s61
4436; GFX7-HSA-NEXT:    v_mov_b32_e32 v11, s60
4437; GFX7-HSA-NEXT:    s_addc_u32 s9, s17, 0
4438; GFX7-HSA-NEXT:    flat_store_dwordx4 v[23:24], v[8:11]
4439; GFX7-HSA-NEXT:    s_sext_i32_i8 s12, s12
4440; GFX7-HSA-NEXT:    v_mov_b32_e32 v10, s9
4441; GFX7-HSA-NEXT:    v_mov_b32_e32 v9, s8
4442; GFX7-HSA-NEXT:    s_add_u32 s8, s16, 0x90
4443; GFX7-HSA-NEXT:    v_mov_b32_e32 v12, s12
4444; GFX7-HSA-NEXT:    v_mov_b32_e32 v13, s59
4445; GFX7-HSA-NEXT:    v_mov_b32_e32 v14, s58
4446; GFX7-HSA-NEXT:    v_mov_b32_e32 v15, s57
4447; GFX7-HSA-NEXT:    s_addc_u32 s9, s17, 0
4448; GFX7-HSA-NEXT:    flat_store_dwordx4 v[25:26], v[12:15]
4449; GFX7-HSA-NEXT:    s_sext_i32_i8 s15, s15
4450; GFX7-HSA-NEXT:    v_mov_b32_e32 v13, s9
4451; GFX7-HSA-NEXT:    v_mov_b32_e32 v12, s8
4452; GFX7-HSA-NEXT:    s_add_u32 s8, s16, 0x80
4453; GFX7-HSA-NEXT:    s_addc_u32 s9, s17, 0
4454; GFX7-HSA-NEXT:    s_sext_i32_i8 s7, s7
4455; GFX7-HSA-NEXT:    s_sext_i32_i8 s11, s11
4456; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s15
4457; GFX7-HSA-NEXT:    v_mov_b32_e32 v1, s68
4458; GFX7-HSA-NEXT:    v_mov_b32_e32 v2, s67
4459; GFX7-HSA-NEXT:    v_mov_b32_e32 v3, s66
4460; GFX7-HSA-NEXT:    v_mov_b32_e32 v11, s6
4461; GFX7-HSA-NEXT:    s_add_u32 s6, s16, 0x70
4462; GFX7-HSA-NEXT:    v_mov_b32_e32 v16, s11
4463; GFX7-HSA-NEXT:    v_mov_b32_e32 v17, s56
4464; GFX7-HSA-NEXT:    v_mov_b32_e32 v18, s55
4465; GFX7-HSA-NEXT:    flat_store_dwordx4 v[19:20], v[0:3]
4466; GFX7-HSA-NEXT:    v_mov_b32_e32 v19, s54
4467; GFX7-HSA-NEXT:    v_mov_b32_e32 v8, s7
4468; GFX7-HSA-NEXT:    s_addc_u32 s7, s17, 0
4469; GFX7-HSA-NEXT:    flat_store_dwordx4 v[27:28], v[16:19]
4470; GFX7-HSA-NEXT:    s_sext_i32_i8 s14, s14
4471; GFX7-HSA-NEXT:    v_mov_b32_e32 v17, s7
4472; GFX7-HSA-NEXT:    v_mov_b32_e32 v16, s6
4473; GFX7-HSA-NEXT:    s_add_u32 s6, s16, 0x60
4474; GFX7-HSA-NEXT:    s_addc_u32 s7, s17, 0
4475; GFX7-HSA-NEXT:    s_sext_i32_i8 s10, s10
4476; GFX7-HSA-NEXT:    v_mov_b32_e32 v4, s14
4477; GFX7-HSA-NEXT:    v_mov_b32_e32 v5, s65
4478; GFX7-HSA-NEXT:    v_mov_b32_e32 v6, s64
4479; GFX7-HSA-NEXT:    v_mov_b32_e32 v7, s63
4480; GFX7-HSA-NEXT:    v_mov_b32_e32 v19, s7
4481; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s10
4482; GFX7-HSA-NEXT:    v_mov_b32_e32 v20, s50
4483; GFX7-HSA-NEXT:    flat_store_dwordx4 v[21:22], v[4:7]
4484; GFX7-HSA-NEXT:    v_mov_b32_e32 v1, s53
4485; GFX7-HSA-NEXT:    v_mov_b32_e32 v2, s52
4486; GFX7-HSA-NEXT:    v_mov_b32_e32 v3, s51
4487; GFX7-HSA-NEXT:    v_mov_b32_e32 v21, s49
4488; GFX7-HSA-NEXT:    v_mov_b32_e32 v22, s48
4489; GFX7-HSA-NEXT:    v_mov_b32_e32 v4, s46
4490; GFX7-HSA-NEXT:    v_mov_b32_e32 v23, s47
4491; GFX7-HSA-NEXT:    v_mov_b32_e32 v5, s45
4492; GFX7-HSA-NEXT:    v_mov_b32_e32 v15, s9
4493; GFX7-HSA-NEXT:    v_mov_b32_e32 v18, s6
4494; GFX7-HSA-NEXT:    s_add_u32 s6, s16, 0x50
4495; GFX7-HSA-NEXT:    s_sext_i32_i8 s5, s5
4496; GFX7-HSA-NEXT:    v_mov_b32_e32 v14, s8
4497; GFX7-HSA-NEXT:    v_mov_b32_e32 v6, s44
4498; GFX7-HSA-NEXT:    v_mov_b32_e32 v7, s43
4499; GFX7-HSA-NEXT:    flat_store_dwordx4 v[9:10], v[0:3]
4500; GFX7-HSA-NEXT:    v_mov_b32_e32 v9, s42
4501; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s40
4502; GFX7-HSA-NEXT:    v_mov_b32_e32 v10, s41
4503; GFX7-HSA-NEXT:    v_mov_b32_e32 v1, s39
4504; GFX7-HSA-NEXT:    v_mov_b32_e32 v2, s38
4505; GFX7-HSA-NEXT:    v_mov_b32_e32 v3, s37
4506; GFX7-HSA-NEXT:    flat_store_dwordx4 v[12:13], v[20:23]
4507; GFX7-HSA-NEXT:    flat_store_dwordx4 v[14:15], v[4:7]
4508; GFX7-HSA-NEXT:    flat_store_dwordx4 v[16:17], v[8:11]
4509; GFX7-HSA-NEXT:    flat_store_dwordx4 v[18:19], v[0:3]
4510; GFX7-HSA-NEXT:    s_addc_u32 s7, s17, 0
4511; GFX7-HSA-NEXT:    v_mov_b32_e32 v4, s6
4512; GFX7-HSA-NEXT:    s_sext_i32_i8 s4, s4
4513; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s5
4514; GFX7-HSA-NEXT:    v_mov_b32_e32 v1, s36
4515; GFX7-HSA-NEXT:    v_mov_b32_e32 v2, s35
4516; GFX7-HSA-NEXT:    v_mov_b32_e32 v3, s34
4517; GFX7-HSA-NEXT:    v_mov_b32_e32 v5, s7
4518; GFX7-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
4519; GFX7-HSA-NEXT:    s_sext_i32_i8 s3, s3
4520; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s4
4521; GFX7-HSA-NEXT:    s_add_u32 s4, s16, 64
4522; GFX7-HSA-NEXT:    s_addc_u32 s5, s17, 0
4523; GFX7-HSA-NEXT:    v_mov_b32_e32 v4, s4
4524; GFX7-HSA-NEXT:    v_mov_b32_e32 v1, s33
4525; GFX7-HSA-NEXT:    v_mov_b32_e32 v2, s31
4526; GFX7-HSA-NEXT:    v_mov_b32_e32 v3, s30
4527; GFX7-HSA-NEXT:    v_mov_b32_e32 v5, s5
4528; GFX7-HSA-NEXT:    s_add_u32 s4, s16, 48
4529; GFX7-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
4530; GFX7-HSA-NEXT:    s_addc_u32 s5, s17, 0
4531; GFX7-HSA-NEXT:    v_mov_b32_e32 v4, s4
4532; GFX7-HSA-NEXT:    s_sext_i32_i8 s2, s2
4533; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s3
4534; GFX7-HSA-NEXT:    v_mov_b32_e32 v1, s29
4535; GFX7-HSA-NEXT:    v_mov_b32_e32 v2, s28
4536; GFX7-HSA-NEXT:    v_mov_b32_e32 v3, s27
4537; GFX7-HSA-NEXT:    v_mov_b32_e32 v5, s5
4538; GFX7-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
4539; GFX7-HSA-NEXT:    s_sext_i32_i8 s1, s1
4540; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s2
4541; GFX7-HSA-NEXT:    s_add_u32 s2, s16, 32
4542; GFX7-HSA-NEXT:    s_addc_u32 s3, s17, 0
4543; GFX7-HSA-NEXT:    v_mov_b32_e32 v5, s3
4544; GFX7-HSA-NEXT:    v_mov_b32_e32 v4, s2
4545; GFX7-HSA-NEXT:    s_add_u32 s2, s16, 16
4546; GFX7-HSA-NEXT:    v_mov_b32_e32 v1, s26
4547; GFX7-HSA-NEXT:    v_mov_b32_e32 v2, s25
4548; GFX7-HSA-NEXT:    v_mov_b32_e32 v3, s24
4549; GFX7-HSA-NEXT:    s_addc_u32 s3, s17, 0
4550; GFX7-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
4551; GFX7-HSA-NEXT:    v_mov_b32_e32 v5, s3
4552; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s1
4553; GFX7-HSA-NEXT:    v_mov_b32_e32 v1, s23
4554; GFX7-HSA-NEXT:    v_mov_b32_e32 v2, s22
4555; GFX7-HSA-NEXT:    v_mov_b32_e32 v3, s21
4556; GFX7-HSA-NEXT:    v_mov_b32_e32 v4, s2
4557; GFX7-HSA-NEXT:    s_sext_i32_i8 s0, s0
4558; GFX7-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
4559; GFX7-HSA-NEXT:    v_mov_b32_e32 v4, s16
4560; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s0
4561; GFX7-HSA-NEXT:    v_mov_b32_e32 v1, s20
4562; GFX7-HSA-NEXT:    v_mov_b32_e32 v2, s19
4563; GFX7-HSA-NEXT:    v_mov_b32_e32 v3, s18
4564; GFX7-HSA-NEXT:    v_mov_b32_e32 v5, s17
4565; GFX7-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
4566; GFX7-HSA-NEXT:    s_endpgm
4567;
4568; GFX8-NOHSA-LABEL: constant_sextload_v64i8_to_v64i32:
4569; GFX8-NOHSA:       ; %bb.0:
4570; GFX8-NOHSA-NEXT:    s_load_dwordx4 s[16:19], s[4:5], 0x24
4571; GFX8-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
4572; GFX8-NOHSA-NEXT:    s_load_dwordx16 s[0:15], s[18:19], 0x0
4573; GFX8-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
4574; GFX8-NOHSA-NEXT:    s_ashr_i32 s18, s0, 24
4575; GFX8-NOHSA-NEXT:    s_bfe_i32 s19, s0, 0x80010
4576; GFX8-NOHSA-NEXT:    s_bfe_i32 s20, s0, 0x80008
4577; GFX8-NOHSA-NEXT:    s_ashr_i32 s21, s1, 24
4578; GFX8-NOHSA-NEXT:    s_bfe_i32 s22, s1, 0x80010
4579; GFX8-NOHSA-NEXT:    s_bfe_i32 s23, s1, 0x80008
4580; GFX8-NOHSA-NEXT:    s_ashr_i32 s24, s2, 24
4581; GFX8-NOHSA-NEXT:    s_bfe_i32 s25, s2, 0x80010
4582; GFX8-NOHSA-NEXT:    s_bfe_i32 s26, s2, 0x80008
4583; GFX8-NOHSA-NEXT:    s_ashr_i32 s27, s3, 24
4584; GFX8-NOHSA-NEXT:    s_bfe_i32 s28, s3, 0x80010
4585; GFX8-NOHSA-NEXT:    s_bfe_i32 s29, s3, 0x80008
4586; GFX8-NOHSA-NEXT:    s_ashr_i32 s30, s4, 24
4587; GFX8-NOHSA-NEXT:    s_bfe_i32 s31, s4, 0x80010
4588; GFX8-NOHSA-NEXT:    s_bfe_i32 s33, s4, 0x80008
4589; GFX8-NOHSA-NEXT:    s_ashr_i32 s34, s5, 24
4590; GFX8-NOHSA-NEXT:    s_bfe_i32 s35, s5, 0x80010
4591; GFX8-NOHSA-NEXT:    s_bfe_i32 s36, s5, 0x80008
4592; GFX8-NOHSA-NEXT:    s_ashr_i32 s37, s6, 24
4593; GFX8-NOHSA-NEXT:    s_bfe_i32 s38, s6, 0x80010
4594; GFX8-NOHSA-NEXT:    s_bfe_i32 s39, s6, 0x80008
4595; GFX8-NOHSA-NEXT:    s_sext_i32_i8 s40, s6
4596; GFX8-NOHSA-NEXT:    s_ashr_i32 s41, s7, 24
4597; GFX8-NOHSA-NEXT:    s_bfe_i32 s42, s7, 0x80010
4598; GFX8-NOHSA-NEXT:    s_bfe_i32 s43, s7, 0x80008
4599; GFX8-NOHSA-NEXT:    s_sext_i32_i8 s44, s7
4600; GFX8-NOHSA-NEXT:    s_ashr_i32 s45, s8, 24
4601; GFX8-NOHSA-NEXT:    s_bfe_i32 s46, s8, 0x80010
4602; GFX8-NOHSA-NEXT:    s_bfe_i32 s47, s8, 0x80008
4603; GFX8-NOHSA-NEXT:    s_ashr_i32 s48, s9, 24
4604; GFX8-NOHSA-NEXT:    s_bfe_i32 s49, s9, 0x80010
4605; GFX8-NOHSA-NEXT:    s_bfe_i32 s50, s9, 0x80008
4606; GFX8-NOHSA-NEXT:    s_ashr_i32 s51, s10, 24
4607; GFX8-NOHSA-NEXT:    s_bfe_i32 s52, s10, 0x80010
4608; GFX8-NOHSA-NEXT:    s_bfe_i32 s53, s10, 0x80008
4609; GFX8-NOHSA-NEXT:    s_ashr_i32 s54, s11, 24
4610; GFX8-NOHSA-NEXT:    s_bfe_i32 s55, s11, 0x80010
4611; GFX8-NOHSA-NEXT:    s_bfe_i32 s56, s11, 0x80008
4612; GFX8-NOHSA-NEXT:    s_ashr_i32 s57, s12, 24
4613; GFX8-NOHSA-NEXT:    s_bfe_i32 s58, s12, 0x80010
4614; GFX8-NOHSA-NEXT:    s_bfe_i32 s59, s12, 0x80008
4615; GFX8-NOHSA-NEXT:    s_ashr_i32 s60, s13, 24
4616; GFX8-NOHSA-NEXT:    s_bfe_i32 s61, s13, 0x80010
4617; GFX8-NOHSA-NEXT:    s_bfe_i32 s62, s13, 0x80008
4618; GFX8-NOHSA-NEXT:    s_ashr_i32 s63, s14, 24
4619; GFX8-NOHSA-NEXT:    s_bfe_i32 s64, s14, 0x80010
4620; GFX8-NOHSA-NEXT:    s_bfe_i32 s65, s14, 0x80008
4621; GFX8-NOHSA-NEXT:    s_ashr_i32 s6, s15, 24
4622; GFX8-NOHSA-NEXT:    s_bfe_i32 s7, s15, 0x80010
4623; GFX8-NOHSA-NEXT:    s_bfe_i32 s66, s15, 0x80008
4624; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v3, s6
4625; GFX8-NOHSA-NEXT:    s_add_u32 s6, s16, 0xf0
4626; GFX8-NOHSA-NEXT:    s_sext_i32_i8 s15, s15
4627; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s7
4628; GFX8-NOHSA-NEXT:    s_addc_u32 s7, s17, 0
4629; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s6
4630; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s15
4631; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s66
4632; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s7
4633; GFX8-NOHSA-NEXT:    s_add_u32 s6, s16, 0xe0
4634; GFX8-NOHSA-NEXT:    s_sext_i32_i8 s14, s14
4635; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
4636; GFX8-NOHSA-NEXT:    s_addc_u32 s7, s17, 0
4637; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s6
4638; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s14
4639; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s65
4640; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s64
4641; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v3, s63
4642; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s7
4643; GFX8-NOHSA-NEXT:    s_add_u32 s6, s16, 0xd0
4644; GFX8-NOHSA-NEXT:    s_sext_i32_i8 s13, s13
4645; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
4646; GFX8-NOHSA-NEXT:    s_addc_u32 s7, s17, 0
4647; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s6
4648; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s13
4649; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s62
4650; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s61
4651; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v3, s60
4652; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s7
4653; GFX8-NOHSA-NEXT:    s_add_u32 s6, s16, 0xc0
4654; GFX8-NOHSA-NEXT:    s_sext_i32_i8 s12, s12
4655; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
4656; GFX8-NOHSA-NEXT:    s_addc_u32 s7, s17, 0
4657; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s6
4658; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s12
4659; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s59
4660; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s58
4661; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v3, s57
4662; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s7
4663; GFX8-NOHSA-NEXT:    s_add_u32 s6, s16, 0xb0
4664; GFX8-NOHSA-NEXT:    s_sext_i32_i8 s11, s11
4665; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
4666; GFX8-NOHSA-NEXT:    s_addc_u32 s7, s17, 0
4667; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s6
4668; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s11
4669; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s56
4670; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s55
4671; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v3, s54
4672; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s7
4673; GFX8-NOHSA-NEXT:    s_add_u32 s6, s16, 0xa0
4674; GFX8-NOHSA-NEXT:    s_sext_i32_i8 s10, s10
4675; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
4676; GFX8-NOHSA-NEXT:    s_addc_u32 s7, s17, 0
4677; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s6
4678; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s10
4679; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s53
4680; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s52
4681; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v3, s51
4682; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s7
4683; GFX8-NOHSA-NEXT:    s_add_u32 s6, s16, 0x90
4684; GFX8-NOHSA-NEXT:    s_sext_i32_i8 s9, s9
4685; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
4686; GFX8-NOHSA-NEXT:    s_addc_u32 s7, s17, 0
4687; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s6
4688; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s9
4689; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s50
4690; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s49
4691; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v3, s48
4692; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s7
4693; GFX8-NOHSA-NEXT:    s_add_u32 s6, s16, 0x80
4694; GFX8-NOHSA-NEXT:    s_sext_i32_i8 s8, s8
4695; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
4696; GFX8-NOHSA-NEXT:    s_addc_u32 s7, s17, 0
4697; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s6
4698; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s8
4699; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s47
4700; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s46
4701; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v3, s45
4702; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s7
4703; GFX8-NOHSA-NEXT:    s_add_u32 s6, s16, 0x70
4704; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
4705; GFX8-NOHSA-NEXT:    s_addc_u32 s7, s17, 0
4706; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s6
4707; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s44
4708; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s43
4709; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s42
4710; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v3, s41
4711; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s7
4712; GFX8-NOHSA-NEXT:    s_add_u32 s6, s16, 0x60
4713; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
4714; GFX8-NOHSA-NEXT:    s_addc_u32 s7, s17, 0
4715; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s6
4716; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s40
4717; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s39
4718; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s38
4719; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v3, s37
4720; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s7
4721; GFX8-NOHSA-NEXT:    s_add_u32 s6, s16, 0x50
4722; GFX8-NOHSA-NEXT:    s_sext_i32_i8 s5, s5
4723; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
4724; GFX8-NOHSA-NEXT:    s_addc_u32 s7, s17, 0
4725; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s6
4726; GFX8-NOHSA-NEXT:    s_sext_i32_i8 s4, s4
4727; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s5
4728; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s36
4729; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s35
4730; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v3, s34
4731; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s7
4732; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
4733; GFX8-NOHSA-NEXT:    s_sext_i32_i8 s3, s3
4734; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s4
4735; GFX8-NOHSA-NEXT:    s_add_u32 s4, s16, 64
4736; GFX8-NOHSA-NEXT:    s_addc_u32 s5, s17, 0
4737; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s4
4738; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s33
4739; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s31
4740; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v3, s30
4741; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s5
4742; GFX8-NOHSA-NEXT:    s_add_u32 s4, s16, 48
4743; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
4744; GFX8-NOHSA-NEXT:    s_addc_u32 s5, s17, 0
4745; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s4
4746; GFX8-NOHSA-NEXT:    s_sext_i32_i8 s2, s2
4747; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s3
4748; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s29
4749; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s28
4750; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v3, s27
4751; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s5
4752; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
4753; GFX8-NOHSA-NEXT:    s_sext_i32_i8 s1, s1
4754; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s2
4755; GFX8-NOHSA-NEXT:    s_add_u32 s2, s16, 32
4756; GFX8-NOHSA-NEXT:    s_addc_u32 s3, s17, 0
4757; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s3
4758; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s2
4759; GFX8-NOHSA-NEXT:    s_add_u32 s2, s16, 16
4760; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s26
4761; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s25
4762; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v3, s24
4763; GFX8-NOHSA-NEXT:    s_addc_u32 s3, s17, 0
4764; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
4765; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s3
4766; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s1
4767; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s23
4768; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s22
4769; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v3, s21
4770; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s2
4771; GFX8-NOHSA-NEXT:    s_sext_i32_i8 s0, s0
4772; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
4773; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s16
4774; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s0
4775; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s20
4776; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s19
4777; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v3, s18
4778; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s17
4779; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
4780; GFX8-NOHSA-NEXT:    s_endpgm
4781;
4782; EG-LABEL: constant_sextload_v64i8_to_v64i32:
4783; EG:       ; %bb.0:
4784; EG-NEXT:    ALU 0, @32, KC0[CB0:0-32], KC1[]
4785; EG-NEXT:    TEX 1 @24
4786; EG-NEXT:    ALU 40, @33, KC0[CB0:0-32], KC1[]
4787; EG-NEXT:    TEX 1 @28
4788; EG-NEXT:    ALU 76, @74, KC0[CB0:0-32], KC1[]
4789; EG-NEXT:    ALU 72, @151, KC0[CB0:0-32], KC1[]
4790; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T48.XYZW, T50.X, 0
4791; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T47.XYZW, T49.X, 0
4792; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T20.XYZW, T19.X, 0
4793; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T46.XYZW, T35.X, 0
4794; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T45.XYZW, T34.X, 0
4795; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T44.XYZW, T33.X, 0
4796; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T21.XYZW, T32.X, 0
4797; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T43.XYZW, T30.X, 0
4798; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T42.XYZW, T29.X, 0
4799; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T41.XYZW, T28.X, 0
4800; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T31.XYZW, T27.X, 0
4801; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T40.XYZW, T26.X, 0
4802; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T39.XYZW, T25.X, 0
4803; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T38.XYZW, T24.X, 0
4804; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T37.XYZW, T23.X, 0
4805; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T36.XYZW, T22.X, 1
4806; EG-NEXT:    CF_END
4807; EG-NEXT:    PAD
4808; EG-NEXT:    Fetch clause starting at 24:
4809; EG-NEXT:     VTX_READ_128 T20.XYZW, T21.X, 32, #1
4810; EG-NEXT:     VTX_READ_128 T19.XYZW, T21.X, 48, #1
4811; EG-NEXT:    Fetch clause starting at 28:
4812; EG-NEXT:     VTX_READ_128 T31.XYZW, T21.X, 0, #1
4813; EG-NEXT:     VTX_READ_128 T21.XYZW, T21.X, 16, #1
4814; EG-NEXT:    ALU clause starting at 32:
4815; EG-NEXT:     MOV * T21.X, KC0[2].Z,
4816; EG-NEXT:    ALU clause starting at 33:
4817; EG-NEXT:     LSHR T22.X, KC0[2].Y, literal.x,
4818; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
4819; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
4820; EG-NEXT:     LSHR T23.X, PV.W, literal.x,
4821; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
4822; EG-NEXT:    2(2.802597e-45), 32(4.484155e-44)
4823; EG-NEXT:     LSHR T24.X, PV.W, literal.x,
4824; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
4825; EG-NEXT:    2(2.802597e-45), 48(6.726233e-44)
4826; EG-NEXT:     LSHR T25.X, PV.W, literal.x,
4827; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
4828; EG-NEXT:    2(2.802597e-45), 64(8.968310e-44)
4829; EG-NEXT:     LSHR T26.X, PV.W, literal.x,
4830; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
4831; EG-NEXT:    2(2.802597e-45), 80(1.121039e-43)
4832; EG-NEXT:     LSHR T27.X, PV.W, literal.x,
4833; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
4834; EG-NEXT:    2(2.802597e-45), 96(1.345247e-43)
4835; EG-NEXT:     LSHR T28.X, PV.W, literal.x,
4836; EG-NEXT:     LSHR T0.Y, T19.W, literal.y,
4837; EG-NEXT:     LSHR T0.Z, T19.Z, literal.z,
4838; EG-NEXT:     LSHR * T0.W, T19.W, literal.w,
4839; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
4840; EG-NEXT:    8(1.121039e-44), 24(3.363116e-44)
4841; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.x,
4842; EG-NEXT:    112(1.569454e-43), 0(0.000000e+00)
4843; EG-NEXT:     LSHR T29.X, PV.W, literal.x,
4844; EG-NEXT:     LSHR T1.Y, T19.Z, literal.y,
4845; EG-NEXT:     LSHR T1.Z, T19.Y, literal.z,
4846; EG-NEXT:     LSHR * T1.W, T19.Z, literal.w,
4847; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
4848; EG-NEXT:    8(1.121039e-44), 24(3.363116e-44)
4849; EG-NEXT:     ADD_INT * T2.W, KC0[2].Y, literal.x,
4850; EG-NEXT:    128(1.793662e-43), 0(0.000000e+00)
4851; EG-NEXT:     LSHR T30.X, PV.W, literal.x,
4852; EG-NEXT:     LSHR T2.Y, T19.Y, literal.y,
4853; EG-NEXT:     LSHR T2.Z, T19.Y, literal.z,
4854; EG-NEXT:     LSHR T2.W, T19.X, literal.y,
4855; EG-NEXT:     LSHR * T3.W, T19.X, literal.z,
4856; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
4857; EG-NEXT:    24(3.363116e-44), 0(0.000000e+00)
4858; EG-NEXT:    ALU clause starting at 74:
4859; EG-NEXT:     LSHR T3.Y, T20.W, literal.x,
4860; EG-NEXT:     LSHR T3.Z, T20.W, literal.y,
4861; EG-NEXT:     LSHR T4.W, T20.Z, literal.x,
4862; EG-NEXT:     ADD_INT * T5.W, KC0[2].Y, literal.z,
4863; EG-NEXT:    16(2.242078e-44), 24(3.363116e-44)
4864; EG-NEXT:    144(2.017870e-43), 0(0.000000e+00)
4865; EG-NEXT:     LSHR T32.X, PS, literal.x,
4866; EG-NEXT:     LSHR T4.Y, T20.Z, literal.y,
4867; EG-NEXT:     LSHR T4.Z, T20.Y, literal.z,
4868; EG-NEXT:     LSHR T5.W, T20.Y, literal.y,
4869; EG-NEXT:     ADD_INT * T6.W, KC0[2].Y, literal.w,
4870; EG-NEXT:    2(2.802597e-45), 24(3.363116e-44)
4871; EG-NEXT:    16(2.242078e-44), 160(2.242078e-43)
4872; EG-NEXT:     LSHR T33.X, PS, literal.x,
4873; EG-NEXT:     LSHR T5.Y, T20.X, literal.y,
4874; EG-NEXT:     LSHR T5.Z, T20.X, literal.z,
4875; EG-NEXT:     LSHR T6.W, T21.W, literal.y,
4876; EG-NEXT:     ADD_INT * T7.W, KC0[2].Y, literal.w,
4877; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
4878; EG-NEXT:    24(3.363116e-44), 176(2.466285e-43)
4879; EG-NEXT:     LSHR T34.X, PS, literal.x,
4880; EG-NEXT:     LSHR T6.Y, T21.W, literal.y,
4881; EG-NEXT:     LSHR T6.Z, T21.Z, literal.z,
4882; EG-NEXT:     LSHR T7.W, T21.Z, literal.y,
4883; EG-NEXT:     ADD_INT * T8.W, KC0[2].Y, literal.w,
4884; EG-NEXT:    2(2.802597e-45), 24(3.363116e-44)
4885; EG-NEXT:    16(2.242078e-44), 192(2.690493e-43)
4886; EG-NEXT:     LSHR T35.X, PS, literal.x,
4887; EG-NEXT:     LSHR T7.Y, T21.Y, literal.y,
4888; EG-NEXT:     LSHR T7.Z, T21.Y, literal.z,
4889; EG-NEXT:     LSHR T8.W, T21.X, literal.y,
4890; EG-NEXT:     LSHR * T9.W, T21.X, literal.z,
4891; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
4892; EG-NEXT:    24(3.363116e-44), 0(0.000000e+00)
4893; EG-NEXT:     BFE_INT T36.X, T31.X, 0.0, literal.x,
4894; EG-NEXT:     LSHR T8.Y, T31.W, literal.y,
4895; EG-NEXT:     LSHR T8.Z, T31.W, literal.z,
4896; EG-NEXT:     LSHR T10.W, T31.Z, literal.y,
4897; EG-NEXT:     LSHR * T11.W, T31.X, literal.z,
4898; EG-NEXT:    8(1.121039e-44), 16(2.242078e-44)
4899; EG-NEXT:    24(3.363116e-44), 0(0.000000e+00)
4900; EG-NEXT:     BFE_INT T37.X, T31.Y, 0.0, literal.x,
4901; EG-NEXT:     LSHR T9.Y, T31.Z, literal.y,
4902; EG-NEXT:     LSHR T9.Z, T31.Y, literal.y,
4903; EG-NEXT:     BFE_INT T36.W, PS, 0.0, literal.x,
4904; EG-NEXT:     LSHR * T11.W, T31.X, literal.z,
4905; EG-NEXT:    8(1.121039e-44), 24(3.363116e-44)
4906; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
4907; EG-NEXT:     BFE_INT T38.X, T31.Z, 0.0, literal.x,
4908; EG-NEXT:     LSHR T10.Y, T31.Y, literal.y,
4909; EG-NEXT:     BFE_INT T36.Z, PS, 0.0, literal.x,
4910; EG-NEXT:     BFE_INT T37.W, PV.Z, 0.0, literal.x,
4911; EG-NEXT:     LSHR * T11.W, T31.X, literal.x,
4912; EG-NEXT:    8(1.121039e-44), 16(2.242078e-44)
4913; EG-NEXT:     BFE_INT T39.X, T31.W, 0.0, literal.x,
4914; EG-NEXT:     BFE_INT T36.Y, PS, 0.0, literal.x,
4915; EG-NEXT:     BFE_INT T37.Z, PV.Y, 0.0, literal.x,
4916; EG-NEXT:     BFE_INT T38.W, T9.Y, 0.0, literal.x,
4917; EG-NEXT:     LSHR * T11.W, T31.Y, literal.x,
4918; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
4919; EG-NEXT:     BFE_INT T40.X, T21.X, 0.0, literal.x,
4920; EG-NEXT:     BFE_INT T37.Y, PS, 0.0, literal.x,
4921; EG-NEXT:     BFE_INT T38.Z, T10.W, 0.0, literal.x,
4922; EG-NEXT:     BFE_INT T39.W, T8.Z, 0.0, literal.x,
4923; EG-NEXT:     LSHR * T10.W, T31.Z, literal.x,
4924; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
4925; EG-NEXT:     BFE_INT T31.X, T21.Y, 0.0, literal.x,
4926; EG-NEXT:     BFE_INT T38.Y, PS, 0.0, literal.x,
4927; EG-NEXT:     BFE_INT T39.Z, T8.Y, 0.0, literal.x, BS:VEC_120/SCL_212
4928; EG-NEXT:     BFE_INT T40.W, T9.W, 0.0, literal.x,
4929; EG-NEXT:     LSHR * T9.W, T31.W, literal.x,
4930; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
4931; EG-NEXT:     BFE_INT T41.X, T21.Z, 0.0, literal.x,
4932; EG-NEXT:     BFE_INT T39.Y, PS, 0.0, literal.x,
4933; EG-NEXT:     BFE_INT T40.Z, T8.W, 0.0, literal.x,
4934; EG-NEXT:     BFE_INT * T31.W, T7.Z, 0.0, literal.x, BS:VEC_120/SCL_212
4935; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
4936; EG-NEXT:    ALU clause starting at 151:
4937; EG-NEXT:     LSHR * T8.W, T21.X, literal.x,
4938; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
4939; EG-NEXT:     BFE_INT T42.X, T21.W, 0.0, literal.x,
4940; EG-NEXT:     BFE_INT T40.Y, PV.W, 0.0, literal.x,
4941; EG-NEXT:     BFE_INT T31.Z, T7.Y, 0.0, literal.x,
4942; EG-NEXT:     BFE_INT T41.W, T7.W, 0.0, literal.x, BS:VEC_120/SCL_212
4943; EG-NEXT:     LSHR * T7.W, T21.Y, literal.x,
4944; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
4945; EG-NEXT:     BFE_INT T43.X, T20.X, 0.0, literal.x,
4946; EG-NEXT:     BFE_INT T31.Y, PS, 0.0, literal.x,
4947; EG-NEXT:     BFE_INT T41.Z, T6.Z, 0.0, literal.x,
4948; EG-NEXT:     BFE_INT T42.W, T6.Y, 0.0, literal.x,
4949; EG-NEXT:     LSHR * T7.W, T21.Z, literal.x,
4950; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
4951; EG-NEXT:     BFE_INT T21.X, T20.Y, 0.0, literal.x,
4952; EG-NEXT:     BFE_INT T41.Y, PS, 0.0, literal.x,
4953; EG-NEXT:     BFE_INT T42.Z, T6.W, 0.0, literal.x,
4954; EG-NEXT:     BFE_INT T43.W, T5.Z, 0.0, literal.x,
4955; EG-NEXT:     LSHR * T6.W, T21.W, literal.x,
4956; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
4957; EG-NEXT:     BFE_INT T44.X, T20.Z, 0.0, literal.x,
4958; EG-NEXT:     BFE_INT T42.Y, PS, 0.0, literal.x,
4959; EG-NEXT:     BFE_INT T43.Z, T5.Y, 0.0, literal.x,
4960; EG-NEXT:     BFE_INT T21.W, T5.W, 0.0, literal.x,
4961; EG-NEXT:     LSHR * T5.W, T20.X, literal.x,
4962; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
4963; EG-NEXT:     BFE_INT T45.X, T20.W, 0.0, literal.x,
4964; EG-NEXT:     BFE_INT T43.Y, PS, 0.0, literal.x,
4965; EG-NEXT:     BFE_INT T21.Z, T4.Z, 0.0, literal.x,
4966; EG-NEXT:     BFE_INT T44.W, T4.Y, 0.0, literal.x,
4967; EG-NEXT:     LSHR * T5.W, T20.Y, literal.x,
4968; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
4969; EG-NEXT:     BFE_INT T46.X, T19.X, 0.0, literal.x,
4970; EG-NEXT:     BFE_INT T21.Y, PS, 0.0, literal.x,
4971; EG-NEXT:     BFE_INT T44.Z, T4.W, 0.0, literal.x,
4972; EG-NEXT:     BFE_INT T45.W, T3.Z, 0.0, literal.x,
4973; EG-NEXT:     LSHR * T4.W, T20.Z, literal.x,
4974; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
4975; EG-NEXT:     BFE_INT T20.X, T19.Y, 0.0, literal.x,
4976; EG-NEXT:     BFE_INT T44.Y, PS, 0.0, literal.x,
4977; EG-NEXT:     BFE_INT T45.Z, T3.Y, 0.0, literal.x, BS:VEC_120/SCL_212
4978; EG-NEXT:     BFE_INT T46.W, T3.W, 0.0, literal.x,
4979; EG-NEXT:     LSHR * T3.W, T20.W, literal.x,
4980; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
4981; EG-NEXT:     BFE_INT T47.X, T19.Z, 0.0, literal.x,
4982; EG-NEXT:     BFE_INT T45.Y, PS, 0.0, literal.x,
4983; EG-NEXT:     BFE_INT T46.Z, T2.W, 0.0, literal.x,
4984; EG-NEXT:     BFE_INT T20.W, T2.Z, 0.0, literal.x, BS:VEC_120/SCL_212
4985; EG-NEXT:     LSHR * T2.W, T19.X, literal.x,
4986; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
4987; EG-NEXT:     BFE_INT T48.X, T19.W, 0.0, literal.x,
4988; EG-NEXT:     BFE_INT T46.Y, PS, 0.0, literal.x,
4989; EG-NEXT:     BFE_INT T20.Z, T2.Y, 0.0, literal.x,
4990; EG-NEXT:     BFE_INT T47.W, T1.W, 0.0, literal.x, BS:VEC_120/SCL_212
4991; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.y,
4992; EG-NEXT:    8(1.121039e-44), 208(2.914701e-43)
4993; EG-NEXT:     LSHR T19.X, PS, literal.x,
4994; EG-NEXT:     BFE_INT T20.Y, T1.Z, 0.0, literal.y,
4995; EG-NEXT:     BFE_INT T47.Z, T1.Y, 0.0, literal.y,
4996; EG-NEXT:     BFE_INT T48.W, T0.W, 0.0, literal.y,
4997; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.z,
4998; EG-NEXT:    2(2.802597e-45), 8(1.121039e-44)
4999; EG-NEXT:    224(3.138909e-43), 0(0.000000e+00)
5000; EG-NEXT:     LSHR T49.X, PS, literal.x,
5001; EG-NEXT:     BFE_INT T47.Y, T0.Z, 0.0, literal.y,
5002; EG-NEXT:     BFE_INT T48.Z, T0.Y, 0.0, literal.y,
5003; EG-NEXT:     LSHR T0.W, T19.W, literal.y,
5004; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.z,
5005; EG-NEXT:    2(2.802597e-45), 8(1.121039e-44)
5006; EG-NEXT:    240(3.363116e-43), 0(0.000000e+00)
5007; EG-NEXT:     LSHR T50.X, PS, literal.x,
5008; EG-NEXT:     BFE_INT * T48.Y, PV.W, 0.0, literal.y,
5009; EG-NEXT:    2(2.802597e-45), 8(1.121039e-44)
5010;
5011; GFX12-LABEL: constant_sextload_v64i8_to_v64i32:
5012; GFX12:       ; %bb.0:
5013; GFX12-NEXT:    s_load_b128 s[16:19], s[4:5], 0x24
5014; GFX12-NEXT:    s_wait_kmcnt 0x0
5015; GFX12-NEXT:    s_load_b512 s[0:15], s[18:19], 0x0
5016; GFX12-NEXT:    s_wait_kmcnt 0x0
5017; GFX12-NEXT:    s_ashr_i32 s64, s15, 24
5018; GFX12-NEXT:    s_bfe_i32 s65, s15, 0x80010
5019; GFX12-NEXT:    s_sext_i32_i8 s66, s15
5020; GFX12-NEXT:    s_bfe_i32 s15, s15, 0x80008
5021; GFX12-NEXT:    s_ashr_i32 s61, s14, 24
5022; GFX12-NEXT:    s_bfe_i32 s62, s14, 0x80010
5023; GFX12-NEXT:    s_bfe_i32 s63, s14, 0x80008
5024; GFX12-NEXT:    s_sext_i32_i8 s14, s14
5025; GFX12-NEXT:    v_dual_mov_b32 v24, 0 :: v_dual_mov_b32 v1, s15
5026; GFX12-NEXT:    s_ashr_i32 s58, s13, 24
5027; GFX12-NEXT:    s_bfe_i32 s59, s13, 0x80010
5028; GFX12-NEXT:    s_bfe_i32 s60, s13, 0x80008
5029; GFX12-NEXT:    s_sext_i32_i8 s13, s13
5030; GFX12-NEXT:    v_dual_mov_b32 v0, s66 :: v_dual_mov_b32 v3, s64
5031; GFX12-NEXT:    v_dual_mov_b32 v2, s65 :: v_dual_mov_b32 v5, s63
5032; GFX12-NEXT:    s_ashr_i32 s55, s12, 24
5033; GFX12-NEXT:    s_bfe_i32 s56, s12, 0x80010
5034; GFX12-NEXT:    s_bfe_i32 s57, s12, 0x80008
5035; GFX12-NEXT:    s_sext_i32_i8 s12, s12
5036; GFX12-NEXT:    v_dual_mov_b32 v4, s14 :: v_dual_mov_b32 v7, s61
5037; GFX12-NEXT:    v_dual_mov_b32 v6, s62 :: v_dual_mov_b32 v9, s60
5038; GFX12-NEXT:    v_dual_mov_b32 v8, s13 :: v_dual_mov_b32 v11, s58
5039; GFX12-NEXT:    v_dual_mov_b32 v10, s59 :: v_dual_mov_b32 v13, s57
5040; GFX12-NEXT:    s_ashr_i32 s52, s11, 24
5041; GFX12-NEXT:    s_bfe_i32 s53, s11, 0x80010
5042; GFX12-NEXT:    s_bfe_i32 s54, s11, 0x80008
5043; GFX12-NEXT:    v_dual_mov_b32 v12, s12 :: v_dual_mov_b32 v15, s55
5044; GFX12-NEXT:    v_mov_b32_e32 v14, s56
5045; GFX12-NEXT:    s_sext_i32_i8 s11, s11
5046; GFX12-NEXT:    s_ashr_i32 s49, s10, 24
5047; GFX12-NEXT:    s_bfe_i32 s50, s10, 0x80010
5048; GFX12-NEXT:    s_bfe_i32 s51, s10, 0x80008
5049; GFX12-NEXT:    s_sext_i32_i8 s10, s10
5050; GFX12-NEXT:    s_ashr_i32 s46, s9, 24
5051; GFX12-NEXT:    s_bfe_i32 s47, s9, 0x80010
5052; GFX12-NEXT:    s_bfe_i32 s48, s9, 0x80008
5053; GFX12-NEXT:    s_sext_i32_i8 s9, s9
5054; GFX12-NEXT:    s_clause 0x3
5055; GFX12-NEXT:    global_store_b128 v24, v[0:3], s[16:17] offset:240
5056; GFX12-NEXT:    global_store_b128 v24, v[4:7], s[16:17] offset:224
5057; GFX12-NEXT:    global_store_b128 v24, v[8:11], s[16:17] offset:208
5058; GFX12-NEXT:    global_store_b128 v24, v[12:15], s[16:17] offset:192
5059; GFX12-NEXT:    v_dual_mov_b32 v1, s54 :: v_dual_mov_b32 v0, s11
5060; GFX12-NEXT:    v_dual_mov_b32 v3, s52 :: v_dual_mov_b32 v2, s53
5061; GFX12-NEXT:    v_mov_b32_e32 v5, s51
5062; GFX12-NEXT:    s_ashr_i32 s43, s8, 24
5063; GFX12-NEXT:    s_bfe_i32 s44, s8, 0x80010
5064; GFX12-NEXT:    s_bfe_i32 s45, s8, 0x80008
5065; GFX12-NEXT:    s_sext_i32_i8 s8, s8
5066; GFX12-NEXT:    v_dual_mov_b32 v4, s10 :: v_dual_mov_b32 v7, s49
5067; GFX12-NEXT:    v_dual_mov_b32 v6, s50 :: v_dual_mov_b32 v9, s48
5068; GFX12-NEXT:    s_ashr_i32 s40, s7, 24
5069; GFX12-NEXT:    s_bfe_i32 s41, s7, 0x80010
5070; GFX12-NEXT:    s_bfe_i32 s42, s7, 0x80008
5071; GFX12-NEXT:    s_sext_i32_i8 s7, s7
5072; GFX12-NEXT:    v_dual_mov_b32 v8, s9 :: v_dual_mov_b32 v11, s46
5073; GFX12-NEXT:    v_dual_mov_b32 v10, s47 :: v_dual_mov_b32 v13, s45
5074; GFX12-NEXT:    s_ashr_i32 s34, s5, 24
5075; GFX12-NEXT:    s_bfe_i32 s35, s5, 0x80010
5076; GFX12-NEXT:    s_bfe_i32 s36, s5, 0x80008
5077; GFX12-NEXT:    s_sext_i32_i8 s5, s5
5078; GFX12-NEXT:    s_ashr_i32 s37, s6, 24
5079; GFX12-NEXT:    s_bfe_i32 s38, s6, 0x80010
5080; GFX12-NEXT:    s_bfe_i32 s39, s6, 0x80008
5081; GFX12-NEXT:    s_sext_i32_i8 s6, s6
5082; GFX12-NEXT:    v_dual_mov_b32 v12, s8 :: v_dual_mov_b32 v15, s43
5083; GFX12-NEXT:    v_dual_mov_b32 v14, s44 :: v_dual_mov_b32 v17, s42
5084; GFX12-NEXT:    s_ashr_i32 s30, s4, 24
5085; GFX12-NEXT:    s_bfe_i32 s31, s4, 0x80010
5086; GFX12-NEXT:    s_bfe_i32 s33, s4, 0x80008
5087; GFX12-NEXT:    s_sext_i32_i8 s4, s4
5088; GFX12-NEXT:    v_dual_mov_b32 v16, s7 :: v_dual_mov_b32 v19, s40
5089; GFX12-NEXT:    v_dual_mov_b32 v18, s41 :: v_dual_mov_b32 v21, s39
5090; GFX12-NEXT:    s_ashr_i32 s27, s3, 24
5091; GFX12-NEXT:    s_bfe_i32 s28, s3, 0x80010
5092; GFX12-NEXT:    s_bfe_i32 s29, s3, 0x80008
5093; GFX12-NEXT:    s_sext_i32_i8 s3, s3
5094; GFX12-NEXT:    v_dual_mov_b32 v20, s6 :: v_dual_mov_b32 v23, s37
5095; GFX12-NEXT:    v_mov_b32_e32 v22, s38
5096; GFX12-NEXT:    s_clause 0x5
5097; GFX12-NEXT:    global_store_b128 v24, v[0:3], s[16:17] offset:176
5098; GFX12-NEXT:    global_store_b128 v24, v[4:7], s[16:17] offset:160
5099; GFX12-NEXT:    global_store_b128 v24, v[8:11], s[16:17] offset:144
5100; GFX12-NEXT:    global_store_b128 v24, v[12:15], s[16:17] offset:128
5101; GFX12-NEXT:    global_store_b128 v24, v[16:19], s[16:17] offset:112
5102; GFX12-NEXT:    global_store_b128 v24, v[20:23], s[16:17] offset:96
5103; GFX12-NEXT:    v_dual_mov_b32 v1, s36 :: v_dual_mov_b32 v0, s5
5104; GFX12-NEXT:    v_dual_mov_b32 v3, s34 :: v_dual_mov_b32 v2, s35
5105; GFX12-NEXT:    v_mov_b32_e32 v5, s33
5106; GFX12-NEXT:    s_ashr_i32 s24, s2, 24
5107; GFX12-NEXT:    s_bfe_i32 s25, s2, 0x80010
5108; GFX12-NEXT:    s_bfe_i32 s26, s2, 0x80008
5109; GFX12-NEXT:    s_sext_i32_i8 s2, s2
5110; GFX12-NEXT:    v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v7, s30
5111; GFX12-NEXT:    v_dual_mov_b32 v6, s31 :: v_dual_mov_b32 v9, s29
5112; GFX12-NEXT:    s_ashr_i32 s21, s1, 24
5113; GFX12-NEXT:    s_bfe_i32 s22, s1, 0x80010
5114; GFX12-NEXT:    s_bfe_i32 s23, s1, 0x80008
5115; GFX12-NEXT:    s_sext_i32_i8 s1, s1
5116; GFX12-NEXT:    v_dual_mov_b32 v8, s3 :: v_dual_mov_b32 v11, s27
5117; GFX12-NEXT:    v_dual_mov_b32 v10, s28 :: v_dual_mov_b32 v13, s26
5118; GFX12-NEXT:    s_ashr_i32 s18, s0, 24
5119; GFX12-NEXT:    s_bfe_i32 s19, s0, 0x80010
5120; GFX12-NEXT:    s_bfe_i32 s20, s0, 0x80008
5121; GFX12-NEXT:    s_sext_i32_i8 s0, s0
5122; GFX12-NEXT:    v_dual_mov_b32 v12, s2 :: v_dual_mov_b32 v15, s24
5123; GFX12-NEXT:    v_dual_mov_b32 v14, s25 :: v_dual_mov_b32 v17, s23
5124; GFX12-NEXT:    v_dual_mov_b32 v16, s1 :: v_dual_mov_b32 v19, s21
5125; GFX12-NEXT:    v_dual_mov_b32 v18, s22 :: v_dual_mov_b32 v21, s20
5126; GFX12-NEXT:    v_dual_mov_b32 v20, s0 :: v_dual_mov_b32 v23, s18
5127; GFX12-NEXT:    v_mov_b32_e32 v22, s19
5128; GFX12-NEXT:    s_clause 0x5
5129; GFX12-NEXT:    global_store_b128 v24, v[0:3], s[16:17] offset:80
5130; GFX12-NEXT:    global_store_b128 v24, v[4:7], s[16:17] offset:64
5131; GFX12-NEXT:    global_store_b128 v24, v[8:11], s[16:17] offset:48
5132; GFX12-NEXT:    global_store_b128 v24, v[12:15], s[16:17] offset:32
5133; GFX12-NEXT:    global_store_b128 v24, v[16:19], s[16:17] offset:16
5134; GFX12-NEXT:    global_store_b128 v24, v[20:23], s[16:17]
5135; GFX12-NEXT:    s_endpgm
5136  %load = load <64 x i8>, ptr addrspace(4) %in
5137  %ext = sext <64 x i8> %load to <64 x i32>
5138  store <64 x i32> %ext, ptr addrspace(1) %out
5139  ret void
5140}
5141
; Zero-extending scalar load test: reads a single i8 through the addrspace(4)
; pointer %in, widens it to i64 with zext, and stores the result through the
; addrspace(1) pointer %out. In every expected sequence below the upper 32 bits
; of the stored value come from a register that is set to zero.
; The per-target assertion lines inside the function are autogenerated (see the
; NOTE on the first line of this file); regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
5142define amdgpu_kernel void @constant_zextload_i8_to_i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
5143; GFX6-NOHSA-LABEL: constant_zextload_i8_to_i64:
5144; GFX6-NOHSA:       ; %bb.0:
5145; GFX6-NOHSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
5146; GFX6-NOHSA-NEXT:    s_mov_b32 s7, 0xf000
5147; GFX6-NOHSA-NEXT:    s_mov_b32 s6, -1
5148; GFX6-NOHSA-NEXT:    s_mov_b32 s10, s6
5149; GFX6-NOHSA-NEXT:    s_mov_b32 s11, s7
5150; GFX6-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
5151; GFX6-NOHSA-NEXT:    s_mov_b32 s8, s2
5152; GFX6-NOHSA-NEXT:    s_mov_b32 s9, s3
5153; GFX6-NOHSA-NEXT:    buffer_load_ubyte v0, off, s[8:11], 0
5154; GFX6-NOHSA-NEXT:    s_mov_b32 s4, s0
5155; GFX6-NOHSA-NEXT:    s_mov_b32 s5, s1
5156; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, 0
5157; GFX6-NOHSA-NEXT:    s_waitcnt vmcnt(0)
5158; GFX6-NOHSA-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
5159; GFX6-NOHSA-NEXT:    s_endpgm
5160;
5161; GFX7-HSA-LABEL: constant_zextload_i8_to_i64:
5162; GFX7-HSA:       ; %bb.0:
5163; GFX7-HSA-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
5164; GFX7-HSA-NEXT:    s_waitcnt lgkmcnt(0)
5165; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s2
5166; GFX7-HSA-NEXT:    v_mov_b32_e32 v1, s3
5167; GFX7-HSA-NEXT:    flat_load_ubyte v0, v[0:1]
5168; GFX7-HSA-NEXT:    v_mov_b32_e32 v2, s0
5169; GFX7-HSA-NEXT:    v_mov_b32_e32 v3, s1
5170; GFX7-HSA-NEXT:    v_mov_b32_e32 v1, 0
5171; GFX7-HSA-NEXT:    s_waitcnt vmcnt(0)
5172; GFX7-HSA-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
5173; GFX7-HSA-NEXT:    s_endpgm
5174;
5175; GFX8-NOHSA-LABEL: constant_zextload_i8_to_i64:
5176; GFX8-NOHSA:       ; %bb.0:
5177; GFX8-NOHSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
5178; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v3, 0
5179; GFX8-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
5180; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s2
5181; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s3
5182; GFX8-NOHSA-NEXT:    flat_load_ubyte v2, v[0:1]
5183; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s0
5184; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s1
5185; GFX8-NOHSA-NEXT:    s_waitcnt vmcnt(0)
5186; GFX8-NOHSA-NEXT:    v_and_b32_e32 v2, 0xffff, v2
5187; GFX8-NOHSA-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
5188; GFX8-NOHSA-NEXT:    s_endpgm
5189;
5190; EG-LABEL: constant_zextload_i8_to_i64:
5191; EG:       ; %bb.0:
5192; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
5193; EG-NEXT:    TEX 0 @6
5194; EG-NEXT:    ALU 2, @9, KC0[CB0:0-32], KC1[]
5195; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
5196; EG-NEXT:    CF_END
5197; EG-NEXT:    PAD
5198; EG-NEXT:    Fetch clause starting at 6:
5199; EG-NEXT:     VTX_READ_8 T0.X, T0.X, 0, #1
5200; EG-NEXT:    ALU clause starting at 8:
5201; EG-NEXT:     MOV * T0.X, KC0[2].Z,
5202; EG-NEXT:    ALU clause starting at 9:
5203; EG-NEXT:     MOV * T0.Y, 0.0,
5204; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
5205; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
5206;
5207; GFX12-LABEL: constant_zextload_i8_to_i64:
5208; GFX12:       ; %bb.0:
5209; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
5210; GFX12-NEXT:    v_mov_b32_e32 v1, 0
5211; GFX12-NEXT:    s_wait_kmcnt 0x0
5212; GFX12-NEXT:    global_load_u8 v0, v1, s[2:3]
5213; GFX12-NEXT:    s_wait_loadcnt 0x0
5214; GFX12-NEXT:    v_and_b32_e32 v0, 0xffff, v0
5215; GFX12-NEXT:    global_store_b64 v1, v[0:1], s[0:1]
5216; GFX12-NEXT:    s_endpgm
5217  %a = load i8, ptr addrspace(4) %in
5218  %ext = zext i8 %a to i64
5219  store i64 %ext, ptr addrspace(1) %out
5220  ret void
5221}
5222
5223; TODO: Why not 7 ?
; NOTE(review): the TODO above presumably asks why the upper half is formed
; with an arithmetic shift by 31 (rather than 7) in the expected output below;
; confirm against the file history before acting on it.
;
; Sign-extending scalar load test: reads a single i8 through the addrspace(4)
; pointer %in, widens it to i64 with sext, and stores the result through the
; addrspace(1) pointer %out. In every expected sequence below the upper 32 bits
; are produced by an arithmetic right shift by 31 of the sign-extended low half.
; The per-target assertion lines inside the function are autogenerated (see the
; NOTE on the first line of this file); regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
5224define amdgpu_kernel void @constant_sextload_i8_to_i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
5225; GFX6-NOHSA-LABEL: constant_sextload_i8_to_i64:
5226; GFX6-NOHSA:       ; %bb.0:
5227; GFX6-NOHSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
5228; GFX6-NOHSA-NEXT:    s_mov_b32 s7, 0xf000
5229; GFX6-NOHSA-NEXT:    s_mov_b32 s6, -1
5230; GFX6-NOHSA-NEXT:    s_mov_b32 s10, s6
5231; GFX6-NOHSA-NEXT:    s_mov_b32 s11, s7
5232; GFX6-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
5233; GFX6-NOHSA-NEXT:    s_mov_b32 s8, s2
5234; GFX6-NOHSA-NEXT:    s_mov_b32 s9, s3
5235; GFX6-NOHSA-NEXT:    buffer_load_sbyte v0, off, s[8:11], 0
5236; GFX6-NOHSA-NEXT:    s_mov_b32 s4, s0
5237; GFX6-NOHSA-NEXT:    s_mov_b32 s5, s1
5238; GFX6-NOHSA-NEXT:    s_waitcnt vmcnt(0)
5239; GFX6-NOHSA-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
5240; GFX6-NOHSA-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
5241; GFX6-NOHSA-NEXT:    s_endpgm
5242;
5243; GFX7-HSA-LABEL: constant_sextload_i8_to_i64:
5244; GFX7-HSA:       ; %bb.0:
5245; GFX7-HSA-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
5246; GFX7-HSA-NEXT:    s_waitcnt lgkmcnt(0)
5247; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s2
5248; GFX7-HSA-NEXT:    v_mov_b32_e32 v1, s3
5249; GFX7-HSA-NEXT:    flat_load_sbyte v0, v[0:1]
5250; GFX7-HSA-NEXT:    v_mov_b32_e32 v2, s0
5251; GFX7-HSA-NEXT:    v_mov_b32_e32 v3, s1
5252; GFX7-HSA-NEXT:    s_waitcnt vmcnt(0)
5253; GFX7-HSA-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
5254; GFX7-HSA-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
5255; GFX7-HSA-NEXT:    s_endpgm
5256;
5257; GFX8-NOHSA-LABEL: constant_sextload_i8_to_i64:
5258; GFX8-NOHSA:       ; %bb.0:
5259; GFX8-NOHSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
5260; GFX8-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
5261; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s2
5262; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s3
5263; GFX8-NOHSA-NEXT:    flat_load_sbyte v2, v[0:1]
5264; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s0
5265; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s1
5266; GFX8-NOHSA-NEXT:    s_waitcnt vmcnt(0)
5267; GFX8-NOHSA-NEXT:    v_bfe_i32 v2, v2, 0, 16
5268; GFX8-NOHSA-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
5269; GFX8-NOHSA-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
5270; GFX8-NOHSA-NEXT:    s_endpgm
5271;
5272; EG-LABEL: constant_sextload_i8_to_i64:
5273; EG:       ; %bb.0:
5274; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
5275; EG-NEXT:    TEX 0 @6
5276; EG-NEXT:    ALU 4, @9, KC0[CB0:0-32], KC1[]
5277; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
5278; EG-NEXT:    CF_END
5279; EG-NEXT:    PAD
5280; EG-NEXT:    Fetch clause starting at 6:
5281; EG-NEXT:     VTX_READ_8 T0.X, T0.X, 0, #1
5282; EG-NEXT:    ALU clause starting at 8:
5283; EG-NEXT:     MOV * T0.X, KC0[2].Z,
5284; EG-NEXT:    ALU clause starting at 9:
5285; EG-NEXT:     BFE_INT T0.X, T0.X, 0.0, literal.x,
5286; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.y,
5287; EG-NEXT:    8(1.121039e-44), 2(2.802597e-45)
5288; EG-NEXT:     ASHR * T0.Y, PV.X, literal.x,
5289; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
5290;
5291; GFX12-LABEL: constant_sextload_i8_to_i64:
5292; GFX12:       ; %bb.0:
5293; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
5294; GFX12-NEXT:    v_mov_b32_e32 v2, 0
5295; GFX12-NEXT:    s_wait_kmcnt 0x0
5296; GFX12-NEXT:    global_load_i8 v0, v2, s[2:3]
5297; GFX12-NEXT:    s_wait_loadcnt 0x0
5298; GFX12-NEXT:    v_bfe_i32 v0, v0, 0, 16
5299; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
5300; GFX12-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
5301; GFX12-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
5302; GFX12-NEXT:    s_endpgm
5303  %a = load i8, ptr addrspace(4) %in
5304  %ext = sext i8 %a to i64
5305  store i64 %ext, ptr addrspace(1) %out
5306  ret void
5307}
5308
; Kernel under test: loads a <1 x i8> through the addrspace(4) pointer %in,
; zero-extends it to <1 x i64>, and stores the result through the
; addrspace(1) pointer %out.
; In the expected output below the upper dword of the i64 is materialized as
; a constant 0 on every target. The GFX12 checks use a scalar s_load_u8,
; while the other targets use a byte load (buffer_load_ubyte /
; flat_load_ubyte / VTX_READ_8).
; The check lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them with the script instead of editing them by hand.
5309define amdgpu_kernel void @constant_zextload_v1i8_to_v1i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
5310; GFX6-NOHSA-LABEL: constant_zextload_v1i8_to_v1i64:
5311; GFX6-NOHSA:       ; %bb.0:
5312; GFX6-NOHSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
5313; GFX6-NOHSA-NEXT:    s_mov_b32 s7, 0xf000
5314; GFX6-NOHSA-NEXT:    s_mov_b32 s6, -1
5315; GFX6-NOHSA-NEXT:    s_mov_b32 s10, s6
5316; GFX6-NOHSA-NEXT:    s_mov_b32 s11, s7
5317; GFX6-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
5318; GFX6-NOHSA-NEXT:    s_mov_b32 s8, s2
5319; GFX6-NOHSA-NEXT:    s_mov_b32 s9, s3
5320; GFX6-NOHSA-NEXT:    buffer_load_ubyte v0, off, s[8:11], 0
5321; GFX6-NOHSA-NEXT:    s_mov_b32 s4, s0
5322; GFX6-NOHSA-NEXT:    s_mov_b32 s5, s1
5323; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, 0
5324; GFX6-NOHSA-NEXT:    s_waitcnt vmcnt(0)
5325; GFX6-NOHSA-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
5326; GFX6-NOHSA-NEXT:    s_endpgm
5327;
5328; GFX7-HSA-LABEL: constant_zextload_v1i8_to_v1i64:
5329; GFX7-HSA:       ; %bb.0:
5330; GFX7-HSA-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
5331; GFX7-HSA-NEXT:    s_waitcnt lgkmcnt(0)
5332; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s2
5333; GFX7-HSA-NEXT:    v_mov_b32_e32 v1, s3
5334; GFX7-HSA-NEXT:    flat_load_ubyte v0, v[0:1]
5335; GFX7-HSA-NEXT:    v_mov_b32_e32 v2, s0
5336; GFX7-HSA-NEXT:    v_mov_b32_e32 v3, s1
5337; GFX7-HSA-NEXT:    v_mov_b32_e32 v1, 0
5338; GFX7-HSA-NEXT:    s_waitcnt vmcnt(0)
5339; GFX7-HSA-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
5340; GFX7-HSA-NEXT:    s_endpgm
5341;
5342; GFX8-NOHSA-LABEL: constant_zextload_v1i8_to_v1i64:
5343; GFX8-NOHSA:       ; %bb.0:
5344; GFX8-NOHSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
5345; GFX8-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
5346; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s2
5347; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s3
5348; GFX8-NOHSA-NEXT:    flat_load_ubyte v0, v[0:1]
5349; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s0
5350; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v3, s1
5351; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, 0
5352; GFX8-NOHSA-NEXT:    s_waitcnt vmcnt(0)
5353; GFX8-NOHSA-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
5354; GFX8-NOHSA-NEXT:    s_endpgm
5355;
5356; EG-LABEL: constant_zextload_v1i8_to_v1i64:
5357; EG:       ; %bb.0:
5358; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
5359; EG-NEXT:    TEX 0 @6
5360; EG-NEXT:    ALU 2, @9, KC0[CB0:0-32], KC1[]
5361; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
5362; EG-NEXT:    CF_END
5363; EG-NEXT:    PAD
5364; EG-NEXT:    Fetch clause starting at 6:
5365; EG-NEXT:     VTX_READ_8 T0.X, T0.X, 0, #1
5366; EG-NEXT:    ALU clause starting at 8:
5367; EG-NEXT:     MOV * T0.X, KC0[2].Z,
5368; EG-NEXT:    ALU clause starting at 9:
5369; EG-NEXT:     MOV * T0.Y, 0.0,
5370; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
5371; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
5372;
5373; GFX12-LABEL: constant_zextload_v1i8_to_v1i64:
5374; GFX12:       ; %bb.0:
5375; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
5376; GFX12-NEXT:    s_wait_kmcnt 0x0
5377; GFX12-NEXT:    s_load_u8 s2, s[2:3], 0x0
5378; GFX12-NEXT:    s_wait_kmcnt 0x0
5379; GFX12-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2
5380; GFX12-NEXT:    global_store_b64 v1, v[0:1], s[0:1]
5381; GFX12-NEXT:    s_endpgm
5382  %load = load <1 x i8>, ptr addrspace(4) %in
5383  %ext = zext <1 x i8> %load to <1 x i64>
5384  store <1 x i64> %ext, ptr addrspace(1) %out
5385  ret void
5386}
5387
5388; TODO: Why not 7? (NOTE(review): what "7" refers to is not stated here; confirm and spell out the expected quantity.)
; Kernel under test: loads a <1 x i8> through the addrspace(4) pointer %in,
; sign-extends it to <1 x i64>, and stores the result through the
; addrspace(1) pointer %out.
; In the expected output below the upper dword of the i64 is produced by an
; arithmetic shift right by 31 of the extended low dword.
; NOTE(review): the GFX8 and GFX12 checks apply v_bfe_i32 ..., 0, 16 to the
; result of a signed byte load (flat_load_sbyte / global_load_i8), which the
; GFX6 and GFX7 checks do not; this looks like a redundant re-extension -
; confirm whether it is a known missed optimization.
; The check lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them with the script instead of editing them by hand.
5389define amdgpu_kernel void @constant_sextload_v1i8_to_v1i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
5390; GFX6-NOHSA-LABEL: constant_sextload_v1i8_to_v1i64:
5391; GFX6-NOHSA:       ; %bb.0:
5392; GFX6-NOHSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
5393; GFX6-NOHSA-NEXT:    s_mov_b32 s7, 0xf000
5394; GFX6-NOHSA-NEXT:    s_mov_b32 s6, -1
5395; GFX6-NOHSA-NEXT:    s_mov_b32 s10, s6
5396; GFX6-NOHSA-NEXT:    s_mov_b32 s11, s7
5397; GFX6-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
5398; GFX6-NOHSA-NEXT:    s_mov_b32 s8, s2
5399; GFX6-NOHSA-NEXT:    s_mov_b32 s9, s3
5400; GFX6-NOHSA-NEXT:    buffer_load_sbyte v0, off, s[8:11], 0
5401; GFX6-NOHSA-NEXT:    s_mov_b32 s4, s0
5402; GFX6-NOHSA-NEXT:    s_mov_b32 s5, s1
5403; GFX6-NOHSA-NEXT:    s_waitcnt vmcnt(0)
5404; GFX6-NOHSA-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
5405; GFX6-NOHSA-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
5406; GFX6-NOHSA-NEXT:    s_endpgm
5407;
5408; GFX7-HSA-LABEL: constant_sextload_v1i8_to_v1i64:
5409; GFX7-HSA:       ; %bb.0:
5410; GFX7-HSA-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
5411; GFX7-HSA-NEXT:    s_waitcnt lgkmcnt(0)
5412; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s2
5413; GFX7-HSA-NEXT:    v_mov_b32_e32 v1, s3
5414; GFX7-HSA-NEXT:    flat_load_sbyte v0, v[0:1]
5415; GFX7-HSA-NEXT:    v_mov_b32_e32 v2, s0
5416; GFX7-HSA-NEXT:    v_mov_b32_e32 v3, s1
5417; GFX7-HSA-NEXT:    s_waitcnt vmcnt(0)
5418; GFX7-HSA-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
5419; GFX7-HSA-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
5420; GFX7-HSA-NEXT:    s_endpgm
5421;
5422; GFX8-NOHSA-LABEL: constant_sextload_v1i8_to_v1i64:
5423; GFX8-NOHSA:       ; %bb.0:
5424; GFX8-NOHSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
5425; GFX8-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
5426; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s2
5427; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s3
5428; GFX8-NOHSA-NEXT:    flat_load_sbyte v2, v[0:1]
5429; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s0
5430; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s1
5431; GFX8-NOHSA-NEXT:    s_waitcnt vmcnt(0)
5432; GFX8-NOHSA-NEXT:    v_bfe_i32 v2, v2, 0, 16
5433; GFX8-NOHSA-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
5434; GFX8-NOHSA-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
5435; GFX8-NOHSA-NEXT:    s_endpgm
5436;
5437; EG-LABEL: constant_sextload_v1i8_to_v1i64:
5438; EG:       ; %bb.0:
5439; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
5440; EG-NEXT:    TEX 0 @6
5441; EG-NEXT:    ALU 4, @9, KC0[CB0:0-32], KC1[]
5442; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
5443; EG-NEXT:    CF_END
5444; EG-NEXT:    PAD
5445; EG-NEXT:    Fetch clause starting at 6:
5446; EG-NEXT:     VTX_READ_8 T0.X, T0.X, 0, #1
5447; EG-NEXT:    ALU clause starting at 8:
5448; EG-NEXT:     MOV * T0.X, KC0[2].Z,
5449; EG-NEXT:    ALU clause starting at 9:
5450; EG-NEXT:     BFE_INT T0.X, T0.X, 0.0, literal.x,
5451; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.y,
5452; EG-NEXT:    8(1.121039e-44), 2(2.802597e-45)
5453; EG-NEXT:     ASHR * T0.Y, PV.X, literal.x,
5454; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
5455;
5456; GFX12-LABEL: constant_sextload_v1i8_to_v1i64:
5457; GFX12:       ; %bb.0:
5458; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
5459; GFX12-NEXT:    v_mov_b32_e32 v2, 0
5460; GFX12-NEXT:    s_wait_kmcnt 0x0
5461; GFX12-NEXT:    global_load_i8 v0, v2, s[2:3]
5462; GFX12-NEXT:    s_wait_loadcnt 0x0
5463; GFX12-NEXT:    v_bfe_i32 v0, v0, 0, 16
5464; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
5465; GFX12-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
5466; GFX12-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
5467; GFX12-NEXT:    s_endpgm
5468  %load = load <1 x i8>, ptr addrspace(4) %in
5469  %ext = sext <1 x i8> %load to <1 x i64>
5470  store <1 x i64> %ext, ptr addrspace(1) %out
5471  ret void
5472}
5473
; Kernel under test: loads a <2 x i8> through the addrspace(4) pointer %in,
; zero-extends each element to i64, and stores the <2 x i64> result through
; the addrspace(1) pointer %out.
; In the expected output below the two bytes are fetched with a single
; 16-bit load (ushort / u16 / VTX_READ_16) and then separated: element 0 by
; masking with 0xff, element 1 by a right shift of 8 (a bit-field extract on
; EG). The upper dwords are written as 0.
; The check lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them with the script instead of editing them by hand.
5474define amdgpu_kernel void @constant_zextload_v2i8_to_v2i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
5475; GFX6-NOHSA-LABEL: constant_zextload_v2i8_to_v2i64:
5476; GFX6-NOHSA:       ; %bb.0:
5477; GFX6-NOHSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
5478; GFX6-NOHSA-NEXT:    s_mov_b32 s7, 0xf000
5479; GFX6-NOHSA-NEXT:    s_mov_b32 s6, -1
5480; GFX6-NOHSA-NEXT:    s_mov_b32 s10, s6
5481; GFX6-NOHSA-NEXT:    s_mov_b32 s11, s7
5482; GFX6-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
5483; GFX6-NOHSA-NEXT:    s_mov_b32 s8, s2
5484; GFX6-NOHSA-NEXT:    s_mov_b32 s9, s3
5485; GFX6-NOHSA-NEXT:    buffer_load_ushort v0, off, s[8:11], 0
5486; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, 0
5487; GFX6-NOHSA-NEXT:    s_mov_b32 s4, s0
5488; GFX6-NOHSA-NEXT:    s_mov_b32 s5, s1
5489; GFX6-NOHSA-NEXT:    s_waitcnt vmcnt(0)
5490; GFX6-NOHSA-NEXT:    v_lshrrev_b32_e32 v2, 8, v0
5491; GFX6-NOHSA-NEXT:    v_and_b32_e32 v0, 0xff, v0
5492; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, v1
5493; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
5494; GFX6-NOHSA-NEXT:    s_endpgm
5495;
5496; GFX7-HSA-LABEL: constant_zextload_v2i8_to_v2i64:
5497; GFX7-HSA:       ; %bb.0:
5498; GFX7-HSA-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
5499; GFX7-HSA-NEXT:    s_waitcnt lgkmcnt(0)
5500; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s2
5501; GFX7-HSA-NEXT:    v_mov_b32_e32 v1, s3
5502; GFX7-HSA-NEXT:    flat_load_ushort v0, v[0:1]
5503; GFX7-HSA-NEXT:    v_mov_b32_e32 v1, 0
5504; GFX7-HSA-NEXT:    v_mov_b32_e32 v4, s0
5505; GFX7-HSA-NEXT:    v_mov_b32_e32 v5, s1
5506; GFX7-HSA-NEXT:    v_mov_b32_e32 v3, v1
5507; GFX7-HSA-NEXT:    s_waitcnt vmcnt(0)
5508; GFX7-HSA-NEXT:    v_lshrrev_b32_e32 v2, 8, v0
5509; GFX7-HSA-NEXT:    v_and_b32_e32 v0, 0xff, v0
5510; GFX7-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
5511; GFX7-HSA-NEXT:    s_endpgm
5512;
5513; GFX8-NOHSA-LABEL: constant_zextload_v2i8_to_v2i64:
5514; GFX8-NOHSA:       ; %bb.0:
5515; GFX8-NOHSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
5516; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, 8
5517; GFX8-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
5518; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s2
5519; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s3
5520; GFX8-NOHSA-NEXT:    flat_load_ushort v0, v[0:1]
5521; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, 0
5522; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s0
5523; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s1
5524; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v3, v1
5525; GFX8-NOHSA-NEXT:    s_waitcnt vmcnt(0)
5526; GFX8-NOHSA-NEXT:    v_lshrrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
5527; GFX8-NOHSA-NEXT:    v_and_b32_e32 v0, 0xff, v0
5528; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
5529; GFX8-NOHSA-NEXT:    s_endpgm
5530;
5531; EG-LABEL: constant_zextload_v2i8_to_v2i64:
5532; EG:       ; %bb.0:
5533; EG-NEXT:    ALU 1, @8, KC0[CB0:0-32], KC1[]
5534; EG-NEXT:    TEX 0 @6
5535; EG-NEXT:    ALU 14, @10, KC0[CB0:0-32], KC1[]
5536; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T4.XYZW, T5.X, 1
5537; EG-NEXT:    CF_END
5538; EG-NEXT:    PAD
5539; EG-NEXT:    Fetch clause starting at 6:
5540; EG-NEXT:     VTX_READ_16 T4.X, T4.X, 0, #1
5541; EG-NEXT:    ALU clause starting at 8:
5542; EG-NEXT:     MOV * T0.Y, T2.X,
5543; EG-NEXT:     MOV * T4.X, KC0[2].Z,
5544; EG-NEXT:    ALU clause starting at 10:
5545; EG-NEXT:     AND_INT T0.W, T4.X, literal.x,
5546; EG-NEXT:     AND_INT * T1.W, T0.Y, literal.y,
5547; EG-NEXT:    65535(9.183409e-41), -65536(nan)
5548; EG-NEXT:     OR_INT * T0.W, PS, PV.W,
5549; EG-NEXT:     MOV * T2.X, PV.W,
5550; EG-NEXT:     MOV T0.Y, PV.X,
5551; EG-NEXT:     MOV * T1.W, literal.x,
5552; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
5553; EG-NEXT:     BFE_UINT * T4.Z, PV.Y, literal.x, PV.W,
5554; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
5555; EG-NEXT:     AND_INT T4.X, T0.W, literal.x,
5556; EG-NEXT:     MOV T4.Y, 0.0,
5557; EG-NEXT:     MOV T4.W, 0.0,
5558; EG-NEXT:     LSHR * T5.X, KC0[2].Y, literal.y,
5559; EG-NEXT:    255(3.573311e-43), 2(2.802597e-45)
5560;
5561; GFX12-LABEL: constant_zextload_v2i8_to_v2i64:
5562; GFX12:       ; %bb.0:
5563; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
5564; GFX12-NEXT:    v_mov_b32_e32 v1, 0
5565; GFX12-NEXT:    s_wait_kmcnt 0x0
5566; GFX12-NEXT:    global_load_u16 v0, v1, s[2:3]
5567; GFX12-NEXT:    s_wait_loadcnt 0x0
5568; GFX12-NEXT:    v_and_b32_e32 v2, 0xffff, v0
5569; GFX12-NEXT:    v_dual_mov_b32 v3, v1 :: v_dual_and_b32 v0, 0xff, v0
5570; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2)
5571; GFX12-NEXT:    v_lshrrev_b32_e32 v2, 8, v2
5572; GFX12-NEXT:    global_store_b128 v1, v[0:3], s[0:1]
5573; GFX12-NEXT:    s_endpgm
5574  %load = load <2 x i8>, ptr addrspace(4) %in
5575  %ext = zext <2 x i8> %load to <2 x i64>
5576  store <2 x i64> %ext, ptr addrspace(1) %out
5577  ret void
5578}
5579
; Kernel under test: loads a <2 x i8> through the addrspace(4) pointer %in,
; sign-extends each element to i64, and stores the <2 x i64> result through
; the addrspace(1) pointer %out.
; In the expected output below the two bytes are fetched with a single
; 16-bit load (ushort / u16 / VTX_READ_16). Each byte is then sign-extended
; with an 8-bit signed bit-field extract (element 1 after a right shift of
; 8), and each upper dword comes from an arithmetic shift right by 31.
; The check lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them with the script instead of editing them by hand.
5580define amdgpu_kernel void @constant_sextload_v2i8_to_v2i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
5581; GFX6-NOHSA-LABEL: constant_sextload_v2i8_to_v2i64:
5582; GFX6-NOHSA:       ; %bb.0:
5583; GFX6-NOHSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
5584; GFX6-NOHSA-NEXT:    s_mov_b32 s7, 0xf000
5585; GFX6-NOHSA-NEXT:    s_mov_b32 s6, -1
5586; GFX6-NOHSA-NEXT:    s_mov_b32 s10, s6
5587; GFX6-NOHSA-NEXT:    s_mov_b32 s11, s7
5588; GFX6-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
5589; GFX6-NOHSA-NEXT:    s_mov_b32 s8, s2
5590; GFX6-NOHSA-NEXT:    s_mov_b32 s9, s3
5591; GFX6-NOHSA-NEXT:    buffer_load_ushort v0, off, s[8:11], 0
5592; GFX6-NOHSA-NEXT:    s_mov_b32 s4, s0
5593; GFX6-NOHSA-NEXT:    s_mov_b32 s5, s1
5594; GFX6-NOHSA-NEXT:    s_waitcnt vmcnt(0)
5595; GFX6-NOHSA-NEXT:    v_lshrrev_b32_e32 v2, 8, v0
5596; GFX6-NOHSA-NEXT:    v_bfe_i32 v0, v0, 0, 8
5597; GFX6-NOHSA-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
5598; GFX6-NOHSA-NEXT:    v_bfe_i32 v2, v2, 0, 8
5599; GFX6-NOHSA-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
5600; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
5601; GFX6-NOHSA-NEXT:    s_endpgm
5602;
5603; GFX7-HSA-LABEL: constant_sextload_v2i8_to_v2i64:
5604; GFX7-HSA:       ; %bb.0:
5605; GFX7-HSA-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
5606; GFX7-HSA-NEXT:    s_waitcnt lgkmcnt(0)
5607; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s2
5608; GFX7-HSA-NEXT:    v_mov_b32_e32 v1, s3
5609; GFX7-HSA-NEXT:    flat_load_ushort v0, v[0:1]
5610; GFX7-HSA-NEXT:    v_mov_b32_e32 v4, s0
5611; GFX7-HSA-NEXT:    v_mov_b32_e32 v5, s1
5612; GFX7-HSA-NEXT:    s_waitcnt vmcnt(0)
5613; GFX7-HSA-NEXT:    v_lshrrev_b32_e32 v2, 8, v0
5614; GFX7-HSA-NEXT:    v_bfe_i32 v0, v0, 0, 8
5615; GFX7-HSA-NEXT:    v_bfe_i32 v2, v2, 0, 8
5616; GFX7-HSA-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
5617; GFX7-HSA-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
5618; GFX7-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
5619; GFX7-HSA-NEXT:    s_endpgm
5620;
5621; GFX8-NOHSA-LABEL: constant_sextload_v2i8_to_v2i64:
5622; GFX8-NOHSA:       ; %bb.0:
5623; GFX8-NOHSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
5624; GFX8-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
5625; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s2
5626; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s3
5627; GFX8-NOHSA-NEXT:    flat_load_ushort v0, v[0:1]
5628; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s0
5629; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s1
5630; GFX8-NOHSA-NEXT:    s_waitcnt vmcnt(0)
5631; GFX8-NOHSA-NEXT:    v_lshrrev_b32_e32 v2, 8, v0
5632; GFX8-NOHSA-NEXT:    v_bfe_i32 v0, v0, 0, 8
5633; GFX8-NOHSA-NEXT:    v_bfe_i32 v2, v2, 0, 8
5634; GFX8-NOHSA-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
5635; GFX8-NOHSA-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
5636; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
5637; GFX8-NOHSA-NEXT:    s_endpgm
5638;
5639; EG-LABEL: constant_sextload_v2i8_to_v2i64:
5640; EG:       ; %bb.0:
5641; EG-NEXT:    ALU 1, @8, KC0[CB0:0-32], KC1[]
5642; EG-NEXT:    TEX 0 @6
5643; EG-NEXT:    ALU 15, @10, KC0[CB0:0-32], KC1[]
5644; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T4.XYZW, T5.X, 1
5645; EG-NEXT:    CF_END
5646; EG-NEXT:    PAD
5647; EG-NEXT:    Fetch clause starting at 6:
5648; EG-NEXT:     VTX_READ_16 T4.X, T4.X, 0, #1
5649; EG-NEXT:    ALU clause starting at 8:
5650; EG-NEXT:     MOV * T0.Y, T2.X,
5651; EG-NEXT:     MOV * T4.X, KC0[2].Z,
5652; EG-NEXT:    ALU clause starting at 10:
5653; EG-NEXT:     AND_INT T0.W, T4.X, literal.x,
5654; EG-NEXT:     AND_INT * T1.W, T0.Y, literal.y,
5655; EG-NEXT:    65535(9.183409e-41), -65536(nan)
5656; EG-NEXT:     OR_INT * T0.W, PS, PV.W,
5657; EG-NEXT:     MOV * T2.X, PV.W,
5658; EG-NEXT:     MOV * T0.Y, PV.X,
5659; EG-NEXT:     BFE_INT * T4.X, T0.W, 0.0, literal.x,
5660; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
5661; EG-NEXT:     ASHR T4.Y, PV.X, literal.x,
5662; EG-NEXT:     LSHR * T0.W, T0.Y, literal.y,
5663; EG-NEXT:    31(4.344025e-44), 8(1.121039e-44)
5664; EG-NEXT:     BFE_INT * T4.Z, PV.W, 0.0, literal.x,
5665; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
5666; EG-NEXT:     LSHR T5.X, KC0[2].Y, literal.x,
5667; EG-NEXT:     ASHR * T4.W, PV.Z, literal.y,
5668; EG-NEXT:    2(2.802597e-45), 31(4.344025e-44)
5669;
5670; GFX12-LABEL: constant_sextload_v2i8_to_v2i64:
5671; GFX12:       ; %bb.0:
5672; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
5673; GFX12-NEXT:    v_mov_b32_e32 v4, 0
5674; GFX12-NEXT:    s_wait_kmcnt 0x0
5675; GFX12-NEXT:    global_load_u16 v0, v4, s[2:3]
5676; GFX12-NEXT:    s_wait_loadcnt 0x0
5677; GFX12-NEXT:    v_lshrrev_b32_e32 v1, 8, v0
5678; GFX12-NEXT:    v_bfe_i32 v0, v0, 0, 8
5679; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
5680; GFX12-NEXT:    v_bfe_i32 v2, v1, 0, 8
5681; GFX12-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
5682; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2)
5683; GFX12-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
5684; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[0:1]
5685; GFX12-NEXT:    s_endpgm
5686  %load = load <2 x i8>, ptr addrspace(4) %in
5687  %ext = sext <2 x i8> %load to <2 x i64>
5688  store <2 x i64> %ext, ptr addrspace(1) %out
5689  ret void
5690}
5691
; Kernel under test: loads a <4 x i8> through the addrspace(4) pointer %in,
; zero-extends each element to i64, and stores the <4 x i64> result through
; the addrspace(1) pointer %out.
; In the expected output below the four bytes are fetched as one 32-bit
; value (a scalar s_load_dword / s_load_b32 on the amdgcn targets,
; VTX_READ_32 on EG) and split with mask / shift / unsigned bit-field
; extract operations. The 32-byte result is written as two 16-byte stores,
; at offsets 16 and 0 from %out.
; The check lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them with the script instead of editing them by hand.
5692define amdgpu_kernel void @constant_zextload_v4i8_to_v4i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
5693; GFX6-NOHSA-LABEL: constant_zextload_v4i8_to_v4i64:
5694; GFX6-NOHSA:       ; %bb.0:
5695; GFX6-NOHSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
5696; GFX6-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
5697; GFX6-NOHSA-NEXT:    s_load_dword s4, s[2:3], 0x0
5698; GFX6-NOHSA-NEXT:    s_mov_b32 s3, 0xf000
5699; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, 0
5700; GFX6-NOHSA-NEXT:    s_mov_b32 s2, -1
5701; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, v1
5702; GFX6-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
5703; GFX6-NOHSA-NEXT:    s_bfe_u32 s5, s4, 0x80008
5704; GFX6-NOHSA-NEXT:    s_lshr_b32 s6, s4, 24
5705; GFX6-NOHSA-NEXT:    s_bfe_u32 s7, s4, 0x80010
5706; GFX6-NOHSA-NEXT:    s_and_b32 s4, s4, 0xff
5707; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s7
5708; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s6
5709; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
5710; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
5711; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s4
5712; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s5
5713; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
5714; GFX6-NOHSA-NEXT:    s_endpgm
5715;
5716; GFX7-HSA-LABEL: constant_zextload_v4i8_to_v4i64:
5717; GFX7-HSA:       ; %bb.0:
5718; GFX7-HSA-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
5719; GFX7-HSA-NEXT:    v_mov_b32_e32 v1, 0
5720; GFX7-HSA-NEXT:    v_mov_b32_e32 v3, v1
5721; GFX7-HSA-NEXT:    s_waitcnt lgkmcnt(0)
5722; GFX7-HSA-NEXT:    s_load_dword s2, s[2:3], 0x0
5723; GFX7-HSA-NEXT:    s_waitcnt lgkmcnt(0)
5724; GFX7-HSA-NEXT:    s_bfe_u32 s4, s2, 0x80008
5725; GFX7-HSA-NEXT:    s_lshr_b32 s3, s2, 24
5726; GFX7-HSA-NEXT:    s_and_b32 s5, s2, 0xff
5727; GFX7-HSA-NEXT:    s_bfe_u32 s2, s2, 0x80010
5728; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s2
5729; GFX7-HSA-NEXT:    s_add_u32 s2, s0, 16
5730; GFX7-HSA-NEXT:    v_mov_b32_e32 v2, s3
5731; GFX7-HSA-NEXT:    s_addc_u32 s3, s1, 0
5732; GFX7-HSA-NEXT:    v_mov_b32_e32 v5, s3
5733; GFX7-HSA-NEXT:    v_mov_b32_e32 v4, s2
5734; GFX7-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
5735; GFX7-HSA-NEXT:    v_mov_b32_e32 v5, s1
5736; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s5
5737; GFX7-HSA-NEXT:    v_mov_b32_e32 v2, s4
5738; GFX7-HSA-NEXT:    v_mov_b32_e32 v4, s0
5739; GFX7-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
5740; GFX7-HSA-NEXT:    s_endpgm
5741;
5742; GFX8-NOHSA-LABEL: constant_zextload_v4i8_to_v4i64:
5743; GFX8-NOHSA:       ; %bb.0:
5744; GFX8-NOHSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
5745; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, 0
5746; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v3, v1
5747; GFX8-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
5748; GFX8-NOHSA-NEXT:    s_load_dword s2, s[2:3], 0x0
5749; GFX8-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
5750; GFX8-NOHSA-NEXT:    s_lshr_b32 s3, s2, 24
5751; GFX8-NOHSA-NEXT:    s_bfe_u32 s4, s2, 0x80008
5752; GFX8-NOHSA-NEXT:    s_and_b32 s5, s2, 0xff
5753; GFX8-NOHSA-NEXT:    s_bfe_u32 s2, s2, 0x80010
5754; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s2
5755; GFX8-NOHSA-NEXT:    s_add_u32 s2, s0, 16
5756; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s3
5757; GFX8-NOHSA-NEXT:    s_addc_u32 s3, s1, 0
5758; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s3
5759; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s2
5760; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
5761; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s1
5762; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s5
5763; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s4
5764; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s0
5765; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
5766; GFX8-NOHSA-NEXT:    s_endpgm
5767;
5768; EG-LABEL: constant_zextload_v4i8_to_v4i64:
5769; EG:       ; %bb.0:
5770; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
5771; EG-NEXT:    TEX 0 @6
5772; EG-NEXT:    ALU 17, @9, KC0[CB0:0-32], KC1[]
5773; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T5.XYZW, T7.X, 0
5774; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T4.XYZW, T6.X, 1
5775; EG-NEXT:    CF_END
5776; EG-NEXT:    Fetch clause starting at 6:
5777; EG-NEXT:     VTX_READ_32 T4.X, T4.X, 0, #1
5778; EG-NEXT:    ALU clause starting at 8:
5779; EG-NEXT:     MOV * T4.X, KC0[2].Z,
5780; EG-NEXT:    ALU clause starting at 9:
5781; EG-NEXT:     MOV * T0.W, literal.x,
5782; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
5783; EG-NEXT:     BFE_UINT T5.X, T4.X, literal.x, PV.W,
5784; EG-NEXT:     LSHR * T5.Z, T4.X, literal.y,
5785; EG-NEXT:    16(2.242078e-44), 24(3.363116e-44)
5786; EG-NEXT:     MOV T5.Y, 0.0,
5787; EG-NEXT:     BFE_UINT * T4.Z, T4.X, literal.x, T0.W,
5788; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
5789; EG-NEXT:     AND_INT T4.X, T4.X, literal.x,
5790; EG-NEXT:     MOV T4.Y, 0.0,
5791; EG-NEXT:     MOV T5.W, 0.0,
5792; EG-NEXT:     MOV * T4.W, 0.0,
5793; EG-NEXT:    255(3.573311e-43), 0(0.000000e+00)
5794; EG-NEXT:     LSHR T6.X, KC0[2].Y, literal.x,
5795; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
5796; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
5797; EG-NEXT:     LSHR * T7.X, PV.W, literal.x,
5798; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
5799;
5800; GFX12-LABEL: constant_zextload_v4i8_to_v4i64:
5801; GFX12:       ; %bb.0:
5802; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
5803; GFX12-NEXT:    s_wait_kmcnt 0x0
5804; GFX12-NEXT:    s_load_b32 s2, s[2:3], 0x0
5805; GFX12-NEXT:    s_wait_kmcnt 0x0
5806; GFX12-NEXT:    s_bfe_u32 s3, s2, 0x80010
5807; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
5808; GFX12-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s3
5809; GFX12-NEXT:    s_lshr_b32 s4, s2, 24
5810; GFX12-NEXT:    s_bfe_u32 s3, s2, 0x80008
5811; GFX12-NEXT:    v_dual_mov_b32 v2, s4 :: v_dual_mov_b32 v3, v1
5812; GFX12-NEXT:    s_and_b32 s2, s2, 0xff
5813; GFX12-NEXT:    global_store_b128 v1, v[0:3], s[0:1] offset:16
5814; GFX12-NEXT:    s_wait_alu 0xfffe
5815; GFX12-NEXT:    v_mov_b32_e32 v0, s2
5816; GFX12-NEXT:    v_mov_b32_e32 v2, s3
5817; GFX12-NEXT:    global_store_b128 v1, v[0:3], s[0:1]
5818; GFX12-NEXT:    s_endpgm
5819  %load = load <4 x i8>, ptr addrspace(4) %in
5820  %ext = zext <4 x i8> %load to <4 x i64>
5821  store <4 x i64> %ext, ptr addrspace(1) %out
5822  ret void
5823}
5824
; Kernel under test: loads a <4 x i8> through the addrspace(4) pointer %in,
; sign-extends each element to i64, and stores the <4 x i64> result through
; the addrspace(1) pointer %out.
; In the expected output below the four bytes are fetched as one 32-bit
; value (a scalar s_load_dword / s_load_b32 on the amdgcn targets,
; VTX_READ_32 on EG). On the amdgcn targets each byte is shifted down with
; s_lshr_b32 and extended with a 64-bit s_bfe_i64 ..., 0x80000; EG uses
; BFE_INT plus ASHR. The 32-byte result is written as two 16-byte stores, at
; offsets 16 and 0 from %out.
; The check lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them with the script instead of editing them by hand.
5825define amdgpu_kernel void @constant_sextload_v4i8_to_v4i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
5826; GFX6-NOHSA-LABEL: constant_sextload_v4i8_to_v4i64:
5827; GFX6-NOHSA:       ; %bb.0:
5828; GFX6-NOHSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
5829; GFX6-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
5830; GFX6-NOHSA-NEXT:    s_load_dword s4, s[2:3], 0x0
5831; GFX6-NOHSA-NEXT:    s_mov_b32 s3, 0xf000
5832; GFX6-NOHSA-NEXT:    s_mov_b32 s2, -1
5833; GFX6-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
5834; GFX6-NOHSA-NEXT:    s_lshr_b32 s6, s4, 16
5835; GFX6-NOHSA-NEXT:    s_lshr_b32 s8, s4, 24
5836; GFX6-NOHSA-NEXT:    s_lshr_b32 s10, s4, 8
5837; GFX6-NOHSA-NEXT:    s_bfe_i64 s[4:5], s[4:5], 0x80000
5838; GFX6-NOHSA-NEXT:    s_bfe_i64 s[8:9], s[8:9], 0x80000
5839; GFX6-NOHSA-NEXT:    s_bfe_i64 s[6:7], s[6:7], 0x80000
5840; GFX6-NOHSA-NEXT:    s_bfe_i64 s[10:11], s[10:11], 0x80000
5841; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s4
5842; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s5
5843; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s6
5844; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s7
5845; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v4, s8
5846; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v5, s9
5847; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[2:5], off, s[0:3], 0 offset:16
5848; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
5849; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s10
5850; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s11
5851; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
5852; GFX6-NOHSA-NEXT:    s_endpgm
5853;
5854; GFX7-HSA-LABEL: constant_sextload_v4i8_to_v4i64:
5855; GFX7-HSA:       ; %bb.0:
5856; GFX7-HSA-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
5857; GFX7-HSA-NEXT:    s_waitcnt lgkmcnt(0)
5858; GFX7-HSA-NEXT:    s_load_dword s2, s[2:3], 0x0
5859; GFX7-HSA-NEXT:    s_waitcnt lgkmcnt(0)
5860; GFX7-HSA-NEXT:    s_lshr_b32 s4, s2, 16
5861; GFX7-HSA-NEXT:    s_lshr_b32 s6, s2, 24
5862; GFX7-HSA-NEXT:    s_lshr_b32 s8, s2, 8
5863; GFX7-HSA-NEXT:    s_bfe_i64 s[2:3], s[2:3], 0x80000
5864; GFX7-HSA-NEXT:    s_bfe_i64 s[8:9], s[8:9], 0x80000
5865; GFX7-HSA-NEXT:    s_bfe_i64 s[6:7], s[6:7], 0x80000
5866; GFX7-HSA-NEXT:    s_bfe_i64 s[4:5], s[4:5], 0x80000
5867; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s4
5868; GFX7-HSA-NEXT:    s_add_u32 s4, s0, 16
5869; GFX7-HSA-NEXT:    v_mov_b32_e32 v1, s5
5870; GFX7-HSA-NEXT:    s_addc_u32 s5, s1, 0
5871; GFX7-HSA-NEXT:    v_mov_b32_e32 v4, s4
5872; GFX7-HSA-NEXT:    v_mov_b32_e32 v2, s6
5873; GFX7-HSA-NEXT:    v_mov_b32_e32 v3, s7
5874; GFX7-HSA-NEXT:    v_mov_b32_e32 v5, s5
5875; GFX7-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
5876; GFX7-HSA-NEXT:    v_mov_b32_e32 v5, s1
5877; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s2
5878; GFX7-HSA-NEXT:    v_mov_b32_e32 v1, s3
5879; GFX7-HSA-NEXT:    v_mov_b32_e32 v2, s8
5880; GFX7-HSA-NEXT:    v_mov_b32_e32 v3, s9
5881; GFX7-HSA-NEXT:    v_mov_b32_e32 v4, s0
5882; GFX7-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
5883; GFX7-HSA-NEXT:    s_endpgm
5884;
5885; GFX8-NOHSA-LABEL: constant_sextload_v4i8_to_v4i64:
5886; GFX8-NOHSA:       ; %bb.0:
5887; GFX8-NOHSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
5888; GFX8-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
5889; GFX8-NOHSA-NEXT:    s_load_dword s2, s[2:3], 0x0
5890; GFX8-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
5891; GFX8-NOHSA-NEXT:    s_lshr_b32 s4, s2, 16
5892; GFX8-NOHSA-NEXT:    s_lshr_b32 s6, s2, 24
5893; GFX8-NOHSA-NEXT:    s_lshr_b32 s8, s2, 8
5894; GFX8-NOHSA-NEXT:    s_bfe_i64 s[2:3], s[2:3], 0x80000
5895; GFX8-NOHSA-NEXT:    s_bfe_i64 s[8:9], s[8:9], 0x80000
5896; GFX8-NOHSA-NEXT:    s_bfe_i64 s[6:7], s[6:7], 0x80000
5897; GFX8-NOHSA-NEXT:    s_bfe_i64 s[4:5], s[4:5], 0x80000
5898; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s4
5899; GFX8-NOHSA-NEXT:    s_add_u32 s4, s0, 16
5900; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s5
5901; GFX8-NOHSA-NEXT:    s_addc_u32 s5, s1, 0
5902; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s4
5903; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s6
5904; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v3, s7
5905; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s5
5906; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
5907; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s1
5908; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s2
5909; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s3
5910; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s8
5911; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v3, s9
5912; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s0
5913; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
5914; GFX8-NOHSA-NEXT:    s_endpgm
5915;
5916; EG-LABEL: constant_sextload_v4i8_to_v4i64:
5917; EG:       ; %bb.0:
5918; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
5919; EG-NEXT:    TEX 0 @6
5920; EG-NEXT:    ALU 18, @9, KC0[CB0:0-32], KC1[]
5921; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T4.XYZW, T7.X, 0
5922; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T5.XYZW, T6.X, 1
5923; EG-NEXT:    CF_END
5924; EG-NEXT:    Fetch clause starting at 6:
5925; EG-NEXT:     VTX_READ_32 T4.X, T4.X, 0, #1
5926; EG-NEXT:    ALU clause starting at 8:
5927; EG-NEXT:     MOV * T4.X, KC0[2].Z,
5928; EG-NEXT:    ALU clause starting at 9:
5929; EG-NEXT:     BFE_INT T5.X, T4.X, 0.0, literal.x,
5930; EG-NEXT:     ASHR T4.W, T4.X, literal.y,
5931; EG-NEXT:     LSHR * T6.X, KC0[2].Y, literal.z,
5932; EG-NEXT:    8(1.121039e-44), 31(4.344025e-44)
5933; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
5934; EG-NEXT:     ASHR T5.Y, PV.X, literal.x,
5935; EG-NEXT:     ASHR T4.Z, T4.X, literal.y,
5936; EG-NEXT:     LSHR T0.W, T4.X, literal.z,
5937; EG-NEXT:     LSHR * T1.W, T4.X, literal.w,
5938; EG-NEXT:    31(4.344025e-44), 24(3.363116e-44)
5939; EG-NEXT:    8(1.121039e-44), 16(2.242078e-44)
5940; EG-NEXT:     BFE_INT T4.X, PS, 0.0, literal.x,
5941; EG-NEXT:     BFE_INT T5.Z, PV.W, 0.0, literal.x,
5942; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
5943; EG-NEXT:    8(1.121039e-44), 16(2.242078e-44)
5944; EG-NEXT:     LSHR T7.X, PV.W, literal.x,
5945; EG-NEXT:     ASHR T4.Y, PV.X, literal.y,
5946; EG-NEXT:     ASHR * T5.W, PV.Z, literal.y,
5947; EG-NEXT:    2(2.802597e-45), 31(4.344025e-44)
5948;
5949; GFX12-LABEL: constant_sextload_v4i8_to_v4i64:
5950; GFX12:       ; %bb.0:
5951; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
5952; GFX12-NEXT:    s_wait_kmcnt 0x0
5953; GFX12-NEXT:    s_load_b32 s2, s[2:3], 0x0
5954; GFX12-NEXT:    s_wait_kmcnt 0x0
5955; GFX12-NEXT:    s_lshr_b32 s4, s2, 16
5956; GFX12-NEXT:    s_lshr_b32 s6, s2, 24
5957; GFX12-NEXT:    s_lshr_b32 s8, s2, 8
5958; GFX12-NEXT:    s_bfe_i64 s[2:3], s[2:3], 0x80000
5959; GFX12-NEXT:    s_bfe_i64 s[6:7], s[6:7], 0x80000
5960; GFX12-NEXT:    s_bfe_i64 s[4:5], s[4:5], 0x80000
5961; GFX12-NEXT:    s_bfe_i64 s[8:9], s[8:9], 0x80000
5962; GFX12-NEXT:    v_dual_mov_b32 v8, 0 :: v_dual_mov_b32 v1, s3
5963; GFX12-NEXT:    v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v5, s5
5964; GFX12-NEXT:    v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v7, s7
5965; GFX12-NEXT:    v_dual_mov_b32 v6, s6 :: v_dual_mov_b32 v3, s9
5966; GFX12-NEXT:    v_mov_b32_e32 v2, s8
5967; GFX12-NEXT:    s_clause 0x1
5968; GFX12-NEXT:    global_store_b128 v8, v[4:7], s[0:1] offset:16
5969; GFX12-NEXT:    global_store_b128 v8, v[0:3], s[0:1]
5970; GFX12-NEXT:    s_endpgm
5971  %load = load <4 x i8>, ptr addrspace(4) %in
5972  %ext = sext <4 x i8> %load to <4 x i64>
5973  store <4 x i64> %ext, ptr addrspace(1) %out
5974  ret void
5975}
5976
; Zero-extending load of <8 x i8> from the constant address space (4), widened
; to <8 x i64> and stored to global memory (address space 1).
;
; The expectations inside this function are autogenerated (see the NOTE on the
; first line of this file); regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
; What they pin down for every RUN configuration:
;   - the 8 source bytes are fetched with a single 64-bit load
;     (s_load_dwordx2, s_load_b64, or VTX_READ_64 on the r600 target);
;   - each byte is isolated with and / shift / bitfield-extract operations and
;     the high half of every i64 lane is a plain zero;
;   - the 64-byte result is written with four 16-byte stores at byte offsets
;     0, 16, 32 and 48 from %out.
5977define amdgpu_kernel void @constant_zextload_v8i8_to_v8i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
5978; GFX6-NOHSA-LABEL: constant_zextload_v8i8_to_v8i64:
5979; GFX6-NOHSA:       ; %bb.0:
5980; GFX6-NOHSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
5981; GFX6-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
5982; GFX6-NOHSA-NEXT:    s_load_dwordx2 s[4:5], s[2:3], 0x0
5983; GFX6-NOHSA-NEXT:    s_mov_b32 s3, 0xf000
5984; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, 0
5985; GFX6-NOHSA-NEXT:    s_mov_b32 s2, -1
5986; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, v1
5987; GFX6-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
5988; GFX6-NOHSA-NEXT:    s_lshr_b32 s6, s4, 24
5989; GFX6-NOHSA-NEXT:    s_lshr_b32 s7, s5, 24
5990; GFX6-NOHSA-NEXT:    s_bfe_u32 s8, s5, 0x80008
5991; GFX6-NOHSA-NEXT:    s_bfe_u32 s9, s4, 0x80008
5992; GFX6-NOHSA-NEXT:    s_and_b32 s10, s4, 0xff
5993; GFX6-NOHSA-NEXT:    s_and_b32 s11, s5, 0xff
5994; GFX6-NOHSA-NEXT:    s_bfe_u32 s5, s5, 0x80010
5995; GFX6-NOHSA-NEXT:    s_bfe_u32 s4, s4, 0x80010
5996; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s5
5997; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s7
5998; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
5999; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
6000; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s4
6001; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s6
6002; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
6003; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
6004; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s11
6005; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s8
6006; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
6007; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
6008; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s10
6009; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s9
6010; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
6011; GFX6-NOHSA-NEXT:    s_endpgm
6012;
6013; GFX7-HSA-LABEL: constant_zextload_v8i8_to_v8i64:
6014; GFX7-HSA:       ; %bb.0:
6015; GFX7-HSA-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
6016; GFX7-HSA-NEXT:    v_mov_b32_e32 v1, 0
6017; GFX7-HSA-NEXT:    v_mov_b32_e32 v3, v1
6018; GFX7-HSA-NEXT:    s_waitcnt lgkmcnt(0)
6019; GFX7-HSA-NEXT:    s_load_dwordx2 s[2:3], s[2:3], 0x0
6020; GFX7-HSA-NEXT:    s_waitcnt lgkmcnt(0)
6021; GFX7-HSA-NEXT:    s_lshr_b32 s4, s2, 24
6022; GFX7-HSA-NEXT:    s_lshr_b32 s5, s3, 24
6023; GFX7-HSA-NEXT:    s_bfe_u32 s6, s3, 0x80008
6024; GFX7-HSA-NEXT:    s_bfe_u32 s7, s2, 0x80008
6025; GFX7-HSA-NEXT:    s_and_b32 s8, s2, 0xff
6026; GFX7-HSA-NEXT:    s_and_b32 s9, s3, 0xff
6027; GFX7-HSA-NEXT:    s_bfe_u32 s10, s2, 0x80010
6028; GFX7-HSA-NEXT:    s_bfe_u32 s2, s3, 0x80010
6029; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s2
6030; GFX7-HSA-NEXT:    s_add_u32 s2, s0, 48
6031; GFX7-HSA-NEXT:    s_addc_u32 s3, s1, 0
6032; GFX7-HSA-NEXT:    v_mov_b32_e32 v5, s3
6033; GFX7-HSA-NEXT:    v_mov_b32_e32 v4, s2
6034; GFX7-HSA-NEXT:    s_add_u32 s2, s0, 16
6035; GFX7-HSA-NEXT:    v_mov_b32_e32 v2, s5
6036; GFX7-HSA-NEXT:    s_addc_u32 s3, s1, 0
6037; GFX7-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6038; GFX7-HSA-NEXT:    v_mov_b32_e32 v5, s3
6039; GFX7-HSA-NEXT:    v_mov_b32_e32 v4, s2
6040; GFX7-HSA-NEXT:    s_add_u32 s2, s0, 32
6041; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s10
6042; GFX7-HSA-NEXT:    v_mov_b32_e32 v2, s4
6043; GFX7-HSA-NEXT:    s_addc_u32 s3, s1, 0
6044; GFX7-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6045; GFX7-HSA-NEXT:    v_mov_b32_e32 v5, s3
6046; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s9
6047; GFX7-HSA-NEXT:    v_mov_b32_e32 v2, s6
6048; GFX7-HSA-NEXT:    v_mov_b32_e32 v4, s2
6049; GFX7-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6050; GFX7-HSA-NEXT:    v_mov_b32_e32 v5, s1
6051; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s8
6052; GFX7-HSA-NEXT:    v_mov_b32_e32 v2, s7
6053; GFX7-HSA-NEXT:    v_mov_b32_e32 v4, s0
6054; GFX7-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6055; GFX7-HSA-NEXT:    s_endpgm
6056;
6057; GFX8-NOHSA-LABEL: constant_zextload_v8i8_to_v8i64:
6058; GFX8-NOHSA:       ; %bb.0:
6059; GFX8-NOHSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
6060; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, 0
6061; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v3, v1
6062; GFX8-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
6063; GFX8-NOHSA-NEXT:    s_load_dwordx2 s[2:3], s[2:3], 0x0
6064; GFX8-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
6065; GFX8-NOHSA-NEXT:    s_lshr_b32 s4, s3, 24
6066; GFX8-NOHSA-NEXT:    s_bfe_u32 s5, s3, 0x80008
6067; GFX8-NOHSA-NEXT:    s_lshr_b32 s6, s2, 24
6068; GFX8-NOHSA-NEXT:    s_bfe_u32 s7, s2, 0x80008
6069; GFX8-NOHSA-NEXT:    s_and_b32 s8, s2, 0xff
6070; GFX8-NOHSA-NEXT:    s_bfe_u32 s9, s2, 0x80010
6071; GFX8-NOHSA-NEXT:    s_and_b32 s10, s3, 0xff
6072; GFX8-NOHSA-NEXT:    s_bfe_u32 s2, s3, 0x80010
6073; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s2
6074; GFX8-NOHSA-NEXT:    s_add_u32 s2, s0, 48
6075; GFX8-NOHSA-NEXT:    s_addc_u32 s3, s1, 0
6076; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s3
6077; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s2
6078; GFX8-NOHSA-NEXT:    s_add_u32 s2, s0, 32
6079; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s4
6080; GFX8-NOHSA-NEXT:    s_addc_u32 s3, s1, 0
6081; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6082; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s3
6083; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s2
6084; GFX8-NOHSA-NEXT:    s_add_u32 s2, s0, 16
6085; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s10
6086; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s5
6087; GFX8-NOHSA-NEXT:    s_addc_u32 s3, s1, 0
6088; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6089; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s3
6090; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s9
6091; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s6
6092; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s2
6093; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6094; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s1
6095; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s8
6096; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s7
6097; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s0
6098; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6099; GFX8-NOHSA-NEXT:    s_endpgm
6100;
6101; EG-LABEL: constant_zextload_v8i8_to_v8i64:
6102; EG:       ; %bb.0:
6103; EG-NEXT:    ALU 0, @10, KC0[CB0:0-32], KC1[]
6104; EG-NEXT:    TEX 0 @8
6105; EG-NEXT:    ALU 34, @11, KC0[CB0:0-32], KC1[]
6106; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T6.XYZW, T12.X, 0
6107; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T7.XYZW, T11.X, 0
6108; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T8.XYZW, T10.X, 0
6109; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T5.XYZW, T9.X, 1
6110; EG-NEXT:    CF_END
6111; EG-NEXT:    Fetch clause starting at 8:
6112; EG-NEXT:     VTX_READ_64 T5.XY, T5.X, 0, #1
6113; EG-NEXT:    ALU clause starting at 10:
6114; EG-NEXT:     MOV * T5.X, KC0[2].Z,
6115; EG-NEXT:    ALU clause starting at 11:
6116; EG-NEXT:     MOV * T0.W, literal.x,
6117; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
6118; EG-NEXT:     BFE_UINT T6.X, T5.Y, literal.x, PV.W,
6119; EG-NEXT:     LSHR * T6.Z, T5.Y, literal.y,
6120; EG-NEXT:    16(2.242078e-44), 24(3.363116e-44)
6121; EG-NEXT:     MOV T6.Y, 0.0,
6122; EG-NEXT:     BFE_UINT * T7.Z, T5.Y, literal.x, T0.W,
6123; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
6124; EG-NEXT:     AND_INT T7.X, T5.Y, literal.x,
6125; EG-NEXT:     MOV * T7.Y, 0.0,
6126; EG-NEXT:    255(3.573311e-43), 0(0.000000e+00)
6127; EG-NEXT:     BFE_UINT T8.X, T5.X, literal.x, T0.W,
6128; EG-NEXT:     LSHR * T8.Z, T5.X, literal.y,
6129; EG-NEXT:    16(2.242078e-44), 24(3.363116e-44)
6130; EG-NEXT:     MOV T8.Y, 0.0,
6131; EG-NEXT:     BFE_UINT * T5.Z, T5.X, literal.x, T0.W,
6132; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
6133; EG-NEXT:     AND_INT T5.X, T5.X, literal.x,
6134; EG-NEXT:     MOV T5.Y, 0.0,
6135; EG-NEXT:     MOV T6.W, 0.0,
6136; EG-NEXT:     MOV * T7.W, 0.0,
6137; EG-NEXT:    255(3.573311e-43), 0(0.000000e+00)
6138; EG-NEXT:     MOV T8.W, 0.0,
6139; EG-NEXT:     MOV * T5.W, 0.0,
6140; EG-NEXT:     LSHR T9.X, KC0[2].Y, literal.x,
6141; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
6142; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
6143; EG-NEXT:     LSHR T10.X, PV.W, literal.x,
6144; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
6145; EG-NEXT:    2(2.802597e-45), 32(4.484155e-44)
6146; EG-NEXT:     LSHR T11.X, PV.W, literal.x,
6147; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
6148; EG-NEXT:    2(2.802597e-45), 48(6.726233e-44)
6149; EG-NEXT:     LSHR * T12.X, PV.W, literal.x,
6150; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
6151;
6152; GFX12-LABEL: constant_zextload_v8i8_to_v8i64:
6153; GFX12:       ; %bb.0:
6154; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
6155; GFX12-NEXT:    s_wait_kmcnt 0x0
6156; GFX12-NEXT:    s_load_b64 s[2:3], s[2:3], 0x0
6157; GFX12-NEXT:    s_wait_kmcnt 0x0
6158; GFX12-NEXT:    s_bfe_u32 s4, s3, 0x80010
6159; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
6160; GFX12-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s4
6161; GFX12-NEXT:    s_lshr_b32 s5, s3, 24
6162; GFX12-NEXT:    s_bfe_u32 s4, s3, 0x80008
6163; GFX12-NEXT:    s_wait_alu 0xfffe
6164; GFX12-NEXT:    v_dual_mov_b32 v2, s5 :: v_dual_mov_b32 v3, v1
6165; GFX12-NEXT:    s_and_b32 s3, s3, 0xff
6166; GFX12-NEXT:    global_store_b128 v1, v[0:3], s[0:1] offset:48
6167; GFX12-NEXT:    v_mov_b32_e32 v0, s3
6168; GFX12-NEXT:    v_mov_b32_e32 v2, s4
6169; GFX12-NEXT:    s_lshr_b32 s3, s2, 24
6170; GFX12-NEXT:    s_bfe_u32 s4, s2, 0x80010
6171; GFX12-NEXT:    global_store_b128 v1, v[0:3], s[0:1] offset:32
6172; GFX12-NEXT:    s_wait_alu 0xfffe
6173; GFX12-NEXT:    v_mov_b32_e32 v0, s4
6174; GFX12-NEXT:    v_mov_b32_e32 v2, s3
6175; GFX12-NEXT:    s_bfe_u32 s3, s2, 0x80008
6176; GFX12-NEXT:    s_and_b32 s2, s2, 0xff
6177; GFX12-NEXT:    global_store_b128 v1, v[0:3], s[0:1] offset:16
6178; GFX12-NEXT:    s_wait_alu 0xfffe
6179; GFX12-NEXT:    v_mov_b32_e32 v0, s2
6180; GFX12-NEXT:    v_mov_b32_e32 v2, s3
6181; GFX12-NEXT:    global_store_b128 v1, v[0:3], s[0:1]
6182; GFX12-NEXT:    s_endpgm
; IR under test: one vector load, one zext to 64-bit lanes, one vector store.
6183  %load = load <8 x i8>, ptr addrspace(4) %in
6184  %ext = zext <8 x i8> %load to <8 x i64>
6185  store <8 x i64> %ext, ptr addrspace(1) %out
6186  ret void
6187}
6188
; Sign-extending load of <8 x i8> from the constant address space (4), widened
; to <8 x i64> and stored to global memory (address space 1).
;
; The expectations inside this function are autogenerated (see the NOTE on the
; first line of this file); regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
; What they pin down for every RUN configuration:
;   - the 8 source bytes are fetched with a single 64-bit load
;     (s_load_dwordx2, s_load_b64, or VTX_READ_64 on the r600 target);
;   - on the amdgcn configurations each byte is moved to bit 0 with
;     s_lshr_b32 / s_mov_b32 and sign-extended to 64 bits with s_bfe_i64,
;     while the top byte of the loaded 64-bit value uses s_ashr_i64 by 56;
;   - on the r600 configuration the low half of each lane comes from BFE_INT
;     (or ASHR by 24 for the top byte of a dword) and the high half from an
;     ASHR by 31 of that low half;
;   - the 64-byte result is written with four 16-byte stores at byte offsets
;     0, 16, 32 and 48 from %out.
6189define amdgpu_kernel void @constant_sextload_v8i8_to_v8i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
6190; GFX6-NOHSA-LABEL: constant_sextload_v8i8_to_v8i64:
6191; GFX6-NOHSA:       ; %bb.0:
6192; GFX6-NOHSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
6193; GFX6-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
6194; GFX6-NOHSA-NEXT:    s_load_dwordx2 s[4:5], s[2:3], 0x0
6195; GFX6-NOHSA-NEXT:    s_mov_b32 s3, 0xf000
6196; GFX6-NOHSA-NEXT:    s_mov_b32 s2, -1
6197; GFX6-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
6198; GFX6-NOHSA-NEXT:    s_lshr_b32 s6, s5, 16
6199; GFX6-NOHSA-NEXT:    s_lshr_b32 s8, s5, 8
6200; GFX6-NOHSA-NEXT:    s_mov_b32 s10, s5
6201; GFX6-NOHSA-NEXT:    s_lshr_b32 s12, s4, 16
6202; GFX6-NOHSA-NEXT:    s_lshr_b32 s14, s4, 24
6203; GFX6-NOHSA-NEXT:    s_lshr_b32 s16, s4, 8
6204; GFX6-NOHSA-NEXT:    s_bfe_i64 s[18:19], s[4:5], 0x80000
6205; GFX6-NOHSA-NEXT:    s_bfe_i64 s[10:11], s[10:11], 0x80000
6206; GFX6-NOHSA-NEXT:    s_ashr_i64 s[4:5], s[4:5], 56
6207; GFX6-NOHSA-NEXT:    s_bfe_i64 s[16:17], s[16:17], 0x80000
6208; GFX6-NOHSA-NEXT:    s_bfe_i64 s[14:15], s[14:15], 0x80000
6209; GFX6-NOHSA-NEXT:    s_bfe_i64 s[12:13], s[12:13], 0x80000
6210; GFX6-NOHSA-NEXT:    s_bfe_i64 s[6:7], s[6:7], 0x80000
6211; GFX6-NOHSA-NEXT:    s_bfe_i64 s[8:9], s[8:9], 0x80000
6212; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s4
6213; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s5
6214; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v4, s10
6215; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v5, s11
6216; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v8, s18
6217; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v9, s19
6218; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s6
6219; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s7
6220; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
6221; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v6, s8
6222; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v7, s9
6223; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:32
6224; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(1)
6225; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s12
6226; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s13
6227; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s14
6228; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s15
6229; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
6230; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v10, s16
6231; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v11, s17
6232; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0
6233; GFX6-NOHSA-NEXT:    s_endpgm
6234;
6235; GFX7-HSA-LABEL: constant_sextload_v8i8_to_v8i64:
6236; GFX7-HSA:       ; %bb.0:
6237; GFX7-HSA-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
6238; GFX7-HSA-NEXT:    s_waitcnt lgkmcnt(0)
6239; GFX7-HSA-NEXT:    s_load_dwordx2 s[2:3], s[2:3], 0x0
6240; GFX7-HSA-NEXT:    s_waitcnt lgkmcnt(0)
6241; GFX7-HSA-NEXT:    s_lshr_b32 s4, s3, 16
6242; GFX7-HSA-NEXT:    s_lshr_b32 s6, s3, 8
6243; GFX7-HSA-NEXT:    s_mov_b32 s8, s3
6244; GFX7-HSA-NEXT:    s_lshr_b32 s10, s2, 16
6245; GFX7-HSA-NEXT:    s_lshr_b32 s12, s2, 24
6246; GFX7-HSA-NEXT:    s_lshr_b32 s14, s2, 8
6247; GFX7-HSA-NEXT:    s_bfe_i64 s[16:17], s[2:3], 0x80000
6248; GFX7-HSA-NEXT:    s_bfe_i64 s[14:15], s[14:15], 0x80000
6249; GFX7-HSA-NEXT:    s_bfe_i64 s[12:13], s[12:13], 0x80000
6250; GFX7-HSA-NEXT:    s_bfe_i64 s[10:11], s[10:11], 0x80000
6251; GFX7-HSA-NEXT:    s_bfe_i64 s[8:9], s[8:9], 0x80000
6252; GFX7-HSA-NEXT:    s_bfe_i64 s[6:7], s[6:7], 0x80000
6253; GFX7-HSA-NEXT:    s_ashr_i64 s[2:3], s[2:3], 56
6254; GFX7-HSA-NEXT:    s_bfe_i64 s[4:5], s[4:5], 0x80000
6255; GFX7-HSA-NEXT:    v_mov_b32_e32 v2, s2
6256; GFX7-HSA-NEXT:    s_add_u32 s2, s0, 48
6257; GFX7-HSA-NEXT:    v_mov_b32_e32 v3, s3
6258; GFX7-HSA-NEXT:    s_addc_u32 s3, s1, 0
6259; GFX7-HSA-NEXT:    v_mov_b32_e32 v5, s3
6260; GFX7-HSA-NEXT:    v_mov_b32_e32 v4, s2
6261; GFX7-HSA-NEXT:    s_add_u32 s2, s0, 32
6262; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s4
6263; GFX7-HSA-NEXT:    v_mov_b32_e32 v1, s5
6264; GFX7-HSA-NEXT:    s_addc_u32 s3, s1, 0
6265; GFX7-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6266; GFX7-HSA-NEXT:    v_mov_b32_e32 v5, s3
6267; GFX7-HSA-NEXT:    v_mov_b32_e32 v4, s2
6268; GFX7-HSA-NEXT:    s_add_u32 s2, s0, 16
6269; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s8
6270; GFX7-HSA-NEXT:    v_mov_b32_e32 v1, s9
6271; GFX7-HSA-NEXT:    v_mov_b32_e32 v2, s6
6272; GFX7-HSA-NEXT:    v_mov_b32_e32 v3, s7
6273; GFX7-HSA-NEXT:    s_addc_u32 s3, s1, 0
6274; GFX7-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6275; GFX7-HSA-NEXT:    v_mov_b32_e32 v5, s3
6276; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s10
6277; GFX7-HSA-NEXT:    v_mov_b32_e32 v1, s11
6278; GFX7-HSA-NEXT:    v_mov_b32_e32 v2, s12
6279; GFX7-HSA-NEXT:    v_mov_b32_e32 v3, s13
6280; GFX7-HSA-NEXT:    v_mov_b32_e32 v4, s2
6281; GFX7-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6282; GFX7-HSA-NEXT:    v_mov_b32_e32 v5, s1
6283; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s16
6284; GFX7-HSA-NEXT:    v_mov_b32_e32 v1, s17
6285; GFX7-HSA-NEXT:    v_mov_b32_e32 v2, s14
6286; GFX7-HSA-NEXT:    v_mov_b32_e32 v3, s15
6287; GFX7-HSA-NEXT:    v_mov_b32_e32 v4, s0
6288; GFX7-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6289; GFX7-HSA-NEXT:    s_endpgm
6290;
6291; GFX8-NOHSA-LABEL: constant_sextload_v8i8_to_v8i64:
6292; GFX8-NOHSA:       ; %bb.0:
6293; GFX8-NOHSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
6294; GFX8-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
6295; GFX8-NOHSA-NEXT:    s_load_dwordx2 s[2:3], s[2:3], 0x0
6296; GFX8-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
6297; GFX8-NOHSA-NEXT:    s_lshr_b32 s4, s3, 16
6298; GFX8-NOHSA-NEXT:    s_lshr_b32 s6, s3, 8
6299; GFX8-NOHSA-NEXT:    s_mov_b32 s8, s3
6300; GFX8-NOHSA-NEXT:    s_lshr_b32 s10, s2, 16
6301; GFX8-NOHSA-NEXT:    s_lshr_b32 s12, s2, 24
6302; GFX8-NOHSA-NEXT:    s_lshr_b32 s14, s2, 8
6303; GFX8-NOHSA-NEXT:    s_bfe_i64 s[16:17], s[2:3], 0x80000
6304; GFX8-NOHSA-NEXT:    s_bfe_i64 s[14:15], s[14:15], 0x80000
6305; GFX8-NOHSA-NEXT:    s_bfe_i64 s[12:13], s[12:13], 0x80000
6306; GFX8-NOHSA-NEXT:    s_bfe_i64 s[10:11], s[10:11], 0x80000
6307; GFX8-NOHSA-NEXT:    s_bfe_i64 s[8:9], s[8:9], 0x80000
6308; GFX8-NOHSA-NEXT:    s_bfe_i64 s[6:7], s[6:7], 0x80000
6309; GFX8-NOHSA-NEXT:    s_ashr_i64 s[2:3], s[2:3], 56
6310; GFX8-NOHSA-NEXT:    s_bfe_i64 s[4:5], s[4:5], 0x80000
6311; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s2
6312; GFX8-NOHSA-NEXT:    s_add_u32 s2, s0, 48
6313; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v3, s3
6314; GFX8-NOHSA-NEXT:    s_addc_u32 s3, s1, 0
6315; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s3
6316; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s2
6317; GFX8-NOHSA-NEXT:    s_add_u32 s2, s0, 32
6318; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s4
6319; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s5
6320; GFX8-NOHSA-NEXT:    s_addc_u32 s3, s1, 0
6321; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6322; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s3
6323; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s2
6324; GFX8-NOHSA-NEXT:    s_add_u32 s2, s0, 16
6325; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s8
6326; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s9
6327; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s6
6328; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v3, s7
6329; GFX8-NOHSA-NEXT:    s_addc_u32 s3, s1, 0
6330; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6331; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s3
6332; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s10
6333; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s11
6334; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s12
6335; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v3, s13
6336; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s2
6337; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6338; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s1
6339; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s16
6340; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s17
6341; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s14
6342; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v3, s15
6343; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s0
6344; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6345; GFX8-NOHSA-NEXT:    s_endpgm
6346;
6347; EG-LABEL: constant_sextload_v8i8_to_v8i64:
6348; EG:       ; %bb.0:
6349; EG-NEXT:    ALU 0, @10, KC0[CB0:0-32], KC1[]
6350; EG-NEXT:    TEX 0 @8
6351; EG-NEXT:    ALU 39, @11, KC0[CB0:0-32], KC1[]
6352; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T5.XYZW, T12.X, 0
6353; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T7.XYZW, T9.X, 0
6354; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T10.XYZW, T8.X, 0
6355; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T11.XYZW, T6.X, 1
6356; EG-NEXT:    CF_END
6357; EG-NEXT:    Fetch clause starting at 8:
6358; EG-NEXT:     VTX_READ_64 T5.XY, T5.X, 0, #1
6359; EG-NEXT:    ALU clause starting at 10:
6360; EG-NEXT:     MOV * T5.X, KC0[2].Z,
6361; EG-NEXT:    ALU clause starting at 11:
6362; EG-NEXT:     LSHR * T6.X, KC0[2].Y, literal.x,
6363; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
6364; EG-NEXT:     BFE_INT T7.X, T5.Y, 0.0, literal.x,
6365; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
6366; EG-NEXT:    8(1.121039e-44), 16(2.242078e-44)
6367; EG-NEXT:     LSHR T8.X, PV.W, literal.x,
6368; EG-NEXT:     ASHR T7.Y, PV.X, literal.y,
6369; EG-NEXT:     LSHR T0.W, T5.Y, literal.z,
6370; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.w,
6371; EG-NEXT:    2(2.802597e-45), 31(4.344025e-44)
6372; EG-NEXT:    8(1.121039e-44), 32(4.484155e-44)
6373; EG-NEXT:     LSHR T9.X, PS, literal.x,
6374; EG-NEXT:     BFE_INT T7.Z, PV.W, 0.0, literal.y,
6375; EG-NEXT:     ASHR * T10.W, T5.X, literal.z,
6376; EG-NEXT:    2(2.802597e-45), 8(1.121039e-44)
6377; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
6378; EG-NEXT:     BFE_INT T11.X, T5.X, 0.0, literal.x,
6379; EG-NEXT:     ASHR T10.Z, T5.X, literal.y,
6380; EG-NEXT:     LSHR T0.W, T5.X, literal.z,
6381; EG-NEXT:     ASHR * T5.W, T5.Y, literal.w,
6382; EG-NEXT:    8(1.121039e-44), 24(3.363116e-44)
6383; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
6384; EG-NEXT:     BFE_INT T10.X, PV.W, 0.0, literal.x,
6385; EG-NEXT:     ASHR T11.Y, PV.X, literal.y,
6386; EG-NEXT:     ASHR T5.Z, T5.Y, literal.z,
6387; EG-NEXT:     LSHR T0.W, T5.X, literal.x,
6388; EG-NEXT:     LSHR * T1.W, T5.Y, literal.w,
6389; EG-NEXT:    8(1.121039e-44), 31(4.344025e-44)
6390; EG-NEXT:    24(3.363116e-44), 16(2.242078e-44)
6391; EG-NEXT:     BFE_INT T5.X, PS, 0.0, literal.x,
6392; EG-NEXT:     ASHR T10.Y, PV.X, literal.y,
6393; EG-NEXT:     BFE_INT T11.Z, PV.W, 0.0, literal.x,
6394; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.z,
6395; EG-NEXT:    8(1.121039e-44), 31(4.344025e-44)
6396; EG-NEXT:    48(6.726233e-44), 0(0.000000e+00)
6397; EG-NEXT:     LSHR T12.X, PV.W, literal.x,
6398; EG-NEXT:     ASHR T5.Y, PV.X, literal.y,
6399; EG-NEXT:     ASHR T11.W, PV.Z, literal.y,
6400; EG-NEXT:     ASHR * T7.W, T7.Z, literal.y,
6401; EG-NEXT:    2(2.802597e-45), 31(4.344025e-44)
6402;
6403; GFX12-LABEL: constant_sextload_v8i8_to_v8i64:
6404; GFX12:       ; %bb.0:
6405; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
6406; GFX12-NEXT:    s_wait_kmcnt 0x0
6407; GFX12-NEXT:    s_load_b64 s[2:3], s[2:3], 0x0
6408; GFX12-NEXT:    s_wait_kmcnt 0x0
6409; GFX12-NEXT:    s_lshr_b32 s4, s3, 16
6410; GFX12-NEXT:    s_lshr_b32 s6, s3, 8
6411; GFX12-NEXT:    s_mov_b32 s8, s3
6412; GFX12-NEXT:    s_lshr_b32 s10, s2, 16
6413; GFX12-NEXT:    s_lshr_b32 s12, s2, 24
6414; GFX12-NEXT:    s_lshr_b32 s14, s2, 8
6415; GFX12-NEXT:    s_bfe_i64 s[16:17], s[2:3], 0x80000
6416; GFX12-NEXT:    s_ashr_i64 s[2:3], s[2:3], 56
6417; GFX12-NEXT:    s_bfe_i64 s[4:5], s[4:5], 0x80000
6418; GFX12-NEXT:    s_bfe_i64 s[8:9], s[8:9], 0x80000
6419; GFX12-NEXT:    s_bfe_i64 s[6:7], s[6:7], 0x80000
6420; GFX12-NEXT:    v_dual_mov_b32 v16, 0 :: v_dual_mov_b32 v3, s3
6421; GFX12-NEXT:    s_bfe_i64 s[12:13], s[12:13], 0x80000
6422; GFX12-NEXT:    s_bfe_i64 s[10:11], s[10:11], 0x80000
6423; GFX12-NEXT:    v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v5, s17
6424; GFX12-NEXT:    v_dual_mov_b32 v4, s16 :: v_dual_mov_b32 v1, s5
6425; GFX12-NEXT:    v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v9, s9
6426; GFX12-NEXT:    s_bfe_i64 s[14:15], s[14:15], 0x80000
6427; GFX12-NEXT:    v_dual_mov_b32 v8, s8 :: v_dual_mov_b32 v11, s7
6428; GFX12-NEXT:    v_dual_mov_b32 v10, s6 :: v_dual_mov_b32 v13, s11
6429; GFX12-NEXT:    v_dual_mov_b32 v12, s10 :: v_dual_mov_b32 v15, s13
6430; GFX12-NEXT:    v_dual_mov_b32 v14, s12 :: v_dual_mov_b32 v7, s15
6431; GFX12-NEXT:    v_mov_b32_e32 v6, s14
6432; GFX12-NEXT:    s_clause 0x3
6433; GFX12-NEXT:    global_store_b128 v16, v[0:3], s[0:1] offset:48
6434; GFX12-NEXT:    global_store_b128 v16, v[8:11], s[0:1] offset:32
6435; GFX12-NEXT:    global_store_b128 v16, v[12:15], s[0:1] offset:16
6436; GFX12-NEXT:    global_store_b128 v16, v[4:7], s[0:1]
6437; GFX12-NEXT:    s_endpgm
; IR under test: one vector load, one sext to 64-bit lanes, one vector store.
6438  %load = load <8 x i8>, ptr addrspace(4) %in
6439  %ext = sext <8 x i8> %load to <8 x i64>
6440  store <8 x i64> %ext, ptr addrspace(1) %out
6441  ret void
6442}
6443
6444define amdgpu_kernel void @constant_zextload_v16i8_to_v16i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
6445; GFX6-NOHSA-LABEL: constant_zextload_v16i8_to_v16i64:
6446; GFX6-NOHSA:       ; %bb.0:
6447; GFX6-NOHSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
6448; GFX6-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
6449; GFX6-NOHSA-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x0
6450; GFX6-NOHSA-NEXT:    s_mov_b32 s3, 0xf000
6451; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, 0
6452; GFX6-NOHSA-NEXT:    s_mov_b32 s2, -1
6453; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, v1
6454; GFX6-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
6455; GFX6-NOHSA-NEXT:    s_lshr_b32 s8, s5, 24
6456; GFX6-NOHSA-NEXT:    s_lshr_b32 s9, s4, 24
6457; GFX6-NOHSA-NEXT:    s_lshr_b32 s10, s7, 24
6458; GFX6-NOHSA-NEXT:    s_lshr_b32 s11, s6, 24
6459; GFX6-NOHSA-NEXT:    s_bfe_u32 s12, s6, 0x80008
6460; GFX6-NOHSA-NEXT:    s_bfe_u32 s13, s7, 0x80008
6461; GFX6-NOHSA-NEXT:    s_bfe_u32 s14, s4, 0x80008
6462; GFX6-NOHSA-NEXT:    s_bfe_u32 s15, s5, 0x80008
6463; GFX6-NOHSA-NEXT:    s_and_b32 s16, s5, 0xff
6464; GFX6-NOHSA-NEXT:    s_and_b32 s17, s4, 0xff
6465; GFX6-NOHSA-NEXT:    s_and_b32 s18, s7, 0xff
6466; GFX6-NOHSA-NEXT:    s_and_b32 s19, s6, 0xff
6467; GFX6-NOHSA-NEXT:    s_bfe_u32 s5, s5, 0x80010
6468; GFX6-NOHSA-NEXT:    s_bfe_u32 s4, s4, 0x80010
6469; GFX6-NOHSA-NEXT:    s_bfe_u32 s6, s6, 0x80010
6470; GFX6-NOHSA-NEXT:    s_bfe_u32 s7, s7, 0x80010
6471; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s6
6472; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s11
6473; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80
6474; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
6475; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s7
6476; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s10
6477; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112
6478; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
6479; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s4
6480; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s9
6481; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
6482; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
6483; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s5
6484; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s8
6485; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
6486; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
6487; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s19
6488; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s12
6489; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64
6490; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
6491; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s18
6492; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s13
6493; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96
6494; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
6495; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s17
6496; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s14
6497; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
6498; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
6499; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s16
6500; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s15
6501; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
6502; GFX6-NOHSA-NEXT:    s_endpgm
6503;
6504; GFX7-HSA-LABEL: constant_zextload_v16i8_to_v16i64:
6505; GFX7-HSA:       ; %bb.0:
6506; GFX7-HSA-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
6507; GFX7-HSA-NEXT:    v_mov_b32_e32 v1, 0
6508; GFX7-HSA-NEXT:    v_mov_b32_e32 v3, v1
6509; GFX7-HSA-NEXT:    s_waitcnt lgkmcnt(0)
6510; GFX7-HSA-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x0
6511; GFX7-HSA-NEXT:    s_waitcnt lgkmcnt(0)
6512; GFX7-HSA-NEXT:    s_lshr_b32 s8, s5, 24
6513; GFX7-HSA-NEXT:    s_lshr_b32 s9, s4, 24
6514; GFX7-HSA-NEXT:    s_lshr_b32 s10, s7, 24
6515; GFX7-HSA-NEXT:    s_lshr_b32 s2, s6, 24
6516; GFX7-HSA-NEXT:    s_bfe_u32 s11, s6, 0x80008
6517; GFX7-HSA-NEXT:    s_bfe_u32 s12, s7, 0x80008
6518; GFX7-HSA-NEXT:    s_bfe_u32 s13, s4, 0x80008
6519; GFX7-HSA-NEXT:    s_bfe_u32 s14, s5, 0x80008
6520; GFX7-HSA-NEXT:    s_and_b32 s15, s5, 0xff
6521; GFX7-HSA-NEXT:    s_and_b32 s16, s4, 0xff
6522; GFX7-HSA-NEXT:    s_and_b32 s17, s7, 0xff
6523; GFX7-HSA-NEXT:    s_and_b32 s18, s6, 0xff
6524; GFX7-HSA-NEXT:    s_bfe_u32 s5, s5, 0x80010
6525; GFX7-HSA-NEXT:    s_bfe_u32 s4, s4, 0x80010
6526; GFX7-HSA-NEXT:    s_bfe_u32 s7, s7, 0x80010
6527; GFX7-HSA-NEXT:    s_bfe_u32 s3, s6, 0x80010
6528; GFX7-HSA-NEXT:    v_mov_b32_e32 v2, s2
6529; GFX7-HSA-NEXT:    s_add_u32 s2, s0, 0x50
6530; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s3
6531; GFX7-HSA-NEXT:    s_addc_u32 s3, s1, 0
6532; GFX7-HSA-NEXT:    v_mov_b32_e32 v5, s3
6533; GFX7-HSA-NEXT:    v_mov_b32_e32 v4, s2
6534; GFX7-HSA-NEXT:    s_add_u32 s2, s0, 0x70
6535; GFX7-HSA-NEXT:    s_addc_u32 s3, s1, 0
6536; GFX7-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6537; GFX7-HSA-NEXT:    v_mov_b32_e32 v5, s3
6538; GFX7-HSA-NEXT:    v_mov_b32_e32 v4, s2
6539; GFX7-HSA-NEXT:    s_add_u32 s2, s0, 16
6540; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s7
6541; GFX7-HSA-NEXT:    v_mov_b32_e32 v2, s10
6542; GFX7-HSA-NEXT:    s_addc_u32 s3, s1, 0
6543; GFX7-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6544; GFX7-HSA-NEXT:    v_mov_b32_e32 v5, s3
6545; GFX7-HSA-NEXT:    v_mov_b32_e32 v4, s2
6546; GFX7-HSA-NEXT:    s_add_u32 s2, s0, 48
6547; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s4
6548; GFX7-HSA-NEXT:    v_mov_b32_e32 v2, s9
6549; GFX7-HSA-NEXT:    s_addc_u32 s3, s1, 0
6550; GFX7-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6551; GFX7-HSA-NEXT:    v_mov_b32_e32 v5, s3
6552; GFX7-HSA-NEXT:    v_mov_b32_e32 v4, s2
6553; GFX7-HSA-NEXT:    s_add_u32 s2, s0, 64
6554; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s5
6555; GFX7-HSA-NEXT:    v_mov_b32_e32 v2, s8
6556; GFX7-HSA-NEXT:    s_addc_u32 s3, s1, 0
6557; GFX7-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6558; GFX7-HSA-NEXT:    v_mov_b32_e32 v5, s3
6559; GFX7-HSA-NEXT:    v_mov_b32_e32 v4, s2
6560; GFX7-HSA-NEXT:    s_add_u32 s2, s0, 0x60
6561; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s18
6562; GFX7-HSA-NEXT:    v_mov_b32_e32 v2, s11
6563; GFX7-HSA-NEXT:    s_addc_u32 s3, s1, 0
6564; GFX7-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6565; GFX7-HSA-NEXT:    v_mov_b32_e32 v5, s3
6566; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s17
6567; GFX7-HSA-NEXT:    v_mov_b32_e32 v2, s12
6568; GFX7-HSA-NEXT:    v_mov_b32_e32 v4, s2
6569; GFX7-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6570; GFX7-HSA-NEXT:    v_mov_b32_e32 v5, s1
6571; GFX7-HSA-NEXT:    v_mov_b32_e32 v4, s0
6572; GFX7-HSA-NEXT:    s_add_u32 s0, s0, 32
6573; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s16
6574; GFX7-HSA-NEXT:    v_mov_b32_e32 v2, s13
6575; GFX7-HSA-NEXT:    s_addc_u32 s1, s1, 0
6576; GFX7-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6577; GFX7-HSA-NEXT:    v_mov_b32_e32 v5, s1
6578; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s15
6579; GFX7-HSA-NEXT:    v_mov_b32_e32 v2, s14
6580; GFX7-HSA-NEXT:    v_mov_b32_e32 v4, s0
6581; GFX7-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6582; GFX7-HSA-NEXT:    s_endpgm
6583;
6584; GFX8-NOHSA-LABEL: constant_zextload_v16i8_to_v16i64:
6585; GFX8-NOHSA:       ; %bb.0:
6586; GFX8-NOHSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
6587; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, 0
6588; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v3, v1
6589; GFX8-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
6590; GFX8-NOHSA-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x0
6591; GFX8-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
6592; GFX8-NOHSA-NEXT:    s_lshr_b32 s8, s5, 24
6593; GFX8-NOHSA-NEXT:    s_lshr_b32 s2, s7, 24
6594; GFX8-NOHSA-NEXT:    s_lshr_b32 s9, s6, 24
6595; GFX8-NOHSA-NEXT:    s_bfe_u32 s10, s6, 0x80008
6596; GFX8-NOHSA-NEXT:    s_bfe_u32 s11, s7, 0x80008
6597; GFX8-NOHSA-NEXT:    s_bfe_u32 s12, s5, 0x80008
6598; GFX8-NOHSA-NEXT:    s_lshr_b32 s13, s4, 24
6599; GFX8-NOHSA-NEXT:    s_bfe_u32 s14, s4, 0x80008
6600; GFX8-NOHSA-NEXT:    s_and_b32 s15, s4, 0xff
6601; GFX8-NOHSA-NEXT:    s_bfe_u32 s4, s4, 0x80010
6602; GFX8-NOHSA-NEXT:    s_and_b32 s16, s5, 0xff
6603; GFX8-NOHSA-NEXT:    s_and_b32 s17, s7, 0xff
6604; GFX8-NOHSA-NEXT:    s_and_b32 s18, s6, 0xff
6605; GFX8-NOHSA-NEXT:    s_bfe_u32 s6, s6, 0x80010
6606; GFX8-NOHSA-NEXT:    s_bfe_u32 s5, s5, 0x80010
6607; GFX8-NOHSA-NEXT:    s_bfe_u32 s3, s7, 0x80010
6608; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s2
6609; GFX8-NOHSA-NEXT:    s_add_u32 s2, s0, 0x70
6610; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s3
6611; GFX8-NOHSA-NEXT:    s_addc_u32 s3, s1, 0
6612; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s3
6613; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s2
6614; GFX8-NOHSA-NEXT:    s_add_u32 s2, s0, 48
6615; GFX8-NOHSA-NEXT:    s_addc_u32 s3, s1, 0
6616; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6617; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s3
6618; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s2
6619; GFX8-NOHSA-NEXT:    s_add_u32 s2, s0, 0x50
6620; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s5
6621; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s8
6622; GFX8-NOHSA-NEXT:    s_addc_u32 s3, s1, 0
6623; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6624; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s3
6625; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s2
6626; GFX8-NOHSA-NEXT:    s_add_u32 s2, s0, 64
6627; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s6
6628; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s9
6629; GFX8-NOHSA-NEXT:    s_addc_u32 s3, s1, 0
6630; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6631; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s3
6632; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s2
6633; GFX8-NOHSA-NEXT:    s_add_u32 s2, s0, 0x60
6634; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s18
6635; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s10
6636; GFX8-NOHSA-NEXT:    s_addc_u32 s3, s1, 0
6637; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6638; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s3
6639; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s2
6640; GFX8-NOHSA-NEXT:    s_add_u32 s2, s0, 32
6641; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s17
6642; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s11
6643; GFX8-NOHSA-NEXT:    s_addc_u32 s3, s1, 0
6644; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6645; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s3
6646; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s2
6647; GFX8-NOHSA-NEXT:    s_add_u32 s2, s0, 16
6648; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s16
6649; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s12
6650; GFX8-NOHSA-NEXT:    s_addc_u32 s3, s1, 0
6651; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6652; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s3
6653; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s4
6654; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s13
6655; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s2
6656; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6657; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s1
6658; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s15
6659; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s14
6660; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s0
6661; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6662; GFX8-NOHSA-NEXT:    s_endpgm
6663;
6664; EG-LABEL: constant_zextload_v16i8_to_v16i64:
6665; EG:       ; %bb.0:
6666; EG-NEXT:    ALU 0, @14, KC0[CB0:0-32], KC1[]
6667; EG-NEXT:    TEX 0 @12
6668; EG-NEXT:    ALU 68, @15, KC0[CB0:0-32], KC1[]
6669; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T8.XYZW, T22.X, 0
6670; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T9.XYZW, T21.X, 0
6671; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T10.XYZW, T20.X, 0
6672; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T11.XYZW, T19.X, 0
6673; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T12.XYZW, T18.X, 0
6674; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T13.XYZW, T17.X, 0
6675; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T14.XYZW, T16.X, 0
6676; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T7.XYZW, T15.X, 1
6677; EG-NEXT:    CF_END
6678; EG-NEXT:    Fetch clause starting at 12:
6679; EG-NEXT:     VTX_READ_128 T7.XYZW, T7.X, 0, #1
6680; EG-NEXT:    ALU clause starting at 14:
6681; EG-NEXT:     MOV * T7.X, KC0[2].Z,
6682; EG-NEXT:    ALU clause starting at 15:
6683; EG-NEXT:     MOV * T0.W, literal.x,
6684; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
6685; EG-NEXT:     BFE_UINT T8.X, T7.W, literal.x, PV.W,
6686; EG-NEXT:     LSHR * T8.Z, T7.W, literal.y,
6687; EG-NEXT:    16(2.242078e-44), 24(3.363116e-44)
6688; EG-NEXT:     MOV T8.Y, 0.0,
6689; EG-NEXT:     BFE_UINT * T9.Z, T7.W, literal.x, T0.W,
6690; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
6691; EG-NEXT:     AND_INT T9.X, T7.W, literal.x,
6692; EG-NEXT:     MOV * T9.Y, 0.0,
6693; EG-NEXT:    255(3.573311e-43), 0(0.000000e+00)
6694; EG-NEXT:     BFE_UINT T10.X, T7.Z, literal.x, T0.W,
6695; EG-NEXT:     LSHR * T10.Z, T7.Z, literal.y,
6696; EG-NEXT:    16(2.242078e-44), 24(3.363116e-44)
6697; EG-NEXT:     MOV T10.Y, 0.0,
6698; EG-NEXT:     BFE_UINT * T11.Z, T7.Z, literal.x, T0.W,
6699; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
6700; EG-NEXT:     AND_INT T11.X, T7.Z, literal.x,
6701; EG-NEXT:     MOV * T11.Y, 0.0,
6702; EG-NEXT:    255(3.573311e-43), 0(0.000000e+00)
6703; EG-NEXT:     BFE_UINT T12.X, T7.Y, literal.x, T0.W,
6704; EG-NEXT:     LSHR * T12.Z, T7.Y, literal.y,
6705; EG-NEXT:    16(2.242078e-44), 24(3.363116e-44)
6706; EG-NEXT:     MOV T12.Y, 0.0,
6707; EG-NEXT:     BFE_UINT * T13.Z, T7.Y, literal.x, T0.W,
6708; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
6709; EG-NEXT:     AND_INT T13.X, T7.Y, literal.x,
6710; EG-NEXT:     MOV * T13.Y, 0.0,
6711; EG-NEXT:    255(3.573311e-43), 0(0.000000e+00)
6712; EG-NEXT:     BFE_UINT T14.X, T7.X, literal.x, T0.W,
6713; EG-NEXT:     LSHR * T14.Z, T7.X, literal.y,
6714; EG-NEXT:    16(2.242078e-44), 24(3.363116e-44)
6715; EG-NEXT:     MOV T14.Y, 0.0,
6716; EG-NEXT:     BFE_UINT * T7.Z, T7.X, literal.x, T0.W,
6717; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
6718; EG-NEXT:     AND_INT T7.X, T7.X, literal.x,
6719; EG-NEXT:     MOV T7.Y, 0.0,
6720; EG-NEXT:     MOV T8.W, 0.0,
6721; EG-NEXT:     MOV * T9.W, 0.0,
6722; EG-NEXT:    255(3.573311e-43), 0(0.000000e+00)
6723; EG-NEXT:     MOV T10.W, 0.0,
6724; EG-NEXT:     MOV * T11.W, 0.0,
6725; EG-NEXT:     MOV T12.W, 0.0,
6726; EG-NEXT:     MOV * T13.W, 0.0,
6727; EG-NEXT:     MOV T14.W, 0.0,
6728; EG-NEXT:     MOV * T7.W, 0.0,
6729; EG-NEXT:     LSHR T15.X, KC0[2].Y, literal.x,
6730; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
6731; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
6732; EG-NEXT:     LSHR T16.X, PV.W, literal.x,
6733; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
6734; EG-NEXT:    2(2.802597e-45), 32(4.484155e-44)
6735; EG-NEXT:     LSHR T17.X, PV.W, literal.x,
6736; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
6737; EG-NEXT:    2(2.802597e-45), 48(6.726233e-44)
6738; EG-NEXT:     LSHR T18.X, PV.W, literal.x,
6739; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
6740; EG-NEXT:    2(2.802597e-45), 64(8.968310e-44)
6741; EG-NEXT:     LSHR T19.X, PV.W, literal.x,
6742; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
6743; EG-NEXT:    2(2.802597e-45), 80(1.121039e-43)
6744; EG-NEXT:     LSHR T20.X, PV.W, literal.x,
6745; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
6746; EG-NEXT:    2(2.802597e-45), 96(1.345247e-43)
6747; EG-NEXT:     LSHR T21.X, PV.W, literal.x,
6748; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
6749; EG-NEXT:    2(2.802597e-45), 112(1.569454e-43)
6750; EG-NEXT:     LSHR * T22.X, PV.W, literal.x,
6751; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
6752;
6753; GFX12-LABEL: constant_zextload_v16i8_to_v16i64:
6754; GFX12:       ; %bb.0:
6755; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
6756; GFX12-NEXT:    s_wait_kmcnt 0x0
6757; GFX12-NEXT:    s_load_b128 s[4:7], s[2:3], 0x0
6758; GFX12-NEXT:    s_wait_kmcnt 0x0
6759; GFX12-NEXT:    s_bfe_u32 s2, s7, 0x80010
6760; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
6761; GFX12-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2
6762; GFX12-NEXT:    s_lshr_b32 s3, s7, 24
6763; GFX12-NEXT:    s_lshr_b32 s2, s5, 24
6764; GFX12-NEXT:    s_wait_alu 0xfffe
6765; GFX12-NEXT:    v_dual_mov_b32 v2, s3 :: v_dual_mov_b32 v3, v1
6766; GFX12-NEXT:    s_bfe_u32 s3, s5, 0x80010
6767; GFX12-NEXT:    global_store_b128 v1, v[0:3], s[0:1] offset:112
6768; GFX12-NEXT:    s_wait_alu 0xfffe
6769; GFX12-NEXT:    v_mov_b32_e32 v0, s3
6770; GFX12-NEXT:    v_mov_b32_e32 v2, s2
6771; GFX12-NEXT:    s_lshr_b32 s2, s6, 24
6772; GFX12-NEXT:    s_bfe_u32 s3, s6, 0x80010
6773; GFX12-NEXT:    global_store_b128 v1, v[0:3], s[0:1] offset:48
6774; GFX12-NEXT:    s_wait_alu 0xfffe
6775; GFX12-NEXT:    v_mov_b32_e32 v0, s3
6776; GFX12-NEXT:    v_mov_b32_e32 v2, s2
6777; GFX12-NEXT:    s_bfe_u32 s2, s6, 0x80008
6778; GFX12-NEXT:    s_and_b32 s3, s6, 0xff
6779; GFX12-NEXT:    global_store_b128 v1, v[0:3], s[0:1] offset:80
6780; GFX12-NEXT:    s_wait_alu 0xfffe
6781; GFX12-NEXT:    v_mov_b32_e32 v0, s3
6782; GFX12-NEXT:    v_mov_b32_e32 v2, s2
6783; GFX12-NEXT:    s_bfe_u32 s2, s7, 0x80008
6784; GFX12-NEXT:    s_and_b32 s3, s7, 0xff
6785; GFX12-NEXT:    global_store_b128 v1, v[0:3], s[0:1] offset:64
6786; GFX12-NEXT:    s_wait_alu 0xfffe
6787; GFX12-NEXT:    v_mov_b32_e32 v0, s3
6788; GFX12-NEXT:    v_mov_b32_e32 v2, s2
6789; GFX12-NEXT:    s_bfe_u32 s2, s5, 0x80008
6790; GFX12-NEXT:    s_and_b32 s3, s5, 0xff
6791; GFX12-NEXT:    global_store_b128 v1, v[0:3], s[0:1] offset:96
6792; GFX12-NEXT:    s_wait_alu 0xfffe
6793; GFX12-NEXT:    v_mov_b32_e32 v0, s3
6794; GFX12-NEXT:    v_mov_b32_e32 v2, s2
6795; GFX12-NEXT:    s_lshr_b32 s2, s4, 24
6796; GFX12-NEXT:    s_bfe_u32 s3, s4, 0x80010
6797; GFX12-NEXT:    global_store_b128 v1, v[0:3], s[0:1] offset:32
6798; GFX12-NEXT:    s_wait_alu 0xfffe
6799; GFX12-NEXT:    v_mov_b32_e32 v0, s3
6800; GFX12-NEXT:    v_mov_b32_e32 v2, s2
6801; GFX12-NEXT:    s_bfe_u32 s2, s4, 0x80008
6802; GFX12-NEXT:    s_and_b32 s3, s4, 0xff
6803; GFX12-NEXT:    global_store_b128 v1, v[0:3], s[0:1] offset:16
6804; GFX12-NEXT:    s_wait_alu 0xfffe
6805; GFX12-NEXT:    v_mov_b32_e32 v0, s3
6806; GFX12-NEXT:    v_mov_b32_e32 v2, s2
6807; GFX12-NEXT:    global_store_b128 v1, v[0:3], s[0:1]
6808; GFX12-NEXT:    s_endpgm
6809  %load = load <16 x i8>, ptr addrspace(4) %in
6810  %ext = zext <16 x i8> %load to <16 x i64>
6811  store <16 x i64> %ext, ptr addrspace(1) %out
6812  ret void
6813}
6814
; Test case: load a <16 x i8> vector from %in (addrspace(4)), sign-extend every
; lane to i64, and store the resulting <16 x i64> to %out (addrspace(1)).
; The per-target assertion lines inside this function are autogenerated by
; utils/update_llc_test_checks.py (see the NOTE on the first line of the file);
; regenerate them with that script rather than editing them by hand.
6815define amdgpu_kernel void @constant_sextload_v16i8_to_v16i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
6816; GFX6-NOHSA-LABEL: constant_sextload_v16i8_to_v16i64:
6817; GFX6-NOHSA:       ; %bb.0:
6818; GFX6-NOHSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
6819; GFX6-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
6820; GFX6-NOHSA-NEXT:    s_load_dwordx4 s[8:11], s[2:3], 0x0
6821; GFX6-NOHSA-NEXT:    s_mov_b32 s3, 0xf000
6822; GFX6-NOHSA-NEXT:    s_mov_b32 s2, -1
6823; GFX6-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
6824; GFX6-NOHSA-NEXT:    s_lshr_b32 s12, s11, 16
6825; GFX6-NOHSA-NEXT:    s_lshr_b32 s14, s11, 8
6826; GFX6-NOHSA-NEXT:    s_mov_b32 s4, s11
6827; GFX6-NOHSA-NEXT:    s_lshr_b32 s16, s10, 16
6828; GFX6-NOHSA-NEXT:    s_lshr_b32 s18, s10, 24
6829; GFX6-NOHSA-NEXT:    s_lshr_b32 s20, s10, 8
6830; GFX6-NOHSA-NEXT:    s_lshr_b32 s22, s9, 16
6831; GFX6-NOHSA-NEXT:    s_lshr_b32 s24, s9, 8
6832; GFX6-NOHSA-NEXT:    s_mov_b32 s26, s9
6833; GFX6-NOHSA-NEXT:    s_lshr_b32 s28, s8, 16
6834; GFX6-NOHSA-NEXT:    s_lshr_b32 s30, s8, 24
6835; GFX6-NOHSA-NEXT:    s_lshr_b32 s34, s8, 8
6836; GFX6-NOHSA-NEXT:    s_bfe_i64 s[6:7], s[8:9], 0x80000
6837; GFX6-NOHSA-NEXT:    s_ashr_i64 s[36:37], s[8:9], 56
6838; GFX6-NOHSA-NEXT:    s_bfe_i64 s[38:39], s[10:11], 0x80000
6839; GFX6-NOHSA-NEXT:    s_ashr_i64 s[10:11], s[10:11], 56
6840; GFX6-NOHSA-NEXT:    s_bfe_i64 s[26:27], s[26:27], 0x80000
6841; GFX6-NOHSA-NEXT:    s_bfe_i64 s[40:41], s[4:5], 0x80000
6842; GFX6-NOHSA-NEXT:    s_bfe_i64 s[4:5], s[34:35], 0x80000
6843; GFX6-NOHSA-NEXT:    s_bfe_i64 s[8:9], s[30:31], 0x80000
6844; GFX6-NOHSA-NEXT:    s_bfe_i64 s[28:29], s[28:29], 0x80000
6845; GFX6-NOHSA-NEXT:    s_bfe_i64 s[24:25], s[24:25], 0x80000
6846; GFX6-NOHSA-NEXT:    s_bfe_i64 s[22:23], s[22:23], 0x80000
6847; GFX6-NOHSA-NEXT:    s_bfe_i64 s[20:21], s[20:21], 0x80000
6848; GFX6-NOHSA-NEXT:    s_bfe_i64 s[18:19], s[18:19], 0x80000
6849; GFX6-NOHSA-NEXT:    s_bfe_i64 s[16:17], s[16:17], 0x80000
6850; GFX6-NOHSA-NEXT:    s_bfe_i64 s[12:13], s[12:13], 0x80000
6851; GFX6-NOHSA-NEXT:    s_bfe_i64 s[14:15], s[14:15], 0x80000
6852; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s10
6853; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s11
6854; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v4, s40
6855; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v5, s41
6856; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v8, s38
6857; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v9, s39
6858; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v12, s36
6859; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v13, s37
6860; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v14, s26
6861; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v15, s27
6862; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s12
6863; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s13
6864; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112
6865; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
6866; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s6
6867; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s7
6868; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v6, s14
6869; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v7, s15
6870; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:96
6871; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s16
6872; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s17
6873; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
6874; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v4, s18
6875; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v5, s19
6876; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[2:5], off, s[0:3], 0 offset:80
6877; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v10, s20
6878; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v11, s21
6879; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:64
6880; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
6881; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v10, s22
6882; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v11, s23
6883; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[10:13], off, s[0:3], 0 offset:48
6884; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v16, s24
6885; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v17, s25
6886; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[14:17], off, s[0:3], 0 offset:32
6887; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s28
6888; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s29
6889; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v4, s8
6890; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v5, s9
6891; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[2:5], off, s[0:3], 0 offset:16
6892; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
6893; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s4
6894; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s5
6895; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
6896; GFX6-NOHSA-NEXT:    s_endpgm
6897;
6898; GFX7-HSA-LABEL: constant_sextload_v16i8_to_v16i64:
6899; GFX7-HSA:       ; %bb.0:
6900; GFX7-HSA-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
6901; GFX7-HSA-NEXT:    s_waitcnt lgkmcnt(0)
6902; GFX7-HSA-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x0
6903; GFX7-HSA-NEXT:    s_waitcnt lgkmcnt(0)
6904; GFX7-HSA-NEXT:    s_lshr_b32 s8, s7, 16
6905; GFX7-HSA-NEXT:    s_lshr_b32 s10, s7, 8
6906; GFX7-HSA-NEXT:    s_mov_b32 s12, s7
6907; GFX7-HSA-NEXT:    s_lshr_b32 s14, s6, 16
6908; GFX7-HSA-NEXT:    s_lshr_b32 s16, s6, 24
6909; GFX7-HSA-NEXT:    s_lshr_b32 s18, s6, 8
6910; GFX7-HSA-NEXT:    s_lshr_b32 s20, s5, 16
6911; GFX7-HSA-NEXT:    s_lshr_b32 s22, s5, 8
6912; GFX7-HSA-NEXT:    s_mov_b32 s24, s5
6913; GFX7-HSA-NEXT:    s_lshr_b32 s26, s4, 16
6914; GFX7-HSA-NEXT:    s_lshr_b32 s28, s4, 24
6915; GFX7-HSA-NEXT:    s_lshr_b32 s30, s4, 8
6916; GFX7-HSA-NEXT:    s_bfe_i64 s[2:3], s[4:5], 0x80000
6917; GFX7-HSA-NEXT:    s_ashr_i64 s[34:35], s[4:5], 56
6918; GFX7-HSA-NEXT:    s_bfe_i64 s[36:37], s[6:7], 0x80000
6919; GFX7-HSA-NEXT:    s_ashr_i64 s[4:5], s[6:7], 56
6920; GFX7-HSA-NEXT:    s_bfe_i64 s[6:7], s[8:9], 0x80000
6921; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s6
6922; GFX7-HSA-NEXT:    v_mov_b32_e32 v1, s7
6923; GFX7-HSA-NEXT:    v_mov_b32_e32 v2, s4
6924; GFX7-HSA-NEXT:    v_mov_b32_e32 v3, s5
6925; GFX7-HSA-NEXT:    s_bfe_i64 s[4:5], s[30:31], 0x80000
6926; GFX7-HSA-NEXT:    s_bfe_i64 s[6:7], s[28:29], 0x80000
6927; GFX7-HSA-NEXT:    s_bfe_i64 s[8:9], s[26:27], 0x80000
6928; GFX7-HSA-NEXT:    s_bfe_i64 s[24:25], s[24:25], 0x80000
6929; GFX7-HSA-NEXT:    s_bfe_i64 s[22:23], s[22:23], 0x80000
6930; GFX7-HSA-NEXT:    s_bfe_i64 s[20:21], s[20:21], 0x80000
6931; GFX7-HSA-NEXT:    s_bfe_i64 s[18:19], s[18:19], 0x80000
6932; GFX7-HSA-NEXT:    s_bfe_i64 s[16:17], s[16:17], 0x80000
6933; GFX7-HSA-NEXT:    s_bfe_i64 s[14:15], s[14:15], 0x80000
6934; GFX7-HSA-NEXT:    s_bfe_i64 s[12:13], s[12:13], 0x80000
6935; GFX7-HSA-NEXT:    s_bfe_i64 s[10:11], s[10:11], 0x80000
6936; GFX7-HSA-NEXT:    s_add_u32 s26, s0, 0x70
6937; GFX7-HSA-NEXT:    s_addc_u32 s27, s1, 0
6938; GFX7-HSA-NEXT:    v_mov_b32_e32 v6, s10
6939; GFX7-HSA-NEXT:    s_add_u32 s10, s0, 0x60
6940; GFX7-HSA-NEXT:    v_mov_b32_e32 v8, s26
6941; GFX7-HSA-NEXT:    v_mov_b32_e32 v7, s11
6942; GFX7-HSA-NEXT:    s_addc_u32 s11, s1, 0
6943; GFX7-HSA-NEXT:    v_mov_b32_e32 v10, s10
6944; GFX7-HSA-NEXT:    v_mov_b32_e32 v9, s27
6945; GFX7-HSA-NEXT:    v_mov_b32_e32 v4, s12
6946; GFX7-HSA-NEXT:    v_mov_b32_e32 v5, s13
6947; GFX7-HSA-NEXT:    v_mov_b32_e32 v11, s11
6948; GFX7-HSA-NEXT:    s_add_u32 s10, s0, 0x50
6949; GFX7-HSA-NEXT:    flat_store_dwordx4 v[8:9], v[0:3]
6950; GFX7-HSA-NEXT:    flat_store_dwordx4 v[10:11], v[4:7]
6951; GFX7-HSA-NEXT:    s_addc_u32 s11, s1, 0
6952; GFX7-HSA-NEXT:    v_mov_b32_e32 v4, s10
6953; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s14
6954; GFX7-HSA-NEXT:    v_mov_b32_e32 v1, s15
6955; GFX7-HSA-NEXT:    v_mov_b32_e32 v2, s16
6956; GFX7-HSA-NEXT:    v_mov_b32_e32 v3, s17
6957; GFX7-HSA-NEXT:    v_mov_b32_e32 v5, s11
6958; GFX7-HSA-NEXT:    s_add_u32 s10, s0, 64
6959; GFX7-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6960; GFX7-HSA-NEXT:    s_addc_u32 s11, s1, 0
6961; GFX7-HSA-NEXT:    v_mov_b32_e32 v4, s10
6962; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s36
6963; GFX7-HSA-NEXT:    v_mov_b32_e32 v1, s37
6964; GFX7-HSA-NEXT:    v_mov_b32_e32 v2, s18
6965; GFX7-HSA-NEXT:    v_mov_b32_e32 v3, s19
6966; GFX7-HSA-NEXT:    v_mov_b32_e32 v5, s11
6967; GFX7-HSA-NEXT:    s_add_u32 s10, s0, 48
6968; GFX7-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6969; GFX7-HSA-NEXT:    s_addc_u32 s11, s1, 0
6970; GFX7-HSA-NEXT:    v_mov_b32_e32 v4, s10
6971; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s20
6972; GFX7-HSA-NEXT:    v_mov_b32_e32 v1, s21
6973; GFX7-HSA-NEXT:    v_mov_b32_e32 v2, s34
6974; GFX7-HSA-NEXT:    v_mov_b32_e32 v3, s35
6975; GFX7-HSA-NEXT:    v_mov_b32_e32 v5, s11
6976; GFX7-HSA-NEXT:    s_add_u32 s10, s0, 32
6977; GFX7-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6978; GFX7-HSA-NEXT:    s_addc_u32 s11, s1, 0
6979; GFX7-HSA-NEXT:    v_mov_b32_e32 v4, s10
6980; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s24
6981; GFX7-HSA-NEXT:    v_mov_b32_e32 v1, s25
6982; GFX7-HSA-NEXT:    v_mov_b32_e32 v2, s22
6983; GFX7-HSA-NEXT:    v_mov_b32_e32 v3, s23
6984; GFX7-HSA-NEXT:    v_mov_b32_e32 v5, s11
6985; GFX7-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6986; GFX7-HSA-NEXT:    s_nop 0
6987; GFX7-HSA-NEXT:    v_mov_b32_e32 v2, s6
6988; GFX7-HSA-NEXT:    s_add_u32 s6, s0, 16
6989; GFX7-HSA-NEXT:    v_mov_b32_e32 v3, s7
6990; GFX7-HSA-NEXT:    s_addc_u32 s7, s1, 0
6991; GFX7-HSA-NEXT:    v_mov_b32_e32 v4, s6
6992; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s8
6993; GFX7-HSA-NEXT:    v_mov_b32_e32 v1, s9
6994; GFX7-HSA-NEXT:    v_mov_b32_e32 v5, s7
6995; GFX7-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6996; GFX7-HSA-NEXT:    v_mov_b32_e32 v5, s1
6997; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s2
6998; GFX7-HSA-NEXT:    v_mov_b32_e32 v1, s3
6999; GFX7-HSA-NEXT:    v_mov_b32_e32 v2, s4
7000; GFX7-HSA-NEXT:    v_mov_b32_e32 v3, s5
7001; GFX7-HSA-NEXT:    v_mov_b32_e32 v4, s0
7002; GFX7-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
7003; GFX7-HSA-NEXT:    s_endpgm
7004;
7005; GFX8-NOHSA-LABEL: constant_sextload_v16i8_to_v16i64:
7006; GFX8-NOHSA:       ; %bb.0:
7007; GFX8-NOHSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
7008; GFX8-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
7009; GFX8-NOHSA-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x0
7010; GFX8-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
7011; GFX8-NOHSA-NEXT:    s_lshr_b32 s10, s7, 16
7012; GFX8-NOHSA-NEXT:    s_lshr_b32 s12, s7, 8
7013; GFX8-NOHSA-NEXT:    s_mov_b32 s14, s7
7014; GFX8-NOHSA-NEXT:    s_lshr_b32 s16, s6, 16
7015; GFX8-NOHSA-NEXT:    s_lshr_b32 s18, s6, 24
7016; GFX8-NOHSA-NEXT:    s_lshr_b32 s20, s6, 8
7017; GFX8-NOHSA-NEXT:    s_lshr_b32 s22, s5, 16
7018; GFX8-NOHSA-NEXT:    s_lshr_b32 s24, s5, 8
7019; GFX8-NOHSA-NEXT:    s_mov_b32 s26, s5
7020; GFX8-NOHSA-NEXT:    s_lshr_b32 s8, s4, 16
7021; GFX8-NOHSA-NEXT:    s_lshr_b32 s28, s4, 24
7022; GFX8-NOHSA-NEXT:    s_lshr_b32 s30, s4, 8
7023; GFX8-NOHSA-NEXT:    s_bfe_i64 s[2:3], s[4:5], 0x80000
7024; GFX8-NOHSA-NEXT:    s_ashr_i64 s[34:35], s[4:5], 56
7025; GFX8-NOHSA-NEXT:    s_bfe_i64 s[36:37], s[6:7], 0x80000
7026; GFX8-NOHSA-NEXT:    s_ashr_i64 s[38:39], s[6:7], 56
7027; GFX8-NOHSA-NEXT:    s_bfe_i64 s[4:5], s[30:31], 0x80000
7028; GFX8-NOHSA-NEXT:    s_bfe_i64 s[6:7], s[28:29], 0x80000
7029; GFX8-NOHSA-NEXT:    s_bfe_i64 s[8:9], s[8:9], 0x80000
7030; GFX8-NOHSA-NEXT:    s_bfe_i64 s[26:27], s[26:27], 0x80000
7031; GFX8-NOHSA-NEXT:    s_bfe_i64 s[24:25], s[24:25], 0x80000
7032; GFX8-NOHSA-NEXT:    s_bfe_i64 s[22:23], s[22:23], 0x80000
7033; GFX8-NOHSA-NEXT:    s_bfe_i64 s[20:21], s[20:21], 0x80000
7034; GFX8-NOHSA-NEXT:    s_bfe_i64 s[18:19], s[18:19], 0x80000
7035; GFX8-NOHSA-NEXT:    s_bfe_i64 s[16:17], s[16:17], 0x80000
7036; GFX8-NOHSA-NEXT:    s_bfe_i64 s[14:15], s[14:15], 0x80000
7037; GFX8-NOHSA-NEXT:    s_bfe_i64 s[12:13], s[12:13], 0x80000
7038; GFX8-NOHSA-NEXT:    s_bfe_i64 s[10:11], s[10:11], 0x80000
7039; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s10
7040; GFX8-NOHSA-NEXT:    s_add_u32 s10, s0, 0x70
7041; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s11
7042; GFX8-NOHSA-NEXT:    s_addc_u32 s11, s1, 0
7043; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s10
7044; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s38
7045; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v3, s39
7046; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s11
7047; GFX8-NOHSA-NEXT:    s_add_u32 s10, s0, 0x60
7048; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
7049; GFX8-NOHSA-NEXT:    s_addc_u32 s11, s1, 0
7050; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s10
7051; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s14
7052; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s15
7053; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s12
7054; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v3, s13
7055; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s11
7056; GFX8-NOHSA-NEXT:    s_add_u32 s10, s0, 0x50
7057; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
7058; GFX8-NOHSA-NEXT:    s_addc_u32 s11, s1, 0
7059; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s10
7060; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s16
7061; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s17
7062; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s18
7063; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v3, s19
7064; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s11
7065; GFX8-NOHSA-NEXT:    s_add_u32 s10, s0, 64
7066; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
7067; GFX8-NOHSA-NEXT:    s_addc_u32 s11, s1, 0
7068; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s10
7069; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s36
7070; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s37
7071; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s20
7072; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v3, s21
7073; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s11
7074; GFX8-NOHSA-NEXT:    s_add_u32 s10, s0, 48
7075; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
7076; GFX8-NOHSA-NEXT:    s_addc_u32 s11, s1, 0
7077; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s10
7078; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s22
7079; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s23
7080; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s34
7081; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v3, s35
7082; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s11
7083; GFX8-NOHSA-NEXT:    s_add_u32 s10, s0, 32
7084; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
7085; GFX8-NOHSA-NEXT:    s_addc_u32 s11, s1, 0
7086; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s10
7087; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s26
7088; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s27
7089; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s24
7090; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v3, s25
7091; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s11
7092; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
7093; GFX8-NOHSA-NEXT:    s_nop 0
7094; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s6
7095; GFX8-NOHSA-NEXT:    s_add_u32 s6, s0, 16
7096; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v3, s7
7097; GFX8-NOHSA-NEXT:    s_addc_u32 s7, s1, 0
7098; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s6
7099; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s8
7100; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s9
7101; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s7
7102; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
7103; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s1
7104; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s2
7105; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s3
7106; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s4
7107; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v3, s5
7108; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s0
7109; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
7110; GFX8-NOHSA-NEXT:    s_endpgm
7111;
7112; EG-LABEL: constant_sextload_v16i8_to_v16i64:
7113; EG:       ; %bb.0:
7114; EG-NEXT:    ALU 0, @14, KC0[CB0:0-32], KC1[]
7115; EG-NEXT:    TEX 0 @12
7116; EG-NEXT:    ALU 78, @15, KC0[CB0:0-32], KC1[]
7117; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T21.XYZW, T22.X, 0
7118; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T13.XYZW, T16.X, 0
7119; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T20.XYZW, T15.X, 0
7120; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T7.XYZW, T12.X, 0
7121; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T19.XYZW, T11.X, 0
7122; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T14.XYZW, T10.X, 0
7123; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T17.XYZW, T9.X, 0
7124; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T18.XYZW, T8.X, 1
7125; EG-NEXT:    CF_END
7126; EG-NEXT:    Fetch clause starting at 12:
7127; EG-NEXT:     VTX_READ_128 T7.XYZW, T7.X, 0, #1
7128; EG-NEXT:    ALU clause starting at 14:
7129; EG-NEXT:     MOV * T7.X, KC0[2].Z,
7130; EG-NEXT:    ALU clause starting at 15:
7131; EG-NEXT:     LSHR T8.X, KC0[2].Y, literal.x,
7132; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7133; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
7134; EG-NEXT:     LSHR T9.X, PV.W, literal.x,
7135; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7136; EG-NEXT:    2(2.802597e-45), 32(4.484155e-44)
7137; EG-NEXT:     LSHR T10.X, PV.W, literal.x,
7138; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7139; EG-NEXT:    2(2.802597e-45), 48(6.726233e-44)
7140; EG-NEXT:     LSHR T11.X, PV.W, literal.x,
7141; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7142; EG-NEXT:    2(2.802597e-45), 64(8.968310e-44)
7143; EG-NEXT:     LSHR * T12.X, PV.W, literal.x,
7144; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
7145; EG-NEXT:     BFE_INT * T13.X, T7.W, 0.0, literal.x,
7146; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
7147; EG-NEXT:     BFE_INT T14.X, T7.Y, 0.0, literal.x,
7148; EG-NEXT:     ASHR T13.Y, PV.X, literal.y,
7149; EG-NEXT:     LSHR T0.W, T7.W, literal.x,
7150; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.z,
7151; EG-NEXT:    8(1.121039e-44), 31(4.344025e-44)
7152; EG-NEXT:    80(1.121039e-43), 0(0.000000e+00)
7153; EG-NEXT:     LSHR T15.X, PS, literal.x,
7154; EG-NEXT:     ASHR T14.Y, PV.X, literal.y,
7155; EG-NEXT:     BFE_INT T13.Z, PV.W, 0.0, literal.z,
7156; EG-NEXT:     LSHR T0.W, T7.Y, literal.z,
7157; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.w,
7158; EG-NEXT:    2(2.802597e-45), 31(4.344025e-44)
7159; EG-NEXT:    8(1.121039e-44), 96(1.345247e-43)
7160; EG-NEXT:     LSHR T16.X, PS, literal.x,
7161; EG-NEXT:     BFE_INT T14.Z, PV.W, 0.0, literal.y,
7162; EG-NEXT:     ASHR * T17.W, T7.X, literal.z,
7163; EG-NEXT:    2(2.802597e-45), 8(1.121039e-44)
7164; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
7165; EG-NEXT:     BFE_INT T18.X, T7.X, 0.0, literal.x,
7166; EG-NEXT:     ASHR T17.Z, T7.X, literal.y,
7167; EG-NEXT:     LSHR T0.W, T7.X, literal.z,
7168; EG-NEXT:     ASHR * T19.W, T7.Y, literal.w,
7169; EG-NEXT:    8(1.121039e-44), 24(3.363116e-44)
7170; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
7171; EG-NEXT:     BFE_INT T17.X, PV.W, 0.0, literal.x,
7172; EG-NEXT:     ASHR T18.Y, PV.X, literal.y,
7173; EG-NEXT:     ASHR T19.Z, T7.Y, literal.z,
7174; EG-NEXT:     LSHR T0.W, T7.X, literal.x,
7175; EG-NEXT:     LSHR * T1.W, T7.Y, literal.w,
7176; EG-NEXT:    8(1.121039e-44), 31(4.344025e-44)
7177; EG-NEXT:    24(3.363116e-44), 16(2.242078e-44)
7178; EG-NEXT:     BFE_INT T19.X, PS, 0.0, literal.x,
7179; EG-NEXT:     ASHR T17.Y, PV.X, literal.y,
7180; EG-NEXT:     BFE_INT T18.Z, PV.W, 0.0, literal.x,
7181; EG-NEXT:     ADD_INT T0.W, KC0[2].Y, literal.z,
7182; EG-NEXT:     ASHR * T20.W, T7.Z, literal.y,
7183; EG-NEXT:    8(1.121039e-44), 31(4.344025e-44)
7184; EG-NEXT:    112(1.569454e-43), 0(0.000000e+00)
7185; EG-NEXT:     BFE_INT T7.X, T7.Z, 0.0, literal.x,
7186; EG-NEXT:     ASHR T19.Y, PV.X, literal.y,
7187; EG-NEXT:     ASHR T20.Z, T7.Z, literal.z,
7188; EG-NEXT:     LSHR T1.W, T7.Z, literal.w,
7189; EG-NEXT:     ASHR * T21.W, T7.W, literal.y,
7190; EG-NEXT:    8(1.121039e-44), 31(4.344025e-44)
7191; EG-NEXT:    24(3.363116e-44), 16(2.242078e-44)
7192; EG-NEXT:     BFE_INT T20.X, PV.W, 0.0, literal.x,
7193; EG-NEXT:     ASHR T7.Y, PV.X, literal.y,
7194; EG-NEXT:     ASHR T21.Z, T7.W, literal.z,
7195; EG-NEXT:     LSHR T1.W, T7.Z, literal.x,
7196; EG-NEXT:     LSHR * T2.W, T7.W, literal.w,
7197; EG-NEXT:    8(1.121039e-44), 31(4.344025e-44)
7198; EG-NEXT:    24(3.363116e-44), 16(2.242078e-44)
7199; EG-NEXT:     BFE_INT T21.X, PS, 0.0, literal.x,
7200; EG-NEXT:     ASHR T20.Y, PV.X, literal.y,
7201; EG-NEXT:     BFE_INT T7.Z, PV.W, 0.0, literal.x,
7202; EG-NEXT:     ASHR T18.W, T18.Z, literal.y,
7203; EG-NEXT:     ASHR * T14.W, T14.Z, literal.y,
7204; EG-NEXT:    8(1.121039e-44), 31(4.344025e-44)
7205; EG-NEXT:     LSHR T22.X, T0.W, literal.x,
7206; EG-NEXT:     ASHR T21.Y, PV.X, literal.y,
7207; EG-NEXT:     ASHR T7.W, PV.Z, literal.y,
7208; EG-NEXT:     ASHR * T13.W, T13.Z, literal.y,
7209; EG-NEXT:    2(2.802597e-45), 31(4.344025e-44)
7210;
7211; GFX12-LABEL: constant_sextload_v16i8_to_v16i64:
7212; GFX12:       ; %bb.0:
7213; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
7214; GFX12-NEXT:    s_wait_kmcnt 0x0
7215; GFX12-NEXT:    s_load_b128 s[4:7], s[2:3], 0x0
7216; GFX12-NEXT:    s_wait_kmcnt 0x0
7217; GFX12-NEXT:    s_lshr_b32 s2, s7, 16
7218; GFX12-NEXT:    s_lshr_b32 s8, s7, 8
7219; GFX12-NEXT:    s_mov_b32 s10, s7
7220; GFX12-NEXT:    s_lshr_b32 s12, s6, 16
7221; GFX12-NEXT:    s_lshr_b32 s14, s6, 24
7222; GFX12-NEXT:    s_lshr_b32 s16, s6, 8
7223; GFX12-NEXT:    s_bfe_i64 s[34:35], s[6:7], 0x80000
7224; GFX12-NEXT:    s_ashr_i64 s[6:7], s[6:7], 56
7225; GFX12-NEXT:    s_bfe_i64 s[2:3], s[2:3], 0x80000
7226; GFX12-NEXT:    s_bfe_i64 s[10:11], s[10:11], 0x80000
7227; GFX12-NEXT:    s_bfe_i64 s[8:9], s[8:9], 0x80000
7228; GFX12-NEXT:    v_dual_mov_b32 v24, 0 :: v_dual_mov_b32 v3, s7
7229; GFX12-NEXT:    s_lshr_b32 s18, s5, 16
7230; GFX12-NEXT:    s_bfe_i64 s[14:15], s[14:15], 0x80000
7231; GFX12-NEXT:    s_bfe_i64 s[12:13], s[12:13], 0x80000
7232; GFX12-NEXT:    v_dual_mov_b32 v2, s6 :: v_dual_mov_b32 v5, s35
7233; GFX12-NEXT:    v_dual_mov_b32 v4, s34 :: v_dual_mov_b32 v1, s3
7234; GFX12-NEXT:    v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v9, s11
7235; GFX12-NEXT:    s_lshr_b32 s20, s5, 8
7236; GFX12-NEXT:    s_mov_b32 s22, s5
7237; GFX12-NEXT:    s_bfe_i64 s[16:17], s[16:17], 0x80000
7238; GFX12-NEXT:    v_dual_mov_b32 v8, s10 :: v_dual_mov_b32 v11, s9
7239; GFX12-NEXT:    v_dual_mov_b32 v10, s8 :: v_dual_mov_b32 v13, s13
7240; GFX12-NEXT:    s_lshr_b32 s24, s4, 16
7241; GFX12-NEXT:    s_lshr_b32 s26, s4, 24
7242; GFX12-NEXT:    s_lshr_b32 s28, s4, 8
7243; GFX12-NEXT:    s_bfe_i64 s[30:31], s[4:5], 0x80000
7244; GFX12-NEXT:    s_ashr_i64 s[4:5], s[4:5], 56
7245; GFX12-NEXT:    s_bfe_i64 s[18:19], s[18:19], 0x80000
7246; GFX12-NEXT:    v_dual_mov_b32 v12, s12 :: v_dual_mov_b32 v15, s15
7247; GFX12-NEXT:    v_dual_mov_b32 v14, s14 :: v_dual_mov_b32 v7, s17
7248; GFX12-NEXT:    s_bfe_i64 s[22:23], s[22:23], 0x80000
7249; GFX12-NEXT:    s_bfe_i64 s[20:21], s[20:21], 0x80000
7250; GFX12-NEXT:    v_dual_mov_b32 v6, s16 :: v_dual_mov_b32 v17, s19
7251; GFX12-NEXT:    s_bfe_i64 s[26:27], s[26:27], 0x80000
7252; GFX12-NEXT:    s_bfe_i64 s[24:25], s[24:25], 0x80000
7253; GFX12-NEXT:    v_dual_mov_b32 v16, s18 :: v_dual_mov_b32 v19, s5
7254; GFX12-NEXT:    v_mov_b32_e32 v18, s4
7255; GFX12-NEXT:    s_bfe_i64 s[28:29], s[28:29], 0x80000
7256; GFX12-NEXT:    s_clause 0x1
7257; GFX12-NEXT:    global_store_b128 v24, v[0:3], s[0:1] offset:112
7258; GFX12-NEXT:    global_store_b128 v24, v[8:11], s[0:1] offset:96
7259; GFX12-NEXT:    v_dual_mov_b32 v1, s23 :: v_dual_mov_b32 v0, s22
7260; GFX12-NEXT:    v_dual_mov_b32 v3, s21 :: v_dual_mov_b32 v2, s20
7261; GFX12-NEXT:    v_dual_mov_b32 v9, s25 :: v_dual_mov_b32 v8, s24
7262; GFX12-NEXT:    v_dual_mov_b32 v11, s27 :: v_dual_mov_b32 v10, s26
7263; GFX12-NEXT:    v_dual_mov_b32 v21, s31 :: v_dual_mov_b32 v20, s30
7264; GFX12-NEXT:    v_dual_mov_b32 v23, s29 :: v_dual_mov_b32 v22, s28
7265; GFX12-NEXT:    s_clause 0x5
7266; GFX12-NEXT:    global_store_b128 v24, v[12:15], s[0:1] offset:80
7267; GFX12-NEXT:    global_store_b128 v24, v[4:7], s[0:1] offset:64
7268; GFX12-NEXT:    global_store_b128 v24, v[16:19], s[0:1] offset:48
7269; GFX12-NEXT:    global_store_b128 v24, v[0:3], s[0:1] offset:32
7270; GFX12-NEXT:    global_store_b128 v24, v[8:11], s[0:1] offset:16
7271; GFX12-NEXT:    global_store_b128 v24, v[20:23], s[0:1]
7272; GFX12-NEXT:    s_endpgm
; IR under test. The <16 x i64> result is 16 * 8 = 128 bytes; the expected
; output for every target above contains eight four-dword store instructions.
7273  %load = load <16 x i8>, ptr addrspace(4) %in
7274  %ext = sext <16 x i8> %load to <16 x i64>
7275  store <16 x i64> %ext, ptr addrspace(1) %out
7276  ret void
7277}
7278
; Zero-extending constant load, <32 x i8> -> <32 x i64>.
; The kernel reads a 32-element i8 vector through %in (addrspace(4)),
; zero-extends every element to i64, and writes the resulting <32 x i64>
; vector (256 bytes) through %out (addrspace(1)).
; The per-target assertion blocks that follow are autogenerated (see the NOTE on
; the first line of this file); regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
7279define amdgpu_kernel void @constant_zextload_v32i8_to_v32i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
7280; GFX6-NOHSA-LABEL: constant_zextload_v32i8_to_v32i64:
7281; GFX6-NOHSA:       ; %bb.0:
7282; GFX6-NOHSA-NEXT:    s_load_dwordx4 s[8:11], s[4:5], 0x9
7283; GFX6-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
7284; GFX6-NOHSA-NEXT:    s_load_dwordx8 s[0:7], s[10:11], 0x0
7285; GFX6-NOHSA-NEXT:    s_mov_b32 s11, 0xf000
7286; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, 0
7287; GFX6-NOHSA-NEXT:    s_mov_b32 s10, -1
7288; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, v1
7289; GFX6-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
7290; GFX6-NOHSA-NEXT:    s_lshr_b32 s12, s0, 24
7291; GFX6-NOHSA-NEXT:    s_lshr_b32 s13, s1, 24
7292; GFX6-NOHSA-NEXT:    s_lshr_b32 s14, s2, 24
7293; GFX6-NOHSA-NEXT:    s_lshr_b32 s15, s3, 24
7294; GFX6-NOHSA-NEXT:    s_lshr_b32 s16, s4, 24
7295; GFX6-NOHSA-NEXT:    s_lshr_b32 s17, s5, 24
7296; GFX6-NOHSA-NEXT:    s_lshr_b32 s18, s6, 24
7297; GFX6-NOHSA-NEXT:    s_lshr_b32 s19, s7, 24
7298; GFX6-NOHSA-NEXT:    s_bfe_u32 s20, s7, 0x80008
7299; GFX6-NOHSA-NEXT:    s_bfe_u32 s21, s6, 0x80008
7300; GFX6-NOHSA-NEXT:    s_bfe_u32 s22, s5, 0x80008
7301; GFX6-NOHSA-NEXT:    s_bfe_u32 s23, s4, 0x80008
7302; GFX6-NOHSA-NEXT:    s_bfe_u32 s24, s3, 0x80008
7303; GFX6-NOHSA-NEXT:    s_bfe_u32 s25, s2, 0x80008
7304; GFX6-NOHSA-NEXT:    s_bfe_u32 s26, s1, 0x80008
7305; GFX6-NOHSA-NEXT:    s_bfe_u32 s27, s0, 0x80008
7306; GFX6-NOHSA-NEXT:    s_and_b32 s28, s0, 0xff
7307; GFX6-NOHSA-NEXT:    s_and_b32 s29, s1, 0xff
7308; GFX6-NOHSA-NEXT:    s_and_b32 s30, s2, 0xff
7309; GFX6-NOHSA-NEXT:    s_and_b32 s31, s3, 0xff
7310; GFX6-NOHSA-NEXT:    s_and_b32 s33, s4, 0xff
7311; GFX6-NOHSA-NEXT:    s_and_b32 s34, s5, 0xff
7312; GFX6-NOHSA-NEXT:    s_and_b32 s35, s6, 0xff
7313; GFX6-NOHSA-NEXT:    s_and_b32 s36, s7, 0xff
7314; GFX6-NOHSA-NEXT:    s_bfe_u32 s0, s0, 0x80010
7315; GFX6-NOHSA-NEXT:    s_bfe_u32 s1, s1, 0x80010
7316; GFX6-NOHSA-NEXT:    s_bfe_u32 s2, s2, 0x80010
7317; GFX6-NOHSA-NEXT:    s_bfe_u32 s3, s3, 0x80010
7318; GFX6-NOHSA-NEXT:    s_bfe_u32 s4, s4, 0x80010
7319; GFX6-NOHSA-NEXT:    s_bfe_u32 s5, s5, 0x80010
7320; GFX6-NOHSA-NEXT:    s_bfe_u32 s7, s7, 0x80010
7321; GFX6-NOHSA-NEXT:    s_bfe_u32 s6, s6, 0x80010
7322; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s7
7323; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s19
7324; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:240
7325; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
7326; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s6
7327; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s18
7328; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:208
7329; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
7330; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s5
7331; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s17
7332; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:176
7333; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
7334; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s4
7335; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s16
7336; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:144
7337; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
7338; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s3
7339; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s15
7340; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:112
7341; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
7342; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s2
7343; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s14
7344; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:80
7345; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
7346; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s1
7347; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s13
7348; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:48
7349; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
7350; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s0
7351; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s12
7352; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:16
7353; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
7354; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s36
7355; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s20
7356; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:224
7357; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
7358; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s35
7359; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s21
7360; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:192
7361; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
7362; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s34
7363; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s22
7364; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:160
7365; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
7366; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s33
7367; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s23
7368; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:128
7369; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
7370; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s31
7371; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s24
7372; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:96
7373; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
7374; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s30
7375; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s25
7376; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:64
7377; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
7378; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s29
7379; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s26
7380; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:32
7381; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
7382; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s28
7383; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s27
7384; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0
7385; GFX6-NOHSA-NEXT:    s_endpgm
7386;
7387; GFX7-HSA-LABEL: constant_zextload_v32i8_to_v32i64:
7388; GFX7-HSA:       ; %bb.0:
7389; GFX7-HSA-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
7390; GFX7-HSA-NEXT:    v_mov_b32_e32 v1, 0
7391; GFX7-HSA-NEXT:    v_mov_b32_e32 v3, v1
7392; GFX7-HSA-NEXT:    s_waitcnt lgkmcnt(0)
7393; GFX7-HSA-NEXT:    s_load_dwordx8 s[8:15], s[2:3], 0x0
7394; GFX7-HSA-NEXT:    s_waitcnt lgkmcnt(0)
7395; GFX7-HSA-NEXT:    s_lshr_b32 s16, s8, 24
7396; GFX7-HSA-NEXT:    s_lshr_b32 s17, s9, 24
7397; GFX7-HSA-NEXT:    s_lshr_b32 s18, s10, 24
7398; GFX7-HSA-NEXT:    s_lshr_b32 s19, s11, 24
7399; GFX7-HSA-NEXT:    s_lshr_b32 s20, s12, 24
7400; GFX7-HSA-NEXT:    s_lshr_b32 s21, s13, 24
7401; GFX7-HSA-NEXT:    s_lshr_b32 s22, s14, 24
7402; GFX7-HSA-NEXT:    s_lshr_b32 s23, s15, 24
7403; GFX7-HSA-NEXT:    s_bfe_u32 s24, s15, 0x80008
7404; GFX7-HSA-NEXT:    s_bfe_u32 s25, s14, 0x80008
7405; GFX7-HSA-NEXT:    s_bfe_u32 s26, s13, 0x80008
7406; GFX7-HSA-NEXT:    s_bfe_u32 s27, s12, 0x80008
7407; GFX7-HSA-NEXT:    s_bfe_u32 s28, s11, 0x80008
7408; GFX7-HSA-NEXT:    s_bfe_u32 s29, s10, 0x80008
7409; GFX7-HSA-NEXT:    s_bfe_u32 s4, s9, 0x80008
7410; GFX7-HSA-NEXT:    s_bfe_u32 s2, s8, 0x80008
7411; GFX7-HSA-NEXT:    s_and_b32 s3, s8, 0xff
7412; GFX7-HSA-NEXT:    s_and_b32 s5, s9, 0xff
7413; GFX7-HSA-NEXT:    s_and_b32 s30, s10, 0xff
7414; GFX7-HSA-NEXT:    s_and_b32 s31, s11, 0xff
7415; GFX7-HSA-NEXT:    s_and_b32 s33, s12, 0xff
7416; GFX7-HSA-NEXT:    s_and_b32 s34, s13, 0xff
7417; GFX7-HSA-NEXT:    s_and_b32 s35, s14, 0xff
7418; GFX7-HSA-NEXT:    s_and_b32 s36, s15, 0xff
7419; GFX7-HSA-NEXT:    s_bfe_u32 s8, s8, 0x80010
7420; GFX7-HSA-NEXT:    s_bfe_u32 s9, s9, 0x80010
7421; GFX7-HSA-NEXT:    s_bfe_u32 s10, s10, 0x80010
7422; GFX7-HSA-NEXT:    s_bfe_u32 s11, s11, 0x80010
7423; GFX7-HSA-NEXT:    s_bfe_u32 s12, s12, 0x80010
7424; GFX7-HSA-NEXT:    s_bfe_u32 s13, s13, 0x80010
7425; GFX7-HSA-NEXT:    s_bfe_u32 s14, s14, 0x80010
7426; GFX7-HSA-NEXT:    s_bfe_u32 s15, s15, 0x80010
7427; GFX7-HSA-NEXT:    s_add_u32 s6, s0, 0xf0
7428; GFX7-HSA-NEXT:    s_addc_u32 s7, s1, 0
7429; GFX7-HSA-NEXT:    v_mov_b32_e32 v4, s6
7430; GFX7-HSA-NEXT:    v_mov_b32_e32 v5, s7
7431; GFX7-HSA-NEXT:    s_add_u32 s6, s0, 0xd0
7432; GFX7-HSA-NEXT:    s_addc_u32 s7, s1, 0
7433; GFX7-HSA-NEXT:    v_mov_b32_e32 v6, s6
7434; GFX7-HSA-NEXT:    v_mov_b32_e32 v7, s7
7435; GFX7-HSA-NEXT:    s_add_u32 s6, s0, 0xb0
7436; GFX7-HSA-NEXT:    s_addc_u32 s7, s1, 0
7437; GFX7-HSA-NEXT:    v_mov_b32_e32 v9, s7
7438; GFX7-HSA-NEXT:    v_mov_b32_e32 v8, s6
7439; GFX7-HSA-NEXT:    s_add_u32 s6, s0, 0x90
7440; GFX7-HSA-NEXT:    s_addc_u32 s7, s1, 0
7441; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s15
7442; GFX7-HSA-NEXT:    v_mov_b32_e32 v2, s23
7443; GFX7-HSA-NEXT:    v_mov_b32_e32 v11, s7
7444; GFX7-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
7445; GFX7-HSA-NEXT:    v_mov_b32_e32 v10, s6
7446; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s14
7447; GFX7-HSA-NEXT:    v_mov_b32_e32 v2, s22
7448; GFX7-HSA-NEXT:    flat_store_dwordx4 v[6:7], v[0:3]
7449; GFX7-HSA-NEXT:    s_add_u32 s6, s0, 0x70
7450; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s13
7451; GFX7-HSA-NEXT:    v_mov_b32_e32 v2, s21
7452; GFX7-HSA-NEXT:    flat_store_dwordx4 v[8:9], v[0:3]
7453; GFX7-HSA-NEXT:    s_addc_u32 s7, s1, 0
7454; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s12
7455; GFX7-HSA-NEXT:    v_mov_b32_e32 v2, s20
7456; GFX7-HSA-NEXT:    v_mov_b32_e32 v4, s6
7457; GFX7-HSA-NEXT:    flat_store_dwordx4 v[10:11], v[0:3]
7458; GFX7-HSA-NEXT:    v_mov_b32_e32 v5, s7
7459; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s11
7460; GFX7-HSA-NEXT:    v_mov_b32_e32 v2, s19
7461; GFX7-HSA-NEXT:    s_add_u32 s6, s0, 0x50
7462; GFX7-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
7463; GFX7-HSA-NEXT:    s_addc_u32 s7, s1, 0
7464; GFX7-HSA-NEXT:    v_mov_b32_e32 v4, s6
7465; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s10
7466; GFX7-HSA-NEXT:    v_mov_b32_e32 v2, s18
7467; GFX7-HSA-NEXT:    v_mov_b32_e32 v5, s7
7468; GFX7-HSA-NEXT:    s_add_u32 s6, s0, 48
7469; GFX7-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
7470; GFX7-HSA-NEXT:    s_addc_u32 s7, s1, 0
7471; GFX7-HSA-NEXT:    v_mov_b32_e32 v4, s6
7472; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s9
7473; GFX7-HSA-NEXT:    v_mov_b32_e32 v2, s17
7474; GFX7-HSA-NEXT:    v_mov_b32_e32 v5, s7
7475; GFX7-HSA-NEXT:    s_add_u32 s6, s0, 16
7476; GFX7-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
7477; GFX7-HSA-NEXT:    s_addc_u32 s7, s1, 0
7478; GFX7-HSA-NEXT:    v_mov_b32_e32 v4, s6
7479; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s8
7480; GFX7-HSA-NEXT:    v_mov_b32_e32 v2, s16
7481; GFX7-HSA-NEXT:    v_mov_b32_e32 v5, s7
7482; GFX7-HSA-NEXT:    s_add_u32 s6, s0, 0xe0
7483; GFX7-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
7484; GFX7-HSA-NEXT:    s_addc_u32 s7, s1, 0
7485; GFX7-HSA-NEXT:    v_mov_b32_e32 v4, s6
7486; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s36
7487; GFX7-HSA-NEXT:    v_mov_b32_e32 v2, s24
7488; GFX7-HSA-NEXT:    v_mov_b32_e32 v5, s7
7489; GFX7-HSA-NEXT:    s_add_u32 s6, s0, 0xc0
7490; GFX7-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
7491; GFX7-HSA-NEXT:    s_addc_u32 s7, s1, 0
7492; GFX7-HSA-NEXT:    v_mov_b32_e32 v4, s6
7493; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s35
7494; GFX7-HSA-NEXT:    v_mov_b32_e32 v2, s25
7495; GFX7-HSA-NEXT:    v_mov_b32_e32 v5, s7
7496; GFX7-HSA-NEXT:    s_add_u32 s6, s0, 0xa0
7497; GFX7-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
7498; GFX7-HSA-NEXT:    s_addc_u32 s7, s1, 0
7499; GFX7-HSA-NEXT:    v_mov_b32_e32 v4, s6
7500; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s34
7501; GFX7-HSA-NEXT:    v_mov_b32_e32 v2, s26
7502; GFX7-HSA-NEXT:    v_mov_b32_e32 v5, s7
7503; GFX7-HSA-NEXT:    s_add_u32 s6, s0, 0x80
7504; GFX7-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
7505; GFX7-HSA-NEXT:    s_addc_u32 s7, s1, 0
7506; GFX7-HSA-NEXT:    v_mov_b32_e32 v4, s6
7507; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s33
7508; GFX7-HSA-NEXT:    v_mov_b32_e32 v2, s27
7509; GFX7-HSA-NEXT:    v_mov_b32_e32 v5, s7
7510; GFX7-HSA-NEXT:    s_add_u32 s6, s0, 0x60
7511; GFX7-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
7512; GFX7-HSA-NEXT:    s_addc_u32 s7, s1, 0
7513; GFX7-HSA-NEXT:    v_mov_b32_e32 v4, s6
7514; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s31
7515; GFX7-HSA-NEXT:    v_mov_b32_e32 v2, s28
7516; GFX7-HSA-NEXT:    v_mov_b32_e32 v5, s7
7517; GFX7-HSA-NEXT:    s_add_u32 s6, s0, 64
7518; GFX7-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
7519; GFX7-HSA-NEXT:    s_addc_u32 s7, s1, 0
7520; GFX7-HSA-NEXT:    v_mov_b32_e32 v4, s6
7521; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s30
7522; GFX7-HSA-NEXT:    v_mov_b32_e32 v2, s29
7523; GFX7-HSA-NEXT:    v_mov_b32_e32 v5, s7
7524; GFX7-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
7525; GFX7-HSA-NEXT:    s_nop 0
7526; GFX7-HSA-NEXT:    v_mov_b32_e32 v2, s4
7527; GFX7-HSA-NEXT:    s_add_u32 s4, s0, 32
7528; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s5
7529; GFX7-HSA-NEXT:    s_addc_u32 s5, s1, 0
7530; GFX7-HSA-NEXT:    v_mov_b32_e32 v4, s4
7531; GFX7-HSA-NEXT:    v_mov_b32_e32 v5, s5
7532; GFX7-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
7533; GFX7-HSA-NEXT:    v_mov_b32_e32 v5, s1
7534; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s3
7535; GFX7-HSA-NEXT:    v_mov_b32_e32 v2, s2
7536; GFX7-HSA-NEXT:    v_mov_b32_e32 v4, s0
7537; GFX7-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
7538; GFX7-HSA-NEXT:    s_endpgm
7539;
7540; GFX8-NOHSA-LABEL: constant_zextload_v32i8_to_v32i64:
7541; GFX8-NOHSA:       ; %bb.0:
7542; GFX8-NOHSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
7543; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, 0
7544; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v3, v1
7545; GFX8-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
7546; GFX8-NOHSA-NEXT:    s_load_dwordx8 s[8:15], s[2:3], 0x0
7547; GFX8-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
7548; GFX8-NOHSA-NEXT:    s_lshr_b32 s16, s9, 24
7549; GFX8-NOHSA-NEXT:    s_lshr_b32 s17, s11, 24
7550; GFX8-NOHSA-NEXT:    s_lshr_b32 s18, s13, 24
7551; GFX8-NOHSA-NEXT:    s_lshr_b32 s6, s15, 24
7552; GFX8-NOHSA-NEXT:    s_bfe_u32 s19, s15, 0x80008
7553; GFX8-NOHSA-NEXT:    s_lshr_b32 s20, s14, 24
7554; GFX8-NOHSA-NEXT:    s_bfe_u32 s21, s14, 0x80008
7555; GFX8-NOHSA-NEXT:    s_bfe_u32 s22, s13, 0x80008
7556; GFX8-NOHSA-NEXT:    s_lshr_b32 s23, s12, 24
7557; GFX8-NOHSA-NEXT:    s_bfe_u32 s24, s12, 0x80008
7558; GFX8-NOHSA-NEXT:    s_bfe_u32 s25, s11, 0x80008
7559; GFX8-NOHSA-NEXT:    s_lshr_b32 s26, s10, 24
7560; GFX8-NOHSA-NEXT:    s_bfe_u32 s27, s10, 0x80008
7561; GFX8-NOHSA-NEXT:    s_bfe_u32 s28, s9, 0x80008
7562; GFX8-NOHSA-NEXT:    s_lshr_b32 s4, s8, 24
7563; GFX8-NOHSA-NEXT:    s_bfe_u32 s2, s8, 0x80008
7564; GFX8-NOHSA-NEXT:    s_and_b32 s3, s8, 0xff
7565; GFX8-NOHSA-NEXT:    s_bfe_u32 s5, s8, 0x80010
7566; GFX8-NOHSA-NEXT:    s_and_b32 s8, s9, 0xff
7567; GFX8-NOHSA-NEXT:    s_and_b32 s29, s10, 0xff
7568; GFX8-NOHSA-NEXT:    s_bfe_u32 s10, s10, 0x80010
7569; GFX8-NOHSA-NEXT:    s_and_b32 s30, s11, 0xff
7570; GFX8-NOHSA-NEXT:    s_and_b32 s31, s12, 0xff
7571; GFX8-NOHSA-NEXT:    s_bfe_u32 s12, s12, 0x80010
7572; GFX8-NOHSA-NEXT:    s_and_b32 s33, s13, 0xff
7573; GFX8-NOHSA-NEXT:    s_and_b32 s34, s14, 0xff
7574; GFX8-NOHSA-NEXT:    s_bfe_u32 s14, s14, 0x80010
7575; GFX8-NOHSA-NEXT:    s_and_b32 s35, s15, 0xff
7576; GFX8-NOHSA-NEXT:    s_bfe_u32 s9, s9, 0x80010
7577; GFX8-NOHSA-NEXT:    s_bfe_u32 s11, s11, 0x80010
7578; GFX8-NOHSA-NEXT:    s_bfe_u32 s13, s13, 0x80010
7579; GFX8-NOHSA-NEXT:    s_bfe_u32 s7, s15, 0x80010
7580; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s6
7581; GFX8-NOHSA-NEXT:    s_add_u32 s6, s0, 0xf0
7582; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s7
7583; GFX8-NOHSA-NEXT:    s_addc_u32 s7, s1, 0
7584; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s6
7585; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s7
7586; GFX8-NOHSA-NEXT:    s_add_u32 s6, s0, 0xb0
7587; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
7588; GFX8-NOHSA-NEXT:    s_addc_u32 s7, s1, 0
7589; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s6
7590; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s13
7591; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s18
7592; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s7
7593; GFX8-NOHSA-NEXT:    s_add_u32 s6, s0, 0x70
7594; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
7595; GFX8-NOHSA-NEXT:    s_addc_u32 s7, s1, 0
7596; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s6
7597; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s11
7598; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s17
7599; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s7
7600; GFX8-NOHSA-NEXT:    s_add_u32 s6, s0, 48
7601; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
7602; GFX8-NOHSA-NEXT:    s_addc_u32 s7, s1, 0
7603; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s6
7604; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s9
7605; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s16
7606; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s7
7607; GFX8-NOHSA-NEXT:    s_add_u32 s6, s0, 0xe0
7608; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
7609; GFX8-NOHSA-NEXT:    s_addc_u32 s7, s1, 0
7610; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s6
7611; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s35
7612; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s19
7613; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s7
7614; GFX8-NOHSA-NEXT:    s_add_u32 s6, s0, 0xd0
7615; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
7616; GFX8-NOHSA-NEXT:    s_addc_u32 s7, s1, 0
7617; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s6
7618; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s14
7619; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s20
7620; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s7
7621; GFX8-NOHSA-NEXT:    s_add_u32 s6, s0, 0xc0
7622; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
7623; GFX8-NOHSA-NEXT:    s_addc_u32 s7, s1, 0
7624; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s6
7625; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s34
7626; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s21
7627; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s7
7628; GFX8-NOHSA-NEXT:    s_add_u32 s6, s0, 0xa0
7629; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
7630; GFX8-NOHSA-NEXT:    s_addc_u32 s7, s1, 0
7631; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s6
7632; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s33
7633; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s22
7634; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s7
7635; GFX8-NOHSA-NEXT:    s_add_u32 s6, s0, 0x90
7636; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
7637; GFX8-NOHSA-NEXT:    s_addc_u32 s7, s1, 0
7638; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s6
7639; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s12
7640; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s23
7641; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s7
7642; GFX8-NOHSA-NEXT:    s_add_u32 s6, s0, 0x80
7643; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
7644; GFX8-NOHSA-NEXT:    s_addc_u32 s7, s1, 0
7645; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s6
7646; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s31
7647; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s24
7648; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s7
7649; GFX8-NOHSA-NEXT:    s_add_u32 s6, s0, 0x60
7650; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
7651; GFX8-NOHSA-NEXT:    s_addc_u32 s7, s1, 0
7652; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s6
7653; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s30
7654; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s25
7655; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s7
7656; GFX8-NOHSA-NEXT:    s_add_u32 s6, s0, 0x50
7657; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
7658; GFX8-NOHSA-NEXT:    s_addc_u32 s7, s1, 0
7659; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s6
7660; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s10
7661; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s26
7662; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s7
7663; GFX8-NOHSA-NEXT:    s_add_u32 s6, s0, 64
7664; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
7665; GFX8-NOHSA-NEXT:    s_addc_u32 s7, s1, 0
7666; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s6
7667; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s29
7668; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s27
7669; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s7
7670; GFX8-NOHSA-NEXT:    s_add_u32 s6, s0, 32
7671; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
7672; GFX8-NOHSA-NEXT:    s_addc_u32 s7, s1, 0
7673; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s6
7674; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s8
7675; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s28
7676; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s7
7677; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
7678; GFX8-NOHSA-NEXT:    s_nop 0
7679; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s4
7680; GFX8-NOHSA-NEXT:    s_add_u32 s4, s0, 16
7681; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s5
7682; GFX8-NOHSA-NEXT:    s_addc_u32 s5, s1, 0
7683; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s4
7684; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s5
7685; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
7686; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s1
7687; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s3
7688; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s2
7689; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s0
7690; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
7691; GFX8-NOHSA-NEXT:    s_endpgm
7692;
7693; EG-LABEL: constant_zextload_v32i8_to_v32i64:
7694; EG:       ; %bb.0:
7695; EG-NEXT:    ALU 0, @26, KC0[CB0:0-32], KC1[]
7696; EG-NEXT:    TEX 1 @22
7697; EG-NEXT:    ALU 103, @27, KC0[CB0:0-32], KC1[]
7698; EG-NEXT:    ALU 33, @131, KC0[CB0:0-32], KC1[]
7699; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T13.XYZW, T42.X, 0
7700; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T14.XYZW, T41.X, 0
7701; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T15.XYZW, T40.X, 0
7702; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T16.XYZW, T39.X, 0
7703; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T17.XYZW, T38.X, 0
7704; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T18.XYZW, T37.X, 0
7705; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T19.XYZW, T36.X, 0
7706; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T11.XYZW, T35.X, 0
7707; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T20.XYZW, T34.X, 0
7708; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T21.XYZW, T33.X, 0
7709; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T22.XYZW, T32.X, 0
7710; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T23.XYZW, T31.X, 0
7711; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T24.XYZW, T30.X, 0
7712; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T25.XYZW, T29.X, 0
7713; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T26.XYZW, T28.X, 0
7714; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T12.XYZW, T27.X, 1
7715; EG-NEXT:    CF_END
7716; EG-NEXT:    PAD
7717; EG-NEXT:    Fetch clause starting at 22:
7718; EG-NEXT:     VTX_READ_128 T12.XYZW, T11.X, 0, #1
7719; EG-NEXT:     VTX_READ_128 T11.XYZW, T11.X, 16, #1
7720; EG-NEXT:    ALU clause starting at 26:
7721; EG-NEXT:     MOV * T11.X, KC0[2].Z,
7722; EG-NEXT:    ALU clause starting at 27:
7723; EG-NEXT:     MOV * T0.W, literal.x,
7724; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
7725; EG-NEXT:     BFE_UINT T13.X, T11.W, literal.x, PV.W,
7726; EG-NEXT:     LSHR * T13.Z, T11.W, literal.y,
7727; EG-NEXT:    16(2.242078e-44), 24(3.363116e-44)
7728; EG-NEXT:     MOV T13.Y, 0.0,
7729; EG-NEXT:     BFE_UINT * T14.Z, T11.W, literal.x, T0.W,
7730; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
7731; EG-NEXT:     AND_INT T14.X, T11.W, literal.x,
7732; EG-NEXT:     MOV * T14.Y, 0.0,
7733; EG-NEXT:    255(3.573311e-43), 0(0.000000e+00)
7734; EG-NEXT:     BFE_UINT T15.X, T11.Z, literal.x, T0.W,
7735; EG-NEXT:     LSHR * T15.Z, T11.Z, literal.y,
7736; EG-NEXT:    16(2.242078e-44), 24(3.363116e-44)
7737; EG-NEXT:     MOV T15.Y, 0.0,
7738; EG-NEXT:     BFE_UINT * T16.Z, T11.Z, literal.x, T0.W,
7739; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
7740; EG-NEXT:     AND_INT T16.X, T11.Z, literal.x,
7741; EG-NEXT:     MOV * T16.Y, 0.0,
7742; EG-NEXT:    255(3.573311e-43), 0(0.000000e+00)
7743; EG-NEXT:     BFE_UINT T17.X, T11.Y, literal.x, T0.W,
7744; EG-NEXT:     LSHR * T17.Z, T11.Y, literal.y,
7745; EG-NEXT:    16(2.242078e-44), 24(3.363116e-44)
7746; EG-NEXT:     MOV T17.Y, 0.0,
7747; EG-NEXT:     BFE_UINT * T18.Z, T11.Y, literal.x, T0.W,
7748; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
7749; EG-NEXT:     AND_INT T18.X, T11.Y, literal.x,
7750; EG-NEXT:     MOV * T18.Y, 0.0,
7751; EG-NEXT:    255(3.573311e-43), 0(0.000000e+00)
7752; EG-NEXT:     BFE_UINT T19.X, T11.X, literal.x, T0.W,
7753; EG-NEXT:     LSHR * T19.Z, T11.X, literal.y,
7754; EG-NEXT:    16(2.242078e-44), 24(3.363116e-44)
7755; EG-NEXT:     MOV T19.Y, 0.0,
7756; EG-NEXT:     BFE_UINT * T11.Z, T11.X, literal.x, T0.W,
7757; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
7758; EG-NEXT:     AND_INT T11.X, T11.X, literal.x,
7759; EG-NEXT:     MOV * T11.Y, 0.0,
7760; EG-NEXT:    255(3.573311e-43), 0(0.000000e+00)
7761; EG-NEXT:     BFE_UINT T20.X, T12.W, literal.x, T0.W,
7762; EG-NEXT:     LSHR * T20.Z, T12.W, literal.y,
7763; EG-NEXT:    16(2.242078e-44), 24(3.363116e-44)
7764; EG-NEXT:     MOV T20.Y, 0.0,
7765; EG-NEXT:     BFE_UINT * T21.Z, T12.W, literal.x, T0.W,
7766; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
7767; EG-NEXT:     AND_INT T21.X, T12.W, literal.x,
7768; EG-NEXT:     MOV * T21.Y, 0.0,
7769; EG-NEXT:    255(3.573311e-43), 0(0.000000e+00)
7770; EG-NEXT:     BFE_UINT T22.X, T12.Z, literal.x, T0.W,
7771; EG-NEXT:     LSHR * T22.Z, T12.Z, literal.y,
7772; EG-NEXT:    16(2.242078e-44), 24(3.363116e-44)
7773; EG-NEXT:     MOV T22.Y, 0.0,
7774; EG-NEXT:     BFE_UINT * T23.Z, T12.Z, literal.x, T0.W,
7775; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
7776; EG-NEXT:     AND_INT T23.X, T12.Z, literal.x,
7777; EG-NEXT:     MOV * T23.Y, 0.0,
7778; EG-NEXT:    255(3.573311e-43), 0(0.000000e+00)
7779; EG-NEXT:     BFE_UINT T24.X, T12.Y, literal.x, T0.W,
7780; EG-NEXT:     LSHR * T24.Z, T12.Y, literal.y,
7781; EG-NEXT:    16(2.242078e-44), 24(3.363116e-44)
7782; EG-NEXT:     MOV T24.Y, 0.0,
7783; EG-NEXT:     BFE_UINT * T25.Z, T12.Y, literal.x, T0.W,
7784; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
7785; EG-NEXT:     AND_INT T25.X, T12.Y, literal.x,
7786; EG-NEXT:     MOV * T25.Y, 0.0,
7787; EG-NEXT:    255(3.573311e-43), 0(0.000000e+00)
7788; EG-NEXT:     BFE_UINT T26.X, T12.X, literal.x, T0.W,
7789; EG-NEXT:     LSHR * T26.Z, T12.X, literal.y,
7790; EG-NEXT:    16(2.242078e-44), 24(3.363116e-44)
7791; EG-NEXT:     MOV T26.Y, 0.0,
7792; EG-NEXT:     BFE_UINT * T12.Z, T12.X, literal.x, T0.W,
7793; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
7794; EG-NEXT:     AND_INT T12.X, T12.X, literal.x,
7795; EG-NEXT:     MOV T12.Y, 0.0,
7796; EG-NEXT:     MOV T13.W, 0.0,
7797; EG-NEXT:     MOV * T14.W, 0.0,
7798; EG-NEXT:    255(3.573311e-43), 0(0.000000e+00)
7799; EG-NEXT:     MOV T15.W, 0.0,
7800; EG-NEXT:     MOV * T16.W, 0.0,
7801; EG-NEXT:     MOV T17.W, 0.0,
7802; EG-NEXT:     MOV * T18.W, 0.0,
7803; EG-NEXT:     MOV T19.W, 0.0,
7804; EG-NEXT:     MOV * T11.W, 0.0,
7805; EG-NEXT:     MOV T20.W, 0.0,
7806; EG-NEXT:     MOV * T21.W, 0.0,
7807; EG-NEXT:     MOV T22.W, 0.0,
7808; EG-NEXT:     MOV * T23.W, 0.0,
7809; EG-NEXT:     MOV T24.W, 0.0,
7810; EG-NEXT:     MOV * T25.W, 0.0,
7811; EG-NEXT:     MOV T26.W, 0.0,
7812; EG-NEXT:     MOV * T12.W, 0.0,
7813; EG-NEXT:     LSHR T27.X, KC0[2].Y, literal.x,
7814; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7815; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
7816; EG-NEXT:     LSHR T28.X, PV.W, literal.x,
7817; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7818; EG-NEXT:    2(2.802597e-45), 32(4.484155e-44)
7819; EG-NEXT:     LSHR T29.X, PV.W, literal.x,
7820; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7821; EG-NEXT:    2(2.802597e-45), 48(6.726233e-44)
7822; EG-NEXT:     LSHR T30.X, PV.W, literal.x,
7823; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7824; EG-NEXT:    2(2.802597e-45), 64(8.968310e-44)
7825; EG-NEXT:     LSHR * T31.X, PV.W, literal.x,
7826; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
7827; EG-NEXT:    ALU clause starting at 131:
7828; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.x,
7829; EG-NEXT:    80(1.121039e-43), 0(0.000000e+00)
7830; EG-NEXT:     LSHR T32.X, PV.W, literal.x,
7831; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7832; EG-NEXT:    2(2.802597e-45), 96(1.345247e-43)
7833; EG-NEXT:     LSHR T33.X, PV.W, literal.x,
7834; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7835; EG-NEXT:    2(2.802597e-45), 112(1.569454e-43)
7836; EG-NEXT:     LSHR T34.X, PV.W, literal.x,
7837; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7838; EG-NEXT:    2(2.802597e-45), 128(1.793662e-43)
7839; EG-NEXT:     LSHR T35.X, PV.W, literal.x,
7840; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7841; EG-NEXT:    2(2.802597e-45), 144(2.017870e-43)
7842; EG-NEXT:     LSHR T36.X, PV.W, literal.x,
7843; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7844; EG-NEXT:    2(2.802597e-45), 160(2.242078e-43)
7845; EG-NEXT:     LSHR T37.X, PV.W, literal.x,
7846; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7847; EG-NEXT:    2(2.802597e-45), 176(2.466285e-43)
7848; EG-NEXT:     LSHR T38.X, PV.W, literal.x,
7849; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7850; EG-NEXT:    2(2.802597e-45), 192(2.690493e-43)
7851; EG-NEXT:     LSHR T39.X, PV.W, literal.x,
7852; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7853; EG-NEXT:    2(2.802597e-45), 208(2.914701e-43)
7854; EG-NEXT:     LSHR T40.X, PV.W, literal.x,
7855; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7856; EG-NEXT:    2(2.802597e-45), 224(3.138909e-43)
7857; EG-NEXT:     LSHR T41.X, PV.W, literal.x,
7858; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7859; EG-NEXT:    2(2.802597e-45), 240(3.363116e-43)
7860; EG-NEXT:     LSHR * T42.X, PV.W, literal.x,
7861; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
7862;
7863; GFX12-LABEL: constant_zextload_v32i8_to_v32i64:
7864; GFX12:       ; %bb.0:
7865; GFX12-NEXT:    s_load_b128 s[8:11], s[4:5], 0x24
7866; GFX12-NEXT:    s_wait_kmcnt 0x0
7867; GFX12-NEXT:    s_load_b256 s[0:7], s[10:11], 0x0
7868; GFX12-NEXT:    s_wait_kmcnt 0x0
7869; GFX12-NEXT:    s_bfe_u32 s10, s7, 0x80010
7870; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
7871; GFX12-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s10
7872; GFX12-NEXT:    s_lshr_b32 s11, s7, 24
7873; GFX12-NEXT:    s_lshr_b32 s10, s5, 24
7874; GFX12-NEXT:    s_wait_alu 0xfffe
7875; GFX12-NEXT:    v_dual_mov_b32 v2, s11 :: v_dual_mov_b32 v3, v1
7876; GFX12-NEXT:    s_bfe_u32 s11, s5, 0x80010
7877; GFX12-NEXT:    global_store_b128 v1, v[0:3], s[8:9] offset:240
7878; GFX12-NEXT:    s_wait_alu 0xfffe
7879; GFX12-NEXT:    v_mov_b32_e32 v0, s11
7880; GFX12-NEXT:    v_mov_b32_e32 v2, s10
7881; GFX12-NEXT:    s_lshr_b32 s10, s3, 24
7882; GFX12-NEXT:    s_bfe_u32 s11, s3, 0x80010
7883; GFX12-NEXT:    global_store_b128 v1, v[0:3], s[8:9] offset:176
7884; GFX12-NEXT:    s_wait_alu 0xfffe
7885; GFX12-NEXT:    v_mov_b32_e32 v0, s11
7886; GFX12-NEXT:    v_mov_b32_e32 v2, s10
7887; GFX12-NEXT:    s_lshr_b32 s10, s1, 24
7888; GFX12-NEXT:    s_bfe_u32 s11, s1, 0x80010
7889; GFX12-NEXT:    global_store_b128 v1, v[0:3], s[8:9] offset:112
7890; GFX12-NEXT:    s_wait_alu 0xfffe
7891; GFX12-NEXT:    v_mov_b32_e32 v0, s11
7892; GFX12-NEXT:    v_mov_b32_e32 v2, s10
7893; GFX12-NEXT:    s_bfe_u32 s10, s7, 0x80008
7894; GFX12-NEXT:    s_and_b32 s7, s7, 0xff
7895; GFX12-NEXT:    global_store_b128 v1, v[0:3], s[8:9] offset:48
7896; GFX12-NEXT:    v_mov_b32_e32 v0, s7
7897; GFX12-NEXT:    s_wait_alu 0xfffe
7898; GFX12-NEXT:    v_mov_b32_e32 v2, s10
7899; GFX12-NEXT:    s_lshr_b32 s7, s6, 24
7900; GFX12-NEXT:    s_bfe_u32 s10, s6, 0x80010
7901; GFX12-NEXT:    global_store_b128 v1, v[0:3], s[8:9] offset:224
7902; GFX12-NEXT:    s_wait_alu 0xfffe
7903; GFX12-NEXT:    v_mov_b32_e32 v0, s10
7904; GFX12-NEXT:    v_mov_b32_e32 v2, s7
7905; GFX12-NEXT:    s_bfe_u32 s7, s6, 0x80008
7906; GFX12-NEXT:    s_and_b32 s6, s6, 0xff
7907; GFX12-NEXT:    global_store_b128 v1, v[0:3], s[8:9] offset:208
7908; GFX12-NEXT:    s_wait_alu 0xfffe
7909; GFX12-NEXT:    v_mov_b32_e32 v0, s6
7910; GFX12-NEXT:    v_mov_b32_e32 v2, s7
7911; GFX12-NEXT:    s_bfe_u32 s6, s5, 0x80008
7912; GFX12-NEXT:    s_and_b32 s5, s5, 0xff
7913; GFX12-NEXT:    global_store_b128 v1, v[0:3], s[8:9] offset:192
7914; GFX12-NEXT:    v_mov_b32_e32 v0, s5
7915; GFX12-NEXT:    s_wait_alu 0xfffe
7916; GFX12-NEXT:    v_mov_b32_e32 v2, s6
7917; GFX12-NEXT:    s_lshr_b32 s5, s4, 24
7918; GFX12-NEXT:    s_bfe_u32 s6, s4, 0x80010
7919; GFX12-NEXT:    global_store_b128 v1, v[0:3], s[8:9] offset:160
7920; GFX12-NEXT:    s_wait_alu 0xfffe
7921; GFX12-NEXT:    v_mov_b32_e32 v0, s6
7922; GFX12-NEXT:    v_mov_b32_e32 v2, s5
7923; GFX12-NEXT:    s_bfe_u32 s5, s4, 0x80008
7924; GFX12-NEXT:    s_and_b32 s4, s4, 0xff
7925; GFX12-NEXT:    global_store_b128 v1, v[0:3], s[8:9] offset:144
7926; GFX12-NEXT:    s_wait_alu 0xfffe
7927; GFX12-NEXT:    v_mov_b32_e32 v0, s4
7928; GFX12-NEXT:    v_mov_b32_e32 v2, s5
7929; GFX12-NEXT:    s_bfe_u32 s4, s3, 0x80008
7930; GFX12-NEXT:    s_and_b32 s3, s3, 0xff
7931; GFX12-NEXT:    global_store_b128 v1, v[0:3], s[8:9] offset:128
7932; GFX12-NEXT:    v_mov_b32_e32 v0, s3
7933; GFX12-NEXT:    s_wait_alu 0xfffe
7934; GFX12-NEXT:    v_mov_b32_e32 v2, s4
7935; GFX12-NEXT:    s_lshr_b32 s3, s2, 24
7936; GFX12-NEXT:    s_bfe_u32 s4, s2, 0x80010
7937; GFX12-NEXT:    global_store_b128 v1, v[0:3], s[8:9] offset:96
7938; GFX12-NEXT:    s_wait_alu 0xfffe
7939; GFX12-NEXT:    v_mov_b32_e32 v0, s4
7940; GFX12-NEXT:    v_mov_b32_e32 v2, s3
7941; GFX12-NEXT:    s_bfe_u32 s3, s2, 0x80008
7942; GFX12-NEXT:    s_and_b32 s2, s2, 0xff
7943; GFX12-NEXT:    global_store_b128 v1, v[0:3], s[8:9] offset:80
7944; GFX12-NEXT:    s_wait_alu 0xfffe
7945; GFX12-NEXT:    v_mov_b32_e32 v0, s2
7946; GFX12-NEXT:    v_mov_b32_e32 v2, s3
7947; GFX12-NEXT:    s_bfe_u32 s2, s1, 0x80008
7948; GFX12-NEXT:    s_and_b32 s1, s1, 0xff
7949; GFX12-NEXT:    global_store_b128 v1, v[0:3], s[8:9] offset:64
7950; GFX12-NEXT:    v_mov_b32_e32 v0, s1
7951; GFX12-NEXT:    s_wait_alu 0xfffe
7952; GFX12-NEXT:    v_mov_b32_e32 v2, s2
7953; GFX12-NEXT:    s_lshr_b32 s1, s0, 24
7954; GFX12-NEXT:    s_bfe_u32 s2, s0, 0x80010
7955; GFX12-NEXT:    global_store_b128 v1, v[0:3], s[8:9] offset:32
7956; GFX12-NEXT:    s_wait_alu 0xfffe
7957; GFX12-NEXT:    v_mov_b32_e32 v0, s2
7958; GFX12-NEXT:    v_mov_b32_e32 v2, s1
7959; GFX12-NEXT:    s_bfe_u32 s1, s0, 0x80008
7960; GFX12-NEXT:    s_and_b32 s0, s0, 0xff
7961; GFX12-NEXT:    global_store_b128 v1, v[0:3], s[8:9] offset:16
7962; GFX12-NEXT:    s_wait_alu 0xfffe
7963; GFX12-NEXT:    v_mov_b32_e32 v0, s0
7964; GFX12-NEXT:    v_mov_b32_e32 v2, s1
7965; GFX12-NEXT:    global_store_b128 v1, v[0:3], s[8:9]
7966; GFX12-NEXT:    s_endpgm
  ; Read the whole 32-element i8 vector through %in (addrspace(4)).
7967  %load = load <32 x i8>, ptr addrspace(4) %in
  ; Widen each i8 element to i64 with zero extension (upper 56 bits are zero).
7968  %ext = zext <32 x i8> %load to <32 x i64>
  ; Write the widened <32 x i64> vector through %out (addrspace(1)).
7969  store <32 x i64> %ext, ptr addrspace(1) %out
7970  ret void
7971}
7972
7973define amdgpu_kernel void @constant_sextload_v32i8_to_v32i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
7974; GFX6-NOHSA-LABEL: constant_sextload_v32i8_to_v32i64:
7975; GFX6-NOHSA:       ; %bb.0:
7976; GFX6-NOHSA-NEXT:    s_load_dwordx4 s[8:11], s[4:5], 0x9
7977; GFX6-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
7978; GFX6-NOHSA-NEXT:    s_load_dwordx8 s[0:7], s[10:11], 0x0
7979; GFX6-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
7980; GFX6-NOHSA-NEXT:    s_lshr_b32 s10, s7, 16
7981; GFX6-NOHSA-NEXT:    s_lshr_b32 s28, s7, 8
7982; GFX6-NOHSA-NEXT:    s_mov_b32 s38, s7
7983; GFX6-NOHSA-NEXT:    s_lshr_b32 s40, s6, 16
7984; GFX6-NOHSA-NEXT:    s_lshr_b32 s30, s6, 24
7985; GFX6-NOHSA-NEXT:    s_lshr_b32 s36, s6, 8
7986; GFX6-NOHSA-NEXT:    s_lshr_b32 s12, s5, 16
7987; GFX6-NOHSA-NEXT:    s_lshr_b32 s14, s5, 8
7988; GFX6-NOHSA-NEXT:    s_mov_b32 s42, s5
7989; GFX6-NOHSA-NEXT:    s_lshr_b32 s16, s4, 16
7990; GFX6-NOHSA-NEXT:    s_lshr_b32 s18, s4, 24
7991; GFX6-NOHSA-NEXT:    s_lshr_b32 s20, s4, 8
7992; GFX6-NOHSA-NEXT:    s_lshr_b32 s22, s3, 16
7993; GFX6-NOHSA-NEXT:    s_lshr_b32 s24, s3, 8
7994; GFX6-NOHSA-NEXT:    s_mov_b32 s34, s3
7995; GFX6-NOHSA-NEXT:    s_lshr_b32 s26, s2, 16
7996; GFX6-NOHSA-NEXT:    s_bfe_i64 s[58:59], s[42:43], 0x80000
7997; GFX6-NOHSA-NEXT:    s_bfe_i64 s[60:61], s[38:39], 0x80000
7998; GFX6-NOHSA-NEXT:    s_bfe_i64 s[62:63], s[10:11], 0x80000
7999; GFX6-NOHSA-NEXT:    s_lshr_b32 s42, s2, 24
8000; GFX6-NOHSA-NEXT:    s_lshr_b32 s44, s2, 8
8001; GFX6-NOHSA-NEXT:    s_lshr_b32 s46, s1, 16
8002; GFX6-NOHSA-NEXT:    s_lshr_b32 s48, s1, 8
8003; GFX6-NOHSA-NEXT:    s_mov_b32 s54, s1
8004; GFX6-NOHSA-NEXT:    s_lshr_b32 s50, s0, 16
8005; GFX6-NOHSA-NEXT:    s_lshr_b32 s52, s0, 24
8006; GFX6-NOHSA-NEXT:    s_lshr_b32 s56, s0, 8
8007; GFX6-NOHSA-NEXT:    s_bfe_i64 s[10:11], s[0:1], 0x80000
8008; GFX6-NOHSA-NEXT:    s_ashr_i64 s[38:39], s[0:1], 56
8009; GFX6-NOHSA-NEXT:    s_ashr_i64 s[64:65], s[2:3], 56
8010; GFX6-NOHSA-NEXT:    s_bfe_i64 s[66:67], s[4:5], 0x80000
8011; GFX6-NOHSA-NEXT:    s_ashr_i64 s[68:69], s[4:5], 56
8012; GFX6-NOHSA-NEXT:    s_bfe_i64 s[70:71], s[6:7], 0x80000
8013; GFX6-NOHSA-NEXT:    s_ashr_i64 s[6:7], s[6:7], 56
8014; GFX6-NOHSA-NEXT:    s_bfe_i64 s[4:5], s[2:3], 0x80000
8015; GFX6-NOHSA-NEXT:    s_mov_b32 s0, s8
8016; GFX6-NOHSA-NEXT:    s_mov_b32 s1, s9
8017; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s6
8018; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s7
8019; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v4, s60
8020; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v5, s61
8021; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v8, s70
8022; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v9, s71
8023; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v12, s68
8024; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v13, s69
8025; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v14, s58
8026; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v15, s59
8027; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v18, s66
8028; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v19, s67
8029; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v22, s64
8030; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v23, s65
8031; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s62
8032; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s63
8033; GFX6-NOHSA-NEXT:    s_mov_b32 s3, 0xf000
8034; GFX6-NOHSA-NEXT:    s_bfe_i64 s[6:7], s[40:41], 0x80000
8035; GFX6-NOHSA-NEXT:    s_mov_b32 s2, -1
8036; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v24, s6
8037; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:240
8038; GFX6-NOHSA-NEXT:    s_bfe_i64 s[8:9], s[36:37], 0x80000
8039; GFX6-NOHSA-NEXT:    s_bfe_i64 s[28:29], s[28:29], 0x80000
8040; GFX6-NOHSA-NEXT:    s_bfe_i64 s[30:31], s[30:31], 0x80000
8041; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v6, s28
8042; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v7, s29
8043; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v25, s7
8044; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:224
8045; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v26, s30
8046; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v27, s31
8047; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v10, s8
8048; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v11, s9
8049; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[24:27], off, s[0:3], 0 offset:208
8050; GFX6-NOHSA-NEXT:    s_bfe_i64 s[40:41], s[54:55], 0x80000
8051; GFX6-NOHSA-NEXT:    s_bfe_i64 s[54:55], s[34:35], 0x80000
8052; GFX6-NOHSA-NEXT:    s_bfe_i64 s[6:7], s[56:57], 0x80000
8053; GFX6-NOHSA-NEXT:    s_bfe_i64 s[8:9], s[52:53], 0x80000
8054; GFX6-NOHSA-NEXT:    s_bfe_i64 s[28:29], s[50:51], 0x80000
8055; GFX6-NOHSA-NEXT:    s_bfe_i64 s[30:31], s[48:49], 0x80000
8056; GFX6-NOHSA-NEXT:    s_bfe_i64 s[34:35], s[46:47], 0x80000
8057; GFX6-NOHSA-NEXT:    s_bfe_i64 s[36:37], s[44:45], 0x80000
8058; GFX6-NOHSA-NEXT:    s_bfe_i64 s[42:43], s[42:43], 0x80000
8059; GFX6-NOHSA-NEXT:    s_bfe_i64 s[26:27], s[26:27], 0x80000
8060; GFX6-NOHSA-NEXT:    s_bfe_i64 s[24:25], s[24:25], 0x80000
8061; GFX6-NOHSA-NEXT:    s_bfe_i64 s[22:23], s[22:23], 0x80000
8062; GFX6-NOHSA-NEXT:    s_bfe_i64 s[20:21], s[20:21], 0x80000
8063; GFX6-NOHSA-NEXT:    s_bfe_i64 s[18:19], s[18:19], 0x80000
8064; GFX6-NOHSA-NEXT:    s_bfe_i64 s[16:17], s[16:17], 0x80000
8065; GFX6-NOHSA-NEXT:    s_bfe_i64 s[14:15], s[14:15], 0x80000
8066; GFX6-NOHSA-NEXT:    s_bfe_i64 s[12:13], s[12:13], 0x80000
8067; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:192
8068; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(3)
8069; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s54
8070; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s55
8071; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
8072; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v10, s12
8073; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v11, s13
8074; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[10:13], off, s[0:3], 0 offset:176
8075; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v4, s4
8076; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v5, s5
8077; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v16, s14
8078; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v17, s15
8079; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[14:17], off, s[0:3], 0 offset:160
8080; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v6, s16
8081; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v7, s17
8082; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v8, s18
8083; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v9, s19
8084; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[6:9], off, s[0:3], 0 offset:144
8085; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
8086; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v8, s38
8087; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v9, s39
8088; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v20, s20
8089; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v21, s21
8090; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[18:21], off, s[0:3], 0 offset:128
8091; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v10, s40
8092; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v11, s41
8093; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
8094; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v20, s22
8095; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v21, s23
8096; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[20:23], off, s[0:3], 0 offset:112
8097; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v14, s10
8098; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v15, s11
8099; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s24
8100; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s25
8101; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96
8102; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
8103; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s26
8104; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s27
8105; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s42
8106; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s43
8107; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80
8108; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v6, s36
8109; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v7, s37
8110; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:64
8111; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
8112; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v6, s34
8113; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v7, s35
8114; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[6:9], off, s[0:3], 0 offset:48
8115; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v12, s30
8116; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v13, s31
8117; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[10:13], off, s[0:3], 0 offset:32
8118; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s28
8119; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s29
8120; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s8
8121; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s9
8122; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
8123; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v16, s6
8124; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v17, s7
8125; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[14:17], off, s[0:3], 0
8126; GFX6-NOHSA-NEXT:    s_endpgm
8127;
8128; GFX7-HSA-LABEL: constant_sextload_v32i8_to_v32i64:
8129; GFX7-HSA:       ; %bb.0:
8130; GFX7-HSA-NEXT:    s_load_dwordx4 s[8:11], s[8:9], 0x0
8131; GFX7-HSA-NEXT:    s_waitcnt lgkmcnt(0)
8132; GFX7-HSA-NEXT:    s_load_dwordx8 s[0:7], s[10:11], 0x0
8133; GFX7-HSA-NEXT:    s_waitcnt lgkmcnt(0)
8134; GFX7-HSA-NEXT:    s_lshr_b32 s14, s7, 16
8135; GFX7-HSA-NEXT:    s_lshr_b32 s50, s7, 8
8136; GFX7-HSA-NEXT:    s_mov_b32 s52, s7
8137; GFX7-HSA-NEXT:    s_lshr_b32 s54, s6, 16
8138; GFX7-HSA-NEXT:    s_lshr_b32 s56, s6, 24
8139; GFX7-HSA-NEXT:    s_lshr_b32 s58, s6, 8
8140; GFX7-HSA-NEXT:    s_lshr_b32 s60, s5, 16
8141; GFX7-HSA-NEXT:    s_lshr_b32 s48, s5, 8
8142; GFX7-HSA-NEXT:    s_mov_b32 s62, s5
8143; GFX7-HSA-NEXT:    s_lshr_b32 s42, s4, 16
8144; GFX7-HSA-NEXT:    s_lshr_b32 s40, s4, 24
8145; GFX7-HSA-NEXT:    s_lshr_b32 s38, s4, 8
8146; GFX7-HSA-NEXT:    s_lshr_b32 s36, s3, 16
8147; GFX7-HSA-NEXT:    s_lshr_b32 s30, s3, 8
8148; GFX7-HSA-NEXT:    s_mov_b32 s34, s3
8149; GFX7-HSA-NEXT:    s_lshr_b32 s26, s2, 16
8150; GFX7-HSA-NEXT:    s_lshr_b32 s24, s2, 24
8151; GFX7-HSA-NEXT:    s_lshr_b32 s22, s2, 8
8152; GFX7-HSA-NEXT:    s_lshr_b32 s20, s1, 16
8153; GFX7-HSA-NEXT:    s_lshr_b32 s64, s1, 8
8154; GFX7-HSA-NEXT:    s_mov_b32 s16, s1
8155; GFX7-HSA-NEXT:    s_lshr_b32 s66, s0, 16
8156; GFX7-HSA-NEXT:    s_lshr_b32 s68, s0, 24
8157; GFX7-HSA-NEXT:    s_lshr_b32 s70, s0, 8
8158; GFX7-HSA-NEXT:    s_bfe_i64 s[12:13], s[2:3], 0x80000
8159; GFX7-HSA-NEXT:    s_ashr_i64 s[18:19], s[2:3], 56
8160; GFX7-HSA-NEXT:    s_bfe_i64 s[28:29], s[4:5], 0x80000
8161; GFX7-HSA-NEXT:    s_ashr_i64 s[44:45], s[4:5], 56
8162; GFX7-HSA-NEXT:    s_ashr_i64 s[2:3], s[6:7], 56
8163; GFX7-HSA-NEXT:    s_bfe_i64 s[4:5], s[14:15], 0x80000
8164; GFX7-HSA-NEXT:    s_bfe_i64 s[10:11], s[0:1], 0x80000
8165; GFX7-HSA-NEXT:    s_ashr_i64 s[0:1], s[0:1], 56
8166; GFX7-HSA-NEXT:    s_bfe_i64 s[46:47], s[6:7], 0x80000
8167; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s4
8168; GFX7-HSA-NEXT:    v_mov_b32_e32 v1, s5
8169; GFX7-HSA-NEXT:    v_mov_b32_e32 v2, s2
8170; GFX7-HSA-NEXT:    v_mov_b32_e32 v3, s3
8171; GFX7-HSA-NEXT:    s_bfe_i64 s[2:3], s[70:71], 0x80000
8172; GFX7-HSA-NEXT:    s_bfe_i64 s[4:5], s[68:69], 0x80000
8173; GFX7-HSA-NEXT:    s_bfe_i64 s[6:7], s[66:67], 0x80000
8174; GFX7-HSA-NEXT:    s_bfe_i64 s[16:17], s[16:17], 0x80000
8175; GFX7-HSA-NEXT:    s_bfe_i64 s[14:15], s[64:65], 0x80000
8176; GFX7-HSA-NEXT:    s_bfe_i64 s[20:21], s[20:21], 0x80000
8177; GFX7-HSA-NEXT:    s_bfe_i64 s[22:23], s[22:23], 0x80000
8178; GFX7-HSA-NEXT:    s_bfe_i64 s[24:25], s[24:25], 0x80000
8179; GFX7-HSA-NEXT:    s_bfe_i64 s[26:27], s[26:27], 0x80000
8180; GFX7-HSA-NEXT:    s_bfe_i64 s[34:35], s[34:35], 0x80000
8181; GFX7-HSA-NEXT:    s_bfe_i64 s[30:31], s[30:31], 0x80000
8182; GFX7-HSA-NEXT:    s_bfe_i64 s[36:37], s[36:37], 0x80000
8183; GFX7-HSA-NEXT:    s_bfe_i64 s[38:39], s[38:39], 0x80000
8184; GFX7-HSA-NEXT:    s_bfe_i64 s[40:41], s[40:41], 0x80000
8185; GFX7-HSA-NEXT:    s_bfe_i64 s[42:43], s[42:43], 0x80000
8186; GFX7-HSA-NEXT:    s_bfe_i64 s[62:63], s[62:63], 0x80000
8187; GFX7-HSA-NEXT:    s_bfe_i64 s[48:49], s[48:49], 0x80000
8188; GFX7-HSA-NEXT:    s_bfe_i64 s[60:61], s[60:61], 0x80000
8189; GFX7-HSA-NEXT:    s_bfe_i64 s[58:59], s[58:59], 0x80000
8190; GFX7-HSA-NEXT:    s_bfe_i64 s[56:57], s[56:57], 0x80000
8191; GFX7-HSA-NEXT:    s_bfe_i64 s[54:55], s[54:55], 0x80000
8192; GFX7-HSA-NEXT:    s_bfe_i64 s[52:53], s[52:53], 0x80000
8193; GFX7-HSA-NEXT:    s_bfe_i64 s[50:51], s[50:51], 0x80000
8194; GFX7-HSA-NEXT:    s_add_u32 s64, s8, 0xf0
8195; GFX7-HSA-NEXT:    s_addc_u32 s65, s9, 0
8196; GFX7-HSA-NEXT:    v_mov_b32_e32 v6, s50
8197; GFX7-HSA-NEXT:    s_add_u32 s50, s8, 0xe0
8198; GFX7-HSA-NEXT:    v_mov_b32_e32 v7, s51
8199; GFX7-HSA-NEXT:    s_addc_u32 s51, s9, 0
8200; GFX7-HSA-NEXT:    v_mov_b32_e32 v24, s50
8201; GFX7-HSA-NEXT:    v_mov_b32_e32 v25, s51
8202; GFX7-HSA-NEXT:    s_add_u32 s50, s8, 0xd0
8203; GFX7-HSA-NEXT:    s_addc_u32 s51, s9, 0
8204; GFX7-HSA-NEXT:    v_mov_b32_e32 v18, s44
8205; GFX7-HSA-NEXT:    s_add_u32 s44, s8, 0xc0
8206; GFX7-HSA-NEXT:    v_mov_b32_e32 v19, s45
8207; GFX7-HSA-NEXT:    s_addc_u32 s45, s9, 0
8208; GFX7-HSA-NEXT:    v_mov_b32_e32 v28, s44
8209; GFX7-HSA-NEXT:    v_mov_b32_e32 v22, s64
8210; GFX7-HSA-NEXT:    v_mov_b32_e32 v29, s45
8211; GFX7-HSA-NEXT:    s_add_u32 s44, s8, 0xb0
8212; GFX7-HSA-NEXT:    v_mov_b32_e32 v23, s65
8213; GFX7-HSA-NEXT:    s_addc_u32 s45, s9, 0
8214; GFX7-HSA-NEXT:    flat_store_dwordx4 v[22:23], v[0:3]
8215; GFX7-HSA-NEXT:    v_mov_b32_e32 v4, s52
8216; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s42
8217; GFX7-HSA-NEXT:    s_add_u32 s42, s8, 0xa0
8218; GFX7-HSA-NEXT:    v_mov_b32_e32 v5, s53
8219; GFX7-HSA-NEXT:    v_mov_b32_e32 v26, s50
8220; GFX7-HSA-NEXT:    v_mov_b32_e32 v1, s43
8221; GFX7-HSA-NEXT:    s_addc_u32 s43, s9, 0
8222; GFX7-HSA-NEXT:    v_mov_b32_e32 v8, s54
8223; GFX7-HSA-NEXT:    v_mov_b32_e32 v9, s55
8224; GFX7-HSA-NEXT:    v_mov_b32_e32 v10, s56
8225; GFX7-HSA-NEXT:    v_mov_b32_e32 v11, s57
8226; GFX7-HSA-NEXT:    v_mov_b32_e32 v27, s51
8227; GFX7-HSA-NEXT:    flat_store_dwordx4 v[24:25], v[4:7]
8228; GFX7-HSA-NEXT:    flat_store_dwordx4 v[26:27], v[8:11]
8229; GFX7-HSA-NEXT:    v_mov_b32_e32 v4, s28
8230; GFX7-HSA-NEXT:    s_add_u32 s28, s8, 0x90
8231; GFX7-HSA-NEXT:    v_mov_b32_e32 v5, s29
8232; GFX7-HSA-NEXT:    s_addc_u32 s29, s9, 0
8233; GFX7-HSA-NEXT:    v_mov_b32_e32 v24, s28
8234; GFX7-HSA-NEXT:    v_mov_b32_e32 v10, s42
8235; GFX7-HSA-NEXT:    v_mov_b32_e32 v25, s29
8236; GFX7-HSA-NEXT:    s_add_u32 s28, s8, 0x80
8237; GFX7-HSA-NEXT:    v_mov_b32_e32 v20, s62
8238; GFX7-HSA-NEXT:    v_mov_b32_e32 v21, s63
8239; GFX7-HSA-NEXT:    v_mov_b32_e32 v22, s48
8240; GFX7-HSA-NEXT:    v_mov_b32_e32 v23, s49
8241; GFX7-HSA-NEXT:    v_mov_b32_e32 v30, s44
8242; GFX7-HSA-NEXT:    v_mov_b32_e32 v11, s43
8243; GFX7-HSA-NEXT:    s_addc_u32 s29, s9, 0
8244; GFX7-HSA-NEXT:    v_mov_b32_e32 v16, s60
8245; GFX7-HSA-NEXT:    v_mov_b32_e32 v17, s61
8246; GFX7-HSA-NEXT:    v_mov_b32_e32 v31, s45
8247; GFX7-HSA-NEXT:    flat_store_dwordx4 v[10:11], v[20:23]
8248; GFX7-HSA-NEXT:    v_mov_b32_e32 v10, s18
8249; GFX7-HSA-NEXT:    s_add_u32 s18, s8, 0x70
8250; GFX7-HSA-NEXT:    flat_store_dwordx4 v[30:31], v[16:19]
8251; GFX7-HSA-NEXT:    v_mov_b32_e32 v11, s19
8252; GFX7-HSA-NEXT:    s_addc_u32 s19, s9, 0
8253; GFX7-HSA-NEXT:    v_mov_b32_e32 v16, s18
8254; GFX7-HSA-NEXT:    v_mov_b32_e32 v17, s19
8255; GFX7-HSA-NEXT:    s_add_u32 s18, s8, 0x60
8256; GFX7-HSA-NEXT:    s_addc_u32 s19, s9, 0
8257; GFX7-HSA-NEXT:    v_mov_b32_e32 v18, s18
8258; GFX7-HSA-NEXT:    v_mov_b32_e32 v12, s46
8259; GFX7-HSA-NEXT:    v_mov_b32_e32 v13, s47
8260; GFX7-HSA-NEXT:    v_mov_b32_e32 v14, s58
8261; GFX7-HSA-NEXT:    v_mov_b32_e32 v15, s59
8262; GFX7-HSA-NEXT:    v_mov_b32_e32 v2, s40
8263; GFX7-HSA-NEXT:    v_mov_b32_e32 v3, s41
8264; GFX7-HSA-NEXT:    v_mov_b32_e32 v26, s28
8265; GFX7-HSA-NEXT:    v_mov_b32_e32 v19, s19
8266; GFX7-HSA-NEXT:    s_add_u32 s18, s8, 0x50
8267; GFX7-HSA-NEXT:    flat_store_dwordx4 v[28:29], v[12:15]
8268; GFX7-HSA-NEXT:    v_mov_b32_e32 v27, s29
8269; GFX7-HSA-NEXT:    v_mov_b32_e32 v6, s38
8270; GFX7-HSA-NEXT:    v_mov_b32_e32 v7, s39
8271; GFX7-HSA-NEXT:    v_mov_b32_e32 v8, s36
8272; GFX7-HSA-NEXT:    v_mov_b32_e32 v9, s37
8273; GFX7-HSA-NEXT:    v_mov_b32_e32 v12, s34
8274; GFX7-HSA-NEXT:    v_mov_b32_e32 v13, s35
8275; GFX7-HSA-NEXT:    v_mov_b32_e32 v14, s30
8276; GFX7-HSA-NEXT:    v_mov_b32_e32 v15, s31
8277; GFX7-HSA-NEXT:    flat_store_dwordx4 v[24:25], v[0:3]
8278; GFX7-HSA-NEXT:    flat_store_dwordx4 v[26:27], v[4:7]
8279; GFX7-HSA-NEXT:    flat_store_dwordx4 v[16:17], v[8:11]
8280; GFX7-HSA-NEXT:    flat_store_dwordx4 v[18:19], v[12:15]
8281; GFX7-HSA-NEXT:    s_addc_u32 s19, s9, 0
8282; GFX7-HSA-NEXT:    v_mov_b32_e32 v4, s18
8283; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s26
8284; GFX7-HSA-NEXT:    v_mov_b32_e32 v1, s27
8285; GFX7-HSA-NEXT:    v_mov_b32_e32 v2, s24
8286; GFX7-HSA-NEXT:    v_mov_b32_e32 v3, s25
8287; GFX7-HSA-NEXT:    v_mov_b32_e32 v5, s19
8288; GFX7-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
8289; GFX7-HSA-NEXT:    s_nop 0
8290; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s12
8291; GFX7-HSA-NEXT:    s_add_u32 s12, s8, 64
8292; GFX7-HSA-NEXT:    v_mov_b32_e32 v1, s13
8293; GFX7-HSA-NEXT:    s_addc_u32 s13, s9, 0
8294; GFX7-HSA-NEXT:    v_mov_b32_e32 v4, s12
8295; GFX7-HSA-NEXT:    v_mov_b32_e32 v2, s22
8296; GFX7-HSA-NEXT:    v_mov_b32_e32 v3, s23
8297; GFX7-HSA-NEXT:    v_mov_b32_e32 v5, s13
8298; GFX7-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
8299; GFX7-HSA-NEXT:    s_nop 0
8300; GFX7-HSA-NEXT:    v_mov_b32_e32 v2, s0
8301; GFX7-HSA-NEXT:    s_add_u32 s0, s8, 48
8302; GFX7-HSA-NEXT:    v_mov_b32_e32 v3, s1
8303; GFX7-HSA-NEXT:    s_addc_u32 s1, s9, 0
8304; GFX7-HSA-NEXT:    v_mov_b32_e32 v5, s1
8305; GFX7-HSA-NEXT:    v_mov_b32_e32 v4, s0
8306; GFX7-HSA-NEXT:    s_add_u32 s0, s8, 32
8307; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s20
8308; GFX7-HSA-NEXT:    v_mov_b32_e32 v1, s21
8309; GFX7-HSA-NEXT:    s_addc_u32 s1, s9, 0
8310; GFX7-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
8311; GFX7-HSA-NEXT:    v_mov_b32_e32 v5, s1
8312; GFX7-HSA-NEXT:    v_mov_b32_e32 v4, s0
8313; GFX7-HSA-NEXT:    s_add_u32 s0, s8, 16
8314; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s16
8315; GFX7-HSA-NEXT:    v_mov_b32_e32 v1, s17
8316; GFX7-HSA-NEXT:    v_mov_b32_e32 v2, s14
8317; GFX7-HSA-NEXT:    v_mov_b32_e32 v3, s15
8318; GFX7-HSA-NEXT:    s_addc_u32 s1, s9, 0
8319; GFX7-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
8320; GFX7-HSA-NEXT:    v_mov_b32_e32 v5, s1
8321; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s6
8322; GFX7-HSA-NEXT:    v_mov_b32_e32 v1, s7
8323; GFX7-HSA-NEXT:    v_mov_b32_e32 v2, s4
8324; GFX7-HSA-NEXT:    v_mov_b32_e32 v3, s5
8325; GFX7-HSA-NEXT:    v_mov_b32_e32 v4, s0
8326; GFX7-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
8327; GFX7-HSA-NEXT:    v_mov_b32_e32 v4, s8
8328; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s10
8329; GFX7-HSA-NEXT:    v_mov_b32_e32 v1, s11
8330; GFX7-HSA-NEXT:    v_mov_b32_e32 v2, s2
8331; GFX7-HSA-NEXT:    v_mov_b32_e32 v3, s3
8332; GFX7-HSA-NEXT:    v_mov_b32_e32 v5, s9
8333; GFX7-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
8334; GFX7-HSA-NEXT:    s_endpgm
8335;
8336; GFX8-NOHSA-LABEL: constant_sextload_v32i8_to_v32i64:
8337; GFX8-NOHSA:       ; %bb.0:
8338; GFX8-NOHSA-NEXT:    s_load_dwordx4 s[8:11], s[4:5], 0x24
8339; GFX8-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
8340; GFX8-NOHSA-NEXT:    s_load_dwordx8 s[0:7], s[10:11], 0x0
8341; GFX8-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
8342; GFX8-NOHSA-NEXT:    s_lshr_b32 s46, s7, 16
8343; GFX8-NOHSA-NEXT:    s_lshr_b32 s48, s7, 8
8344; GFX8-NOHSA-NEXT:    s_mov_b32 s50, s7
8345; GFX8-NOHSA-NEXT:    s_lshr_b32 s52, s6, 16
8346; GFX8-NOHSA-NEXT:    s_lshr_b32 s54, s6, 24
8347; GFX8-NOHSA-NEXT:    s_lshr_b32 s56, s6, 8
8348; GFX8-NOHSA-NEXT:    s_lshr_b32 s58, s5, 16
8349; GFX8-NOHSA-NEXT:    s_lshr_b32 s60, s5, 8
8350; GFX8-NOHSA-NEXT:    s_mov_b32 s62, s5
8351; GFX8-NOHSA-NEXT:    s_lshr_b32 s44, s4, 16
8352; GFX8-NOHSA-NEXT:    s_lshr_b32 s40, s4, 24
8353; GFX8-NOHSA-NEXT:    s_lshr_b32 s38, s4, 8
8354; GFX8-NOHSA-NEXT:    s_lshr_b32 s36, s3, 16
8355; GFX8-NOHSA-NEXT:    s_lshr_b32 s30, s3, 8
8356; GFX8-NOHSA-NEXT:    s_mov_b32 s28, s3
8357; GFX8-NOHSA-NEXT:    s_lshr_b32 s24, s2, 16
8358; GFX8-NOHSA-NEXT:    s_lshr_b32 s22, s2, 24
8359; GFX8-NOHSA-NEXT:    s_lshr_b32 s20, s2, 8
8360; GFX8-NOHSA-NEXT:    s_lshr_b32 s18, s1, 16
8361; GFX8-NOHSA-NEXT:    s_lshr_b32 s14, s1, 8
8362; GFX8-NOHSA-NEXT:    s_mov_b32 s64, s1
8363; GFX8-NOHSA-NEXT:    s_lshr_b32 s66, s0, 16
8364; GFX8-NOHSA-NEXT:    s_lshr_b32 s68, s0, 24
8365; GFX8-NOHSA-NEXT:    s_lshr_b32 s70, s0, 8
8366; GFX8-NOHSA-NEXT:    s_bfe_i64 s[10:11], s[0:1], 0x80000
8367; GFX8-NOHSA-NEXT:    s_ashr_i64 s[12:13], s[0:1], 56
8368; GFX8-NOHSA-NEXT:    s_bfe_i64 s[16:17], s[2:3], 0x80000
8369; GFX8-NOHSA-NEXT:    s_ashr_i64 s[26:27], s[2:3], 56
8370; GFX8-NOHSA-NEXT:    s_bfe_i64 s[34:35], s[4:5], 0x80000
8371; GFX8-NOHSA-NEXT:    s_ashr_i64 s[42:43], s[4:5], 56
8372; GFX8-NOHSA-NEXT:    s_bfe_i64 s[72:73], s[6:7], 0x80000
8373; GFX8-NOHSA-NEXT:    s_ashr_i64 s[74:75], s[6:7], 56
8374; GFX8-NOHSA-NEXT:    s_bfe_i64 s[0:1], s[70:71], 0x80000
8375; GFX8-NOHSA-NEXT:    s_bfe_i64 s[2:3], s[68:69], 0x80000
8376; GFX8-NOHSA-NEXT:    s_bfe_i64 s[4:5], s[66:67], 0x80000
8377; GFX8-NOHSA-NEXT:    s_bfe_i64 s[6:7], s[64:65], 0x80000
8378; GFX8-NOHSA-NEXT:    s_bfe_i64 s[14:15], s[14:15], 0x80000
8379; GFX8-NOHSA-NEXT:    s_bfe_i64 s[18:19], s[18:19], 0x80000
8380; GFX8-NOHSA-NEXT:    s_bfe_i64 s[20:21], s[20:21], 0x80000
8381; GFX8-NOHSA-NEXT:    s_bfe_i64 s[22:23], s[22:23], 0x80000
8382; GFX8-NOHSA-NEXT:    s_bfe_i64 s[24:25], s[24:25], 0x80000
8383; GFX8-NOHSA-NEXT:    s_bfe_i64 s[28:29], s[28:29], 0x80000
8384; GFX8-NOHSA-NEXT:    s_bfe_i64 s[30:31], s[30:31], 0x80000
8385; GFX8-NOHSA-NEXT:    s_bfe_i64 s[36:37], s[36:37], 0x80000
8386; GFX8-NOHSA-NEXT:    s_bfe_i64 s[38:39], s[38:39], 0x80000
8387; GFX8-NOHSA-NEXT:    s_bfe_i64 s[40:41], s[40:41], 0x80000
8388; GFX8-NOHSA-NEXT:    s_bfe_i64 s[44:45], s[44:45], 0x80000
8389; GFX8-NOHSA-NEXT:    s_bfe_i64 s[62:63], s[62:63], 0x80000
8390; GFX8-NOHSA-NEXT:    s_bfe_i64 s[60:61], s[60:61], 0x80000
8391; GFX8-NOHSA-NEXT:    s_bfe_i64 s[58:59], s[58:59], 0x80000
8392; GFX8-NOHSA-NEXT:    s_bfe_i64 s[56:57], s[56:57], 0x80000
8393; GFX8-NOHSA-NEXT:    s_bfe_i64 s[54:55], s[54:55], 0x80000
8394; GFX8-NOHSA-NEXT:    s_bfe_i64 s[52:53], s[52:53], 0x80000
8395; GFX8-NOHSA-NEXT:    s_bfe_i64 s[50:51], s[50:51], 0x80000
8396; GFX8-NOHSA-NEXT:    s_bfe_i64 s[48:49], s[48:49], 0x80000
8397; GFX8-NOHSA-NEXT:    s_bfe_i64 s[46:47], s[46:47], 0x80000
8398; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s46
8399; GFX8-NOHSA-NEXT:    s_add_u32 s46, s8, 0xf0
8400; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s47
8401; GFX8-NOHSA-NEXT:    s_addc_u32 s47, s9, 0
8402; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s46
8403; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s74
8404; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v3, s75
8405; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s47
8406; GFX8-NOHSA-NEXT:    s_add_u32 s46, s8, 0xe0
8407; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
8408; GFX8-NOHSA-NEXT:    s_addc_u32 s47, s9, 0
8409; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s46
8410; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s50
8411; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s51
8412; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s48
8413; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v3, s49
8414; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s47
8415; GFX8-NOHSA-NEXT:    s_add_u32 s46, s8, 0xd0
8416; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
8417; GFX8-NOHSA-NEXT:    s_addc_u32 s47, s9, 0
8418; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s46
8419; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s52
8420; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s53
8421; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s54
8422; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v3, s55
8423; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s47
8424; GFX8-NOHSA-NEXT:    s_add_u32 s46, s8, 0xc0
8425; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
8426; GFX8-NOHSA-NEXT:    s_addc_u32 s47, s9, 0
8427; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s46
8428; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s72
8429; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s73
8430; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s56
8431; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v3, s57
8432; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s47
8433; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
8434; GFX8-NOHSA-NEXT:    s_nop 0
8435; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s42
8436; GFX8-NOHSA-NEXT:    s_add_u32 s42, s8, 0xb0
8437; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v3, s43
8438; GFX8-NOHSA-NEXT:    s_addc_u32 s43, s9, 0
8439; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s42
8440; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s58
8441; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s59
8442; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s43
8443; GFX8-NOHSA-NEXT:    s_add_u32 s42, s8, 0xa0
8444; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
8445; GFX8-NOHSA-NEXT:    s_addc_u32 s43, s9, 0
8446; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s42
8447; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s62
8448; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s63
8449; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s60
8450; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v3, s61
8451; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s43
8452; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
8453; GFX8-NOHSA-NEXT:    s_nop 0
8454; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s40
8455; GFX8-NOHSA-NEXT:    s_add_u32 s40, s8, 0x90
8456; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v3, s41
8457; GFX8-NOHSA-NEXT:    s_addc_u32 s41, s9, 0
8458; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s40
8459; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s44
8460; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s45
8461; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s41
8462; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
8463; GFX8-NOHSA-NEXT:    s_nop 0
8464; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s34
8465; GFX8-NOHSA-NEXT:    s_add_u32 s34, s8, 0x80
8466; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s35
8467; GFX8-NOHSA-NEXT:    s_addc_u32 s35, s9, 0
8468; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s34
8469; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s38
8470; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v3, s39
8471; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s35
8472; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
8473; GFX8-NOHSA-NEXT:    s_nop 0
8474; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s26
8475; GFX8-NOHSA-NEXT:    s_add_u32 s26, s8, 0x70
8476; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v3, s27
8477; GFX8-NOHSA-NEXT:    s_addc_u32 s27, s9, 0
8478; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s26
8479; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s36
8480; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s37
8481; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s27
8482; GFX8-NOHSA-NEXT:    s_add_u32 s26, s8, 0x60
8483; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
8484; GFX8-NOHSA-NEXT:    s_addc_u32 s27, s9, 0
8485; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s26
8486; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s28
8487; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s29
8488; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s30
8489; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v3, s31
8490; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s27
8491; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
8492; GFX8-NOHSA-NEXT:    s_nop 0
8493; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s22
8494; GFX8-NOHSA-NEXT:    s_add_u32 s22, s8, 0x50
8495; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v3, s23
8496; GFX8-NOHSA-NEXT:    s_addc_u32 s23, s9, 0
8497; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s22
8498; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s24
8499; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s25
8500; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s23
8501; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
8502; GFX8-NOHSA-NEXT:    s_nop 0
8503; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s16
8504; GFX8-NOHSA-NEXT:    s_add_u32 s16, s8, 64
8505; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s17
8506; GFX8-NOHSA-NEXT:    s_addc_u32 s17, s9, 0
8507; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s16
8508; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s20
8509; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v3, s21
8510; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s17
8511; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
8512; GFX8-NOHSA-NEXT:    s_nop 0
8513; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s12
8514; GFX8-NOHSA-NEXT:    s_add_u32 s12, s8, 48
8515; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v3, s13
8516; GFX8-NOHSA-NEXT:    s_addc_u32 s13, s9, 0
8517; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s12
8518; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s18
8519; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s19
8520; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s13
8521; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
8522; GFX8-NOHSA-NEXT:    s_nop 0
8523; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s6
8524; GFX8-NOHSA-NEXT:    s_add_u32 s6, s8, 32
8525; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s7
8526; GFX8-NOHSA-NEXT:    s_addc_u32 s7, s9, 0
8527; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s6
8528; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s14
8529; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v3, s15
8530; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s7
8531; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
8532; GFX8-NOHSA-NEXT:    s_nop 0
8533; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s2
8534; GFX8-NOHSA-NEXT:    s_add_u32 s2, s8, 16
8535; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v3, s3
8536; GFX8-NOHSA-NEXT:    s_addc_u32 s3, s9, 0
8537; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s3
8538; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s4
8539; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s5
8540; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s2
8541; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
8542; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s8
8543; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s10
8544; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s11
8545; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s0
8546; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v3, s1
8547; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s9
8548; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
8549; GFX8-NOHSA-NEXT:    s_endpgm
8550;
8551; EG-LABEL: constant_sextload_v32i8_to_v32i64:
8552; EG:       ; %bb.0:
8553; EG-NEXT:    ALU 0, @26, KC0[CB0:0-32], KC1[]
8554; EG-NEXT:    TEX 1 @22
8555; EG-NEXT:    ALU 84, @27, KC0[CB0:0-32], KC1[]
8556; EG-NEXT:    ALU 71, @112, KC0[], KC1[]
8557; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T41.XYZW, T42.X, 0
8558; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T26.XYZW, T31.X, 0
8559; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T40.XYZW, T30.X, 0
8560; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T11.XYZW, T25.X, 0
8561; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T39.XYZW, T24.X, 0
8562; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T27.XYZW, T23.X, 0
8563; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T12.XYZW, T22.X, 0
8564; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T28.XYZW, T21.X, 0
8565; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T37.XYZW, T20.X, 0
8566; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T29.XYZW, T19.X, 0
8567; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T35.XYZW, T18.X, 0
8568; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T33.XYZW, T17.X, 0
8569; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T34.XYZW, T16.X, 0
8570; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T38.XYZW, T15.X, 0
8571; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T32.XYZW, T14.X, 0
8572; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T36.XYZW, T13.X, 1
8573; EG-NEXT:    CF_END
8574; EG-NEXT:    PAD
8575; EG-NEXT:    Fetch clause starting at 22:
8576; EG-NEXT:     VTX_READ_128 T12.XYZW, T11.X, 0, #1
8577; EG-NEXT:     VTX_READ_128 T11.XYZW, T11.X, 16, #1
8578; EG-NEXT:    ALU clause starting at 26:
8579; EG-NEXT:     MOV * T11.X, KC0[2].Z,
8580; EG-NEXT:    ALU clause starting at 27:
8581; EG-NEXT:     LSHR T13.X, KC0[2].Y, literal.x,
8582; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
8583; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
8584; EG-NEXT:     LSHR T14.X, PV.W, literal.x,
8585; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
8586; EG-NEXT:    2(2.802597e-45), 32(4.484155e-44)
8587; EG-NEXT:     LSHR T15.X, PV.W, literal.x,
8588; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
8589; EG-NEXT:    2(2.802597e-45), 48(6.726233e-44)
8590; EG-NEXT:     LSHR T16.X, PV.W, literal.x,
8591; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
8592; EG-NEXT:    2(2.802597e-45), 64(8.968310e-44)
8593; EG-NEXT:     LSHR T17.X, PV.W, literal.x,
8594; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
8595; EG-NEXT:    2(2.802597e-45), 80(1.121039e-43)
8596; EG-NEXT:     LSHR T18.X, PV.W, literal.x,
8597; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
8598; EG-NEXT:    2(2.802597e-45), 96(1.345247e-43)
8599; EG-NEXT:     LSHR T19.X, PV.W, literal.x,
8600; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
8601; EG-NEXT:    2(2.802597e-45), 112(1.569454e-43)
8602; EG-NEXT:     LSHR T20.X, PV.W, literal.x,
8603; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
8604; EG-NEXT:    2(2.802597e-45), 128(1.793662e-43)
8605; EG-NEXT:     LSHR T21.X, PV.W, literal.x,
8606; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
8607; EG-NEXT:    2(2.802597e-45), 144(2.017870e-43)
8608; EG-NEXT:     LSHR T22.X, PV.W, literal.x,
8609; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
8610; EG-NEXT:    2(2.802597e-45), 160(2.242078e-43)
8611; EG-NEXT:     LSHR T23.X, PV.W, literal.x,
8612; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
8613; EG-NEXT:    2(2.802597e-45), 176(2.466285e-43)
8614; EG-NEXT:     LSHR T24.X, PV.W, literal.x,
8615; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
8616; EG-NEXT:    2(2.802597e-45), 192(2.690493e-43)
8617; EG-NEXT:     LSHR * T25.X, PV.W, literal.x,
8618; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
8619; EG-NEXT:     BFE_INT * T26.X, T11.W, 0.0, literal.x,
8620; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
8621; EG-NEXT:     BFE_INT T27.X, T11.Y, 0.0, literal.x,
8622; EG-NEXT:     ASHR T26.Y, PV.X, literal.y,
8623; EG-NEXT:     LSHR * T0.W, T11.W, literal.x,
8624; EG-NEXT:    8(1.121039e-44), 31(4.344025e-44)
8625; EG-NEXT:     BFE_INT T28.X, T11.X, 0.0, literal.x,
8626; EG-NEXT:     ASHR T27.Y, PV.X, literal.y,
8627; EG-NEXT:     BFE_INT T26.Z, PV.W, 0.0, literal.x,
8628; EG-NEXT:     LSHR * T0.W, T11.Y, literal.x,
8629; EG-NEXT:    8(1.121039e-44), 31(4.344025e-44)
8630; EG-NEXT:     BFE_INT T29.X, T12.W, 0.0, literal.x,
8631; EG-NEXT:     ASHR T28.Y, PV.X, literal.y,
8632; EG-NEXT:     BFE_INT T27.Z, PV.W, 0.0, literal.x,
8633; EG-NEXT:     LSHR T0.W, T11.X, literal.x,
8634; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.z,
8635; EG-NEXT:    8(1.121039e-44), 31(4.344025e-44)
8636; EG-NEXT:    208(2.914701e-43), 0(0.000000e+00)
8637; EG-NEXT:     LSHR T30.X, PS, literal.x,
8638; EG-NEXT:     ASHR T29.Y, PV.X, literal.y,
8639; EG-NEXT:     BFE_INT T28.Z, PV.W, 0.0, literal.z,
8640; EG-NEXT:     LSHR T0.W, T12.W, literal.z,
8641; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.w,
8642; EG-NEXT:    2(2.802597e-45), 31(4.344025e-44)
8643; EG-NEXT:    8(1.121039e-44), 224(3.138909e-43)
8644; EG-NEXT:     LSHR T31.X, PS, literal.x,
8645; EG-NEXT:     BFE_INT T29.Z, PV.W, 0.0, literal.y,
8646; EG-NEXT:     ADD_INT T0.W, KC0[2].Y, literal.z,
8647; EG-NEXT:     ASHR * T32.W, T12.X, literal.w,
8648; EG-NEXT:    2(2.802597e-45), 8(1.121039e-44)
8649; EG-NEXT:    240(3.363116e-43), 31(4.344025e-44)
8650; EG-NEXT:     BFE_INT T33.X, T12.Z, 0.0, literal.x,
8651; EG-NEXT:     LSHR T0.Y, T11.Z, literal.x, BS:VEC_120/SCL_212
8652; EG-NEXT:     ASHR T32.Z, T12.X, literal.y,
8653; EG-NEXT:     LSHR T1.W, T12.X, literal.z,
8654; EG-NEXT:     ASHR * T34.W, T12.Y, literal.w,
8655; EG-NEXT:    8(1.121039e-44), 24(3.363116e-44)
8656; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
8657; EG-NEXT:     BFE_INT T32.X, PV.W, 0.0, literal.x,
8658; EG-NEXT:     ASHR T33.Y, PV.X, literal.y,
8659; EG-NEXT:     ASHR T34.Z, T12.Y, literal.z,
8660; EG-NEXT:     LSHR T1.W, T12.Z, literal.x,
8661; EG-NEXT:     LSHR * T2.W, T12.Y, literal.w,
8662; EG-NEXT:    8(1.121039e-44), 31(4.344025e-44)
8663; EG-NEXT:    24(3.363116e-44), 16(2.242078e-44)
8664; EG-NEXT:     BFE_INT * T34.X, PS, 0.0, literal.x,
8665; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
8666; EG-NEXT:    ALU clause starting at 112:
8667; EG-NEXT:     ASHR T32.Y, T32.X, literal.x,
8668; EG-NEXT:     BFE_INT T33.Z, T1.W, 0.0, literal.y,
8669; EG-NEXT:     LSHR T1.W, T11.W, literal.z, BS:VEC_120/SCL_212
8670; EG-NEXT:     ASHR * T35.W, T12.Z, literal.x,
8671; EG-NEXT:    31(4.344025e-44), 8(1.121039e-44)
8672; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
8673; EG-NEXT:     BFE_INT T36.X, T12.X, 0.0, literal.x,
8674; EG-NEXT:     ASHR T34.Y, T34.X, literal.y, BS:VEC_120/SCL_212
8675; EG-NEXT:     ASHR T35.Z, T12.Z, literal.z,
8676; EG-NEXT:     LSHR T2.W, T12.Z, literal.w,
8677; EG-NEXT:     ASHR * T37.W, T12.W, literal.y,
8678; EG-NEXT:    8(1.121039e-44), 31(4.344025e-44)
8679; EG-NEXT:    24(3.363116e-44), 16(2.242078e-44)
8680; EG-NEXT:     BFE_INT T35.X, PV.W, 0.0, literal.x,
8681; EG-NEXT:     ASHR T36.Y, PV.X, literal.y,
8682; EG-NEXT:     ASHR T37.Z, T12.W, literal.z,
8683; EG-NEXT:     LSHR T2.W, T12.X, literal.x,
8684; EG-NEXT:     LSHR * T3.W, T12.W, literal.w,
8685; EG-NEXT:    8(1.121039e-44), 31(4.344025e-44)
8686; EG-NEXT:    24(3.363116e-44), 16(2.242078e-44)
8687; EG-NEXT:     BFE_INT T37.X, PS, 0.0, literal.x,
8688; EG-NEXT:     ASHR T35.Y, PV.X, literal.y,
8689; EG-NEXT:     BFE_INT T36.Z, PV.W, 0.0, literal.x,
8690; EG-NEXT:     LSHR T2.W, T11.Z, literal.z,
8691; EG-NEXT:     ASHR * T12.W, T11.X, literal.y,
8692; EG-NEXT:    8(1.121039e-44), 31(4.344025e-44)
8693; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
8694; EG-NEXT:     BFE_INT T38.X, T12.Y, 0.0, literal.x,
8695; EG-NEXT:     ASHR T37.Y, PV.X, literal.y,
8696; EG-NEXT:     ASHR T12.Z, T11.X, literal.z,
8697; EG-NEXT:     LSHR T3.W, T11.X, literal.w,
8698; EG-NEXT:     ASHR * T39.W, T11.Y, literal.y,
8699; EG-NEXT:    8(1.121039e-44), 31(4.344025e-44)
8700; EG-NEXT:    24(3.363116e-44), 16(2.242078e-44)
8701; EG-NEXT:     BFE_INT T12.X, PV.W, 0.0, literal.x,
8702; EG-NEXT:     ASHR T38.Y, PV.X, literal.y,
8703; EG-NEXT:     ASHR T39.Z, T11.Y, literal.z,
8704; EG-NEXT:     LSHR T3.W, T12.Y, literal.x, BS:VEC_120/SCL_212
8705; EG-NEXT:     LSHR * T4.W, T11.Y, literal.w,
8706; EG-NEXT:    8(1.121039e-44), 31(4.344025e-44)
8707; EG-NEXT:    24(3.363116e-44), 16(2.242078e-44)
8708; EG-NEXT:     BFE_INT T39.X, PS, 0.0, literal.x,
8709; EG-NEXT:     ASHR T12.Y, PV.X, literal.y,
8710; EG-NEXT:     BFE_INT T38.Z, PV.W, 0.0, literal.x,
8711; EG-NEXT:     ASHR T36.W, T36.Z, literal.y,
8712; EG-NEXT:     ASHR * T40.W, T11.Z, literal.y,
8713; EG-NEXT:    8(1.121039e-44), 31(4.344025e-44)
8714; EG-NEXT:     BFE_INT T11.X, T11.Z, 0.0, literal.x,
8715; EG-NEXT:     ASHR T39.Y, PV.X, literal.y,
8716; EG-NEXT:     ASHR T40.Z, T11.Z, literal.z,
8717; EG-NEXT:     ASHR T38.W, PV.Z, literal.y,
8718; EG-NEXT:     ASHR * T41.W, T11.W, literal.y,
8719; EG-NEXT:    8(1.121039e-44), 31(4.344025e-44)
8720; EG-NEXT:    24(3.363116e-44), 0(0.000000e+00)
8721; EG-NEXT:     BFE_INT T40.X, T2.W, 0.0, literal.x,
8722; EG-NEXT:     ASHR T11.Y, PV.X, literal.y,
8723; EG-NEXT:     ASHR T41.Z, T11.W, literal.z, BS:VEC_120/SCL_212
8724; EG-NEXT:     ASHR T33.W, T33.Z, literal.y,
8725; EG-NEXT:     ASHR * T29.W, T29.Z, literal.y,
8726; EG-NEXT:    8(1.121039e-44), 31(4.344025e-44)
8727; EG-NEXT:    24(3.363116e-44), 0(0.000000e+00)
8728; EG-NEXT:     BFE_INT T41.X, T1.W, 0.0, literal.x,
8729; EG-NEXT:     ASHR T40.Y, PV.X, literal.y,
8730; EG-NEXT:     BFE_INT T11.Z, T0.Y, 0.0, literal.x,
8731; EG-NEXT:     ASHR T28.W, T28.Z, literal.y,
8732; EG-NEXT:     ASHR * T27.W, T27.Z, literal.y,
8733; EG-NEXT:    8(1.121039e-44), 31(4.344025e-44)
8734; EG-NEXT:     LSHR T42.X, T0.W, literal.x,
8735; EG-NEXT:     ASHR T41.Y, PV.X, literal.y,
8736; EG-NEXT:     ASHR T11.W, PV.Z, literal.y,
8737; EG-NEXT:     ASHR * T26.W, T26.Z, literal.y,
8738; EG-NEXT:    2(2.802597e-45), 31(4.344025e-44)
8739;
8740; GFX12-LABEL: constant_sextload_v32i8_to_v32i64:
8741; GFX12:       ; %bb.0:
8742; GFX12-NEXT:    s_load_b128 s[8:11], s[4:5], 0x24
8743; GFX12-NEXT:    s_wait_kmcnt 0x0
8744; GFX12-NEXT:    s_load_b256 s[0:7], s[10:11], 0x0
8745; GFX12-NEXT:    s_wait_kmcnt 0x0
8746; GFX12-NEXT:    s_lshr_b32 s36, s7, 16
8747; GFX12-NEXT:    s_lshr_b32 s38, s7, 8
8748; GFX12-NEXT:    s_mov_b32 s40, s7
8749; GFX12-NEXT:    s_lshr_b32 s42, s6, 16
8750; GFX12-NEXT:    s_lshr_b32 s44, s6, 24
8751; GFX12-NEXT:    s_ashr_i64 s[74:75], s[6:7], 56
8752; GFX12-NEXT:    s_bfe_i64 s[36:37], s[36:37], 0x80000
8753; GFX12-NEXT:    s_lshr_b32 s46, s6, 8
8754; GFX12-NEXT:    s_bfe_i64 s[40:41], s[40:41], 0x80000
8755; GFX12-NEXT:    s_bfe_i64 s[38:39], s[38:39], 0x80000
8756; GFX12-NEXT:    v_dual_mov_b32 v24, 0 :: v_dual_mov_b32 v1, s37
8757; GFX12-NEXT:    s_bfe_i64 s[44:45], s[44:45], 0x80000
8758; GFX12-NEXT:    s_bfe_i64 s[42:43], s[42:43], 0x80000
8759; GFX12-NEXT:    s_wait_alu 0xfffe
8760; GFX12-NEXT:    v_dual_mov_b32 v0, s36 :: v_dual_mov_b32 v3, s75
8761; GFX12-NEXT:    v_dual_mov_b32 v2, s74 :: v_dual_mov_b32 v5, s41
8762; GFX12-NEXT:    s_lshr_b32 s48, s5, 16
8763; GFX12-NEXT:    s_bfe_i64 s[72:73], s[6:7], 0x80000
8764; GFX12-NEXT:    s_bfe_i64 s[46:47], s[46:47], 0x80000
8765; GFX12-NEXT:    v_dual_mov_b32 v4, s40 :: v_dual_mov_b32 v7, s39
8766; GFX12-NEXT:    v_dual_mov_b32 v6, s38 :: v_dual_mov_b32 v9, s43
8767; GFX12-NEXT:    s_lshr_b32 s50, s5, 8
8768; GFX12-NEXT:    s_mov_b32 s52, s5
8769; GFX12-NEXT:    v_dual_mov_b32 v8, s42 :: v_dual_mov_b32 v11, s45
8770; GFX12-NEXT:    v_dual_mov_b32 v10, s44 :: v_dual_mov_b32 v13, s73
8771; GFX12-NEXT:    s_lshr_b32 s54, s4, 16
8772; GFX12-NEXT:    s_lshr_b32 s56, s4, 24
8773; GFX12-NEXT:    s_ashr_i64 s[70:71], s[4:5], 56
8774; GFX12-NEXT:    v_dual_mov_b32 v12, s72 :: v_dual_mov_b32 v15, s47
8775; GFX12-NEXT:    s_bfe_i64 s[36:37], s[48:49], 0x80000
8776; GFX12-NEXT:    v_mov_b32_e32 v14, s46
8777; GFX12-NEXT:    s_lshr_b32 s58, s4, 8
8778; GFX12-NEXT:    s_bfe_i64 s[52:53], s[52:53], 0x80000
8779; GFX12-NEXT:    s_bfe_i64 s[50:51], s[50:51], 0x80000
8780; GFX12-NEXT:    s_lshr_b32 s60, s3, 16
8781; GFX12-NEXT:    s_bfe_i64 s[56:57], s[56:57], 0x80000
8782; GFX12-NEXT:    s_bfe_i64 s[54:55], s[54:55], 0x80000
8783; GFX12-NEXT:    s_clause 0x3
8784; GFX12-NEXT:    global_store_b128 v24, v[0:3], s[8:9] offset:240
8785; GFX12-NEXT:    global_store_b128 v24, v[4:7], s[8:9] offset:224
8786; GFX12-NEXT:    global_store_b128 v24, v[8:11], s[8:9] offset:208
8787; GFX12-NEXT:    global_store_b128 v24, v[12:15], s[8:9] offset:192
8788; GFX12-NEXT:    s_wait_alu 0xfffe
8789; GFX12-NEXT:    v_dual_mov_b32 v1, s37 :: v_dual_mov_b32 v0, s36
8790; GFX12-NEXT:    v_dual_mov_b32 v3, s71 :: v_dual_mov_b32 v2, s70
8791; GFX12-NEXT:    v_mov_b32_e32 v5, s53
8792; GFX12-NEXT:    s_lshr_b32 s34, s3, 8
8793; GFX12-NEXT:    s_mov_b32 s30, s3
8794; GFX12-NEXT:    s_lshr_b32 s24, s2, 16
8795; GFX12-NEXT:    s_lshr_b32 s22, s2, 24
8796; GFX12-NEXT:    s_bfe_i64 s[28:29], s[4:5], 0x80000
8797; GFX12-NEXT:    s_bfe_i64 s[58:59], s[58:59], 0x80000
8798; GFX12-NEXT:    v_dual_mov_b32 v4, s52 :: v_dual_mov_b32 v7, s51
8799; GFX12-NEXT:    v_dual_mov_b32 v6, s50 :: v_dual_mov_b32 v9, s55
8800; GFX12-NEXT:    s_lshr_b32 s20, s2, 8
8801; GFX12-NEXT:    s_ashr_i64 s[26:27], s[2:3], 56
8802; GFX12-NEXT:    s_bfe_i64 s[60:61], s[60:61], 0x80000
8803; GFX12-NEXT:    v_dual_mov_b32 v8, s54 :: v_dual_mov_b32 v11, s57
8804; GFX12-NEXT:    v_dual_mov_b32 v10, s56 :: v_dual_mov_b32 v13, s29
8805; GFX12-NEXT:    s_lshr_b32 s18, s1, 16
8806; GFX12-NEXT:    s_bfe_i64 s[22:23], s[22:23], 0x80000
8807; GFX12-NEXT:    s_bfe_i64 s[24:25], s[24:25], 0x80000
8808; GFX12-NEXT:    s_bfe_i64 s[30:31], s[30:31], 0x80000
8809; GFX12-NEXT:    s_bfe_i64 s[34:35], s[34:35], 0x80000
8810; GFX12-NEXT:    v_dual_mov_b32 v12, s28 :: v_dual_mov_b32 v15, s59
8811; GFX12-NEXT:    v_dual_mov_b32 v14, s58 :: v_dual_mov_b32 v17, s61
8812; GFX12-NEXT:    s_lshr_b32 s14, s1, 8
8813; GFX12-NEXT:    s_mov_b32 s62, s1
8814; GFX12-NEXT:    s_bfe_i64 s[16:17], s[2:3], 0x80000
8815; GFX12-NEXT:    s_bfe_i64 s[20:21], s[20:21], 0x80000
8816; GFX12-NEXT:    v_dual_mov_b32 v16, s60 :: v_dual_mov_b32 v19, s27
8817; GFX12-NEXT:    v_dual_mov_b32 v18, s26 :: v_dual_mov_b32 v21, s31
8818; GFX12-NEXT:    s_lshr_b32 s64, s0, 16
8819; GFX12-NEXT:    s_lshr_b32 s66, s0, 24
8820; GFX12-NEXT:    s_ashr_i64 s[12:13], s[0:1], 56
8821; GFX12-NEXT:    s_bfe_i64 s[18:19], s[18:19], 0x80000
8822; GFX12-NEXT:    v_dual_mov_b32 v20, s30 :: v_dual_mov_b32 v23, s35
8823; GFX12-NEXT:    v_mov_b32_e32 v22, s34
8824; GFX12-NEXT:    s_clause 0x5
8825; GFX12-NEXT:    global_store_b128 v24, v[0:3], s[8:9] offset:176
8826; GFX12-NEXT:    global_store_b128 v24, v[4:7], s[8:9] offset:160
8827; GFX12-NEXT:    global_store_b128 v24, v[8:11], s[8:9] offset:144
8828; GFX12-NEXT:    global_store_b128 v24, v[12:15], s[8:9] offset:128
8829; GFX12-NEXT:    global_store_b128 v24, v[16:19], s[8:9] offset:112
8830; GFX12-NEXT:    global_store_b128 v24, v[20:23], s[8:9] offset:96
8831; GFX12-NEXT:    v_dual_mov_b32 v1, s25 :: v_dual_mov_b32 v0, s24
8832; GFX12-NEXT:    v_dual_mov_b32 v3, s23 :: v_dual_mov_b32 v2, s22
8833; GFX12-NEXT:    v_mov_b32_e32 v5, s17
8834; GFX12-NEXT:    s_lshr_b32 s68, s0, 8
8835; GFX12-NEXT:    s_bfe_i64 s[6:7], s[62:63], 0x80000
8836; GFX12-NEXT:    s_bfe_i64 s[14:15], s[14:15], 0x80000
8837; GFX12-NEXT:    v_dual_mov_b32 v4, s16 :: v_dual_mov_b32 v7, s21
8838; GFX12-NEXT:    v_dual_mov_b32 v6, s20 :: v_dual_mov_b32 v9, s19
8839; GFX12-NEXT:    s_bfe_i64 s[2:3], s[66:67], 0x80000
8840; GFX12-NEXT:    s_bfe_i64 s[4:5], s[64:65], 0x80000
8841; GFX12-NEXT:    v_dual_mov_b32 v8, s18 :: v_dual_mov_b32 v11, s13
8842; GFX12-NEXT:    v_dual_mov_b32 v10, s12 :: v_dual_mov_b32 v13, s7
8843; GFX12-NEXT:    s_bfe_i64 s[10:11], s[0:1], 0x80000
8844; GFX12-NEXT:    s_bfe_i64 s[0:1], s[68:69], 0x80000
8845; GFX12-NEXT:    v_dual_mov_b32 v12, s6 :: v_dual_mov_b32 v15, s15
8846; GFX12-NEXT:    v_dual_mov_b32 v14, s14 :: v_dual_mov_b32 v17, s5
8847; GFX12-NEXT:    v_dual_mov_b32 v16, s4 :: v_dual_mov_b32 v19, s3
8848; GFX12-NEXT:    v_dual_mov_b32 v18, s2 :: v_dual_mov_b32 v21, s11
8849; GFX12-NEXT:    v_dual_mov_b32 v20, s10 :: v_dual_mov_b32 v23, s1
8850; GFX12-NEXT:    v_mov_b32_e32 v22, s0
8851; GFX12-NEXT:    s_clause 0x5
8852; GFX12-NEXT:    global_store_b128 v24, v[0:3], s[8:9] offset:80
8853; GFX12-NEXT:    global_store_b128 v24, v[4:7], s[8:9] offset:64
8854; GFX12-NEXT:    global_store_b128 v24, v[8:11], s[8:9] offset:48
8855; GFX12-NEXT:    global_store_b128 v24, v[12:15], s[8:9] offset:32
8856; GFX12-NEXT:    global_store_b128 v24, v[16:19], s[8:9] offset:16
8857; GFX12-NEXT:    global_store_b128 v24, v[20:23], s[8:9]
8858; GFX12-NEXT:    s_endpgm
8859  %load = load <32 x i8>, ptr addrspace(4) %in
8860  %ext = sext <32 x i8> %load to <32 x i64>
8861  store <32 x i64> %ext, ptr addrspace(1) %out
8862  ret void
8863}
8864
8865; XFUNC-LABEL: {{^}}constant_zextload_v64i8_to_v64i64:
8866; define amdgpu_kernel void @constant_zextload_v64i8_to_v64i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
8867;   %load = load <64 x i8>, ptr addrspace(4) %in
8868;   %ext = zext <64 x i8> %load to <64 x i64>
8869;   store <64 x i64> %ext, ptr addrspace(1) %out
8870;   ret void
8871; }
8872
8873; XFUNC-LABEL: {{^}}constant_sextload_v64i8_to_v64i64:
8874; define amdgpu_kernel void @constant_sextload_v64i8_to_v64i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
8875;   %load = load <64 x i8>, ptr addrspace(4) %in
8876;   %ext = sext <64 x i8> %load to <64 x i64>
8877;   store <64 x i64> %ext, ptr addrspace(1) %out
8878;   ret void
8879; }
8880
; Scalar zero-extending load test: reads one i8 through the addrspace(4)
; pointer %in, zero-extends it to i16, and stores the i16 through the
; addrspace(1) pointer %out.
; The per-target assertion lines inside the function are autogenerated by
; utils/update_llc_test_checks.py (see the NOTE at the top of the file);
; regenerate them with that script rather than editing them by hand.
8881define amdgpu_kernel void @constant_zextload_i8_to_i16(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
8882; GFX6-NOHSA-LABEL: constant_zextload_i8_to_i16:
8883; GFX6-NOHSA:       ; %bb.0:
8884; GFX6-NOHSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
8885; GFX6-NOHSA-NEXT:    s_mov_b32 s7, 0xf000
8886; GFX6-NOHSA-NEXT:    s_mov_b32 s6, -1
8887; GFX6-NOHSA-NEXT:    s_mov_b32 s10, s6
8888; GFX6-NOHSA-NEXT:    s_mov_b32 s11, s7
8889; GFX6-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
8890; GFX6-NOHSA-NEXT:    s_mov_b32 s8, s2
8891; GFX6-NOHSA-NEXT:    s_mov_b32 s9, s3
8892; GFX6-NOHSA-NEXT:    buffer_load_ubyte v0, off, s[8:11], 0
8893; GFX6-NOHSA-NEXT:    s_mov_b32 s4, s0
8894; GFX6-NOHSA-NEXT:    s_mov_b32 s5, s1
8895; GFX6-NOHSA-NEXT:    s_waitcnt vmcnt(0)
8896; GFX6-NOHSA-NEXT:    buffer_store_short v0, off, s[4:7], 0
8897; GFX6-NOHSA-NEXT:    s_endpgm
8898;
8899; GFX7-HSA-LABEL: constant_zextload_i8_to_i16:
8900; GFX7-HSA:       ; %bb.0:
8901; GFX7-HSA-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
8902; GFX7-HSA-NEXT:    s_waitcnt lgkmcnt(0)
8903; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s2
8904; GFX7-HSA-NEXT:    v_mov_b32_e32 v1, s3
8905; GFX7-HSA-NEXT:    flat_load_ubyte v2, v[0:1]
8906; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s0
8907; GFX7-HSA-NEXT:    v_mov_b32_e32 v1, s1
8908; GFX7-HSA-NEXT:    s_waitcnt vmcnt(0)
8909; GFX7-HSA-NEXT:    flat_store_short v[0:1], v2
8910; GFX7-HSA-NEXT:    s_endpgm
8911;
8912; GFX8-NOHSA-LABEL: constant_zextload_i8_to_i16:
8913; GFX8-NOHSA:       ; %bb.0:
8914; GFX8-NOHSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
8915; GFX8-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
8916; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s2
8917; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s3
8918; GFX8-NOHSA-NEXT:    flat_load_ubyte v2, v[0:1]
8919; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s0
8920; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s1
8921; GFX8-NOHSA-NEXT:    s_waitcnt vmcnt(0)
8922; GFX8-NOHSA-NEXT:    flat_store_short v[0:1], v2
8923; GFX8-NOHSA-NEXT:    s_endpgm
8924;
8925; EG-LABEL: constant_zextload_i8_to_i16:
8926; EG:       ; %bb.0:
8927; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
8928; EG-NEXT:    TEX 0 @6
8929; EG-NEXT:    ALU 10, @9, KC0[CB0:0-32], KC1[]
8930; EG-NEXT:    MEM_RAT MSKOR T0.XW, T1.X
8931; EG-NEXT:    CF_END
8932; EG-NEXT:    PAD
8933; EG-NEXT:    Fetch clause starting at 6:
8934; EG-NEXT:     VTX_READ_8 T0.X, T0.X, 0, #1
8935; EG-NEXT:    ALU clause starting at 8:
8936; EG-NEXT:     MOV * T0.X, KC0[2].Z,
8937; EG-NEXT:    ALU clause starting at 9:
8938; EG-NEXT:     AND_INT * T0.W, KC0[2].Y, literal.x,
8939; EG-NEXT:    3(4.203895e-45), 0(0.000000e+00)
8940; EG-NEXT:     LSHL * T0.W, PV.W, literal.x,
8941; EG-NEXT:    3(4.203895e-45), 0(0.000000e+00)
8942; EG-NEXT:     LSHL T0.X, T0.X, PV.W,
8943; EG-NEXT:     LSHL * T0.W, literal.x, PV.W,
8944; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
8945; EG-NEXT:     MOV T0.Y, 0.0,
8946; EG-NEXT:     MOV * T0.Z, 0.0,
8947; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
8948; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
8949;
8950; GFX12-LABEL: constant_zextload_i8_to_i16:
8951; GFX12:       ; %bb.0:
8952; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
8953; GFX12-NEXT:    v_mov_b32_e32 v0, 0
8954; GFX12-NEXT:    s_wait_kmcnt 0x0
8955; GFX12-NEXT:    global_load_u8 v1, v0, s[2:3]
8956; GFX12-NEXT:    s_wait_loadcnt 0x0
8957; GFX12-NEXT:    global_store_b16 v0, v1, s[0:1]
8958; GFX12-NEXT:    s_endpgm
8959  %a = load i8, ptr addrspace(4) %in
8960  %ext = zext i8 %a to i16
8961  store i16 %ext, ptr addrspace(1) %out
8962  ret void
8963}
8964
; Scalar sign-extending load test: reads one i8 through the addrspace(4)
; pointer %in, sign-extends it to i16, and stores the i16 through the
; addrspace(1) pointer %out.
; The per-target assertion lines inside the function are autogenerated by
; utils/update_llc_test_checks.py (see the NOTE at the top of the file);
; regenerate them with that script rather than editing them by hand.
8965define amdgpu_kernel void @constant_sextload_i8_to_i16(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
8966; GFX6-NOHSA-LABEL: constant_sextload_i8_to_i16:
8967; GFX6-NOHSA:       ; %bb.0:
8968; GFX6-NOHSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
8969; GFX6-NOHSA-NEXT:    s_mov_b32 s7, 0xf000
8970; GFX6-NOHSA-NEXT:    s_mov_b32 s6, -1
8971; GFX6-NOHSA-NEXT:    s_mov_b32 s10, s6
8972; GFX6-NOHSA-NEXT:    s_mov_b32 s11, s7
8973; GFX6-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
8974; GFX6-NOHSA-NEXT:    s_mov_b32 s8, s2
8975; GFX6-NOHSA-NEXT:    s_mov_b32 s9, s3
8976; GFX6-NOHSA-NEXT:    buffer_load_sbyte v0, off, s[8:11], 0
8977; GFX6-NOHSA-NEXT:    s_mov_b32 s4, s0
8978; GFX6-NOHSA-NEXT:    s_mov_b32 s5, s1
8979; GFX6-NOHSA-NEXT:    s_waitcnt vmcnt(0)
8980; GFX6-NOHSA-NEXT:    buffer_store_short v0, off, s[4:7], 0
8981; GFX6-NOHSA-NEXT:    s_endpgm
8982;
8983; GFX7-HSA-LABEL: constant_sextload_i8_to_i16:
8984; GFX7-HSA:       ; %bb.0:
8985; GFX7-HSA-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
8986; GFX7-HSA-NEXT:    s_waitcnt lgkmcnt(0)
8987; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s2
8988; GFX7-HSA-NEXT:    v_mov_b32_e32 v1, s3
8989; GFX7-HSA-NEXT:    flat_load_sbyte v2, v[0:1]
8990; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s0
8991; GFX7-HSA-NEXT:    v_mov_b32_e32 v1, s1
8992; GFX7-HSA-NEXT:    s_waitcnt vmcnt(0)
8993; GFX7-HSA-NEXT:    flat_store_short v[0:1], v2
8994; GFX7-HSA-NEXT:    s_endpgm
8995;
8996; GFX8-NOHSA-LABEL: constant_sextload_i8_to_i16:
8997; GFX8-NOHSA:       ; %bb.0:
8998; GFX8-NOHSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
8999; GFX8-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
9000; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s2
9001; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s3
9002; GFX8-NOHSA-NEXT:    flat_load_sbyte v2, v[0:1]
9003; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s0
9004; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s1
9005; GFX8-NOHSA-NEXT:    s_waitcnt vmcnt(0)
9006; GFX8-NOHSA-NEXT:    flat_store_short v[0:1], v2
9007; GFX8-NOHSA-NEXT:    s_endpgm
9008;
9009; EG-LABEL: constant_sextload_i8_to_i16:
9010; EG:       ; %bb.0:
9011; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
9012; EG-NEXT:    TEX 0 @6
9013; EG-NEXT:    ALU 12, @9, KC0[CB0:0-32], KC1[]
9014; EG-NEXT:    MEM_RAT MSKOR T0.XW, T1.X
9015; EG-NEXT:    CF_END
9016; EG-NEXT:    PAD
9017; EG-NEXT:    Fetch clause starting at 6:
9018; EG-NEXT:     VTX_READ_8 T0.X, T0.X, 0, #1
9019; EG-NEXT:    ALU clause starting at 8:
9020; EG-NEXT:     MOV * T0.X, KC0[2].Z,
9021; EG-NEXT:    ALU clause starting at 9:
9022; EG-NEXT:     BFE_INT T0.W, T0.X, 0.0, literal.x,
9023; EG-NEXT:     AND_INT * T1.W, KC0[2].Y, literal.y,
9024; EG-NEXT:    8(1.121039e-44), 3(4.203895e-45)
9025; EG-NEXT:     AND_INT T0.W, PV.W, literal.x,
9026; EG-NEXT:     LSHL * T1.W, PS, literal.y,
9027; EG-NEXT:    65535(9.183409e-41), 3(4.203895e-45)
9028; EG-NEXT:     LSHL T0.X, PV.W, PS,
9029; EG-NEXT:     LSHL * T0.W, literal.x, PS,
9030; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
9031; EG-NEXT:     MOV T0.Y, 0.0,
9032; EG-NEXT:     MOV * T0.Z, 0.0,
9033; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
9034; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
9035;
9036; GFX12-LABEL: constant_sextload_i8_to_i16:
9037; GFX12:       ; %bb.0:
9038; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
9039; GFX12-NEXT:    v_mov_b32_e32 v0, 0
9040; GFX12-NEXT:    s_wait_kmcnt 0x0
9041; GFX12-NEXT:    global_load_i8 v1, v0, s[2:3]
9042; GFX12-NEXT:    s_wait_loadcnt 0x0
9043; GFX12-NEXT:    global_store_b16 v0, v1, s[0:1]
9044; GFX12-NEXT:    s_endpgm
9045  %a = load i8, ptr addrspace(4) %in
9046  %ext = sext i8 %a to i16
9047  store i16 %ext, ptr addrspace(1) %out
9048  ret void
9049}
9050
; Single-element vector zero-extending load test: reads a <1 x i8> through
; the addrspace(4) pointer %in, zero-extends it to <1 x i16>, and stores the
; result through the addrspace(1) pointer %out.
; Apart from the function label, the expected output for every target is the
; same as for the scalar @constant_zextload_i8_to_i16 test.
; The per-target assertion lines inside the function are autogenerated by
; utils/update_llc_test_checks.py (see the NOTE at the top of the file);
; regenerate them with that script rather than editing them by hand.
9051define amdgpu_kernel void @constant_zextload_v1i8_to_v1i16(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
9052; GFX6-NOHSA-LABEL: constant_zextload_v1i8_to_v1i16:
9053; GFX6-NOHSA:       ; %bb.0:
9054; GFX6-NOHSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
9055; GFX6-NOHSA-NEXT:    s_mov_b32 s7, 0xf000
9056; GFX6-NOHSA-NEXT:    s_mov_b32 s6, -1
9057; GFX6-NOHSA-NEXT:    s_mov_b32 s10, s6
9058; GFX6-NOHSA-NEXT:    s_mov_b32 s11, s7
9059; GFX6-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
9060; GFX6-NOHSA-NEXT:    s_mov_b32 s8, s2
9061; GFX6-NOHSA-NEXT:    s_mov_b32 s9, s3
9062; GFX6-NOHSA-NEXT:    buffer_load_ubyte v0, off, s[8:11], 0
9063; GFX6-NOHSA-NEXT:    s_mov_b32 s4, s0
9064; GFX6-NOHSA-NEXT:    s_mov_b32 s5, s1
9065; GFX6-NOHSA-NEXT:    s_waitcnt vmcnt(0)
9066; GFX6-NOHSA-NEXT:    buffer_store_short v0, off, s[4:7], 0
9067; GFX6-NOHSA-NEXT:    s_endpgm
9068;
9069; GFX7-HSA-LABEL: constant_zextload_v1i8_to_v1i16:
9070; GFX7-HSA:       ; %bb.0:
9071; GFX7-HSA-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
9072; GFX7-HSA-NEXT:    s_waitcnt lgkmcnt(0)
9073; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s2
9074; GFX7-HSA-NEXT:    v_mov_b32_e32 v1, s3
9075; GFX7-HSA-NEXT:    flat_load_ubyte v2, v[0:1]
9076; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s0
9077; GFX7-HSA-NEXT:    v_mov_b32_e32 v1, s1
9078; GFX7-HSA-NEXT:    s_waitcnt vmcnt(0)
9079; GFX7-HSA-NEXT:    flat_store_short v[0:1], v2
9080; GFX7-HSA-NEXT:    s_endpgm
9081;
9082; GFX8-NOHSA-LABEL: constant_zextload_v1i8_to_v1i16:
9083; GFX8-NOHSA:       ; %bb.0:
9084; GFX8-NOHSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
9085; GFX8-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
9086; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s2
9087; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s3
9088; GFX8-NOHSA-NEXT:    flat_load_ubyte v2, v[0:1]
9089; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s0
9090; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s1
9091; GFX8-NOHSA-NEXT:    s_waitcnt vmcnt(0)
9092; GFX8-NOHSA-NEXT:    flat_store_short v[0:1], v2
9093; GFX8-NOHSA-NEXT:    s_endpgm
9094;
9095; EG-LABEL: constant_zextload_v1i8_to_v1i16:
9096; EG:       ; %bb.0:
9097; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
9098; EG-NEXT:    TEX 0 @6
9099; EG-NEXT:    ALU 10, @9, KC0[CB0:0-32], KC1[]
9100; EG-NEXT:    MEM_RAT MSKOR T0.XW, T1.X
9101; EG-NEXT:    CF_END
9102; EG-NEXT:    PAD
9103; EG-NEXT:    Fetch clause starting at 6:
9104; EG-NEXT:     VTX_READ_8 T0.X, T0.X, 0, #1
9105; EG-NEXT:    ALU clause starting at 8:
9106; EG-NEXT:     MOV * T0.X, KC0[2].Z,
9107; EG-NEXT:    ALU clause starting at 9:
9108; EG-NEXT:     AND_INT * T0.W, KC0[2].Y, literal.x,
9109; EG-NEXT:    3(4.203895e-45), 0(0.000000e+00)
9110; EG-NEXT:     LSHL * T0.W, PV.W, literal.x,
9111; EG-NEXT:    3(4.203895e-45), 0(0.000000e+00)
9112; EG-NEXT:     LSHL T0.X, T0.X, PV.W,
9113; EG-NEXT:     LSHL * T0.W, literal.x, PV.W,
9114; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
9115; EG-NEXT:     MOV T0.Y, 0.0,
9116; EG-NEXT:     MOV * T0.Z, 0.0,
9117; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
9118; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
9119;
9120; GFX12-LABEL: constant_zextload_v1i8_to_v1i16:
9121; GFX12:       ; %bb.0:
9122; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
9123; GFX12-NEXT:    v_mov_b32_e32 v0, 0
9124; GFX12-NEXT:    s_wait_kmcnt 0x0
9125; GFX12-NEXT:    global_load_u8 v1, v0, s[2:3]
9126; GFX12-NEXT:    s_wait_loadcnt 0x0
9127; GFX12-NEXT:    global_store_b16 v0, v1, s[0:1]
9128; GFX12-NEXT:    s_endpgm
9129  %load = load <1 x i8>, ptr addrspace(4) %in
9130  %ext = zext <1 x i8> %load to <1 x i16>
9131  store <1 x i16> %ext, ptr addrspace(1) %out
9132  ret void
9133}
9134
; Single-element vector sign-extending load test: reads a <1 x i8> through
; the addrspace(4) pointer %in, sign-extends it to <1 x i16>, and stores the
; result through the addrspace(1) pointer %out.
; Apart from the function label, the expected output for every target is the
; same as for the scalar @constant_sextload_i8_to_i16 test.
; The per-target assertion lines inside the function are autogenerated by
; utils/update_llc_test_checks.py (see the NOTE at the top of the file);
; regenerate them with that script rather than editing them by hand.
9135define amdgpu_kernel void @constant_sextload_v1i8_to_v1i16(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
9136; GFX6-NOHSA-LABEL: constant_sextload_v1i8_to_v1i16:
9137; GFX6-NOHSA:       ; %bb.0:
9138; GFX6-NOHSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
9139; GFX6-NOHSA-NEXT:    s_mov_b32 s7, 0xf000
9140; GFX6-NOHSA-NEXT:    s_mov_b32 s6, -1
9141; GFX6-NOHSA-NEXT:    s_mov_b32 s10, s6
9142; GFX6-NOHSA-NEXT:    s_mov_b32 s11, s7
9143; GFX6-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
9144; GFX6-NOHSA-NEXT:    s_mov_b32 s8, s2
9145; GFX6-NOHSA-NEXT:    s_mov_b32 s9, s3
9146; GFX6-NOHSA-NEXT:    buffer_load_sbyte v0, off, s[8:11], 0
9147; GFX6-NOHSA-NEXT:    s_mov_b32 s4, s0
9148; GFX6-NOHSA-NEXT:    s_mov_b32 s5, s1
9149; GFX6-NOHSA-NEXT:    s_waitcnt vmcnt(0)
9150; GFX6-NOHSA-NEXT:    buffer_store_short v0, off, s[4:7], 0
9151; GFX6-NOHSA-NEXT:    s_endpgm
9152;
9153; GFX7-HSA-LABEL: constant_sextload_v1i8_to_v1i16:
9154; GFX7-HSA:       ; %bb.0:
9155; GFX7-HSA-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
9156; GFX7-HSA-NEXT:    s_waitcnt lgkmcnt(0)
9157; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s2
9158; GFX7-HSA-NEXT:    v_mov_b32_e32 v1, s3
9159; GFX7-HSA-NEXT:    flat_load_sbyte v2, v[0:1]
9160; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s0
9161; GFX7-HSA-NEXT:    v_mov_b32_e32 v1, s1
9162; GFX7-HSA-NEXT:    s_waitcnt vmcnt(0)
9163; GFX7-HSA-NEXT:    flat_store_short v[0:1], v2
9164; GFX7-HSA-NEXT:    s_endpgm
9165;
9166; GFX8-NOHSA-LABEL: constant_sextload_v1i8_to_v1i16:
9167; GFX8-NOHSA:       ; %bb.0:
9168; GFX8-NOHSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
9169; GFX8-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
9170; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s2
9171; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s3
9172; GFX8-NOHSA-NEXT:    flat_load_sbyte v2, v[0:1]
9173; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s0
9174; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s1
9175; GFX8-NOHSA-NEXT:    s_waitcnt vmcnt(0)
9176; GFX8-NOHSA-NEXT:    flat_store_short v[0:1], v2
9177; GFX8-NOHSA-NEXT:    s_endpgm
9178;
9179; EG-LABEL: constant_sextload_v1i8_to_v1i16:
9180; EG:       ; %bb.0:
9181; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
9182; EG-NEXT:    TEX 0 @6
9183; EG-NEXT:    ALU 12, @9, KC0[CB0:0-32], KC1[]
9184; EG-NEXT:    MEM_RAT MSKOR T0.XW, T1.X
9185; EG-NEXT:    CF_END
9186; EG-NEXT:    PAD
9187; EG-NEXT:    Fetch clause starting at 6:
9188; EG-NEXT:     VTX_READ_8 T0.X, T0.X, 0, #1
9189; EG-NEXT:    ALU clause starting at 8:
9190; EG-NEXT:     MOV * T0.X, KC0[2].Z,
9191; EG-NEXT:    ALU clause starting at 9:
9192; EG-NEXT:     BFE_INT T0.W, T0.X, 0.0, literal.x,
9193; EG-NEXT:     AND_INT * T1.W, KC0[2].Y, literal.y,
9194; EG-NEXT:    8(1.121039e-44), 3(4.203895e-45)
9195; EG-NEXT:     AND_INT T0.W, PV.W, literal.x,
9196; EG-NEXT:     LSHL * T1.W, PS, literal.y,
9197; EG-NEXT:    65535(9.183409e-41), 3(4.203895e-45)
9198; EG-NEXT:     LSHL T0.X, PV.W, PS,
9199; EG-NEXT:     LSHL * T0.W, literal.x, PS,
9200; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
9201; EG-NEXT:     MOV T0.Y, 0.0,
9202; EG-NEXT:     MOV * T0.Z, 0.0,
9203; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
9204; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
9205;
9206; GFX12-LABEL: constant_sextload_v1i8_to_v1i16:
9207; GFX12:       ; %bb.0:
9208; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
9209; GFX12-NEXT:    v_mov_b32_e32 v0, 0
9210; GFX12-NEXT:    s_wait_kmcnt 0x0
9211; GFX12-NEXT:    global_load_i8 v1, v0, s[2:3]
9212; GFX12-NEXT:    s_wait_loadcnt 0x0
9213; GFX12-NEXT:    global_store_b16 v0, v1, s[0:1]
9214; GFX12-NEXT:    s_endpgm
9215  %load = load <1 x i8>, ptr addrspace(4) %in
9216  %ext = sext <1 x i8> %load to <1 x i16>
9217  store <1 x i16> %ext, ptr addrspace(1) %out
9218  ret void
9219}
9220
; Test kernel: load a <2 x i8> vector from %in (addrspace(4)), zero-extend each
; element to i16, and store the resulting <2 x i16> to %out (addrspace(1)).
;
; In the expected output for every RUN configuration the two source bytes are
; fetched with a single 16-bit load, moved into bits 0-7 and 16-23 of one
; 32-bit value, and written back with a single 32-bit store.
;
; The assertions below are autogenerated (see the NOTE on the first line of
; this file); regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand.
9221define amdgpu_kernel void @constant_zextload_v2i8_to_v2i16(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
9222; GFX6-NOHSA-LABEL: constant_zextload_v2i8_to_v2i16:
9223; GFX6-NOHSA:       ; %bb.0:
9224; GFX6-NOHSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
9225; GFX6-NOHSA-NEXT:    s_mov_b32 s7, 0xf000
9226; GFX6-NOHSA-NEXT:    s_mov_b32 s6, -1
9227; GFX6-NOHSA-NEXT:    s_mov_b32 s10, s6
9228; GFX6-NOHSA-NEXT:    s_mov_b32 s11, s7
9229; GFX6-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
9230; GFX6-NOHSA-NEXT:    s_mov_b32 s8, s2
9231; GFX6-NOHSA-NEXT:    s_mov_b32 s9, s3
9232; GFX6-NOHSA-NEXT:    buffer_load_ushort v0, off, s[8:11], 0
9233; GFX6-NOHSA-NEXT:    s_mov_b32 s4, s0
9234; GFX6-NOHSA-NEXT:    s_mov_b32 s5, s1
9235; GFX6-NOHSA-NEXT:    s_waitcnt vmcnt(0)
9236; GFX6-NOHSA-NEXT:    v_lshlrev_b32_e32 v1, 8, v0
9237; GFX6-NOHSA-NEXT:    v_or_b32_e32 v0, v0, v1
9238; GFX6-NOHSA-NEXT:    v_and_b32_e32 v0, 0xff00ff, v0
9239; GFX6-NOHSA-NEXT:    buffer_store_dword v0, off, s[4:7], 0
9240; GFX6-NOHSA-NEXT:    s_endpgm
9241;
9242; GFX7-HSA-LABEL: constant_zextload_v2i8_to_v2i16:
9243; GFX7-HSA:       ; %bb.0:
9244; GFX7-HSA-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
9245; GFX7-HSA-NEXT:    s_waitcnt lgkmcnt(0)
9246; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s2
9247; GFX7-HSA-NEXT:    v_mov_b32_e32 v1, s3
9248; GFX7-HSA-NEXT:    flat_load_ushort v2, v[0:1]
9249; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s0
9250; GFX7-HSA-NEXT:    v_mov_b32_e32 v1, s1
9251; GFX7-HSA-NEXT:    s_waitcnt vmcnt(0)
9252; GFX7-HSA-NEXT:    v_lshlrev_b32_e32 v3, 8, v2
9253; GFX7-HSA-NEXT:    v_or_b32_e32 v2, v2, v3
9254; GFX7-HSA-NEXT:    v_and_b32_e32 v2, 0xff00ff, v2
9255; GFX7-HSA-NEXT:    flat_store_dword v[0:1], v2
9256; GFX7-HSA-NEXT:    s_endpgm
9257;
9258; GFX8-NOHSA-LABEL: constant_zextload_v2i8_to_v2i16:
9259; GFX8-NOHSA:       ; %bb.0:
9260; GFX8-NOHSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
9261; GFX8-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
9262; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s2
9263; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s3
9264; GFX8-NOHSA-NEXT:    flat_load_ushort v2, v[0:1]
9265; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s0
9266; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s1
9267; GFX8-NOHSA-NEXT:    s_waitcnt vmcnt(0)
9268; GFX8-NOHSA-NEXT:    v_lshlrev_b32_e32 v3, 8, v2
9269; GFX8-NOHSA-NEXT:    v_and_b32_e32 v3, 0xff0000, v3
9270; GFX8-NOHSA-NEXT:    v_or_b32_sdwa v2, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
9271; GFX8-NOHSA-NEXT:    flat_store_dword v[0:1], v2
9272; GFX8-NOHSA-NEXT:    s_endpgm
9273;
9274; EG-LABEL: constant_zextload_v2i8_to_v2i16:
9275; EG:       ; %bb.0:
9276; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
9277; EG-NEXT:    TEX 0 @6
9278; EG-NEXT:    ALU 7, @9, KC0[CB0:0-32], KC1[]
9279; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T5.X, T6.X, 1
9280; EG-NEXT:    CF_END
9281; EG-NEXT:    PAD
9282; EG-NEXT:    Fetch clause starting at 6:
9283; EG-NEXT:     VTX_READ_16 T5.X, T5.X, 0, #1
9284; EG-NEXT:    ALU clause starting at 8:
9285; EG-NEXT:     MOV * T5.X, KC0[2].Z,
9286; EG-NEXT:    ALU clause starting at 9:
9287; EG-NEXT:     LSHL * T0.W, T5.X, literal.x,
9288; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
9289; EG-NEXT:     AND_INT T0.W, PV.W, literal.x,
9290; EG-NEXT:     AND_INT * T1.W, T5.X, literal.y,
9291; EG-NEXT:    16711680(2.341805e-38), 255(3.573311e-43)
9292; EG-NEXT:     OR_INT T5.X, PS, PV.W,
9293; EG-NEXT:     LSHR * T6.X, KC0[2].Y, literal.x,
9294; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
9295;
9296; GFX12-LABEL: constant_zextload_v2i8_to_v2i16:
9297; GFX12:       ; %bb.0:
9298; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
9299; GFX12-NEXT:    v_mov_b32_e32 v0, 0
9300; GFX12-NEXT:    s_wait_kmcnt 0x0
9301; GFX12-NEXT:    global_load_u16 v1, v0, s[2:3]
9302; GFX12-NEXT:    s_wait_loadcnt 0x0
9303; GFX12-NEXT:    v_and_b32_e32 v2, 0xffff, v1
9304; GFX12-NEXT:    v_and_b32_e32 v1, 0xff, v1
9305; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
9306; GFX12-NEXT:    v_lshrrev_b32_e32 v2, 8, v2
9307; GFX12-NEXT:    v_and_b32_e32 v1, 0xffff, v1
9308; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
9309; GFX12-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
9310; GFX12-NEXT:    global_store_b32 v0, v1, s[0:1]
9311; GFX12-NEXT:    s_endpgm
  ; IR under test: vector load -> zext -> vector store.
9312  %load = load <2 x i8>, ptr addrspace(4) %in
9313  %ext = zext <2 x i8> %load to <2 x i16>
9314  store <2 x i16> %ext, ptr addrspace(1) %out
9315  ret void
9316}
9317
; Test kernel: load a <2 x i8> vector from %in (addrspace(4)), sign-extend each
; element to i16, and store the resulting <2 x i16> to %out (addrspace(1)).
;
; In the expected output the two source bytes are fetched with a single 16-bit
; load. Each byte is sign-extended, the low element is masked to 16 bits (or
; packed with a shift-or) so its sign bits do not spill into the upper half,
; and the pair is written with a single 32-bit store.
;
; The assertions below are autogenerated (see the NOTE on the first line of
; this file); regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand.
9318define amdgpu_kernel void @constant_sextload_v2i8_to_v2i16(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
9319; GFX6-NOHSA-LABEL: constant_sextload_v2i8_to_v2i16:
9320; GFX6-NOHSA:       ; %bb.0:
9321; GFX6-NOHSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
9322; GFX6-NOHSA-NEXT:    s_mov_b32 s7, 0xf000
9323; GFX6-NOHSA-NEXT:    s_mov_b32 s6, -1
9324; GFX6-NOHSA-NEXT:    s_mov_b32 s10, s6
9325; GFX6-NOHSA-NEXT:    s_mov_b32 s11, s7
9326; GFX6-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
9327; GFX6-NOHSA-NEXT:    s_mov_b32 s8, s2
9328; GFX6-NOHSA-NEXT:    s_mov_b32 s9, s3
9329; GFX6-NOHSA-NEXT:    buffer_load_ushort v0, off, s[8:11], 0
9330; GFX6-NOHSA-NEXT:    s_mov_b32 s4, s0
9331; GFX6-NOHSA-NEXT:    s_mov_b32 s5, s1
9332; GFX6-NOHSA-NEXT:    s_waitcnt vmcnt(0)
9333; GFX6-NOHSA-NEXT:    v_bfe_i32 v1, v0, 8, 8
9334; GFX6-NOHSA-NEXT:    v_bfe_i32 v0, v0, 0, 8
9335; GFX6-NOHSA-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
9336; GFX6-NOHSA-NEXT:    v_and_b32_e32 v0, 0xffff, v0
9337; GFX6-NOHSA-NEXT:    v_or_b32_e32 v0, v0, v1
9338; GFX6-NOHSA-NEXT:    buffer_store_dword v0, off, s[4:7], 0
9339; GFX6-NOHSA-NEXT:    s_endpgm
9340;
9341; GFX7-HSA-LABEL: constant_sextload_v2i8_to_v2i16:
9342; GFX7-HSA:       ; %bb.0:
9343; GFX7-HSA-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
9344; GFX7-HSA-NEXT:    s_waitcnt lgkmcnt(0)
9345; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s2
9346; GFX7-HSA-NEXT:    v_mov_b32_e32 v1, s3
9347; GFX7-HSA-NEXT:    flat_load_ushort v2, v[0:1]
9348; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s0
9349; GFX7-HSA-NEXT:    v_mov_b32_e32 v1, s1
9350; GFX7-HSA-NEXT:    s_waitcnt vmcnt(0)
9351; GFX7-HSA-NEXT:    v_bfe_i32 v3, v2, 8, 8
9352; GFX7-HSA-NEXT:    v_bfe_i32 v2, v2, 0, 8
9353; GFX7-HSA-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
9354; GFX7-HSA-NEXT:    v_and_b32_e32 v2, 0xffff, v2
9355; GFX7-HSA-NEXT:    v_or_b32_e32 v2, v2, v3
9356; GFX7-HSA-NEXT:    flat_store_dword v[0:1], v2
9357; GFX7-HSA-NEXT:    s_endpgm
9358;
9359; GFX8-NOHSA-LABEL: constant_sextload_v2i8_to_v2i16:
9360; GFX8-NOHSA:       ; %bb.0:
9361; GFX8-NOHSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
9362; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v3, 0xffff
9363; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, 8
9364; GFX8-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
9365; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s2
9366; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s3
9367; GFX8-NOHSA-NEXT:    flat_load_ushort v2, v[0:1]
9368; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s0
9369; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s1
9370; GFX8-NOHSA-NEXT:    s_waitcnt vmcnt(0)
9371; GFX8-NOHSA-NEXT:    v_and_b32_sdwa v3, v3, sext(v2) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
9372; GFX8-NOHSA-NEXT:    v_lshlrev_b32_sdwa v2, v4, sext(v2) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
9373; GFX8-NOHSA-NEXT:    v_and_b32_e32 v2, 0xffff0000, v2
9374; GFX8-NOHSA-NEXT:    v_or_b32_e32 v2, v3, v2
9375; GFX8-NOHSA-NEXT:    flat_store_dword v[0:1], v2
9376; GFX8-NOHSA-NEXT:    s_endpgm
9377;
9378; EG-LABEL: constant_sextload_v2i8_to_v2i16:
9379; EG:       ; %bb.0:
9380; EG-NEXT:    ALU 1, @8, KC0[CB0:0-32], KC1[]
9381; EG-NEXT:    TEX 0 @6
9382; EG-NEXT:    ALU 16, @10, KC0[CB0:0-32], KC1[]
9383; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T5.X, T6.X, 1
9384; EG-NEXT:    CF_END
9385; EG-NEXT:    PAD
9386; EG-NEXT:    Fetch clause starting at 6:
9387; EG-NEXT:     VTX_READ_16 T5.X, T5.X, 0, #1
9388; EG-NEXT:    ALU clause starting at 8:
9389; EG-NEXT:     MOV * T0.Y, T2.X,
9390; EG-NEXT:     MOV * T5.X, KC0[2].Z,
9391; EG-NEXT:    ALU clause starting at 10:
9392; EG-NEXT:     AND_INT T0.W, T5.X, literal.x,
9393; EG-NEXT:     AND_INT * T1.W, T0.Y, literal.y,
9394; EG-NEXT:    65535(9.183409e-41), -65536(nan)
9395; EG-NEXT:     OR_INT * T0.W, PS, PV.W,
9396; EG-NEXT:     MOV * T2.X, PV.W,
9397; EG-NEXT:     MOV * T0.Y, PV.X,
9398; EG-NEXT:     LSHR * T1.W, PV.Y, literal.x,
9399; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
9400; EG-NEXT:     BFE_INT T0.Z, T0.W, 0.0, literal.x,
9401; EG-NEXT:     BFE_INT * T0.W, PV.W, 0.0, literal.x,
9402; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
9403; EG-NEXT:     LSHL T0.W, PV.W, literal.x,
9404; EG-NEXT:     AND_INT * T1.W, PV.Z, literal.y,
9405; EG-NEXT:    16(2.242078e-44), 65535(9.183409e-41)
9406; EG-NEXT:     OR_INT T5.X, PS, PV.W,
9407; EG-NEXT:     LSHR * T6.X, KC0[2].Y, literal.x,
9408; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
9409;
9410; GFX12-LABEL: constant_sextload_v2i8_to_v2i16:
9411; GFX12:       ; %bb.0:
9412; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
9413; GFX12-NEXT:    v_mov_b32_e32 v0, 0
9414; GFX12-NEXT:    s_wait_kmcnt 0x0
9415; GFX12-NEXT:    global_load_u16 v1, v0, s[2:3]
9416; GFX12-NEXT:    s_wait_loadcnt 0x0
9417; GFX12-NEXT:    v_bfe_i32 v2, v1, 0, 16
9418; GFX12-NEXT:    v_bfe_i32 v1, v1, 0, 8
9419; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
9420; GFX12-NEXT:    v_lshrrev_b32_e32 v2, 8, v2
9421; GFX12-NEXT:    v_and_b32_e32 v1, 0xffff, v1
9422; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
9423; GFX12-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
9424; GFX12-NEXT:    global_store_b32 v0, v1, s[0:1]
9425; GFX12-NEXT:    s_endpgm
  ; IR under test: vector load -> sext -> vector store.
9426  %load = load <2 x i8>, ptr addrspace(4) %in
9427  %ext = sext <2 x i8> %load to <2 x i16>
9428  store <2 x i16> %ext, ptr addrspace(1) %out
9429  ret void
9430}
9431
; Test kernel: load a <4 x i8> vector from %in (addrspace(4)), zero-extend each
; element to i16, and store the resulting <4 x i16> to %out (addrspace(1)).
;
; In the expected output the four source bytes are fetched with a single
; 32-bit load (a scalar s_load on the amdgcn configurations, VTX_READ_32 on
; r600) and the four zero-extended halves are written with one 64-bit store.
;
; The assertions below are autogenerated (see the NOTE on the first line of
; this file); regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand.
9432define amdgpu_kernel void @constant_zextload_v4i8_to_v4i16(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
9433; GFX6-NOHSA-LABEL: constant_zextload_v4i8_to_v4i16:
9434; GFX6-NOHSA:       ; %bb.0:
9435; GFX6-NOHSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
9436; GFX6-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
9437; GFX6-NOHSA-NEXT:    s_load_dword s4, s[2:3], 0x0
9438; GFX6-NOHSA-NEXT:    s_mov_b32 s3, 0xf000
9439; GFX6-NOHSA-NEXT:    s_mov_b32 s2, -1
9440; GFX6-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
9441; GFX6-NOHSA-NEXT:    s_and_b32 s5, s4, 0xff00
9442; GFX6-NOHSA-NEXT:    s_lshr_b32 s6, s4, 24
9443; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s4
9444; GFX6-NOHSA-NEXT:    s_and_b32 s4, s4, 0xff
9445; GFX6-NOHSA-NEXT:    v_alignbit_b32 v0, s6, v0, 16
9446; GFX6-NOHSA-NEXT:    s_lshl_b32 s5, s5, 8
9447; GFX6-NOHSA-NEXT:    s_or_b32 s4, s4, s5
9448; GFX6-NOHSA-NEXT:    v_and_b32_e32 v1, 0xff00ff, v0
9449; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s4
9450; GFX6-NOHSA-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
9451; GFX6-NOHSA-NEXT:    s_endpgm
9452;
9453; GFX7-HSA-LABEL: constant_zextload_v4i8_to_v4i16:
9454; GFX7-HSA:       ; %bb.0:
9455; GFX7-HSA-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
9456; GFX7-HSA-NEXT:    s_waitcnt lgkmcnt(0)
9457; GFX7-HSA-NEXT:    s_load_dword s2, s[2:3], 0x0
9458; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s0
9459; GFX7-HSA-NEXT:    v_mov_b32_e32 v1, s1
9460; GFX7-HSA-NEXT:    s_waitcnt lgkmcnt(0)
9461; GFX7-HSA-NEXT:    s_and_b32 s0, s2, 0xff00
9462; GFX7-HSA-NEXT:    s_lshr_b32 s1, s2, 24
9463; GFX7-HSA-NEXT:    v_mov_b32_e32 v2, s2
9464; GFX7-HSA-NEXT:    s_and_b32 s2, s2, 0xff
9465; GFX7-HSA-NEXT:    s_lshl_b32 s0, s0, 8
9466; GFX7-HSA-NEXT:    v_alignbit_b32 v2, s1, v2, 16
9467; GFX7-HSA-NEXT:    s_or_b32 s0, s2, s0
9468; GFX7-HSA-NEXT:    v_and_b32_e32 v3, 0xff00ff, v2
9469; GFX7-HSA-NEXT:    v_mov_b32_e32 v2, s0
9470; GFX7-HSA-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
9471; GFX7-HSA-NEXT:    s_endpgm
9472;
9473; GFX8-NOHSA-LABEL: constant_zextload_v4i8_to_v4i16:
9474; GFX8-NOHSA:       ; %bb.0:
9475; GFX8-NOHSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
9476; GFX8-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
9477; GFX8-NOHSA-NEXT:    s_load_dword s2, s[2:3], 0x0
9478; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s0
9479; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s1
9480; GFX8-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
9481; GFX8-NOHSA-NEXT:    s_lshr_b32 s0, s2, 24
9482; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s2
9483; GFX8-NOHSA-NEXT:    s_and_b32 s1, s2, 0xff
9484; GFX8-NOHSA-NEXT:    s_lshl_b32 s2, s2, 8
9485; GFX8-NOHSA-NEXT:    v_alignbit_b32 v2, s0, v2, 16
9486; GFX8-NOHSA-NEXT:    s_and_b32 s0, s2, 0xff0000
9487; GFX8-NOHSA-NEXT:    s_or_b32 s0, s1, s0
9488; GFX8-NOHSA-NEXT:    v_and_b32_e32 v3, 0xff00ff, v2
9489; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s0
9490; GFX8-NOHSA-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
9491; GFX8-NOHSA-NEXT:    s_endpgm
9492;
9493; EG-LABEL: constant_zextload_v4i8_to_v4i16:
9494; EG:       ; %bb.0:
9495; EG-NEXT:    ALU 1, @8, KC0[CB0:0-32], KC1[]
9496; EG-NEXT:    TEX 0 @6
9497; EG-NEXT:    ALU 31, @10, KC0[CB0:0-32], KC1[]
9498; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T8.XY, T7.X, 1
9499; EG-NEXT:    CF_END
9500; EG-NEXT:    PAD
9501; EG-NEXT:    Fetch clause starting at 6:
9502; EG-NEXT:     VTX_READ_32 T7.X, T7.X, 0, #1
9503; EG-NEXT:    ALU clause starting at 8:
9504; EG-NEXT:     MOV * T0.Y, T4.X,
9505; EG-NEXT:     MOV * T7.X, KC0[2].Z,
9506; EG-NEXT:    ALU clause starting at 10:
9507; EG-NEXT:     AND_INT T0.W, T7.X, literal.x,
9508; EG-NEXT:     AND_INT * T1.W, T0.Y, literal.y,
9509; EG-NEXT:    255(3.573311e-43), -65536(nan)
9510; EG-NEXT:     OR_INT * T0.W, PS, PV.W,
9511; EG-NEXT:     MOV * T4.X, PV.W,
9512; EG-NEXT:     MOV T0.Y, PV.X,
9513; EG-NEXT:     LSHL * T0.W, T7.X, literal.x,
9514; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
9515; EG-NEXT:     AND_INT T1.W, PV.Y, literal.x,
9516; EG-NEXT:     AND_INT * T0.W, PV.W, literal.y,
9517; EG-NEXT:    65535(9.183409e-41), 16711680(2.341805e-38)
9518; EG-NEXT:     OR_INT * T0.W, PV.W, PS,
9519; EG-NEXT:     MOV T4.X, PV.W,
9520; EG-NEXT:     MOV T0.Y, T5.X,
9521; EG-NEXT:     MOV * T0.W, literal.x,
9522; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
9523; EG-NEXT:     BFE_UINT T0.W, T7.X, literal.x, PV.W,
9524; EG-NEXT:     AND_INT * T1.W, PV.Y, literal.y,
9525; EG-NEXT:    16(2.242078e-44), -65536(nan)
9526; EG-NEXT:     OR_INT * T0.W, PS, PV.W,
9527; EG-NEXT:     MOV * T5.X, PV.W,
9528; EG-NEXT:     MOV T0.Y, PV.X,
9529; EG-NEXT:     LSHR * T0.W, T7.X, literal.x,
9530; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
9531; EG-NEXT:     AND_INT T1.W, PV.Y, literal.x,
9532; EG-NEXT:     AND_INT * T0.W, PV.W, literal.y,
9533; EG-NEXT:    65535(9.183409e-41), 16711680(2.341805e-38)
9534; EG-NEXT:     LSHR T7.X, KC0[2].Y, literal.x,
9535; EG-NEXT:     OR_INT * T8.Y, PV.W, PS,
9536; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
9537; EG-NEXT:     MOV T5.X, PV.Y,
9538; EG-NEXT:     MOV * T8.X, T4.X,
9539;
9540; GFX12-LABEL: constant_zextload_v4i8_to_v4i16:
9541; GFX12:       ; %bb.0:
9542; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
9543; GFX12-NEXT:    s_wait_kmcnt 0x0
9544; GFX12-NEXT:    s_load_b32 s2, s[2:3], 0x0
9545; GFX12-NEXT:    s_wait_kmcnt 0x0
9546; GFX12-NEXT:    s_bfe_u32 s3, s2, 0x80008
9547; GFX12-NEXT:    s_lshr_b32 s4, s2, 24
9548; GFX12-NEXT:    s_and_b32 s5, s2, 0xff
9549; GFX12-NEXT:    s_bfe_u32 s2, s2, 0x80010
9550; GFX12-NEXT:    s_pack_ll_b32_b16 s3, s5, s3
9551; GFX12-NEXT:    s_pack_ll_b32_b16 s2, s2, s4
9552; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
9553; GFX12-NEXT:    v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s2
9554; GFX12-NEXT:    v_mov_b32_e32 v0, s3
9555; GFX12-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
9556; GFX12-NEXT:    s_endpgm
  ; IR under test: vector load -> zext -> vector store.
9557  %load = load <4 x i8>, ptr addrspace(4) %in
9558  %ext = zext <4 x i8> %load to <4 x i16>
9559  store <4 x i16> %ext, ptr addrspace(1) %out
9560  ret void
9561}
9562
; Test kernel: load a <4 x i8> vector from %in (addrspace(4)), sign-extend each
; element to i16, and store the resulting <4 x i16> to %out (addrspace(1)).
;
; In the expected output the four source bytes are fetched with a single
; 32-bit load (a scalar s_load on the amdgcn configurations, VTX_READ_32 on
; r600). Each byte is sign-extended with signed bitfield-extract /
; arithmetic-shift instructions, pairs are packed into dwords, and the result
; is written with one 64-bit store.
;
; The assertions below are autogenerated (see the NOTE on the first line of
; this file); regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand.
9563define amdgpu_kernel void @constant_sextload_v4i8_to_v4i16(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
9564; GFX6-NOHSA-LABEL: constant_sextload_v4i8_to_v4i16:
9565; GFX6-NOHSA:       ; %bb.0:
9566; GFX6-NOHSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
9567; GFX6-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
9568; GFX6-NOHSA-NEXT:    s_load_dword s2, s[2:3], 0x0
9569; GFX6-NOHSA-NEXT:    s_mov_b32 s3, 0xf000
9570; GFX6-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
9571; GFX6-NOHSA-NEXT:    s_ashr_i32 s4, s2, 24
9572; GFX6-NOHSA-NEXT:    s_bfe_i32 s5, s2, 0x80010
9573; GFX6-NOHSA-NEXT:    s_bfe_i32 s6, s2, 0x80008
9574; GFX6-NOHSA-NEXT:    s_sext_i32_i8 s2, s2
9575; GFX6-NOHSA-NEXT:    s_lshl_b32 s4, s4, 16
9576; GFX6-NOHSA-NEXT:    s_and_b32 s5, s5, 0xffff
9577; GFX6-NOHSA-NEXT:    s_lshl_b32 s6, s6, 16
9578; GFX6-NOHSA-NEXT:    s_and_b32 s2, s2, 0xffff
9579; GFX6-NOHSA-NEXT:    s_or_b32 s4, s5, s4
9580; GFX6-NOHSA-NEXT:    s_or_b32 s5, s2, s6
9581; GFX6-NOHSA-NEXT:    s_mov_b32 s2, -1
9582; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s5
9583; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s4
9584; GFX6-NOHSA-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
9585; GFX6-NOHSA-NEXT:    s_endpgm
9586;
9587; GFX7-HSA-LABEL: constant_sextload_v4i8_to_v4i16:
9588; GFX7-HSA:       ; %bb.0:
9589; GFX7-HSA-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
9590; GFX7-HSA-NEXT:    s_waitcnt lgkmcnt(0)
9591; GFX7-HSA-NEXT:    s_load_dword s2, s[2:3], 0x0
9592; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s0
9593; GFX7-HSA-NEXT:    v_mov_b32_e32 v1, s1
9594; GFX7-HSA-NEXT:    s_waitcnt lgkmcnt(0)
9595; GFX7-HSA-NEXT:    s_ashr_i32 s0, s2, 24
9596; GFX7-HSA-NEXT:    s_bfe_i32 s1, s2, 0x80010
9597; GFX7-HSA-NEXT:    s_bfe_i32 s3, s2, 0x80008
9598; GFX7-HSA-NEXT:    s_sext_i32_i8 s2, s2
9599; GFX7-HSA-NEXT:    s_lshl_b32 s0, s0, 16
9600; GFX7-HSA-NEXT:    s_and_b32 s1, s1, 0xffff
9601; GFX7-HSA-NEXT:    s_lshl_b32 s3, s3, 16
9602; GFX7-HSA-NEXT:    s_and_b32 s2, s2, 0xffff
9603; GFX7-HSA-NEXT:    s_or_b32 s0, s1, s0
9604; GFX7-HSA-NEXT:    s_or_b32 s1, s2, s3
9605; GFX7-HSA-NEXT:    v_mov_b32_e32 v2, s1
9606; GFX7-HSA-NEXT:    v_mov_b32_e32 v3, s0
9607; GFX7-HSA-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
9608; GFX7-HSA-NEXT:    s_endpgm
9609;
9610; GFX8-NOHSA-LABEL: constant_sextload_v4i8_to_v4i16:
9611; GFX8-NOHSA:       ; %bb.0:
9612; GFX8-NOHSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
9613; GFX8-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
9614; GFX8-NOHSA-NEXT:    s_load_dword s2, s[2:3], 0x0
9615; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s0
9616; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s1
9617; GFX8-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
9618; GFX8-NOHSA-NEXT:    s_lshr_b32 s0, s2, 16
9619; GFX8-NOHSA-NEXT:    s_sext_i32_i16 s1, s2
9620; GFX8-NOHSA-NEXT:    s_bfe_i32 s3, s2, 0x80000
9621; GFX8-NOHSA-NEXT:    s_ashr_i32 s2, s2, 24
9622; GFX8-NOHSA-NEXT:    s_lshl_b32 s1, s1, 8
9623; GFX8-NOHSA-NEXT:    s_bfe_i32 s0, s0, 0x80000
9624; GFX8-NOHSA-NEXT:    s_and_b32 s3, 0xffff, s3
9625; GFX8-NOHSA-NEXT:    s_lshl_b32 s2, s2, 16
9626; GFX8-NOHSA-NEXT:    s_and_b32 s1, s1, 0xffff0000
9627; GFX8-NOHSA-NEXT:    s_and_b32 s0, 0xffff, s0
9628; GFX8-NOHSA-NEXT:    s_or_b32 s1, s3, s1
9629; GFX8-NOHSA-NEXT:    s_or_b32 s0, s0, s2
9630; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s1
9631; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v3, s0
9632; GFX8-NOHSA-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
9633; GFX8-NOHSA-NEXT:    s_endpgm
9634;
9635; EG-LABEL: constant_sextload_v4i8_to_v4i16:
9636; EG:       ; %bb.0:
9637; EG-NEXT:    ALU 1, @8, KC0[CB0:0-32], KC1[]
9638; EG-NEXT:    TEX 0 @6
9639; EG-NEXT:    ALU 37, @10, KC0[CB0:0-32], KC1[]
9640; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T8.XY, T7.X, 1
9641; EG-NEXT:    CF_END
9642; EG-NEXT:    PAD
9643; EG-NEXT:    Fetch clause starting at 6:
9644; EG-NEXT:     VTX_READ_32 T7.X, T7.X, 0, #1
9645; EG-NEXT:    ALU clause starting at 8:
9646; EG-NEXT:     MOV * T0.Y, T4.X,
9647; EG-NEXT:     MOV * T7.X, KC0[2].Z,
9648; EG-NEXT:    ALU clause starting at 10:
9649; EG-NEXT:     BFE_INT * T0.W, T7.X, 0.0, literal.x,
9650; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
9651; EG-NEXT:     AND_INT T0.W, PV.W, literal.x,
9652; EG-NEXT:     AND_INT * T1.W, T0.Y, literal.y,
9653; EG-NEXT:    65535(9.183409e-41), -65536(nan)
9654; EG-NEXT:     OR_INT * T0.W, PS, PV.W,
9655; EG-NEXT:     MOV * T4.X, PV.W,
9656; EG-NEXT:     MOV T0.Y, PV.X,
9657; EG-NEXT:     LSHR * T0.W, T7.X, literal.x,
9658; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
9659; EG-NEXT:     BFE_INT T0.W, PV.W, 0.0, literal.x,
9660; EG-NEXT:     AND_INT * T1.W, PV.Y, literal.y,
9661; EG-NEXT:    8(1.121039e-44), 65535(9.183409e-41)
9662; EG-NEXT:     LSHL * T0.W, PV.W, literal.x,
9663; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
9664; EG-NEXT:     OR_INT * T0.W, T1.W, PV.W,
9665; EG-NEXT:     MOV T4.X, PV.W,
9666; EG-NEXT:     MOV T0.Y, T5.X,
9667; EG-NEXT:     LSHR * T0.W, T7.X, literal.x, BS:VEC_120/SCL_212
9668; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
9669; EG-NEXT:     BFE_INT T0.W, PV.W, 0.0, literal.x,
9670; EG-NEXT:     AND_INT * T1.W, PV.Y, literal.y,
9671; EG-NEXT:    8(1.121039e-44), -65536(nan)
9672; EG-NEXT:     AND_INT * T0.W, PV.W, literal.x,
9673; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
9674; EG-NEXT:     OR_INT * T0.W, T1.W, PV.W,
9675; EG-NEXT:     MOV * T5.X, PV.W,
9676; EG-NEXT:     MOV T0.Y, PV.X,
9677; EG-NEXT:     ASHR * T0.W, T7.X, literal.x,
9678; EG-NEXT:    24(3.363116e-44), 0(0.000000e+00)
9679; EG-NEXT:     AND_INT T1.W, PV.Y, literal.x,
9680; EG-NEXT:     LSHL * T0.W, PV.W, literal.y,
9681; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
9682; EG-NEXT:     LSHR T7.X, KC0[2].Y, literal.x,
9683; EG-NEXT:     OR_INT * T8.Y, PV.W, PS,
9684; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
9685; EG-NEXT:     MOV T5.X, PV.Y,
9686; EG-NEXT:     MOV * T8.X, T4.X,
9687;
9688; GFX12-LABEL: constant_sextload_v4i8_to_v4i16:
9689; GFX12:       ; %bb.0:
9690; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
9691; GFX12-NEXT:    s_wait_kmcnt 0x0
9692; GFX12-NEXT:    s_load_b32 s2, s[2:3], 0x0
9693; GFX12-NEXT:    s_wait_kmcnt 0x0
9694; GFX12-NEXT:    s_lshr_b32 s3, s2, 16
9695; GFX12-NEXT:    s_sext_i32_i16 s5, s2
9696; GFX12-NEXT:    s_ashr_i32 s4, s2, 24
9697; GFX12-NEXT:    s_bfe_i32 s2, s2, 0x80000
9698; GFX12-NEXT:    s_lshr_b32 s5, s5, 8
9699; GFX12-NEXT:    s_bfe_i32 s3, s3, 0x80000
9700; GFX12-NEXT:    s_pack_ll_b32_b16 s2, s2, s5
9701; GFX12-NEXT:    s_pack_ll_b32_b16 s3, s3, s4
9702; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
9703; GFX12-NEXT:    v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s3
9704; GFX12-NEXT:    v_mov_b32_e32 v0, s2
9705; GFX12-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
9706; GFX12-NEXT:    s_endpgm
  ; IR under test: vector load -> sext -> vector store.
9707  %load = load <4 x i8>, ptr addrspace(4) %in
9708  %ext = sext <4 x i8> %load to <4 x i16>
9709  store <4 x i16> %ext, ptr addrspace(1) %out
9710  ret void
9711}
9712
; Test kernel: load an <8 x i8> vector from %in (addrspace(4)), zero-extend
; each element to i16, and store the resulting <8 x i16> to %out
; (addrspace(1)).
;
; In the expected output the eight source bytes are fetched with a single
; 64-bit load (a scalar s_load on the amdgcn configurations, VTX_READ_64 on
; r600) and the eight zero-extended halves are written with one 128-bit store.
;
; The assertions below are autogenerated (see the NOTE on the first line of
; this file); regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand.
9713define amdgpu_kernel void @constant_zextload_v8i8_to_v8i16(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
9714; GFX6-NOHSA-LABEL: constant_zextload_v8i8_to_v8i16:
9715; GFX6-NOHSA:       ; %bb.0:
9716; GFX6-NOHSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
9717; GFX6-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
9718; GFX6-NOHSA-NEXT:    s_load_dwordx2 s[4:5], s[2:3], 0x0
9719; GFX6-NOHSA-NEXT:    s_mov_b32 s3, 0xf000
9720; GFX6-NOHSA-NEXT:    s_mov_b32 s2, -1
9721; GFX6-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
9722; GFX6-NOHSA-NEXT:    s_and_b32 s6, s4, 0xff00
9723; GFX6-NOHSA-NEXT:    s_lshr_b32 s7, s4, 24
9724; GFX6-NOHSA-NEXT:    s_and_b32 s8, s5, 0xff00
9725; GFX6-NOHSA-NEXT:    s_lshr_b32 s9, s5, 24
9726; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s5
9727; GFX6-NOHSA-NEXT:    s_and_b32 s5, s5, 0xff
9728; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s4
9729; GFX6-NOHSA-NEXT:    s_and_b32 s4, s4, 0xff
9730; GFX6-NOHSA-NEXT:    v_alignbit_b32 v0, s9, v0, 16
9731; GFX6-NOHSA-NEXT:    s_lshl_b32 s8, s8, 8
9732; GFX6-NOHSA-NEXT:    v_alignbit_b32 v1, s7, v1, 16
9733; GFX6-NOHSA-NEXT:    s_lshl_b32 s6, s6, 8
9734; GFX6-NOHSA-NEXT:    v_and_b32_e32 v3, 0xff00ff, v0
9735; GFX6-NOHSA-NEXT:    s_or_b32 s5, s5, s8
9736; GFX6-NOHSA-NEXT:    s_or_b32 s4, s4, s6
9737; GFX6-NOHSA-NEXT:    v_and_b32_e32 v1, 0xff00ff, v1
9738; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s4
9739; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s5
9740; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
9741; GFX6-NOHSA-NEXT:    s_endpgm
9742;
9743; GFX7-HSA-LABEL: constant_zextload_v8i8_to_v8i16:
9744; GFX7-HSA:       ; %bb.0:
9745; GFX7-HSA-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
9746; GFX7-HSA-NEXT:    s_waitcnt lgkmcnt(0)
9747; GFX7-HSA-NEXT:    s_load_dwordx2 s[2:3], s[2:3], 0x0
9748; GFX7-HSA-NEXT:    v_mov_b32_e32 v4, s0
9749; GFX7-HSA-NEXT:    v_mov_b32_e32 v5, s1
9750; GFX7-HSA-NEXT:    s_waitcnt lgkmcnt(0)
9751; GFX7-HSA-NEXT:    s_lshr_b32 s5, s3, 24
9752; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s3
9753; GFX7-HSA-NEXT:    v_alignbit_b32 v0, s5, v0, 16
9754; GFX7-HSA-NEXT:    s_and_b32 s0, s2, 0xff00
9755; GFX7-HSA-NEXT:    s_lshr_b32 s1, s2, 24
9756; GFX7-HSA-NEXT:    s_and_b32 s4, s3, 0xff00
9757; GFX7-HSA-NEXT:    v_and_b32_e32 v3, 0xff00ff, v0
9758; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s2
9759; GFX7-HSA-NEXT:    s_and_b32 s3, s3, 0xff
9760; GFX7-HSA-NEXT:    s_lshl_b32 s4, s4, 8
9761; GFX7-HSA-NEXT:    v_alignbit_b32 v0, s1, v0, 16
9762; GFX7-HSA-NEXT:    s_and_b32 s1, s2, 0xff
9763; GFX7-HSA-NEXT:    s_lshl_b32 s0, s0, 8
9764; GFX7-HSA-NEXT:    s_or_b32 s3, s3, s4
9765; GFX7-HSA-NEXT:    s_or_b32 s0, s1, s0
9766; GFX7-HSA-NEXT:    v_and_b32_e32 v1, 0xff00ff, v0
9767; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s0
9768; GFX7-HSA-NEXT:    v_mov_b32_e32 v2, s3
9769; GFX7-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
9770; GFX7-HSA-NEXT:    s_endpgm
9771;
9772; GFX8-NOHSA-LABEL: constant_zextload_v8i8_to_v8i16:
9773; GFX8-NOHSA:       ; %bb.0:
9774; GFX8-NOHSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
9775; GFX8-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
9776; GFX8-NOHSA-NEXT:    s_load_dwordx2 s[2:3], s[2:3], 0x0
9777; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s0
9778; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s1
9779; GFX8-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
9780; GFX8-NOHSA-NEXT:    s_lshr_b32 s0, s2, 24
9781; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s2
9782; GFX8-NOHSA-NEXT:    s_lshr_b32 s1, s3, 24
9783; GFX8-NOHSA-NEXT:    s_bfe_u32 s4, s3, 0x80010
9784; GFX8-NOHSA-NEXT:    s_and_b32 s5, s3, 0xff
9785; GFX8-NOHSA-NEXT:    s_lshl_b32 s3, s3, 8
9786; GFX8-NOHSA-NEXT:    v_alignbit_b32 v0, s0, v0, 16
9787; GFX8-NOHSA-NEXT:    s_and_b32 s0, s2, 0xff
9788; GFX8-NOHSA-NEXT:    s_lshl_b32 s2, s2, 8
9789; GFX8-NOHSA-NEXT:    s_lshl_b32 s1, s1, 16
9790; GFX8-NOHSA-NEXT:    s_and_b32 s3, s3, 0xff0000
9791; GFX8-NOHSA-NEXT:    s_and_b32 s2, s2, 0xff0000
9792; GFX8-NOHSA-NEXT:    s_or_b32 s1, s4, s1
9793; GFX8-NOHSA-NEXT:    s_or_b32 s3, s5, s3
9794; GFX8-NOHSA-NEXT:    s_or_b32 s0, s0, s2
9795; GFX8-NOHSA-NEXT:    v_and_b32_e32 v1, 0xff00ff, v0
9796; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s0
9797; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s3
9798; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v3, s1
9799; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
9800; GFX8-NOHSA-NEXT:    s_endpgm
9801;
9802; EG-LABEL: constant_zextload_v8i8_to_v8i16:
9803; EG:       ; %bb.0:
9804; EG-NEXT:    ALU 1, @8, KC0[CB0:0-32], KC1[]
9805; EG-NEXT:    TEX 0 @6
9806; EG-NEXT:    ALU 61, @10, KC0[CB0:0-32], KC1[]
9807; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T12.XYZW, T11.X, 1
9808; EG-NEXT:    CF_END
9809; EG-NEXT:    PAD
9810; EG-NEXT:    Fetch clause starting at 6:
9811; EG-NEXT:     VTX_READ_64 T11.XY, T11.X, 0, #1
9812; EG-NEXT:    ALU clause starting at 8:
9813; EG-NEXT:     MOV * T0.Y, T8.X,
9814; EG-NEXT:     MOV * T11.X, KC0[2].Z,
9815; EG-NEXT:    ALU clause starting at 10:
9816; EG-NEXT:     AND_INT T0.W, T11.X, literal.x,
9817; EG-NEXT:     AND_INT * T1.W, T0.Y, literal.y,
9818; EG-NEXT:    255(3.573311e-43), -65536(nan)
9819; EG-NEXT:     OR_INT * T0.W, PS, PV.W,
9820; EG-NEXT:     MOV * T8.X, PV.W,
9821; EG-NEXT:     MOV T0.Y, PV.X,
9822; EG-NEXT:     LSHL * T0.W, T11.X, literal.x,
9823; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
9824; EG-NEXT:     AND_INT T1.W, PV.Y, literal.x,
9825; EG-NEXT:     AND_INT * T0.W, PV.W, literal.y,
9826; EG-NEXT:    65535(9.183409e-41), 16711680(2.341805e-38)
9827; EG-NEXT:     OR_INT * T0.W, PV.W, PS,
9828; EG-NEXT:     MOV T8.X, PV.W,
9829; EG-NEXT:     MOV T0.Y, T9.X,
9830; EG-NEXT:     MOV * T0.W, literal.x,
9831; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
9832; EG-NEXT:     BFE_UINT T1.W, T11.X, literal.x, PV.W,
9833; EG-NEXT:     AND_INT * T2.W, PV.Y, literal.y,
9834; EG-NEXT:    16(2.242078e-44), -65536(nan)
9835; EG-NEXT:     OR_INT * T1.W, PS, PV.W,
9836; EG-NEXT:     MOV * T9.X, PV.W,
9837; EG-NEXT:     MOV T0.Y, PV.X,
9838; EG-NEXT:     LSHR * T1.W, T11.X, literal.x,
9839; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
9840; EG-NEXT:     AND_INT T2.W, PV.Y, literal.x,
9841; EG-NEXT:     AND_INT * T1.W, PV.W, literal.y,
9842; EG-NEXT:    65535(9.183409e-41), 16711680(2.341805e-38)
9843; EG-NEXT:     OR_INT * T12.Y, PV.W, PS,
9844; EG-NEXT:     MOV T9.X, PV.Y,
9845; EG-NEXT:     MOV * T0.Y, T4.X,
9846; EG-NEXT:     AND_INT T1.W, PV.Y, literal.x,
9847; EG-NEXT:     AND_INT * T2.W, T11.Y, literal.y,
9848; EG-NEXT:    -65536(nan), 255(3.573311e-43)
9849; EG-NEXT:     OR_INT * T1.W, PV.W, PS,
9850; EG-NEXT:     MOV * T4.X, PV.W,
9851; EG-NEXT:     MOV T0.Y, PV.X,
9852; EG-NEXT:     LSHL * T1.W, T11.Y, literal.x,
9853; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
9854; EG-NEXT:     AND_INT T2.W, PV.Y, literal.x,
9855; EG-NEXT:     AND_INT * T1.W, PV.W, literal.y,
9856; EG-NEXT:    65535(9.183409e-41), 16711680(2.341805e-38)
9857; EG-NEXT:     OR_INT * T1.W, PV.W, PS,
9858; EG-NEXT:     MOV T4.X, PV.W,
9859; EG-NEXT:     MOV T0.Y, T5.X,
9860; EG-NEXT:     BFE_UINT * T0.W, T11.Y, literal.x, T0.W,
9861; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
9862; EG-NEXT:     AND_INT * T1.W, PV.Y, literal.x,
9863; EG-NEXT:    -65536(nan), 0(0.000000e+00)
9864; EG-NEXT:     OR_INT * T0.W, PV.W, T0.W,
9865; EG-NEXT:     MOV * T5.X, PV.W,
9866; EG-NEXT:     MOV T0.Y, PV.X,
9867; EG-NEXT:     LSHR * T0.W, T11.Y, literal.x,
9868; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
9869; EG-NEXT:     AND_INT T1.W, PV.Y, literal.x,
9870; EG-NEXT:     AND_INT * T0.W, PV.W, literal.y,
9871; EG-NEXT:    65535(9.183409e-41), 16711680(2.341805e-38)
9872; EG-NEXT:     LSHR T11.X, KC0[2].Y, literal.x,
9873; EG-NEXT:     OR_INT * T12.W, PV.W, PS,
9874; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
9875; EG-NEXT:     MOV T5.X, PV.W,
9876; EG-NEXT:     MOV * T12.X, T8.X,
9877; EG-NEXT:     MOV * T12.Z, T4.X,
9878;
9879; GFX12-LABEL: constant_zextload_v8i8_to_v8i16:
9880; GFX12:       ; %bb.0:
9881; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
9882; GFX12-NEXT:    s_wait_kmcnt 0x0
9883; GFX12-NEXT:    s_load_b64 s[2:3], s[2:3], 0x0
9884; GFX12-NEXT:    s_wait_kmcnt 0x0
9885; GFX12-NEXT:    s_bfe_u32 s4, s2, 0x80008
9886; GFX12-NEXT:    s_lshr_b32 s5, s2, 24
9887; GFX12-NEXT:    s_bfe_u32 s6, s3, 0x80008
9888; GFX12-NEXT:    s_lshr_b32 s7, s3, 24
9889; GFX12-NEXT:    s_bfe_u32 s8, s3, 0x80010
9890; GFX12-NEXT:    s_and_b32 s3, s3, 0xff
9891; GFX12-NEXT:    s_bfe_u32 s9, s2, 0x80010
9892; GFX12-NEXT:    s_and_b32 s2, s2, 0xff
9893; GFX12-NEXT:    s_pack_ll_b32_b16 s7, s8, s7
9894; GFX12-NEXT:    s_pack_ll_b32_b16 s3, s3, s6
9895; GFX12-NEXT:    s_pack_ll_b32_b16 s2, s2, s4
9896; GFX12-NEXT:    s_pack_ll_b32_b16 s4, s9, s5
9897; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
9898; GFX12-NEXT:    v_dual_mov_b32 v4, 0 :: v_dual_mov_b32 v1, s4
9899; GFX12-NEXT:    v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v3, s7
9900; GFX12-NEXT:    v_mov_b32_e32 v2, s3
9901; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[0:1]
9902; GFX12-NEXT:    s_endpgm
  ; IR under test: vector load -> zext -> vector store.
9903  %load = load <8 x i8>, ptr addrspace(4) %in
9904  %ext = zext <8 x i8> %load to <8 x i16>
9905  store <8 x i16> %ext, ptr addrspace(1) %out
9906  ret void
9907}
9908
; Kernel under test: reads a <8 x i8> vector through the addrspace(4) pointer
; %in, sign-extends every lane to i16 and writes the resulting <8 x i16> through
; the addrspace(1) pointer %out.
; The check lines between the signature and the IR body are autogenerated by
; utils/update_llc_test_checks.py (see the first line of this file); regenerate
; them with that script instead of editing them by hand. There is one group of
; checks per FileCheck prefix used by the llc invocations at the top of the file
; (GFX6-NOHSA, GFX7-HSA, GFX8-NOHSA, EG and GFX12), and every group ends with a
; single four-dword store of the packed result.
9909define amdgpu_kernel void @constant_sextload_v8i8_to_v8i16(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
9910; GFX6-NOHSA-LABEL: constant_sextload_v8i8_to_v8i16:
9911; GFX6-NOHSA:       ; %bb.0:
9912; GFX6-NOHSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
9913; GFX6-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
9914; GFX6-NOHSA-NEXT:    s_load_dwordx2 s[4:5], s[2:3], 0x0
9915; GFX6-NOHSA-NEXT:    s_mov_b32 s3, 0xf000
9916; GFX6-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
9917; GFX6-NOHSA-NEXT:    s_ashr_i32 s2, s5, 24
9918; GFX6-NOHSA-NEXT:    s_bfe_i32 s6, s5, 0x80010
9919; GFX6-NOHSA-NEXT:    s_bfe_i32 s7, s5, 0x80008
9920; GFX6-NOHSA-NEXT:    s_sext_i32_i8 s5, s5
9921; GFX6-NOHSA-NEXT:    s_ashr_i32 s8, s4, 24
9922; GFX6-NOHSA-NEXT:    s_bfe_i32 s9, s4, 0x80010
9923; GFX6-NOHSA-NEXT:    s_bfe_i32 s10, s4, 0x80008
9924; GFX6-NOHSA-NEXT:    s_sext_i32_i8 s4, s4
9925; GFX6-NOHSA-NEXT:    s_lshl_b32 s2, s2, 16
9926; GFX6-NOHSA-NEXT:    s_and_b32 s6, s6, 0xffff
9927; GFX6-NOHSA-NEXT:    s_lshl_b32 s7, s7, 16
9928; GFX6-NOHSA-NEXT:    s_and_b32 s5, s5, 0xffff
9929; GFX6-NOHSA-NEXT:    s_lshl_b32 s8, s8, 16
9930; GFX6-NOHSA-NEXT:    s_and_b32 s9, s9, 0xffff
9931; GFX6-NOHSA-NEXT:    s_lshl_b32 s10, s10, 16
9932; GFX6-NOHSA-NEXT:    s_and_b32 s4, s4, 0xffff
9933; GFX6-NOHSA-NEXT:    s_or_b32 s6, s6, s2
9934; GFX6-NOHSA-NEXT:    s_or_b32 s5, s5, s7
9935; GFX6-NOHSA-NEXT:    s_or_b32 s7, s9, s8
9936; GFX6-NOHSA-NEXT:    s_or_b32 s4, s4, s10
9937; GFX6-NOHSA-NEXT:    s_mov_b32 s2, -1
9938; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s4
9939; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s7
9940; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s5
9941; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s6
9942; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
9943; GFX6-NOHSA-NEXT:    s_endpgm
9944;
9945; GFX7-HSA-LABEL: constant_sextload_v8i8_to_v8i16:
9946; GFX7-HSA:       ; %bb.0:
9947; GFX7-HSA-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
9948; GFX7-HSA-NEXT:    s_waitcnt lgkmcnt(0)
9949; GFX7-HSA-NEXT:    s_load_dwordx2 s[2:3], s[2:3], 0x0
9950; GFX7-HSA-NEXT:    v_mov_b32_e32 v4, s0
9951; GFX7-HSA-NEXT:    v_mov_b32_e32 v5, s1
9952; GFX7-HSA-NEXT:    s_waitcnt lgkmcnt(0)
9953; GFX7-HSA-NEXT:    s_ashr_i32 s0, s3, 24
9954; GFX7-HSA-NEXT:    s_bfe_i32 s1, s3, 0x80010
9955; GFX7-HSA-NEXT:    s_bfe_i32 s4, s3, 0x80008
9956; GFX7-HSA-NEXT:    s_sext_i32_i8 s3, s3
9957; GFX7-HSA-NEXT:    s_lshl_b32 s0, s0, 16
9958; GFX7-HSA-NEXT:    s_and_b32 s1, s1, 0xffff
9959; GFX7-HSA-NEXT:    s_lshl_b32 s4, s4, 16
9960; GFX7-HSA-NEXT:    s_and_b32 s3, s3, 0xffff
9961; GFX7-HSA-NEXT:    s_or_b32 s0, s1, s0
9962; GFX7-HSA-NEXT:    s_or_b32 s1, s3, s4
9963; GFX7-HSA-NEXT:    s_ashr_i32 s3, s2, 24
9964; GFX7-HSA-NEXT:    s_bfe_i32 s4, s2, 0x80010
9965; GFX7-HSA-NEXT:    s_lshl_b32 s3, s3, 16
9966; GFX7-HSA-NEXT:    s_and_b32 s4, s4, 0xffff
9967; GFX7-HSA-NEXT:    s_or_b32 s3, s4, s3
9968; GFX7-HSA-NEXT:    s_bfe_i32 s4, s2, 0x80008
9969; GFX7-HSA-NEXT:    s_sext_i32_i8 s2, s2
9970; GFX7-HSA-NEXT:    s_lshl_b32 s4, s4, 16
9971; GFX7-HSA-NEXT:    s_and_b32 s2, s2, 0xffff
9972; GFX7-HSA-NEXT:    s_or_b32 s2, s2, s4
9973; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s2
9974; GFX7-HSA-NEXT:    v_mov_b32_e32 v1, s3
9975; GFX7-HSA-NEXT:    v_mov_b32_e32 v2, s1
9976; GFX7-HSA-NEXT:    v_mov_b32_e32 v3, s0
9977; GFX7-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
9978; GFX7-HSA-NEXT:    s_endpgm
9979;
9980; GFX8-NOHSA-LABEL: constant_sextload_v8i8_to_v8i16:
9981; GFX8-NOHSA:       ; %bb.0:
9982; GFX8-NOHSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
9983; GFX8-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
9984; GFX8-NOHSA-NEXT:    s_load_dwordx2 s[2:3], s[2:3], 0x0
9985; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s0
9986; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s1
9987; GFX8-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
9988; GFX8-NOHSA-NEXT:    s_sext_i32_i16 s0, s3
9989; GFX8-NOHSA-NEXT:    s_bfe_i32 s1, s3, 0x80000
9990; GFX8-NOHSA-NEXT:    s_lshl_b32 s0, s0, 8
9991; GFX8-NOHSA-NEXT:    s_and_b32 s1, 0xffff, s1
9992; GFX8-NOHSA-NEXT:    s_and_b32 s0, s0, 0xffff0000
9993; GFX8-NOHSA-NEXT:    s_or_b32 s7, s1, s0
9994; GFX8-NOHSA-NEXT:    s_sext_i32_i16 s0, s2
9995; GFX8-NOHSA-NEXT:    s_bfe_i32 s6, s2, 0x80000
9996; GFX8-NOHSA-NEXT:    s_lshl_b32 s0, s0, 8
9997; GFX8-NOHSA-NEXT:    s_and_b32 s6, 0xffff, s6
9998; GFX8-NOHSA-NEXT:    s_and_b32 s0, s0, 0xffff0000
9999; GFX8-NOHSA-NEXT:    s_lshr_b32 s5, s3, 16
10000; GFX8-NOHSA-NEXT:    s_or_b32 s6, s6, s0
10001; GFX8-NOHSA-NEXT:    s_ashr_i64 s[0:1], s[2:3], 56
10002; GFX8-NOHSA-NEXT:    s_bfe_i32 s1, s5, 0x80000
10003; GFX8-NOHSA-NEXT:    s_lshr_b32 s4, s2, 16
10004; GFX8-NOHSA-NEXT:    s_lshl_b32 s0, s0, 16
10005; GFX8-NOHSA-NEXT:    s_and_b32 s1, 0xffff, s1
10006; GFX8-NOHSA-NEXT:    s_or_b32 s0, s1, s0
10007; GFX8-NOHSA-NEXT:    s_ashr_i32 s1, s2, 24
10008; GFX8-NOHSA-NEXT:    s_bfe_i32 s2, s4, 0x80000
10009; GFX8-NOHSA-NEXT:    s_lshl_b32 s1, s1, 16
10010; GFX8-NOHSA-NEXT:    s_and_b32 s2, 0xffff, s2
10011; GFX8-NOHSA-NEXT:    s_or_b32 s1, s2, s1
10012; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s6
10013; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s1
10014; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s7
10015; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v3, s0
10016; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
10017; GFX8-NOHSA-NEXT:    s_endpgm
10018;
10019; EG-LABEL: constant_sextload_v8i8_to_v8i16:
10020; EG:       ; %bb.0:
10021; EG-NEXT:    ALU 1, @8, KC0[CB0:0-32], KC1[]
10022; EG-NEXT:    TEX 0 @6
10023; EG-NEXT:    ALU 74, @10, KC0[CB0:0-32], KC1[]
10024; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T12.XYZW, T11.X, 1
10025; EG-NEXT:    CF_END
10026; EG-NEXT:    PAD
10027; EG-NEXT:    Fetch clause starting at 6:
10028; EG-NEXT:     VTX_READ_64 T11.XY, T11.X, 0, #1
10029; EG-NEXT:    ALU clause starting at 8:
10030; EG-NEXT:     MOV * T0.Y, T8.X,
10031; EG-NEXT:     MOV * T11.X, KC0[2].Z,
10032; EG-NEXT:    ALU clause starting at 10:
10033; EG-NEXT:     BFE_INT * T0.W, T11.X, 0.0, literal.x,
10034; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
10035; EG-NEXT:     AND_INT T0.W, PV.W, literal.x,
10036; EG-NEXT:     AND_INT * T1.W, T0.Y, literal.y,
10037; EG-NEXT:    65535(9.183409e-41), -65536(nan)
10038; EG-NEXT:     OR_INT * T0.W, PS, PV.W,
10039; EG-NEXT:     MOV * T8.X, PV.W,
10040; EG-NEXT:     MOV T0.Y, PV.X,
10041; EG-NEXT:     LSHR * T0.W, T11.X, literal.x,
10042; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
10043; EG-NEXT:     BFE_INT T0.W, PV.W, 0.0, literal.x,
10044; EG-NEXT:     AND_INT * T1.W, PV.Y, literal.y,
10045; EG-NEXT:    8(1.121039e-44), 65535(9.183409e-41)
10046; EG-NEXT:     LSHL * T0.W, PV.W, literal.x,
10047; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
10048; EG-NEXT:     OR_INT * T0.W, T1.W, PV.W,
10049; EG-NEXT:     MOV T8.X, PV.W,
10050; EG-NEXT:     MOV T0.Y, T9.X,
10051; EG-NEXT:     LSHR * T0.W, T11.X, literal.x, BS:VEC_120/SCL_212
10052; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
10053; EG-NEXT:     BFE_INT T0.W, PV.W, 0.0, literal.x,
10054; EG-NEXT:     AND_INT * T1.W, PV.Y, literal.y,
10055; EG-NEXT:    8(1.121039e-44), -65536(nan)
10056; EG-NEXT:     AND_INT * T0.W, PV.W, literal.x,
10057; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
10058; EG-NEXT:     OR_INT * T0.W, T1.W, PV.W,
10059; EG-NEXT:     MOV * T9.X, PV.W,
10060; EG-NEXT:     MOV T0.Y, PV.X,
10061; EG-NEXT:     ASHR * T0.W, T11.X, literal.x,
10062; EG-NEXT:    24(3.363116e-44), 0(0.000000e+00)
10063; EG-NEXT:     AND_INT T1.W, PV.Y, literal.x,
10064; EG-NEXT:     LSHL * T0.W, PV.W, literal.y,
10065; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
10066; EG-NEXT:     OR_INT * T12.Y, PV.W, PS,
10067; EG-NEXT:     MOV T9.X, PV.Y,
10068; EG-NEXT:     MOV T0.Y, T4.X,
10069; EG-NEXT:     BFE_INT * T0.W, T11.Y, 0.0, literal.x,
10070; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
10071; EG-NEXT:     AND_INT T1.W, PV.Y, literal.x,
10072; EG-NEXT:     AND_INT * T0.W, PV.W, literal.y,
10073; EG-NEXT:    -65536(nan), 65535(9.183409e-41)
10074; EG-NEXT:     OR_INT * T0.W, PV.W, PS,
10075; EG-NEXT:     MOV * T4.X, PV.W,
10076; EG-NEXT:     MOV T0.Y, PV.X,
10077; EG-NEXT:     LSHR * T0.W, T11.Y, literal.x,
10078; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
10079; EG-NEXT:     BFE_INT T0.W, PV.W, 0.0, literal.x,
10080; EG-NEXT:     AND_INT * T1.W, PV.Y, literal.y,
10081; EG-NEXT:    8(1.121039e-44), 65535(9.183409e-41)
10082; EG-NEXT:     LSHL * T0.W, PV.W, literal.x,
10083; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
10084; EG-NEXT:     OR_INT * T0.W, T1.W, PV.W,
10085; EG-NEXT:     MOV T4.X, PV.W,
10086; EG-NEXT:     MOV T0.Y, T5.X,
10087; EG-NEXT:     LSHR * T0.W, T11.Y, literal.x,
10088; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
10089; EG-NEXT:     BFE_INT T0.W, PV.W, 0.0, literal.x,
10090; EG-NEXT:     AND_INT * T1.W, PV.Y, literal.y,
10091; EG-NEXT:    8(1.121039e-44), -65536(nan)
10092; EG-NEXT:     AND_INT * T0.W, PV.W, literal.x,
10093; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
10094; EG-NEXT:     OR_INT * T0.W, T1.W, PV.W,
10095; EG-NEXT:     MOV * T5.X, PV.W,
10096; EG-NEXT:     MOV T0.Y, PV.X,
10097; EG-NEXT:     ASHR * T0.W, T11.Y, literal.x,
10098; EG-NEXT:    24(3.363116e-44), 0(0.000000e+00)
10099; EG-NEXT:     AND_INT T1.W, PV.Y, literal.x,
10100; EG-NEXT:     LSHL * T0.W, PV.W, literal.y,
10101; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
10102; EG-NEXT:     LSHR T11.X, KC0[2].Y, literal.x,
10103; EG-NEXT:     OR_INT * T12.W, PV.W, PS,
10104; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
10105; EG-NEXT:     MOV T5.X, PV.W,
10106; EG-NEXT:     MOV * T12.X, T8.X,
10107; EG-NEXT:     MOV * T12.Z, T4.X,
10108;
10109; GFX12-LABEL: constant_sextload_v8i8_to_v8i16:
10110; GFX12:       ; %bb.0:
10111; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
10112; GFX12-NEXT:    s_wait_kmcnt 0x0
10113; GFX12-NEXT:    s_load_b64 s[2:3], s[2:3], 0x0
10114; GFX12-NEXT:    s_wait_kmcnt 0x0
10115; GFX12-NEXT:    s_ashr_i64 s[4:5], s[2:3], 56
10116; GFX12-NEXT:    s_lshr_b32 s6, s2, 16
10117; GFX12-NEXT:    s_lshr_b32 s7, s3, 16
10118; GFX12-NEXT:    s_bfe_i32 s5, s3, 0x80000
10119; GFX12-NEXT:    s_sext_i32_i16 s3, s3
10120; GFX12-NEXT:    s_ashr_i32 s8, s2, 24
10121; GFX12-NEXT:    s_bfe_i32 s9, s2, 0x80000
10122; GFX12-NEXT:    s_sext_i32_i16 s2, s2
10123; GFX12-NEXT:    s_bfe_i32 s7, s7, 0x80000
10124; GFX12-NEXT:    s_lshr_b32 s3, s3, 8
10125; GFX12-NEXT:    s_bfe_i32 s6, s6, 0x80000
10126; GFX12-NEXT:    s_lshr_b32 s2, s2, 8
10127; GFX12-NEXT:    s_pack_ll_b32_b16 s4, s7, s4
10128; GFX12-NEXT:    s_pack_ll_b32_b16 s3, s5, s3
10129; GFX12-NEXT:    s_pack_ll_b32_b16 s2, s9, s2
10130; GFX12-NEXT:    s_pack_ll_b32_b16 s5, s6, s8
10131; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
10132; GFX12-NEXT:    v_dual_mov_b32 v4, 0 :: v_dual_mov_b32 v1, s5
10133; GFX12-NEXT:    v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v3, s4
10134; GFX12-NEXT:    v_mov_b32_e32 v2, s3
10135; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[0:1]
10136; GFX12-NEXT:    s_endpgm
; IR under test: load the 8 bytes, widen each lane with sext, store 16 bytes.
10137  %load = load <8 x i8>, ptr addrspace(4) %in
10138  %ext = sext <8 x i8> %load to <8 x i16>
10139  store <8 x i16> %ext, ptr addrspace(1) %out
10140  ret void
10141}
10142
; Kernel under test: reads a <16 x i8> vector through the addrspace(4) pointer
; %in, zero-extends every lane to i16 and writes the resulting <16 x i16>
; through the addrspace(1) pointer %out.
; The check lines between the signature and the IR body are autogenerated by
; utils/update_llc_test_checks.py (see the first line of this file); regenerate
; them with that script instead of editing them by hand. There is one group of
; checks per FileCheck prefix used by the llc invocations at the top of the file
; (GFX6-NOHSA, GFX7-HSA, GFX8-NOHSA, EG and GFX12). Each group expects the
; 32-byte result to be written with two four-dword stores, one of them placed
; 16 bytes above the other.
10143define amdgpu_kernel void @constant_zextload_v16i8_to_v16i16(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
10144; GFX6-NOHSA-LABEL: constant_zextload_v16i8_to_v16i16:
10145; GFX6-NOHSA:       ; %bb.0:
10146; GFX6-NOHSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
10147; GFX6-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
10148; GFX6-NOHSA-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x0
10149; GFX6-NOHSA-NEXT:    s_mov_b32 s3, 0xf000
10150; GFX6-NOHSA-NEXT:    s_mov_b32 s2, -1
10151; GFX6-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
10152; GFX6-NOHSA-NEXT:    s_and_b32 s8, s6, 0xff00
10153; GFX6-NOHSA-NEXT:    s_lshr_b32 s9, s6, 24
10154; GFX6-NOHSA-NEXT:    s_and_b32 s10, s7, 0xff00
10155; GFX6-NOHSA-NEXT:    s_lshr_b32 s11, s7, 24
10156; GFX6-NOHSA-NEXT:    s_and_b32 s12, s4, 0xff00
10157; GFX6-NOHSA-NEXT:    s_lshr_b32 s13, s4, 24
10158; GFX6-NOHSA-NEXT:    s_and_b32 s14, s5, 0xff00
10159; GFX6-NOHSA-NEXT:    s_lshr_b32 s15, s5, 24
10160; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s5
10161; GFX6-NOHSA-NEXT:    s_and_b32 s5, s5, 0xff
10162; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s4
10163; GFX6-NOHSA-NEXT:    s_and_b32 s4, s4, 0xff
10164; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s7
10165; GFX6-NOHSA-NEXT:    s_and_b32 s7, s7, 0xff
10166; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s6
10167; GFX6-NOHSA-NEXT:    s_and_b32 s6, s6, 0xff
10168; GFX6-NOHSA-NEXT:    v_alignbit_b32 v0, s15, v0, 16
10169; GFX6-NOHSA-NEXT:    s_lshl_b32 s14, s14, 8
10170; GFX6-NOHSA-NEXT:    v_alignbit_b32 v1, s13, v1, 16
10171; GFX6-NOHSA-NEXT:    s_lshl_b32 s12, s12, 8
10172; GFX6-NOHSA-NEXT:    v_alignbit_b32 v2, s11, v2, 16
10173; GFX6-NOHSA-NEXT:    s_lshl_b32 s10, s10, 8
10174; GFX6-NOHSA-NEXT:    v_alignbit_b32 v4, s9, v3, 16
10175; GFX6-NOHSA-NEXT:    s_lshl_b32 s8, s8, 8
10176; GFX6-NOHSA-NEXT:    v_and_b32_e32 v3, 0xff00ff, v0
10177; GFX6-NOHSA-NEXT:    s_or_b32 s5, s5, s14
10178; GFX6-NOHSA-NEXT:    v_and_b32_e32 v1, 0xff00ff, v1
10179; GFX6-NOHSA-NEXT:    s_or_b32 s4, s4, s12
10180; GFX6-NOHSA-NEXT:    v_and_b32_e32 v7, 0xff00ff, v2
10181; GFX6-NOHSA-NEXT:    s_or_b32 s7, s7, s10
10182; GFX6-NOHSA-NEXT:    s_or_b32 s6, s6, s8
10183; GFX6-NOHSA-NEXT:    v_and_b32_e32 v5, 0xff00ff, v4
10184; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v4, s6
10185; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v6, s7
10186; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16
10187; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s4
10188; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s5
10189; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
10190; GFX6-NOHSA-NEXT:    s_endpgm
10191;
10192; GFX7-HSA-LABEL: constant_zextload_v16i8_to_v16i16:
10193; GFX7-HSA:       ; %bb.0:
10194; GFX7-HSA-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
10195; GFX7-HSA-NEXT:    s_waitcnt lgkmcnt(0)
10196; GFX7-HSA-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x0
10197; GFX7-HSA-NEXT:    s_waitcnt lgkmcnt(0)
10198; GFX7-HSA-NEXT:    s_lshr_b32 s13, s5, 24
10199; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s5
10200; GFX7-HSA-NEXT:    v_alignbit_b32 v0, s13, v0, 16
10201; GFX7-HSA-NEXT:    s_lshr_b32 s11, s4, 24
10202; GFX7-HSA-NEXT:    v_and_b32_e32 v3, 0xff00ff, v0
10203; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s4
10204; GFX7-HSA-NEXT:    v_alignbit_b32 v0, s11, v0, 16
10205; GFX7-HSA-NEXT:    s_lshr_b32 s9, s7, 24
10206; GFX7-HSA-NEXT:    v_and_b32_e32 v1, 0xff00ff, v0
10207; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s7
10208; GFX7-HSA-NEXT:    v_alignbit_b32 v0, s9, v0, 16
10209; GFX7-HSA-NEXT:    s_and_b32 s2, s6, 0xff00
10210; GFX7-HSA-NEXT:    s_lshr_b32 s3, s6, 24
10211; GFX7-HSA-NEXT:    s_and_b32 s8, s7, 0xff00
10212; GFX7-HSA-NEXT:    s_and_b32 s10, s4, 0xff00
10213; GFX7-HSA-NEXT:    s_and_b32 s12, s5, 0xff00
10214; GFX7-HSA-NEXT:    v_and_b32_e32 v7, 0xff00ff, v0
10215; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s6
10216; GFX7-HSA-NEXT:    s_and_b32 s5, s5, 0xff
10217; GFX7-HSA-NEXT:    s_lshl_b32 s12, s12, 8
10218; GFX7-HSA-NEXT:    s_and_b32 s4, s4, 0xff
10219; GFX7-HSA-NEXT:    s_lshl_b32 s10, s10, 8
10220; GFX7-HSA-NEXT:    s_and_b32 s7, s7, 0xff
10221; GFX7-HSA-NEXT:    s_lshl_b32 s8, s8, 8
10222; GFX7-HSA-NEXT:    v_alignbit_b32 v0, s3, v0, 16
10223; GFX7-HSA-NEXT:    s_and_b32 s3, s6, 0xff
10224; GFX7-HSA-NEXT:    s_lshl_b32 s2, s2, 8
10225; GFX7-HSA-NEXT:    s_or_b32 s5, s5, s12
10226; GFX7-HSA-NEXT:    s_or_b32 s4, s4, s10
10227; GFX7-HSA-NEXT:    s_or_b32 s7, s7, s8
10228; GFX7-HSA-NEXT:    s_or_b32 s2, s3, s2
10229; GFX7-HSA-NEXT:    v_mov_b32_e32 v4, s2
10230; GFX7-HSA-NEXT:    s_add_u32 s2, s0, 16
10231; GFX7-HSA-NEXT:    s_addc_u32 s3, s1, 0
10232; GFX7-HSA-NEXT:    v_mov_b32_e32 v9, s3
10233; GFX7-HSA-NEXT:    v_and_b32_e32 v5, 0xff00ff, v0
10234; GFX7-HSA-NEXT:    v_mov_b32_e32 v6, s7
10235; GFX7-HSA-NEXT:    v_mov_b32_e32 v8, s2
10236; GFX7-HSA-NEXT:    flat_store_dwordx4 v[8:9], v[4:7]
10237; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s4
10238; GFX7-HSA-NEXT:    v_mov_b32_e32 v5, s1
10239; GFX7-HSA-NEXT:    v_mov_b32_e32 v2, s5
10240; GFX7-HSA-NEXT:    v_mov_b32_e32 v4, s0
10241; GFX7-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
10242; GFX7-HSA-NEXT:    s_endpgm
10243;
10244; GFX8-NOHSA-LABEL: constant_zextload_v16i8_to_v16i16:
10245; GFX8-NOHSA:       ; %bb.0:
10246; GFX8-NOHSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
10247; GFX8-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
10248; GFX8-NOHSA-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x0
10249; GFX8-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
10250; GFX8-NOHSA-NEXT:    s_lshr_b32 s3, s4, 24
10251; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s4
10252; GFX8-NOHSA-NEXT:    v_alignbit_b32 v0, s3, v0, 16
10253; GFX8-NOHSA-NEXT:    s_and_b32 s3, s4, 0xff
10254; GFX8-NOHSA-NEXT:    s_lshl_b32 s4, s4, 8
10255; GFX8-NOHSA-NEXT:    s_lshr_b32 s8, s5, 24
10256; GFX8-NOHSA-NEXT:    s_and_b32 s4, s4, 0xff0000
10257; GFX8-NOHSA-NEXT:    s_bfe_u32 s9, s5, 0x80010
10258; GFX8-NOHSA-NEXT:    s_lshl_b32 s8, s8, 16
10259; GFX8-NOHSA-NEXT:    s_or_b32 s4, s3, s4
10260; GFX8-NOHSA-NEXT:    s_lshr_b32 s3, s7, 24
10261; GFX8-NOHSA-NEXT:    s_lshr_b32 s2, s6, 24
10262; GFX8-NOHSA-NEXT:    s_or_b32 s8, s9, s8
10263; GFX8-NOHSA-NEXT:    v_and_b32_e32 v1, 0xff00ff, v0
10264; GFX8-NOHSA-NEXT:    s_lshl_b32 s3, s3, 16
10265; GFX8-NOHSA-NEXT:    s_bfe_u32 s9, s7, 0x80010
10266; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s6
10267; GFX8-NOHSA-NEXT:    s_and_b32 s10, s5, 0xff
10268; GFX8-NOHSA-NEXT:    s_lshl_b32 s5, s5, 8
10269; GFX8-NOHSA-NEXT:    s_or_b32 s3, s9, s3
10270; GFX8-NOHSA-NEXT:    s_and_b32 s9, s7, 0xff
10271; GFX8-NOHSA-NEXT:    s_lshl_b32 s7, s7, 8
10272; GFX8-NOHSA-NEXT:    v_alignbit_b32 v0, s2, v0, 16
10273; GFX8-NOHSA-NEXT:    s_and_b32 s2, s6, 0xff
10274; GFX8-NOHSA-NEXT:    s_lshl_b32 s6, s6, 8
10275; GFX8-NOHSA-NEXT:    s_and_b32 s5, s5, 0xff0000
10276; GFX8-NOHSA-NEXT:    s_and_b32 s7, s7, 0xff0000
10277; GFX8-NOHSA-NEXT:    s_and_b32 s6, s6, 0xff0000
10278; GFX8-NOHSA-NEXT:    s_or_b32 s5, s10, s5
10279; GFX8-NOHSA-NEXT:    s_or_b32 s7, s9, s7
10280; GFX8-NOHSA-NEXT:    s_or_b32 s2, s2, s6
10281; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s2
10282; GFX8-NOHSA-NEXT:    s_add_u32 s2, s0, 16
10283; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s3
10284; GFX8-NOHSA-NEXT:    s_addc_u32 s3, s1, 0
10285; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v7, s3
10286; GFX8-NOHSA-NEXT:    v_and_b32_e32 v3, 0xff00ff, v0
10287; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s7
10288; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v6, s2
10289; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[6:7], v[2:5]
10290; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s4
10291; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s1
10292; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s5
10293; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v3, s8
10294; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s0
10295; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
10296; GFX8-NOHSA-NEXT:    s_endpgm
10297;
10298; EG-LABEL: constant_zextload_v16i8_to_v16i16:
10299; EG:       ; %bb.0:
10300; EG-NEXT:    ALU 1, @10, KC0[CB0:0-32], KC1[]
10301; EG-NEXT:    TEX 0 @8
10302; EG-NEXT:    ALU 103, @12, KC0[], KC1[]
10303; EG-NEXT:    ALU 20, @116, KC0[CB0:0-32], KC1[]
10304; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T20.XYZW, T22.X, 0
10305; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T19.XYZW, T21.X, 1
10306; EG-NEXT:    CF_END
10307; EG-NEXT:    PAD
10308; EG-NEXT:    Fetch clause starting at 8:
10309; EG-NEXT:     VTX_READ_128 T19.XYZW, T19.X, 0, #1
10310; EG-NEXT:    ALU clause starting at 10:
10311; EG-NEXT:     MOV * T0.Y, T16.X,
10312; EG-NEXT:     MOV * T19.X, KC0[2].Z,
10313; EG-NEXT:    ALU clause starting at 12:
10314; EG-NEXT:     AND_INT T0.W, T19.X, literal.x,
10315; EG-NEXT:     AND_INT * T1.W, T0.Y, literal.y,
10316; EG-NEXT:    255(3.573311e-43), -65536(nan)
10317; EG-NEXT:     OR_INT * T0.W, PS, PV.W,
10318; EG-NEXT:     MOV * T16.X, PV.W,
10319; EG-NEXT:     MOV T0.Y, PV.X,
10320; EG-NEXT:     LSHL * T0.W, T19.X, literal.x,
10321; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
10322; EG-NEXT:     AND_INT T1.W, PV.Y, literal.x,
10323; EG-NEXT:     AND_INT * T0.W, PV.W, literal.y,
10324; EG-NEXT:    65535(9.183409e-41), 16711680(2.341805e-38)
10325; EG-NEXT:     OR_INT * T0.W, PV.W, PS,
10326; EG-NEXT:     MOV T16.X, PV.W,
10327; EG-NEXT:     MOV T0.Y, T17.X,
10328; EG-NEXT:     MOV * T0.W, literal.x,
10329; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
10330; EG-NEXT:     BFE_UINT T1.W, T19.X, literal.x, PV.W,
10331; EG-NEXT:     AND_INT * T2.W, PV.Y, literal.y,
10332; EG-NEXT:    16(2.242078e-44), -65536(nan)
10333; EG-NEXT:     OR_INT * T1.W, PS, PV.W,
10334; EG-NEXT:     MOV * T17.X, PV.W,
10335; EG-NEXT:     MOV T0.Y, PV.X,
10336; EG-NEXT:     LSHR * T1.W, T19.X, literal.x,
10337; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
10338; EG-NEXT:     AND_INT T2.W, PV.Y, literal.x,
10339; EG-NEXT:     AND_INT * T1.W, PV.W, literal.y,
10340; EG-NEXT:    65535(9.183409e-41), 16711680(2.341805e-38)
10341; EG-NEXT:     OR_INT * T20.Y, PV.W, PS,
10342; EG-NEXT:     MOV T17.X, PV.Y,
10343; EG-NEXT:     MOV * T0.Y, T12.X,
10344; EG-NEXT:     AND_INT T1.W, PV.Y, literal.x,
10345; EG-NEXT:     AND_INT * T2.W, T19.Y, literal.y,
10346; EG-NEXT:    -65536(nan), 255(3.573311e-43)
10347; EG-NEXT:     OR_INT * T1.W, PV.W, PS,
10348; EG-NEXT:     MOV * T12.X, PV.W,
10349; EG-NEXT:     MOV T0.Y, PV.X,
10350; EG-NEXT:     LSHL * T1.W, T19.Y, literal.x,
10351; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
10352; EG-NEXT:     AND_INT T2.W, PV.Y, literal.x,
10353; EG-NEXT:     AND_INT * T1.W, PV.W, literal.y,
10354; EG-NEXT:    65535(9.183409e-41), 16711680(2.341805e-38)
10355; EG-NEXT:     OR_INT * T1.W, PV.W, PS,
10356; EG-NEXT:     MOV T12.X, PV.W,
10357; EG-NEXT:     MOV T0.Y, T13.X,
10358; EG-NEXT:     BFE_UINT * T1.W, T19.Y, literal.x, T0.W,
10359; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
10360; EG-NEXT:     AND_INT * T2.W, PV.Y, literal.x,
10361; EG-NEXT:    -65536(nan), 0(0.000000e+00)
10362; EG-NEXT:     OR_INT * T1.W, PV.W, T1.W,
10363; EG-NEXT:     MOV * T13.X, PV.W,
10364; EG-NEXT:     MOV T0.Y, PV.X,
10365; EG-NEXT:     LSHR * T1.W, T19.Y, literal.x,
10366; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
10367; EG-NEXT:     AND_INT T2.W, PV.Y, literal.x,
10368; EG-NEXT:     AND_INT * T1.W, PV.W, literal.y,
10369; EG-NEXT:    65535(9.183409e-41), 16711680(2.341805e-38)
10370; EG-NEXT:     OR_INT * T20.W, PV.W, PS,
10371; EG-NEXT:     MOV T13.X, PV.W,
10372; EG-NEXT:     MOV * T0.Y, T8.X,
10373; EG-NEXT:     AND_INT T1.W, PV.Y, literal.x,
10374; EG-NEXT:     AND_INT * T2.W, T19.Z, literal.y,
10375; EG-NEXT:    -65536(nan), 255(3.573311e-43)
10376; EG-NEXT:     OR_INT * T1.W, PV.W, PS,
10377; EG-NEXT:     MOV * T8.X, PV.W,
10378; EG-NEXT:     MOV T0.Y, PV.X,
10379; EG-NEXT:     LSHL * T1.W, T19.Z, literal.x,
10380; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
10381; EG-NEXT:     AND_INT T2.W, PV.Y, literal.x,
10382; EG-NEXT:     AND_INT * T1.W, PV.W, literal.y,
10383; EG-NEXT:    65535(9.183409e-41), 16711680(2.341805e-38)
10384; EG-NEXT:     OR_INT * T1.W, PV.W, PS,
10385; EG-NEXT:     MOV T8.X, PV.W,
10386; EG-NEXT:     MOV T0.Y, T9.X,
10387; EG-NEXT:     BFE_UINT * T1.W, T19.Z, literal.x, T0.W,
10388; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
10389; EG-NEXT:     AND_INT * T2.W, PV.Y, literal.x,
10390; EG-NEXT:    -65536(nan), 0(0.000000e+00)
10391; EG-NEXT:     OR_INT * T1.W, PV.W, T1.W,
10392; EG-NEXT:     MOV * T9.X, PV.W,
10393; EG-NEXT:     MOV T0.Y, PV.X,
10394; EG-NEXT:     LSHR * T1.W, T19.Z, literal.x,
10395; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
10396; EG-NEXT:     AND_INT T2.W, PV.Y, literal.x,
10397; EG-NEXT:     AND_INT * T1.W, PV.W, literal.y,
10398; EG-NEXT:    65535(9.183409e-41), 16711680(2.341805e-38)
10399; EG-NEXT:     OR_INT * T19.Y, PV.W, PS,
10400; EG-NEXT:     MOV T9.X, PV.Y,
10401; EG-NEXT:     MOV * T0.Y, T4.X,
10402; EG-NEXT:     AND_INT T1.W, PV.Y, literal.x,
10403; EG-NEXT:     AND_INT * T2.W, T19.W, literal.y,
10404; EG-NEXT:    -65536(nan), 255(3.573311e-43)
10405; EG-NEXT:     OR_INT * T1.W, PV.W, PS,
10406; EG-NEXT:     MOV * T4.X, PV.W,
10407; EG-NEXT:     MOV T0.Y, PV.X,
10408; EG-NEXT:     LSHL * T1.W, T19.W, literal.x,
10409; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
10410; EG-NEXT:     AND_INT T2.W, PV.Y, literal.x,
10411; EG-NEXT:     AND_INT * T1.W, PV.W, literal.y,
10412; EG-NEXT:    65535(9.183409e-41), 16711680(2.341805e-38)
10413; EG-NEXT:     OR_INT * T1.W, PV.W, PS,
10414; EG-NEXT:     MOV T4.X, PV.W,
10415; EG-NEXT:     MOV T0.Y, T5.X,
10416; EG-NEXT:     BFE_UINT * T0.W, T19.W, literal.x, T0.W,
10417; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
10418; EG-NEXT:    ALU clause starting at 116:
10419; EG-NEXT:     AND_INT * T1.W, T0.Y, literal.x,
10420; EG-NEXT:    -65536(nan), 0(0.000000e+00)
10421; EG-NEXT:     OR_INT * T0.W, PV.W, T0.W,
10422; EG-NEXT:     MOV * T5.X, PV.W,
10423; EG-NEXT:     MOV T0.Y, PV.X,
10424; EG-NEXT:     LSHR T0.W, T19.W, literal.x,
10425; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.y,
10426; EG-NEXT:    8(1.121039e-44), 16(2.242078e-44)
10427; EG-NEXT:     LSHR T21.X, PS, literal.x,
10428; EG-NEXT:     AND_INT T1.W, PV.Y, literal.y,
10429; EG-NEXT:     AND_INT * T0.W, PV.W, literal.z,
10430; EG-NEXT:    2(2.802597e-45), 65535(9.183409e-41)
10431; EG-NEXT:    16711680(2.341805e-38), 0(0.000000e+00)
10432; EG-NEXT:     LSHR T22.X, KC0[2].Y, literal.x,
10433; EG-NEXT:     OR_INT * T19.W, PV.W, PS,
10434; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
10435; EG-NEXT:     MOV T5.X, PV.W,
10436; EG-NEXT:     MOV * T20.X, T16.X,
10437; EG-NEXT:     MOV * T20.Z, T12.X,
10438; EG-NEXT:     MOV T19.X, T8.X,
10439; EG-NEXT:     MOV * T19.Z, T4.X, BS:VEC_120/SCL_212
10440;
10441; GFX12-LABEL: constant_zextload_v16i8_to_v16i16:
10442; GFX12:       ; %bb.0:
10443; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
10444; GFX12-NEXT:    s_wait_kmcnt 0x0
10445; GFX12-NEXT:    s_load_b128 s[4:7], s[2:3], 0x0
10446; GFX12-NEXT:    s_wait_kmcnt 0x0
10447; GFX12-NEXT:    s_bfe_u32 s2, s6, 0x80008
10448; GFX12-NEXT:    s_lshr_b32 s3, s6, 24
10449; GFX12-NEXT:    s_bfe_u32 s8, s7, 0x80008
10450; GFX12-NEXT:    s_lshr_b32 s9, s7, 24
10451; GFX12-NEXT:    s_bfe_u32 s16, s7, 0x80010
10452; GFX12-NEXT:    s_and_b32 s7, s7, 0xff
10453; GFX12-NEXT:    s_bfe_u32 s17, s6, 0x80010
10454; GFX12-NEXT:    s_and_b32 s6, s6, 0xff
10455; GFX12-NEXT:    s_bfe_u32 s10, s4, 0x80008
10456; GFX12-NEXT:    s_lshr_b32 s11, s4, 24
10457; GFX12-NEXT:    s_bfe_u32 s12, s5, 0x80008
10458; GFX12-NEXT:    s_lshr_b32 s13, s5, 24
10459; GFX12-NEXT:    s_bfe_u32 s14, s5, 0x80010
10460; GFX12-NEXT:    s_and_b32 s5, s5, 0xff
10461; GFX12-NEXT:    s_bfe_u32 s15, s4, 0x80010
10462; GFX12-NEXT:    s_and_b32 s4, s4, 0xff
10463; GFX12-NEXT:    s_pack_ll_b32_b16 s9, s16, s9
10464; GFX12-NEXT:    s_pack_ll_b32_b16 s7, s7, s8
10465; GFX12-NEXT:    s_pack_ll_b32_b16 s2, s6, s2
10466; GFX12-NEXT:    s_pack_ll_b32_b16 s3, s17, s3
10467; GFX12-NEXT:    s_pack_ll_b32_b16 s13, s14, s13
10468; GFX12-NEXT:    s_pack_ll_b32_b16 s5, s5, s12
10469; GFX12-NEXT:    s_pack_ll_b32_b16 s11, s15, s11
10470; GFX12-NEXT:    s_pack_ll_b32_b16 s4, s4, s10
10471; GFX12-NEXT:    v_dual_mov_b32 v8, 0 :: v_dual_mov_b32 v1, s3
10472; GFX12-NEXT:    v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v3, s9
10473; GFX12-NEXT:    v_dual_mov_b32 v2, s7 :: v_dual_mov_b32 v5, s11
10474; GFX12-NEXT:    v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v7, s13
10475; GFX12-NEXT:    v_mov_b32_e32 v6, s5
10476; GFX12-NEXT:    s_clause 0x1
10477; GFX12-NEXT:    global_store_b128 v8, v[0:3], s[0:1] offset:16
10478; GFX12-NEXT:    global_store_b128 v8, v[4:7], s[0:1]
10479; GFX12-NEXT:    s_endpgm
; IR under test: load the 16 bytes, widen each lane with zext, store 32 bytes.
10480  %load = load <16 x i8>, ptr addrspace(4) %in
10481  %ext = zext <16 x i8> %load to <16 x i16>
10482  store <16 x i16> %ext, ptr addrspace(1) %out
10483  ret void
10484}
10485
10486define amdgpu_kernel void @constant_sextload_v16i8_to_v16i16(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
10487; GFX6-NOHSA-LABEL: constant_sextload_v16i8_to_v16i16:
10488; GFX6-NOHSA:       ; %bb.0:
10489; GFX6-NOHSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
10490; GFX6-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
10491; GFX6-NOHSA-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x0
10492; GFX6-NOHSA-NEXT:    s_mov_b32 s3, 0xf000
10493; GFX6-NOHSA-NEXT:    s_mov_b32 s2, -1
10494; GFX6-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
10495; GFX6-NOHSA-NEXT:    s_ashr_i32 s8, s5, 24
10496; GFX6-NOHSA-NEXT:    s_bfe_i32 s9, s5, 0x80010
10497; GFX6-NOHSA-NEXT:    s_bfe_i32 s10, s5, 0x80008
10498; GFX6-NOHSA-NEXT:    s_sext_i32_i8 s5, s5
10499; GFX6-NOHSA-NEXT:    s_ashr_i32 s11, s4, 24
10500; GFX6-NOHSA-NEXT:    s_bfe_i32 s12, s4, 0x80010
10501; GFX6-NOHSA-NEXT:    s_bfe_i32 s13, s4, 0x80008
10502; GFX6-NOHSA-NEXT:    s_sext_i32_i8 s4, s4
10503; GFX6-NOHSA-NEXT:    s_ashr_i32 s14, s7, 24
10504; GFX6-NOHSA-NEXT:    s_bfe_i32 s15, s7, 0x80010
10505; GFX6-NOHSA-NEXT:    s_bfe_i32 s16, s7, 0x80008
10506; GFX6-NOHSA-NEXT:    s_sext_i32_i8 s7, s7
10507; GFX6-NOHSA-NEXT:    s_ashr_i32 s17, s6, 24
10508; GFX6-NOHSA-NEXT:    s_bfe_i32 s18, s6, 0x80010
10509; GFX6-NOHSA-NEXT:    s_bfe_i32 s19, s6, 0x80008
10510; GFX6-NOHSA-NEXT:    s_sext_i32_i8 s6, s6
10511; GFX6-NOHSA-NEXT:    s_lshl_b32 s8, s8, 16
10512; GFX6-NOHSA-NEXT:    s_and_b32 s9, s9, 0xffff
10513; GFX6-NOHSA-NEXT:    s_lshl_b32 s10, s10, 16
10514; GFX6-NOHSA-NEXT:    s_and_b32 s5, s5, 0xffff
10515; GFX6-NOHSA-NEXT:    s_lshl_b32 s11, s11, 16
10516; GFX6-NOHSA-NEXT:    s_and_b32 s12, s12, 0xffff
10517; GFX6-NOHSA-NEXT:    s_lshl_b32 s13, s13, 16
10518; GFX6-NOHSA-NEXT:    s_and_b32 s4, s4, 0xffff
10519; GFX6-NOHSA-NEXT:    s_lshl_b32 s14, s14, 16
10520; GFX6-NOHSA-NEXT:    s_and_b32 s15, s15, 0xffff
10521; GFX6-NOHSA-NEXT:    s_lshl_b32 s16, s16, 16
10522; GFX6-NOHSA-NEXT:    s_and_b32 s7, s7, 0xffff
10523; GFX6-NOHSA-NEXT:    s_lshl_b32 s17, s17, 16
10524; GFX6-NOHSA-NEXT:    s_and_b32 s18, s18, 0xffff
10525; GFX6-NOHSA-NEXT:    s_lshl_b32 s19, s19, 16
10526; GFX6-NOHSA-NEXT:    s_and_b32 s6, s6, 0xffff
10527; GFX6-NOHSA-NEXT:    s_or_b32 s8, s9, s8
10528; GFX6-NOHSA-NEXT:    s_or_b32 s5, s5, s10
10529; GFX6-NOHSA-NEXT:    s_or_b32 s9, s12, s11
10530; GFX6-NOHSA-NEXT:    s_or_b32 s10, s15, s14
10531; GFX6-NOHSA-NEXT:    s_or_b32 s7, s7, s16
10532; GFX6-NOHSA-NEXT:    s_or_b32 s11, s18, s17
10533; GFX6-NOHSA-NEXT:    s_or_b32 s6, s6, s19
10534; GFX6-NOHSA-NEXT:    s_or_b32 s4, s4, s13
10535; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s6
10536; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s11
10537; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s7
10538; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s10
10539; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
10540; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
10541; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s4
10542; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s9
10543; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s5
10544; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s8
10545; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
10546; GFX6-NOHSA-NEXT:    s_endpgm
10547;
10548; GFX7-HSA-LABEL: constant_sextload_v16i8_to_v16i16:
10549; GFX7-HSA:       ; %bb.0:
10550; GFX7-HSA-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
10551; GFX7-HSA-NEXT:    s_waitcnt lgkmcnt(0)
10552; GFX7-HSA-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x0
10553; GFX7-HSA-NEXT:    s_waitcnt lgkmcnt(0)
10554; GFX7-HSA-NEXT:    s_ashr_i32 s2, s5, 24
10555; GFX7-HSA-NEXT:    s_bfe_i32 s3, s5, 0x80010
10556; GFX7-HSA-NEXT:    s_lshl_b32 s2, s2, 16
10557; GFX7-HSA-NEXT:    s_and_b32 s3, s3, 0xffff
10558; GFX7-HSA-NEXT:    s_bfe_i32 s8, s5, 0x80008
10559; GFX7-HSA-NEXT:    s_sext_i32_i8 s5, s5
10560; GFX7-HSA-NEXT:    s_ashr_i32 s9, s4, 24
10561; GFX7-HSA-NEXT:    s_or_b32 s10, s3, s2
10562; GFX7-HSA-NEXT:    s_bfe_i32 s3, s4, 0x80010
10563; GFX7-HSA-NEXT:    s_lshl_b32 s8, s8, 16
10564; GFX7-HSA-NEXT:    s_and_b32 s5, s5, 0xffff
10565; GFX7-HSA-NEXT:    s_lshl_b32 s2, s9, 16
10566; GFX7-HSA-NEXT:    s_and_b32 s3, s3, 0xffff
10567; GFX7-HSA-NEXT:    s_or_b32 s5, s5, s8
10568; GFX7-HSA-NEXT:    s_or_b32 s8, s3, s2
10569; GFX7-HSA-NEXT:    s_bfe_i32 s2, s4, 0x80008
10570; GFX7-HSA-NEXT:    s_sext_i32_i8 s3, s4
10571; GFX7-HSA-NEXT:    s_lshl_b32 s2, s2, 16
10572; GFX7-HSA-NEXT:    s_and_b32 s3, s3, 0xffff
10573; GFX7-HSA-NEXT:    s_or_b32 s4, s3, s2
10574; GFX7-HSA-NEXT:    s_ashr_i32 s2, s7, 24
10575; GFX7-HSA-NEXT:    s_bfe_i32 s3, s7, 0x80010
10576; GFX7-HSA-NEXT:    s_lshl_b32 s2, s2, 16
10577; GFX7-HSA-NEXT:    s_and_b32 s3, s3, 0xffff
10578; GFX7-HSA-NEXT:    s_or_b32 s2, s3, s2
10579; GFX7-HSA-NEXT:    s_bfe_i32 s3, s7, 0x80008
10580; GFX7-HSA-NEXT:    s_sext_i32_i8 s7, s7
10581; GFX7-HSA-NEXT:    s_lshl_b32 s3, s3, 16
10582; GFX7-HSA-NEXT:    s_and_b32 s7, s7, 0xffff
10583; GFX7-HSA-NEXT:    s_or_b32 s3, s7, s3
10584; GFX7-HSA-NEXT:    s_ashr_i32 s7, s6, 24
10585; GFX7-HSA-NEXT:    s_bfe_i32 s9, s6, 0x80010
10586; GFX7-HSA-NEXT:    s_lshl_b32 s7, s7, 16
10587; GFX7-HSA-NEXT:    s_and_b32 s9, s9, 0xffff
10588; GFX7-HSA-NEXT:    s_or_b32 s7, s9, s7
10589; GFX7-HSA-NEXT:    s_bfe_i32 s9, s6, 0x80008
10590; GFX7-HSA-NEXT:    s_sext_i32_i8 s6, s6
10591; GFX7-HSA-NEXT:    s_lshl_b32 s9, s9, 16
10592; GFX7-HSA-NEXT:    s_and_b32 s6, s6, 0xffff
10593; GFX7-HSA-NEXT:    s_or_b32 s6, s6, s9
10594; GFX7-HSA-NEXT:    v_mov_b32_e32 v3, s2
10595; GFX7-HSA-NEXT:    s_add_u32 s2, s0, 16
10596; GFX7-HSA-NEXT:    v_mov_b32_e32 v2, s3
10597; GFX7-HSA-NEXT:    s_addc_u32 s3, s1, 0
10598; GFX7-HSA-NEXT:    v_mov_b32_e32 v5, s3
10599; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s6
10600; GFX7-HSA-NEXT:    v_mov_b32_e32 v1, s7
10601; GFX7-HSA-NEXT:    v_mov_b32_e32 v4, s2
10602; GFX7-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
10603; GFX7-HSA-NEXT:    v_mov_b32_e32 v5, s1
10604; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s4
10605; GFX7-HSA-NEXT:    v_mov_b32_e32 v1, s8
10606; GFX7-HSA-NEXT:    v_mov_b32_e32 v2, s5
10607; GFX7-HSA-NEXT:    v_mov_b32_e32 v3, s10
10608; GFX7-HSA-NEXT:    v_mov_b32_e32 v4, s0
10609; GFX7-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
10610; GFX7-HSA-NEXT:    s_endpgm
10611;
10612; GFX8-NOHSA-LABEL: constant_sextload_v16i8_to_v16i16:
10613; GFX8-NOHSA:       ; %bb.0:
10614; GFX8-NOHSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
10615; GFX8-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
10616; GFX8-NOHSA-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x0
10617; GFX8-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
10618; GFX8-NOHSA-NEXT:    s_lshr_b32 s3, s5, 16
10619; GFX8-NOHSA-NEXT:    s_sext_i32_i16 s10, s5
10620; GFX8-NOHSA-NEXT:    s_bfe_i32 s11, s5, 0x80000
10621; GFX8-NOHSA-NEXT:    s_ashr_i32 s5, s5, 16
10622; GFX8-NOHSA-NEXT:    s_lshl_b32 s5, s5, 8
10623; GFX8-NOHSA-NEXT:    s_bfe_i32 s3, s3, 0x80000
10624; GFX8-NOHSA-NEXT:    s_lshr_b32 s2, s4, 16
10625; GFX8-NOHSA-NEXT:    s_and_b32 s5, s5, 0xffff0000
10626; GFX8-NOHSA-NEXT:    s_and_b32 s3, 0xffff, s3
10627; GFX8-NOHSA-NEXT:    s_sext_i32_i16 s12, s4
10628; GFX8-NOHSA-NEXT:    s_lshl_b32 s10, s10, 8
10629; GFX8-NOHSA-NEXT:    s_or_b32 s5, s3, s5
10630; GFX8-NOHSA-NEXT:    s_ashr_i32 s3, s4, 24
10631; GFX8-NOHSA-NEXT:    s_bfe_i32 s2, s2, 0x80000
10632; GFX8-NOHSA-NEXT:    s_and_b32 s11, 0xffff, s11
10633; GFX8-NOHSA-NEXT:    s_lshl_b32 s12, s12, 8
10634; GFX8-NOHSA-NEXT:    s_and_b32 s10, s10, 0xffff0000
10635; GFX8-NOHSA-NEXT:    s_lshl_b32 s3, s3, 16
10636; GFX8-NOHSA-NEXT:    s_and_b32 s2, 0xffff, s2
10637; GFX8-NOHSA-NEXT:    s_or_b32 s10, s11, s10
10638; GFX8-NOHSA-NEXT:    s_and_b32 s11, s12, 0xffff0000
10639; GFX8-NOHSA-NEXT:    s_bfe_i32 s12, s4, 0x80000
10640; GFX8-NOHSA-NEXT:    s_or_b32 s4, s2, s3
10641; GFX8-NOHSA-NEXT:    s_sext_i32_i16 s2, s7
10642; GFX8-NOHSA-NEXT:    s_lshl_b32 s2, s2, 8
10643; GFX8-NOHSA-NEXT:    s_bfe_i32 s3, s7, 0x80000
10644; GFX8-NOHSA-NEXT:    s_and_b32 s12, 0xffff, s12
10645; GFX8-NOHSA-NEXT:    s_and_b32 s2, s2, 0xffff0000
10646; GFX8-NOHSA-NEXT:    s_and_b32 s3, 0xffff, s3
10647; GFX8-NOHSA-NEXT:    s_or_b32 s11, s12, s11
10648; GFX8-NOHSA-NEXT:    s_or_b32 s12, s3, s2
10649; GFX8-NOHSA-NEXT:    s_sext_i32_i16 s2, s6
10650; GFX8-NOHSA-NEXT:    s_lshl_b32 s2, s2, 8
10651; GFX8-NOHSA-NEXT:    s_bfe_i32 s3, s6, 0x80000
10652; GFX8-NOHSA-NEXT:    s_and_b32 s2, s2, 0xffff0000
10653; GFX8-NOHSA-NEXT:    s_and_b32 s3, 0xffff, s3
10654; GFX8-NOHSA-NEXT:    s_lshr_b32 s9, s7, 16
10655; GFX8-NOHSA-NEXT:    s_or_b32 s13, s3, s2
10656; GFX8-NOHSA-NEXT:    s_ashr_i64 s[2:3], s[6:7], 56
10657; GFX8-NOHSA-NEXT:    s_bfe_i32 s3, s9, 0x80000
10658; GFX8-NOHSA-NEXT:    s_lshr_b32 s8, s6, 16
10659; GFX8-NOHSA-NEXT:    s_lshl_b32 s2, s2, 16
10660; GFX8-NOHSA-NEXT:    s_and_b32 s3, 0xffff, s3
10661; GFX8-NOHSA-NEXT:    s_or_b32 s2, s3, s2
10662; GFX8-NOHSA-NEXT:    s_ashr_i32 s3, s6, 24
10663; GFX8-NOHSA-NEXT:    s_bfe_i32 s6, s8, 0x80000
10664; GFX8-NOHSA-NEXT:    s_lshl_b32 s3, s3, 16
10665; GFX8-NOHSA-NEXT:    s_and_b32 s6, 0xffff, s6
10666; GFX8-NOHSA-NEXT:    s_or_b32 s3, s6, s3
10667; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v3, s2
10668; GFX8-NOHSA-NEXT:    s_add_u32 s2, s0, 16
10669; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s3
10670; GFX8-NOHSA-NEXT:    s_addc_u32 s3, s1, 0
10671; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s3
10672; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s13
10673; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s12
10674; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s2
10675; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
10676; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s1
10677; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s11
10678; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s4
10679; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s10
10680; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v3, s5
10681; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s0
10682; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
10683; GFX8-NOHSA-NEXT:    s_endpgm
10684;
10685; EG-LABEL: constant_sextload_v16i8_to_v16i16:
10686; EG:       ; %bb.0:
10687; EG-NEXT:    ALU 1, @10, KC0[CB0:0-32], KC1[]
10688; EG-NEXT:    TEX 0 @8
10689; EG-NEXT:    ALU 104, @12, KC0[], KC1[]
10690; EG-NEXT:    ALU 46, @117, KC0[CB0:0-32], KC1[]
10691; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T20.XYZW, T22.X, 0
10692; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T19.XYZW, T21.X, 1
10693; EG-NEXT:    CF_END
10694; EG-NEXT:    PAD
10695; EG-NEXT:    Fetch clause starting at 8:
10696; EG-NEXT:     VTX_READ_128 T19.XYZW, T19.X, 0, #1
10697; EG-NEXT:    ALU clause starting at 10:
10698; EG-NEXT:     MOV * T0.Y, T16.X,
10699; EG-NEXT:     MOV * T19.X, KC0[2].Z,
10700; EG-NEXT:    ALU clause starting at 12:
10701; EG-NEXT:     BFE_INT * T0.W, T19.X, 0.0, literal.x,
10702; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
10703; EG-NEXT:     AND_INT T0.W, PV.W, literal.x,
10704; EG-NEXT:     AND_INT * T1.W, T0.Y, literal.y,
10705; EG-NEXT:    65535(9.183409e-41), -65536(nan)
10706; EG-NEXT:     OR_INT * T0.W, PS, PV.W,
10707; EG-NEXT:     MOV * T16.X, PV.W,
10708; EG-NEXT:     MOV T0.Y, PV.X,
10709; EG-NEXT:     LSHR * T0.W, T19.X, literal.x,
10710; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
10711; EG-NEXT:     BFE_INT T0.W, PV.W, 0.0, literal.x,
10712; EG-NEXT:     AND_INT * T1.W, PV.Y, literal.y,
10713; EG-NEXT:    8(1.121039e-44), 65535(9.183409e-41)
10714; EG-NEXT:     LSHL * T0.W, PV.W, literal.x,
10715; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
10716; EG-NEXT:     OR_INT * T0.W, T1.W, PV.W,
10717; EG-NEXT:     MOV T16.X, PV.W,
10718; EG-NEXT:     MOV T0.Y, T17.X,
10719; EG-NEXT:     LSHR * T0.W, T19.X, literal.x, BS:VEC_120/SCL_212
10720; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
10721; EG-NEXT:     BFE_INT T0.W, PV.W, 0.0, literal.x,
10722; EG-NEXT:     AND_INT * T1.W, PV.Y, literal.y,
10723; EG-NEXT:    8(1.121039e-44), -65536(nan)
10724; EG-NEXT:     AND_INT * T0.W, PV.W, literal.x,
10725; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
10726; EG-NEXT:     OR_INT * T0.W, T1.W, PV.W,
10727; EG-NEXT:     MOV * T17.X, PV.W,
10728; EG-NEXT:     MOV T0.Y, PV.X,
10729; EG-NEXT:     ASHR * T0.W, T19.X, literal.x,
10730; EG-NEXT:    24(3.363116e-44), 0(0.000000e+00)
10731; EG-NEXT:     AND_INT T1.W, PV.Y, literal.x,
10732; EG-NEXT:     LSHL * T0.W, PV.W, literal.y,
10733; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
10734; EG-NEXT:     OR_INT * T20.Y, PV.W, PS,
10735; EG-NEXT:     MOV T17.X, PV.Y,
10736; EG-NEXT:     MOV T0.Y, T12.X,
10737; EG-NEXT:     BFE_INT * T0.W, T19.Y, 0.0, literal.x,
10738; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
10739; EG-NEXT:     AND_INT T1.W, PV.Y, literal.x,
10740; EG-NEXT:     AND_INT * T0.W, PV.W, literal.y,
10741; EG-NEXT:    -65536(nan), 65535(9.183409e-41)
10742; EG-NEXT:     OR_INT * T0.W, PV.W, PS,
10743; EG-NEXT:     MOV * T12.X, PV.W,
10744; EG-NEXT:     MOV T0.Y, PV.X,
10745; EG-NEXT:     LSHR * T0.W, T19.Y, literal.x,
10746; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
10747; EG-NEXT:     BFE_INT T0.W, PV.W, 0.0, literal.x,
10748; EG-NEXT:     AND_INT * T1.W, PV.Y, literal.y,
10749; EG-NEXT:    8(1.121039e-44), 65535(9.183409e-41)
10750; EG-NEXT:     LSHL * T0.W, PV.W, literal.x,
10751; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
10752; EG-NEXT:     OR_INT * T0.W, T1.W, PV.W,
10753; EG-NEXT:     MOV T12.X, PV.W,
10754; EG-NEXT:     MOV T0.Y, T13.X,
10755; EG-NEXT:     LSHR * T0.W, T19.Y, literal.x,
10756; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
10757; EG-NEXT:     BFE_INT T0.W, PV.W, 0.0, literal.x,
10758; EG-NEXT:     AND_INT * T1.W, PV.Y, literal.y,
10759; EG-NEXT:    8(1.121039e-44), -65536(nan)
10760; EG-NEXT:     AND_INT * T0.W, PV.W, literal.x,
10761; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
10762; EG-NEXT:     OR_INT * T0.W, T1.W, PV.W,
10763; EG-NEXT:     MOV * T13.X, PV.W,
10764; EG-NEXT:     MOV T0.Y, PV.X,
10765; EG-NEXT:     ASHR * T0.W, T19.Y, literal.x,
10766; EG-NEXT:    24(3.363116e-44), 0(0.000000e+00)
10767; EG-NEXT:     AND_INT T1.W, PV.Y, literal.x,
10768; EG-NEXT:     LSHL * T0.W, PV.W, literal.y,
10769; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
10770; EG-NEXT:     OR_INT * T20.W, PV.W, PS,
10771; EG-NEXT:     MOV T13.X, PV.W,
10772; EG-NEXT:     MOV T0.Y, T8.X,
10773; EG-NEXT:     BFE_INT * T0.W, T19.Z, 0.0, literal.x,
10774; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
10775; EG-NEXT:     AND_INT T1.W, PV.Y, literal.x,
10776; EG-NEXT:     AND_INT * T0.W, PV.W, literal.y,
10777; EG-NEXT:    -65536(nan), 65535(9.183409e-41)
10778; EG-NEXT:     OR_INT * T0.W, PV.W, PS,
10779; EG-NEXT:     MOV * T8.X, PV.W,
10780; EG-NEXT:     MOV T0.Y, PV.X,
10781; EG-NEXT:     LSHR * T0.W, T19.Z, literal.x,
10782; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
10783; EG-NEXT:     BFE_INT T0.W, PV.W, 0.0, literal.x,
10784; EG-NEXT:     AND_INT * T1.W, PV.Y, literal.y,
10785; EG-NEXT:    8(1.121039e-44), 65535(9.183409e-41)
10786; EG-NEXT:     LSHL * T0.W, PV.W, literal.x,
10787; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
10788; EG-NEXT:     OR_INT * T0.W, T1.W, PV.W,
10789; EG-NEXT:     MOV T8.X, PV.W,
10790; EG-NEXT:     MOV T0.Y, T9.X,
10791; EG-NEXT:     LSHR * T0.W, T19.Z, literal.x,
10792; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
10793; EG-NEXT:     BFE_INT T0.W, PV.W, 0.0, literal.x,
10794; EG-NEXT:     AND_INT * T1.W, PV.Y, literal.y,
10795; EG-NEXT:    8(1.121039e-44), -65536(nan)
10796; EG-NEXT:     AND_INT * T0.W, PV.W, literal.x,
10797; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
10798; EG-NEXT:     OR_INT * T0.W, T1.W, PV.W,
10799; EG-NEXT:     MOV * T9.X, PV.W,
10800; EG-NEXT:     MOV T0.Y, PV.X,
10801; EG-NEXT:     ASHR * T0.W, T19.Z, literal.x,
10802; EG-NEXT:    24(3.363116e-44), 0(0.000000e+00)
10803; EG-NEXT:     AND_INT T1.W, PV.Y, literal.x,
10804; EG-NEXT:     LSHL * T0.W, PV.W, literal.y,
10805; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
10806; EG-NEXT:    ALU clause starting at 117:
10807; EG-NEXT:     OR_INT * T19.Y, T1.W, T0.W,
10808; EG-NEXT:     MOV T9.X, PV.Y,
10809; EG-NEXT:     MOV T0.Y, T4.X,
10810; EG-NEXT:     BFE_INT * T0.W, T19.W, 0.0, literal.x,
10811; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
10812; EG-NEXT:     AND_INT T1.W, PV.Y, literal.x,
10813; EG-NEXT:     AND_INT * T0.W, PV.W, literal.y,
10814; EG-NEXT:    -65536(nan), 65535(9.183409e-41)
10815; EG-NEXT:     OR_INT * T0.W, PV.W, PS,
10816; EG-NEXT:     MOV * T4.X, PV.W,
10817; EG-NEXT:     MOV T0.Y, PV.X,
10818; EG-NEXT:     LSHR * T0.W, T19.W, literal.x,
10819; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
10820; EG-NEXT:     BFE_INT T0.W, PV.W, 0.0, literal.x,
10821; EG-NEXT:     AND_INT * T1.W, PV.Y, literal.y,
10822; EG-NEXT:    8(1.121039e-44), 65535(9.183409e-41)
10823; EG-NEXT:     LSHL * T0.W, PV.W, literal.x,
10824; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
10825; EG-NEXT:     OR_INT * T0.W, T1.W, PV.W,
10826; EG-NEXT:     MOV T4.X, PV.W,
10827; EG-NEXT:     MOV T0.Y, T5.X,
10828; EG-NEXT:     LSHR * T0.W, T19.W, literal.x,
10829; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
10830; EG-NEXT:     BFE_INT T0.W, PV.W, 0.0, literal.x,
10831; EG-NEXT:     AND_INT * T1.W, PV.Y, literal.y,
10832; EG-NEXT:    8(1.121039e-44), -65536(nan)
10833; EG-NEXT:     AND_INT * T0.W, PV.W, literal.x,
10834; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
10835; EG-NEXT:     OR_INT * T0.W, T1.W, PV.W,
10836; EG-NEXT:     MOV * T5.X, PV.W,
10837; EG-NEXT:     MOV T0.Y, PV.X,
10838; EG-NEXT:     ASHR T0.W, T19.W, literal.x,
10839; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.y,
10840; EG-NEXT:    24(3.363116e-44), 16(2.242078e-44)
10841; EG-NEXT:     LSHR T21.X, PS, literal.x,
10842; EG-NEXT:     AND_INT T1.W, PV.Y, literal.y,
10843; EG-NEXT:     LSHL * T0.W, PV.W, literal.z,
10844; EG-NEXT:    2(2.802597e-45), 65535(9.183409e-41)
10845; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
10846; EG-NEXT:     LSHR T22.X, KC0[2].Y, literal.x,
10847; EG-NEXT:     OR_INT * T19.W, PV.W, PS,
10848; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
10849; EG-NEXT:     MOV T5.X, PV.W,
10850; EG-NEXT:     MOV * T20.X, T16.X,
10851; EG-NEXT:     MOV * T20.Z, T12.X,
10852; EG-NEXT:     MOV T19.X, T8.X,
10853; EG-NEXT:     MOV * T19.Z, T4.X, BS:VEC_120/SCL_212
10854;
10855; GFX12-LABEL: constant_sextload_v16i8_to_v16i16:
10856; GFX12:       ; %bb.0:
10857; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
10858; GFX12-NEXT:    s_wait_kmcnt 0x0
10859; GFX12-NEXT:    s_load_b128 s[4:7], s[2:3], 0x0
10860; GFX12-NEXT:    s_wait_kmcnt 0x0
10861; GFX12-NEXT:    s_ashr_i64 s[2:3], s[6:7], 56
10862; GFX12-NEXT:    s_lshr_b32 s8, s6, 16
10863; GFX12-NEXT:    s_lshr_b32 s9, s7, 16
10864; GFX12-NEXT:    s_bfe_i32 s3, s7, 0x80000
10865; GFX12-NEXT:    s_sext_i32_i16 s7, s7
10866; GFX12-NEXT:    s_ashr_i32 s16, s6, 24
10867; GFX12-NEXT:    s_bfe_i32 s17, s6, 0x80000
10868; GFX12-NEXT:    s_sext_i32_i16 s6, s6
10869; GFX12-NEXT:    s_lshr_b32 s10, s4, 16
10870; GFX12-NEXT:    s_lshr_b32 s11, s5, 16
10871; GFX12-NEXT:    s_ashr_i32 s12, s5, 16
10872; GFX12-NEXT:    s_bfe_i32 s13, s5, 0x80000
10873; GFX12-NEXT:    s_sext_i32_i16 s5, s5
10874; GFX12-NEXT:    s_ashr_i32 s14, s4, 24
10875; GFX12-NEXT:    s_bfe_i32 s15, s4, 0x80000
10876; GFX12-NEXT:    s_sext_i32_i16 s4, s4
10877; GFX12-NEXT:    s_bfe_i32 s9, s9, 0x80000
10878; GFX12-NEXT:    s_lshr_b32 s7, s7, 8
10879; GFX12-NEXT:    s_bfe_i32 s8, s8, 0x80000
10880; GFX12-NEXT:    s_lshr_b32 s6, s6, 8
10881; GFX12-NEXT:    s_lshr_b32 s12, s12, 8
10882; GFX12-NEXT:    s_bfe_i32 s11, s11, 0x80000
10883; GFX12-NEXT:    s_lshr_b32 s5, s5, 8
10884; GFX12-NEXT:    s_bfe_i32 s10, s10, 0x80000
10885; GFX12-NEXT:    s_lshr_b32 s4, s4, 8
10886; GFX12-NEXT:    s_pack_ll_b32_b16 s2, s9, s2
10887; GFX12-NEXT:    s_pack_ll_b32_b16 s3, s3, s7
10888; GFX12-NEXT:    s_pack_ll_b32_b16 s6, s17, s6
10889; GFX12-NEXT:    s_pack_ll_b32_b16 s7, s8, s16
10890; GFX12-NEXT:    s_pack_ll_b32_b16 s11, s11, s12
10891; GFX12-NEXT:    s_pack_ll_b32_b16 s5, s13, s5
10892; GFX12-NEXT:    s_pack_ll_b32_b16 s10, s10, s14
10893; GFX12-NEXT:    s_pack_ll_b32_b16 s4, s15, s4
10894; GFX12-NEXT:    v_dual_mov_b32 v8, 0 :: v_dual_mov_b32 v1, s7
10895; GFX12-NEXT:    v_dual_mov_b32 v0, s6 :: v_dual_mov_b32 v3, s2
10896; GFX12-NEXT:    v_dual_mov_b32 v2, s3 :: v_dual_mov_b32 v5, s10
10897; GFX12-NEXT:    v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v7, s11
10898; GFX12-NEXT:    v_mov_b32_e32 v6, s5
10899; GFX12-NEXT:    s_clause 0x1
10900; GFX12-NEXT:    global_store_b128 v8, v[0:3], s[0:1] offset:16
10901; GFX12-NEXT:    global_store_b128 v8, v[4:7], s[0:1]
10902; GFX12-NEXT:    s_endpgm
10903  %load = load <16 x i8>, ptr addrspace(4) %in
10904  %ext = sext <16 x i8> %load to <16 x i16>
10905  store <16 x i16> %ext, ptr addrspace(1) %out
10906  ret void
10907}
10908
10909define amdgpu_kernel void @constant_zextload_v32i8_to_v32i16(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
10910; GFX6-NOHSA-LABEL: constant_zextload_v32i8_to_v32i16:
10911; GFX6-NOHSA:       ; %bb.0:
10912; GFX6-NOHSA-NEXT:    s_load_dwordx4 s[8:11], s[4:5], 0x9
10913; GFX6-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
10914; GFX6-NOHSA-NEXT:    s_load_dwordx8 s[0:7], s[10:11], 0x0
10915; GFX6-NOHSA-NEXT:    s_mov_b32 s11, 0xf000
10916; GFX6-NOHSA-NEXT:    s_mov_b32 s10, -1
10917; GFX6-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
10918; GFX6-NOHSA-NEXT:    s_and_b32 s12, s6, 0xff00
10919; GFX6-NOHSA-NEXT:    s_lshr_b32 s13, s6, 24
10920; GFX6-NOHSA-NEXT:    s_and_b32 s14, s7, 0xff00
10921; GFX6-NOHSA-NEXT:    s_lshr_b32 s15, s7, 24
10922; GFX6-NOHSA-NEXT:    s_and_b32 s16, s4, 0xff00
10923; GFX6-NOHSA-NEXT:    s_lshr_b32 s17, s4, 24
10924; GFX6-NOHSA-NEXT:    s_and_b32 s18, s5, 0xff00
10925; GFX6-NOHSA-NEXT:    s_lshr_b32 s19, s5, 24
10926; GFX6-NOHSA-NEXT:    s_and_b32 s20, s2, 0xff00
10927; GFX6-NOHSA-NEXT:    s_lshr_b32 s21, s2, 24
10928; GFX6-NOHSA-NEXT:    s_and_b32 s22, s3, 0xff00
10929; GFX6-NOHSA-NEXT:    s_lshr_b32 s23, s3, 24
10930; GFX6-NOHSA-NEXT:    s_and_b32 s24, s0, 0xff00
10931; GFX6-NOHSA-NEXT:    s_lshr_b32 s25, s0, 24
10932; GFX6-NOHSA-NEXT:    s_and_b32 s26, s1, 0xff00
10933; GFX6-NOHSA-NEXT:    s_lshr_b32 s27, s1, 24
10934; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s1
10935; GFX6-NOHSA-NEXT:    s_and_b32 s1, s1, 0xff
10936; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s0
10937; GFX6-NOHSA-NEXT:    s_and_b32 s0, s0, 0xff
10938; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s3
10939; GFX6-NOHSA-NEXT:    s_and_b32 s3, s3, 0xff
10940; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s2
10941; GFX6-NOHSA-NEXT:    s_and_b32 s2, s2, 0xff
10942; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v4, s5
10943; GFX6-NOHSA-NEXT:    s_and_b32 s5, s5, 0xff
10944; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v5, s4
10945; GFX6-NOHSA-NEXT:    s_and_b32 s4, s4, 0xff
10946; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v6, s7
10947; GFX6-NOHSA-NEXT:    s_and_b32 s7, s7, 0xff
10948; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v7, s6
10949; GFX6-NOHSA-NEXT:    s_and_b32 s6, s6, 0xff
10950; GFX6-NOHSA-NEXT:    v_alignbit_b32 v0, s27, v0, 16
10951; GFX6-NOHSA-NEXT:    s_lshl_b32 s26, s26, 8
10952; GFX6-NOHSA-NEXT:    v_alignbit_b32 v1, s25, v1, 16
10953; GFX6-NOHSA-NEXT:    s_lshl_b32 s24, s24, 8
10954; GFX6-NOHSA-NEXT:    v_alignbit_b32 v2, s23, v2, 16
10955; GFX6-NOHSA-NEXT:    s_lshl_b32 s22, s22, 8
10956; GFX6-NOHSA-NEXT:    v_alignbit_b32 v8, s21, v3, 16
10957; GFX6-NOHSA-NEXT:    s_lshl_b32 s20, s20, 8
10958; GFX6-NOHSA-NEXT:    v_alignbit_b32 v4, s19, v4, 16
10959; GFX6-NOHSA-NEXT:    s_lshl_b32 s18, s18, 8
10960; GFX6-NOHSA-NEXT:    v_alignbit_b32 v9, s17, v5, 16
10961; GFX6-NOHSA-NEXT:    s_lshl_b32 s16, s16, 8
10962; GFX6-NOHSA-NEXT:    v_alignbit_b32 v6, s15, v6, 16
10963; GFX6-NOHSA-NEXT:    s_lshl_b32 s14, s14, 8
10964; GFX6-NOHSA-NEXT:    v_alignbit_b32 v10, s13, v7, 16
10965; GFX6-NOHSA-NEXT:    s_lshl_b32 s12, s12, 8
10966; GFX6-NOHSA-NEXT:    v_and_b32_e32 v3, 0xff00ff, v0
10967; GFX6-NOHSA-NEXT:    s_or_b32 s1, s1, s26
10968; GFX6-NOHSA-NEXT:    v_and_b32_e32 v1, 0xff00ff, v1
10969; GFX6-NOHSA-NEXT:    s_or_b32 s0, s0, s24
10970; GFX6-NOHSA-NEXT:    v_and_b32_e32 v7, 0xff00ff, v2
10971; GFX6-NOHSA-NEXT:    s_or_b32 s3, s3, s22
10972; GFX6-NOHSA-NEXT:    v_and_b32_e32 v5, 0xff00ff, v8
10973; GFX6-NOHSA-NEXT:    s_or_b32 s2, s2, s20
10974; GFX6-NOHSA-NEXT:    v_and_b32_e32 v11, 0xff00ff, v4
10975; GFX6-NOHSA-NEXT:    s_or_b32 s5, s5, s18
10976; GFX6-NOHSA-NEXT:    v_and_b32_e32 v9, 0xff00ff, v9
10977; GFX6-NOHSA-NEXT:    s_or_b32 s4, s4, s16
10978; GFX6-NOHSA-NEXT:    v_and_b32_e32 v15, 0xff00ff, v6
10979; GFX6-NOHSA-NEXT:    s_or_b32 s7, s7, s14
10980; GFX6-NOHSA-NEXT:    s_or_b32 s6, s6, s12
10981; GFX6-NOHSA-NEXT:    v_and_b32_e32 v13, 0xff00ff, v10
10982; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v12, s6
10983; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v14, s7
10984; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[12:15], off, s[8:11], 0 offset:48
10985; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v8, s4
10986; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v10, s5
10987; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[8:11], off, s[8:11], 0 offset:32
10988; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v4, s2
10989; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v6, s3
10990; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[4:7], off, s[8:11], 0 offset:16
10991; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s0
10992; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s1
10993; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0
10994; GFX6-NOHSA-NEXT:    s_endpgm
10995;
10996; GFX7-HSA-LABEL: constant_zextload_v32i8_to_v32i16:
10997; GFX7-HSA:       ; %bb.0:
10998; GFX7-HSA-NEXT:    s_load_dwordx4 s[8:11], s[8:9], 0x0
10999; GFX7-HSA-NEXT:    s_waitcnt lgkmcnt(0)
11000; GFX7-HSA-NEXT:    s_load_dwordx8 s[0:7], s[10:11], 0x0
11001; GFX7-HSA-NEXT:    s_waitcnt lgkmcnt(0)
11002; GFX7-HSA-NEXT:    s_lshr_b32 s25, s1, 24
11003; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s1
11004; GFX7-HSA-NEXT:    v_alignbit_b32 v0, s25, v0, 16
11005; GFX7-HSA-NEXT:    s_lshr_b32 s23, s0, 24
11006; GFX7-HSA-NEXT:    v_and_b32_e32 v3, 0xff00ff, v0
11007; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s0
11008; GFX7-HSA-NEXT:    v_alignbit_b32 v0, s23, v0, 16
11009; GFX7-HSA-NEXT:    s_lshr_b32 s21, s3, 24
11010; GFX7-HSA-NEXT:    v_and_b32_e32 v1, 0xff00ff, v0
11011; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s3
11012; GFX7-HSA-NEXT:    v_alignbit_b32 v0, s21, v0, 16
11013; GFX7-HSA-NEXT:    s_lshr_b32 s19, s2, 24
11014; GFX7-HSA-NEXT:    s_and_b32 s24, s1, 0xff00
11015; GFX7-HSA-NEXT:    v_and_b32_e32 v7, 0xff00ff, v0
11016; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s2
11017; GFX7-HSA-NEXT:    s_and_b32 s22, s0, 0xff00
11018; GFX7-HSA-NEXT:    s_and_b32 s1, s1, 0xff
11019; GFX7-HSA-NEXT:    s_lshl_b32 s24, s24, 8
11020; GFX7-HSA-NEXT:    v_alignbit_b32 v0, s19, v0, 16
11021; GFX7-HSA-NEXT:    s_lshr_b32 s17, s5, 24
11022; GFX7-HSA-NEXT:    s_and_b32 s20, s3, 0xff00
11023; GFX7-HSA-NEXT:    s_or_b32 s24, s1, s24
11024; GFX7-HSA-NEXT:    s_and_b32 s0, s0, 0xff
11025; GFX7-HSA-NEXT:    s_lshl_b32 s1, s22, 8
11026; GFX7-HSA-NEXT:    v_and_b32_e32 v5, 0xff00ff, v0
11027; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s5
11028; GFX7-HSA-NEXT:    s_and_b32 s18, s2, 0xff00
11029; GFX7-HSA-NEXT:    s_or_b32 s22, s0, s1
11030; GFX7-HSA-NEXT:    s_and_b32 s0, s3, 0xff
11031; GFX7-HSA-NEXT:    s_lshl_b32 s1, s20, 8
11032; GFX7-HSA-NEXT:    v_alignbit_b32 v0, s17, v0, 16
11033; GFX7-HSA-NEXT:    s_lshr_b32 s15, s4, 24
11034; GFX7-HSA-NEXT:    s_and_b32 s16, s5, 0xff00
11035; GFX7-HSA-NEXT:    s_or_b32 s3, s0, s1
11036; GFX7-HSA-NEXT:    s_and_b32 s0, s2, 0xff
11037; GFX7-HSA-NEXT:    s_lshl_b32 s1, s18, 8
11038; GFX7-HSA-NEXT:    v_and_b32_e32 v11, 0xff00ff, v0
11039; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s4
11040; GFX7-HSA-NEXT:    s_and_b32 s14, s4, 0xff00
11041; GFX7-HSA-NEXT:    s_or_b32 s2, s0, s1
11042; GFX7-HSA-NEXT:    s_and_b32 s0, s5, 0xff
11043; GFX7-HSA-NEXT:    s_lshl_b32 s1, s16, 8
11044; GFX7-HSA-NEXT:    v_alignbit_b32 v0, s15, v0, 16
11045; GFX7-HSA-NEXT:    s_and_b32 s12, s7, 0xff00
11046; GFX7-HSA-NEXT:    s_lshr_b32 s13, s7, 24
11047; GFX7-HSA-NEXT:    s_or_b32 s5, s0, s1
11048; GFX7-HSA-NEXT:    v_and_b32_e32 v9, 0xff00ff, v0
11049; GFX7-HSA-NEXT:    s_and_b32 s0, s4, 0xff
11050; GFX7-HSA-NEXT:    s_lshl_b32 s1, s14, 8
11051; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s7
11052; GFX7-HSA-NEXT:    s_and_b32 s10, s6, 0xff00
11053; GFX7-HSA-NEXT:    s_or_b32 s4, s0, s1
11054; GFX7-HSA-NEXT:    v_alignbit_b32 v0, s13, v0, 16
11055; GFX7-HSA-NEXT:    s_and_b32 s0, s7, 0xff
11056; GFX7-HSA-NEXT:    s_lshl_b32 s1, s12, 8
11057; GFX7-HSA-NEXT:    s_lshr_b32 s11, s6, 24
11058; GFX7-HSA-NEXT:    v_and_b32_e32 v15, 0xff00ff, v0
11059; GFX7-HSA-NEXT:    s_or_b32 s0, s0, s1
11060; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s6
11061; GFX7-HSA-NEXT:    s_and_b32 s1, s6, 0xff
11062; GFX7-HSA-NEXT:    s_lshl_b32 s6, s10, 8
11063; GFX7-HSA-NEXT:    s_or_b32 s1, s1, s6
11064; GFX7-HSA-NEXT:    v_mov_b32_e32 v14, s0
11065; GFX7-HSA-NEXT:    s_add_u32 s0, s8, 48
11066; GFX7-HSA-NEXT:    v_mov_b32_e32 v12, s1
11067; GFX7-HSA-NEXT:    s_addc_u32 s1, s9, 0
11068; GFX7-HSA-NEXT:    v_mov_b32_e32 v17, s1
11069; GFX7-HSA-NEXT:    v_alignbit_b32 v0, s11, v0, 16
11070; GFX7-HSA-NEXT:    v_mov_b32_e32 v16, s0
11071; GFX7-HSA-NEXT:    s_add_u32 s0, s8, 32
11072; GFX7-HSA-NEXT:    v_and_b32_e32 v13, 0xff00ff, v0
11073; GFX7-HSA-NEXT:    s_addc_u32 s1, s9, 0
11074; GFX7-HSA-NEXT:    flat_store_dwordx4 v[16:17], v[12:15]
11075; GFX7-HSA-NEXT:    v_mov_b32_e32 v8, s4
11076; GFX7-HSA-NEXT:    v_mov_b32_e32 v13, s1
11077; GFX7-HSA-NEXT:    v_mov_b32_e32 v12, s0
11078; GFX7-HSA-NEXT:    s_add_u32 s0, s8, 16
11079; GFX7-HSA-NEXT:    v_mov_b32_e32 v10, s5
11080; GFX7-HSA-NEXT:    s_addc_u32 s1, s9, 0
11081; GFX7-HSA-NEXT:    flat_store_dwordx4 v[12:13], v[8:11]
11082; GFX7-HSA-NEXT:    v_mov_b32_e32 v4, s2
11083; GFX7-HSA-NEXT:    v_mov_b32_e32 v9, s1
11084; GFX7-HSA-NEXT:    v_mov_b32_e32 v6, s3
11085; GFX7-HSA-NEXT:    v_mov_b32_e32 v8, s0
11086; GFX7-HSA-NEXT:    flat_store_dwordx4 v[8:9], v[4:7]
11087; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s22
11088; GFX7-HSA-NEXT:    v_mov_b32_e32 v4, s8
11089; GFX7-HSA-NEXT:    v_mov_b32_e32 v2, s24
11090; GFX7-HSA-NEXT:    v_mov_b32_e32 v5, s9
11091; GFX7-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
11092; GFX7-HSA-NEXT:    s_endpgm
11093;
11094; GFX8-NOHSA-LABEL: constant_zextload_v32i8_to_v32i16:
11095; GFX8-NOHSA:       ; %bb.0:
11096; GFX8-NOHSA-NEXT:    s_load_dwordx4 s[8:11], s[4:5], 0x24
11097; GFX8-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
11098; GFX8-NOHSA-NEXT:    s_load_dwordx8 s[0:7], s[10:11], 0x0
11099; GFX8-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
11100; GFX8-NOHSA-NEXT:    s_lshr_b32 s14, s1, 24
11101; GFX8-NOHSA-NEXT:    s_bfe_u32 s15, s1, 0x80010
11102; GFX8-NOHSA-NEXT:    s_and_b32 s16, s1, 0xff
11103; GFX8-NOHSA-NEXT:    s_lshl_b32 s1, s1, 8
11104; GFX8-NOHSA-NEXT:    s_lshl_b32 s14, s14, 16
11105; GFX8-NOHSA-NEXT:    s_and_b32 s1, s1, 0xff0000
11106; GFX8-NOHSA-NEXT:    s_lshr_b32 s13, s0, 24
11107; GFX8-NOHSA-NEXT:    s_or_b32 s14, s15, s14
11108; GFX8-NOHSA-NEXT:    s_or_b32 s15, s16, s1
11109; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s0
11110; GFX8-NOHSA-NEXT:    s_and_b32 s1, s0, 0xff
11111; GFX8-NOHSA-NEXT:    s_lshl_b32 s0, s0, 8
11112; GFX8-NOHSA-NEXT:    s_and_b32 s0, s0, 0xff0000
11113; GFX8-NOHSA-NEXT:    v_alignbit_b32 v0, s13, v0, 16
11114; GFX8-NOHSA-NEXT:    s_or_b32 s13, s1, s0
11115; GFX8-NOHSA-NEXT:    s_lshr_b32 s0, s3, 24
11116; GFX8-NOHSA-NEXT:    s_lshl_b32 s0, s0, 16
11117; GFX8-NOHSA-NEXT:    s_bfe_u32 s1, s3, 0x80010
11118; GFX8-NOHSA-NEXT:    s_or_b32 s16, s1, s0
11119; GFX8-NOHSA-NEXT:    s_lshl_b32 s1, s3, 8
11120; GFX8-NOHSA-NEXT:    s_and_b32 s0, s3, 0xff
11121; GFX8-NOHSA-NEXT:    s_and_b32 s1, s1, 0xff0000
11122; GFX8-NOHSA-NEXT:    s_or_b32 s3, s0, s1
11123; GFX8-NOHSA-NEXT:    s_lshl_b32 s1, s2, 8
11124; GFX8-NOHSA-NEXT:    s_and_b32 s0, s2, 0xff
11125; GFX8-NOHSA-NEXT:    s_and_b32 s1, s1, 0xff0000
11126; GFX8-NOHSA-NEXT:    s_lshr_b32 s12, s2, 24
11127; GFX8-NOHSA-NEXT:    v_and_b32_e32 v1, 0xff00ff, v0
11128; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s2
11129; GFX8-NOHSA-NEXT:    s_or_b32 s2, s0, s1
11130; GFX8-NOHSA-NEXT:    s_lshr_b32 s0, s5, 24
11131; GFX8-NOHSA-NEXT:    s_lshl_b32 s0, s0, 16
11132; GFX8-NOHSA-NEXT:    s_bfe_u32 s1, s5, 0x80010
11133; GFX8-NOHSA-NEXT:    v_alignbit_b32 v0, s12, v0, 16
11134; GFX8-NOHSA-NEXT:    s_or_b32 s12, s1, s0
11135; GFX8-NOHSA-NEXT:    s_lshl_b32 s1, s5, 8
11136; GFX8-NOHSA-NEXT:    s_and_b32 s0, s5, 0xff
11137; GFX8-NOHSA-NEXT:    s_and_b32 s1, s1, 0xff0000
11138; GFX8-NOHSA-NEXT:    s_or_b32 s5, s0, s1
11139; GFX8-NOHSA-NEXT:    s_lshl_b32 s1, s4, 8
11140; GFX8-NOHSA-NEXT:    s_and_b32 s0, s4, 0xff
11141; GFX8-NOHSA-NEXT:    s_and_b32 s1, s1, 0xff0000
11142; GFX8-NOHSA-NEXT:    s_lshr_b32 s11, s4, 24
11143; GFX8-NOHSA-NEXT:    v_and_b32_e32 v3, 0xff00ff, v0
11144; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s4
11145; GFX8-NOHSA-NEXT:    s_or_b32 s4, s0, s1
11146; GFX8-NOHSA-NEXT:    s_lshr_b32 s0, s7, 24
11147; GFX8-NOHSA-NEXT:    s_lshl_b32 s0, s0, 16
11148; GFX8-NOHSA-NEXT:    s_bfe_u32 s1, s7, 0x80010
11149; GFX8-NOHSA-NEXT:    s_or_b32 s0, s1, s0
11150; GFX8-NOHSA-NEXT:    s_and_b32 s1, s7, 0xff
11151; GFX8-NOHSA-NEXT:    s_lshl_b32 s7, s7, 8
11152; GFX8-NOHSA-NEXT:    v_alignbit_b32 v0, s11, v0, 16
11153; GFX8-NOHSA-NEXT:    s_and_b32 s7, s7, 0xff0000
11154; GFX8-NOHSA-NEXT:    s_lshr_b32 s10, s6, 24
11155; GFX8-NOHSA-NEXT:    v_and_b32_e32 v5, 0xff00ff, v0
11156; GFX8-NOHSA-NEXT:    s_or_b32 s1, s1, s7
11157; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s6
11158; GFX8-NOHSA-NEXT:    s_and_b32 s7, s6, 0xff
11159; GFX8-NOHSA-NEXT:    s_lshl_b32 s6, s6, 8
11160; GFX8-NOHSA-NEXT:    s_and_b32 s6, s6, 0xff0000
11161; GFX8-NOHSA-NEXT:    s_or_b32 s6, s7, s6
11162; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v9, s0
11163; GFX8-NOHSA-NEXT:    s_add_u32 s0, s8, 48
11164; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v8, s1
11165; GFX8-NOHSA-NEXT:    s_addc_u32 s1, s9, 0
11166; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v11, s1
11167; GFX8-NOHSA-NEXT:    v_alignbit_b32 v0, s10, v0, 16
11168; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v10, s0
11169; GFX8-NOHSA-NEXT:    s_add_u32 s0, s8, 32
11170; GFX8-NOHSA-NEXT:    v_and_b32_e32 v7, 0xff00ff, v0
11171; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v6, s6
11172; GFX8-NOHSA-NEXT:    s_addc_u32 s1, s9, 0
11173; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[10:11], v[6:9]
11174; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s4
11175; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v9, s1
11176; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v8, s0
11177; GFX8-NOHSA-NEXT:    s_add_u32 s0, s8, 16
11178; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v6, s5
11179; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v7, s12
11180; GFX8-NOHSA-NEXT:    s_addc_u32 s1, s9, 0
11181; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[8:9], v[4:7]
11182; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s2
11183; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v7, s1
11184; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s3
11185; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s16
11186; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v6, s0
11187; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[6:7], v[2:5]
11188; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s13
11189; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s8
11190; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s15
11191; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v3, s14
11192; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s9
11193; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
11194; GFX8-NOHSA-NEXT:    s_endpgm
11195;
11196; EG-LABEL: constant_zextload_v32i8_to_v32i16:
11197; EG:       ; %bb.0:
11198; EG-NEXT:    ALU 1, @14, KC0[CB0:0-32], KC1[]
11199; EG-NEXT:    TEX 1 @10
11200; EG-NEXT:    ALU 103, @16, KC0[], KC1[]
11201; EG-NEXT:    ALU 104, @120, KC0[], KC1[]
11202; EG-NEXT:    ALU 41, @225, KC0[CB0:0-32], KC1[]
11203; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T36.XYZW, T42.X, 0
11204; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T37.XYZW, T41.X, 0
11205; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T38.XYZW, T40.X, 0
11206; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T35.XYZW, T39.X, 1
11207; EG-NEXT:    CF_END
11208; EG-NEXT:    Fetch clause starting at 10:
11209; EG-NEXT:     VTX_READ_128 T37.XYZW, T35.X, 16, #1
11210; EG-NEXT:     VTX_READ_128 T35.XYZW, T35.X, 0, #1
11211; EG-NEXT:    ALU clause starting at 14:
11212; EG-NEXT:     MOV * T0.Y, T16.X,
11213; EG-NEXT:     MOV * T35.X, KC0[2].Z,
11214; EG-NEXT:    ALU clause starting at 16:
11215; EG-NEXT:     AND_INT T0.W, T37.X, literal.x,
11216; EG-NEXT:     AND_INT * T1.W, T0.Y, literal.y,
11217; EG-NEXT:    255(3.573311e-43), -65536(nan)
11218; EG-NEXT:     OR_INT * T0.W, PS, PV.W,
11219; EG-NEXT:     MOV * T16.X, PV.W,
11220; EG-NEXT:     MOV T0.Y, PV.X,
11221; EG-NEXT:     LSHL * T0.W, T37.X, literal.x,
11222; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
11223; EG-NEXT:     AND_INT T1.W, PV.Y, literal.x,
11224; EG-NEXT:     AND_INT * T0.W, PV.W, literal.y,
11225; EG-NEXT:    65535(9.183409e-41), 16711680(2.341805e-38)
11226; EG-NEXT:     OR_INT * T0.W, PV.W, PS,
11227; EG-NEXT:     MOV T16.X, PV.W,
11228; EG-NEXT:     MOV T0.Y, T17.X,
11229; EG-NEXT:     MOV * T0.W, literal.x,
11230; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
11231; EG-NEXT:     BFE_UINT T1.W, T37.X, literal.x, PV.W,
11232; EG-NEXT:     AND_INT * T2.W, PV.Y, literal.y,
11233; EG-NEXT:    16(2.242078e-44), -65536(nan)
11234; EG-NEXT:     OR_INT * T1.W, PS, PV.W,
11235; EG-NEXT:     MOV * T17.X, PV.W,
11236; EG-NEXT:     MOV T0.Y, PV.X,
11237; EG-NEXT:     LSHR * T1.W, T37.X, literal.x,
11238; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
11239; EG-NEXT:     AND_INT T2.W, PV.Y, literal.x,
11240; EG-NEXT:     AND_INT * T1.W, PV.W, literal.y,
11241; EG-NEXT:    65535(9.183409e-41), 16711680(2.341805e-38)
11242; EG-NEXT:     OR_INT * T36.Y, PV.W, PS,
11243; EG-NEXT:     MOV T17.X, PV.Y,
11244; EG-NEXT:     MOV * T0.Y, T12.X,
11245; EG-NEXT:     AND_INT T1.W, PV.Y, literal.x,
11246; EG-NEXT:     AND_INT * T2.W, T37.Y, literal.y,
11247; EG-NEXT:    -65536(nan), 255(3.573311e-43)
11248; EG-NEXT:     OR_INT * T1.W, PV.W, PS,
11249; EG-NEXT:     MOV * T12.X, PV.W,
11250; EG-NEXT:     MOV T0.Y, PV.X,
11251; EG-NEXT:     LSHL * T1.W, T37.Y, literal.x,
11252; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
11253; EG-NEXT:     AND_INT T2.W, PV.Y, literal.x,
11254; EG-NEXT:     AND_INT * T1.W, PV.W, literal.y,
11255; EG-NEXT:    65535(9.183409e-41), 16711680(2.341805e-38)
11256; EG-NEXT:     OR_INT * T1.W, PV.W, PS,
11257; EG-NEXT:     MOV T12.X, PV.W,
11258; EG-NEXT:     MOV T0.Y, T13.X,
11259; EG-NEXT:     BFE_UINT * T1.W, T37.Y, literal.x, T0.W,
11260; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
11261; EG-NEXT:     AND_INT * T2.W, PV.Y, literal.x,
11262; EG-NEXT:    -65536(nan), 0(0.000000e+00)
11263; EG-NEXT:     OR_INT * T1.W, PV.W, T1.W,
11264; EG-NEXT:     MOV * T13.X, PV.W,
11265; EG-NEXT:     MOV T0.Y, PV.X,
11266; EG-NEXT:     LSHR * T1.W, T37.Y, literal.x,
11267; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
11268; EG-NEXT:     AND_INT T2.W, PV.Y, literal.x,
11269; EG-NEXT:     AND_INT * T1.W, PV.W, literal.y,
11270; EG-NEXT:    65535(9.183409e-41), 16711680(2.341805e-38)
11271; EG-NEXT:     OR_INT * T36.W, PV.W, PS,
11272; EG-NEXT:     MOV T13.X, PV.W,
11273; EG-NEXT:     MOV * T0.Y, T8.X,
11274; EG-NEXT:     AND_INT T1.W, PV.Y, literal.x,
11275; EG-NEXT:     AND_INT * T2.W, T37.Z, literal.y,
11276; EG-NEXT:    -65536(nan), 255(3.573311e-43)
11277; EG-NEXT:     OR_INT * T1.W, PV.W, PS,
11278; EG-NEXT:     MOV * T8.X, PV.W,
11279; EG-NEXT:     MOV T0.Y, PV.X,
11280; EG-NEXT:     LSHL * T1.W, T37.Z, literal.x,
11281; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
11282; EG-NEXT:     AND_INT T2.W, PV.Y, literal.x,
11283; EG-NEXT:     AND_INT * T1.W, PV.W, literal.y,
11284; EG-NEXT:    65535(9.183409e-41), 16711680(2.341805e-38)
11285; EG-NEXT:     OR_INT * T1.W, PV.W, PS,
11286; EG-NEXT:     MOV T8.X, PV.W,
11287; EG-NEXT:     MOV T0.Y, T9.X,
11288; EG-NEXT:     BFE_UINT * T1.W, T37.Z, literal.x, T0.W,
11289; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
11290; EG-NEXT:     AND_INT * T2.W, PV.Y, literal.x,
11291; EG-NEXT:    -65536(nan), 0(0.000000e+00)
11292; EG-NEXT:     OR_INT * T1.W, PV.W, T1.W,
11293; EG-NEXT:     MOV * T9.X, PV.W,
11294; EG-NEXT:     MOV T0.Y, PV.X,
11295; EG-NEXT:     LSHR * T1.W, T37.Z, literal.x,
11296; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
11297; EG-NEXT:     AND_INT T2.W, PV.Y, literal.x,
11298; EG-NEXT:     AND_INT * T1.W, PV.W, literal.y,
11299; EG-NEXT:    65535(9.183409e-41), 16711680(2.341805e-38)
11300; EG-NEXT:     OR_INT * T37.Y, PV.W, PS,
11301; EG-NEXT:     MOV T9.X, PV.Y,
11302; EG-NEXT:     MOV * T0.Y, T4.X,
11303; EG-NEXT:     AND_INT T1.W, PV.Y, literal.x,
11304; EG-NEXT:     AND_INT * T2.W, T37.W, literal.y,
11305; EG-NEXT:    -65536(nan), 255(3.573311e-43)
11306; EG-NEXT:     OR_INT * T1.W, PV.W, PS,
11307; EG-NEXT:     MOV * T4.X, PV.W,
11308; EG-NEXT:     MOV T0.Y, PV.X,
11309; EG-NEXT:     LSHL * T1.W, T37.W, literal.x,
11310; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
11311; EG-NEXT:     AND_INT T2.W, PV.Y, literal.x,
11312; EG-NEXT:     AND_INT * T1.W, PV.W, literal.y,
11313; EG-NEXT:    65535(9.183409e-41), 16711680(2.341805e-38)
11314; EG-NEXT:     OR_INT * T1.W, PV.W, PS,
11315; EG-NEXT:     MOV T4.X, PV.W,
11316; EG-NEXT:     MOV T0.Y, T5.X,
11317; EG-NEXT:     BFE_UINT * T1.W, T37.W, literal.x, T0.W,
11318; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
11319; EG-NEXT:    ALU clause starting at 120:
11320; EG-NEXT:     AND_INT * T2.W, T0.Y, literal.x,
11321; EG-NEXT:    -65536(nan), 0(0.000000e+00)
11322; EG-NEXT:     OR_INT * T1.W, PV.W, T1.W,
11323; EG-NEXT:     MOV * T5.X, PV.W,
11324; EG-NEXT:     MOV T0.Y, PV.X,
11325; EG-NEXT:     LSHR * T1.W, T37.W, literal.x,
11326; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
11327; EG-NEXT:     AND_INT T2.W, PV.Y, literal.x,
11328; EG-NEXT:     AND_INT * T1.W, PV.W, literal.y,
11329; EG-NEXT:    65535(9.183409e-41), 16711680(2.341805e-38)
11330; EG-NEXT:     OR_INT * T37.W, PV.W, PS,
11331; EG-NEXT:     MOV T5.X, PV.W,
11332; EG-NEXT:     MOV * T0.Y, T32.X,
11333; EG-NEXT:     AND_INT T1.W, PV.Y, literal.x,
11334; EG-NEXT:     AND_INT * T2.W, T35.X, literal.y,
11335; EG-NEXT:    -65536(nan), 255(3.573311e-43)
11336; EG-NEXT:     OR_INT * T1.W, PV.W, PS,
11337; EG-NEXT:     MOV * T32.X, PV.W,
11338; EG-NEXT:     MOV T0.Y, PV.X,
11339; EG-NEXT:     LSHL * T1.W, T35.X, literal.x,
11340; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
11341; EG-NEXT:     AND_INT T2.W, PV.Y, literal.x,
11342; EG-NEXT:     AND_INT * T1.W, PV.W, literal.y,
11343; EG-NEXT:    65535(9.183409e-41), 16711680(2.341805e-38)
11344; EG-NEXT:     OR_INT * T1.W, PV.W, PS,
11345; EG-NEXT:     MOV T32.X, PV.W,
11346; EG-NEXT:     MOV T0.Y, T33.X,
11347; EG-NEXT:     BFE_UINT * T1.W, T35.X, literal.x, T0.W, BS:VEC_120/SCL_212
11348; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
11349; EG-NEXT:     AND_INT * T2.W, PV.Y, literal.x,
11350; EG-NEXT:    -65536(nan), 0(0.000000e+00)
11351; EG-NEXT:     OR_INT * T1.W, PV.W, T1.W,
11352; EG-NEXT:     MOV * T33.X, PV.W,
11353; EG-NEXT:     MOV T0.Y, PV.X,
11354; EG-NEXT:     LSHR * T1.W, T35.X, literal.x,
11355; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
11356; EG-NEXT:     AND_INT T2.W, PV.Y, literal.x,
11357; EG-NEXT:     AND_INT * T1.W, PV.W, literal.y,
11358; EG-NEXT:    65535(9.183409e-41), 16711680(2.341805e-38)
11359; EG-NEXT:     OR_INT * T38.Y, PV.W, PS,
11360; EG-NEXT:     MOV T33.X, PV.Y,
11361; EG-NEXT:     MOV * T0.Y, T28.X,
11362; EG-NEXT:     AND_INT T1.W, PV.Y, literal.x,
11363; EG-NEXT:     AND_INT * T2.W, T35.Y, literal.y,
11364; EG-NEXT:    -65536(nan), 255(3.573311e-43)
11365; EG-NEXT:     OR_INT * T1.W, PV.W, PS,
11366; EG-NEXT:     MOV * T28.X, PV.W,
11367; EG-NEXT:     MOV T0.Y, PV.X,
11368; EG-NEXT:     LSHL * T1.W, T35.Y, literal.x,
11369; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
11370; EG-NEXT:     AND_INT T2.W, PV.Y, literal.x,
11371; EG-NEXT:     AND_INT * T1.W, PV.W, literal.y,
11372; EG-NEXT:    65535(9.183409e-41), 16711680(2.341805e-38)
11373; EG-NEXT:     OR_INT * T1.W, PV.W, PS,
11374; EG-NEXT:     MOV T28.X, PV.W,
11375; EG-NEXT:     MOV T0.Y, T29.X,
11376; EG-NEXT:     BFE_UINT * T1.W, T35.Y, literal.x, T0.W,
11377; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
11378; EG-NEXT:     AND_INT * T2.W, PV.Y, literal.x,
11379; EG-NEXT:    -65536(nan), 0(0.000000e+00)
11380; EG-NEXT:     OR_INT * T1.W, PV.W, T1.W,
11381; EG-NEXT:     MOV * T29.X, PV.W,
11382; EG-NEXT:     MOV T0.Y, PV.X,
11383; EG-NEXT:     LSHR * T1.W, T35.Y, literal.x,
11384; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
11385; EG-NEXT:     AND_INT T2.W, PV.Y, literal.x,
11386; EG-NEXT:     AND_INT * T1.W, PV.W, literal.y,
11387; EG-NEXT:    65535(9.183409e-41), 16711680(2.341805e-38)
11388; EG-NEXT:     OR_INT * T38.W, PV.W, PS,
11389; EG-NEXT:     MOV T29.X, PV.W,
11390; EG-NEXT:     MOV * T0.Y, T24.X,
11391; EG-NEXT:     AND_INT T1.W, PV.Y, literal.x,
11392; EG-NEXT:     AND_INT * T2.W, T35.Z, literal.y,
11393; EG-NEXT:    -65536(nan), 255(3.573311e-43)
11394; EG-NEXT:     OR_INT * T1.W, PV.W, PS,
11395; EG-NEXT:     MOV * T24.X, PV.W,
11396; EG-NEXT:     MOV T0.Y, PV.X,
11397; EG-NEXT:     LSHL * T1.W, T35.Z, literal.x,
11398; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
11399; EG-NEXT:     AND_INT T2.W, PV.Y, literal.x,
11400; EG-NEXT:     AND_INT * T1.W, PV.W, literal.y,
11401; EG-NEXT:    65535(9.183409e-41), 16711680(2.341805e-38)
11402; EG-NEXT:     OR_INT * T1.W, PV.W, PS,
11403; EG-NEXT:     MOV T24.X, PV.W,
11404; EG-NEXT:     MOV T0.Y, T25.X,
11405; EG-NEXT:     BFE_UINT * T1.W, T35.Z, literal.x, T0.W,
11406; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
11407; EG-NEXT:     AND_INT * T2.W, PV.Y, literal.x,
11408; EG-NEXT:    -65536(nan), 0(0.000000e+00)
11409; EG-NEXT:     OR_INT * T1.W, PV.W, T1.W,
11410; EG-NEXT:     MOV * T25.X, PV.W,
11411; EG-NEXT:     MOV T0.Y, PV.X,
11412; EG-NEXT:     LSHR * T1.W, T35.Z, literal.x,
11413; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
11414; EG-NEXT:     AND_INT T2.W, PV.Y, literal.x,
11415; EG-NEXT:     AND_INT * T1.W, PV.W, literal.y,
11416; EG-NEXT:    65535(9.183409e-41), 16711680(2.341805e-38)
11417; EG-NEXT:     OR_INT * T35.Y, PV.W, PS,
11418; EG-NEXT:     MOV T25.X, PV.Y,
11419; EG-NEXT:     MOV * T0.Y, T20.X,
11420; EG-NEXT:     AND_INT T1.W, PV.Y, literal.x,
11421; EG-NEXT:     AND_INT * T2.W, T35.W, literal.y,
11422; EG-NEXT:    -65536(nan), 255(3.573311e-43)
11423; EG-NEXT:     OR_INT * T1.W, PV.W, PS,
11424; EG-NEXT:     MOV * T20.X, PV.W,
11425; EG-NEXT:    ALU clause starting at 225:
11426; EG-NEXT:     MOV T0.Y, T20.X,
11427; EG-NEXT:     LSHL * T1.W, T35.W, literal.x,
11428; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
11429; EG-NEXT:     AND_INT T2.W, PV.Y, literal.x,
11430; EG-NEXT:     AND_INT * T1.W, PV.W, literal.y,
11431; EG-NEXT:    65535(9.183409e-41), 16711680(2.341805e-38)
11432; EG-NEXT:     OR_INT * T1.W, PV.W, PS,
11433; EG-NEXT:     MOV T20.X, PV.W,
11434; EG-NEXT:     MOV T0.Y, T21.X,
11435; EG-NEXT:     BFE_UINT * T0.W, T35.W, literal.x, T0.W,
11436; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
11437; EG-NEXT:     AND_INT * T1.W, PV.Y, literal.x,
11438; EG-NEXT:    -65536(nan), 0(0.000000e+00)
11439; EG-NEXT:     OR_INT * T0.W, PV.W, T0.W,
11440; EG-NEXT:     MOV * T21.X, PV.W,
11441; EG-NEXT:     MOV T0.Y, PV.X,
11442; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.x,
11443; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
11444; EG-NEXT:     LSHR T39.X, PV.W, literal.x,
11445; EG-NEXT:     LSHR * T40.X, KC0[2].Y, literal.x,
11446; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
11447; EG-NEXT:     LSHR T0.W, T35.W, literal.x,
11448; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.y,
11449; EG-NEXT:    8(1.121039e-44), 48(6.726233e-44)
11450; EG-NEXT:     LSHR T41.X, PS, literal.x,
11451; EG-NEXT:     AND_INT T0.Z, T0.Y, literal.y,
11452; EG-NEXT:     AND_INT T0.W, PV.W, literal.z,
11453; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.w,
11454; EG-NEXT:    2(2.802597e-45), 65535(9.183409e-41)
11455; EG-NEXT:    16711680(2.341805e-38), 32(4.484155e-44)
11456; EG-NEXT:     LSHR T42.X, PS, literal.x,
11457; EG-NEXT:     OR_INT * T35.W, PV.Z, PV.W,
11458; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
11459; EG-NEXT:     MOV T21.X, PV.W,
11460; EG-NEXT:     MOV * T36.X, T16.X,
11461; EG-NEXT:     MOV * T36.Z, T12.X,
11462; EG-NEXT:     MOV T37.X, T8.X,
11463; EG-NEXT:     MOV T37.Z, T4.X, BS:VEC_120/SCL_212
11464; EG-NEXT:     MOV * T38.X, T32.X,
11465; EG-NEXT:     MOV * T38.Z, T28.X,
11466; EG-NEXT:     MOV T35.X, T24.X,
11467; EG-NEXT:     MOV * T35.Z, T20.X, BS:VEC_120/SCL_212
11468;
11469; GFX12-LABEL: constant_zextload_v32i8_to_v32i16:
11470; GFX12:       ; %bb.0:
11471; GFX12-NEXT:    s_load_b128 s[8:11], s[4:5], 0x24
11472; GFX12-NEXT:    s_wait_kmcnt 0x0
11473; GFX12-NEXT:    s_load_b256 s[0:7], s[10:11], 0x0
11474; GFX12-NEXT:    s_wait_kmcnt 0x0
11475; GFX12-NEXT:    s_bfe_u32 s12, s7, 0x80008
11476; GFX12-NEXT:    s_lshr_b32 s13, s7, 24
11477; GFX12-NEXT:    s_bfe_u32 s33, s7, 0x80010
11478; GFX12-NEXT:    s_and_b32 s7, s7, 0xff
11479; GFX12-NEXT:    s_bfe_u32 s10, s6, 0x80008
11480; GFX12-NEXT:    s_lshr_b32 s11, s6, 24
11481; GFX12-NEXT:    s_pack_ll_b32_b16 s7, s7, s12
11482; GFX12-NEXT:    s_and_b32 s12, s6, 0xff
11483; GFX12-NEXT:    s_bfe_u32 s6, s6, 0x80010
11484; GFX12-NEXT:    s_bfe_u32 s14, s4, 0x80008
11485; GFX12-NEXT:    s_lshr_b32 s15, s4, 24
11486; GFX12-NEXT:    s_bfe_u32 s16, s5, 0x80008
11487; GFX12-NEXT:    s_lshr_b32 s17, s5, 24
11488; GFX12-NEXT:    s_bfe_u32 s30, s5, 0x80010
11489; GFX12-NEXT:    s_and_b32 s5, s5, 0xff
11490; GFX12-NEXT:    s_bfe_u32 s31, s4, 0x80010
11491; GFX12-NEXT:    s_and_b32 s4, s4, 0xff
11492; GFX12-NEXT:    s_bfe_u32 s18, s2, 0x80008
11493; GFX12-NEXT:    s_lshr_b32 s19, s2, 24
11494; GFX12-NEXT:    s_bfe_u32 s20, s3, 0x80008
11495; GFX12-NEXT:    s_lshr_b32 s21, s3, 24
11496; GFX12-NEXT:    s_bfe_u32 s28, s3, 0x80010
11497; GFX12-NEXT:    s_and_b32 s3, s3, 0xff
11498; GFX12-NEXT:    s_bfe_u32 s29, s2, 0x80010
11499; GFX12-NEXT:    s_and_b32 s2, s2, 0xff
11500; GFX12-NEXT:    s_pack_ll_b32_b16 s13, s33, s13
11501; GFX12-NEXT:    s_pack_ll_b32_b16 s10, s12, s10
11502; GFX12-NEXT:    s_pack_ll_b32_b16 s6, s6, s11
11503; GFX12-NEXT:    s_bfe_u32 s22, s0, 0x80008
11504; GFX12-NEXT:    s_lshr_b32 s23, s0, 24
11505; GFX12-NEXT:    s_bfe_u32 s24, s1, 0x80008
11506; GFX12-NEXT:    s_lshr_b32 s25, s1, 24
11507; GFX12-NEXT:    s_bfe_u32 s26, s1, 0x80010
11508; GFX12-NEXT:    s_and_b32 s1, s1, 0xff
11509; GFX12-NEXT:    s_bfe_u32 s27, s0, 0x80010
11510; GFX12-NEXT:    s_and_b32 s0, s0, 0xff
11511; GFX12-NEXT:    s_pack_ll_b32_b16 s17, s30, s17
11512; GFX12-NEXT:    s_pack_ll_b32_b16 s5, s5, s16
11513; GFX12-NEXT:    s_pack_ll_b32_b16 s15, s31, s15
11514; GFX12-NEXT:    s_pack_ll_b32_b16 s4, s4, s14
11515; GFX12-NEXT:    v_dual_mov_b32 v16, 0 :: v_dual_mov_b32 v1, s6
11516; GFX12-NEXT:    s_pack_ll_b32_b16 s21, s28, s21
11517; GFX12-NEXT:    s_pack_ll_b32_b16 s3, s3, s20
11518; GFX12-NEXT:    s_pack_ll_b32_b16 s19, s29, s19
11519; GFX12-NEXT:    s_pack_ll_b32_b16 s2, s2, s18
11520; GFX12-NEXT:    v_dual_mov_b32 v0, s10 :: v_dual_mov_b32 v3, s13
11521; GFX12-NEXT:    v_dual_mov_b32 v2, s7 :: v_dual_mov_b32 v5, s15
11522; GFX12-NEXT:    s_pack_ll_b32_b16 s25, s26, s25
11523; GFX12-NEXT:    s_pack_ll_b32_b16 s1, s1, s24
11524; GFX12-NEXT:    s_pack_ll_b32_b16 s23, s27, s23
11525; GFX12-NEXT:    s_pack_ll_b32_b16 s0, s0, s22
11526; GFX12-NEXT:    v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v7, s17
11527; GFX12-NEXT:    v_dual_mov_b32 v6, s5 :: v_dual_mov_b32 v9, s19
11528; GFX12-NEXT:    v_dual_mov_b32 v8, s2 :: v_dual_mov_b32 v11, s21
11529; GFX12-NEXT:    v_dual_mov_b32 v10, s3 :: v_dual_mov_b32 v13, s23
11530; GFX12-NEXT:    v_dual_mov_b32 v12, s0 :: v_dual_mov_b32 v15, s25
11531; GFX12-NEXT:    v_mov_b32_e32 v14, s1
11532; GFX12-NEXT:    s_clause 0x3
11533; GFX12-NEXT:    global_store_b128 v16, v[0:3], s[8:9] offset:48
11534; GFX12-NEXT:    global_store_b128 v16, v[4:7], s[8:9] offset:32
11535; GFX12-NEXT:    global_store_b128 v16, v[8:11], s[8:9] offset:16
11536; GFX12-NEXT:    global_store_b128 v16, v[12:15], s[8:9]
11537; GFX12-NEXT:    s_endpgm
11538  %load = load <32 x i8>, ptr addrspace(4) %in
11539  %ext = zext <32 x i8> %load to <32 x i16>
11540  store <32 x i16> %ext, ptr addrspace(1) %out
11541  ret void
11542}
11543
11544define amdgpu_kernel void @constant_sextload_v32i8_to_v32i16(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
11545; GFX6-NOHSA-LABEL: constant_sextload_v32i8_to_v32i16:
11546; GFX6-NOHSA:       ; %bb.0:
11547; GFX6-NOHSA-NEXT:    s_load_dwordx4 s[8:11], s[4:5], 0x9
11548; GFX6-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
11549; GFX6-NOHSA-NEXT:    s_load_dwordx8 s[0:7], s[10:11], 0x0
11550; GFX6-NOHSA-NEXT:    s_mov_b32 s11, 0xf000
11551; GFX6-NOHSA-NEXT:    s_mov_b32 s10, -1
11552; GFX6-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
11553; GFX6-NOHSA-NEXT:    s_ashr_i32 s12, s1, 24
11554; GFX6-NOHSA-NEXT:    s_bfe_i32 s13, s1, 0x80010
11555; GFX6-NOHSA-NEXT:    s_bfe_i32 s14, s1, 0x80008
11556; GFX6-NOHSA-NEXT:    s_sext_i32_i8 s1, s1
11557; GFX6-NOHSA-NEXT:    s_ashr_i32 s15, s0, 24
11558; GFX6-NOHSA-NEXT:    s_bfe_i32 s16, s0, 0x80010
11559; GFX6-NOHSA-NEXT:    s_bfe_i32 s17, s0, 0x80008
11560; GFX6-NOHSA-NEXT:    s_sext_i32_i8 s0, s0
11561; GFX6-NOHSA-NEXT:    s_ashr_i32 s18, s3, 24
11562; GFX6-NOHSA-NEXT:    s_bfe_i32 s19, s3, 0x80010
11563; GFX6-NOHSA-NEXT:    s_bfe_i32 s20, s3, 0x80008
11564; GFX6-NOHSA-NEXT:    s_sext_i32_i8 s3, s3
11565; GFX6-NOHSA-NEXT:    s_ashr_i32 s21, s2, 24
11566; GFX6-NOHSA-NEXT:    s_bfe_i32 s22, s2, 0x80010
11567; GFX6-NOHSA-NEXT:    s_bfe_i32 s23, s2, 0x80008
11568; GFX6-NOHSA-NEXT:    s_sext_i32_i8 s2, s2
11569; GFX6-NOHSA-NEXT:    s_ashr_i32 s24, s5, 24
11570; GFX6-NOHSA-NEXT:    s_bfe_i32 s25, s5, 0x80010
11571; GFX6-NOHSA-NEXT:    s_bfe_i32 s26, s5, 0x80008
11572; GFX6-NOHSA-NEXT:    s_sext_i32_i8 s5, s5
11573; GFX6-NOHSA-NEXT:    s_ashr_i32 s27, s4, 24
11574; GFX6-NOHSA-NEXT:    s_bfe_i32 s28, s4, 0x80010
11575; GFX6-NOHSA-NEXT:    s_bfe_i32 s29, s4, 0x80008
11576; GFX6-NOHSA-NEXT:    s_sext_i32_i8 s4, s4
11577; GFX6-NOHSA-NEXT:    s_ashr_i32 s30, s7, 24
11578; GFX6-NOHSA-NEXT:    s_bfe_i32 s31, s7, 0x80010
11579; GFX6-NOHSA-NEXT:    s_bfe_i32 s33, s7, 0x80008
11580; GFX6-NOHSA-NEXT:    s_sext_i32_i8 s7, s7
11581; GFX6-NOHSA-NEXT:    s_ashr_i32 s34, s6, 24
11582; GFX6-NOHSA-NEXT:    s_bfe_i32 s35, s6, 0x80010
11583; GFX6-NOHSA-NEXT:    s_bfe_i32 s36, s6, 0x80008
11584; GFX6-NOHSA-NEXT:    s_sext_i32_i8 s6, s6
11585; GFX6-NOHSA-NEXT:    s_lshl_b32 s12, s12, 16
11586; GFX6-NOHSA-NEXT:    s_and_b32 s13, s13, 0xffff
11587; GFX6-NOHSA-NEXT:    s_lshl_b32 s14, s14, 16
11588; GFX6-NOHSA-NEXT:    s_and_b32 s1, s1, 0xffff
11589; GFX6-NOHSA-NEXT:    s_lshl_b32 s15, s15, 16
11590; GFX6-NOHSA-NEXT:    s_and_b32 s16, s16, 0xffff
11591; GFX6-NOHSA-NEXT:    s_lshl_b32 s17, s17, 16
11592; GFX6-NOHSA-NEXT:    s_and_b32 s0, s0, 0xffff
11593; GFX6-NOHSA-NEXT:    s_lshl_b32 s18, s18, 16
11594; GFX6-NOHSA-NEXT:    s_and_b32 s19, s19, 0xffff
11595; GFX6-NOHSA-NEXT:    s_lshl_b32 s20, s20, 16
11596; GFX6-NOHSA-NEXT:    s_and_b32 s3, s3, 0xffff
11597; GFX6-NOHSA-NEXT:    s_lshl_b32 s21, s21, 16
11598; GFX6-NOHSA-NEXT:    s_and_b32 s22, s22, 0xffff
11599; GFX6-NOHSA-NEXT:    s_lshl_b32 s23, s23, 16
11600; GFX6-NOHSA-NEXT:    s_and_b32 s2, s2, 0xffff
11601; GFX6-NOHSA-NEXT:    s_lshl_b32 s24, s24, 16
11602; GFX6-NOHSA-NEXT:    s_and_b32 s25, s25, 0xffff
11603; GFX6-NOHSA-NEXT:    s_lshl_b32 s26, s26, 16
11604; GFX6-NOHSA-NEXT:    s_and_b32 s5, s5, 0xffff
11605; GFX6-NOHSA-NEXT:    s_lshl_b32 s27, s27, 16
11606; GFX6-NOHSA-NEXT:    s_and_b32 s28, s28, 0xffff
11607; GFX6-NOHSA-NEXT:    s_lshl_b32 s29, s29, 16
11608; GFX6-NOHSA-NEXT:    s_and_b32 s4, s4, 0xffff
11609; GFX6-NOHSA-NEXT:    s_lshl_b32 s30, s30, 16
11610; GFX6-NOHSA-NEXT:    s_and_b32 s31, s31, 0xffff
11611; GFX6-NOHSA-NEXT:    s_lshl_b32 s33, s33, 16
11612; GFX6-NOHSA-NEXT:    s_and_b32 s7, s7, 0xffff
11613; GFX6-NOHSA-NEXT:    s_lshl_b32 s34, s34, 16
11614; GFX6-NOHSA-NEXT:    s_and_b32 s35, s35, 0xffff
11615; GFX6-NOHSA-NEXT:    s_lshl_b32 s36, s36, 16
11616; GFX6-NOHSA-NEXT:    s_and_b32 s6, s6, 0xffff
11617; GFX6-NOHSA-NEXT:    s_or_b32 s12, s13, s12
11618; GFX6-NOHSA-NEXT:    s_or_b32 s1, s1, s14
11619; GFX6-NOHSA-NEXT:    s_or_b32 s13, s16, s15
11620; GFX6-NOHSA-NEXT:    s_or_b32 s0, s0, s17
11621; GFX6-NOHSA-NEXT:    s_or_b32 s14, s19, s18
11622; GFX6-NOHSA-NEXT:    s_or_b32 s3, s3, s20
11623; GFX6-NOHSA-NEXT:    s_or_b32 s15, s22, s21
11624; GFX6-NOHSA-NEXT:    s_or_b32 s2, s2, s23
11625; GFX6-NOHSA-NEXT:    s_or_b32 s16, s25, s24
11626; GFX6-NOHSA-NEXT:    s_or_b32 s5, s5, s26
11627; GFX6-NOHSA-NEXT:    s_or_b32 s17, s28, s27
11628; GFX6-NOHSA-NEXT:    s_or_b32 s18, s31, s30
11629; GFX6-NOHSA-NEXT:    s_or_b32 s7, s7, s33
11630; GFX6-NOHSA-NEXT:    s_or_b32 s19, s35, s34
11631; GFX6-NOHSA-NEXT:    s_or_b32 s6, s6, s36
11632; GFX6-NOHSA-NEXT:    s_or_b32 s4, s4, s29
11633; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s6
11634; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s19
11635; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s7
11636; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s18
11637; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:48
11638; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
11639; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s4
11640; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s17
11641; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s5
11642; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s16
11643; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:32
11644; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
11645; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s2
11646; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s15
11647; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s3
11648; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s14
11649; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:16
11650; GFX6-NOHSA-NEXT:    s_waitcnt expcnt(0)
11651; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v0, s0
11652; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v1, s13
11653; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v2, s1
11654; GFX6-NOHSA-NEXT:    v_mov_b32_e32 v3, s12
11655; GFX6-NOHSA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0
11656; GFX6-NOHSA-NEXT:    s_endpgm
11657;
11658; GFX7-HSA-LABEL: constant_sextload_v32i8_to_v32i16:
11659; GFX7-HSA:       ; %bb.0:
11660; GFX7-HSA-NEXT:    s_load_dwordx4 s[8:11], s[8:9], 0x0
11661; GFX7-HSA-NEXT:    s_waitcnt lgkmcnt(0)
11662; GFX7-HSA-NEXT:    s_load_dwordx8 s[0:7], s[10:11], 0x0
11663; GFX7-HSA-NEXT:    s_waitcnt lgkmcnt(0)
11664; GFX7-HSA-NEXT:    s_ashr_i32 s10, s1, 24
11665; GFX7-HSA-NEXT:    s_bfe_i32 s11, s1, 0x80010
11666; GFX7-HSA-NEXT:    s_bfe_i32 s12, s1, 0x80008
11667; GFX7-HSA-NEXT:    s_sext_i32_i8 s1, s1
11668; GFX7-HSA-NEXT:    s_lshl_b32 s10, s10, 16
11669; GFX7-HSA-NEXT:    s_and_b32 s11, s11, 0xffff
11670; GFX7-HSA-NEXT:    s_lshl_b32 s12, s12, 16
11671; GFX7-HSA-NEXT:    s_and_b32 s1, s1, 0xffff
11672; GFX7-HSA-NEXT:    s_ashr_i32 s13, s0, 24
11673; GFX7-HSA-NEXT:    s_or_b32 s10, s11, s10
11674; GFX7-HSA-NEXT:    s_or_b32 s11, s1, s12
11675; GFX7-HSA-NEXT:    s_bfe_i32 s12, s0, 0x80010
11676; GFX7-HSA-NEXT:    s_lshl_b32 s1, s13, 16
11677; GFX7-HSA-NEXT:    s_and_b32 s12, s12, 0xffff
11678; GFX7-HSA-NEXT:    s_or_b32 s12, s12, s1
11679; GFX7-HSA-NEXT:    s_bfe_i32 s1, s0, 0x80008
11680; GFX7-HSA-NEXT:    s_sext_i32_i8 s0, s0
11681; GFX7-HSA-NEXT:    s_lshl_b32 s1, s1, 16
11682; GFX7-HSA-NEXT:    s_and_b32 s0, s0, 0xffff
11683; GFX7-HSA-NEXT:    s_or_b32 s13, s0, s1
11684; GFX7-HSA-NEXT:    s_ashr_i32 s0, s3, 24
11685; GFX7-HSA-NEXT:    s_bfe_i32 s1, s3, 0x80010
11686; GFX7-HSA-NEXT:    s_lshl_b32 s0, s0, 16
11687; GFX7-HSA-NEXT:    s_and_b32 s1, s1, 0xffff
11688; GFX7-HSA-NEXT:    s_or_b32 s14, s1, s0
11689; GFX7-HSA-NEXT:    s_bfe_i32 s0, s3, 0x80008
11690; GFX7-HSA-NEXT:    s_sext_i32_i8 s1, s3
11691; GFX7-HSA-NEXT:    s_lshl_b32 s0, s0, 16
11692; GFX7-HSA-NEXT:    s_and_b32 s1, s1, 0xffff
11693; GFX7-HSA-NEXT:    s_or_b32 s3, s1, s0
11694; GFX7-HSA-NEXT:    s_ashr_i32 s0, s2, 24
11695; GFX7-HSA-NEXT:    s_bfe_i32 s1, s2, 0x80010
11696; GFX7-HSA-NEXT:    s_lshl_b32 s0, s0, 16
11697; GFX7-HSA-NEXT:    s_and_b32 s1, s1, 0xffff
11698; GFX7-HSA-NEXT:    s_or_b32 s15, s1, s0
11699; GFX7-HSA-NEXT:    s_bfe_i32 s0, s2, 0x80008
11700; GFX7-HSA-NEXT:    s_sext_i32_i8 s1, s2
11701; GFX7-HSA-NEXT:    s_lshl_b32 s0, s0, 16
11702; GFX7-HSA-NEXT:    s_and_b32 s1, s1, 0xffff
11703; GFX7-HSA-NEXT:    s_or_b32 s2, s1, s0
11704; GFX7-HSA-NEXT:    s_ashr_i32 s0, s5, 24
11705; GFX7-HSA-NEXT:    s_bfe_i32 s1, s5, 0x80010
11706; GFX7-HSA-NEXT:    s_lshl_b32 s0, s0, 16
11707; GFX7-HSA-NEXT:    s_and_b32 s1, s1, 0xffff
11708; GFX7-HSA-NEXT:    s_or_b32 s16, s1, s0
11709; GFX7-HSA-NEXT:    s_bfe_i32 s0, s5, 0x80008
11710; GFX7-HSA-NEXT:    s_sext_i32_i8 s1, s5
11711; GFX7-HSA-NEXT:    s_lshl_b32 s0, s0, 16
11712; GFX7-HSA-NEXT:    s_and_b32 s1, s1, 0xffff
11713; GFX7-HSA-NEXT:    s_or_b32 s5, s1, s0
11714; GFX7-HSA-NEXT:    s_ashr_i32 s0, s4, 24
11715; GFX7-HSA-NEXT:    s_bfe_i32 s1, s4, 0x80010
11716; GFX7-HSA-NEXT:    s_lshl_b32 s0, s0, 16
11717; GFX7-HSA-NEXT:    s_and_b32 s1, s1, 0xffff
11718; GFX7-HSA-NEXT:    s_or_b32 s17, s1, s0
11719; GFX7-HSA-NEXT:    s_bfe_i32 s0, s4, 0x80008
11720; GFX7-HSA-NEXT:    s_sext_i32_i8 s1, s4
11721; GFX7-HSA-NEXT:    s_lshl_b32 s0, s0, 16
11722; GFX7-HSA-NEXT:    s_and_b32 s1, s1, 0xffff
11723; GFX7-HSA-NEXT:    s_or_b32 s4, s1, s0
11724; GFX7-HSA-NEXT:    s_ashr_i32 s0, s7, 24
11725; GFX7-HSA-NEXT:    s_bfe_i32 s1, s7, 0x80010
11726; GFX7-HSA-NEXT:    s_lshl_b32 s0, s0, 16
11727; GFX7-HSA-NEXT:    s_and_b32 s1, s1, 0xffff
11728; GFX7-HSA-NEXT:    s_or_b32 s0, s1, s0
11729; GFX7-HSA-NEXT:    s_bfe_i32 s1, s7, 0x80008
11730; GFX7-HSA-NEXT:    s_sext_i32_i8 s7, s7
11731; GFX7-HSA-NEXT:    s_lshl_b32 s1, s1, 16
11732; GFX7-HSA-NEXT:    s_and_b32 s7, s7, 0xffff
11733; GFX7-HSA-NEXT:    s_or_b32 s1, s7, s1
11734; GFX7-HSA-NEXT:    s_ashr_i32 s7, s6, 24
11735; GFX7-HSA-NEXT:    s_bfe_i32 s18, s6, 0x80010
11736; GFX7-HSA-NEXT:    s_lshl_b32 s7, s7, 16
11737; GFX7-HSA-NEXT:    s_and_b32 s18, s18, 0xffff
11738; GFX7-HSA-NEXT:    s_or_b32 s7, s18, s7
11739; GFX7-HSA-NEXT:    s_bfe_i32 s18, s6, 0x80008
11740; GFX7-HSA-NEXT:    s_sext_i32_i8 s6, s6
11741; GFX7-HSA-NEXT:    s_lshl_b32 s18, s18, 16
11742; GFX7-HSA-NEXT:    s_and_b32 s6, s6, 0xffff
11743; GFX7-HSA-NEXT:    s_or_b32 s6, s6, s18
11744; GFX7-HSA-NEXT:    v_mov_b32_e32 v3, s0
11745; GFX7-HSA-NEXT:    s_add_u32 s0, s8, 48
11746; GFX7-HSA-NEXT:    v_mov_b32_e32 v2, s1
11747; GFX7-HSA-NEXT:    s_addc_u32 s1, s9, 0
11748; GFX7-HSA-NEXT:    v_mov_b32_e32 v5, s1
11749; GFX7-HSA-NEXT:    v_mov_b32_e32 v4, s0
11750; GFX7-HSA-NEXT:    s_add_u32 s0, s8, 32
11751; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s6
11752; GFX7-HSA-NEXT:    v_mov_b32_e32 v1, s7
11753; GFX7-HSA-NEXT:    s_addc_u32 s1, s9, 0
11754; GFX7-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
11755; GFX7-HSA-NEXT:    v_mov_b32_e32 v5, s1
11756; GFX7-HSA-NEXT:    v_mov_b32_e32 v4, s0
11757; GFX7-HSA-NEXT:    s_add_u32 s0, s8, 16
11758; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s4
11759; GFX7-HSA-NEXT:    v_mov_b32_e32 v1, s17
11760; GFX7-HSA-NEXT:    v_mov_b32_e32 v2, s5
11761; GFX7-HSA-NEXT:    v_mov_b32_e32 v3, s16
11762; GFX7-HSA-NEXT:    s_addc_u32 s1, s9, 0
11763; GFX7-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
11764; GFX7-HSA-NEXT:    v_mov_b32_e32 v5, s1
11765; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s2
11766; GFX7-HSA-NEXT:    v_mov_b32_e32 v1, s15
11767; GFX7-HSA-NEXT:    v_mov_b32_e32 v2, s3
11768; GFX7-HSA-NEXT:    v_mov_b32_e32 v3, s14
11769; GFX7-HSA-NEXT:    v_mov_b32_e32 v4, s0
11770; GFX7-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
11771; GFX7-HSA-NEXT:    v_mov_b32_e32 v4, s8
11772; GFX7-HSA-NEXT:    v_mov_b32_e32 v0, s13
11773; GFX7-HSA-NEXT:    v_mov_b32_e32 v1, s12
11774; GFX7-HSA-NEXT:    v_mov_b32_e32 v2, s11
11775; GFX7-HSA-NEXT:    v_mov_b32_e32 v3, s10
11776; GFX7-HSA-NEXT:    v_mov_b32_e32 v5, s9
11777; GFX7-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
11778; GFX7-HSA-NEXT:    s_endpgm
11779;
11780; GFX8-NOHSA-LABEL: constant_sextload_v32i8_to_v32i16:
11781; GFX8-NOHSA:       ; %bb.0:
11782; GFX8-NOHSA-NEXT:    s_load_dwordx4 s[8:11], s[4:5], 0x24
11783; GFX8-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
11784; GFX8-NOHSA-NEXT:    s_load_dwordx8 s[0:7], s[10:11], 0x0
11785; GFX8-NOHSA-NEXT:    s_waitcnt lgkmcnt(0)
11786; GFX8-NOHSA-NEXT:    s_lshr_b32 s17, s1, 16
11787; GFX8-NOHSA-NEXT:    s_sext_i32_i16 s18, s1
11788; GFX8-NOHSA-NEXT:    s_bfe_i32 s19, s1, 0x80000
11789; GFX8-NOHSA-NEXT:    s_ashr_i32 s1, s1, 16
11790; GFX8-NOHSA-NEXT:    s_lshl_b32 s18, s18, 8
11791; GFX8-NOHSA-NEXT:    s_lshl_b32 s1, s1, 8
11792; GFX8-NOHSA-NEXT:    s_bfe_i32 s17, s17, 0x80000
11793; GFX8-NOHSA-NEXT:    s_lshr_b32 s16, s0, 16
11794; GFX8-NOHSA-NEXT:    s_and_b32 s18, s18, 0xffff0000
11795; GFX8-NOHSA-NEXT:    s_and_b32 s19, 0xffff, s19
11796; GFX8-NOHSA-NEXT:    s_and_b32 s1, s1, 0xffff0000
11797; GFX8-NOHSA-NEXT:    s_and_b32 s17, 0xffff, s17
11798; GFX8-NOHSA-NEXT:    s_or_b32 s18, s19, s18
11799; GFX8-NOHSA-NEXT:    s_sext_i32_i16 s19, s0
11800; GFX8-NOHSA-NEXT:    s_bfe_i32 s20, s0, 0x80000
11801; GFX8-NOHSA-NEXT:    s_or_b32 s17, s17, s1
11802; GFX8-NOHSA-NEXT:    s_ashr_i32 s0, s0, 24
11803; GFX8-NOHSA-NEXT:    s_bfe_i32 s1, s16, 0x80000
11804; GFX8-NOHSA-NEXT:    s_lshl_b32 s0, s0, 16
11805; GFX8-NOHSA-NEXT:    s_and_b32 s1, 0xffff, s1
11806; GFX8-NOHSA-NEXT:    s_or_b32 s16, s1, s0
11807; GFX8-NOHSA-NEXT:    s_sext_i32_i16 s0, s3
11808; GFX8-NOHSA-NEXT:    s_lshl_b32 s19, s19, 8
11809; GFX8-NOHSA-NEXT:    s_lshl_b32 s0, s0, 8
11810; GFX8-NOHSA-NEXT:    s_bfe_i32 s1, s3, 0x80000
11811; GFX8-NOHSA-NEXT:    s_and_b32 s19, s19, 0xffff0000
11812; GFX8-NOHSA-NEXT:    s_and_b32 s20, 0xffff, s20
11813; GFX8-NOHSA-NEXT:    s_and_b32 s0, s0, 0xffff0000
11814; GFX8-NOHSA-NEXT:    s_and_b32 s1, 0xffff, s1
11815; GFX8-NOHSA-NEXT:    s_or_b32 s19, s20, s19
11816; GFX8-NOHSA-NEXT:    s_or_b32 s20, s1, s0
11817; GFX8-NOHSA-NEXT:    s_sext_i32_i16 s0, s2
11818; GFX8-NOHSA-NEXT:    s_lshl_b32 s0, s0, 8
11819; GFX8-NOHSA-NEXT:    s_bfe_i32 s1, s2, 0x80000
11820; GFX8-NOHSA-NEXT:    s_and_b32 s0, s0, 0xffff0000
11821; GFX8-NOHSA-NEXT:    s_and_b32 s1, 0xffff, s1
11822; GFX8-NOHSA-NEXT:    s_lshr_b32 s15, s3, 16
11823; GFX8-NOHSA-NEXT:    s_or_b32 s21, s1, s0
11824; GFX8-NOHSA-NEXT:    s_ashr_i32 s0, s3, 16
11825; GFX8-NOHSA-NEXT:    s_lshl_b32 s0, s0, 8
11826; GFX8-NOHSA-NEXT:    s_bfe_i32 s1, s15, 0x80000
11827; GFX8-NOHSA-NEXT:    s_lshr_b32 s14, s2, 16
11828; GFX8-NOHSA-NEXT:    s_and_b32 s0, s0, 0xffff0000
11829; GFX8-NOHSA-NEXT:    s_and_b32 s1, 0xffff, s1
11830; GFX8-NOHSA-NEXT:    s_or_b32 s3, s1, s0
11831; GFX8-NOHSA-NEXT:    s_ashr_i32 s0, s2, 24
11832; GFX8-NOHSA-NEXT:    s_bfe_i32 s1, s14, 0x80000
11833; GFX8-NOHSA-NEXT:    s_lshl_b32 s0, s0, 16
11834; GFX8-NOHSA-NEXT:    s_and_b32 s1, 0xffff, s1
11835; GFX8-NOHSA-NEXT:    s_or_b32 s2, s1, s0
11836; GFX8-NOHSA-NEXT:    s_sext_i32_i16 s0, s5
11837; GFX8-NOHSA-NEXT:    s_lshl_b32 s0, s0, 8
11838; GFX8-NOHSA-NEXT:    s_bfe_i32 s1, s5, 0x80000
11839; GFX8-NOHSA-NEXT:    s_and_b32 s0, s0, 0xffff0000
11840; GFX8-NOHSA-NEXT:    s_and_b32 s1, 0xffff, s1
11841; GFX8-NOHSA-NEXT:    s_or_b32 s14, s1, s0
11842; GFX8-NOHSA-NEXT:    s_sext_i32_i16 s0, s4
11843; GFX8-NOHSA-NEXT:    s_lshl_b32 s0, s0, 8
11844; GFX8-NOHSA-NEXT:    s_bfe_i32 s1, s4, 0x80000
11845; GFX8-NOHSA-NEXT:    s_and_b32 s0, s0, 0xffff0000
11846; GFX8-NOHSA-NEXT:    s_and_b32 s1, 0xffff, s1
11847; GFX8-NOHSA-NEXT:    s_lshr_b32 s13, s5, 16
11848; GFX8-NOHSA-NEXT:    s_or_b32 s15, s1, s0
11849; GFX8-NOHSA-NEXT:    s_ashr_i64 s[0:1], s[4:5], 56
11850; GFX8-NOHSA-NEXT:    s_bfe_i32 s1, s13, 0x80000
11851; GFX8-NOHSA-NEXT:    s_lshr_b32 s12, s4, 16
11852; GFX8-NOHSA-NEXT:    s_lshl_b32 s0, s0, 16
11853; GFX8-NOHSA-NEXT:    s_and_b32 s1, 0xffff, s1
11854; GFX8-NOHSA-NEXT:    s_or_b32 s5, s1, s0
11855; GFX8-NOHSA-NEXT:    s_ashr_i32 s0, s4, 24
11856; GFX8-NOHSA-NEXT:    s_bfe_i32 s1, s12, 0x80000
11857; GFX8-NOHSA-NEXT:    s_lshl_b32 s0, s0, 16
11858; GFX8-NOHSA-NEXT:    s_and_b32 s1, 0xffff, s1
11859; GFX8-NOHSA-NEXT:    s_or_b32 s4, s1, s0
11860; GFX8-NOHSA-NEXT:    s_sext_i32_i16 s0, s7
11861; GFX8-NOHSA-NEXT:    s_lshl_b32 s0, s0, 8
11862; GFX8-NOHSA-NEXT:    s_bfe_i32 s1, s7, 0x80000
11863; GFX8-NOHSA-NEXT:    s_and_b32 s0, s0, 0xffff0000
11864; GFX8-NOHSA-NEXT:    s_and_b32 s1, 0xffff, s1
11865; GFX8-NOHSA-NEXT:    s_lshr_b32 s10, s6, 16
11866; GFX8-NOHSA-NEXT:    s_lshr_b32 s11, s7, 16
11867; GFX8-NOHSA-NEXT:    s_or_b32 s0, s1, s0
11868; GFX8-NOHSA-NEXT:    s_sext_i32_i16 s1, s6
11869; GFX8-NOHSA-NEXT:    s_ashr_i32 s7, s7, 16
11870; GFX8-NOHSA-NEXT:    s_lshl_b32 s1, s1, 8
11871; GFX8-NOHSA-NEXT:    s_bfe_i32 s12, s6, 0x80000
11872; GFX8-NOHSA-NEXT:    s_lshl_b32 s7, s7, 8
11873; GFX8-NOHSA-NEXT:    s_bfe_i32 s11, s11, 0x80000
11874; GFX8-NOHSA-NEXT:    s_ashr_i32 s6, s6, 24
11875; GFX8-NOHSA-NEXT:    s_bfe_i32 s10, s10, 0x80000
11876; GFX8-NOHSA-NEXT:    s_and_b32 s1, s1, 0xffff0000
11877; GFX8-NOHSA-NEXT:    s_and_b32 s12, 0xffff, s12
11878; GFX8-NOHSA-NEXT:    s_and_b32 s7, s7, 0xffff0000
11879; GFX8-NOHSA-NEXT:    s_and_b32 s11, 0xffff, s11
11880; GFX8-NOHSA-NEXT:    s_lshl_b32 s6, s6, 16
11881; GFX8-NOHSA-NEXT:    s_and_b32 s10, 0xffff, s10
11882; GFX8-NOHSA-NEXT:    s_or_b32 s1, s12, s1
11883; GFX8-NOHSA-NEXT:    s_or_b32 s7, s11, s7
11884; GFX8-NOHSA-NEXT:    s_or_b32 s6, s10, s6
11885; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s0
11886; GFX8-NOHSA-NEXT:    s_add_u32 s0, s8, 48
11887; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s1
11888; GFX8-NOHSA-NEXT:    s_addc_u32 s1, s9, 0
11889; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s1
11890; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s0
11891; GFX8-NOHSA-NEXT:    s_add_u32 s0, s8, 32
11892; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s6
11893; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v3, s7
11894; GFX8-NOHSA-NEXT:    s_addc_u32 s1, s9, 0
11895; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
11896; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s1
11897; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s0
11898; GFX8-NOHSA-NEXT:    s_add_u32 s0, s8, 16
11899; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s15
11900; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s4
11901; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s14
11902; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v3, s5
11903; GFX8-NOHSA-NEXT:    s_addc_u32 s1, s9, 0
11904; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
11905; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s1
11906; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s21
11907; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s2
11908; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s20
11909; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v3, s3
11910; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s0
11911; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
11912; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v4, s8
11913; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v0, s19
11914; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v1, s16
11915; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v2, s18
11916; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v3, s17
11917; GFX8-NOHSA-NEXT:    v_mov_b32_e32 v5, s9
11918; GFX8-NOHSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
11919; GFX8-NOHSA-NEXT:    s_endpgm
11920;
11921; EG-LABEL: constant_sextload_v32i8_to_v32i16:
11922; EG:       ; %bb.0:
11923; EG-NEXT:    ALU 1, @14, KC0[CB0:0-32], KC1[]
11924; EG-NEXT:    TEX 1 @10
11925; EG-NEXT:    ALU 104, @16, KC0[], KC1[]
11926; EG-NEXT:    ALU 104, @121, KC0[], KC1[]
11927; EG-NEXT:    ALU 95, @226, KC0[CB0:0-32], KC1[]
11928; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T36.XYZW, T42.X, 0
11929; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T37.XYZW, T41.X, 0
11930; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T38.XYZW, T40.X, 0
11931; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T35.XYZW, T39.X, 1
11932; EG-NEXT:    CF_END
11933; EG-NEXT:    Fetch clause starting at 10:
11934; EG-NEXT:     VTX_READ_128 T37.XYZW, T35.X, 16, #1
11935; EG-NEXT:     VTX_READ_128 T35.XYZW, T35.X, 0, #1
11936; EG-NEXT:    ALU clause starting at 14:
11937; EG-NEXT:     MOV * T0.Y, T16.X,
11938; EG-NEXT:     MOV * T35.X, KC0[2].Z,
11939; EG-NEXT:    ALU clause starting at 16:
11940; EG-NEXT:     BFE_INT * T0.W, T37.X, 0.0, literal.x,
11941; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
11942; EG-NEXT:     AND_INT T0.W, PV.W, literal.x,
11943; EG-NEXT:     AND_INT * T1.W, T0.Y, literal.y,
11944; EG-NEXT:    65535(9.183409e-41), -65536(nan)
11945; EG-NEXT:     OR_INT * T0.W, PS, PV.W,
11946; EG-NEXT:     MOV * T16.X, PV.W,
11947; EG-NEXT:     MOV T0.Y, PV.X,
11948; EG-NEXT:     LSHR * T0.W, T37.X, literal.x,
11949; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
11950; EG-NEXT:     BFE_INT T0.W, PV.W, 0.0, literal.x,
11951; EG-NEXT:     AND_INT * T1.W, PV.Y, literal.y,
11952; EG-NEXT:    8(1.121039e-44), 65535(9.183409e-41)
11953; EG-NEXT:     LSHL * T0.W, PV.W, literal.x,
11954; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
11955; EG-NEXT:     OR_INT * T0.W, T1.W, PV.W,
11956; EG-NEXT:     MOV T16.X, PV.W,
11957; EG-NEXT:     MOV T0.Y, T17.X,
11958; EG-NEXT:     LSHR * T0.W, T37.X, literal.x, BS:VEC_120/SCL_212
11959; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
11960; EG-NEXT:     BFE_INT T0.W, PV.W, 0.0, literal.x,
11961; EG-NEXT:     AND_INT * T1.W, PV.Y, literal.y,
11962; EG-NEXT:    8(1.121039e-44), -65536(nan)
11963; EG-NEXT:     AND_INT * T0.W, PV.W, literal.x,
11964; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
11965; EG-NEXT:     OR_INT * T0.W, T1.W, PV.W,
11966; EG-NEXT:     MOV * T17.X, PV.W,
11967; EG-NEXT:     MOV T0.Y, PV.X,
11968; EG-NEXT:     ASHR * T0.W, T37.X, literal.x,
11969; EG-NEXT:    24(3.363116e-44), 0(0.000000e+00)
11970; EG-NEXT:     AND_INT T1.W, PV.Y, literal.x,
11971; EG-NEXT:     LSHL * T0.W, PV.W, literal.y,
11972; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
11973; EG-NEXT:     OR_INT * T36.Y, PV.W, PS,
11974; EG-NEXT:     MOV T17.X, PV.Y,
11975; EG-NEXT:     MOV T0.Y, T12.X,
11976; EG-NEXT:     BFE_INT * T0.W, T37.Y, 0.0, literal.x,
11977; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
11978; EG-NEXT:     AND_INT T1.W, PV.Y, literal.x,
11979; EG-NEXT:     AND_INT * T0.W, PV.W, literal.y,
11980; EG-NEXT:    -65536(nan), 65535(9.183409e-41)
11981; EG-NEXT:     OR_INT * T0.W, PV.W, PS,
11982; EG-NEXT:     MOV * T12.X, PV.W,
11983; EG-NEXT:     MOV T0.Y, PV.X,
11984; EG-NEXT:     LSHR * T0.W, T37.Y, literal.x,
11985; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
11986; EG-NEXT:     BFE_INT T0.W, PV.W, 0.0, literal.x,
11987; EG-NEXT:     AND_INT * T1.W, PV.Y, literal.y,
11988; EG-NEXT:    8(1.121039e-44), 65535(9.183409e-41)
11989; EG-NEXT:     LSHL * T0.W, PV.W, literal.x,
11990; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
11991; EG-NEXT:     OR_INT * T0.W, T1.W, PV.W,
11992; EG-NEXT:     MOV T12.X, PV.W,
11993; EG-NEXT:     MOV T0.Y, T13.X,
11994; EG-NEXT:     LSHR * T0.W, T37.Y, literal.x,
11995; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
11996; EG-NEXT:     BFE_INT T0.W, PV.W, 0.0, literal.x,
11997; EG-NEXT:     AND_INT * T1.W, PV.Y, literal.y,
11998; EG-NEXT:    8(1.121039e-44), -65536(nan)
11999; EG-NEXT:     AND_INT * T0.W, PV.W, literal.x,
12000; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
12001; EG-NEXT:     OR_INT * T0.W, T1.W, PV.W,
12002; EG-NEXT:     MOV * T13.X, PV.W,
12003; EG-NEXT:     MOV T0.Y, PV.X,
12004; EG-NEXT:     ASHR * T0.W, T37.Y, literal.x,
12005; EG-NEXT:    24(3.363116e-44), 0(0.000000e+00)
12006; EG-NEXT:     AND_INT T1.W, PV.Y, literal.x,
12007; EG-NEXT:     LSHL * T0.W, PV.W, literal.y,
12008; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
12009; EG-NEXT:     OR_INT * T36.W, PV.W, PS,
12010; EG-NEXT:     MOV T13.X, PV.W,
12011; EG-NEXT:     MOV T0.Y, T8.X,
12012; EG-NEXT:     BFE_INT * T0.W, T37.Z, 0.0, literal.x,
12013; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
12014; EG-NEXT:     AND_INT T1.W, PV.Y, literal.x,
12015; EG-NEXT:     AND_INT * T0.W, PV.W, literal.y,
12016; EG-NEXT:    -65536(nan), 65535(9.183409e-41)
12017; EG-NEXT:     OR_INT * T0.W, PV.W, PS,
12018; EG-NEXT:     MOV * T8.X, PV.W,
12019; EG-NEXT:     MOV T0.Y, PV.X,
12020; EG-NEXT:     LSHR * T0.W, T37.Z, literal.x,
12021; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
12022; EG-NEXT:     BFE_INT T0.W, PV.W, 0.0, literal.x,
12023; EG-NEXT:     AND_INT * T1.W, PV.Y, literal.y,
12024; EG-NEXT:    8(1.121039e-44), 65535(9.183409e-41)
12025; EG-NEXT:     LSHL * T0.W, PV.W, literal.x,
12026; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
12027; EG-NEXT:     OR_INT * T0.W, T1.W, PV.W,
12028; EG-NEXT:     MOV T8.X, PV.W,
12029; EG-NEXT:     MOV T0.Y, T9.X,
12030; EG-NEXT:     LSHR * T0.W, T37.Z, literal.x,
12031; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
12032; EG-NEXT:     BFE_INT T0.W, PV.W, 0.0, literal.x,
12033; EG-NEXT:     AND_INT * T1.W, PV.Y, literal.y,
12034; EG-NEXT:    8(1.121039e-44), -65536(nan)
12035; EG-NEXT:     AND_INT * T0.W, PV.W, literal.x,
12036; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
12037; EG-NEXT:     OR_INT * T0.W, T1.W, PV.W,
12038; EG-NEXT:     MOV * T9.X, PV.W,
12039; EG-NEXT:     MOV T0.Y, PV.X,
12040; EG-NEXT:     ASHR * T0.W, T37.Z, literal.x,
12041; EG-NEXT:    24(3.363116e-44), 0(0.000000e+00)
12042; EG-NEXT:     AND_INT T1.W, PV.Y, literal.x,
12043; EG-NEXT:     LSHL * T0.W, PV.W, literal.y,
12044; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
12045; EG-NEXT:    ALU clause starting at 121:
12046; EG-NEXT:     OR_INT * T37.Y, T1.W, T0.W,
12047; EG-NEXT:     MOV T9.X, PV.Y,
12048; EG-NEXT:     MOV T0.Y, T4.X,
12049; EG-NEXT:     BFE_INT * T0.W, T37.W, 0.0, literal.x,
12050; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
12051; EG-NEXT:     AND_INT T1.W, PV.Y, literal.x,
12052; EG-NEXT:     AND_INT * T0.W, PV.W, literal.y,
12053; EG-NEXT:    -65536(nan), 65535(9.183409e-41)
12054; EG-NEXT:     OR_INT * T0.W, PV.W, PS,
12055; EG-NEXT:     MOV * T4.X, PV.W,
12056; EG-NEXT:     MOV T0.Y, PV.X,
12057; EG-NEXT:     LSHR * T0.W, T37.W, literal.x,
12058; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
12059; EG-NEXT:     BFE_INT T0.W, PV.W, 0.0, literal.x,
12060; EG-NEXT:     AND_INT * T1.W, PV.Y, literal.y,
12061; EG-NEXT:    8(1.121039e-44), 65535(9.183409e-41)
12062; EG-NEXT:     LSHL * T0.W, PV.W, literal.x,
12063; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
12064; EG-NEXT:     OR_INT * T0.W, T1.W, PV.W,
12065; EG-NEXT:     MOV T4.X, PV.W,
12066; EG-NEXT:     MOV T0.Y, T5.X,
12067; EG-NEXT:     LSHR * T0.W, T37.W, literal.x,
12068; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
12069; EG-NEXT:     BFE_INT T0.W, PV.W, 0.0, literal.x,
12070; EG-NEXT:     AND_INT * T1.W, PV.Y, literal.y,
12071; EG-NEXT:    8(1.121039e-44), -65536(nan)
12072; EG-NEXT:     AND_INT * T0.W, PV.W, literal.x,
12073; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
12074; EG-NEXT:     OR_INT * T0.W, T1.W, PV.W,
12075; EG-NEXT:     MOV * T5.X, PV.W,
12076; EG-NEXT:     MOV T0.Y, PV.X,
12077; EG-NEXT:     ASHR * T0.W, T37.W, literal.x,
12078; EG-NEXT:    24(3.363116e-44), 0(0.000000e+00)
12079; EG-NEXT:     AND_INT T1.W, PV.Y, literal.x,
12080; EG-NEXT:     LSHL * T0.W, PV.W, literal.y,
12081; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
12082; EG-NEXT:     OR_INT * T37.W, PV.W, PS,
12083; EG-NEXT:     MOV T5.X, PV.W,
12084; EG-NEXT:     MOV T0.Y, T32.X,
12085; EG-NEXT:     BFE_INT * T0.W, T35.X, 0.0, literal.x, BS:VEC_120/SCL_212
12086; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
12087; EG-NEXT:     AND_INT T1.W, PV.Y, literal.x,
12088; EG-NEXT:     AND_INT * T0.W, PV.W, literal.y,
12089; EG-NEXT:    -65536(nan), 65535(9.183409e-41)
12090; EG-NEXT:     OR_INT * T0.W, PV.W, PS,
12091; EG-NEXT:     MOV * T32.X, PV.W,
12092; EG-NEXT:     MOV T0.Y, PV.X,
12093; EG-NEXT:     LSHR * T0.W, T35.X, literal.x,
12094; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
12095; EG-NEXT:     BFE_INT T0.W, PV.W, 0.0, literal.x,
12096; EG-NEXT:     AND_INT * T1.W, PV.Y, literal.y,
12097; EG-NEXT:    8(1.121039e-44), 65535(9.183409e-41)
12098; EG-NEXT:     LSHL * T0.W, PV.W, literal.x,
12099; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
12100; EG-NEXT:     OR_INT * T0.W, T1.W, PV.W,
12101; EG-NEXT:     MOV T32.X, PV.W,
12102; EG-NEXT:     MOV T0.Y, T33.X,
12103; EG-NEXT:     LSHR * T0.W, T35.X, literal.x, BS:VEC_120/SCL_212
12104; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
12105; EG-NEXT:     BFE_INT T0.W, PV.W, 0.0, literal.x,
12106; EG-NEXT:     AND_INT * T1.W, PV.Y, literal.y,
12107; EG-NEXT:    8(1.121039e-44), -65536(nan)
12108; EG-NEXT:     AND_INT * T0.W, PV.W, literal.x,
12109; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
12110; EG-NEXT:     OR_INT * T0.W, T1.W, PV.W,
12111; EG-NEXT:     MOV * T33.X, PV.W,
12112; EG-NEXT:     MOV T0.Y, PV.X,
12113; EG-NEXT:     ASHR * T0.W, T35.X, literal.x,
12114; EG-NEXT:    24(3.363116e-44), 0(0.000000e+00)
12115; EG-NEXT:     AND_INT T1.W, PV.Y, literal.x,
12116; EG-NEXT:     LSHL * T0.W, PV.W, literal.y,
12117; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
12118; EG-NEXT:     OR_INT * T38.Y, PV.W, PS,
12119; EG-NEXT:     MOV T33.X, PV.Y,
12120; EG-NEXT:     MOV T0.Y, T28.X,
12121; EG-NEXT:     BFE_INT * T0.W, T35.Y, 0.0, literal.x,
12122; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
12123; EG-NEXT:     AND_INT T1.W, PV.Y, literal.x,
12124; EG-NEXT:     AND_INT * T0.W, PV.W, literal.y,
12125; EG-NEXT:    -65536(nan), 65535(9.183409e-41)
12126; EG-NEXT:     OR_INT * T0.W, PV.W, PS,
12127; EG-NEXT:     MOV * T28.X, PV.W,
12128; EG-NEXT:     MOV T0.Y, PV.X,
12129; EG-NEXT:     LSHR * T0.W, T35.Y, literal.x,
12130; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
12131; EG-NEXT:     BFE_INT T0.W, PV.W, 0.0, literal.x,
12132; EG-NEXT:     AND_INT * T1.W, PV.Y, literal.y,
12133; EG-NEXT:    8(1.121039e-44), 65535(9.183409e-41)
12134; EG-NEXT:     LSHL * T0.W, PV.W, literal.x,
12135; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
12136; EG-NEXT:     OR_INT * T0.W, T1.W, PV.W,
12137; EG-NEXT:     MOV T28.X, PV.W,
12138; EG-NEXT:     MOV T0.Y, T29.X,
12139; EG-NEXT:     LSHR * T0.W, T35.Y, literal.x,
12140; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
12141; EG-NEXT:     BFE_INT T0.W, PV.W, 0.0, literal.x,
12142; EG-NEXT:     AND_INT * T1.W, PV.Y, literal.y,
12143; EG-NEXT:    8(1.121039e-44), -65536(nan)
12144; EG-NEXT:     AND_INT * T0.W, PV.W, literal.x,
12145; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
12146; EG-NEXT:     OR_INT * T0.W, T1.W, PV.W,
12147; EG-NEXT:     MOV * T29.X, PV.W,
12148; EG-NEXT:     MOV T0.Y, PV.X,
12149; EG-NEXT:     ASHR * T0.W, T35.Y, literal.x,
12150; EG-NEXT:    24(3.363116e-44), 0(0.000000e+00)
12151; EG-NEXT:    ALU clause starting at 226:
12152; EG-NEXT:     AND_INT T1.W, T0.Y, literal.x,
12153; EG-NEXT:     LSHL * T0.W, T0.W, literal.y,
12154; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
12155; EG-NEXT:     OR_INT * T38.W, PV.W, PS,
12156; EG-NEXT:     MOV T29.X, PV.W,
12157; EG-NEXT:     MOV T0.Y, T24.X,
12158; EG-NEXT:     BFE_INT * T0.W, T35.Z, 0.0, literal.x,
12159; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
12160; EG-NEXT:     AND_INT T1.W, PV.Y, literal.x,
12161; EG-NEXT:     AND_INT * T0.W, PV.W, literal.y,
12162; EG-NEXT:    -65536(nan), 65535(9.183409e-41)
12163; EG-NEXT:     OR_INT * T0.W, PV.W, PS,
12164; EG-NEXT:     MOV * T24.X, PV.W,
12165; EG-NEXT:     MOV T0.Y, PV.X,
12166; EG-NEXT:     LSHR * T0.W, T35.Z, literal.x,
12167; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
12168; EG-NEXT:     BFE_INT T0.W, PV.W, 0.0, literal.x,
12169; EG-NEXT:     AND_INT * T1.W, PV.Y, literal.y,
12170; EG-NEXT:    8(1.121039e-44), 65535(9.183409e-41)
12171; EG-NEXT:     LSHL * T0.W, PV.W, literal.x,
12172; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
12173; EG-NEXT:     OR_INT * T0.W, T1.W, PV.W,
12174; EG-NEXT:     MOV T24.X, PV.W,
12175; EG-NEXT:     MOV T0.Y, T25.X,
12176; EG-NEXT:     LSHR * T0.W, T35.Z, literal.x,
12177; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
12178; EG-NEXT:     BFE_INT T0.W, PV.W, 0.0, literal.x,
12179; EG-NEXT:     AND_INT * T1.W, PV.Y, literal.y,
12180; EG-NEXT:    8(1.121039e-44), -65536(nan)
12181; EG-NEXT:     AND_INT * T0.W, PV.W, literal.x,
12182; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
12183; EG-NEXT:     OR_INT * T0.W, T1.W, PV.W,
12184; EG-NEXT:     MOV * T25.X, PV.W,
12185; EG-NEXT:     MOV T0.Y, PV.X,
12186; EG-NEXT:     ASHR * T0.W, T35.Z, literal.x,
12187; EG-NEXT:    24(3.363116e-44), 0(0.000000e+00)
12188; EG-NEXT:     AND_INT T1.W, PV.Y, literal.x,
12189; EG-NEXT:     LSHL * T0.W, PV.W, literal.y,
12190; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
12191; EG-NEXT:     OR_INT * T35.Y, PV.W, PS,
12192; EG-NEXT:     MOV T25.X, PV.Y,
12193; EG-NEXT:     MOV T0.Y, T20.X,
12194; EG-NEXT:     BFE_INT * T0.W, T35.W, 0.0, literal.x,
12195; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
12196; EG-NEXT:     AND_INT T1.W, PV.Y, literal.x,
12197; EG-NEXT:     AND_INT * T0.W, PV.W, literal.y,
12198; EG-NEXT:    -65536(nan), 65535(9.183409e-41)
12199; EG-NEXT:     OR_INT * T0.W, PV.W, PS,
12200; EG-NEXT:     MOV * T20.X, PV.W,
12201; EG-NEXT:     MOV T0.Y, PV.X,
12202; EG-NEXT:     LSHR * T0.W, T35.W, literal.x,
12203; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
12204; EG-NEXT:     BFE_INT T0.W, PV.W, 0.0, literal.x,
12205; EG-NEXT:     AND_INT * T1.W, PV.Y, literal.y,
12206; EG-NEXT:    8(1.121039e-44), 65535(9.183409e-41)
12207; EG-NEXT:     LSHL * T0.W, PV.W, literal.x,
12208; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
12209; EG-NEXT:     OR_INT * T0.W, T1.W, PV.W,
12210; EG-NEXT:     MOV T20.X, PV.W,
12211; EG-NEXT:     MOV T0.Y, T21.X,
12212; EG-NEXT:     LSHR * T0.W, T35.W, literal.x,
12213; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
12214; EG-NEXT:     BFE_INT T0.W, PV.W, 0.0, literal.x,
12215; EG-NEXT:     AND_INT * T1.W, PV.Y, literal.y,
12216; EG-NEXT:    8(1.121039e-44), -65536(nan)
12217; EG-NEXT:     AND_INT * T0.W, PV.W, literal.x,
12218; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
12219; EG-NEXT:     OR_INT * T0.W, T1.W, PV.W,
12220; EG-NEXT:     MOV * T21.X, PV.W,
12221; EG-NEXT:     MOV T0.Y, PV.X,
12222; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.x,
12223; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
12224; EG-NEXT:     LSHR T39.X, PV.W, literal.x,
12225; EG-NEXT:     LSHR * T40.X, KC0[2].Y, literal.x,
12226; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
12227; EG-NEXT:     ASHR T0.W, T35.W, literal.x,
12228; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.y,
12229; EG-NEXT:    24(3.363116e-44), 48(6.726233e-44)
12230; EG-NEXT:     LSHR T41.X, PS, literal.x,
12231; EG-NEXT:     AND_INT T0.Z, T0.Y, literal.y,
12232; EG-NEXT:     LSHL T0.W, PV.W, literal.z,
12233; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.w,
12234; EG-NEXT:    2(2.802597e-45), 65535(9.183409e-41)
12235; EG-NEXT:    16(2.242078e-44), 32(4.484155e-44)
12236; EG-NEXT:     LSHR T42.X, PS, literal.x,
12237; EG-NEXT:     OR_INT * T35.W, PV.Z, PV.W,
12238; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
12239; EG-NEXT:     MOV T21.X, PV.W,
12240; EG-NEXT:     MOV * T36.X, T16.X,
12241; EG-NEXT:     MOV * T36.Z, T12.X,
12242; EG-NEXT:     MOV T37.X, T8.X,
12243; EG-NEXT:     MOV T37.Z, T4.X, BS:VEC_120/SCL_212
12244; EG-NEXT:     MOV * T38.X, T32.X,
12245; EG-NEXT:     MOV * T38.Z, T28.X,
12246; EG-NEXT:     MOV T35.X, T24.X,
12247; EG-NEXT:     MOV * T35.Z, T20.X, BS:VEC_120/SCL_212
12248;
12249; GFX12-LABEL: constant_sextload_v32i8_to_v32i16:
12250; GFX12:       ; %bb.0:
12251; GFX12-NEXT:    s_load_b128 s[8:11], s[4:5], 0x24
12252; GFX12-NEXT:    s_wait_kmcnt 0x0
12253; GFX12-NEXT:    s_load_b256 s[0:7], s[10:11], 0x0
12254; GFX12-NEXT:    s_wait_kmcnt 0x0
12255; GFX12-NEXT:    s_lshr_b32 s13, s5, 16
12256; GFX12-NEXT:    s_lshr_b32 s16, s0, 16
12257; GFX12-NEXT:    s_lshr_b32 s17, s1, 16
12258; GFX12-NEXT:    s_ashr_i32 s18, s1, 16
12259; GFX12-NEXT:    s_bfe_i32 s19, s1, 0x80000
12260; GFX12-NEXT:    s_sext_i32_i16 s20, s1
12261; GFX12-NEXT:    s_ashr_i32 s21, s0, 24
12262; GFX12-NEXT:    s_bfe_i32 s22, s0, 0x80000
12263; GFX12-NEXT:    s_sext_i32_i16 s23, s0
12264; GFX12-NEXT:    s_ashr_i64 s[0:1], s[4:5], 56
12265; GFX12-NEXT:    s_lshr_b32 s12, s4, 16
12266; GFX12-NEXT:    s_bfe_i32 s1, s5, 0x80000
12267; GFX12-NEXT:    s_sext_i32_i16 s5, s5
12268; GFX12-NEXT:    s_bfe_i32 s13, s13, 0x80000
12269; GFX12-NEXT:    s_lshr_b32 s5, s5, 8
12270; GFX12-NEXT:    s_pack_ll_b32_b16 s0, s13, s0
12271; GFX12-NEXT:    s_ashr_i32 s13, s4, 24
12272; GFX12-NEXT:    s_bfe_i32 s12, s12, 0x80000
12273; GFX12-NEXT:    s_pack_ll_b32_b16 s1, s1, s5
12274; GFX12-NEXT:    s_pack_ll_b32_b16 s5, s12, s13
12275; GFX12-NEXT:    s_sext_i32_i16 s12, s4
12276; GFX12-NEXT:    s_bfe_i32 s4, s4, 0x80000
12277; GFX12-NEXT:    s_lshr_b32 s12, s12, 8
12278; GFX12-NEXT:    s_ashr_i32 s13, s7, 16
12279; GFX12-NEXT:    s_pack_ll_b32_b16 s4, s4, s12
12280; GFX12-NEXT:    s_lshr_b32 s12, s13, 8
12281; GFX12-NEXT:    s_sext_i32_i16 s13, s7
12282; GFX12-NEXT:    s_lshr_b32 s11, s7, 16
12283; GFX12-NEXT:    s_bfe_i32 s7, s7, 0x80000
12284; GFX12-NEXT:    s_lshr_b32 s13, s13, 8
12285; GFX12-NEXT:    s_lshr_b32 s10, s6, 16
12286; GFX12-NEXT:    s_bfe_i32 s11, s11, 0x80000
12287; GFX12-NEXT:    s_pack_ll_b32_b16 s7, s7, s13
12288; GFX12-NEXT:    s_sext_i32_i16 s13, s6
12289; GFX12-NEXT:    s_lshr_b32 s14, s2, 16
12290; GFX12-NEXT:    s_lshr_b32 s15, s3, 16
12291; GFX12-NEXT:    s_ashr_i32 s24, s3, 16
12292; GFX12-NEXT:    s_bfe_i32 s25, s3, 0x80000
12293; GFX12-NEXT:    s_sext_i32_i16 s3, s3
12294; GFX12-NEXT:    s_ashr_i32 s26, s2, 24
12295; GFX12-NEXT:    s_bfe_i32 s27, s2, 0x80000
12296; GFX12-NEXT:    s_sext_i32_i16 s2, s2
12297; GFX12-NEXT:    s_pack_ll_b32_b16 s11, s11, s12
12298; GFX12-NEXT:    s_ashr_i32 s12, s6, 24
12299; GFX12-NEXT:    s_bfe_i32 s6, s6, 0x80000
12300; GFX12-NEXT:    s_lshr_b32 s13, s13, 8
12301; GFX12-NEXT:    s_bfe_i32 s10, s10, 0x80000
12302; GFX12-NEXT:    s_lshr_b32 s24, s24, 8
12303; GFX12-NEXT:    s_bfe_i32 s15, s15, 0x80000
12304; GFX12-NEXT:    s_lshr_b32 s3, s3, 8
12305; GFX12-NEXT:    s_bfe_i32 s14, s14, 0x80000
12306; GFX12-NEXT:    s_lshr_b32 s2, s2, 8
12307; GFX12-NEXT:    s_pack_ll_b32_b16 s6, s6, s13
12308; GFX12-NEXT:    s_pack_ll_b32_b16 s10, s10, s12
12309; GFX12-NEXT:    s_lshr_b32 s18, s18, 8
12310; GFX12-NEXT:    s_bfe_i32 s17, s17, 0x80000
12311; GFX12-NEXT:    s_lshr_b32 s20, s20, 8
12312; GFX12-NEXT:    s_bfe_i32 s16, s16, 0x80000
12313; GFX12-NEXT:    s_lshr_b32 s23, s23, 8
12314; GFX12-NEXT:    v_dual_mov_b32 v16, 0 :: v_dual_mov_b32 v1, s10
12315; GFX12-NEXT:    s_pack_ll_b32_b16 s15, s15, s24
12316; GFX12-NEXT:    s_pack_ll_b32_b16 s3, s25, s3
12317; GFX12-NEXT:    s_pack_ll_b32_b16 s14, s14, s26
12318; GFX12-NEXT:    s_pack_ll_b32_b16 s2, s27, s2
12319; GFX12-NEXT:    v_dual_mov_b32 v0, s6 :: v_dual_mov_b32 v3, s11
12320; GFX12-NEXT:    v_dual_mov_b32 v2, s7 :: v_dual_mov_b32 v5, s5
12321; GFX12-NEXT:    s_pack_ll_b32_b16 s17, s17, s18
12322; GFX12-NEXT:    s_pack_ll_b32_b16 s18, s19, s20
12323; GFX12-NEXT:    s_pack_ll_b32_b16 s16, s16, s21
12324; GFX12-NEXT:    s_pack_ll_b32_b16 s19, s22, s23
12325; GFX12-NEXT:    v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v7, s0
12326; GFX12-NEXT:    v_dual_mov_b32 v6, s1 :: v_dual_mov_b32 v9, s14
12327; GFX12-NEXT:    v_dual_mov_b32 v8, s2 :: v_dual_mov_b32 v11, s15
12328; GFX12-NEXT:    v_dual_mov_b32 v10, s3 :: v_dual_mov_b32 v13, s16
12329; GFX12-NEXT:    v_dual_mov_b32 v12, s19 :: v_dual_mov_b32 v15, s17
12330; GFX12-NEXT:    v_mov_b32_e32 v14, s18
12331; GFX12-NEXT:    s_clause 0x3
12332; GFX12-NEXT:    global_store_b128 v16, v[0:3], s[8:9] offset:48
12333; GFX12-NEXT:    global_store_b128 v16, v[4:7], s[8:9] offset:32
12334; GFX12-NEXT:    global_store_b128 v16, v[8:11], s[8:9] offset:16
12335; GFX12-NEXT:    global_store_b128 v16, v[12:15], s[8:9]
12336; GFX12-NEXT:    s_endpgm
12337  %load = load <32 x i8>, ptr addrspace(4) %in
12338  %ext = sext <32 x i8> %load to <32 x i16>
12339  store <32 x i16> %ext, ptr addrspace(1) %out
12340  ret void
12341}
12342
12343; XFUNC-LABEL: {{^}}constant_zextload_v64i8_to_v64i16:
12344; define amdgpu_kernel void @constant_zextload_v64i8_to_v64i16(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
12345;   %load = load <64 x i8>, ptr addrspace(4) %in
12346;   %ext = zext <64 x i8> %load to <64 x i16>
12347;   store <64 x i16> %ext, ptr addrspace(1) %out
12348;   ret void
12349; }
12350
12351; XFUNC-LABEL: {{^}}constant_sextload_v64i8_to_v64i16:
12352; define amdgpu_kernel void @constant_sextload_v64i8_to_v64i16(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
12353;   %load = load <64 x i8>, ptr addrspace(4) %in
12354;   %ext = sext <64 x i8> %load to <64 x i16>
12355;   store <64 x i16> %ext, ptr addrspace(1) %out
12356;   ret void
12357; }
12358
; Attribute group #0: referenced by the trailing `#0` on the kernel `define`
; lines in this file; it contains only the `nounwind` function attribute.
12359attributes #0 = { nounwind }
12360