xref: /llvm-project/llvm/test/CodeGen/AMDGPU/load-constant-i1.ll (revision 6206f5444fc0732e6495703c75a67f1f90f5b418)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2; RUN: llc -mtriple=amdgcn-- -verify-machineinstrs < %s | FileCheck -check-prefix=GFX6 %s
3; RUN: llc -mtriple=amdgcn-- -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GFX8 %s
4; RUN: llc -mtriple=r600-- -mcpu=cypress < %s | FileCheck -check-prefix=EG %s
5; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX12 %s
6
; Test case: load a single i1 through %in (ptr addrspace(4)) and store it
; unchanged through %out (ptr addrspace(1)).
; The assertion lines inside the function are autogenerated (see the note at
; the top of this file); regenerate them with utils/update_llc_test_checks.py
; rather than editing them by hand.
; In every recorded output the loaded value is ANDed with 1 before the store
; (v_and_b32_e32 / AND_INT / s_and_b32).
7define amdgpu_kernel void @constant_load_i1(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
8; GFX6-LABEL: constant_load_i1:
9; GFX6:       ; %bb.0:
10; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
11; GFX6-NEXT:    s_mov_b32 s7, 0xf000
12; GFX6-NEXT:    s_mov_b32 s6, -1
13; GFX6-NEXT:    s_mov_b32 s10, s6
14; GFX6-NEXT:    s_mov_b32 s11, s7
15; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
16; GFX6-NEXT:    s_mov_b32 s8, s2
17; GFX6-NEXT:    s_mov_b32 s9, s3
18; GFX6-NEXT:    buffer_load_ubyte v0, off, s[8:11], 0
19; GFX6-NEXT:    s_mov_b32 s4, s0
20; GFX6-NEXT:    s_mov_b32 s5, s1
21; GFX6-NEXT:    s_waitcnt vmcnt(0)
22; GFX6-NEXT:    v_and_b32_e32 v0, 1, v0
23; GFX6-NEXT:    buffer_store_byte v0, off, s[4:7], 0
24; GFX6-NEXT:    s_endpgm
25;
26; GFX8-LABEL: constant_load_i1:
27; GFX8:       ; %bb.0:
28; GFX8-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
29; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
30; GFX8-NEXT:    v_mov_b32_e32 v0, s2
31; GFX8-NEXT:    v_mov_b32_e32 v1, s3
32; GFX8-NEXT:    flat_load_ubyte v2, v[0:1]
33; GFX8-NEXT:    v_mov_b32_e32 v0, s0
34; GFX8-NEXT:    v_mov_b32_e32 v1, s1
35; GFX8-NEXT:    s_waitcnt vmcnt(0)
36; GFX8-NEXT:    v_and_b32_e32 v2, 1, v2
37; GFX8-NEXT:    flat_store_byte v[0:1], v2
38; GFX8-NEXT:    s_endpgm
39;
40; EG-LABEL: constant_load_i1:
41; EG:       ; %bb.0:
42; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
43; EG-NEXT:    TEX 0 @6
44; EG-NEXT:    ALU 11, @9, KC0[CB0:0-32], KC1[]
45; EG-NEXT:    MEM_RAT MSKOR T0.XW, T1.X
46; EG-NEXT:    CF_END
47; EG-NEXT:    PAD
48; EG-NEXT:    Fetch clause starting at 6:
49; EG-NEXT:     VTX_READ_8 T0.X, T0.X, 0, #1
50; EG-NEXT:    ALU clause starting at 8:
51; EG-NEXT:     MOV * T0.X, KC0[2].Z,
52; EG-NEXT:    ALU clause starting at 9:
53; EG-NEXT:     AND_INT T0.W, KC0[2].Y, literal.x,
54; EG-NEXT:     AND_INT * T1.W, T0.X, 1,
55; EG-NEXT:    3(4.203895e-45), 0(0.000000e+00)
56; EG-NEXT:     LSHL * T0.W, PV.W, literal.x,
57; EG-NEXT:    3(4.203895e-45), 0(0.000000e+00)
58; EG-NEXT:     LSHL T0.X, T1.W, PV.W,
59; EG-NEXT:     LSHL * T0.W, literal.x, PV.W,
60; EG-NEXT:    255(3.573311e-43), 0(0.000000e+00)
61; EG-NEXT:     MOV T0.Y, 0.0,
62; EG-NEXT:     MOV * T0.Z, 0.0,
63; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
64; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
65;
66; GFX12-LABEL: constant_load_i1:
67; GFX12:       ; %bb.0:
68; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
69; GFX12-NEXT:    s_wait_kmcnt 0x0
70; GFX12-NEXT:    s_load_u8 s2, s[2:3], 0x0
71; GFX12-NEXT:    s_wait_kmcnt 0x0
72; GFX12-NEXT:    s_and_b32 s2, s2, 1
73; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
74; GFX12-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
75; GFX12-NEXT:    global_store_b8 v0, v1, s[0:1]
76; GFX12-NEXT:    s_endpgm
77  %load = load i1, ptr addrspace(4) %in
78  store i1 %load, ptr addrspace(1) %out
79  ret void
80}
81
; Test case: load a <2 x i1> vector through %in (ptr addrspace(4)) and store
; it unchanged through %out (ptr addrspace(1)).
; The assertion lines inside the function are autogenerated (see the note at
; the top of this file); regenerate them with utils/update_llc_test_checks.py
; rather than editing them by hand.
; The recorded amdgcn outputs are a one-byte load followed directly by a
; one-byte store with no masking instruction; the recorded r600 output ANDs the
; loaded register with the literal 3.
82define amdgpu_kernel void @constant_load_v2i1(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
83; GFX6-LABEL: constant_load_v2i1:
84; GFX6:       ; %bb.0:
85; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
86; GFX6-NEXT:    s_mov_b32 s7, 0xf000
87; GFX6-NEXT:    s_mov_b32 s6, -1
88; GFX6-NEXT:    s_mov_b32 s10, s6
89; GFX6-NEXT:    s_mov_b32 s11, s7
90; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
91; GFX6-NEXT:    s_mov_b32 s8, s2
92; GFX6-NEXT:    s_mov_b32 s9, s3
93; GFX6-NEXT:    buffer_load_ubyte v0, off, s[8:11], 0
94; GFX6-NEXT:    s_mov_b32 s4, s0
95; GFX6-NEXT:    s_mov_b32 s5, s1
96; GFX6-NEXT:    s_waitcnt vmcnt(0)
97; GFX6-NEXT:    buffer_store_byte v0, off, s[4:7], 0
98; GFX6-NEXT:    s_endpgm
99;
100; GFX8-LABEL: constant_load_v2i1:
101; GFX8:       ; %bb.0:
102; GFX8-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
103; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
104; GFX8-NEXT:    v_mov_b32_e32 v0, s2
105; GFX8-NEXT:    v_mov_b32_e32 v1, s3
106; GFX8-NEXT:    flat_load_ubyte v2, v[0:1]
107; GFX8-NEXT:    v_mov_b32_e32 v0, s0
108; GFX8-NEXT:    v_mov_b32_e32 v1, s1
109; GFX8-NEXT:    s_waitcnt vmcnt(0)
110; GFX8-NEXT:    flat_store_byte v[0:1], v2
111; GFX8-NEXT:    s_endpgm
112;
113; EG-LABEL: constant_load_v2i1:
114; EG:       ; %bb.0:
115; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
116; EG-NEXT:    TEX 0 @6
117; EG-NEXT:    ALU 11, @9, KC0[CB0:0-32], KC1[]
118; EG-NEXT:    MEM_RAT MSKOR T0.XW, T1.X
119; EG-NEXT:    CF_END
120; EG-NEXT:    PAD
121; EG-NEXT:    Fetch clause starting at 6:
122; EG-NEXT:     VTX_READ_8 T0.X, T0.X, 0, #1
123; EG-NEXT:    ALU clause starting at 8:
124; EG-NEXT:     MOV * T0.X, KC0[2].Z,
125; EG-NEXT:    ALU clause starting at 9:
126; EG-NEXT:     AND_INT T0.W, KC0[2].Y, literal.x,
127; EG-NEXT:     AND_INT * T1.W, T0.X, literal.x,
128; EG-NEXT:    3(4.203895e-45), 0(0.000000e+00)
129; EG-NEXT:     LSHL * T0.W, PV.W, literal.x,
130; EG-NEXT:    3(4.203895e-45), 0(0.000000e+00)
131; EG-NEXT:     LSHL T0.X, T1.W, PV.W,
132; EG-NEXT:     LSHL * T0.W, literal.x, PV.W,
133; EG-NEXT:    255(3.573311e-43), 0(0.000000e+00)
134; EG-NEXT:     MOV T0.Y, 0.0,
135; EG-NEXT:     MOV * T0.Z, 0.0,
136; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
137; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
138;
139; GFX12-LABEL: constant_load_v2i1:
140; GFX12:       ; %bb.0:
141; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
142; GFX12-NEXT:    v_mov_b32_e32 v0, 0
143; GFX12-NEXT:    s_wait_kmcnt 0x0
144; GFX12-NEXT:    global_load_u8 v1, v0, s[2:3]
145; GFX12-NEXT:    s_wait_loadcnt 0x0
146; GFX12-NEXT:    global_store_b8 v0, v1, s[0:1]
147; GFX12-NEXT:    s_endpgm
148  %load = load <2 x i1>, ptr addrspace(4) %in
149  store <2 x i1> %load, ptr addrspace(1) %out
150  ret void
151}
152
; Test case: load a <3 x i1> vector through %in (ptr addrspace(4)) and store
; it unchanged through %out (ptr addrspace(1)).
; The assertion lines inside the function are autogenerated (see the note at
; the top of this file); regenerate them with utils/update_llc_test_checks.py
; rather than editing them by hand.
; The recorded amdgcn outputs are a one-byte load followed directly by a
; one-byte store. In the recorded r600 output the loaded register T0.X is
; shifted directly, with no AND_INT applied to it first.
153define amdgpu_kernel void @constant_load_v3i1(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
154; GFX6-LABEL: constant_load_v3i1:
155; GFX6:       ; %bb.0:
156; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
157; GFX6-NEXT:    s_mov_b32 s7, 0xf000
158; GFX6-NEXT:    s_mov_b32 s6, -1
159; GFX6-NEXT:    s_mov_b32 s10, s6
160; GFX6-NEXT:    s_mov_b32 s11, s7
161; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
162; GFX6-NEXT:    s_mov_b32 s8, s2
163; GFX6-NEXT:    s_mov_b32 s9, s3
164; GFX6-NEXT:    buffer_load_ubyte v0, off, s[8:11], 0
165; GFX6-NEXT:    s_mov_b32 s4, s0
166; GFX6-NEXT:    s_mov_b32 s5, s1
167; GFX6-NEXT:    s_waitcnt vmcnt(0)
168; GFX6-NEXT:    buffer_store_byte v0, off, s[4:7], 0
169; GFX6-NEXT:    s_endpgm
170;
171; GFX8-LABEL: constant_load_v3i1:
172; GFX8:       ; %bb.0:
173; GFX8-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
174; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
175; GFX8-NEXT:    v_mov_b32_e32 v0, s2
176; GFX8-NEXT:    v_mov_b32_e32 v1, s3
177; GFX8-NEXT:    flat_load_ubyte v2, v[0:1]
178; GFX8-NEXT:    v_mov_b32_e32 v0, s0
179; GFX8-NEXT:    v_mov_b32_e32 v1, s1
180; GFX8-NEXT:    s_waitcnt vmcnt(0)
181; GFX8-NEXT:    flat_store_byte v[0:1], v2
182; GFX8-NEXT:    s_endpgm
183;
184; EG-LABEL: constant_load_v3i1:
185; EG:       ; %bb.0:
186; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
187; EG-NEXT:    TEX 0 @6
188; EG-NEXT:    ALU 10, @9, KC0[CB0:0-32], KC1[]
189; EG-NEXT:    MEM_RAT MSKOR T0.XW, T1.X
190; EG-NEXT:    CF_END
191; EG-NEXT:    PAD
192; EG-NEXT:    Fetch clause starting at 6:
193; EG-NEXT:     VTX_READ_8 T0.X, T0.X, 0, #1
194; EG-NEXT:    ALU clause starting at 8:
195; EG-NEXT:     MOV * T0.X, KC0[2].Z,
196; EG-NEXT:    ALU clause starting at 9:
197; EG-NEXT:     AND_INT * T0.W, KC0[2].Y, literal.x,
198; EG-NEXT:    3(4.203895e-45), 0(0.000000e+00)
199; EG-NEXT:     LSHL * T0.W, PV.W, literal.x,
200; EG-NEXT:    3(4.203895e-45), 0(0.000000e+00)
201; EG-NEXT:     LSHL T0.X, T0.X, PV.W,
202; EG-NEXT:     LSHL * T0.W, literal.x, PV.W,
203; EG-NEXT:    255(3.573311e-43), 0(0.000000e+00)
204; EG-NEXT:     MOV T0.Y, 0.0,
205; EG-NEXT:     MOV * T0.Z, 0.0,
206; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
207; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
208;
209; GFX12-LABEL: constant_load_v3i1:
210; GFX12:       ; %bb.0:
211; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
212; GFX12-NEXT:    v_mov_b32_e32 v0, 0
213; GFX12-NEXT:    s_wait_kmcnt 0x0
214; GFX12-NEXT:    global_load_u8 v1, v0, s[2:3]
215; GFX12-NEXT:    s_wait_loadcnt 0x0
216; GFX12-NEXT:    global_store_b8 v0, v1, s[0:1]
217; GFX12-NEXT:    s_endpgm
218  %load = load <3 x i1>, ptr addrspace(4) %in
219  store <3 x i1> %load, ptr addrspace(1) %out
220  ret void
221}
222
; Test case: load a <4 x i1> vector through %in (ptr addrspace(4)) and store
; it unchanged through %out (ptr addrspace(1)).
; The assertion lines inside the function are autogenerated (see the note at
; the top of this file); regenerate them with utils/update_llc_test_checks.py
; rather than editing them by hand.
; The recorded amdgcn outputs are a one-byte load followed directly by a
; one-byte store with no masking instruction; the recorded r600 output ANDs the
; loaded register with the literal 15.
223define amdgpu_kernel void @constant_load_v4i1(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
224; GFX6-LABEL: constant_load_v4i1:
225; GFX6:       ; %bb.0:
226; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
227; GFX6-NEXT:    s_mov_b32 s7, 0xf000
228; GFX6-NEXT:    s_mov_b32 s6, -1
229; GFX6-NEXT:    s_mov_b32 s10, s6
230; GFX6-NEXT:    s_mov_b32 s11, s7
231; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
232; GFX6-NEXT:    s_mov_b32 s8, s2
233; GFX6-NEXT:    s_mov_b32 s9, s3
234; GFX6-NEXT:    buffer_load_ubyte v0, off, s[8:11], 0
235; GFX6-NEXT:    s_mov_b32 s4, s0
236; GFX6-NEXT:    s_mov_b32 s5, s1
237; GFX6-NEXT:    s_waitcnt vmcnt(0)
238; GFX6-NEXT:    buffer_store_byte v0, off, s[4:7], 0
239; GFX6-NEXT:    s_endpgm
240;
241; GFX8-LABEL: constant_load_v4i1:
242; GFX8:       ; %bb.0:
243; GFX8-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
244; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
245; GFX8-NEXT:    v_mov_b32_e32 v0, s2
246; GFX8-NEXT:    v_mov_b32_e32 v1, s3
247; GFX8-NEXT:    flat_load_ubyte v2, v[0:1]
248; GFX8-NEXT:    v_mov_b32_e32 v0, s0
249; GFX8-NEXT:    v_mov_b32_e32 v1, s1
250; GFX8-NEXT:    s_waitcnt vmcnt(0)
251; GFX8-NEXT:    flat_store_byte v[0:1], v2
252; GFX8-NEXT:    s_endpgm
253;
254; EG-LABEL: constant_load_v4i1:
255; EG:       ; %bb.0:
256; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
257; EG-NEXT:    TEX 0 @6
258; EG-NEXT:    ALU 11, @9, KC0[CB0:0-32], KC1[]
259; EG-NEXT:    MEM_RAT MSKOR T0.XW, T1.X
260; EG-NEXT:    CF_END
261; EG-NEXT:    PAD
262; EG-NEXT:    Fetch clause starting at 6:
263; EG-NEXT:     VTX_READ_8 T0.X, T0.X, 0, #1
264; EG-NEXT:    ALU clause starting at 8:
265; EG-NEXT:     MOV * T0.X, KC0[2].Z,
266; EG-NEXT:    ALU clause starting at 9:
267; EG-NEXT:     AND_INT T0.W, KC0[2].Y, literal.x,
268; EG-NEXT:     AND_INT * T1.W, T0.X, literal.y,
269; EG-NEXT:    3(4.203895e-45), 15(2.101948e-44)
270; EG-NEXT:     LSHL * T0.W, PV.W, literal.x,
271; EG-NEXT:    3(4.203895e-45), 0(0.000000e+00)
272; EG-NEXT:     LSHL T0.X, T1.W, PV.W,
273; EG-NEXT:     LSHL * T0.W, literal.x, PV.W,
274; EG-NEXT:    255(3.573311e-43), 0(0.000000e+00)
275; EG-NEXT:     MOV T0.Y, 0.0,
276; EG-NEXT:     MOV * T0.Z, 0.0,
277; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
278; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
279;
280; GFX12-LABEL: constant_load_v4i1:
281; GFX12:       ; %bb.0:
282; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
283; GFX12-NEXT:    v_mov_b32_e32 v0, 0
284; GFX12-NEXT:    s_wait_kmcnt 0x0
285; GFX12-NEXT:    global_load_u8 v1, v0, s[2:3]
286; GFX12-NEXT:    s_wait_loadcnt 0x0
287; GFX12-NEXT:    global_store_b8 v0, v1, s[0:1]
288; GFX12-NEXT:    s_endpgm
289  %load = load <4 x i1>, ptr addrspace(4) %in
290  store <4 x i1> %load, ptr addrspace(1) %out
291  ret void
292}
293
; Test case: load an <8 x i1> vector through %in (ptr addrspace(4)) and store
; it unchanged through %out (ptr addrspace(1)).
; The assertion lines inside the function are autogenerated (see the note at
; the top of this file); regenerate them with utils/update_llc_test_checks.py
; rather than editing them by hand.
; The recorded amdgcn outputs are a one-byte load followed directly by a
; one-byte store with no masking instruction; the recorded r600 output ANDs the
; loaded register with the literal 255.
294define amdgpu_kernel void @constant_load_v8i1(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
295; GFX6-LABEL: constant_load_v8i1:
296; GFX6:       ; %bb.0:
297; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
298; GFX6-NEXT:    s_mov_b32 s7, 0xf000
299; GFX6-NEXT:    s_mov_b32 s6, -1
300; GFX6-NEXT:    s_mov_b32 s10, s6
301; GFX6-NEXT:    s_mov_b32 s11, s7
302; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
303; GFX6-NEXT:    s_mov_b32 s8, s2
304; GFX6-NEXT:    s_mov_b32 s9, s3
305; GFX6-NEXT:    buffer_load_ubyte v0, off, s[8:11], 0
306; GFX6-NEXT:    s_mov_b32 s4, s0
307; GFX6-NEXT:    s_mov_b32 s5, s1
308; GFX6-NEXT:    s_waitcnt vmcnt(0)
309; GFX6-NEXT:    buffer_store_byte v0, off, s[4:7], 0
310; GFX6-NEXT:    s_endpgm
311;
312; GFX8-LABEL: constant_load_v8i1:
313; GFX8:       ; %bb.0:
314; GFX8-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
315; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
316; GFX8-NEXT:    v_mov_b32_e32 v0, s2
317; GFX8-NEXT:    v_mov_b32_e32 v1, s3
318; GFX8-NEXT:    flat_load_ubyte v2, v[0:1]
319; GFX8-NEXT:    v_mov_b32_e32 v0, s0
320; GFX8-NEXT:    v_mov_b32_e32 v1, s1
321; GFX8-NEXT:    s_waitcnt vmcnt(0)
322; GFX8-NEXT:    flat_store_byte v[0:1], v2
323; GFX8-NEXT:    s_endpgm
324;
325; EG-LABEL: constant_load_v8i1:
326; EG:       ; %bb.0:
327; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
328; EG-NEXT:    TEX 0 @6
329; EG-NEXT:    ALU 11, @9, KC0[CB0:0-32], KC1[]
330; EG-NEXT:    MEM_RAT MSKOR T0.XW, T1.X
331; EG-NEXT:    CF_END
332; EG-NEXT:    PAD
333; EG-NEXT:    Fetch clause starting at 6:
334; EG-NEXT:     VTX_READ_8 T0.X, T0.X, 0, #1
335; EG-NEXT:    ALU clause starting at 8:
336; EG-NEXT:     MOV * T0.X, KC0[2].Z,
337; EG-NEXT:    ALU clause starting at 9:
338; EG-NEXT:     AND_INT T0.W, KC0[2].Y, literal.x,
339; EG-NEXT:     AND_INT * T1.W, T0.X, literal.y,
340; EG-NEXT:    3(4.203895e-45), 255(3.573311e-43)
341; EG-NEXT:     LSHL * T0.W, PV.W, literal.x,
342; EG-NEXT:    3(4.203895e-45), 0(0.000000e+00)
343; EG-NEXT:     LSHL T0.X, T1.W, PV.W,
344; EG-NEXT:     LSHL * T0.W, literal.x, PV.W,
345; EG-NEXT:    255(3.573311e-43), 0(0.000000e+00)
346; EG-NEXT:     MOV T0.Y, 0.0,
347; EG-NEXT:     MOV * T0.Z, 0.0,
348; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
349; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
350;
351; GFX12-LABEL: constant_load_v8i1:
352; GFX12:       ; %bb.0:
353; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
354; GFX12-NEXT:    v_mov_b32_e32 v0, 0
355; GFX12-NEXT:    s_wait_kmcnt 0x0
356; GFX12-NEXT:    global_load_u8 v1, v0, s[2:3]
357; GFX12-NEXT:    s_wait_loadcnt 0x0
358; GFX12-NEXT:    global_store_b8 v0, v1, s[0:1]
359; GFX12-NEXT:    s_endpgm
360  %load = load <8 x i1>, ptr addrspace(4) %in
361  store <8 x i1> %load, ptr addrspace(1) %out
362  ret void
363}
364
; Test case: load a <16 x i1> vector through %in (ptr addrspace(4)) and store
; it unchanged through %out (ptr addrspace(1)).
; The assertion lines inside the function are autogenerated (see the note at
; the top of this file); regenerate them with utils/update_llc_test_checks.py
; rather than editing them by hand.
; The recorded amdgcn outputs use the 16-bit load/store forms
; (buffer_load_ushort, flat_load_ushort, global_load_u16 and the matching
; short/b16 stores); the recorded r600 output uses VTX_READ_16 and ANDs the
; loaded register with the literal 65535.
365define amdgpu_kernel void @constant_load_v16i1(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
366; GFX6-LABEL: constant_load_v16i1:
367; GFX6:       ; %bb.0:
368; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
369; GFX6-NEXT:    s_mov_b32 s7, 0xf000
370; GFX6-NEXT:    s_mov_b32 s6, -1
371; GFX6-NEXT:    s_mov_b32 s10, s6
372; GFX6-NEXT:    s_mov_b32 s11, s7
373; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
374; GFX6-NEXT:    s_mov_b32 s8, s2
375; GFX6-NEXT:    s_mov_b32 s9, s3
376; GFX6-NEXT:    buffer_load_ushort v0, off, s[8:11], 0
377; GFX6-NEXT:    s_mov_b32 s4, s0
378; GFX6-NEXT:    s_mov_b32 s5, s1
379; GFX6-NEXT:    s_waitcnt vmcnt(0)
380; GFX6-NEXT:    buffer_store_short v0, off, s[4:7], 0
381; GFX6-NEXT:    s_endpgm
382;
383; GFX8-LABEL: constant_load_v16i1:
384; GFX8:       ; %bb.0:
385; GFX8-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
386; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
387; GFX8-NEXT:    v_mov_b32_e32 v0, s2
388; GFX8-NEXT:    v_mov_b32_e32 v1, s3
389; GFX8-NEXT:    flat_load_ushort v2, v[0:1]
390; GFX8-NEXT:    v_mov_b32_e32 v0, s0
391; GFX8-NEXT:    v_mov_b32_e32 v1, s1
392; GFX8-NEXT:    s_waitcnt vmcnt(0)
393; GFX8-NEXT:    flat_store_short v[0:1], v2
394; GFX8-NEXT:    s_endpgm
395;
396; EG-LABEL: constant_load_v16i1:
397; EG:       ; %bb.0:
398; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
399; EG-NEXT:    TEX 0 @6
400; EG-NEXT:    ALU 11, @9, KC0[CB0:0-32], KC1[]
401; EG-NEXT:    MEM_RAT MSKOR T0.XW, T1.X
402; EG-NEXT:    CF_END
403; EG-NEXT:    PAD
404; EG-NEXT:    Fetch clause starting at 6:
405; EG-NEXT:     VTX_READ_16 T0.X, T0.X, 0, #1
406; EG-NEXT:    ALU clause starting at 8:
407; EG-NEXT:     MOV * T0.X, KC0[2].Z,
408; EG-NEXT:    ALU clause starting at 9:
409; EG-NEXT:     AND_INT T0.W, KC0[2].Y, literal.x,
410; EG-NEXT:     AND_INT * T1.W, T0.X, literal.y,
411; EG-NEXT:    3(4.203895e-45), 65535(9.183409e-41)
412; EG-NEXT:     LSHL * T0.W, PV.W, literal.x,
413; EG-NEXT:    3(4.203895e-45), 0(0.000000e+00)
414; EG-NEXT:     LSHL T0.X, T1.W, PV.W,
415; EG-NEXT:     LSHL * T0.W, literal.x, PV.W,
416; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
417; EG-NEXT:     MOV T0.Y, 0.0,
418; EG-NEXT:     MOV * T0.Z, 0.0,
419; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
420; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
421;
422; GFX12-LABEL: constant_load_v16i1:
423; GFX12:       ; %bb.0:
424; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
425; GFX12-NEXT:    v_mov_b32_e32 v0, 0
426; GFX12-NEXT:    s_wait_kmcnt 0x0
427; GFX12-NEXT:    global_load_u16 v1, v0, s[2:3]
428; GFX12-NEXT:    s_wait_loadcnt 0x0
429; GFX12-NEXT:    global_store_b16 v0, v1, s[0:1]
430; GFX12-NEXT:    s_endpgm
431  %load = load <16 x i1>, ptr addrspace(4) %in
432  store <16 x i1> %load, ptr addrspace(1) %out
433  ret void
434}
435
; Test case: load a <32 x i1> vector through %in (ptr addrspace(4)) and store
; it unchanged through %out (ptr addrspace(1)).
; The assertion lines inside the function are autogenerated (see the note at
; the top of this file); regenerate them with utils/update_llc_test_checks.py
; rather than editing them by hand.
; The recorded amdgcn outputs fetch the value with a scalar load
; (s_load_dword / s_load_b32) and write it with a single dword/b32 store; the
; recorded r600 output uses VTX_READ_32 and a STORE_RAW of T0.X.
436define amdgpu_kernel void @constant_load_v32i1(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
437; GFX6-LABEL: constant_load_v32i1:
438; GFX6:       ; %bb.0:
439; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
440; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
441; GFX6-NEXT:    s_load_dword s4, s[2:3], 0x0
442; GFX6-NEXT:    s_mov_b32 s3, 0xf000
443; GFX6-NEXT:    s_mov_b32 s2, -1
444; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
445; GFX6-NEXT:    v_mov_b32_e32 v0, s4
446; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
447; GFX6-NEXT:    s_endpgm
448;
449; GFX8-LABEL: constant_load_v32i1:
450; GFX8:       ; %bb.0:
451; GFX8-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
452; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
453; GFX8-NEXT:    s_load_dword s2, s[2:3], 0x0
454; GFX8-NEXT:    v_mov_b32_e32 v0, s0
455; GFX8-NEXT:    v_mov_b32_e32 v1, s1
456; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
457; GFX8-NEXT:    v_mov_b32_e32 v2, s2
458; GFX8-NEXT:    flat_store_dword v[0:1], v2
459; GFX8-NEXT:    s_endpgm
460;
461; EG-LABEL: constant_load_v32i1:
462; EG:       ; %bb.0:
463; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
464; EG-NEXT:    TEX 0 @6
465; EG-NEXT:    ALU 1, @9, KC0[CB0:0-32], KC1[]
466; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
467; EG-NEXT:    CF_END
468; EG-NEXT:    PAD
469; EG-NEXT:    Fetch clause starting at 6:
470; EG-NEXT:     VTX_READ_32 T0.X, T0.X, 0, #1
471; EG-NEXT:    ALU clause starting at 8:
472; EG-NEXT:     MOV * T0.X, KC0[2].Z,
473; EG-NEXT:    ALU clause starting at 9:
474; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
475; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
476;
477; GFX12-LABEL: constant_load_v32i1:
478; GFX12:       ; %bb.0:
479; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
480; GFX12-NEXT:    s_wait_kmcnt 0x0
481; GFX12-NEXT:    s_load_b32 s2, s[2:3], 0x0
482; GFX12-NEXT:    s_wait_kmcnt 0x0
483; GFX12-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
484; GFX12-NEXT:    global_store_b32 v0, v1, s[0:1]
485; GFX12-NEXT:    s_endpgm
486  %load = load <32 x i1>, ptr addrspace(4) %in
487  store <32 x i1> %load, ptr addrspace(1) %out
488  ret void
489}
490
; Test case: load a <64 x i1> vector through %in (ptr addrspace(4)) and store
; it unchanged through %out (ptr addrspace(1)).
; The assertion lines inside the function are autogenerated (see the note at
; the top of this file); regenerate them with utils/update_llc_test_checks.py
; rather than editing them by hand.
; The recorded amdgcn outputs fetch the value with a scalar load
; (s_load_dwordx2 / s_load_b64) and write it with a single dwordx2/b64 store;
; the recorded r600 output uses VTX_READ_64 and a STORE_RAW of T0.XY.
491define amdgpu_kernel void @constant_load_v64i1(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
492; GFX6-LABEL: constant_load_v64i1:
493; GFX6:       ; %bb.0:
494; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
495; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
496; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[2:3], 0x0
497; GFX6-NEXT:    s_mov_b32 s3, 0xf000
498; GFX6-NEXT:    s_mov_b32 s2, -1
499; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
500; GFX6-NEXT:    v_mov_b32_e32 v0, s4
501; GFX6-NEXT:    v_mov_b32_e32 v1, s5
502; GFX6-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
503; GFX6-NEXT:    s_endpgm
504;
505; GFX8-LABEL: constant_load_v64i1:
506; GFX8:       ; %bb.0:
507; GFX8-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
508; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
509; GFX8-NEXT:    s_load_dwordx2 s[2:3], s[2:3], 0x0
510; GFX8-NEXT:    v_mov_b32_e32 v0, s0
511; GFX8-NEXT:    v_mov_b32_e32 v1, s1
512; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
513; GFX8-NEXT:    v_mov_b32_e32 v2, s2
514; GFX8-NEXT:    v_mov_b32_e32 v3, s3
515; GFX8-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
516; GFX8-NEXT:    s_endpgm
517;
518; EG-LABEL: constant_load_v64i1:
519; EG:       ; %bb.0:
520; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
521; EG-NEXT:    TEX 0 @6
522; EG-NEXT:    ALU 1, @9, KC0[CB0:0-32], KC1[]
523; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
524; EG-NEXT:    CF_END
525; EG-NEXT:    PAD
526; EG-NEXT:    Fetch clause starting at 6:
527; EG-NEXT:     VTX_READ_64 T0.XY, T0.X, 0, #1
528; EG-NEXT:    ALU clause starting at 8:
529; EG-NEXT:     MOV * T0.X, KC0[2].Z,
530; EG-NEXT:    ALU clause starting at 9:
531; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
532; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
533;
534; GFX12-LABEL: constant_load_v64i1:
535; GFX12:       ; %bb.0:
536; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
537; GFX12-NEXT:    s_wait_kmcnt 0x0
538; GFX12-NEXT:    s_load_b64 s[2:3], s[2:3], 0x0
539; GFX12-NEXT:    v_mov_b32_e32 v2, 0
540; GFX12-NEXT:    s_wait_kmcnt 0x0
541; GFX12-NEXT:    v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
542; GFX12-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
543; GFX12-NEXT:    s_endpgm
544  %load = load <64 x i1>, ptr addrspace(4) %in
545  store <64 x i1> %load, ptr addrspace(1) %out
546  ret void
547}
548
; Test case: load an i1 through %in (ptr addrspace(4)), zero-extend it to i32
; and store the i32 through %out (ptr addrspace(1)).
; The assertion lines inside the function are autogenerated (see the note at
; the top of this file); regenerate them with utils/update_llc_test_checks.py
; rather than editing them by hand.
; In every recorded output the one-byte load result is stored as a 32-bit value
; with no separate masking or extension instruction in between.
549define amdgpu_kernel void @constant_zextload_i1_to_i32(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
550; GFX6-LABEL: constant_zextload_i1_to_i32:
551; GFX6:       ; %bb.0:
552; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
553; GFX6-NEXT:    s_mov_b32 s7, 0xf000
554; GFX6-NEXT:    s_mov_b32 s6, -1
555; GFX6-NEXT:    s_mov_b32 s10, s6
556; GFX6-NEXT:    s_mov_b32 s11, s7
557; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
558; GFX6-NEXT:    s_mov_b32 s8, s2
559; GFX6-NEXT:    s_mov_b32 s9, s3
560; GFX6-NEXT:    buffer_load_ubyte v0, off, s[8:11], 0
561; GFX6-NEXT:    s_mov_b32 s4, s0
562; GFX6-NEXT:    s_mov_b32 s5, s1
563; GFX6-NEXT:    s_waitcnt vmcnt(0)
564; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
565; GFX6-NEXT:    s_endpgm
566;
567; GFX8-LABEL: constant_zextload_i1_to_i32:
568; GFX8:       ; %bb.0:
569; GFX8-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
570; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
571; GFX8-NEXT:    v_mov_b32_e32 v0, s2
572; GFX8-NEXT:    v_mov_b32_e32 v1, s3
573; GFX8-NEXT:    flat_load_ubyte v2, v[0:1]
574; GFX8-NEXT:    v_mov_b32_e32 v0, s0
575; GFX8-NEXT:    v_mov_b32_e32 v1, s1
576; GFX8-NEXT:    s_waitcnt vmcnt(0)
577; GFX8-NEXT:    flat_store_dword v[0:1], v2
578; GFX8-NEXT:    s_endpgm
579;
580; EG-LABEL: constant_zextload_i1_to_i32:
581; EG:       ; %bb.0:
582; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
583; EG-NEXT:    TEX 0 @6
584; EG-NEXT:    ALU 1, @9, KC0[CB0:0-32], KC1[]
585; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
586; EG-NEXT:    CF_END
587; EG-NEXT:    PAD
588; EG-NEXT:    Fetch clause starting at 6:
589; EG-NEXT:     VTX_READ_8 T0.X, T0.X, 0, #1
590; EG-NEXT:    ALU clause starting at 8:
591; EG-NEXT:     MOV * T0.X, KC0[2].Z,
592; EG-NEXT:    ALU clause starting at 9:
593; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
594; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
595;
596; GFX12-LABEL: constant_zextload_i1_to_i32:
597; GFX12:       ; %bb.0:
598; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
599; GFX12-NEXT:    s_wait_kmcnt 0x0
600; GFX12-NEXT:    s_load_u8 s2, s[2:3], 0x0
601; GFX12-NEXT:    s_wait_kmcnt 0x0
602; GFX12-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
603; GFX12-NEXT:    global_store_b32 v0, v1, s[0:1]
604; GFX12-NEXT:    s_endpgm
605  %a = load i1, ptr addrspace(4) %in
606  %ext = zext i1 %a to i32
607  store i32 %ext, ptr addrspace(1) %out
608  ret void
609}
610
; Test case: load an i1 through %in (ptr addrspace(4)), sign-extend it to i32
; and store the i32 through %out (ptr addrspace(1)).
; The assertion lines inside the function are autogenerated (see the note at
; the top of this file); regenerate them with utils/update_llc_test_checks.py
; rather than editing them by hand.
; In every recorded output a signed bitfield-extract instruction (v_bfe_i32,
; BFE_INT or s_bfe_i32) sits between the one-byte load and the 32-bit store.
611define amdgpu_kernel void @constant_sextload_i1_to_i32(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
612; GFX6-LABEL: constant_sextload_i1_to_i32:
613; GFX6:       ; %bb.0:
614; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
615; GFX6-NEXT:    s_mov_b32 s7, 0xf000
616; GFX6-NEXT:    s_mov_b32 s6, -1
617; GFX6-NEXT:    s_mov_b32 s10, s6
618; GFX6-NEXT:    s_mov_b32 s11, s7
619; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
620; GFX6-NEXT:    s_mov_b32 s8, s2
621; GFX6-NEXT:    s_mov_b32 s9, s3
622; GFX6-NEXT:    buffer_load_ubyte v0, off, s[8:11], 0
623; GFX6-NEXT:    s_mov_b32 s4, s0
624; GFX6-NEXT:    s_mov_b32 s5, s1
625; GFX6-NEXT:    s_waitcnt vmcnt(0)
626; GFX6-NEXT:    v_bfe_i32 v0, v0, 0, 1
627; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
628; GFX6-NEXT:    s_endpgm
629;
630; GFX8-LABEL: constant_sextload_i1_to_i32:
631; GFX8:       ; %bb.0:
632; GFX8-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
633; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
634; GFX8-NEXT:    v_mov_b32_e32 v0, s2
635; GFX8-NEXT:    v_mov_b32_e32 v1, s3
636; GFX8-NEXT:    flat_load_ubyte v2, v[0:1]
637; GFX8-NEXT:    v_mov_b32_e32 v0, s0
638; GFX8-NEXT:    v_mov_b32_e32 v1, s1
639; GFX8-NEXT:    s_waitcnt vmcnt(0)
640; GFX8-NEXT:    v_bfe_i32 v2, v2, 0, 1
641; GFX8-NEXT:    flat_store_dword v[0:1], v2
642; GFX8-NEXT:    s_endpgm
643;
644; EG-LABEL: constant_sextload_i1_to_i32:
645; EG:       ; %bb.0:
646; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
647; EG-NEXT:    TEX 0 @6
648; EG-NEXT:    ALU 2, @9, KC0[CB0:0-32], KC1[]
649; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
650; EG-NEXT:    CF_END
651; EG-NEXT:    PAD
652; EG-NEXT:    Fetch clause starting at 6:
653; EG-NEXT:     VTX_READ_8 T0.X, T0.X, 0, #1
654; EG-NEXT:    ALU clause starting at 8:
655; EG-NEXT:     MOV * T0.X, KC0[2].Z,
656; EG-NEXT:    ALU clause starting at 9:
657; EG-NEXT:     BFE_INT T0.X, T0.X, 0.0, 1,
658; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
659; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
660;
661; GFX12-LABEL: constant_sextload_i1_to_i32:
662; GFX12:       ; %bb.0:
663; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
664; GFX12-NEXT:    s_wait_kmcnt 0x0
665; GFX12-NEXT:    s_load_u8 s2, s[2:3], 0x0
666; GFX12-NEXT:    s_wait_kmcnt 0x0
667; GFX12-NEXT:    s_bfe_i32 s2, s2, 0x10000
668; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
669; GFX12-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
670; GFX12-NEXT:    global_store_b32 v0, v1, s[0:1]
671; GFX12-NEXT:    s_endpgm
672  %a = load i1, ptr addrspace(4) %in
673  %ext = sext i1 %a to i32
674  store i32 %ext, ptr addrspace(1) %out
675  ret void
676}
677
; Test case: load a <1 x i1> vector through %in (ptr addrspace(4)), zero-extend
; it to <1 x i32> and store the result through %out (ptr addrspace(1)).
; The assertion lines inside the function are autogenerated (see the note at
; the top of this file); regenerate them with utils/update_llc_test_checks.py
; rather than editing them by hand.
; The recorded instruction sequences match those of the scalar
; constant_zextload_i1_to_i32 test: a one-byte load stored as a 32-bit value
; with no separate masking or extension instruction.
678define amdgpu_kernel void @constant_zextload_v1i1_to_v1i32(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
679; GFX6-LABEL: constant_zextload_v1i1_to_v1i32:
680; GFX6:       ; %bb.0:
681; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
682; GFX6-NEXT:    s_mov_b32 s7, 0xf000
683; GFX6-NEXT:    s_mov_b32 s6, -1
684; GFX6-NEXT:    s_mov_b32 s10, s6
685; GFX6-NEXT:    s_mov_b32 s11, s7
686; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
687; GFX6-NEXT:    s_mov_b32 s8, s2
688; GFX6-NEXT:    s_mov_b32 s9, s3
689; GFX6-NEXT:    buffer_load_ubyte v0, off, s[8:11], 0
690; GFX6-NEXT:    s_mov_b32 s4, s0
691; GFX6-NEXT:    s_mov_b32 s5, s1
692; GFX6-NEXT:    s_waitcnt vmcnt(0)
693; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
694; GFX6-NEXT:    s_endpgm
695;
696; GFX8-LABEL: constant_zextload_v1i1_to_v1i32:
697; GFX8:       ; %bb.0:
698; GFX8-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
699; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
700; GFX8-NEXT:    v_mov_b32_e32 v0, s2
701; GFX8-NEXT:    v_mov_b32_e32 v1, s3
702; GFX8-NEXT:    flat_load_ubyte v2, v[0:1]
703; GFX8-NEXT:    v_mov_b32_e32 v0, s0
704; GFX8-NEXT:    v_mov_b32_e32 v1, s1
705; GFX8-NEXT:    s_waitcnt vmcnt(0)
706; GFX8-NEXT:    flat_store_dword v[0:1], v2
707; GFX8-NEXT:    s_endpgm
708;
709; EG-LABEL: constant_zextload_v1i1_to_v1i32:
710; EG:       ; %bb.0:
711; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
712; EG-NEXT:    TEX 0 @6
713; EG-NEXT:    ALU 1, @9, KC0[CB0:0-32], KC1[]
714; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
715; EG-NEXT:    CF_END
716; EG-NEXT:    PAD
717; EG-NEXT:    Fetch clause starting at 6:
718; EG-NEXT:     VTX_READ_8 T0.X, T0.X, 0, #1
719; EG-NEXT:    ALU clause starting at 8:
720; EG-NEXT:     MOV * T0.X, KC0[2].Z,
721; EG-NEXT:    ALU clause starting at 9:
722; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
723; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
724;
725; GFX12-LABEL: constant_zextload_v1i1_to_v1i32:
726; GFX12:       ; %bb.0:
727; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
728; GFX12-NEXT:    s_wait_kmcnt 0x0
729; GFX12-NEXT:    s_load_u8 s2, s[2:3], 0x0
730; GFX12-NEXT:    s_wait_kmcnt 0x0
731; GFX12-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
732; GFX12-NEXT:    global_store_b32 v0, v1, s[0:1]
733; GFX12-NEXT:    s_endpgm
734  %load = load <1 x i1>, ptr addrspace(4) %in
735  %ext = zext <1 x i1> %load to <1 x i32>
736  store <1 x i32> %ext, ptr addrspace(1) %out
737  ret void
738}
739
; Test case: load a <1 x i1> vector through %in (ptr addrspace(4)), sign-extend
; it to <1 x i32> and store the result through %out (ptr addrspace(1)).
; The assertion lines inside the function are autogenerated (see the note at
; the top of this file); regenerate them with utils/update_llc_test_checks.py
; rather than editing them by hand.
; The recorded instruction sequences match those of the scalar
; constant_sextload_i1_to_i32 test: a signed bitfield-extract instruction
; (v_bfe_i32, BFE_INT or s_bfe_i32) sits between the one-byte load and the
; 32-bit store.
740define amdgpu_kernel void @constant_sextload_v1i1_to_v1i32(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
741; GFX6-LABEL: constant_sextload_v1i1_to_v1i32:
742; GFX6:       ; %bb.0:
743; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
744; GFX6-NEXT:    s_mov_b32 s7, 0xf000
745; GFX6-NEXT:    s_mov_b32 s6, -1
746; GFX6-NEXT:    s_mov_b32 s10, s6
747; GFX6-NEXT:    s_mov_b32 s11, s7
748; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
749; GFX6-NEXT:    s_mov_b32 s8, s2
750; GFX6-NEXT:    s_mov_b32 s9, s3
751; GFX6-NEXT:    buffer_load_ubyte v0, off, s[8:11], 0
752; GFX6-NEXT:    s_mov_b32 s4, s0
753; GFX6-NEXT:    s_mov_b32 s5, s1
754; GFX6-NEXT:    s_waitcnt vmcnt(0)
755; GFX6-NEXT:    v_bfe_i32 v0, v0, 0, 1
756; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
757; GFX6-NEXT:    s_endpgm
758;
759; GFX8-LABEL: constant_sextload_v1i1_to_v1i32:
760; GFX8:       ; %bb.0:
761; GFX8-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
762; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
763; GFX8-NEXT:    v_mov_b32_e32 v0, s2
764; GFX8-NEXT:    v_mov_b32_e32 v1, s3
765; GFX8-NEXT:    flat_load_ubyte v2, v[0:1]
766; GFX8-NEXT:    v_mov_b32_e32 v0, s0
767; GFX8-NEXT:    v_mov_b32_e32 v1, s1
768; GFX8-NEXT:    s_waitcnt vmcnt(0)
769; GFX8-NEXT:    v_bfe_i32 v2, v2, 0, 1
770; GFX8-NEXT:    flat_store_dword v[0:1], v2
771; GFX8-NEXT:    s_endpgm
772;
773; EG-LABEL: constant_sextload_v1i1_to_v1i32:
774; EG:       ; %bb.0:
775; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
776; EG-NEXT:    TEX 0 @6
777; EG-NEXT:    ALU 2, @9, KC0[CB0:0-32], KC1[]
778; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
779; EG-NEXT:    CF_END
780; EG-NEXT:    PAD
781; EG-NEXT:    Fetch clause starting at 6:
782; EG-NEXT:     VTX_READ_8 T0.X, T0.X, 0, #1
783; EG-NEXT:    ALU clause starting at 8:
784; EG-NEXT:     MOV * T0.X, KC0[2].Z,
785; EG-NEXT:    ALU clause starting at 9:
786; EG-NEXT:     BFE_INT T0.X, T0.X, 0.0, 1,
787; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
788; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
789;
790; GFX12-LABEL: constant_sextload_v1i1_to_v1i32:
791; GFX12:       ; %bb.0:
792; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
793; GFX12-NEXT:    s_wait_kmcnt 0x0
794; GFX12-NEXT:    s_load_u8 s2, s[2:3], 0x0
795; GFX12-NEXT:    s_wait_kmcnt 0x0
796; GFX12-NEXT:    s_bfe_i32 s2, s2, 0x10000
797; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
798; GFX12-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
799; GFX12-NEXT:    global_store_b32 v0, v1, s[0:1]
800; GFX12-NEXT:    s_endpgm
801  %load = load <1 x i1>, ptr addrspace(4) %in
802  %ext = sext <1 x i1> %load to <1 x i32>
803  store <1 x i32> %ext, ptr addrspace(1) %out
804  ret void
805}
806
; Test: load a <2 x i1> vector through the addrspace(4) pointer %in, zero-extend
; each lane to i32, and store the <2 x i32> result through the addrspace(1)
; pointer %out.
; The per-target assertions below expect one byte-sized load, AND / shift /
; unsigned bitfield-extract instructions that split the loaded byte into the
; output lanes, and a single 64-bit store.
; NOTE(review): the file header says these assertions are autogenerated by
; utils/update_llc_test_checks.py -- regenerate them rather than hand-editing.
807define amdgpu_kernel void @constant_zextload_v2i1_to_v2i32(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
808; GFX6-LABEL: constant_zextload_v2i1_to_v2i32:
809; GFX6:       ; %bb.0:
810; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
811; GFX6-NEXT:    s_mov_b32 s7, 0xf000
812; GFX6-NEXT:    s_mov_b32 s6, -1
813; GFX6-NEXT:    s_mov_b32 s10, s6
814; GFX6-NEXT:    s_mov_b32 s11, s7
815; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
816; GFX6-NEXT:    s_mov_b32 s8, s2
817; GFX6-NEXT:    s_mov_b32 s9, s3
818; GFX6-NEXT:    buffer_load_ubyte v0, off, s[8:11], 0
819; GFX6-NEXT:    s_mov_b32 s4, s0
820; GFX6-NEXT:    s_mov_b32 s5, s1
821; GFX6-NEXT:    s_waitcnt vmcnt(0)
822; GFX6-NEXT:    v_lshrrev_b32_e32 v1, 1, v0
823; GFX6-NEXT:    v_and_b32_e32 v0, 1, v0
824; GFX6-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
825; GFX6-NEXT:    s_endpgm
826;
827; GFX8-LABEL: constant_zextload_v2i1_to_v2i32:
828; GFX8:       ; %bb.0:
829; GFX8-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
830; GFX8-NEXT:    v_mov_b32_e32 v3, 1
831; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
832; GFX8-NEXT:    v_mov_b32_e32 v0, s2
833; GFX8-NEXT:    v_mov_b32_e32 v1, s3
834; GFX8-NEXT:    flat_load_ubyte v2, v[0:1]
835; GFX8-NEXT:    v_mov_b32_e32 v0, s0
836; GFX8-NEXT:    v_mov_b32_e32 v1, s1
837; GFX8-NEXT:    s_waitcnt vmcnt(0)
838; GFX8-NEXT:    v_lshrrev_b32_sdwa v3, v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
839; GFX8-NEXT:    v_and_b32_e32 v2, 1, v2
840; GFX8-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
841; GFX8-NEXT:    s_endpgm
842;
843; EG-LABEL: constant_zextload_v2i1_to_v2i32:
844; EG:       ; %bb.0:
845; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
846; EG-NEXT:    TEX 0 @6
847; EG-NEXT:    ALU 3, @9, KC0[CB0:0-32], KC1[]
848; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
849; EG-NEXT:    CF_END
850; EG-NEXT:    PAD
851; EG-NEXT:    Fetch clause starting at 6:
852; EG-NEXT:     VTX_READ_8 T0.X, T0.X, 0, #1
853; EG-NEXT:    ALU clause starting at 8:
854; EG-NEXT:     MOV * T0.X, KC0[2].Z,
855; EG-NEXT:    ALU clause starting at 9:
856; EG-NEXT:     BFE_UINT * T0.Y, T0.X, 1, 1,
857; EG-NEXT:     AND_INT T0.X, T0.X, 1,
858; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
859; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
860;
861; GFX12-LABEL: constant_zextload_v2i1_to_v2i32:
862; GFX12:       ; %bb.0:
863; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
864; GFX12-NEXT:    v_mov_b32_e32 v2, 0
865; GFX12-NEXT:    s_wait_kmcnt 0x0
866; GFX12-NEXT:    global_load_u8 v0, v2, s[2:3]
867; GFX12-NEXT:    s_wait_loadcnt 0x0
868; GFX12-NEXT:    v_and_b32_e32 v1, 0xffff, v0
869; GFX12-NEXT:    v_and_b32_e32 v0, 1, v0
870; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2)
871; GFX12-NEXT:    v_lshrrev_b32_e32 v1, 1, v1
872; GFX12-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
873; GFX12-NEXT:    s_endpgm
874  %load = load <2 x i1>, ptr addrspace(4) %in
875  %ext = zext <2 x i1> %load to <2 x i32>
876  store <2 x i32> %ext, ptr addrspace(1) %out
877  ret void
878}
879
; Test: load a <2 x i1> vector through the addrspace(4) pointer %in, sign-extend
; each lane to i32, and store the <2 x i32> result through the addrspace(1)
; pointer %out.
; The per-target assertions below expect one byte-sized load, one width-1
; signed bitfield extract per lane (v_bfe_i32, s_bfe_i32 or BFE_INT), and a
; single 64-bit store.
; NOTE(review): the file header says these assertions are autogenerated by
; utils/update_llc_test_checks.py -- regenerate them rather than hand-editing.
880define amdgpu_kernel void @constant_sextload_v2i1_to_v2i32(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
881; GFX6-LABEL: constant_sextload_v2i1_to_v2i32:
882; GFX6:       ; %bb.0:
883; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
884; GFX6-NEXT:    s_mov_b32 s7, 0xf000
885; GFX6-NEXT:    s_mov_b32 s6, -1
886; GFX6-NEXT:    s_mov_b32 s10, s6
887; GFX6-NEXT:    s_mov_b32 s11, s7
888; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
889; GFX6-NEXT:    s_mov_b32 s8, s2
890; GFX6-NEXT:    s_mov_b32 s9, s3
891; GFX6-NEXT:    buffer_load_ubyte v0, off, s[8:11], 0
892; GFX6-NEXT:    s_mov_b32 s4, s0
893; GFX6-NEXT:    s_mov_b32 s5, s1
894; GFX6-NEXT:    s_waitcnt vmcnt(0)
895; GFX6-NEXT:    v_bfe_i32 v1, v0, 1, 1
896; GFX6-NEXT:    v_bfe_i32 v0, v0, 0, 1
897; GFX6-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
898; GFX6-NEXT:    s_endpgm
899;
900; GFX8-LABEL: constant_sextload_v2i1_to_v2i32:
901; GFX8:       ; %bb.0:
902; GFX8-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
903; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
904; GFX8-NEXT:    v_mov_b32_e32 v0, s2
905; GFX8-NEXT:    v_mov_b32_e32 v1, s3
906; GFX8-NEXT:    flat_load_ubyte v2, v[0:1]
907; GFX8-NEXT:    v_mov_b32_e32 v0, s0
908; GFX8-NEXT:    v_mov_b32_e32 v1, s1
909; GFX8-NEXT:    s_waitcnt vmcnt(0)
910; GFX8-NEXT:    v_bfe_i32 v3, v2, 1, 1
911; GFX8-NEXT:    v_bfe_i32 v2, v2, 0, 1
912; GFX8-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
913; GFX8-NEXT:    s_endpgm
914;
915; EG-LABEL: constant_sextload_v2i1_to_v2i32:
916; EG:       ; %bb.0:
917; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
918; EG-NEXT:    TEX 0 @6
919; EG-NEXT:    ALU 4, @9, KC0[CB0:0-32], KC1[]
920; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T1.XY, T0.X, 1
921; EG-NEXT:    CF_END
922; EG-NEXT:    PAD
923; EG-NEXT:    Fetch clause starting at 6:
924; EG-NEXT:     VTX_READ_8 T0.X, T0.X, 0, #1
925; EG-NEXT:    ALU clause starting at 8:
926; EG-NEXT:     MOV * T0.X, KC0[2].Z,
927; EG-NEXT:    ALU clause starting at 9:
928; EG-NEXT:     BFE_INT T1.X, T0.X, 0.0, 1,
929; EG-NEXT:     LSHR T0.W, T0.X, 1,
930; EG-NEXT:     LSHR * T0.X, KC0[2].Y, literal.x,
931; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
932; EG-NEXT:     BFE_INT * T1.Y, PV.W, 0.0, 1,
933;
934; GFX12-LABEL: constant_sextload_v2i1_to_v2i32:
935; GFX12:       ; %bb.0:
936; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
937; GFX12-NEXT:    s_wait_kmcnt 0x0
938; GFX12-NEXT:    s_load_u8 s2, s[2:3], 0x0
939; GFX12-NEXT:    s_wait_kmcnt 0x0
940; GFX12-NEXT:    s_bfe_i32 s3, s2, 0x10000
941; GFX12-NEXT:    s_bfe_i32 s2, s2, 0x10001
942; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
943; GFX12-NEXT:    v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s2
944; GFX12-NEXT:    v_mov_b32_e32 v0, s3
945; GFX12-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
946; GFX12-NEXT:    s_endpgm
947  %load = load <2 x i1>, ptr addrspace(4) %in
948  %ext = sext <2 x i1> %load to <2 x i32>
949  store <2 x i32> %ext, ptr addrspace(1) %out
950  ret void
951}
952
; Test: load a <3 x i1> vector through the addrspace(4) pointer %in, zero-extend
; each lane to i32, and store the <3 x i32> result through the addrspace(1)
; pointer %out.
; The per-target assertions below expect one byte-sized load followed by AND /
; shift / unsigned bitfield-extract instructions. The expected GFX6 and EG
; output writes the result with two stores (the third lane at byte offset 8),
; while the expected GFX8 and GFX12 output uses one 96-bit store.
; NOTE(review): the file header says these assertions are autogenerated by
; utils/update_llc_test_checks.py -- regenerate them rather than hand-editing.
953define amdgpu_kernel void @constant_zextload_v3i1_to_v3i32(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
954; GFX6-LABEL: constant_zextload_v3i1_to_v3i32:
955; GFX6:       ; %bb.0:
956; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
957; GFX6-NEXT:    s_mov_b32 s7, 0xf000
958; GFX6-NEXT:    s_mov_b32 s6, -1
959; GFX6-NEXT:    s_mov_b32 s10, s6
960; GFX6-NEXT:    s_mov_b32 s11, s7
961; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
962; GFX6-NEXT:    s_mov_b32 s8, s2
963; GFX6-NEXT:    s_mov_b32 s9, s3
964; GFX6-NEXT:    buffer_load_ubyte v2, off, s[8:11], 0
965; GFX6-NEXT:    s_mov_b32 s4, s0
966; GFX6-NEXT:    s_mov_b32 s5, s1
967; GFX6-NEXT:    s_waitcnt vmcnt(0)
968; GFX6-NEXT:    v_and_b32_e32 v0, 1, v2
969; GFX6-NEXT:    v_bfe_u32 v1, v2, 1, 1
970; GFX6-NEXT:    v_lshrrev_b32_e32 v2, 2, v2
971; GFX6-NEXT:    buffer_store_dword v2, off, s[4:7], 0 offset:8
972; GFX6-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
973; GFX6-NEXT:    s_endpgm
974;
975; GFX8-LABEL: constant_zextload_v3i1_to_v3i32:
976; GFX8:       ; %bb.0:
977; GFX8-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
978; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
979; GFX8-NEXT:    v_mov_b32_e32 v0, s2
980; GFX8-NEXT:    v_mov_b32_e32 v1, s3
981; GFX8-NEXT:    flat_load_ubyte v1, v[0:1]
982; GFX8-NEXT:    v_mov_b32_e32 v0, 2
983; GFX8-NEXT:    v_mov_b32_e32 v3, s0
984; GFX8-NEXT:    v_mov_b32_e32 v4, s1
985; GFX8-NEXT:    s_waitcnt vmcnt(0)
986; GFX8-NEXT:    v_lshrrev_b32_sdwa v2, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
987; GFX8-NEXT:    v_and_b32_e32 v0, 1, v1
988; GFX8-NEXT:    v_bfe_u32 v1, v1, 1, 1
989; GFX8-NEXT:    flat_store_dwordx3 v[3:4], v[0:2]
990; GFX8-NEXT:    s_endpgm
991;
992; EG-LABEL: constant_zextload_v3i1_to_v3i32:
993; EG:       ; %bb.0:
994; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
995; EG-NEXT:    TEX 0 @6
996; EG-NEXT:    ALU 8, @9, KC0[CB0:0-32], KC1[]
997; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.X, T3.X, 0
998; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T1.XY, T2.X, 1
999; EG-NEXT:    CF_END
1000; EG-NEXT:    Fetch clause starting at 6:
1001; EG-NEXT:     VTX_READ_8 T0.X, T0.X, 0, #1
1002; EG-NEXT:    ALU clause starting at 8:
1003; EG-NEXT:     MOV * T0.X, KC0[2].Z,
1004; EG-NEXT:    ALU clause starting at 9:
1005; EG-NEXT:     BFE_UINT * T1.Y, T0.X, 1, 1,
1006; EG-NEXT:     AND_INT T1.X, T0.X, 1,
1007; EG-NEXT:     LSHR * T2.X, KC0[2].Y, literal.x,
1008; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
1009; EG-NEXT:     LSHR T0.X, T0.X, literal.x,
1010; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
1011; EG-NEXT:    2(2.802597e-45), 8(1.121039e-44)
1012; EG-NEXT:     LSHR * T3.X, PV.W, literal.x,
1013; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
1014;
1015; GFX12-LABEL: constant_zextload_v3i1_to_v3i32:
1016; GFX12:       ; %bb.0:
1017; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
1018; GFX12-NEXT:    v_mov_b32_e32 v3, 0
1019; GFX12-NEXT:    s_wait_kmcnt 0x0
1020; GFX12-NEXT:    global_load_u8 v1, v3, s[2:3]
1021; GFX12-NEXT:    s_wait_loadcnt 0x0
1022; GFX12-NEXT:    v_and_b32_e32 v0, 0xffff, v1
1023; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
1024; GFX12-NEXT:    v_lshrrev_b32_e32 v2, 2, v0
1025; GFX12-NEXT:    v_and_b32_e32 v0, 1, v1
1026; GFX12-NEXT:    v_bfe_u32 v1, v1, 1, 1
1027; GFX12-NEXT:    v_and_b32_e32 v2, 0xffff, v2
1028; GFX12-NEXT:    global_store_b96 v3, v[0:2], s[0:1]
1029; GFX12-NEXT:    s_endpgm
1030  %load = load <3 x i1>, ptr addrspace(4) %in
1031  %ext = zext <3 x i1> %load to <3 x i32>
1032  store <3 x i32> %ext, ptr addrspace(1) %out
1033  ret void
1034}
1035
; Test: load a <3 x i1> vector through the addrspace(4) pointer %in, sign-extend
; each lane to i32, and store the <3 x i32> result through the addrspace(1)
; pointer %out.
; The per-target assertions below expect one byte-sized load followed by one
; width-1 signed bitfield extract per lane (v_bfe_i32, s_bfe_i32 or BFE_INT).
; The expected GFX6 and EG output writes the result with two stores (the third
; lane at byte offset 8), while the expected GFX8 and GFX12 output uses one
; 96-bit store.
; NOTE(review): the file header says these assertions are autogenerated by
; utils/update_llc_test_checks.py -- regenerate them rather than hand-editing.
1036define amdgpu_kernel void @constant_sextload_v3i1_to_v3i32(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
1037; GFX6-LABEL: constant_sextload_v3i1_to_v3i32:
1038; GFX6:       ; %bb.0:
1039; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
1040; GFX6-NEXT:    s_mov_b32 s7, 0xf000
1041; GFX6-NEXT:    s_mov_b32 s6, -1
1042; GFX6-NEXT:    s_mov_b32 s10, s6
1043; GFX6-NEXT:    s_mov_b32 s11, s7
1044; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
1045; GFX6-NEXT:    s_mov_b32 s8, s2
1046; GFX6-NEXT:    s_mov_b32 s9, s3
1047; GFX6-NEXT:    buffer_load_ubyte v2, off, s[8:11], 0
1048; GFX6-NEXT:    s_mov_b32 s4, s0
1049; GFX6-NEXT:    s_mov_b32 s5, s1
1050; GFX6-NEXT:    s_waitcnt vmcnt(0)
1051; GFX6-NEXT:    v_bfe_i32 v1, v2, 1, 1
1052; GFX6-NEXT:    v_bfe_i32 v0, v2, 0, 1
1053; GFX6-NEXT:    v_bfe_i32 v2, v2, 2, 1
1054; GFX6-NEXT:    buffer_store_dword v2, off, s[4:7], 0 offset:8
1055; GFX6-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
1056; GFX6-NEXT:    s_endpgm
1057;
1058; GFX8-LABEL: constant_sextload_v3i1_to_v3i32:
1059; GFX8:       ; %bb.0:
1060; GFX8-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
1061; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
1062; GFX8-NEXT:    v_mov_b32_e32 v0, s2
1063; GFX8-NEXT:    v_mov_b32_e32 v1, s3
1064; GFX8-NEXT:    flat_load_ubyte v0, v[0:1]
1065; GFX8-NEXT:    v_mov_b32_e32 v3, s0
1066; GFX8-NEXT:    v_mov_b32_e32 v4, s1
1067; GFX8-NEXT:    s_waitcnt vmcnt(0)
1068; GFX8-NEXT:    v_bfe_i32 v2, v0, 2, 1
1069; GFX8-NEXT:    v_bfe_i32 v1, v0, 1, 1
1070; GFX8-NEXT:    v_bfe_i32 v0, v0, 0, 1
1071; GFX8-NEXT:    flat_store_dwordx3 v[3:4], v[0:2]
1072; GFX8-NEXT:    s_endpgm
1073;
1074; EG-LABEL: constant_sextload_v3i1_to_v3i32:
1075; EG:       ; %bb.0:
1076; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
1077; EG-NEXT:    TEX 0 @6
1078; EG-NEXT:    ALU 10, @9, KC0[CB0:0-32], KC1[]
1079; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T2.X, T0.X, 0
1080; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T3.XY, T1.X, 1
1081; EG-NEXT:    CF_END
1082; EG-NEXT:    Fetch clause starting at 6:
1083; EG-NEXT:     VTX_READ_8 T0.X, T0.X, 0, #1
1084; EG-NEXT:    ALU clause starting at 8:
1085; EG-NEXT:     MOV * T0.X, KC0[2].Z,
1086; EG-NEXT:    ALU clause starting at 9:
1087; EG-NEXT:     LSHR T1.X, KC0[2].Y, literal.x,
1088; EG-NEXT:     LSHR * T0.W, T0.X, literal.x,
1089; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
1090; EG-NEXT:     BFE_INT * T2.X, PV.W, 0.0, 1,
1091; EG-NEXT:     BFE_INT T3.X, T0.X, 0.0, 1,
1092; EG-NEXT:     LSHR T0.W, T0.X, 1,
1093; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.x,
1094; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
1095; EG-NEXT:     LSHR T0.X, PS, literal.x,
1096; EG-NEXT:     BFE_INT * T3.Y, PV.W, 0.0, 1,
1097; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
1098;
1099; GFX12-LABEL: constant_sextload_v3i1_to_v3i32:
1100; GFX12:       ; %bb.0:
1101; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
1102; GFX12-NEXT:    s_wait_kmcnt 0x0
1103; GFX12-NEXT:    s_load_u8 s2, s[2:3], 0x0
1104; GFX12-NEXT:    s_wait_kmcnt 0x0
1105; GFX12-NEXT:    s_bfe_i32 s3, s2, 0x10002
1106; GFX12-NEXT:    s_bfe_i32 s4, s2, 0x10000
1107; GFX12-NEXT:    s_bfe_i32 s2, s2, 0x10001
1108; GFX12-NEXT:    v_dual_mov_b32 v3, 0 :: v_dual_mov_b32 v0, s4
1109; GFX12-NEXT:    v_dual_mov_b32 v1, s2 :: v_dual_mov_b32 v2, s3
1110; GFX12-NEXT:    global_store_b96 v3, v[0:2], s[0:1]
1111; GFX12-NEXT:    s_endpgm
1112  %load = load <3 x i1>, ptr addrspace(4) %in
1113  %ext = sext <3 x i1> %load to <3 x i32>
1114  store <3 x i32> %ext, ptr addrspace(1) %out
1115  ret void
1116}
1117
; Test: load a <4 x i1> vector through the addrspace(4) pointer %in, zero-extend
; each lane to i32, and store the <4 x i32> result through the addrspace(1)
; pointer %out.
; The per-target assertions below expect one byte-sized load, AND / shift /
; unsigned bitfield-extract instructions that split the loaded byte into the
; output lanes, and a single 128-bit store.
; NOTE(review): the file header says these assertions are autogenerated by
; utils/update_llc_test_checks.py -- regenerate them rather than hand-editing.
1118define amdgpu_kernel void @constant_zextload_v4i1_to_v4i32(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
1119; GFX6-LABEL: constant_zextload_v4i1_to_v4i32:
1120; GFX6:       ; %bb.0:
1121; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
1122; GFX6-NEXT:    s_mov_b32 s7, 0xf000
1123; GFX6-NEXT:    s_mov_b32 s6, -1
1124; GFX6-NEXT:    s_mov_b32 s10, s6
1125; GFX6-NEXT:    s_mov_b32 s11, s7
1126; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
1127; GFX6-NEXT:    s_mov_b32 s8, s2
1128; GFX6-NEXT:    s_mov_b32 s9, s3
1129; GFX6-NEXT:    buffer_load_ubyte v1, off, s[8:11], 0
1130; GFX6-NEXT:    s_mov_b32 s4, s0
1131; GFX6-NEXT:    s_mov_b32 s5, s1
1132; GFX6-NEXT:    s_waitcnt vmcnt(0)
1133; GFX6-NEXT:    v_lshrrev_b32_e32 v3, 3, v1
1134; GFX6-NEXT:    v_and_b32_e32 v0, 1, v1
1135; GFX6-NEXT:    v_bfe_u32 v2, v1, 2, 1
1136; GFX6-NEXT:    v_bfe_u32 v1, v1, 1, 1
1137; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
1138; GFX6-NEXT:    s_endpgm
1139;
1140; GFX8-LABEL: constant_zextload_v4i1_to_v4i32:
1141; GFX8:       ; %bb.0:
1142; GFX8-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
1143; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
1144; GFX8-NEXT:    v_mov_b32_e32 v0, s2
1145; GFX8-NEXT:    v_mov_b32_e32 v1, s3
1146; GFX8-NEXT:    flat_load_ubyte v1, v[0:1]
1147; GFX8-NEXT:    v_mov_b32_e32 v0, 3
1148; GFX8-NEXT:    v_mov_b32_e32 v4, s0
1149; GFX8-NEXT:    v_mov_b32_e32 v5, s1
1150; GFX8-NEXT:    s_waitcnt vmcnt(0)
1151; GFX8-NEXT:    v_lshrrev_b32_sdwa v3, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
1152; GFX8-NEXT:    v_and_b32_e32 v0, 1, v1
1153; GFX8-NEXT:    v_bfe_u32 v2, v1, 2, 1
1154; GFX8-NEXT:    v_bfe_u32 v1, v1, 1, 1
1155; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
1156; GFX8-NEXT:    s_endpgm
1157;
1158; EG-LABEL: constant_zextload_v4i1_to_v4i32:
1159; EG:       ; %bb.0:
1160; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
1161; EG-NEXT:    TEX 0 @6
1162; EG-NEXT:    ALU 7, @9, KC0[CB0:0-32], KC1[]
1163; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T1.X, 1
1164; EG-NEXT:    CF_END
1165; EG-NEXT:    PAD
1166; EG-NEXT:    Fetch clause starting at 6:
1167; EG-NEXT:     VTX_READ_8 T0.X, T0.X, 0, #1
1168; EG-NEXT:    ALU clause starting at 8:
1169; EG-NEXT:     MOV * T0.X, KC0[2].Z,
1170; EG-NEXT:    ALU clause starting at 9:
1171; EG-NEXT:     BFE_UINT * T0.W, T0.X, literal.x, 1,
1172; EG-NEXT:    3(4.203895e-45), 0(0.000000e+00)
1173; EG-NEXT:     BFE_UINT * T0.Z, T0.X, literal.x, 1,
1174; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
1175; EG-NEXT:     BFE_UINT * T0.Y, T0.X, 1, 1,
1176; EG-NEXT:     AND_INT T0.X, T0.X, 1,
1177; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
1178; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
1179;
1180; GFX12-LABEL: constant_zextload_v4i1_to_v4i32:
1181; GFX12:       ; %bb.0:
1182; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
1183; GFX12-NEXT:    v_mov_b32_e32 v4, 0
1184; GFX12-NEXT:    s_wait_kmcnt 0x0
1185; GFX12-NEXT:    global_load_u8 v1, v4, s[2:3]
1186; GFX12-NEXT:    s_wait_loadcnt 0x0
1187; GFX12-NEXT:    v_and_b32_e32 v3, 0xffff, v1
1188; GFX12-NEXT:    v_and_b32_e32 v0, 1, v1
1189; GFX12-NEXT:    v_bfe_u32 v2, v1, 2, 1
1190; GFX12-NEXT:    v_bfe_u32 v1, v1, 1, 1
1191; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_4)
1192; GFX12-NEXT:    v_lshrrev_b32_e32 v3, 3, v3
1193; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[0:1]
1194; GFX12-NEXT:    s_endpgm
1195  %load = load <4 x i1>, ptr addrspace(4) %in
1196  %ext = zext <4 x i1> %load to <4 x i32>
1197  store <4 x i32> %ext, ptr addrspace(1) %out
1198  ret void
1199}
1200
; Test: load a <4 x i1> vector through the addrspace(4) pointer %in, sign-extend
; each lane to i32, and store the <4 x i32> result through the addrspace(1)
; pointer %out.
; The per-target assertions below expect one byte-sized load, one width-1
; signed bitfield extract per lane (v_bfe_i32, s_bfe_i32 or BFE_INT), and a
; single 128-bit store.
; NOTE(review): the file header says these assertions are autogenerated by
; utils/update_llc_test_checks.py -- regenerate them rather than hand-editing.
1201define amdgpu_kernel void @constant_sextload_v4i1_to_v4i32(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
1202; GFX6-LABEL: constant_sextload_v4i1_to_v4i32:
1203; GFX6:       ; %bb.0:
1204; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
1205; GFX6-NEXT:    s_mov_b32 s7, 0xf000
1206; GFX6-NEXT:    s_mov_b32 s6, -1
1207; GFX6-NEXT:    s_mov_b32 s10, s6
1208; GFX6-NEXT:    s_mov_b32 s11, s7
1209; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
1210; GFX6-NEXT:    s_mov_b32 s8, s2
1211; GFX6-NEXT:    s_mov_b32 s9, s3
1212; GFX6-NEXT:    buffer_load_ubyte v0, off, s[8:11], 0
1213; GFX6-NEXT:    s_mov_b32 s4, s0
1214; GFX6-NEXT:    s_mov_b32 s5, s1
1215; GFX6-NEXT:    s_waitcnt vmcnt(0)
1216; GFX6-NEXT:    v_bfe_i32 v3, v0, 3, 1
1217; GFX6-NEXT:    v_bfe_i32 v2, v0, 2, 1
1218; GFX6-NEXT:    v_bfe_i32 v1, v0, 1, 1
1219; GFX6-NEXT:    v_bfe_i32 v0, v0, 0, 1
1220; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
1221; GFX6-NEXT:    s_endpgm
1222;
1223; GFX8-LABEL: constant_sextload_v4i1_to_v4i32:
1224; GFX8:       ; %bb.0:
1225; GFX8-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
1226; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
1227; GFX8-NEXT:    v_mov_b32_e32 v0, s2
1228; GFX8-NEXT:    v_mov_b32_e32 v1, s3
1229; GFX8-NEXT:    flat_load_ubyte v0, v[0:1]
1230; GFX8-NEXT:    v_mov_b32_e32 v4, s0
1231; GFX8-NEXT:    v_mov_b32_e32 v5, s1
1232; GFX8-NEXT:    s_waitcnt vmcnt(0)
1233; GFX8-NEXT:    v_bfe_i32 v3, v0, 3, 1
1234; GFX8-NEXT:    v_bfe_i32 v2, v0, 2, 1
1235; GFX8-NEXT:    v_bfe_i32 v1, v0, 1, 1
1236; GFX8-NEXT:    v_bfe_i32 v0, v0, 0, 1
1237; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
1238; GFX8-NEXT:    s_endpgm
1239;
1240; EG-LABEL: constant_sextload_v4i1_to_v4i32:
1241; EG:       ; %bb.0:
1242; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
1243; EG-NEXT:    TEX 0 @6
1244; EG-NEXT:    ALU 10, @9, KC0[CB0:0-32], KC1[]
1245; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T1.XYZW, T0.X, 1
1246; EG-NEXT:    CF_END
1247; EG-NEXT:    PAD
1248; EG-NEXT:    Fetch clause starting at 6:
1249; EG-NEXT:     VTX_READ_8 T0.X, T0.X, 0, #1
1250; EG-NEXT:    ALU clause starting at 8:
1251; EG-NEXT:     MOV * T0.X, KC0[2].Z,
1252; EG-NEXT:    ALU clause starting at 9:
1253; EG-NEXT:     BFE_INT T1.X, T0.X, 0.0, 1,
1254; EG-NEXT:     LSHR * T0.W, T0.X, literal.x,
1255; EG-NEXT:    3(4.203895e-45), 0(0.000000e+00)
1256; EG-NEXT:     BFE_INT T1.W, PV.W, 0.0, 1,
1257; EG-NEXT:     LSHR * T0.W, T0.X, literal.x,
1258; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
1259; EG-NEXT:     BFE_INT T1.Z, PS, 0.0, 1,
1260; EG-NEXT:     LSHR * T0.W, T0.X, 1,
1261; EG-NEXT:     LSHR T0.X, KC0[2].Y, literal.x,
1262; EG-NEXT:     BFE_INT * T1.Y, PV.W, 0.0, 1,
1263; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
1264;
1265; GFX12-LABEL: constant_sextload_v4i1_to_v4i32:
1266; GFX12:       ; %bb.0:
1267; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
1268; GFX12-NEXT:    s_wait_kmcnt 0x0
1269; GFX12-NEXT:    s_load_u8 s2, s[2:3], 0x0
1270; GFX12-NEXT:    s_wait_kmcnt 0x0
1271; GFX12-NEXT:    s_bfe_i32 s3, s2, 0x10003
1272; GFX12-NEXT:    s_bfe_i32 s4, s2, 0x10002
1273; GFX12-NEXT:    s_bfe_i32 s5, s2, 0x10000
1274; GFX12-NEXT:    s_bfe_i32 s2, s2, 0x10001
1275; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
1276; GFX12-NEXT:    v_dual_mov_b32 v4, 0 :: v_dual_mov_b32 v1, s2
1277; GFX12-NEXT:    v_dual_mov_b32 v0, s5 :: v_dual_mov_b32 v3, s3
1278; GFX12-NEXT:    v_mov_b32_e32 v2, s4
1279; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[0:1]
1280; GFX12-NEXT:    s_endpgm
1281  %load = load <4 x i1>, ptr addrspace(4) %in
1282  %ext = sext <4 x i1> %load to <4 x i32>
1283  store <4 x i32> %ext, ptr addrspace(1) %out
1284  ret void
1285}
1286
; Test: load an <8 x i1> vector through the addrspace(4) pointer %in,
; zero-extend each lane to i32, and store the <8 x i32> result through the
; addrspace(1) pointer %out.
; The per-target assertions below expect one byte-sized load, AND / shift /
; unsigned bitfield-extract instructions that split the loaded byte into the
; output lanes, and two 128-bit stores (the upper four lanes at byte offset 16).
; NOTE(review): the file header says these assertions are autogenerated by
; utils/update_llc_test_checks.py -- regenerate them rather than hand-editing.
1287define amdgpu_kernel void @constant_zextload_v8i1_to_v8i32(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
1288; GFX6-LABEL: constant_zextload_v8i1_to_v8i32:
1289; GFX6:       ; %bb.0:
1290; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
1291; GFX6-NEXT:    s_mov_b32 s7, 0xf000
1292; GFX6-NEXT:    s_mov_b32 s6, -1
1293; GFX6-NEXT:    s_mov_b32 s10, s6
1294; GFX6-NEXT:    s_mov_b32 s11, s7
1295; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
1296; GFX6-NEXT:    s_mov_b32 s8, s2
1297; GFX6-NEXT:    s_mov_b32 s9, s3
1298; GFX6-NEXT:    buffer_load_ubyte v4, off, s[8:11], 0
1299; GFX6-NEXT:    s_mov_b32 s4, s0
1300; GFX6-NEXT:    s_mov_b32 s5, s1
1301; GFX6-NEXT:    s_waitcnt vmcnt(0)
1302; GFX6-NEXT:    v_bfe_u32 v3, v4, 3, 1
1303; GFX6-NEXT:    v_bfe_u32 v1, v4, 1, 1
1304; GFX6-NEXT:    v_lshrrev_b32_e32 v7, 7, v4
1305; GFX6-NEXT:    v_bfe_u32 v5, v4, 5, 1
1306; GFX6-NEXT:    v_and_b32_e32 v0, 1, v4
1307; GFX6-NEXT:    v_bfe_u32 v2, v4, 2, 1
1308; GFX6-NEXT:    v_bfe_u32 v6, v4, 6, 1
1309; GFX6-NEXT:    v_bfe_u32 v4, v4, 4, 1
1310; GFX6-NEXT:    buffer_store_dwordx4 v[4:7], off, s[4:7], 0 offset:16
1311; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
1312; GFX6-NEXT:    s_endpgm
1313;
1314; GFX8-LABEL: constant_zextload_v8i1_to_v8i32:
1315; GFX8:       ; %bb.0:
1316; GFX8-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
1317; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
1318; GFX8-NEXT:    v_mov_b32_e32 v0, s2
1319; GFX8-NEXT:    v_mov_b32_e32 v1, s3
1320; GFX8-NEXT:    flat_load_ubyte v0, v[0:1]
1321; GFX8-NEXT:    v_mov_b32_e32 v9, s1
1322; GFX8-NEXT:    v_mov_b32_e32 v8, s0
1323; GFX8-NEXT:    s_waitcnt vmcnt(0)
1324; GFX8-NEXT:    v_readfirstlane_b32 s2, v0
1325; GFX8-NEXT:    s_bfe_u32 s3, s2, 0x10003
1326; GFX8-NEXT:    s_bfe_u32 s4, s2, 0x10001
1327; GFX8-NEXT:    s_bfe_u32 s5, s2, 0x10005
1328; GFX8-NEXT:    s_and_b32 s6, s2, 1
1329; GFX8-NEXT:    s_bfe_u32 s7, s2, 0x10002
1330; GFX8-NEXT:    s_bfe_u32 s2, s2, 0x10004
1331; GFX8-NEXT:    s_add_u32 s0, s0, 16
1332; GFX8-NEXT:    s_addc_u32 s1, s1, 0
1333; GFX8-NEXT:    v_and_b32_e32 v0, 0xffff, v0
1334; GFX8-NEXT:    v_mov_b32_e32 v11, s1
1335; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 7, v0
1336; GFX8-NEXT:    v_bfe_u32 v2, v0, 6, 1
1337; GFX8-NEXT:    v_mov_b32_e32 v0, s2
1338; GFX8-NEXT:    v_mov_b32_e32 v1, s5
1339; GFX8-NEXT:    v_mov_b32_e32 v10, s0
1340; GFX8-NEXT:    v_mov_b32_e32 v4, s6
1341; GFX8-NEXT:    v_mov_b32_e32 v5, s4
1342; GFX8-NEXT:    v_mov_b32_e32 v6, s7
1343; GFX8-NEXT:    v_mov_b32_e32 v7, s3
1344; GFX8-NEXT:    flat_store_dwordx4 v[10:11], v[0:3]
1345; GFX8-NEXT:    flat_store_dwordx4 v[8:9], v[4:7]
1346; GFX8-NEXT:    s_endpgm
1347;
1348; EG-LABEL: constant_zextload_v8i1_to_v8i32:
1349; EG:       ; %bb.0:
1350; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
1351; EG-NEXT:    TEX 0 @6
1352; EG-NEXT:    ALU 17, @9, KC0[CB0:0-32], KC1[]
1353; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T5.XYZW, T8.X, 0
1354; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T6.XYZW, T7.X, 1
1355; EG-NEXT:    CF_END
1356; EG-NEXT:    Fetch clause starting at 6:
1357; EG-NEXT:     VTX_READ_8 T5.X, T5.X, 0, #1
1358; EG-NEXT:    ALU clause starting at 8:
1359; EG-NEXT:     MOV * T5.X, KC0[2].Z,
1360; EG-NEXT:    ALU clause starting at 9:
1361; EG-NEXT:     BFE_UINT * T6.W, T5.X, literal.x, 1,
1362; EG-NEXT:    3(4.203895e-45), 0(0.000000e+00)
1363; EG-NEXT:     BFE_UINT * T6.Z, T5.X, literal.x, 1,
1364; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
1365; EG-NEXT:     BFE_UINT T6.Y, T5.X, 1, 1,
1366; EG-NEXT:     BFE_UINT * T5.W, T5.X, literal.x, 1,
1367; EG-NEXT:    7(9.809089e-45), 0(0.000000e+00)
1368; EG-NEXT:     AND_INT T6.X, T5.X, 1,
1369; EG-NEXT:     BFE_UINT T5.Z, T5.X, literal.x, 1,
1370; EG-NEXT:     LSHR * T7.X, KC0[2].Y, literal.y,
1371; EG-NEXT:    6(8.407791e-45), 2(2.802597e-45)
1372; EG-NEXT:     BFE_UINT * T5.Y, T5.X, literal.x, 1,
1373; EG-NEXT:    5(7.006492e-45), 0(0.000000e+00)
1374; EG-NEXT:     BFE_UINT T5.X, T5.X, literal.x, 1,
1375; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
1376; EG-NEXT:    4(5.605194e-45), 16(2.242078e-44)
1377; EG-NEXT:     LSHR * T8.X, PV.W, literal.x,
1378; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
1379;
1380; GFX12-LABEL: constant_zextload_v8i1_to_v8i32:
1381; GFX12:       ; %bb.0:
1382; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
1383; GFX12-NEXT:    v_mov_b32_e32 v8, 0
1384; GFX12-NEXT:    s_wait_kmcnt 0x0
1385; GFX12-NEXT:    global_load_u8 v0, v8, s[2:3]
1386; GFX12-NEXT:    s_wait_loadcnt 0x0
1387; GFX12-NEXT:    v_readfirstlane_b32 s2, v0
1388; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
1389; GFX12-NEXT:    s_bfe_u32 s4, s2, 0x10001
1390; GFX12-NEXT:    v_dual_mov_b32 v5, s4 :: v_dual_and_b32 v0, 0xffff, v0
1391; GFX12-NEXT:    s_bfe_u32 s3, s2, 0x10003
1392; GFX12-NEXT:    s_bfe_u32 s5, s2, 0x10005
1393; GFX12-NEXT:    s_and_b32 s6, s2, 1
1394; GFX12-NEXT:    s_bfe_u32 s7, s2, 0x10002
1395; GFX12-NEXT:    s_bfe_u32 s2, s2, 0x10004
1396; GFX12-NEXT:    v_lshrrev_b32_e32 v3, 7, v0
1397; GFX12-NEXT:    v_bfe_u32 v2, v0, 6, 1
1398; GFX12-NEXT:    v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v7, s3
1399; GFX12-NEXT:    s_wait_alu 0xfffe
1400; GFX12-NEXT:    v_dual_mov_b32 v1, s5 :: v_dual_mov_b32 v4, s6
1401; GFX12-NEXT:    v_mov_b32_e32 v6, s7
1402; GFX12-NEXT:    s_clause 0x1
1403; GFX12-NEXT:    global_store_b128 v8, v[0:3], s[0:1] offset:16
1404; GFX12-NEXT:    global_store_b128 v8, v[4:7], s[0:1]
1405; GFX12-NEXT:    s_endpgm
1406  %load = load <8 x i1>, ptr addrspace(4) %in
1407  %ext = zext <8 x i1> %load to <8 x i32>
1408  store <8 x i32> %ext, ptr addrspace(1) %out
1409  ret void
1410}
1411
; Test: load an <8 x i1> vector through the addrspace(4) pointer %in,
; sign-extend each lane to i32, and store the <8 x i32> result through the
; addrspace(1) pointer %out.
; The per-target assertions below expect one byte-sized load, one width-1
; signed bitfield extract per lane (v_bfe_i32, s_bfe_i32 or BFE_INT), and two
; 128-bit stores (the upper four lanes at byte offset 16).
; NOTE(review): the file header says these assertions are autogenerated by
; utils/update_llc_test_checks.py -- regenerate them rather than hand-editing.
1412define amdgpu_kernel void @constant_sextload_v8i1_to_v8i32(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
1413; GFX6-LABEL: constant_sextload_v8i1_to_v8i32:
1414; GFX6:       ; %bb.0:
1415; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
1416; GFX6-NEXT:    s_mov_b32 s7, 0xf000
1417; GFX6-NEXT:    s_mov_b32 s6, -1
1418; GFX6-NEXT:    s_mov_b32 s10, s6
1419; GFX6-NEXT:    s_mov_b32 s11, s7
1420; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
1421; GFX6-NEXT:    s_mov_b32 s8, s2
1422; GFX6-NEXT:    s_mov_b32 s9, s3
1423; GFX6-NEXT:    buffer_load_ubyte v4, off, s[8:11], 0
1424; GFX6-NEXT:    s_mov_b32 s4, s0
1425; GFX6-NEXT:    s_mov_b32 s5, s1
1426; GFX6-NEXT:    s_waitcnt vmcnt(0)
1427; GFX6-NEXT:    v_bfe_i32 v3, v4, 3, 1
1428; GFX6-NEXT:    v_bfe_i32 v2, v4, 2, 1
1429; GFX6-NEXT:    v_bfe_i32 v1, v4, 1, 1
1430; GFX6-NEXT:    v_bfe_i32 v0, v4, 0, 1
1431; GFX6-NEXT:    v_bfe_i32 v7, v4, 7, 1
1432; GFX6-NEXT:    v_bfe_i32 v6, v4, 6, 1
1433; GFX6-NEXT:    v_bfe_i32 v5, v4, 5, 1
1434; GFX6-NEXT:    v_bfe_i32 v4, v4, 4, 1
1435; GFX6-NEXT:    buffer_store_dwordx4 v[4:7], off, s[4:7], 0 offset:16
1436; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
1437; GFX6-NEXT:    s_endpgm
1438;
1439; GFX8-LABEL: constant_sextload_v8i1_to_v8i32:
1440; GFX8:       ; %bb.0:
1441; GFX8-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
1442; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
1443; GFX8-NEXT:    v_mov_b32_e32 v0, s2
1444; GFX8-NEXT:    v_mov_b32_e32 v1, s3
1445; GFX8-NEXT:    flat_load_ubyte v4, v[0:1]
1446; GFX8-NEXT:    s_add_u32 s2, s0, 16
1447; GFX8-NEXT:    s_addc_u32 s3, s1, 0
1448; GFX8-NEXT:    v_mov_b32_e32 v11, s3
1449; GFX8-NEXT:    v_mov_b32_e32 v9, s1
1450; GFX8-NEXT:    v_mov_b32_e32 v10, s2
1451; GFX8-NEXT:    v_mov_b32_e32 v8, s0
1452; GFX8-NEXT:    s_waitcnt vmcnt(0)
1453; GFX8-NEXT:    v_bfe_i32 v3, v4, 3, 1
1454; GFX8-NEXT:    v_bfe_i32 v2, v4, 2, 1
1455; GFX8-NEXT:    v_bfe_i32 v1, v4, 1, 1
1456; GFX8-NEXT:    v_bfe_i32 v0, v4, 0, 1
1457; GFX8-NEXT:    v_bfe_i32 v7, v4, 7, 1
1458; GFX8-NEXT:    v_bfe_i32 v6, v4, 6, 1
1459; GFX8-NEXT:    v_bfe_i32 v5, v4, 5, 1
1460; GFX8-NEXT:    v_bfe_i32 v4, v4, 4, 1
1461; GFX8-NEXT:    flat_store_dwordx4 v[10:11], v[4:7]
1462; GFX8-NEXT:    flat_store_dwordx4 v[8:9], v[0:3]
1463; GFX8-NEXT:    s_endpgm
1464;
1465; EG-LABEL: constant_sextload_v8i1_to_v8i32:
1466; EG:       ; %bb.0:
1467; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
1468; EG-NEXT:    TEX 0 @6
1469; EG-NEXT:    ALU 23, @9, KC0[CB0:0-32], KC1[]
1470; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T6.XYZW, T5.X, 0
1471; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T7.XYZW, T8.X, 1
1472; EG-NEXT:    CF_END
1473; EG-NEXT:    Fetch clause starting at 6:
1474; EG-NEXT:     VTX_READ_8 T5.X, T5.X, 0, #1
1475; EG-NEXT:    ALU clause starting at 8:
1476; EG-NEXT:     MOV * T5.X, KC0[2].Z,
1477; EG-NEXT:    ALU clause starting at 9:
1478; EG-NEXT:     LSHR * T0.W, T5.X, literal.x,
1479; EG-NEXT:    7(9.809089e-45), 0(0.000000e+00)
1480; EG-NEXT:     BFE_INT T6.W, PV.W, 0.0, 1,
1481; EG-NEXT:     LSHR * T0.W, T5.X, literal.x,
1482; EG-NEXT:    6(8.407791e-45), 0(0.000000e+00)
1483; EG-NEXT:     BFE_INT T7.X, T5.X, 0.0, 1,
1484; EG-NEXT:     BFE_INT T6.Z, PS, 0.0, 1,
1485; EG-NEXT:     LSHR T0.W, T5.X, literal.x,
1486; EG-NEXT:     LSHR * T1.W, T5.X, literal.y,
1487; EG-NEXT:    3(4.203895e-45), 5(7.006492e-45)
1488; EG-NEXT:     LSHR T8.X, KC0[2].Y, literal.x,
1489; EG-NEXT:     BFE_INT T6.Y, PS, 0.0, 1,
1490; EG-NEXT:     LSHR T0.Z, T5.X, literal.x,
1491; EG-NEXT:     BFE_INT T7.W, PV.W, 0.0, 1,
1492; EG-NEXT:     LSHR * T0.W, T5.X, literal.y,
1493; EG-NEXT:    2(2.802597e-45), 4(5.605194e-45)
1494; EG-NEXT:     BFE_INT T6.X, PS, 0.0, 1,
1495; EG-NEXT:     BFE_INT T7.Z, PV.Z, 0.0, 1,
1496; EG-NEXT:     LSHR T0.W, T5.X, 1,
1497; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.x,
1498; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
1499; EG-NEXT:     LSHR T5.X, PS, literal.x,
1500; EG-NEXT:     BFE_INT * T7.Y, PV.W, 0.0, 1,
1501; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
1502;
1503; GFX12-LABEL: constant_sextload_v8i1_to_v8i32:
1504; GFX12:       ; %bb.0:
1505; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
1506; GFX12-NEXT:    s_wait_kmcnt 0x0
1507; GFX12-NEXT:    s_load_u8 s2, s[2:3], 0x0
1508; GFX12-NEXT:    s_wait_kmcnt 0x0
1509; GFX12-NEXT:    s_bfe_i32 s3, s2, 0x10003
1510; GFX12-NEXT:    s_bfe_i32 s4, s2, 0x10002
1511; GFX12-NEXT:    s_bfe_i32 s5, s2, 0x10001
1512; GFX12-NEXT:    s_bfe_i32 s6, s2, 0x10000
1513; GFX12-NEXT:    s_bfe_i32 s7, s2, 0x10007
1514; GFX12-NEXT:    s_bfe_i32 s8, s2, 0x10006
1515; GFX12-NEXT:    s_bfe_i32 s9, s2, 0x10004
1516; GFX12-NEXT:    s_bfe_i32 s2, s2, 0x10005
1517; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
1518; GFX12-NEXT:    v_dual_mov_b32 v8, 0 :: v_dual_mov_b32 v1, s2
1519; GFX12-NEXT:    v_dual_mov_b32 v0, s9 :: v_dual_mov_b32 v3, s7
1520; GFX12-NEXT:    v_dual_mov_b32 v2, s8 :: v_dual_mov_b32 v5, s5
1521; GFX12-NEXT:    v_dual_mov_b32 v4, s6 :: v_dual_mov_b32 v7, s3
1522; GFX12-NEXT:    v_mov_b32_e32 v6, s4
1523; GFX12-NEXT:    s_clause 0x1
1524; GFX12-NEXT:    global_store_b128 v8, v[0:3], s[0:1] offset:16
1525; GFX12-NEXT:    global_store_b128 v8, v[4:7], s[0:1]
1526; GFX12-NEXT:    s_endpgm
1527  %load = load <8 x i1>, ptr addrspace(4) %in
1528  %ext = sext <8 x i1> %load to <8 x i32>
1529  store <8 x i32> %ext, ptr addrspace(1) %out
1530  ret void
1531}
1532
; Test: zero-extending load of a <16 x i1> vector to <16 x i32>.
;
; The kernel reads sixteen i1 elements through %in (an addrspace(4) pointer),
; widens each element to i32 with zext, and writes the 64-byte result through
; %out (an addrspace(1) pointer).
;
; The GFX6 / GFX8 / EG / GFX12 check lines below are autogenerated by
; utils/update_llc_test_checks.py (see the NOTE on the first line of this
; file). Regenerate them with that script instead of editing them by hand.
;
; What the expected output shows per target:
;   GFX6  - one buffer_load_ushort, then one VALU op per bit (v_bfe_u32 with
;           width 1; v_and_b32 for bit 0; v_lshrrev_b32 by 15 for bit 15),
;           followed by four buffer_store_dwordx4 at offsets 48/32/16/0.
;   GFX8  - flat_load_ushort, v_readfirstlane_b32 into an SGPR, then scalar
;           s_bfe_u32 / s_and_b32 / s_lshr_b32 per bit and four
;           flat_store_dwordx4.
;   EG    - VTX_READ_16, then BFE_UINT / AND_INT / LSHR per bit and four
;           MEM_RAT_CACHELESS STORE_RAW.
;   GFX12 - global_load_u16, v_readfirstlane_b32, scalar s_bfe_u32 /
;           s_and_b32 / s_lshr_b32 per bit and four global_store_b128 issued
;           under s_clause 0x3.
; NOTE(review): the s_bfe_u32 immediates of the form 0x1000N appear to encode
; "width 1 at bit offset N" (compare the explicit "N, 1" operands of the
; v_bfe_u32 form on GFX6) - confirm against the ISA manual.
1533define amdgpu_kernel void @constant_zextload_v16i1_to_v16i32(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
1534; GFX6-LABEL: constant_zextload_v16i1_to_v16i32:
1535; GFX6:       ; %bb.0:
1536; GFX6-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x9
1537; GFX6-NEXT:    s_mov_b32 s3, 0xf000
1538; GFX6-NEXT:    s_mov_b32 s2, -1
1539; GFX6-NEXT:    s_mov_b32 s10, s2
1540; GFX6-NEXT:    s_mov_b32 s11, s3
1541; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
1542; GFX6-NEXT:    s_mov_b32 s8, s6
1543; GFX6-NEXT:    s_mov_b32 s9, s7
1544; GFX6-NEXT:    buffer_load_ushort v12, off, s[8:11], 0
1545; GFX6-NEXT:    s_mov_b32 s0, s4
1546; GFX6-NEXT:    s_mov_b32 s1, s5
1547; GFX6-NEXT:    s_waitcnt vmcnt(0)
1548; GFX6-NEXT:    v_bfe_u32 v3, v12, 3, 1
1549; GFX6-NEXT:    v_bfe_u32 v1, v12, 1, 1
1550; GFX6-NEXT:    v_bfe_u32 v7, v12, 7, 1
1551; GFX6-NEXT:    v_bfe_u32 v5, v12, 5, 1
1552; GFX6-NEXT:    v_bfe_u32 v11, v12, 11, 1
1553; GFX6-NEXT:    v_bfe_u32 v9, v12, 9, 1
1554; GFX6-NEXT:    v_lshrrev_b32_e32 v15, 15, v12
1555; GFX6-NEXT:    v_bfe_u32 v13, v12, 13, 1
1556; GFX6-NEXT:    v_and_b32_e32 v0, 1, v12
1557; GFX6-NEXT:    v_bfe_u32 v2, v12, 2, 1
1558; GFX6-NEXT:    v_bfe_u32 v6, v12, 6, 1
1559; GFX6-NEXT:    v_bfe_u32 v4, v12, 4, 1
1560; GFX6-NEXT:    v_bfe_u32 v10, v12, 10, 1
1561; GFX6-NEXT:    v_bfe_u32 v8, v12, 8, 1
1562; GFX6-NEXT:    v_bfe_u32 v14, v12, 14, 1
1563; GFX6-NEXT:    v_bfe_u32 v12, v12, 12, 1
1564; GFX6-NEXT:    buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:48
1565; GFX6-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:32
1566; GFX6-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16
1567; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
1568; GFX6-NEXT:    s_endpgm
1569;
1570; GFX8-LABEL: constant_zextload_v16i1_to_v16i32:
1571; GFX8:       ; %bb.0:
1572; GFX8-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
1573; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
1574; GFX8-NEXT:    v_mov_b32_e32 v0, s2
1575; GFX8-NEXT:    v_mov_b32_e32 v1, s3
1576; GFX8-NEXT:    flat_load_ushort v0, v[0:1]
1577; GFX8-NEXT:    v_mov_b32_e32 v17, s1
1578; GFX8-NEXT:    v_mov_b32_e32 v16, s0
1579; GFX8-NEXT:    s_waitcnt vmcnt(0)
1580; GFX8-NEXT:    v_readfirstlane_b32 s2, v0
1581; GFX8-NEXT:    s_and_b32 s6, 0xffff, s2
1582; GFX8-NEXT:    s_bfe_u32 s3, s2, 0x10003
1583; GFX8-NEXT:    s_bfe_u32 s4, s2, 0x10001
1584; GFX8-NEXT:    s_bfe_u32 s5, s2, 0x10007
1585; GFX8-NEXT:    s_bfe_u32 s7, s2, 0x10009
1586; GFX8-NEXT:    s_bfe_u32 s8, s2, 0x1000d
1587; GFX8-NEXT:    s_and_b32 s9, s2, 1
1588; GFX8-NEXT:    s_bfe_u32 s10, s2, 0x1000a
1589; GFX8-NEXT:    s_bfe_u32 s2, s2, 0x1000c
1590; GFX8-NEXT:    s_bfe_u32 s11, s6, 0x10005
1591; GFX8-NEXT:    s_bfe_u32 s12, s6, 0x1000b
1592; GFX8-NEXT:    s_lshr_b32 s13, s6, 15
1593; GFX8-NEXT:    s_bfe_u32 s14, s6, 0x10002
1594; GFX8-NEXT:    s_bfe_u32 s15, s6, 0x10006
1595; GFX8-NEXT:    s_bfe_u32 s16, s6, 0x10004
1596; GFX8-NEXT:    s_bfe_u32 s17, s6, 0x10008
1597; GFX8-NEXT:    s_bfe_u32 s6, s6, 0x1000e
1598; GFX8-NEXT:    v_mov_b32_e32 v0, s2
1599; GFX8-NEXT:    s_add_u32 s2, s0, 48
1600; GFX8-NEXT:    v_mov_b32_e32 v15, s3
1601; GFX8-NEXT:    s_addc_u32 s3, s1, 0
1602; GFX8-NEXT:    v_mov_b32_e32 v19, s3
1603; GFX8-NEXT:    v_mov_b32_e32 v1, s8
1604; GFX8-NEXT:    v_mov_b32_e32 v2, s6
1605; GFX8-NEXT:    v_mov_b32_e32 v3, s13
1606; GFX8-NEXT:    v_mov_b32_e32 v18, s2
1607; GFX8-NEXT:    s_add_u32 s2, s0, 32
1608; GFX8-NEXT:    flat_store_dwordx4 v[18:19], v[0:3]
1609; GFX8-NEXT:    s_addc_u32 s3, s1, 0
1610; GFX8-NEXT:    v_mov_b32_e32 v0, s2
1611; GFX8-NEXT:    v_mov_b32_e32 v5, s7
1612; GFX8-NEXT:    v_mov_b32_e32 v6, s10
1613; GFX8-NEXT:    v_mov_b32_e32 v4, s17
1614; GFX8-NEXT:    v_mov_b32_e32 v7, s12
1615; GFX8-NEXT:    v_mov_b32_e32 v1, s3
1616; GFX8-NEXT:    s_add_u32 s0, s0, 16
1617; GFX8-NEXT:    flat_store_dwordx4 v[0:1], v[4:7]
1618; GFX8-NEXT:    s_addc_u32 s1, s1, 0
1619; GFX8-NEXT:    v_mov_b32_e32 v0, s0
1620; GFX8-NEXT:    v_mov_b32_e32 v11, s5
1621; GFX8-NEXT:    v_mov_b32_e32 v8, s16
1622; GFX8-NEXT:    v_mov_b32_e32 v9, s11
1623; GFX8-NEXT:    v_mov_b32_e32 v10, s15
1624; GFX8-NEXT:    v_mov_b32_e32 v1, s1
1625; GFX8-NEXT:    v_mov_b32_e32 v12, s9
1626; GFX8-NEXT:    v_mov_b32_e32 v13, s4
1627; GFX8-NEXT:    v_mov_b32_e32 v14, s14
1628; GFX8-NEXT:    flat_store_dwordx4 v[0:1], v[8:11]
1629; GFX8-NEXT:    flat_store_dwordx4 v[16:17], v[12:15]
1630; GFX8-NEXT:    s_endpgm
1631;
1632; EG-LABEL: constant_zextload_v16i1_to_v16i32:
1633; EG:       ; %bb.0:
1634; EG-NEXT:    ALU 0, @10, KC0[CB0:0-32], KC1[]
1635; EG-NEXT:    TEX 0 @8
1636; EG-NEXT:    ALU 36, @11, KC0[CB0:0-32], KC1[]
1637; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T7.XYZW, T14.X, 0
1638; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T11.XYZW, T13.X, 0
1639; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T9.XYZW, T12.X, 0
1640; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T8.XYZW, T10.X, 1
1641; EG-NEXT:    CF_END
1642; EG-NEXT:    Fetch clause starting at 8:
1643; EG-NEXT:     VTX_READ_16 T7.X, T7.X, 0, #1
1644; EG-NEXT:    ALU clause starting at 10:
1645; EG-NEXT:     MOV * T7.X, KC0[2].Z,
1646; EG-NEXT:    ALU clause starting at 11:
1647; EG-NEXT:     BFE_UINT * T8.W, T7.X, literal.x, 1,
1648; EG-NEXT:    3(4.203895e-45), 0(0.000000e+00)
1649; EG-NEXT:     BFE_UINT * T8.Z, T7.X, literal.x, 1,
1650; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
1651; EG-NEXT:     BFE_UINT T8.Y, T7.X, 1, 1,
1652; EG-NEXT:     BFE_UINT * T9.W, T7.X, literal.x, 1,
1653; EG-NEXT:    7(9.809089e-45), 0(0.000000e+00)
1654; EG-NEXT:     AND_INT T8.X, T7.X, 1,
1655; EG-NEXT:     BFE_UINT T9.Z, T7.X, literal.x, 1,
1656; EG-NEXT:     LSHR * T10.X, KC0[2].Y, literal.y,
1657; EG-NEXT:    6(8.407791e-45), 2(2.802597e-45)
1658; EG-NEXT:     BFE_UINT T9.Y, T7.X, literal.x, 1,
1659; EG-NEXT:     BFE_UINT * T11.W, T7.X, literal.y, 1,
1660; EG-NEXT:    5(7.006492e-45), 11(1.541428e-44)
1661; EG-NEXT:     BFE_UINT T9.X, T7.X, literal.x, 1,
1662; EG-NEXT:     BFE_UINT T11.Z, T7.X, literal.y, 1,
1663; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.z,
1664; EG-NEXT:    4(5.605194e-45), 10(1.401298e-44)
1665; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
1666; EG-NEXT:     LSHR T12.X, PV.W, literal.x,
1667; EG-NEXT:     BFE_UINT T11.Y, T7.X, literal.y, 1,
1668; EG-NEXT:     LSHR * T7.W, T7.X, literal.z,
1669; EG-NEXT:    2(2.802597e-45), 9(1.261169e-44)
1670; EG-NEXT:    15(2.101948e-44), 0(0.000000e+00)
1671; EG-NEXT:     BFE_UINT T11.X, T7.X, literal.x, 1,
1672; EG-NEXT:     BFE_UINT T7.Z, T7.X, literal.y, 1,
1673; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.z,
1674; EG-NEXT:    8(1.121039e-44), 14(1.961818e-44)
1675; EG-NEXT:    32(4.484155e-44), 0(0.000000e+00)
1676; EG-NEXT:     LSHR T13.X, PV.W, literal.x,
1677; EG-NEXT:     BFE_UINT * T7.Y, T7.X, literal.y, 1,
1678; EG-NEXT:    2(2.802597e-45), 13(1.821688e-44)
1679; EG-NEXT:     BFE_UINT T7.X, T7.X, literal.x, 1,
1680; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
1681; EG-NEXT:    12(1.681558e-44), 48(6.726233e-44)
1682; EG-NEXT:     LSHR * T14.X, PV.W, literal.x,
1683; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
1684;
1685; GFX12-LABEL: constant_zextload_v16i1_to_v16i32:
1686; GFX12:       ; %bb.0:
1687; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
1688; GFX12-NEXT:    v_mov_b32_e32 v16, 0
1689; GFX12-NEXT:    s_wait_kmcnt 0x0
1690; GFX12-NEXT:    global_load_u16 v0, v16, s[2:3]
1691; GFX12-NEXT:    s_wait_loadcnt 0x0
1692; GFX12-NEXT:    v_readfirstlane_b32 s2, v0
1693; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1694; GFX12-NEXT:    s_and_b32 s6, 0xffff, s2
1695; GFX12-NEXT:    s_bfe_u32 s3, s2, 0x10003
1696; GFX12-NEXT:    s_bfe_u32 s4, s2, 0x10001
1697; GFX12-NEXT:    s_bfe_u32 s5, s2, 0x10007
1698; GFX12-NEXT:    s_bfe_u32 s7, s2, 0x10009
1699; GFX12-NEXT:    s_bfe_u32 s8, s2, 0x1000d
1700; GFX12-NEXT:    s_and_b32 s9, s2, 1
1701; GFX12-NEXT:    v_mov_b32_e32 v1, s8
1702; GFX12-NEXT:    s_bfe_u32 s10, s2, 0x1000a
1703; GFX12-NEXT:    s_bfe_u32 s2, s2, 0x1000c
1704; GFX12-NEXT:    s_bfe_u32 s11, s6, 0x10005
1705; GFX12-NEXT:    s_bfe_u32 s12, s6, 0x1000b
1706; GFX12-NEXT:    s_lshr_b32 s13, s6, 15
1707; GFX12-NEXT:    s_bfe_u32 s14, s6, 0x10002
1708; GFX12-NEXT:    s_bfe_u32 s15, s6, 0x10006
1709; GFX12-NEXT:    s_bfe_u32 s16, s6, 0x10004
1710; GFX12-NEXT:    s_bfe_u32 s17, s6, 0x10008
1711; GFX12-NEXT:    s_bfe_u32 s6, s6, 0x1000e
1712; GFX12-NEXT:    v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v5, s7
1713; GFX12-NEXT:    v_dual_mov_b32 v15, s3 :: v_dual_mov_b32 v2, s6
1714; GFX12-NEXT:    v_dual_mov_b32 v3, s13 :: v_dual_mov_b32 v4, s17
1715; GFX12-NEXT:    v_dual_mov_b32 v6, s10 :: v_dual_mov_b32 v11, s5
1716; GFX12-NEXT:    v_dual_mov_b32 v7, s12 :: v_dual_mov_b32 v8, s16
1717; GFX12-NEXT:    v_dual_mov_b32 v9, s11 :: v_dual_mov_b32 v10, s15
1718; GFX12-NEXT:    v_dual_mov_b32 v12, s9 :: v_dual_mov_b32 v13, s4
1719; GFX12-NEXT:    v_mov_b32_e32 v14, s14
1720; GFX12-NEXT:    s_clause 0x3
1721; GFX12-NEXT:    global_store_b128 v16, v[0:3], s[0:1] offset:48
1722; GFX12-NEXT:    global_store_b128 v16, v[4:7], s[0:1] offset:32
1723; GFX12-NEXT:    global_store_b128 v16, v[8:11], s[0:1] offset:16
1724; GFX12-NEXT:    global_store_b128 v16, v[12:15], s[0:1]
1725; GFX12-NEXT:    s_endpgm
; IR under test: load <16 x i1> via %in, zext to <16 x i32>, store via %out.
1726  %load = load <16 x i1>, ptr addrspace(4) %in
1727  %ext = zext <16 x i1> %load to <16 x i32>
1728  store <16 x i32> %ext, ptr addrspace(1) %out
1729  ret void
1730}
1731
; Test: sign-extending load of a <16 x i1> vector to <16 x i32>.
;
; The kernel reads sixteen i1 elements through %in (an addrspace(4) pointer),
; widens each element to i32 with sext, and writes the 64-byte result through
; %out (an addrspace(1) pointer).
;
; The GFX6 / GFX8 / EG / GFX12 check lines below are autogenerated by
; utils/update_llc_test_checks.py (see the NOTE on the first line of this
; file). Regenerate them with that script instead of editing them by hand.
;
; What the expected output shows per target:
;   GFX6  - one buffer_load_ushort, then a v_bfe_i32 with width 1 for each of
;           the 16 bit positions, followed by four buffer_store_dwordx4 at
;           offsets 48/32/16/0.
;   GFX8  - flat_load_ushort, then v_bfe_i32 with width 1 per bit directly on
;           the loaded VGPR, and four flat_store_dwordx4.
;   EG    - VTX_READ_16, then an LSHR to move each bit down followed by
;           BFE_INT with width 1 (bit 0 uses BFE_INT on the loaded value
;           directly), and four MEM_RAT_CACHELESS STORE_RAW.
;   GFX12 - scalar s_load_u16, s_bfe_i32 per bit, and four global_store_b128
;           issued under s_clause 0x3.
; NOTE(review): the s_bfe_i32 immediates of the form 0x1000N appear to encode
; "width 1 at bit offset N" (compare the explicit "N, 1" operands of the
; v_bfe_i32 form on GFX6/GFX8) - confirm against the ISA manual.
1732define amdgpu_kernel void @constant_sextload_v16i1_to_v16i32(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
1733; GFX6-LABEL: constant_sextload_v16i1_to_v16i32:
1734; GFX6:       ; %bb.0:
1735; GFX6-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x9
1736; GFX6-NEXT:    s_mov_b32 s3, 0xf000
1737; GFX6-NEXT:    s_mov_b32 s2, -1
1738; GFX6-NEXT:    s_mov_b32 s10, s2
1739; GFX6-NEXT:    s_mov_b32 s11, s3
1740; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
1741; GFX6-NEXT:    s_mov_b32 s8, s6
1742; GFX6-NEXT:    s_mov_b32 s9, s7
1743; GFX6-NEXT:    buffer_load_ushort v12, off, s[8:11], 0
1744; GFX6-NEXT:    s_mov_b32 s0, s4
1745; GFX6-NEXT:    s_mov_b32 s1, s5
1746; GFX6-NEXT:    s_waitcnt vmcnt(0)
1747; GFX6-NEXT:    v_bfe_i32 v3, v12, 3, 1
1748; GFX6-NEXT:    v_bfe_i32 v2, v12, 2, 1
1749; GFX6-NEXT:    v_bfe_i32 v1, v12, 1, 1
1750; GFX6-NEXT:    v_bfe_i32 v0, v12, 0, 1
1751; GFX6-NEXT:    v_bfe_i32 v7, v12, 7, 1
1752; GFX6-NEXT:    v_bfe_i32 v6, v12, 6, 1
1753; GFX6-NEXT:    v_bfe_i32 v5, v12, 5, 1
1754; GFX6-NEXT:    v_bfe_i32 v4, v12, 4, 1
1755; GFX6-NEXT:    v_bfe_i32 v11, v12, 11, 1
1756; GFX6-NEXT:    v_bfe_i32 v10, v12, 10, 1
1757; GFX6-NEXT:    v_bfe_i32 v9, v12, 9, 1
1758; GFX6-NEXT:    v_bfe_i32 v8, v12, 8, 1
1759; GFX6-NEXT:    v_bfe_i32 v15, v12, 15, 1
1760; GFX6-NEXT:    v_bfe_i32 v14, v12, 14, 1
1761; GFX6-NEXT:    v_bfe_i32 v13, v12, 13, 1
1762; GFX6-NEXT:    v_bfe_i32 v12, v12, 12, 1
1763; GFX6-NEXT:    buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:48
1764; GFX6-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:32
1765; GFX6-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16
1766; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
1767; GFX6-NEXT:    s_endpgm
1768;
1769; GFX8-LABEL: constant_sextload_v16i1_to_v16i32:
1770; GFX8:       ; %bb.0:
1771; GFX8-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
1772; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
1773; GFX8-NEXT:    v_mov_b32_e32 v0, s2
1774; GFX8-NEXT:    v_mov_b32_e32 v1, s3
1775; GFX8-NEXT:    flat_load_ushort v18, v[0:1]
1776; GFX8-NEXT:    s_add_u32 s2, s0, 48
1777; GFX8-NEXT:    s_addc_u32 s3, s1, 0
1778; GFX8-NEXT:    v_mov_b32_e32 v9, s3
1779; GFX8-NEXT:    v_mov_b32_e32 v8, s2
1780; GFX8-NEXT:    s_add_u32 s2, s0, 32
1781; GFX8-NEXT:    v_mov_b32_e32 v13, s1
1782; GFX8-NEXT:    s_addc_u32 s3, s1, 0
1783; GFX8-NEXT:    v_mov_b32_e32 v12, s0
1784; GFX8-NEXT:    s_add_u32 s0, s0, 16
1785; GFX8-NEXT:    v_mov_b32_e32 v15, s3
1786; GFX8-NEXT:    s_addc_u32 s1, s1, 0
1787; GFX8-NEXT:    v_mov_b32_e32 v14, s2
1788; GFX8-NEXT:    v_mov_b32_e32 v17, s1
1789; GFX8-NEXT:    v_mov_b32_e32 v16, s0
1790; GFX8-NEXT:    s_waitcnt vmcnt(0)
1791; GFX8-NEXT:    v_bfe_i32 v7, v18, 15, 1
1792; GFX8-NEXT:    v_bfe_i32 v6, v18, 14, 1
1793; GFX8-NEXT:    v_bfe_i32 v5, v18, 13, 1
1794; GFX8-NEXT:    v_bfe_i32 v4, v18, 12, 1
1795; GFX8-NEXT:    flat_store_dwordx4 v[8:9], v[4:7]
1796; GFX8-NEXT:    v_bfe_i32 v11, v18, 11, 1
1797; GFX8-NEXT:    v_bfe_i32 v10, v18, 10, 1
1798; GFX8-NEXT:    v_bfe_i32 v9, v18, 9, 1
1799; GFX8-NEXT:    v_bfe_i32 v8, v18, 8, 1
1800; GFX8-NEXT:    v_bfe_i32 v3, v18, 3, 1
1801; GFX8-NEXT:    v_bfe_i32 v2, v18, 2, 1
1802; GFX8-NEXT:    v_bfe_i32 v1, v18, 1, 1
1803; GFX8-NEXT:    v_bfe_i32 v0, v18, 0, 1
1804; GFX8-NEXT:    v_bfe_i32 v7, v18, 7, 1
1805; GFX8-NEXT:    v_bfe_i32 v6, v18, 6, 1
1806; GFX8-NEXT:    v_bfe_i32 v5, v18, 5, 1
1807; GFX8-NEXT:    v_bfe_i32 v4, v18, 4, 1
1808; GFX8-NEXT:    flat_store_dwordx4 v[14:15], v[8:11]
1809; GFX8-NEXT:    flat_store_dwordx4 v[16:17], v[4:7]
1810; GFX8-NEXT:    flat_store_dwordx4 v[12:13], v[0:3]
1811; GFX8-NEXT:    s_endpgm
1812;
1813; EG-LABEL: constant_sextload_v16i1_to_v16i32:
1814; EG:       ; %bb.0:
1815; EG-NEXT:    ALU 0, @10, KC0[CB0:0-32], KC1[]
1816; EG-NEXT:    TEX 0 @8
1817; EG-NEXT:    ALU 51, @11, KC0[CB0:0-32], KC1[]
1818; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T12.XYZW, T7.X, 0
1819; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T10.XYZW, T14.X, 0
1820; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T8.XYZW, T11.X, 0
1821; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T13.XYZW, T9.X, 1
1822; EG-NEXT:    CF_END
1823; EG-NEXT:    Fetch clause starting at 8:
1824; EG-NEXT:     VTX_READ_16 T7.X, T7.X, 0, #1
1825; EG-NEXT:    ALU clause starting at 10:
1826; EG-NEXT:     MOV * T7.X, KC0[2].Z,
1827; EG-NEXT:    ALU clause starting at 11:
1828; EG-NEXT:     LSHR * T0.W, T7.X, literal.x,
1829; EG-NEXT:    7(9.809089e-45), 0(0.000000e+00)
1830; EG-NEXT:     BFE_INT T8.W, PV.W, 0.0, 1,
1831; EG-NEXT:     LSHR * T0.W, T7.X, literal.x,
1832; EG-NEXT:    6(8.407791e-45), 0(0.000000e+00)
1833; EG-NEXT:     BFE_INT T8.Z, PS, 0.0, 1,
1834; EG-NEXT:     LSHR T0.W, T7.X, literal.x,
1835; EG-NEXT:     LSHR * T1.W, T7.X, literal.y,
1836; EG-NEXT:    11(1.541428e-44), 5(7.006492e-45)
1837; EG-NEXT:     LSHR T9.X, KC0[2].Y, literal.x,
1838; EG-NEXT:     BFE_INT T8.Y, PS, 0.0, 1,
1839; EG-NEXT:     LSHR T0.Z, T7.X, literal.y,
1840; EG-NEXT:     BFE_INT T10.W, PV.W, 0.0, 1,
1841; EG-NEXT:     LSHR * T0.W, T7.X, literal.z,
1842; EG-NEXT:    2(2.802597e-45), 10(1.401298e-44)
1843; EG-NEXT:    4(5.605194e-45), 0(0.000000e+00)
1844; EG-NEXT:     BFE_INT T8.X, PS, 0.0, 1,
1845; EG-NEXT:     BFE_INT T10.Z, PV.Z, 0.0, 1,
1846; EG-NEXT:     LSHR T0.W, T7.X, literal.x,
1847; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.y,
1848; EG-NEXT:    9(1.261169e-44), 16(2.242078e-44)
1849; EG-NEXT:     LSHR T11.X, PS, literal.x,
1850; EG-NEXT:     BFE_INT T10.Y, PV.W, 0.0, 1,
1851; EG-NEXT:     LSHR T0.W, T7.X, literal.y,
1852; EG-NEXT:     LSHR * T1.W, T7.X, literal.z,
1853; EG-NEXT:    2(2.802597e-45), 15(2.101948e-44)
1854; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
1855; EG-NEXT:     BFE_INT T10.X, PS, 0.0, 1,
1856; EG-NEXT:     BFE_INT T12.W, PV.W, 0.0, 1,
1857; EG-NEXT:     LSHR * T0.W, T7.X, literal.x,
1858; EG-NEXT:    14(1.961818e-44), 0(0.000000e+00)
1859; EG-NEXT:     BFE_INT T13.X, T7.X, 0.0, 1,
1860; EG-NEXT:     LSHR T0.Y, T7.X, literal.x,
1861; EG-NEXT:     BFE_INT T12.Z, PS, 0.0, 1,
1862; EG-NEXT:     LSHR T0.W, T7.X, literal.y,
1863; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.z,
1864; EG-NEXT:    3(4.203895e-45), 13(1.821688e-44)
1865; EG-NEXT:    32(4.484155e-44), 0(0.000000e+00)
1866; EG-NEXT:     LSHR T14.X, PS, literal.x,
1867; EG-NEXT:     BFE_INT T12.Y, PV.W, 0.0, 1,
1868; EG-NEXT:     LSHR T0.Z, T7.X, literal.x,
1869; EG-NEXT:     BFE_INT T13.W, PV.Y, 0.0, 1,
1870; EG-NEXT:     LSHR * T0.W, T7.X, literal.y,
1871; EG-NEXT:    2(2.802597e-45), 12(1.681558e-44)
1872; EG-NEXT:     BFE_INT T12.X, PS, 0.0, 1,
1873; EG-NEXT:     BFE_INT T13.Z, PV.Z, 0.0, 1,
1874; EG-NEXT:     LSHR T0.W, T7.X, 1,
1875; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.x,
1876; EG-NEXT:    48(6.726233e-44), 0(0.000000e+00)
1877; EG-NEXT:     LSHR T7.X, PS, literal.x,
1878; EG-NEXT:     BFE_INT * T13.Y, PV.W, 0.0, 1,
1879; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
1880;
1881; GFX12-LABEL: constant_sextload_v16i1_to_v16i32:
1882; GFX12:       ; %bb.0:
1883; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
1884; GFX12-NEXT:    s_wait_kmcnt 0x0
1885; GFX12-NEXT:    s_load_u16 s2, s[2:3], 0x0
1886; GFX12-NEXT:    s_wait_kmcnt 0x0
1887; GFX12-NEXT:    s_bfe_i32 s3, s2, 0x10003
1888; GFX12-NEXT:    s_bfe_i32 s4, s2, 0x10002
1889; GFX12-NEXT:    s_bfe_i32 s5, s2, 0x10001
1890; GFX12-NEXT:    s_bfe_i32 s6, s2, 0x10000
1891; GFX12-NEXT:    s_bfe_i32 s7, s2, 0x10007
1892; GFX12-NEXT:    s_bfe_i32 s8, s2, 0x10006
1893; GFX12-NEXT:    s_bfe_i32 s9, s2, 0x10005
1894; GFX12-NEXT:    s_bfe_i32 s10, s2, 0x10004
1895; GFX12-NEXT:    s_bfe_i32 s11, s2, 0x1000b
1896; GFX12-NEXT:    s_bfe_i32 s12, s2, 0x1000a
1897; GFX12-NEXT:    s_bfe_i32 s13, s2, 0x10009
1898; GFX12-NEXT:    s_bfe_i32 s14, s2, 0x10008
1899; GFX12-NEXT:    s_bfe_i32 s15, s2, 0x1000f
1900; GFX12-NEXT:    s_bfe_i32 s16, s2, 0x1000e
1901; GFX12-NEXT:    s_bfe_i32 s17, s2, 0x1000c
1902; GFX12-NEXT:    s_bfe_i32 s2, s2, 0x1000d
1903; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
1904; GFX12-NEXT:    v_dual_mov_b32 v16, 0 :: v_dual_mov_b32 v1, s2
1905; GFX12-NEXT:    v_dual_mov_b32 v0, s17 :: v_dual_mov_b32 v3, s15
1906; GFX12-NEXT:    v_dual_mov_b32 v2, s16 :: v_dual_mov_b32 v5, s13
1907; GFX12-NEXT:    v_dual_mov_b32 v4, s14 :: v_dual_mov_b32 v7, s11
1908; GFX12-NEXT:    v_dual_mov_b32 v6, s12 :: v_dual_mov_b32 v9, s9
1909; GFX12-NEXT:    v_dual_mov_b32 v8, s10 :: v_dual_mov_b32 v11, s7
1910; GFX12-NEXT:    v_dual_mov_b32 v10, s8 :: v_dual_mov_b32 v13, s5
1911; GFX12-NEXT:    v_dual_mov_b32 v12, s6 :: v_dual_mov_b32 v15, s3
1912; GFX12-NEXT:    v_mov_b32_e32 v14, s4
1913; GFX12-NEXT:    s_clause 0x3
1914; GFX12-NEXT:    global_store_b128 v16, v[0:3], s[0:1] offset:48
1915; GFX12-NEXT:    global_store_b128 v16, v[4:7], s[0:1] offset:32
1916; GFX12-NEXT:    global_store_b128 v16, v[8:11], s[0:1] offset:16
1917; GFX12-NEXT:    global_store_b128 v16, v[12:15], s[0:1]
1918; GFX12-NEXT:    s_endpgm
; IR under test: load <16 x i1> via %in, sext to <16 x i32>, store via %out.
1919  %load = load <16 x i1>, ptr addrspace(4) %in
1920  %ext = sext <16 x i1> %load to <16 x i32>
1921  store <16 x i32> %ext, ptr addrspace(1) %out
1922  ret void
1923}
1924
1925define amdgpu_kernel void @constant_zextload_v32i1_to_v32i32(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
1926; GFX6-LABEL: constant_zextload_v32i1_to_v32i32:
1927; GFX6:       ; %bb.0:
1928; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
1929; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
1930; GFX6-NEXT:    s_load_dword s4, s[2:3], 0x0
1931; GFX6-NEXT:    s_mov_b32 s3, 0xf000
1932; GFX6-NEXT:    s_mov_b32 s2, -1
1933; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
1934; GFX6-NEXT:    s_bfe_u32 s5, s4, 0x10003
1935; GFX6-NEXT:    s_bfe_u32 s6, s4, 0x10001
1936; GFX6-NEXT:    s_bfe_u32 s7, s4, 0x10007
1937; GFX6-NEXT:    s_bfe_u32 s8, s4, 0x10005
1938; GFX6-NEXT:    s_bfe_u32 s9, s4, 0x1000b
1939; GFX6-NEXT:    s_bfe_u32 s10, s4, 0x10009
1940; GFX6-NEXT:    s_bfe_u32 s11, s4, 0x1000f
1941; GFX6-NEXT:    s_bfe_u32 s12, s4, 0x1000d
1942; GFX6-NEXT:    s_bfe_u32 s13, s4, 0x10013
1943; GFX6-NEXT:    s_bfe_u32 s14, s4, 0x10011
1944; GFX6-NEXT:    s_bfe_u32 s15, s4, 0x10017
1945; GFX6-NEXT:    s_bfe_u32 s16, s4, 0x10015
1946; GFX6-NEXT:    s_bfe_u32 s17, s4, 0x1001b
1947; GFX6-NEXT:    s_bfe_u32 s18, s4, 0x10019
1948; GFX6-NEXT:    s_lshr_b32 s19, s4, 31
1949; GFX6-NEXT:    s_bfe_u32 s20, s4, 0x1001d
1950; GFX6-NEXT:    s_and_b32 s21, s4, 1
1951; GFX6-NEXT:    s_bfe_u32 s22, s4, 0x10002
1952; GFX6-NEXT:    s_bfe_u32 s23, s4, 0x10006
1953; GFX6-NEXT:    s_bfe_u32 s24, s4, 0x10004
1954; GFX6-NEXT:    s_bfe_u32 s25, s4, 0x1000a
1955; GFX6-NEXT:    s_bfe_u32 s26, s4, 0x10008
1956; GFX6-NEXT:    s_bfe_u32 s27, s4, 0x1000e
1957; GFX6-NEXT:    s_bfe_u32 s28, s4, 0x1000c
1958; GFX6-NEXT:    s_bfe_u32 s29, s4, 0x10012
1959; GFX6-NEXT:    s_bfe_u32 s30, s4, 0x10010
1960; GFX6-NEXT:    s_bfe_u32 s31, s4, 0x10016
1961; GFX6-NEXT:    s_bfe_u32 s33, s4, 0x10014
1962; GFX6-NEXT:    s_bfe_u32 s34, s4, 0x1001a
1963; GFX6-NEXT:    s_bfe_u32 s35, s4, 0x1001e
1964; GFX6-NEXT:    s_bfe_u32 s36, s4, 0x1001c
1965; GFX6-NEXT:    s_bfe_u32 s4, s4, 0x10018
1966; GFX6-NEXT:    v_mov_b32_e32 v0, s36
1967; GFX6-NEXT:    v_mov_b32_e32 v1, s20
1968; GFX6-NEXT:    v_mov_b32_e32 v2, s35
1969; GFX6-NEXT:    v_mov_b32_e32 v3, s19
1970; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112
1971; GFX6-NEXT:    s_waitcnt expcnt(0)
1972; GFX6-NEXT:    v_mov_b32_e32 v0, s4
1973; GFX6-NEXT:    v_mov_b32_e32 v1, s18
1974; GFX6-NEXT:    v_mov_b32_e32 v2, s34
1975; GFX6-NEXT:    v_mov_b32_e32 v3, s17
1976; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96
1977; GFX6-NEXT:    s_waitcnt expcnt(0)
1978; GFX6-NEXT:    v_mov_b32_e32 v0, s33
1979; GFX6-NEXT:    v_mov_b32_e32 v1, s16
1980; GFX6-NEXT:    v_mov_b32_e32 v2, s31
1981; GFX6-NEXT:    v_mov_b32_e32 v3, s15
1982; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80
1983; GFX6-NEXT:    s_waitcnt expcnt(0)
1984; GFX6-NEXT:    v_mov_b32_e32 v0, s30
1985; GFX6-NEXT:    v_mov_b32_e32 v1, s14
1986; GFX6-NEXT:    v_mov_b32_e32 v2, s29
1987; GFX6-NEXT:    v_mov_b32_e32 v3, s13
1988; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64
1989; GFX6-NEXT:    s_waitcnt expcnt(0)
1990; GFX6-NEXT:    v_mov_b32_e32 v0, s28
1991; GFX6-NEXT:    v_mov_b32_e32 v1, s12
1992; GFX6-NEXT:    v_mov_b32_e32 v2, s27
1993; GFX6-NEXT:    v_mov_b32_e32 v3, s11
1994; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
1995; GFX6-NEXT:    s_waitcnt expcnt(0)
1996; GFX6-NEXT:    v_mov_b32_e32 v0, s26
1997; GFX6-NEXT:    v_mov_b32_e32 v1, s10
1998; GFX6-NEXT:    v_mov_b32_e32 v2, s25
1999; GFX6-NEXT:    v_mov_b32_e32 v3, s9
2000; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
2001; GFX6-NEXT:    s_waitcnt expcnt(0)
2002; GFX6-NEXT:    v_mov_b32_e32 v0, s24
2003; GFX6-NEXT:    v_mov_b32_e32 v1, s8
2004; GFX6-NEXT:    v_mov_b32_e32 v2, s23
2005; GFX6-NEXT:    v_mov_b32_e32 v3, s7
2006; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
2007; GFX6-NEXT:    s_waitcnt expcnt(0)
2008; GFX6-NEXT:    v_mov_b32_e32 v0, s21
2009; GFX6-NEXT:    v_mov_b32_e32 v1, s6
2010; GFX6-NEXT:    v_mov_b32_e32 v2, s22
2011; GFX6-NEXT:    v_mov_b32_e32 v3, s5
2012; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
2013; GFX6-NEXT:    s_endpgm
2014;
2015; GFX8-LABEL: constant_zextload_v32i1_to_v32i32:
2016; GFX8:       ; %bb.0:
2017; GFX8-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
2018; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
2019; GFX8-NEXT:    s_load_dword s2, s[2:3], 0x0
2020; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
2021; GFX8-NEXT:    s_bfe_u32 s4, s2, 0x10003
2022; GFX8-NEXT:    s_bfe_u32 s5, s2, 0x10001
2023; GFX8-NEXT:    s_bfe_u32 s6, s2, 0x10007
2024; GFX8-NEXT:    s_bfe_u32 s7, s2, 0x10005
2025; GFX8-NEXT:    s_bfe_u32 s8, s2, 0x1000b
2026; GFX8-NEXT:    s_bfe_u32 s9, s2, 0x10009
2027; GFX8-NEXT:    s_bfe_u32 s10, s2, 0x1000f
2028; GFX8-NEXT:    s_bfe_u32 s11, s2, 0x1000d
2029; GFX8-NEXT:    s_bfe_u32 s12, s2, 0x10013
2030; GFX8-NEXT:    s_bfe_u32 s13, s2, 0x10011
2031; GFX8-NEXT:    s_bfe_u32 s14, s2, 0x10017
2032; GFX8-NEXT:    s_bfe_u32 s15, s2, 0x1001b
2033; GFX8-NEXT:    s_bfe_u32 s16, s2, 0x10019
2034; GFX8-NEXT:    s_lshr_b32 s3, s2, 31
2035; GFX8-NEXT:    s_bfe_u32 s17, s2, 0x1001d
2036; GFX8-NEXT:    s_and_b32 s18, s2, 1
2037; GFX8-NEXT:    s_bfe_u32 s19, s2, 0x10002
2038; GFX8-NEXT:    s_bfe_u32 s20, s2, 0x10006
2039; GFX8-NEXT:    s_bfe_u32 s21, s2, 0x10004
2040; GFX8-NEXT:    s_bfe_u32 s22, s2, 0x1000a
2041; GFX8-NEXT:    s_bfe_u32 s23, s2, 0x10008
2042; GFX8-NEXT:    s_bfe_u32 s24, s2, 0x1000e
2043; GFX8-NEXT:    s_bfe_u32 s25, s2, 0x1000c
2044; GFX8-NEXT:    s_bfe_u32 s26, s2, 0x10012
2045; GFX8-NEXT:    s_bfe_u32 s27, s2, 0x10010
2046; GFX8-NEXT:    s_bfe_u32 s28, s2, 0x10016
2047; GFX8-NEXT:    s_bfe_u32 s29, s2, 0x10015
2048; GFX8-NEXT:    s_bfe_u32 s30, s2, 0x10014
2049; GFX8-NEXT:    s_bfe_u32 s31, s2, 0x1001a
2050; GFX8-NEXT:    s_bfe_u32 s33, s2, 0x10018
2051; GFX8-NEXT:    s_bfe_u32 s34, s2, 0x1001e
2052; GFX8-NEXT:    s_bfe_u32 s2, s2, 0x1001c
2053; GFX8-NEXT:    v_mov_b32_e32 v0, s2
2054; GFX8-NEXT:    s_add_u32 s2, s0, 0x70
2055; GFX8-NEXT:    v_mov_b32_e32 v3, s3
2056; GFX8-NEXT:    s_addc_u32 s3, s1, 0
2057; GFX8-NEXT:    v_mov_b32_e32 v5, s3
2058; GFX8-NEXT:    v_mov_b32_e32 v4, s2
2059; GFX8-NEXT:    s_add_u32 s2, s0, 0x60
2060; GFX8-NEXT:    v_mov_b32_e32 v1, s17
2061; GFX8-NEXT:    v_mov_b32_e32 v2, s34
2062; GFX8-NEXT:    s_addc_u32 s3, s1, 0
2063; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
2064; GFX8-NEXT:    v_mov_b32_e32 v5, s3
2065; GFX8-NEXT:    v_mov_b32_e32 v4, s2
2066; GFX8-NEXT:    s_add_u32 s2, s0, 0x50
2067; GFX8-NEXT:    v_mov_b32_e32 v0, s33
2068; GFX8-NEXT:    v_mov_b32_e32 v1, s16
2069; GFX8-NEXT:    v_mov_b32_e32 v2, s31
2070; GFX8-NEXT:    v_mov_b32_e32 v3, s15
2071; GFX8-NEXT:    s_addc_u32 s3, s1, 0
2072; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
2073; GFX8-NEXT:    v_mov_b32_e32 v5, s3
2074; GFX8-NEXT:    v_mov_b32_e32 v4, s2
2075; GFX8-NEXT:    s_add_u32 s2, s0, 64
2076; GFX8-NEXT:    v_mov_b32_e32 v0, s30
2077; GFX8-NEXT:    v_mov_b32_e32 v1, s29
2078; GFX8-NEXT:    v_mov_b32_e32 v2, s28
2079; GFX8-NEXT:    v_mov_b32_e32 v3, s14
2080; GFX8-NEXT:    s_addc_u32 s3, s1, 0
2081; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
2082; GFX8-NEXT:    v_mov_b32_e32 v5, s3
2083; GFX8-NEXT:    v_mov_b32_e32 v4, s2
2084; GFX8-NEXT:    s_add_u32 s2, s0, 48
2085; GFX8-NEXT:    v_mov_b32_e32 v0, s27
2086; GFX8-NEXT:    v_mov_b32_e32 v1, s13
2087; GFX8-NEXT:    v_mov_b32_e32 v2, s26
2088; GFX8-NEXT:    v_mov_b32_e32 v3, s12
2089; GFX8-NEXT:    s_addc_u32 s3, s1, 0
2090; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
2091; GFX8-NEXT:    v_mov_b32_e32 v5, s3
2092; GFX8-NEXT:    v_mov_b32_e32 v4, s2
2093; GFX8-NEXT:    s_add_u32 s2, s0, 32
2094; GFX8-NEXT:    v_mov_b32_e32 v0, s25
2095; GFX8-NEXT:    v_mov_b32_e32 v1, s11
2096; GFX8-NEXT:    v_mov_b32_e32 v2, s24
2097; GFX8-NEXT:    v_mov_b32_e32 v3, s10
2098; GFX8-NEXT:    s_addc_u32 s3, s1, 0
2099; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
2100; GFX8-NEXT:    v_mov_b32_e32 v5, s3
2101; GFX8-NEXT:    v_mov_b32_e32 v4, s2
2102; GFX8-NEXT:    s_add_u32 s2, s0, 16
2103; GFX8-NEXT:    v_mov_b32_e32 v0, s23
2104; GFX8-NEXT:    v_mov_b32_e32 v1, s9
2105; GFX8-NEXT:    v_mov_b32_e32 v2, s22
2106; GFX8-NEXT:    v_mov_b32_e32 v3, s8
2107; GFX8-NEXT:    s_addc_u32 s3, s1, 0
2108; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
2109; GFX8-NEXT:    v_mov_b32_e32 v5, s3
2110; GFX8-NEXT:    v_mov_b32_e32 v0, s21
2111; GFX8-NEXT:    v_mov_b32_e32 v1, s7
2112; GFX8-NEXT:    v_mov_b32_e32 v2, s20
2113; GFX8-NEXT:    v_mov_b32_e32 v3, s6
2114; GFX8-NEXT:    v_mov_b32_e32 v4, s2
2115; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
2116; GFX8-NEXT:    v_mov_b32_e32 v5, s1
2117; GFX8-NEXT:    v_mov_b32_e32 v0, s18
2118; GFX8-NEXT:    v_mov_b32_e32 v1, s5
2119; GFX8-NEXT:    v_mov_b32_e32 v2, s19
2120; GFX8-NEXT:    v_mov_b32_e32 v3, s4
2121; GFX8-NEXT:    v_mov_b32_e32 v4, s0
2122; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
2123; GFX8-NEXT:    s_endpgm
2124;
2125; EG-LABEL: constant_zextload_v32i1_to_v32i32:
2126; EG:       ; %bb.0:
2127; EG-NEXT:    ALU 0, @14, KC0[CB0:0-32], KC1[]
2128; EG-NEXT:    TEX 0 @12
2129; EG-NEXT:    ALU 76, @15, KC0[CB0:0-32], KC1[]
2130; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T11.XYZW, T26.X, 0
2131; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T23.XYZW, T25.X, 0
2132; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T21.XYZW, T24.X, 0
2133; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T19.XYZW, T22.X, 0
2134; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T17.XYZW, T20.X, 0
2135; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T15.XYZW, T18.X, 0
2136; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T13.XYZW, T16.X, 0
2137; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T12.XYZW, T14.X, 1
2138; EG-NEXT:    CF_END
2139; EG-NEXT:    Fetch clause starting at 12:
2140; EG-NEXT:     VTX_READ_32 T11.X, T11.X, 0, #1
2141; EG-NEXT:    ALU clause starting at 14:
2142; EG-NEXT:     MOV * T11.X, KC0[2].Z,
2143; EG-NEXT:    ALU clause starting at 15:
2144; EG-NEXT:     BFE_UINT * T12.W, T11.X, literal.x, 1,
2145; EG-NEXT:    3(4.203895e-45), 0(0.000000e+00)
2146; EG-NEXT:     BFE_UINT * T12.Z, T11.X, literal.x, 1,
2147; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
2148; EG-NEXT:     BFE_UINT T12.Y, T11.X, 1, 1,
2149; EG-NEXT:     BFE_UINT * T13.W, T11.X, literal.x, 1,
2150; EG-NEXT:    7(9.809089e-45), 0(0.000000e+00)
2151; EG-NEXT:     AND_INT T12.X, T11.X, 1,
2152; EG-NEXT:     BFE_UINT T13.Z, T11.X, literal.x, 1,
2153; EG-NEXT:     LSHR * T14.X, KC0[2].Y, literal.y,
2154; EG-NEXT:    6(8.407791e-45), 2(2.802597e-45)
2155; EG-NEXT:     BFE_UINT T13.Y, T11.X, literal.x, 1,
2156; EG-NEXT:     BFE_UINT * T15.W, T11.X, literal.y, 1,
2157; EG-NEXT:    5(7.006492e-45), 11(1.541428e-44)
2158; EG-NEXT:     BFE_UINT T13.X, T11.X, literal.x, 1,
2159; EG-NEXT:     BFE_UINT T15.Z, T11.X, literal.y, 1,
2160; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.z,
2161; EG-NEXT:    4(5.605194e-45), 10(1.401298e-44)
2162; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
2163; EG-NEXT:     LSHR T16.X, PV.W, literal.x,
2164; EG-NEXT:     BFE_UINT T15.Y, T11.X, literal.y, 1,
2165; EG-NEXT:     BFE_UINT * T17.W, T11.X, literal.z, 1,
2166; EG-NEXT:    2(2.802597e-45), 9(1.261169e-44)
2167; EG-NEXT:    15(2.101948e-44), 0(0.000000e+00)
2168; EG-NEXT:     BFE_UINT T15.X, T11.X, literal.x, 1,
2169; EG-NEXT:     BFE_UINT T17.Z, T11.X, literal.y, 1,
2170; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.z,
2171; EG-NEXT:    8(1.121039e-44), 14(1.961818e-44)
2172; EG-NEXT:    32(4.484155e-44), 0(0.000000e+00)
2173; EG-NEXT:     LSHR T18.X, PV.W, literal.x,
2174; EG-NEXT:     BFE_UINT T17.Y, T11.X, literal.y, 1,
2175; EG-NEXT:     BFE_UINT * T19.W, T11.X, literal.z, 1,
2176; EG-NEXT:    2(2.802597e-45), 13(1.821688e-44)
2177; EG-NEXT:    19(2.662467e-44), 0(0.000000e+00)
2178; EG-NEXT:     BFE_UINT T17.X, T11.X, literal.x, 1,
2179; EG-NEXT:     BFE_UINT T19.Z, T11.X, literal.y, 1,
2180; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.z,
2181; EG-NEXT:    12(1.681558e-44), 18(2.522337e-44)
2182; EG-NEXT:    48(6.726233e-44), 0(0.000000e+00)
2183; EG-NEXT:     LSHR T20.X, PV.W, literal.x,
2184; EG-NEXT:     BFE_UINT T19.Y, T11.X, literal.y, 1,
2185; EG-NEXT:     BFE_UINT * T21.W, T11.X, literal.z, 1,
2186; EG-NEXT:    2(2.802597e-45), 17(2.382207e-44)
2187; EG-NEXT:    23(3.222986e-44), 0(0.000000e+00)
2188; EG-NEXT:     BFE_UINT T19.X, T11.X, literal.x, 1,
2189; EG-NEXT:     BFE_UINT T21.Z, T11.X, literal.y, 1,
2190; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.z,
2191; EG-NEXT:    16(2.242078e-44), 22(3.082857e-44)
2192; EG-NEXT:    64(8.968310e-44), 0(0.000000e+00)
2193; EG-NEXT:     LSHR T22.X, PV.W, literal.x,
2194; EG-NEXT:     BFE_UINT T21.Y, T11.X, literal.y, 1,
2195; EG-NEXT:     BFE_UINT * T23.W, T11.X, literal.z, 1,
2196; EG-NEXT:    2(2.802597e-45), 21(2.942727e-44)
2197; EG-NEXT:    27(3.783506e-44), 0(0.000000e+00)
2198; EG-NEXT:     BFE_UINT T21.X, T11.X, literal.x, 1,
2199; EG-NEXT:     BFE_UINT T23.Z, T11.X, literal.y, 1,
2200; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.z,
2201; EG-NEXT:    20(2.802597e-44), 26(3.643376e-44)
2202; EG-NEXT:    80(1.121039e-43), 0(0.000000e+00)
2203; EG-NEXT:     LSHR T24.X, PV.W, literal.x,
2204; EG-NEXT:     BFE_UINT T23.Y, T11.X, literal.y, 1,
2205; EG-NEXT:     LSHR * T11.W, T11.X, literal.z,
2206; EG-NEXT:    2(2.802597e-45), 25(3.503246e-44)
2207; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
2208; EG-NEXT:     BFE_UINT T23.X, T11.X, literal.x, 1,
2209; EG-NEXT:     BFE_UINT T11.Z, T11.X, literal.y, 1,
2210; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.z,
2211; EG-NEXT:    24(3.363116e-44), 30(4.203895e-44)
2212; EG-NEXT:    96(1.345247e-43), 0(0.000000e+00)
2213; EG-NEXT:     LSHR T25.X, PV.W, literal.x,
2214; EG-NEXT:     BFE_UINT * T11.Y, T11.X, literal.y, 1,
2215; EG-NEXT:    2(2.802597e-45), 29(4.063766e-44)
2216; EG-NEXT:     BFE_UINT T11.X, T11.X, literal.x, 1,
2217; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
2218; EG-NEXT:    28(3.923636e-44), 112(1.569454e-43)
2219; EG-NEXT:     LSHR * T26.X, PV.W, literal.x,
2220; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
2221;
2222; GFX12-LABEL: constant_zextload_v32i1_to_v32i32:
2223; GFX12:       ; %bb.0:
2224; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
2225; GFX12-NEXT:    s_wait_kmcnt 0x0
2226; GFX12-NEXT:    s_load_b32 s2, s[2:3], 0x0
2227; GFX12-NEXT:    s_wait_kmcnt 0x0
2228; GFX12-NEXT:    s_bfe_u32 s3, s2, 0x10003
2229; GFX12-NEXT:    s_bfe_u32 s4, s2, 0x10001
2230; GFX12-NEXT:    s_bfe_u32 s5, s2, 0x10007
2231; GFX12-NEXT:    s_bfe_u32 s6, s2, 0x10005
2232; GFX12-NEXT:    s_bfe_u32 s7, s2, 0x1000b
2233; GFX12-NEXT:    s_bfe_u32 s8, s2, 0x10009
2234; GFX12-NEXT:    s_bfe_u32 s9, s2, 0x1000f
2235; GFX12-NEXT:    s_bfe_u32 s10, s2, 0x1000d
2236; GFX12-NEXT:    s_bfe_u32 s11, s2, 0x10013
2237; GFX12-NEXT:    s_bfe_u32 s12, s2, 0x10011
2238; GFX12-NEXT:    s_bfe_u32 s13, s2, 0x10017
2239; GFX12-NEXT:    s_bfe_u32 s14, s2, 0x1001b
2240; GFX12-NEXT:    s_bfe_u32 s15, s2, 0x10019
2241; GFX12-NEXT:    s_lshr_b32 s16, s2, 31
2242; GFX12-NEXT:    s_bfe_u32 s17, s2, 0x1001d
2243; GFX12-NEXT:    s_and_b32 s18, s2, 1
2244; GFX12-NEXT:    s_bfe_u32 s19, s2, 0x10002
2245; GFX12-NEXT:    s_bfe_u32 s20, s2, 0x10006
2246; GFX12-NEXT:    s_bfe_u32 s21, s2, 0x10004
2247; GFX12-NEXT:    s_bfe_u32 s22, s2, 0x1000a
2248; GFX12-NEXT:    s_bfe_u32 s23, s2, 0x10008
2249; GFX12-NEXT:    s_bfe_u32 s24, s2, 0x1000e
2250; GFX12-NEXT:    s_bfe_u32 s25, s2, 0x1000c
2251; GFX12-NEXT:    s_bfe_u32 s26, s2, 0x10012
2252; GFX12-NEXT:    s_bfe_u32 s27, s2, 0x10010
2253; GFX12-NEXT:    s_bfe_u32 s28, s2, 0x10016
2254; GFX12-NEXT:    s_bfe_u32 s29, s2, 0x10015
2255; GFX12-NEXT:    s_bfe_u32 s30, s2, 0x10014
2256; GFX12-NEXT:    s_bfe_u32 s31, s2, 0x1001a
2257; GFX12-NEXT:    s_bfe_u32 s33, s2, 0x10018
2258; GFX12-NEXT:    s_bfe_u32 s34, s2, 0x1001c
2259; GFX12-NEXT:    s_bfe_u32 s2, s2, 0x1001e
2260; GFX12-NEXT:    v_dual_mov_b32 v24, 0 :: v_dual_mov_b32 v1, s17
2261; GFX12-NEXT:    v_dual_mov_b32 v0, s34 :: v_dual_mov_b32 v3, s16
2262; GFX12-NEXT:    v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v5, s15
2263; GFX12-NEXT:    v_dual_mov_b32 v4, s33 :: v_dual_mov_b32 v7, s14
2264; GFX12-NEXT:    v_dual_mov_b32 v6, s31 :: v_dual_mov_b32 v9, s29
2265; GFX12-NEXT:    v_dual_mov_b32 v8, s30 :: v_dual_mov_b32 v11, s13
2266; GFX12-NEXT:    v_mov_b32_e32 v10, s28
2267; GFX12-NEXT:    s_clause 0x1
2268; GFX12-NEXT:    global_store_b128 v24, v[0:3], s[0:1] offset:112
2269; GFX12-NEXT:    global_store_b128 v24, v[4:7], s[0:1] offset:96
2270; GFX12-NEXT:    v_dual_mov_b32 v1, s12 :: v_dual_mov_b32 v0, s27
2271; GFX12-NEXT:    v_dual_mov_b32 v3, s11 :: v_dual_mov_b32 v2, s26
2272; GFX12-NEXT:    v_dual_mov_b32 v5, s10 :: v_dual_mov_b32 v4, s25
2273; GFX12-NEXT:    v_dual_mov_b32 v7, s9 :: v_dual_mov_b32 v6, s24
2274; GFX12-NEXT:    v_dual_mov_b32 v13, s8 :: v_dual_mov_b32 v12, s23
2275; GFX12-NEXT:    v_dual_mov_b32 v15, s7 :: v_dual_mov_b32 v14, s22
2276; GFX12-NEXT:    v_dual_mov_b32 v17, s6 :: v_dual_mov_b32 v16, s21
2277; GFX12-NEXT:    v_dual_mov_b32 v19, s5 :: v_dual_mov_b32 v18, s20
2278; GFX12-NEXT:    v_dual_mov_b32 v21, s4 :: v_dual_mov_b32 v20, s18
2279; GFX12-NEXT:    v_dual_mov_b32 v23, s3 :: v_dual_mov_b32 v22, s19
2280; GFX12-NEXT:    s_clause 0x5
2281; GFX12-NEXT:    global_store_b128 v24, v[8:11], s[0:1] offset:80
2282; GFX12-NEXT:    global_store_b128 v24, v[0:3], s[0:1] offset:64
2283; GFX12-NEXT:    global_store_b128 v24, v[4:7], s[0:1] offset:48
2284; GFX12-NEXT:    global_store_b128 v24, v[12:15], s[0:1] offset:32
2285; GFX12-NEXT:    global_store_b128 v24, v[16:19], s[0:1] offset:16
2286; GFX12-NEXT:    global_store_b128 v24, v[20:23], s[0:1]
2287; GFX12-NEXT:    s_endpgm
2288  %load = load <32 x i1>, ptr addrspace(4) %in
2289  %ext = zext <32 x i1> %load to <32 x i32>
2290  store <32 x i32> %ext, ptr addrspace(1) %out
2291  ret void
2292}
2293
; Sign-extending load test: reads a <32 x i1> vector from %in (addrspace(4)),
; sign-extends every element to i32 and stores the resulting <32 x i32> to
; %out (addrspace(1)).
; Each of the four check prefixes below expects eight 128-bit stores. The GFX6,
; GFX8 and GFX12 sequences additionally expect an s_load_dword (s_load_b32 for
; GFX12), 31 s_bfe_i32 instructions (operands 0x10000 through 0x1001e) and one
; s_ashr_i32 by 31.
; The check lines are autogenerated (see the NOTE on the first line of this
; file); regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand.
2294define amdgpu_kernel void @constant_sextload_v32i1_to_v32i32(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
2295; GFX6-LABEL: constant_sextload_v32i1_to_v32i32:
2296; GFX6:       ; %bb.0:
2297; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
2298; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
2299; GFX6-NEXT:    s_load_dword s4, s[2:3], 0x0
2300; GFX6-NEXT:    s_mov_b32 s3, 0xf000
2301; GFX6-NEXT:    s_mov_b32 s2, -1
2302; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
2303; GFX6-NEXT:    s_bfe_i32 s5, s4, 0x10003
2304; GFX6-NEXT:    s_bfe_i32 s6, s4, 0x10002
2305; GFX6-NEXT:    s_bfe_i32 s7, s4, 0x10001
2306; GFX6-NEXT:    s_bfe_i32 s8, s4, 0x10000
2307; GFX6-NEXT:    s_bfe_i32 s9, s4, 0x10007
2308; GFX6-NEXT:    s_bfe_i32 s10, s4, 0x10006
2309; GFX6-NEXT:    s_bfe_i32 s11, s4, 0x10005
2310; GFX6-NEXT:    s_bfe_i32 s12, s4, 0x10004
2311; GFX6-NEXT:    s_bfe_i32 s13, s4, 0x1000b
2312; GFX6-NEXT:    s_bfe_i32 s14, s4, 0x1000a
2313; GFX6-NEXT:    s_bfe_i32 s15, s4, 0x10009
2314; GFX6-NEXT:    s_bfe_i32 s16, s4, 0x10008
2315; GFX6-NEXT:    s_bfe_i32 s17, s4, 0x1000f
2316; GFX6-NEXT:    s_bfe_i32 s18, s4, 0x1000e
2317; GFX6-NEXT:    s_bfe_i32 s19, s4, 0x1000d
2318; GFX6-NEXT:    s_bfe_i32 s20, s4, 0x1000c
2319; GFX6-NEXT:    s_bfe_i32 s21, s4, 0x10013
2320; GFX6-NEXT:    s_bfe_i32 s22, s4, 0x10012
2321; GFX6-NEXT:    s_bfe_i32 s23, s4, 0x10011
2322; GFX6-NEXT:    s_bfe_i32 s24, s4, 0x10010
2323; GFX6-NEXT:    s_bfe_i32 s25, s4, 0x10017
2324; GFX6-NEXT:    s_bfe_i32 s26, s4, 0x10016
2325; GFX6-NEXT:    s_bfe_i32 s27, s4, 0x10015
2326; GFX6-NEXT:    s_bfe_i32 s28, s4, 0x10014
2327; GFX6-NEXT:    s_bfe_i32 s29, s4, 0x1001b
2328; GFX6-NEXT:    s_bfe_i32 s30, s4, 0x1001a
2329; GFX6-NEXT:    s_bfe_i32 s31, s4, 0x10019
2330; GFX6-NEXT:    s_ashr_i32 s33, s4, 31
2331; GFX6-NEXT:    s_bfe_i32 s34, s4, 0x1001e
2332; GFX6-NEXT:    s_bfe_i32 s35, s4, 0x1001d
2333; GFX6-NEXT:    s_bfe_i32 s36, s4, 0x1001c
2334; GFX6-NEXT:    s_bfe_i32 s4, s4, 0x10018
2335; GFX6-NEXT:    v_mov_b32_e32 v0, s36
2336; GFX6-NEXT:    v_mov_b32_e32 v1, s35
2337; GFX6-NEXT:    v_mov_b32_e32 v2, s34
2338; GFX6-NEXT:    v_mov_b32_e32 v3, s33
2339; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112
2340; GFX6-NEXT:    s_waitcnt expcnt(0)
2341; GFX6-NEXT:    v_mov_b32_e32 v0, s4
2342; GFX6-NEXT:    v_mov_b32_e32 v1, s31
2343; GFX6-NEXT:    v_mov_b32_e32 v2, s30
2344; GFX6-NEXT:    v_mov_b32_e32 v3, s29
2345; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96
2346; GFX6-NEXT:    s_waitcnt expcnt(0)
2347; GFX6-NEXT:    v_mov_b32_e32 v0, s28
2348; GFX6-NEXT:    v_mov_b32_e32 v1, s27
2349; GFX6-NEXT:    v_mov_b32_e32 v2, s26
2350; GFX6-NEXT:    v_mov_b32_e32 v3, s25
2351; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80
2352; GFX6-NEXT:    s_waitcnt expcnt(0)
2353; GFX6-NEXT:    v_mov_b32_e32 v0, s24
2354; GFX6-NEXT:    v_mov_b32_e32 v1, s23
2355; GFX6-NEXT:    v_mov_b32_e32 v2, s22
2356; GFX6-NEXT:    v_mov_b32_e32 v3, s21
2357; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64
2358; GFX6-NEXT:    s_waitcnt expcnt(0)
2359; GFX6-NEXT:    v_mov_b32_e32 v0, s20
2360; GFX6-NEXT:    v_mov_b32_e32 v1, s19
2361; GFX6-NEXT:    v_mov_b32_e32 v2, s18
2362; GFX6-NEXT:    v_mov_b32_e32 v3, s17
2363; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
2364; GFX6-NEXT:    s_waitcnt expcnt(0)
2365; GFX6-NEXT:    v_mov_b32_e32 v0, s16
2366; GFX6-NEXT:    v_mov_b32_e32 v1, s15
2367; GFX6-NEXT:    v_mov_b32_e32 v2, s14
2368; GFX6-NEXT:    v_mov_b32_e32 v3, s13
2369; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
2370; GFX6-NEXT:    s_waitcnt expcnt(0)
2371; GFX6-NEXT:    v_mov_b32_e32 v0, s12
2372; GFX6-NEXT:    v_mov_b32_e32 v1, s11
2373; GFX6-NEXT:    v_mov_b32_e32 v2, s10
2374; GFX6-NEXT:    v_mov_b32_e32 v3, s9
2375; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
2376; GFX6-NEXT:    s_waitcnt expcnt(0)
2377; GFX6-NEXT:    v_mov_b32_e32 v0, s8
2378; GFX6-NEXT:    v_mov_b32_e32 v1, s7
2379; GFX6-NEXT:    v_mov_b32_e32 v2, s6
2380; GFX6-NEXT:    v_mov_b32_e32 v3, s5
2381; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
2382; GFX6-NEXT:    s_endpgm
2383;
2384; GFX8-LABEL: constant_sextload_v32i1_to_v32i32:
2385; GFX8:       ; %bb.0:
2386; GFX8-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
2387; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
2388; GFX8-NEXT:    s_load_dword s2, s[2:3], 0x0
2389; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
2390; GFX8-NEXT:    s_bfe_i32 s4, s2, 0x10003
2391; GFX8-NEXT:    s_bfe_i32 s5, s2, 0x10002
2392; GFX8-NEXT:    s_bfe_i32 s6, s2, 0x10001
2393; GFX8-NEXT:    s_bfe_i32 s7, s2, 0x10000
2394; GFX8-NEXT:    s_bfe_i32 s8, s2, 0x10007
2395; GFX8-NEXT:    s_bfe_i32 s9, s2, 0x10006
2396; GFX8-NEXT:    s_bfe_i32 s10, s2, 0x10005
2397; GFX8-NEXT:    s_bfe_i32 s11, s2, 0x10004
2398; GFX8-NEXT:    s_bfe_i32 s12, s2, 0x1000b
2399; GFX8-NEXT:    s_bfe_i32 s13, s2, 0x1000a
2400; GFX8-NEXT:    s_bfe_i32 s14, s2, 0x10009
2401; GFX8-NEXT:    s_bfe_i32 s15, s2, 0x10008
2402; GFX8-NEXT:    s_bfe_i32 s16, s2, 0x1000f
2403; GFX8-NEXT:    s_bfe_i32 s17, s2, 0x1000e
2404; GFX8-NEXT:    s_bfe_i32 s18, s2, 0x1000d
2405; GFX8-NEXT:    s_bfe_i32 s19, s2, 0x1000c
2406; GFX8-NEXT:    s_bfe_i32 s20, s2, 0x10013
2407; GFX8-NEXT:    s_bfe_i32 s21, s2, 0x10012
2408; GFX8-NEXT:    s_bfe_i32 s22, s2, 0x10011
2409; GFX8-NEXT:    s_bfe_i32 s23, s2, 0x10010
2410; GFX8-NEXT:    s_bfe_i32 s24, s2, 0x10017
2411; GFX8-NEXT:    s_bfe_i32 s25, s2, 0x10016
2412; GFX8-NEXT:    s_bfe_i32 s26, s2, 0x10015
2413; GFX8-NEXT:    s_bfe_i32 s27, s2, 0x10014
2414; GFX8-NEXT:    s_bfe_i32 s28, s2, 0x1001b
2415; GFX8-NEXT:    s_bfe_i32 s29, s2, 0x1001a
2416; GFX8-NEXT:    s_bfe_i32 s30, s2, 0x10019
2417; GFX8-NEXT:    s_bfe_i32 s31, s2, 0x10018
2418; GFX8-NEXT:    s_ashr_i32 s3, s2, 31
2419; GFX8-NEXT:    s_bfe_i32 s33, s2, 0x1001e
2420; GFX8-NEXT:    s_bfe_i32 s34, s2, 0x1001d
2421; GFX8-NEXT:    s_bfe_i32 s2, s2, 0x1001c
2422; GFX8-NEXT:    v_mov_b32_e32 v0, s2
2423; GFX8-NEXT:    s_add_u32 s2, s0, 0x70
2424; GFX8-NEXT:    v_mov_b32_e32 v3, s3
2425; GFX8-NEXT:    s_addc_u32 s3, s1, 0
2426; GFX8-NEXT:    v_mov_b32_e32 v5, s3
2427; GFX8-NEXT:    v_mov_b32_e32 v4, s2
2428; GFX8-NEXT:    s_add_u32 s2, s0, 0x60
2429; GFX8-NEXT:    v_mov_b32_e32 v1, s34
2430; GFX8-NEXT:    v_mov_b32_e32 v2, s33
2431; GFX8-NEXT:    s_addc_u32 s3, s1, 0
2432; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
2433; GFX8-NEXT:    v_mov_b32_e32 v5, s3
2434; GFX8-NEXT:    v_mov_b32_e32 v4, s2
2435; GFX8-NEXT:    s_add_u32 s2, s0, 0x50
2436; GFX8-NEXT:    v_mov_b32_e32 v0, s31
2437; GFX8-NEXT:    v_mov_b32_e32 v1, s30
2438; GFX8-NEXT:    v_mov_b32_e32 v2, s29
2439; GFX8-NEXT:    v_mov_b32_e32 v3, s28
2440; GFX8-NEXT:    s_addc_u32 s3, s1, 0
2441; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
2442; GFX8-NEXT:    v_mov_b32_e32 v5, s3
2443; GFX8-NEXT:    v_mov_b32_e32 v4, s2
2444; GFX8-NEXT:    s_add_u32 s2, s0, 64
2445; GFX8-NEXT:    v_mov_b32_e32 v0, s27
2446; GFX8-NEXT:    v_mov_b32_e32 v1, s26
2447; GFX8-NEXT:    v_mov_b32_e32 v2, s25
2448; GFX8-NEXT:    v_mov_b32_e32 v3, s24
2449; GFX8-NEXT:    s_addc_u32 s3, s1, 0
2450; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
2451; GFX8-NEXT:    v_mov_b32_e32 v5, s3
2452; GFX8-NEXT:    v_mov_b32_e32 v4, s2
2453; GFX8-NEXT:    s_add_u32 s2, s0, 48
2454; GFX8-NEXT:    v_mov_b32_e32 v0, s23
2455; GFX8-NEXT:    v_mov_b32_e32 v1, s22
2456; GFX8-NEXT:    v_mov_b32_e32 v2, s21
2457; GFX8-NEXT:    v_mov_b32_e32 v3, s20
2458; GFX8-NEXT:    s_addc_u32 s3, s1, 0
2459; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
2460; GFX8-NEXT:    v_mov_b32_e32 v5, s3
2461; GFX8-NEXT:    v_mov_b32_e32 v4, s2
2462; GFX8-NEXT:    s_add_u32 s2, s0, 32
2463; GFX8-NEXT:    v_mov_b32_e32 v0, s19
2464; GFX8-NEXT:    v_mov_b32_e32 v1, s18
2465; GFX8-NEXT:    v_mov_b32_e32 v2, s17
2466; GFX8-NEXT:    v_mov_b32_e32 v3, s16
2467; GFX8-NEXT:    s_addc_u32 s3, s1, 0
2468; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
2469; GFX8-NEXT:    v_mov_b32_e32 v5, s3
2470; GFX8-NEXT:    v_mov_b32_e32 v4, s2
2471; GFX8-NEXT:    s_add_u32 s2, s0, 16
2472; GFX8-NEXT:    v_mov_b32_e32 v0, s15
2473; GFX8-NEXT:    v_mov_b32_e32 v1, s14
2474; GFX8-NEXT:    v_mov_b32_e32 v2, s13
2475; GFX8-NEXT:    v_mov_b32_e32 v3, s12
2476; GFX8-NEXT:    s_addc_u32 s3, s1, 0
2477; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
2478; GFX8-NEXT:    v_mov_b32_e32 v5, s3
2479; GFX8-NEXT:    v_mov_b32_e32 v0, s11
2480; GFX8-NEXT:    v_mov_b32_e32 v1, s10
2481; GFX8-NEXT:    v_mov_b32_e32 v2, s9
2482; GFX8-NEXT:    v_mov_b32_e32 v3, s8
2483; GFX8-NEXT:    v_mov_b32_e32 v4, s2
2484; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
2485; GFX8-NEXT:    v_mov_b32_e32 v5, s1
2486; GFX8-NEXT:    v_mov_b32_e32 v0, s7
2487; GFX8-NEXT:    v_mov_b32_e32 v1, s6
2488; GFX8-NEXT:    v_mov_b32_e32 v2, s5
2489; GFX8-NEXT:    v_mov_b32_e32 v3, s4
2490; GFX8-NEXT:    v_mov_b32_e32 v4, s0
2491; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
2492; GFX8-NEXT:    s_endpgm
2493;
2494; EG-LABEL: constant_sextload_v32i1_to_v32i32:
2495; EG:       ; %bb.0:
2496; EG-NEXT:    ALU 0, @16, KC0[CB0:0-32], KC1[]
2497; EG-NEXT:    TEX 0 @14
2498; EG-NEXT:    ALU 99, @17, KC0[CB0:0-32], KC1[]
2499; EG-NEXT:    ALU 5, @117, KC0[CB0:0-32], KC1[]
2500; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T24.XYZW, T11.X, 0
2501; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T22.XYZW, T26.X, 0
2502; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T20.XYZW, T23.X, 0
2503; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T18.XYZW, T21.X, 0
2504; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T16.XYZW, T19.X, 0
2505; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T14.XYZW, T17.X, 0
2506; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T12.XYZW, T15.X, 0
2507; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T25.XYZW, T13.X, 1
2508; EG-NEXT:    CF_END
2509; EG-NEXT:    PAD
2510; EG-NEXT:    Fetch clause starting at 14:
2511; EG-NEXT:     VTX_READ_32 T11.X, T11.X, 0, #1
2512; EG-NEXT:    ALU clause starting at 16:
2513; EG-NEXT:     MOV * T11.X, KC0[2].Z,
2514; EG-NEXT:    ALU clause starting at 17:
2515; EG-NEXT:     LSHR * T0.W, T11.X, literal.x,
2516; EG-NEXT:    7(9.809089e-45), 0(0.000000e+00)
2517; EG-NEXT:     BFE_INT T12.W, PV.W, 0.0, 1,
2518; EG-NEXT:     LSHR * T0.W, T11.X, literal.x,
2519; EG-NEXT:    6(8.407791e-45), 0(0.000000e+00)
2520; EG-NEXT:     BFE_INT T12.Z, PS, 0.0, 1,
2521; EG-NEXT:     LSHR T0.W, T11.X, literal.x,
2522; EG-NEXT:     LSHR * T1.W, T11.X, literal.y,
2523; EG-NEXT:    11(1.541428e-44), 5(7.006492e-45)
2524; EG-NEXT:     LSHR T13.X, KC0[2].Y, literal.x,
2525; EG-NEXT:     BFE_INT T12.Y, PS, 0.0, 1,
2526; EG-NEXT:     LSHR T0.Z, T11.X, literal.y,
2527; EG-NEXT:     BFE_INT T14.W, PV.W, 0.0, 1,
2528; EG-NEXT:     LSHR * T0.W, T11.X, literal.z,
2529; EG-NEXT:    2(2.802597e-45), 10(1.401298e-44)
2530; EG-NEXT:    4(5.605194e-45), 0(0.000000e+00)
2531; EG-NEXT:     BFE_INT T12.X, PS, 0.0, 1,
2532; EG-NEXT:     LSHR T0.Y, T11.X, literal.x,
2533; EG-NEXT:     BFE_INT T14.Z, PV.Z, 0.0, 1,
2534; EG-NEXT:     LSHR T0.W, T11.X, literal.y,
2535; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.z,
2536; EG-NEXT:    15(2.101948e-44), 9(1.261169e-44)
2537; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
2538; EG-NEXT:     LSHR T15.X, PS, literal.x,
2539; EG-NEXT:     BFE_INT T14.Y, PV.W, 0.0, 1,
2540; EG-NEXT:     LSHR T0.Z, T11.X, literal.y,
2541; EG-NEXT:     BFE_INT T16.W, PV.Y, 0.0, 1,
2542; EG-NEXT:     LSHR * T0.W, T11.X, literal.z,
2543; EG-NEXT:    2(2.802597e-45), 14(1.961818e-44)
2544; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
2545; EG-NEXT:     BFE_INT T14.X, PS, 0.0, 1,
2546; EG-NEXT:     LSHR T0.Y, T11.X, literal.x,
2547; EG-NEXT:     BFE_INT T16.Z, PV.Z, 0.0, 1,
2548; EG-NEXT:     LSHR T0.W, T11.X, literal.y,
2549; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.z,
2550; EG-NEXT:    19(2.662467e-44), 13(1.821688e-44)
2551; EG-NEXT:    32(4.484155e-44), 0(0.000000e+00)
2552; EG-NEXT:     LSHR T17.X, PS, literal.x,
2553; EG-NEXT:     BFE_INT T16.Y, PV.W, 0.0, 1,
2554; EG-NEXT:     LSHR T0.Z, T11.X, literal.y,
2555; EG-NEXT:     BFE_INT T18.W, PV.Y, 0.0, 1,
2556; EG-NEXT:     LSHR * T0.W, T11.X, literal.z,
2557; EG-NEXT:    2(2.802597e-45), 18(2.522337e-44)
2558; EG-NEXT:    12(1.681558e-44), 0(0.000000e+00)
2559; EG-NEXT:     BFE_INT T16.X, PS, 0.0, 1,
2560; EG-NEXT:     LSHR T0.Y, T11.X, literal.x,
2561; EG-NEXT:     BFE_INT T18.Z, PV.Z, 0.0, 1,
2562; EG-NEXT:     LSHR T0.W, T11.X, literal.y,
2563; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.z,
2564; EG-NEXT:    23(3.222986e-44), 17(2.382207e-44)
2565; EG-NEXT:    48(6.726233e-44), 0(0.000000e+00)
2566; EG-NEXT:     LSHR T19.X, PS, literal.x,
2567; EG-NEXT:     BFE_INT T18.Y, PV.W, 0.0, 1,
2568; EG-NEXT:     LSHR T0.Z, T11.X, literal.y,
2569; EG-NEXT:     BFE_INT T20.W, PV.Y, 0.0, 1,
2570; EG-NEXT:     LSHR * T0.W, T11.X, literal.z,
2571; EG-NEXT:    2(2.802597e-45), 22(3.082857e-44)
2572; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
2573; EG-NEXT:     BFE_INT T18.X, PS, 0.0, 1,
2574; EG-NEXT:     LSHR T0.Y, T11.X, literal.x,
2575; EG-NEXT:     BFE_INT T20.Z, PV.Z, 0.0, 1,
2576; EG-NEXT:     LSHR T0.W, T11.X, literal.y,
2577; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.z,
2578; EG-NEXT:    27(3.783506e-44), 21(2.942727e-44)
2579; EG-NEXT:    64(8.968310e-44), 0(0.000000e+00)
2580; EG-NEXT:     LSHR T21.X, PS, literal.x,
2581; EG-NEXT:     BFE_INT T20.Y, PV.W, 0.0, 1,
2582; EG-NEXT:     LSHR T0.Z, T11.X, literal.y,
2583; EG-NEXT:     BFE_INT T22.W, PV.Y, 0.0, 1,
2584; EG-NEXT:     LSHR * T0.W, T11.X, literal.z,
2585; EG-NEXT:    2(2.802597e-45), 26(3.643376e-44)
2586; EG-NEXT:    20(2.802597e-44), 0(0.000000e+00)
2587; EG-NEXT:     BFE_INT T20.X, PS, 0.0, 1,
2588; EG-NEXT:     BFE_INT T22.Z, PV.Z, 0.0, 1,
2589; EG-NEXT:     LSHR T0.W, T11.X, literal.x,
2590; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.y,
2591; EG-NEXT:    25(3.503246e-44), 80(1.121039e-43)
2592; EG-NEXT:     LSHR T23.X, PS, literal.x,
2593; EG-NEXT:     BFE_INT T22.Y, PV.W, 0.0, 1,
2594; EG-NEXT:     LSHR * T0.W, T11.X, literal.y,
2595; EG-NEXT:    2(2.802597e-45), 24(3.363116e-44)
2596; EG-NEXT:     BFE_INT T22.X, PV.W, 0.0, 1,
2597; EG-NEXT:     LSHR T0.W, T11.X, literal.x,
2598; EG-NEXT:     ASHR * T24.W, T11.X, literal.y,
2599; EG-NEXT:    30(4.203895e-44), 31(4.344025e-44)
2600; EG-NEXT:     BFE_INT T25.X, T11.X, 0.0, 1,
2601; EG-NEXT:     LSHR T0.Y, T11.X, literal.x,
2602; EG-NEXT:     BFE_INT T24.Z, PV.W, 0.0, 1,
2603; EG-NEXT:     LSHR T0.W, T11.X, literal.y,
2604; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.z,
2605; EG-NEXT:    3(4.203895e-45), 29(4.063766e-44)
2606; EG-NEXT:    96(1.345247e-43), 0(0.000000e+00)
2607; EG-NEXT:     LSHR T26.X, PS, literal.x,
2608; EG-NEXT:     BFE_INT T24.Y, PV.W, 0.0, 1,
2609; EG-NEXT:     LSHR T0.Z, T11.X, literal.x,
2610; EG-NEXT:     BFE_INT T25.W, PV.Y, 0.0, 1,
2611; EG-NEXT:     LSHR * T0.W, T11.X, literal.y,
2612; EG-NEXT:    2(2.802597e-45), 28(3.923636e-44)
2613; EG-NEXT:     BFE_INT T24.X, PS, 0.0, 1,
2614; EG-NEXT:     BFE_INT * T25.Z, PV.Z, 0.0, 1,
2615; EG-NEXT:    ALU clause starting at 117:
2616; EG-NEXT:     LSHR T0.W, T11.X, 1,
2617; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.x,
2618; EG-NEXT:    112(1.569454e-43), 0(0.000000e+00)
2619; EG-NEXT:     LSHR T11.X, PS, literal.x,
2620; EG-NEXT:     BFE_INT * T25.Y, PV.W, 0.0, 1,
2621; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
2622;
2623; GFX12-LABEL: constant_sextload_v32i1_to_v32i32:
2624; GFX12:       ; %bb.0:
2625; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
2626; GFX12-NEXT:    s_wait_kmcnt 0x0
2627; GFX12-NEXT:    s_load_b32 s2, s[2:3], 0x0
2628; GFX12-NEXT:    s_wait_kmcnt 0x0
2629; GFX12-NEXT:    s_bfe_i32 s3, s2, 0x10003
2630; GFX12-NEXT:    s_bfe_i32 s4, s2, 0x10002
2631; GFX12-NEXT:    s_bfe_i32 s5, s2, 0x10001
2632; GFX12-NEXT:    s_bfe_i32 s6, s2, 0x10000
2633; GFX12-NEXT:    s_bfe_i32 s7, s2, 0x10007
2634; GFX12-NEXT:    s_bfe_i32 s8, s2, 0x10006
2635; GFX12-NEXT:    s_bfe_i32 s9, s2, 0x10005
2636; GFX12-NEXT:    s_bfe_i32 s10, s2, 0x10004
2637; GFX12-NEXT:    s_bfe_i32 s11, s2, 0x1000b
2638; GFX12-NEXT:    s_bfe_i32 s12, s2, 0x1000a
2639; GFX12-NEXT:    s_bfe_i32 s13, s2, 0x10009
2640; GFX12-NEXT:    s_bfe_i32 s14, s2, 0x10008
2641; GFX12-NEXT:    s_bfe_i32 s15, s2, 0x1000f
2642; GFX12-NEXT:    s_bfe_i32 s16, s2, 0x1000e
2643; GFX12-NEXT:    s_bfe_i32 s17, s2, 0x1000d
2644; GFX12-NEXT:    s_bfe_i32 s18, s2, 0x1000c
2645; GFX12-NEXT:    s_bfe_i32 s19, s2, 0x10013
2646; GFX12-NEXT:    s_bfe_i32 s20, s2, 0x10012
2647; GFX12-NEXT:    s_bfe_i32 s21, s2, 0x10011
2648; GFX12-NEXT:    s_bfe_i32 s22, s2, 0x10010
2649; GFX12-NEXT:    s_bfe_i32 s23, s2, 0x10017
2650; GFX12-NEXT:    s_bfe_i32 s24, s2, 0x10016
2651; GFX12-NEXT:    s_bfe_i32 s25, s2, 0x10015
2652; GFX12-NEXT:    s_bfe_i32 s26, s2, 0x10014
2653; GFX12-NEXT:    s_bfe_i32 s27, s2, 0x1001b
2654; GFX12-NEXT:    s_bfe_i32 s28, s2, 0x1001a
2655; GFX12-NEXT:    s_bfe_i32 s29, s2, 0x10019
2656; GFX12-NEXT:    s_bfe_i32 s30, s2, 0x10018
2657; GFX12-NEXT:    s_ashr_i32 s31, s2, 31
2658; GFX12-NEXT:    s_bfe_i32 s33, s2, 0x1001e
2659; GFX12-NEXT:    s_bfe_i32 s34, s2, 0x1001c
2660; GFX12-NEXT:    s_bfe_i32 s2, s2, 0x1001d
2661; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
2662; GFX12-NEXT:    v_dual_mov_b32 v24, 0 :: v_dual_mov_b32 v1, s2
2663; GFX12-NEXT:    v_dual_mov_b32 v0, s34 :: v_dual_mov_b32 v3, s31
2664; GFX12-NEXT:    v_dual_mov_b32 v2, s33 :: v_dual_mov_b32 v5, s29
2665; GFX12-NEXT:    v_dual_mov_b32 v4, s30 :: v_dual_mov_b32 v7, s27
2666; GFX12-NEXT:    v_dual_mov_b32 v6, s28 :: v_dual_mov_b32 v9, s25
2667; GFX12-NEXT:    v_dual_mov_b32 v8, s26 :: v_dual_mov_b32 v11, s23
2668; GFX12-NEXT:    v_mov_b32_e32 v10, s24
2669; GFX12-NEXT:    s_clause 0x1
2670; GFX12-NEXT:    global_store_b128 v24, v[0:3], s[0:1] offset:112
2671; GFX12-NEXT:    global_store_b128 v24, v[4:7], s[0:1] offset:96
2672; GFX12-NEXT:    v_dual_mov_b32 v1, s21 :: v_dual_mov_b32 v0, s22
2673; GFX12-NEXT:    v_dual_mov_b32 v3, s19 :: v_dual_mov_b32 v2, s20
2674; GFX12-NEXT:    v_dual_mov_b32 v5, s17 :: v_dual_mov_b32 v4, s18
2675; GFX12-NEXT:    v_dual_mov_b32 v7, s15 :: v_dual_mov_b32 v6, s16
2676; GFX12-NEXT:    v_dual_mov_b32 v13, s13 :: v_dual_mov_b32 v12, s14
2677; GFX12-NEXT:    v_dual_mov_b32 v15, s11 :: v_dual_mov_b32 v14, s12
2678; GFX12-NEXT:    v_dual_mov_b32 v17, s9 :: v_dual_mov_b32 v16, s10
2679; GFX12-NEXT:    v_dual_mov_b32 v19, s7 :: v_dual_mov_b32 v18, s8
2680; GFX12-NEXT:    v_dual_mov_b32 v21, s5 :: v_dual_mov_b32 v20, s6
2681; GFX12-NEXT:    v_dual_mov_b32 v23, s3 :: v_dual_mov_b32 v22, s4
2682; GFX12-NEXT:    s_clause 0x5
2683; GFX12-NEXT:    global_store_b128 v24, v[8:11], s[0:1] offset:80
2684; GFX12-NEXT:    global_store_b128 v24, v[0:3], s[0:1] offset:64
2685; GFX12-NEXT:    global_store_b128 v24, v[4:7], s[0:1] offset:48
2686; GFX12-NEXT:    global_store_b128 v24, v[12:15], s[0:1] offset:32
2687; GFX12-NEXT:    global_store_b128 v24, v[16:19], s[0:1] offset:16
2688; GFX12-NEXT:    global_store_b128 v24, v[20:23], s[0:1]
2689; GFX12-NEXT:    s_endpgm
2690  %load = load <32 x i1>, ptr addrspace(4) %in
2691  %ext = sext <32 x i1> %load to <32 x i32>
2692  store <32 x i32> %ext, ptr addrspace(1) %out
2693  ret void
2694}
2695
2696define amdgpu_kernel void @constant_zextload_v64i1_to_v64i32(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
2697; GFX6-LABEL: constant_zextload_v64i1_to_v64i32:
2698; GFX6:       ; %bb.0:
2699; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
2700; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
2701; GFX6-NEXT:    s_load_dwordx2 s[2:3], s[2:3], 0x0
2702; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
2703; GFX6-NEXT:    s_bfe_u32 s4, s2, 0x10003
2704; GFX6-NEXT:    s_bfe_u32 s5, s2, 0x10001
2705; GFX6-NEXT:    s_bfe_u32 s6, s2, 0x10007
2706; GFX6-NEXT:    s_bfe_u32 s7, s2, 0x10005
2707; GFX6-NEXT:    s_bfe_u32 s8, s2, 0x1000b
2708; GFX6-NEXT:    s_bfe_u32 s9, s2, 0x10009
2709; GFX6-NEXT:    s_bfe_u32 s10, s2, 0x1000f
2710; GFX6-NEXT:    s_bfe_u32 s13, s2, 0x1000d
2711; GFX6-NEXT:    s_bfe_u32 s14, s2, 0x10013
2712; GFX6-NEXT:    s_bfe_u32 s15, s2, 0x10011
2713; GFX6-NEXT:    s_bfe_u32 s16, s2, 0x10017
2714; GFX6-NEXT:    s_bfe_u32 s17, s2, 0x10015
2715; GFX6-NEXT:    s_bfe_u32 s18, s2, 0x1001b
2716; GFX6-NEXT:    s_bfe_u32 s19, s2, 0x10019
2717; GFX6-NEXT:    s_lshr_b32 s20, s2, 31
2718; GFX6-NEXT:    s_bfe_u32 s21, s2, 0x1001d
2719; GFX6-NEXT:    s_bfe_u32 s22, s3, 0x10003
2720; GFX6-NEXT:    s_bfe_u32 s23, s3, 0x10001
2721; GFX6-NEXT:    s_bfe_u32 s24, s3, 0x10007
2722; GFX6-NEXT:    s_bfe_u32 s25, s3, 0x10005
2723; GFX6-NEXT:    s_bfe_u32 s26, s3, 0x1000b
2724; GFX6-NEXT:    s_bfe_u32 s27, s3, 0x10009
2725; GFX6-NEXT:    s_bfe_u32 s28, s3, 0x1000f
2726; GFX6-NEXT:    s_bfe_u32 s29, s3, 0x1000d
2727; GFX6-NEXT:    s_bfe_u32 s30, s3, 0x10013
2728; GFX6-NEXT:    s_bfe_u32 s31, s3, 0x10011
2729; GFX6-NEXT:    s_bfe_u32 s33, s3, 0x10017
2730; GFX6-NEXT:    s_bfe_u32 s34, s3, 0x10015
2731; GFX6-NEXT:    s_bfe_u32 s35, s3, 0x1001b
2732; GFX6-NEXT:    s_bfe_u32 s36, s3, 0x10019
2733; GFX6-NEXT:    s_lshr_b32 s37, s3, 31
2734; GFX6-NEXT:    s_bfe_u32 s38, s3, 0x1001d
2735; GFX6-NEXT:    s_and_b32 s12, s2, 1
2736; GFX6-NEXT:    s_bfe_u32 s11, s2, 0x10002
2737; GFX6-NEXT:    s_bfe_u32 s39, s2, 0x10006
2738; GFX6-NEXT:    s_bfe_u32 s40, s2, 0x10004
2739; GFX6-NEXT:    s_bfe_u32 s41, s2, 0x1000a
2740; GFX6-NEXT:    s_bfe_u32 s42, s2, 0x10008
2741; GFX6-NEXT:    s_bfe_u32 s43, s2, 0x1000e
2742; GFX6-NEXT:    s_bfe_u32 s44, s2, 0x1000c
2743; GFX6-NEXT:    s_bfe_u32 s45, s2, 0x10012
2744; GFX6-NEXT:    s_bfe_u32 s46, s2, 0x10010
2745; GFX6-NEXT:    s_bfe_u32 s47, s2, 0x10016
2746; GFX6-NEXT:    s_bfe_u32 s48, s2, 0x10014
2747; GFX6-NEXT:    s_bfe_u32 s49, s2, 0x1001a
2748; GFX6-NEXT:    s_bfe_u32 s50, s2, 0x10018
2749; GFX6-NEXT:    s_bfe_u32 s51, s2, 0x1001e
2750; GFX6-NEXT:    s_bfe_u32 s52, s2, 0x1001c
2751; GFX6-NEXT:    s_and_b32 s53, s3, 1
2752; GFX6-NEXT:    s_bfe_u32 s54, s3, 0x10002
2753; GFX6-NEXT:    s_bfe_u32 s55, s3, 0x10006
2754; GFX6-NEXT:    s_bfe_u32 s56, s3, 0x10004
2755; GFX6-NEXT:    s_bfe_u32 s57, s3, 0x10008
2756; GFX6-NEXT:    s_bfe_u32 s58, s3, 0x1000e
2757; GFX6-NEXT:    s_bfe_u32 s59, s3, 0x1000c
2758; GFX6-NEXT:    s_bfe_u32 s60, s3, 0x10012
2759; GFX6-NEXT:    s_bfe_u32 s61, s3, 0x10010
2760; GFX6-NEXT:    s_bfe_u32 s62, s3, 0x10016
2761; GFX6-NEXT:    s_bfe_u32 s63, s3, 0x10014
2762; GFX6-NEXT:    s_bfe_u32 s64, s3, 0x1001a
2763; GFX6-NEXT:    s_bfe_u32 s65, s3, 0x10018
2764; GFX6-NEXT:    s_bfe_u32 s66, s3, 0x1001e
2765; GFX6-NEXT:    s_bfe_u32 s67, s3, 0x1001c
2766; GFX6-NEXT:    s_bfe_u32 s68, s3, 0x1000a
2767; GFX6-NEXT:    s_mov_b32 s3, 0xf000
2768; GFX6-NEXT:    s_mov_b32 s2, -1
2769; GFX6-NEXT:    v_mov_b32_e32 v0, s67
2770; GFX6-NEXT:    v_mov_b32_e32 v1, s38
2771; GFX6-NEXT:    v_mov_b32_e32 v2, s66
2772; GFX6-NEXT:    v_mov_b32_e32 v3, s37
2773; GFX6-NEXT:    v_mov_b32_e32 v4, s65
2774; GFX6-NEXT:    v_mov_b32_e32 v5, s36
2775; GFX6-NEXT:    v_mov_b32_e32 v6, s64
2776; GFX6-NEXT:    v_mov_b32_e32 v7, s35
2777; GFX6-NEXT:    v_mov_b32_e32 v8, s63
2778; GFX6-NEXT:    v_mov_b32_e32 v9, s34
2779; GFX6-NEXT:    v_mov_b32_e32 v10, s62
2780; GFX6-NEXT:    v_mov_b32_e32 v11, s33
2781; GFX6-NEXT:    v_mov_b32_e32 v12, s61
2782; GFX6-NEXT:    v_mov_b32_e32 v13, s31
2783; GFX6-NEXT:    v_mov_b32_e32 v14, s60
2784; GFX6-NEXT:    v_mov_b32_e32 v15, s30
2785; GFX6-NEXT:    v_mov_b32_e32 v16, s59
2786; GFX6-NEXT:    v_mov_b32_e32 v17, s29
2787; GFX6-NEXT:    v_mov_b32_e32 v18, s58
2788; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:240
2789; GFX6-NEXT:    s_waitcnt expcnt(0)
2790; GFX6-NEXT:    v_mov_b32_e32 v0, s57
2791; GFX6-NEXT:    v_mov_b32_e32 v19, s28
2792; GFX6-NEXT:    v_mov_b32_e32 v1, s27
2793; GFX6-NEXT:    v_mov_b32_e32 v2, s68
2794; GFX6-NEXT:    v_mov_b32_e32 v3, s26
2795; GFX6-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:224
2796; GFX6-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:208
2797; GFX6-NEXT:    buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:192
2798; GFX6-NEXT:    buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:176
2799; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:160
2800; GFX6-NEXT:    s_waitcnt expcnt(0)
2801; GFX6-NEXT:    v_mov_b32_e32 v0, s56
2802; GFX6-NEXT:    v_mov_b32_e32 v1, s25
2803; GFX6-NEXT:    v_mov_b32_e32 v2, s55
2804; GFX6-NEXT:    v_mov_b32_e32 v3, s24
2805; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:144
2806; GFX6-NEXT:    s_waitcnt expcnt(0)
2807; GFX6-NEXT:    v_mov_b32_e32 v0, s53
2808; GFX6-NEXT:    v_mov_b32_e32 v1, s23
2809; GFX6-NEXT:    v_mov_b32_e32 v2, s54
2810; GFX6-NEXT:    v_mov_b32_e32 v3, s22
2811; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:128
2812; GFX6-NEXT:    s_waitcnt expcnt(0)
2813; GFX6-NEXT:    v_mov_b32_e32 v0, s52
2814; GFX6-NEXT:    v_mov_b32_e32 v1, s21
2815; GFX6-NEXT:    v_mov_b32_e32 v2, s51
2816; GFX6-NEXT:    v_mov_b32_e32 v3, s20
2817; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112
2818; GFX6-NEXT:    s_waitcnt expcnt(0)
2819; GFX6-NEXT:    v_mov_b32_e32 v0, s50
2820; GFX6-NEXT:    v_mov_b32_e32 v1, s19
2821; GFX6-NEXT:    v_mov_b32_e32 v2, s49
2822; GFX6-NEXT:    v_mov_b32_e32 v3, s18
2823; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96
2824; GFX6-NEXT:    s_waitcnt expcnt(0)
2825; GFX6-NEXT:    v_mov_b32_e32 v0, s48
2826; GFX6-NEXT:    v_mov_b32_e32 v1, s17
2827; GFX6-NEXT:    v_mov_b32_e32 v2, s47
2828; GFX6-NEXT:    v_mov_b32_e32 v3, s16
2829; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80
2830; GFX6-NEXT:    s_waitcnt expcnt(0)
2831; GFX6-NEXT:    v_mov_b32_e32 v0, s46
2832; GFX6-NEXT:    v_mov_b32_e32 v1, s15
2833; GFX6-NEXT:    v_mov_b32_e32 v2, s45
2834; GFX6-NEXT:    v_mov_b32_e32 v3, s14
2835; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64
2836; GFX6-NEXT:    s_waitcnt expcnt(0)
2837; GFX6-NEXT:    v_mov_b32_e32 v0, s44
2838; GFX6-NEXT:    v_mov_b32_e32 v1, s13
2839; GFX6-NEXT:    v_mov_b32_e32 v2, s43
2840; GFX6-NEXT:    v_mov_b32_e32 v3, s10
2841; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
2842; GFX6-NEXT:    s_waitcnt expcnt(0)
2843; GFX6-NEXT:    v_mov_b32_e32 v0, s42
2844; GFX6-NEXT:    v_mov_b32_e32 v1, s9
2845; GFX6-NEXT:    v_mov_b32_e32 v2, s41
2846; GFX6-NEXT:    v_mov_b32_e32 v3, s8
2847; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
2848; GFX6-NEXT:    s_waitcnt expcnt(0)
2849; GFX6-NEXT:    v_mov_b32_e32 v0, s40
2850; GFX6-NEXT:    v_mov_b32_e32 v1, s7
2851; GFX6-NEXT:    v_mov_b32_e32 v2, s39
2852; GFX6-NEXT:    v_mov_b32_e32 v3, s6
2853; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
2854; GFX6-NEXT:    s_waitcnt expcnt(0)
2855; GFX6-NEXT:    v_mov_b32_e32 v0, s12
2856; GFX6-NEXT:    v_mov_b32_e32 v1, s5
2857; GFX6-NEXT:    v_mov_b32_e32 v2, s11
2858; GFX6-NEXT:    v_mov_b32_e32 v3, s4
2859; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
2860; GFX6-NEXT:    s_endpgm
2861;
2862; GFX8-LABEL: constant_zextload_v64i1_to_v64i32:
2863; GFX8:       ; %bb.0:
2864; GFX8-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
2865; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
2866; GFX8-NEXT:    s_load_dwordx2 s[26:27], s[2:3], 0x0
2867; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
2868; GFX8-NEXT:    s_bfe_u32 s2, s26, 0x10003
2869; GFX8-NEXT:    s_bfe_u32 s3, s26, 0x10001
2870; GFX8-NEXT:    s_bfe_u32 s4, s26, 0x10007
2871; GFX8-NEXT:    s_bfe_u32 s5, s26, 0x10005
2872; GFX8-NEXT:    s_bfe_u32 s6, s26, 0x1000b
2873; GFX8-NEXT:    s_bfe_u32 s9, s26, 0x10009
2874; GFX8-NEXT:    s_bfe_u32 s11, s26, 0x1000f
2875; GFX8-NEXT:    s_bfe_u32 s13, s26, 0x1000d
2876; GFX8-NEXT:    s_bfe_u32 s15, s26, 0x10013
2877; GFX8-NEXT:    s_bfe_u32 s17, s26, 0x10011
2878; GFX8-NEXT:    s_bfe_u32 s19, s26, 0x10017
2879; GFX8-NEXT:    s_bfe_u32 s21, s26, 0x1001b
2880; GFX8-NEXT:    s_bfe_u32 s23, s26, 0x10019
2881; GFX8-NEXT:    s_lshr_b32 s25, s26, 31
2882; GFX8-NEXT:    s_bfe_u32 s28, s26, 0x1001d
2883; GFX8-NEXT:    s_bfe_u32 s29, s27, 0x10003
2884; GFX8-NEXT:    s_bfe_u32 s30, s27, 0x10001
2885; GFX8-NEXT:    s_bfe_u32 s31, s27, 0x10007
2886; GFX8-NEXT:    s_bfe_u32 s33, s27, 0x10005
2887; GFX8-NEXT:    s_bfe_u32 s34, s27, 0x1000b
2888; GFX8-NEXT:    s_bfe_u32 s35, s27, 0x10009
2889; GFX8-NEXT:    s_bfe_u32 s36, s27, 0x1000f
2890; GFX8-NEXT:    s_bfe_u32 s37, s27, 0x1000d
2891; GFX8-NEXT:    s_bfe_u32 s38, s27, 0x10013
2892; GFX8-NEXT:    s_bfe_u32 s39, s27, 0x10011
2893; GFX8-NEXT:    s_bfe_u32 s40, s27, 0x10017
2894; GFX8-NEXT:    s_bfe_u32 s41, s27, 0x1001b
2895; GFX8-NEXT:    s_bfe_u32 s42, s27, 0x10019
2896; GFX8-NEXT:    s_lshr_b32 s43, s27, 31
2897; GFX8-NEXT:    s_bfe_u32 s44, s27, 0x1001d
2898; GFX8-NEXT:    s_and_b32 s8, s26, 1
2899; GFX8-NEXT:    s_bfe_u32 s7, s26, 0x10002
2900; GFX8-NEXT:    s_bfe_u32 s10, s26, 0x10006
2901; GFX8-NEXT:    s_bfe_u32 s12, s26, 0x10004
2902; GFX8-NEXT:    s_bfe_u32 s14, s26, 0x1000a
2903; GFX8-NEXT:    s_bfe_u32 s16, s26, 0x10008
2904; GFX8-NEXT:    s_bfe_u32 s18, s26, 0x1000e
2905; GFX8-NEXT:    s_bfe_u32 s20, s26, 0x1000c
2906; GFX8-NEXT:    s_bfe_u32 s22, s26, 0x10012
2907; GFX8-NEXT:    s_bfe_u32 s24, s26, 0x10010
2908; GFX8-NEXT:    s_bfe_u32 s45, s26, 0x10016
2909; GFX8-NEXT:    s_bfe_u32 s46, s26, 0x10015
2910; GFX8-NEXT:    s_bfe_u32 s47, s26, 0x10014
2911; GFX8-NEXT:    s_bfe_u32 s48, s26, 0x1001a
2912; GFX8-NEXT:    s_bfe_u32 s49, s26, 0x10018
2913; GFX8-NEXT:    s_bfe_u32 s50, s26, 0x1001e
2914; GFX8-NEXT:    s_bfe_u32 s51, s26, 0x1001c
2915; GFX8-NEXT:    s_and_b32 s52, s27, 1
2916; GFX8-NEXT:    s_bfe_u32 s53, s27, 0x10002
2917; GFX8-NEXT:    s_bfe_u32 s54, s27, 0x10006
2918; GFX8-NEXT:    s_bfe_u32 s55, s27, 0x10004
2919; GFX8-NEXT:    s_bfe_u32 s56, s27, 0x1000a
2920; GFX8-NEXT:    s_bfe_u32 s57, s27, 0x10008
2921; GFX8-NEXT:    s_bfe_u32 s58, s27, 0x1000e
2922; GFX8-NEXT:    s_bfe_u32 s59, s27, 0x1000c
2923; GFX8-NEXT:    s_bfe_u32 s60, s27, 0x10012
2924; GFX8-NEXT:    s_bfe_u32 s61, s27, 0x10010
2925; GFX8-NEXT:    s_bfe_u32 s62, s27, 0x10016
2926; GFX8-NEXT:    s_bfe_u32 s63, s27, 0x10015
2927; GFX8-NEXT:    s_bfe_u32 s64, s27, 0x10014
2928; GFX8-NEXT:    s_bfe_u32 s65, s27, 0x1001a
2929; GFX8-NEXT:    s_bfe_u32 s66, s27, 0x10018
2930; GFX8-NEXT:    s_bfe_u32 s26, s27, 0x1001e
2931; GFX8-NEXT:    s_bfe_u32 s27, s27, 0x1001c
2932; GFX8-NEXT:    v_mov_b32_e32 v2, s26
2933; GFX8-NEXT:    s_add_u32 s26, s0, 0xf0
2934; GFX8-NEXT:    v_mov_b32_e32 v0, s27
2935; GFX8-NEXT:    s_addc_u32 s27, s1, 0
2936; GFX8-NEXT:    v_mov_b32_e32 v4, s26
2937; GFX8-NEXT:    v_mov_b32_e32 v1, s44
2938; GFX8-NEXT:    v_mov_b32_e32 v3, s43
2939; GFX8-NEXT:    v_mov_b32_e32 v5, s27
2940; GFX8-NEXT:    s_add_u32 s26, s0, 0xe0
2941; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
2942; GFX8-NEXT:    s_addc_u32 s27, s1, 0
2943; GFX8-NEXT:    v_mov_b32_e32 v4, s26
2944; GFX8-NEXT:    v_mov_b32_e32 v0, s66
2945; GFX8-NEXT:    v_mov_b32_e32 v1, s42
2946; GFX8-NEXT:    v_mov_b32_e32 v2, s65
2947; GFX8-NEXT:    v_mov_b32_e32 v3, s41
2948; GFX8-NEXT:    v_mov_b32_e32 v5, s27
2949; GFX8-NEXT:    s_add_u32 s26, s0, 0xd0
2950; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
2951; GFX8-NEXT:    s_addc_u32 s27, s1, 0
2952; GFX8-NEXT:    v_mov_b32_e32 v4, s26
2953; GFX8-NEXT:    v_mov_b32_e32 v0, s64
2954; GFX8-NEXT:    v_mov_b32_e32 v1, s63
2955; GFX8-NEXT:    v_mov_b32_e32 v2, s62
2956; GFX8-NEXT:    v_mov_b32_e32 v3, s40
2957; GFX8-NEXT:    v_mov_b32_e32 v5, s27
2958; GFX8-NEXT:    s_add_u32 s26, s0, 0xc0
2959; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
2960; GFX8-NEXT:    s_addc_u32 s27, s1, 0
2961; GFX8-NEXT:    v_mov_b32_e32 v4, s26
2962; GFX8-NEXT:    v_mov_b32_e32 v0, s61
2963; GFX8-NEXT:    v_mov_b32_e32 v1, s39
2964; GFX8-NEXT:    v_mov_b32_e32 v2, s60
2965; GFX8-NEXT:    v_mov_b32_e32 v3, s38
2966; GFX8-NEXT:    v_mov_b32_e32 v5, s27
2967; GFX8-NEXT:    s_add_u32 s26, s0, 0xb0
2968; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
2969; GFX8-NEXT:    s_addc_u32 s27, s1, 0
2970; GFX8-NEXT:    v_mov_b32_e32 v4, s26
2971; GFX8-NEXT:    v_mov_b32_e32 v0, s59
2972; GFX8-NEXT:    v_mov_b32_e32 v1, s37
2973; GFX8-NEXT:    v_mov_b32_e32 v2, s58
2974; GFX8-NEXT:    v_mov_b32_e32 v3, s36
2975; GFX8-NEXT:    v_mov_b32_e32 v5, s27
2976; GFX8-NEXT:    s_add_u32 s26, s0, 0xa0
2977; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
2978; GFX8-NEXT:    s_addc_u32 s27, s1, 0
2979; GFX8-NEXT:    v_mov_b32_e32 v4, s26
2980; GFX8-NEXT:    v_mov_b32_e32 v0, s57
2981; GFX8-NEXT:    v_mov_b32_e32 v1, s35
2982; GFX8-NEXT:    v_mov_b32_e32 v2, s56
2983; GFX8-NEXT:    v_mov_b32_e32 v3, s34
2984; GFX8-NEXT:    v_mov_b32_e32 v5, s27
2985; GFX8-NEXT:    s_add_u32 s26, s0, 0x90
2986; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
2987; GFX8-NEXT:    s_addc_u32 s27, s1, 0
2988; GFX8-NEXT:    v_mov_b32_e32 v4, s26
2989; GFX8-NEXT:    v_mov_b32_e32 v0, s55
2990; GFX8-NEXT:    v_mov_b32_e32 v1, s33
2991; GFX8-NEXT:    v_mov_b32_e32 v2, s54
2992; GFX8-NEXT:    v_mov_b32_e32 v3, s31
2993; GFX8-NEXT:    v_mov_b32_e32 v5, s27
2994; GFX8-NEXT:    s_add_u32 s26, s0, 0x80
2995; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
2996; GFX8-NEXT:    s_addc_u32 s27, s1, 0
2997; GFX8-NEXT:    v_mov_b32_e32 v4, s26
2998; GFX8-NEXT:    v_mov_b32_e32 v0, s52
2999; GFX8-NEXT:    v_mov_b32_e32 v1, s30
3000; GFX8-NEXT:    v_mov_b32_e32 v2, s53
3001; GFX8-NEXT:    v_mov_b32_e32 v3, s29
3002; GFX8-NEXT:    v_mov_b32_e32 v5, s27
3003; GFX8-NEXT:    s_add_u32 s26, s0, 0x70
3004; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
3005; GFX8-NEXT:    s_addc_u32 s27, s1, 0
3006; GFX8-NEXT:    v_mov_b32_e32 v4, s26
3007; GFX8-NEXT:    v_mov_b32_e32 v0, s51
3008; GFX8-NEXT:    v_mov_b32_e32 v1, s28
3009; GFX8-NEXT:    v_mov_b32_e32 v2, s50
3010; GFX8-NEXT:    v_mov_b32_e32 v3, s25
3011; GFX8-NEXT:    v_mov_b32_e32 v5, s27
3012; GFX8-NEXT:    s_add_u32 s26, s0, 0x60
3013; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
3014; GFX8-NEXT:    s_addc_u32 s27, s1, 0
3015; GFX8-NEXT:    v_mov_b32_e32 v4, s26
3016; GFX8-NEXT:    v_mov_b32_e32 v0, s49
3017; GFX8-NEXT:    v_mov_b32_e32 v1, s23
3018; GFX8-NEXT:    v_mov_b32_e32 v2, s48
3019; GFX8-NEXT:    v_mov_b32_e32 v3, s21
3020; GFX8-NEXT:    v_mov_b32_e32 v5, s27
3021; GFX8-NEXT:    s_add_u32 s26, s0, 0x50
3022; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
3023; GFX8-NEXT:    s_addc_u32 s27, s1, 0
3024; GFX8-NEXT:    v_mov_b32_e32 v4, s26
3025; GFX8-NEXT:    v_mov_b32_e32 v0, s47
3026; GFX8-NEXT:    v_mov_b32_e32 v1, s46
3027; GFX8-NEXT:    v_mov_b32_e32 v2, s45
3028; GFX8-NEXT:    v_mov_b32_e32 v3, s19
3029; GFX8-NEXT:    v_mov_b32_e32 v5, s27
3030; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
3031; GFX8-NEXT:    s_nop 0
3032; GFX8-NEXT:    v_mov_b32_e32 v2, s22
3033; GFX8-NEXT:    s_add_u32 s22, s0, 64
3034; GFX8-NEXT:    s_addc_u32 s23, s1, 0
3035; GFX8-NEXT:    v_mov_b32_e32 v4, s22
3036; GFX8-NEXT:    v_mov_b32_e32 v0, s24
3037; GFX8-NEXT:    v_mov_b32_e32 v1, s17
3038; GFX8-NEXT:    v_mov_b32_e32 v3, s15
3039; GFX8-NEXT:    v_mov_b32_e32 v5, s23
3040; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
3041; GFX8-NEXT:    s_nop 0
3042; GFX8-NEXT:    v_mov_b32_e32 v2, s18
3043; GFX8-NEXT:    s_add_u32 s18, s0, 48
3044; GFX8-NEXT:    s_addc_u32 s19, s1, 0
3045; GFX8-NEXT:    v_mov_b32_e32 v4, s18
3046; GFX8-NEXT:    v_mov_b32_e32 v0, s20
3047; GFX8-NEXT:    v_mov_b32_e32 v1, s13
3048; GFX8-NEXT:    v_mov_b32_e32 v3, s11
3049; GFX8-NEXT:    v_mov_b32_e32 v5, s19
3050; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
3051; GFX8-NEXT:    s_nop 0
3052; GFX8-NEXT:    v_mov_b32_e32 v2, s14
3053; GFX8-NEXT:    s_add_u32 s14, s0, 32
3054; GFX8-NEXT:    s_addc_u32 s15, s1, 0
3055; GFX8-NEXT:    v_mov_b32_e32 v4, s14
3056; GFX8-NEXT:    v_mov_b32_e32 v0, s16
3057; GFX8-NEXT:    v_mov_b32_e32 v1, s9
3058; GFX8-NEXT:    v_mov_b32_e32 v3, s6
3059; GFX8-NEXT:    v_mov_b32_e32 v5, s15
3060; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
3061; GFX8-NEXT:    s_nop 0
3062; GFX8-NEXT:    v_mov_b32_e32 v3, s4
3063; GFX8-NEXT:    s_add_u32 s4, s0, 16
3064; GFX8-NEXT:    v_mov_b32_e32 v1, s5
3065; GFX8-NEXT:    s_addc_u32 s5, s1, 0
3066; GFX8-NEXT:    v_mov_b32_e32 v4, s4
3067; GFX8-NEXT:    v_mov_b32_e32 v0, s12
3068; GFX8-NEXT:    v_mov_b32_e32 v2, s10
3069; GFX8-NEXT:    v_mov_b32_e32 v5, s5
3070; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
3071; GFX8-NEXT:    v_mov_b32_e32 v5, s1
3072; GFX8-NEXT:    v_mov_b32_e32 v0, s8
3073; GFX8-NEXT:    v_mov_b32_e32 v1, s3
3074; GFX8-NEXT:    v_mov_b32_e32 v2, s7
3075; GFX8-NEXT:    v_mov_b32_e32 v3, s2
3076; GFX8-NEXT:    v_mov_b32_e32 v4, s0
3077; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
3078; GFX8-NEXT:    s_endpgm
3079;
3080; EG-LABEL: constant_zextload_v64i1_to_v64i32:
3081; EG:       ; %bb.0:
3082; EG-NEXT:    ALU 0, @24, KC0[CB0:0-32], KC1[]
3083; EG-NEXT:    TEX 0 @22
3084; EG-NEXT:    ALU 96, @25, KC0[CB0:0-32], KC1[]
3085; EG-NEXT:    ALU 57, @122, KC0[CB0:0-32], KC1[]
3086; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T48.XYZW, T50.X, 0
3087; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T46.XYZW, T49.X, 0
3088; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T44.XYZW, T47.X, 0
3089; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T42.XYZW, T45.X, 0
3090; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T40.XYZW, T43.X, 0
3091; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T38.XYZW, T41.X, 0
3092; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T36.XYZW, T39.X, 0
3093; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T35.XYZW, T37.X, 0
3094; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T33.XYZW, T21.X, 0
3095; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T31.XYZW, T34.X, 0
3096; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T29.XYZW, T32.X, 0
3097; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T27.XYZW, T30.X, 0
3098; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T25.XYZW, T28.X, 0
3099; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T23.XYZW, T26.X, 0
3100; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T20.XYZW, T24.X, 0
3101; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T19.XYZW, T22.X, 1
3102; EG-NEXT:    CF_END
3103; EG-NEXT:    PAD
3104; EG-NEXT:    Fetch clause starting at 22:
3105; EG-NEXT:     VTX_READ_64 T21.XY, T19.X, 0, #1
3106; EG-NEXT:    ALU clause starting at 24:
3107; EG-NEXT:     MOV * T19.X, KC0[2].Z,
3108; EG-NEXT:    ALU clause starting at 25:
3109; EG-NEXT:     BFE_UINT * T19.W, T21.X, literal.x, 1,
3110; EG-NEXT:    3(4.203895e-45), 0(0.000000e+00)
3111; EG-NEXT:     BFE_UINT * T19.Z, T21.X, literal.x, 1,
3112; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
3113; EG-NEXT:     BFE_UINT T19.Y, T21.X, 1, 1,
3114; EG-NEXT:     BFE_UINT * T20.W, T21.X, literal.x, 1,
3115; EG-NEXT:    7(9.809089e-45), 0(0.000000e+00)
3116; EG-NEXT:     AND_INT T19.X, T21.X, 1,
3117; EG-NEXT:     BFE_UINT T20.Z, T21.X, literal.x, 1,
3118; EG-NEXT:     LSHR * T22.X, KC0[2].Y, literal.y,
3119; EG-NEXT:    6(8.407791e-45), 2(2.802597e-45)
3120; EG-NEXT:     BFE_UINT T20.Y, T21.X, literal.x, 1,
3121; EG-NEXT:     BFE_UINT * T23.W, T21.X, literal.y, 1,
3122; EG-NEXT:    5(7.006492e-45), 11(1.541428e-44)
3123; EG-NEXT:     BFE_UINT T20.X, T21.X, literal.x, 1,
3124; EG-NEXT:     BFE_UINT T23.Z, T21.X, literal.y, 1,
3125; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.z,
3126; EG-NEXT:    4(5.605194e-45), 10(1.401298e-44)
3127; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
3128; EG-NEXT:     LSHR T24.X, PV.W, literal.x,
3129; EG-NEXT:     BFE_UINT T23.Y, T21.X, literal.y, 1,
3130; EG-NEXT:     BFE_UINT * T25.W, T21.X, literal.z, 1,
3131; EG-NEXT:    2(2.802597e-45), 9(1.261169e-44)
3132; EG-NEXT:    15(2.101948e-44), 0(0.000000e+00)
3133; EG-NEXT:     BFE_UINT T23.X, T21.X, literal.x, 1,
3134; EG-NEXT:     BFE_UINT T25.Z, T21.X, literal.y, 1,
3135; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.z,
3136; EG-NEXT:    8(1.121039e-44), 14(1.961818e-44)
3137; EG-NEXT:    32(4.484155e-44), 0(0.000000e+00)
3138; EG-NEXT:     LSHR T26.X, PV.W, literal.x,
3139; EG-NEXT:     BFE_UINT T25.Y, T21.X, literal.y, 1,
3140; EG-NEXT:     BFE_UINT * T27.W, T21.X, literal.z, 1,
3141; EG-NEXT:    2(2.802597e-45), 13(1.821688e-44)
3142; EG-NEXT:    19(2.662467e-44), 0(0.000000e+00)
3143; EG-NEXT:     BFE_UINT T25.X, T21.X, literal.x, 1,
3144; EG-NEXT:     BFE_UINT T27.Z, T21.X, literal.y, 1,
3145; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.z,
3146; EG-NEXT:    12(1.681558e-44), 18(2.522337e-44)
3147; EG-NEXT:    48(6.726233e-44), 0(0.000000e+00)
3148; EG-NEXT:     LSHR T28.X, PV.W, literal.x,
3149; EG-NEXT:     BFE_UINT T27.Y, T21.X, literal.y, 1,
3150; EG-NEXT:     BFE_UINT * T29.W, T21.X, literal.z, 1,
3151; EG-NEXT:    2(2.802597e-45), 17(2.382207e-44)
3152; EG-NEXT:    23(3.222986e-44), 0(0.000000e+00)
3153; EG-NEXT:     BFE_UINT T27.X, T21.X, literal.x, 1,
3154; EG-NEXT:     BFE_UINT T29.Z, T21.X, literal.y, 1,
3155; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.z,
3156; EG-NEXT:    16(2.242078e-44), 22(3.082857e-44)
3157; EG-NEXT:    64(8.968310e-44), 0(0.000000e+00)
3158; EG-NEXT:     LSHR T30.X, PV.W, literal.x,
3159; EG-NEXT:     BFE_UINT T29.Y, T21.X, literal.y, 1,
3160; EG-NEXT:     BFE_UINT * T31.W, T21.X, literal.z, 1,
3161; EG-NEXT:    2(2.802597e-45), 21(2.942727e-44)
3162; EG-NEXT:    27(3.783506e-44), 0(0.000000e+00)
3163; EG-NEXT:     BFE_UINT T29.X, T21.X, literal.x, 1,
3164; EG-NEXT:     BFE_UINT T31.Z, T21.X, literal.y, 1,
3165; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.z,
3166; EG-NEXT:    20(2.802597e-44), 26(3.643376e-44)
3167; EG-NEXT:    80(1.121039e-43), 0(0.000000e+00)
3168; EG-NEXT:     LSHR T32.X, PV.W, literal.x,
3169; EG-NEXT:     BFE_UINT T31.Y, T21.X, literal.y, 1,
3170; EG-NEXT:     LSHR * T33.W, T21.X, literal.z,
3171; EG-NEXT:    2(2.802597e-45), 25(3.503246e-44)
3172; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
3173; EG-NEXT:     BFE_UINT T31.X, T21.X, literal.x, 1,
3174; EG-NEXT:     BFE_UINT T33.Z, T21.X, literal.y, 1,
3175; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.z,
3176; EG-NEXT:    24(3.363116e-44), 30(4.203895e-44)
3177; EG-NEXT:    96(1.345247e-43), 0(0.000000e+00)
3178; EG-NEXT:     LSHR T34.X, PV.W, literal.x,
3179; EG-NEXT:     BFE_UINT T33.Y, T21.X, literal.y, 1,
3180; EG-NEXT:     BFE_UINT * T35.W, T21.Y, literal.z, 1,
3181; EG-NEXT:    2(2.802597e-45), 29(4.063766e-44)
3182; EG-NEXT:    3(4.203895e-45), 0(0.000000e+00)
3183; EG-NEXT:     BFE_UINT T33.X, T21.X, literal.x, 1,
3184; EG-NEXT:     BFE_UINT T35.Z, T21.Y, literal.y, 1,
3185; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.z,
3186; EG-NEXT:    28(3.923636e-44), 2(2.802597e-45)
3187; EG-NEXT:    112(1.569454e-43), 0(0.000000e+00)
3188; EG-NEXT:     LSHR T21.X, PV.W, literal.x,
3189; EG-NEXT:     BFE_UINT T35.Y, T21.Y, 1, 1,
3190; EG-NEXT:     BFE_UINT T36.W, T21.Y, literal.y, 1,
3191; EG-NEXT:     AND_INT * T35.X, T21.Y, 1,
3192; EG-NEXT:    2(2.802597e-45), 7(9.809089e-45)
3193; EG-NEXT:     BFE_UINT T36.Z, T21.Y, literal.x, 1,
3194; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
3195; EG-NEXT:    6(8.407791e-45), 128(1.793662e-43)
3196; EG-NEXT:     LSHR T37.X, PV.W, literal.x,
3197; EG-NEXT:     BFE_UINT T36.Y, T21.Y, literal.y, 1,
3198; EG-NEXT:     BFE_UINT * T38.W, T21.Y, literal.z, 1,
3199; EG-NEXT:    2(2.802597e-45), 5(7.006492e-45)
3200; EG-NEXT:    11(1.541428e-44), 0(0.000000e+00)
3201; EG-NEXT:     BFE_UINT T36.X, T21.Y, literal.x, 1,
3202; EG-NEXT:     BFE_UINT T38.Z, T21.Y, literal.y, 1,
3203; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.z,
3204; EG-NEXT:    4(5.605194e-45), 10(1.401298e-44)
3205; EG-NEXT:    144(2.017870e-43), 0(0.000000e+00)
3206; EG-NEXT:    ALU clause starting at 122:
3207; EG-NEXT:     LSHR T39.X, T0.W, literal.x,
3208; EG-NEXT:     BFE_UINT T38.Y, T21.Y, literal.y, 1,
3209; EG-NEXT:     BFE_UINT * T40.W, T21.Y, literal.z, 1,
3210; EG-NEXT:    2(2.802597e-45), 9(1.261169e-44)
3211; EG-NEXT:    15(2.101948e-44), 0(0.000000e+00)
3212; EG-NEXT:     BFE_UINT T38.X, T21.Y, literal.x, 1,
3213; EG-NEXT:     BFE_UINT T40.Z, T21.Y, literal.y, 1,
3214; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.z,
3215; EG-NEXT:    8(1.121039e-44), 14(1.961818e-44)
3216; EG-NEXT:    160(2.242078e-43), 0(0.000000e+00)
3217; EG-NEXT:     LSHR T41.X, PV.W, literal.x,
3218; EG-NEXT:     BFE_UINT T40.Y, T21.Y, literal.y, 1,
3219; EG-NEXT:     BFE_UINT * T42.W, T21.Y, literal.z, 1,
3220; EG-NEXT:    2(2.802597e-45), 13(1.821688e-44)
3221; EG-NEXT:    19(2.662467e-44), 0(0.000000e+00)
3222; EG-NEXT:     BFE_UINT T40.X, T21.Y, literal.x, 1,
3223; EG-NEXT:     BFE_UINT T42.Z, T21.Y, literal.y, 1,
3224; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.z,
3225; EG-NEXT:    12(1.681558e-44), 18(2.522337e-44)
3226; EG-NEXT:    176(2.466285e-43), 0(0.000000e+00)
3227; EG-NEXT:     LSHR T43.X, PV.W, literal.x,
3228; EG-NEXT:     BFE_UINT T42.Y, T21.Y, literal.y, 1,
3229; EG-NEXT:     BFE_UINT * T44.W, T21.Y, literal.z, 1,
3230; EG-NEXT:    2(2.802597e-45), 17(2.382207e-44)
3231; EG-NEXT:    23(3.222986e-44), 0(0.000000e+00)
3232; EG-NEXT:     BFE_UINT T42.X, T21.Y, literal.x, 1,
3233; EG-NEXT:     BFE_UINT T44.Z, T21.Y, literal.y, 1,
3234; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.z,
3235; EG-NEXT:    16(2.242078e-44), 22(3.082857e-44)
3236; EG-NEXT:    192(2.690493e-43), 0(0.000000e+00)
3237; EG-NEXT:     LSHR T45.X, PV.W, literal.x,
3238; EG-NEXT:     BFE_UINT T44.Y, T21.Y, literal.y, 1,
3239; EG-NEXT:     BFE_UINT * T46.W, T21.Y, literal.z, 1,
3240; EG-NEXT:    2(2.802597e-45), 21(2.942727e-44)
3241; EG-NEXT:    27(3.783506e-44), 0(0.000000e+00)
3242; EG-NEXT:     BFE_UINT T44.X, T21.Y, literal.x, 1,
3243; EG-NEXT:     BFE_UINT T46.Z, T21.Y, literal.y, 1,
3244; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.z,
3245; EG-NEXT:    20(2.802597e-44), 26(3.643376e-44)
3246; EG-NEXT:    208(2.914701e-43), 0(0.000000e+00)
3247; EG-NEXT:     LSHR T47.X, PV.W, literal.x,
3248; EG-NEXT:     BFE_UINT T46.Y, T21.Y, literal.y, 1,
3249; EG-NEXT:     LSHR * T48.W, T21.Y, literal.z,
3250; EG-NEXT:    2(2.802597e-45), 25(3.503246e-44)
3251; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
3252; EG-NEXT:     BFE_UINT T46.X, T21.Y, literal.x, 1,
3253; EG-NEXT:     BFE_UINT T48.Z, T21.Y, literal.y, 1,
3254; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.z,
3255; EG-NEXT:    24(3.363116e-44), 30(4.203895e-44)
3256; EG-NEXT:    224(3.138909e-43), 0(0.000000e+00)
3257; EG-NEXT:     LSHR T49.X, PV.W, literal.x,
3258; EG-NEXT:     BFE_UINT * T48.Y, T21.Y, literal.y, 1,
3259; EG-NEXT:    2(2.802597e-45), 29(4.063766e-44)
3260; EG-NEXT:     BFE_UINT T48.X, T21.Y, literal.x, 1,
3261; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
3262; EG-NEXT:    28(3.923636e-44), 240(3.363116e-43)
3263; EG-NEXT:     LSHR * T50.X, PV.W, literal.x,
3264; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
3265;
3266; GFX12-LABEL: constant_zextload_v64i1_to_v64i32:
3267; GFX12:       ; %bb.0:
3268; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
3269; GFX12-NEXT:    s_wait_kmcnt 0x0
3270; GFX12-NEXT:    s_load_b64 s[2:3], s[2:3], 0x0
3271; GFX12-NEXT:    s_wait_kmcnt 0x0
3272; GFX12-NEXT:    s_lshr_b32 s33, s3, 31
3273; GFX12-NEXT:    s_bfe_u32 s34, s3, 0x1001d
3274; GFX12-NEXT:    s_bfe_u32 s65, s3, 0x1001c
3275; GFX12-NEXT:    s_bfe_u32 s66, s3, 0x1001e
3276; GFX12-NEXT:    s_bfe_u32 s30, s3, 0x1001b
3277; GFX12-NEXT:    s_bfe_u32 s31, s3, 0x10019
3278; GFX12-NEXT:    s_bfe_u32 s63, s3, 0x1001a
3279; GFX12-NEXT:    s_bfe_u32 s64, s3, 0x10018
3280; GFX12-NEXT:    v_dual_mov_b32 v24, 0 :: v_dual_mov_b32 v1, s34
3281; GFX12-NEXT:    s_bfe_u32 s29, s3, 0x10017
3282; GFX12-NEXT:    s_bfe_u32 s60, s3, 0x10016
3283; GFX12-NEXT:    s_bfe_u32 s61, s3, 0x10015
3284; GFX12-NEXT:    s_bfe_u32 s62, s3, 0x10014
3285; GFX12-NEXT:    v_dual_mov_b32 v0, s65 :: v_dual_mov_b32 v3, s33
3286; GFX12-NEXT:    v_dual_mov_b32 v2, s66 :: v_dual_mov_b32 v5, s31
3287; GFX12-NEXT:    s_bfe_u32 s27, s3, 0x10013
3288; GFX12-NEXT:    s_bfe_u32 s28, s3, 0x10011
3289; GFX12-NEXT:    s_bfe_u32 s58, s3, 0x10012
3290; GFX12-NEXT:    s_bfe_u32 s59, s3, 0x10010
3291; GFX12-NEXT:    v_dual_mov_b32 v4, s64 :: v_dual_mov_b32 v7, s30
3292; GFX12-NEXT:    v_dual_mov_b32 v6, s63 :: v_dual_mov_b32 v9, s61
3293; GFX12-NEXT:    v_dual_mov_b32 v8, s62 :: v_dual_mov_b32 v11, s29
3294; GFX12-NEXT:    v_dual_mov_b32 v10, s60 :: v_dual_mov_b32 v13, s28
3295; GFX12-NEXT:    s_bfe_u32 s19, s3, 0x10003
3296; GFX12-NEXT:    s_bfe_u32 s20, s3, 0x10001
3297; GFX12-NEXT:    s_bfe_u32 s21, s3, 0x10007
3298; GFX12-NEXT:    s_bfe_u32 s22, s3, 0x10005
3299; GFX12-NEXT:    s_bfe_u32 s23, s3, 0x1000b
3300; GFX12-NEXT:    s_bfe_u32 s24, s3, 0x10009
3301; GFX12-NEXT:    s_bfe_u32 s25, s3, 0x1000f
3302; GFX12-NEXT:    s_bfe_u32 s26, s3, 0x1000d
3303; GFX12-NEXT:    s_and_b32 s51, s3, 1
3304; GFX12-NEXT:    s_bfe_u32 s52, s3, 0x10002
3305; GFX12-NEXT:    s_bfe_u32 s53, s3, 0x10006
3306; GFX12-NEXT:    s_bfe_u32 s54, s3, 0x10004
3307; GFX12-NEXT:    s_bfe_u32 s55, s3, 0x1000a
3308; GFX12-NEXT:    s_bfe_u32 s56, s3, 0x10008
3309; GFX12-NEXT:    s_bfe_u32 s57, s3, 0x1000e
3310; GFX12-NEXT:    v_dual_mov_b32 v12, s59 :: v_dual_mov_b32 v15, s27
3311; GFX12-NEXT:    v_mov_b32_e32 v14, s58
3312; GFX12-NEXT:    s_bfe_u32 s3, s3, 0x1000c
3313; GFX12-NEXT:    s_clause 0x3
3314; GFX12-NEXT:    global_store_b128 v24, v[0:3], s[0:1] offset:240
3315; GFX12-NEXT:    global_store_b128 v24, v[4:7], s[0:1] offset:224
3316; GFX12-NEXT:    global_store_b128 v24, v[8:11], s[0:1] offset:208
3317; GFX12-NEXT:    global_store_b128 v24, v[12:15], s[0:1] offset:192
3318; GFX12-NEXT:    v_dual_mov_b32 v1, s26 :: v_dual_mov_b32 v0, s3
3319; GFX12-NEXT:    v_dual_mov_b32 v3, s25 :: v_dual_mov_b32 v2, s57
3320; GFX12-NEXT:    v_dual_mov_b32 v5, s24 :: v_dual_mov_b32 v4, s56
3321; GFX12-NEXT:    v_dual_mov_b32 v7, s23 :: v_dual_mov_b32 v6, s55
3322; GFX12-NEXT:    v_mov_b32_e32 v9, s22
3323; GFX12-NEXT:    s_bfe_u32 s4, s2, 0x10003
3324; GFX12-NEXT:    s_bfe_u32 s5, s2, 0x10001
3325; GFX12-NEXT:    s_bfe_u32 s6, s2, 0x10007
3326; GFX12-NEXT:    s_bfe_u32 s7, s2, 0x10005
3327; GFX12-NEXT:    s_bfe_u32 s8, s2, 0x1000b
3328; GFX12-NEXT:    s_bfe_u32 s9, s2, 0x10009
3329; GFX12-NEXT:    s_bfe_u32 s10, s2, 0x1000f
3330; GFX12-NEXT:    s_bfe_u32 s11, s2, 0x1000d
3331; GFX12-NEXT:    s_bfe_u32 s12, s2, 0x10013
3332; GFX12-NEXT:    s_bfe_u32 s13, s2, 0x10011
3333; GFX12-NEXT:    s_bfe_u32 s14, s2, 0x10017
3334; GFX12-NEXT:    s_bfe_u32 s15, s2, 0x1001b
3335; GFX12-NEXT:    s_bfe_u32 s16, s2, 0x10019
3336; GFX12-NEXT:    s_lshr_b32 s17, s2, 31
3337; GFX12-NEXT:    s_bfe_u32 s18, s2, 0x1001d
3338; GFX12-NEXT:    s_and_b32 s35, s2, 1
3339; GFX12-NEXT:    s_bfe_u32 s36, s2, 0x10002
3340; GFX12-NEXT:    s_bfe_u32 s37, s2, 0x10006
3341; GFX12-NEXT:    s_bfe_u32 s38, s2, 0x10004
3342; GFX12-NEXT:    s_bfe_u32 s39, s2, 0x1000a
3343; GFX12-NEXT:    s_bfe_u32 s40, s2, 0x10008
3344; GFX12-NEXT:    s_bfe_u32 s41, s2, 0x1000e
3345; GFX12-NEXT:    s_bfe_u32 s42, s2, 0x1000c
3346; GFX12-NEXT:    s_bfe_u32 s43, s2, 0x10012
3347; GFX12-NEXT:    s_bfe_u32 s44, s2, 0x10010
3348; GFX12-NEXT:    s_bfe_u32 s45, s2, 0x10016
3349; GFX12-NEXT:    s_bfe_u32 s46, s2, 0x10015
3350; GFX12-NEXT:    s_bfe_u32 s47, s2, 0x10014
3351; GFX12-NEXT:    s_bfe_u32 s48, s2, 0x1001a
3352; GFX12-NEXT:    s_bfe_u32 s49, s2, 0x10018
3353; GFX12-NEXT:    s_bfe_u32 s50, s2, 0x1001e
3354; GFX12-NEXT:    s_bfe_u32 s2, s2, 0x1001c
3355; GFX12-NEXT:    v_dual_mov_b32 v8, s54 :: v_dual_mov_b32 v11, s21
3356; GFX12-NEXT:    v_dual_mov_b32 v10, s53 :: v_dual_mov_b32 v13, s20
3357; GFX12-NEXT:    v_dual_mov_b32 v12, s51 :: v_dual_mov_b32 v15, s19
3358; GFX12-NEXT:    v_dual_mov_b32 v14, s52 :: v_dual_mov_b32 v17, s18
3359; GFX12-NEXT:    s_wait_alu 0xfffe
3360; GFX12-NEXT:    v_dual_mov_b32 v16, s2 :: v_dual_mov_b32 v19, s17
3361; GFX12-NEXT:    v_dual_mov_b32 v18, s50 :: v_dual_mov_b32 v21, s16
3362; GFX12-NEXT:    v_dual_mov_b32 v20, s49 :: v_dual_mov_b32 v23, s15
3363; GFX12-NEXT:    v_mov_b32_e32 v22, s48
3364; GFX12-NEXT:    s_clause 0x5
3365; GFX12-NEXT:    global_store_b128 v24, v[0:3], s[0:1] offset:176
3366; GFX12-NEXT:    global_store_b128 v24, v[4:7], s[0:1] offset:160
3367; GFX12-NEXT:    global_store_b128 v24, v[8:11], s[0:1] offset:144
3368; GFX12-NEXT:    global_store_b128 v24, v[12:15], s[0:1] offset:128
3369; GFX12-NEXT:    global_store_b128 v24, v[16:19], s[0:1] offset:112
3370; GFX12-NEXT:    global_store_b128 v24, v[20:23], s[0:1] offset:96
3371; GFX12-NEXT:    v_dual_mov_b32 v1, s46 :: v_dual_mov_b32 v0, s47
3372; GFX12-NEXT:    v_dual_mov_b32 v3, s14 :: v_dual_mov_b32 v2, s45
3373; GFX12-NEXT:    v_dual_mov_b32 v5, s13 :: v_dual_mov_b32 v4, s44
3374; GFX12-NEXT:    v_dual_mov_b32 v7, s12 :: v_dual_mov_b32 v6, s43
3375; GFX12-NEXT:    v_dual_mov_b32 v9, s11 :: v_dual_mov_b32 v8, s42
3376; GFX12-NEXT:    v_dual_mov_b32 v11, s10 :: v_dual_mov_b32 v10, s41
3377; GFX12-NEXT:    v_dual_mov_b32 v13, s9 :: v_dual_mov_b32 v12, s40
3378; GFX12-NEXT:    v_dual_mov_b32 v15, s8 :: v_dual_mov_b32 v14, s39
3379; GFX12-NEXT:    v_dual_mov_b32 v17, s7 :: v_dual_mov_b32 v16, s38
3380; GFX12-NEXT:    v_dual_mov_b32 v19, s6 :: v_dual_mov_b32 v18, s37
3381; GFX12-NEXT:    v_dual_mov_b32 v21, s5 :: v_dual_mov_b32 v20, s35
3382; GFX12-NEXT:    v_dual_mov_b32 v23, s4 :: v_dual_mov_b32 v22, s36
3383; GFX12-NEXT:    s_clause 0x5
3384; GFX12-NEXT:    global_store_b128 v24, v[0:3], s[0:1] offset:80
3385; GFX12-NEXT:    global_store_b128 v24, v[4:7], s[0:1] offset:64
3386; GFX12-NEXT:    global_store_b128 v24, v[8:11], s[0:1] offset:48
3387; GFX12-NEXT:    global_store_b128 v24, v[12:15], s[0:1] offset:32
3388; GFX12-NEXT:    global_store_b128 v24, v[16:19], s[0:1] offset:16
3389; GFX12-NEXT:    global_store_b128 v24, v[20:23], s[0:1]
3390; GFX12-NEXT:    s_endpgm
3391  %load = load <64 x i1>, ptr addrspace(4) %in
3392  %ext = zext <64 x i1> %load to <64 x i32>
3393  store <64 x i32> %ext, ptr addrspace(1) %out
3394  ret void
3395}
3396
3397define amdgpu_kernel void @constant_sextload_v64i1_to_v64i32(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
3398; GFX6-LABEL: constant_sextload_v64i1_to_v64i32:
3399; GFX6:       ; %bb.0:
3400; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
3401; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
3402; GFX6-NEXT:    s_load_dwordx2 s[2:3], s[2:3], 0x0
3403; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
3404; GFX6-NEXT:    s_bfe_i32 s4, s2, 0x10003
3405; GFX6-NEXT:    s_bfe_i32 s5, s2, 0x10002
3406; GFX6-NEXT:    s_bfe_i32 s6, s2, 0x10001
3407; GFX6-NEXT:    s_bfe_i32 s7, s2, 0x10000
3408; GFX6-NEXT:    s_bfe_i32 s8, s2, 0x10007
3409; GFX6-NEXT:    s_bfe_i32 s9, s2, 0x10006
3410; GFX6-NEXT:    s_bfe_i32 s10, s2, 0x10005
3411; GFX6-NEXT:    s_bfe_i32 s11, s2, 0x10004
3412; GFX6-NEXT:    s_bfe_i32 s12, s2, 0x1000b
3413; GFX6-NEXT:    s_bfe_i32 s13, s2, 0x1000a
3414; GFX6-NEXT:    s_bfe_i32 s14, s2, 0x10009
3415; GFX6-NEXT:    s_bfe_i32 s15, s2, 0x10008
3416; GFX6-NEXT:    s_bfe_i32 s16, s2, 0x1000f
3417; GFX6-NEXT:    s_bfe_i32 s17, s2, 0x1000e
3418; GFX6-NEXT:    s_bfe_i32 s18, s2, 0x1000d
3419; GFX6-NEXT:    s_bfe_i32 s19, s2, 0x1000c
3420; GFX6-NEXT:    s_bfe_i32 s20, s2, 0x10013
3421; GFX6-NEXT:    s_bfe_i32 s21, s2, 0x10012
3422; GFX6-NEXT:    s_bfe_i32 s22, s2, 0x10011
3423; GFX6-NEXT:    s_bfe_i32 s23, s2, 0x10010
3424; GFX6-NEXT:    s_bfe_i32 s24, s2, 0x10017
3425; GFX6-NEXT:    s_bfe_i32 s25, s2, 0x10016
3426; GFX6-NEXT:    s_bfe_i32 s26, s2, 0x10015
3427; GFX6-NEXT:    s_bfe_i32 s27, s2, 0x10014
3428; GFX6-NEXT:    s_bfe_i32 s28, s2, 0x1001b
3429; GFX6-NEXT:    s_bfe_i32 s29, s2, 0x1001a
3430; GFX6-NEXT:    s_bfe_i32 s30, s2, 0x10019
3431; GFX6-NEXT:    s_bfe_i32 s31, s2, 0x10018
3432; GFX6-NEXT:    s_ashr_i32 s33, s2, 31
3433; GFX6-NEXT:    s_bfe_i32 s34, s2, 0x1001e
3434; GFX6-NEXT:    s_bfe_i32 s35, s2, 0x1001d
3435; GFX6-NEXT:    s_bfe_i32 s36, s2, 0x1001c
3436; GFX6-NEXT:    s_bfe_i32 s37, s3, 0x10003
3437; GFX6-NEXT:    s_bfe_i32 s38, s3, 0x10002
3438; GFX6-NEXT:    s_bfe_i32 s39, s3, 0x10001
3439; GFX6-NEXT:    s_bfe_i32 s40, s3, 0x10000
3440; GFX6-NEXT:    s_bfe_i32 s41, s3, 0x10007
3441; GFX6-NEXT:    s_bfe_i32 s42, s3, 0x10006
3442; GFX6-NEXT:    s_bfe_i32 s43, s3, 0x10005
3443; GFX6-NEXT:    s_bfe_i32 s44, s3, 0x10004
3444; GFX6-NEXT:    s_bfe_i32 s45, s3, 0x1000b
3445; GFX6-NEXT:    s_bfe_i32 s46, s3, 0x1000a
3446; GFX6-NEXT:    s_bfe_i32 s47, s3, 0x10009
3447; GFX6-NEXT:    s_bfe_i32 s48, s3, 0x10008
3448; GFX6-NEXT:    s_bfe_i32 s49, s3, 0x1000e
3449; GFX6-NEXT:    s_bfe_i32 s50, s3, 0x1000d
3450; GFX6-NEXT:    s_bfe_i32 s51, s3, 0x1000c
3451; GFX6-NEXT:    s_bfe_i32 s52, s3, 0x10013
3452; GFX6-NEXT:    s_bfe_i32 s53, s3, 0x10012
3453; GFX6-NEXT:    s_bfe_i32 s54, s3, 0x10011
3454; GFX6-NEXT:    s_bfe_i32 s55, s3, 0x10010
3455; GFX6-NEXT:    s_bfe_i32 s56, s3, 0x10017
3456; GFX6-NEXT:    s_bfe_i32 s57, s3, 0x10016
3457; GFX6-NEXT:    s_bfe_i32 s58, s3, 0x10015
3458; GFX6-NEXT:    s_bfe_i32 s59, s3, 0x10014
3459; GFX6-NEXT:    s_bfe_i32 s60, s3, 0x1001b
3460; GFX6-NEXT:    s_bfe_i32 s61, s3, 0x1001a
3461; GFX6-NEXT:    s_bfe_i32 s62, s3, 0x10019
3462; GFX6-NEXT:    s_bfe_i32 s63, s3, 0x10018
3463; GFX6-NEXT:    s_ashr_i32 s64, s3, 31
3464; GFX6-NEXT:    s_bfe_i32 s65, s3, 0x1001e
3465; GFX6-NEXT:    s_bfe_i32 s66, s3, 0x1001d
3466; GFX6-NEXT:    s_bfe_i32 s67, s3, 0x1001c
3467; GFX6-NEXT:    s_bfe_i32 s68, s3, 0x1000f
3468; GFX6-NEXT:    s_mov_b32 s3, 0xf000
3469; GFX6-NEXT:    s_mov_b32 s2, -1
3470; GFX6-NEXT:    v_mov_b32_e32 v0, s67
3471; GFX6-NEXT:    v_mov_b32_e32 v1, s66
3472; GFX6-NEXT:    v_mov_b32_e32 v2, s65
3473; GFX6-NEXT:    v_mov_b32_e32 v3, s64
3474; GFX6-NEXT:    v_mov_b32_e32 v4, s63
3475; GFX6-NEXT:    v_mov_b32_e32 v5, s62
3476; GFX6-NEXT:    v_mov_b32_e32 v6, s61
3477; GFX6-NEXT:    v_mov_b32_e32 v7, s60
3478; GFX6-NEXT:    v_mov_b32_e32 v8, s59
3479; GFX6-NEXT:    v_mov_b32_e32 v9, s58
3480; GFX6-NEXT:    v_mov_b32_e32 v10, s57
3481; GFX6-NEXT:    v_mov_b32_e32 v11, s56
3482; GFX6-NEXT:    v_mov_b32_e32 v12, s55
3483; GFX6-NEXT:    v_mov_b32_e32 v13, s54
3484; GFX6-NEXT:    v_mov_b32_e32 v14, s53
3485; GFX6-NEXT:    v_mov_b32_e32 v15, s52
3486; GFX6-NEXT:    v_mov_b32_e32 v16, s51
3487; GFX6-NEXT:    v_mov_b32_e32 v17, s50
3488; GFX6-NEXT:    v_mov_b32_e32 v18, s49
3489; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:240
3490; GFX6-NEXT:    s_waitcnt expcnt(0)
3491; GFX6-NEXT:    v_mov_b32_e32 v0, s48
3492; GFX6-NEXT:    v_mov_b32_e32 v19, s68
3493; GFX6-NEXT:    v_mov_b32_e32 v1, s47
3494; GFX6-NEXT:    v_mov_b32_e32 v2, s46
3495; GFX6-NEXT:    v_mov_b32_e32 v3, s45
3496; GFX6-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:224
3497; GFX6-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:208
3498; GFX6-NEXT:    buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:192
3499; GFX6-NEXT:    buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:176
3500; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:160
3501; GFX6-NEXT:    s_waitcnt expcnt(0)
3502; GFX6-NEXT:    v_mov_b32_e32 v0, s44
3503; GFX6-NEXT:    v_mov_b32_e32 v1, s43
3504; GFX6-NEXT:    v_mov_b32_e32 v2, s42
3505; GFX6-NEXT:    v_mov_b32_e32 v3, s41
3506; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:144
3507; GFX6-NEXT:    s_waitcnt expcnt(0)
3508; GFX6-NEXT:    v_mov_b32_e32 v0, s40
3509; GFX6-NEXT:    v_mov_b32_e32 v1, s39
3510; GFX6-NEXT:    v_mov_b32_e32 v2, s38
3511; GFX6-NEXT:    v_mov_b32_e32 v3, s37
3512; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:128
3513; GFX6-NEXT:    s_waitcnt expcnt(0)
3514; GFX6-NEXT:    v_mov_b32_e32 v0, s36
3515; GFX6-NEXT:    v_mov_b32_e32 v1, s35
3516; GFX6-NEXT:    v_mov_b32_e32 v2, s34
3517; GFX6-NEXT:    v_mov_b32_e32 v3, s33
3518; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112
3519; GFX6-NEXT:    s_waitcnt expcnt(0)
3520; GFX6-NEXT:    v_mov_b32_e32 v0, s31
3521; GFX6-NEXT:    v_mov_b32_e32 v1, s30
3522; GFX6-NEXT:    v_mov_b32_e32 v2, s29
3523; GFX6-NEXT:    v_mov_b32_e32 v3, s28
3524; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96
3525; GFX6-NEXT:    s_waitcnt expcnt(0)
3526; GFX6-NEXT:    v_mov_b32_e32 v0, s27
3527; GFX6-NEXT:    v_mov_b32_e32 v1, s26
3528; GFX6-NEXT:    v_mov_b32_e32 v2, s25
3529; GFX6-NEXT:    v_mov_b32_e32 v3, s24
3530; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80
3531; GFX6-NEXT:    s_waitcnt expcnt(0)
3532; GFX6-NEXT:    v_mov_b32_e32 v0, s23
3533; GFX6-NEXT:    v_mov_b32_e32 v1, s22
3534; GFX6-NEXT:    v_mov_b32_e32 v2, s21
3535; GFX6-NEXT:    v_mov_b32_e32 v3, s20
3536; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64
3537; GFX6-NEXT:    s_waitcnt expcnt(0)
3538; GFX6-NEXT:    v_mov_b32_e32 v0, s19
3539; GFX6-NEXT:    v_mov_b32_e32 v1, s18
3540; GFX6-NEXT:    v_mov_b32_e32 v2, s17
3541; GFX6-NEXT:    v_mov_b32_e32 v3, s16
3542; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
3543; GFX6-NEXT:    s_waitcnt expcnt(0)
3544; GFX6-NEXT:    v_mov_b32_e32 v0, s15
3545; GFX6-NEXT:    v_mov_b32_e32 v1, s14
3546; GFX6-NEXT:    v_mov_b32_e32 v2, s13
3547; GFX6-NEXT:    v_mov_b32_e32 v3, s12
3548; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
3549; GFX6-NEXT:    s_waitcnt expcnt(0)
3550; GFX6-NEXT:    v_mov_b32_e32 v0, s11
3551; GFX6-NEXT:    v_mov_b32_e32 v1, s10
3552; GFX6-NEXT:    v_mov_b32_e32 v2, s9
3553; GFX6-NEXT:    v_mov_b32_e32 v3, s8
3554; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
3555; GFX6-NEXT:    s_waitcnt expcnt(0)
3556; GFX6-NEXT:    v_mov_b32_e32 v0, s7
3557; GFX6-NEXT:    v_mov_b32_e32 v1, s6
3558; GFX6-NEXT:    v_mov_b32_e32 v2, s5
3559; GFX6-NEXT:    v_mov_b32_e32 v3, s4
3560; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
3561; GFX6-NEXT:    s_endpgm
3562;
3563; GFX8-LABEL: constant_sextload_v64i1_to_v64i32:
3564; GFX8:       ; %bb.0:
3565; GFX8-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
3566; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
3567; GFX8-NEXT:    s_load_dwordx2 s[26:27], s[2:3], 0x0
3568; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
3569; GFX8-NEXT:    s_bfe_i32 s2, s26, 0x10003
3570; GFX8-NEXT:    s_bfe_i32 s3, s26, 0x10002
3571; GFX8-NEXT:    s_bfe_i32 s4, s26, 0x10001
3572; GFX8-NEXT:    s_bfe_i32 s5, s26, 0x10000
3573; GFX8-NEXT:    s_bfe_i32 s6, s26, 0x10007
3574; GFX8-NEXT:    s_bfe_i32 s7, s26, 0x10006
3575; GFX8-NEXT:    s_bfe_i32 s8, s26, 0x10005
3576; GFX8-NEXT:    s_bfe_i32 s9, s26, 0x10004
3577; GFX8-NEXT:    s_bfe_i32 s10, s26, 0x1000b
3578; GFX8-NEXT:    s_bfe_i32 s11, s26, 0x1000a
3579; GFX8-NEXT:    s_bfe_i32 s12, s26, 0x10009
3580; GFX8-NEXT:    s_bfe_i32 s13, s26, 0x10008
3581; GFX8-NEXT:    s_bfe_i32 s14, s26, 0x1000f
3582; GFX8-NEXT:    s_bfe_i32 s15, s26, 0x1000e
3583; GFX8-NEXT:    s_bfe_i32 s16, s26, 0x1000d
3584; GFX8-NEXT:    s_bfe_i32 s17, s26, 0x1000c
3585; GFX8-NEXT:    s_bfe_i32 s18, s26, 0x10013
3586; GFX8-NEXT:    s_bfe_i32 s19, s26, 0x10012
3587; GFX8-NEXT:    s_bfe_i32 s20, s26, 0x10011
3588; GFX8-NEXT:    s_bfe_i32 s21, s26, 0x10010
3589; GFX8-NEXT:    s_bfe_i32 s22, s26, 0x10017
3590; GFX8-NEXT:    s_bfe_i32 s23, s26, 0x10016
3591; GFX8-NEXT:    s_bfe_i32 s24, s26, 0x10015
3592; GFX8-NEXT:    s_bfe_i32 s25, s26, 0x10014
3593; GFX8-NEXT:    s_bfe_i32 s28, s26, 0x1001b
3594; GFX8-NEXT:    s_bfe_i32 s29, s26, 0x1001a
3595; GFX8-NEXT:    s_bfe_i32 s30, s26, 0x10019
3596; GFX8-NEXT:    s_bfe_i32 s31, s26, 0x10018
3597; GFX8-NEXT:    s_ashr_i32 s33, s26, 31
3598; GFX8-NEXT:    s_bfe_i32 s34, s26, 0x1001e
3599; GFX8-NEXT:    s_bfe_i32 s35, s26, 0x1001d
3600; GFX8-NEXT:    s_bfe_i32 s36, s26, 0x1001c
3601; GFX8-NEXT:    s_bfe_i32 s37, s27, 0x10003
3602; GFX8-NEXT:    s_bfe_i32 s38, s27, 0x10002
3603; GFX8-NEXT:    s_bfe_i32 s39, s27, 0x10001
3604; GFX8-NEXT:    s_bfe_i32 s40, s27, 0x10000
3605; GFX8-NEXT:    s_bfe_i32 s41, s27, 0x10007
3606; GFX8-NEXT:    s_bfe_i32 s42, s27, 0x10006
3607; GFX8-NEXT:    s_bfe_i32 s43, s27, 0x10005
3608; GFX8-NEXT:    s_bfe_i32 s44, s27, 0x10004
3609; GFX8-NEXT:    s_bfe_i32 s45, s27, 0x1000b
3610; GFX8-NEXT:    s_bfe_i32 s46, s27, 0x1000a
3611; GFX8-NEXT:    s_bfe_i32 s47, s27, 0x10009
3612; GFX8-NEXT:    s_bfe_i32 s48, s27, 0x10008
3613; GFX8-NEXT:    s_bfe_i32 s49, s27, 0x1000f
3614; GFX8-NEXT:    s_bfe_i32 s50, s27, 0x1000e
3615; GFX8-NEXT:    s_bfe_i32 s51, s27, 0x1000d
3616; GFX8-NEXT:    s_bfe_i32 s52, s27, 0x1000c
3617; GFX8-NEXT:    s_bfe_i32 s53, s27, 0x10013
3618; GFX8-NEXT:    s_bfe_i32 s54, s27, 0x10012
3619; GFX8-NEXT:    s_bfe_i32 s55, s27, 0x10011
3620; GFX8-NEXT:    s_bfe_i32 s56, s27, 0x10010
3621; GFX8-NEXT:    s_bfe_i32 s57, s27, 0x10017
3622; GFX8-NEXT:    s_bfe_i32 s58, s27, 0x10016
3623; GFX8-NEXT:    s_bfe_i32 s59, s27, 0x10015
3624; GFX8-NEXT:    s_bfe_i32 s60, s27, 0x10014
3625; GFX8-NEXT:    s_bfe_i32 s61, s27, 0x1001b
3626; GFX8-NEXT:    s_bfe_i32 s62, s27, 0x1001a
3627; GFX8-NEXT:    s_bfe_i32 s63, s27, 0x10019
3628; GFX8-NEXT:    s_bfe_i32 s64, s27, 0x10018
3629; GFX8-NEXT:    s_ashr_i32 s26, s27, 31
3630; GFX8-NEXT:    s_bfe_i32 s65, s27, 0x1001e
3631; GFX8-NEXT:    s_bfe_i32 s66, s27, 0x1001d
3632; GFX8-NEXT:    s_bfe_i32 s27, s27, 0x1001c
3633; GFX8-NEXT:    v_mov_b32_e32 v3, s26
3634; GFX8-NEXT:    s_add_u32 s26, s0, 0xf0
3635; GFX8-NEXT:    v_mov_b32_e32 v0, s27
3636; GFX8-NEXT:    s_addc_u32 s27, s1, 0
3637; GFX8-NEXT:    v_mov_b32_e32 v4, s26
3638; GFX8-NEXT:    v_mov_b32_e32 v1, s66
3639; GFX8-NEXT:    v_mov_b32_e32 v2, s65
3640; GFX8-NEXT:    v_mov_b32_e32 v5, s27
3641; GFX8-NEXT:    s_add_u32 s26, s0, 0xe0
3642; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
3643; GFX8-NEXT:    s_addc_u32 s27, s1, 0
3644; GFX8-NEXT:    v_mov_b32_e32 v4, s26
3645; GFX8-NEXT:    v_mov_b32_e32 v0, s64
3646; GFX8-NEXT:    v_mov_b32_e32 v1, s63
3647; GFX8-NEXT:    v_mov_b32_e32 v2, s62
3648; GFX8-NEXT:    v_mov_b32_e32 v3, s61
3649; GFX8-NEXT:    v_mov_b32_e32 v5, s27
3650; GFX8-NEXT:    s_add_u32 s26, s0, 0xd0
3651; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
3652; GFX8-NEXT:    s_addc_u32 s27, s1, 0
3653; GFX8-NEXT:    v_mov_b32_e32 v4, s26
3654; GFX8-NEXT:    v_mov_b32_e32 v0, s60
3655; GFX8-NEXT:    v_mov_b32_e32 v1, s59
3656; GFX8-NEXT:    v_mov_b32_e32 v2, s58
3657; GFX8-NEXT:    v_mov_b32_e32 v3, s57
3658; GFX8-NEXT:    v_mov_b32_e32 v5, s27
3659; GFX8-NEXT:    s_add_u32 s26, s0, 0xc0
3660; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
3661; GFX8-NEXT:    s_addc_u32 s27, s1, 0
3662; GFX8-NEXT:    v_mov_b32_e32 v4, s26
3663; GFX8-NEXT:    v_mov_b32_e32 v0, s56
3664; GFX8-NEXT:    v_mov_b32_e32 v1, s55
3665; GFX8-NEXT:    v_mov_b32_e32 v2, s54
3666; GFX8-NEXT:    v_mov_b32_e32 v3, s53
3667; GFX8-NEXT:    v_mov_b32_e32 v5, s27
3668; GFX8-NEXT:    s_add_u32 s26, s0, 0xb0
3669; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
3670; GFX8-NEXT:    s_addc_u32 s27, s1, 0
3671; GFX8-NEXT:    v_mov_b32_e32 v4, s26
3672; GFX8-NEXT:    v_mov_b32_e32 v0, s52
3673; GFX8-NEXT:    v_mov_b32_e32 v1, s51
3674; GFX8-NEXT:    v_mov_b32_e32 v2, s50
3675; GFX8-NEXT:    v_mov_b32_e32 v3, s49
3676; GFX8-NEXT:    v_mov_b32_e32 v5, s27
3677; GFX8-NEXT:    s_add_u32 s26, s0, 0xa0
3678; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
3679; GFX8-NEXT:    s_addc_u32 s27, s1, 0
3680; GFX8-NEXT:    v_mov_b32_e32 v4, s26
3681; GFX8-NEXT:    v_mov_b32_e32 v0, s48
3682; GFX8-NEXT:    v_mov_b32_e32 v1, s47
3683; GFX8-NEXT:    v_mov_b32_e32 v2, s46
3684; GFX8-NEXT:    v_mov_b32_e32 v3, s45
3685; GFX8-NEXT:    v_mov_b32_e32 v5, s27
3686; GFX8-NEXT:    s_add_u32 s26, s0, 0x90
3687; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
3688; GFX8-NEXT:    s_addc_u32 s27, s1, 0
3689; GFX8-NEXT:    v_mov_b32_e32 v4, s26
3690; GFX8-NEXT:    v_mov_b32_e32 v0, s44
3691; GFX8-NEXT:    v_mov_b32_e32 v1, s43
3692; GFX8-NEXT:    v_mov_b32_e32 v2, s42
3693; GFX8-NEXT:    v_mov_b32_e32 v3, s41
3694; GFX8-NEXT:    v_mov_b32_e32 v5, s27
3695; GFX8-NEXT:    s_add_u32 s26, s0, 0x80
3696; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
3697; GFX8-NEXT:    s_addc_u32 s27, s1, 0
3698; GFX8-NEXT:    v_mov_b32_e32 v4, s26
3699; GFX8-NEXT:    v_mov_b32_e32 v0, s40
3700; GFX8-NEXT:    v_mov_b32_e32 v1, s39
3701; GFX8-NEXT:    v_mov_b32_e32 v2, s38
3702; GFX8-NEXT:    v_mov_b32_e32 v3, s37
3703; GFX8-NEXT:    v_mov_b32_e32 v5, s27
3704; GFX8-NEXT:    s_add_u32 s26, s0, 0x70
3705; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
3706; GFX8-NEXT:    s_addc_u32 s27, s1, 0
3707; GFX8-NEXT:    v_mov_b32_e32 v4, s26
3708; GFX8-NEXT:    v_mov_b32_e32 v0, s36
3709; GFX8-NEXT:    v_mov_b32_e32 v1, s35
3710; GFX8-NEXT:    v_mov_b32_e32 v2, s34
3711; GFX8-NEXT:    v_mov_b32_e32 v3, s33
3712; GFX8-NEXT:    v_mov_b32_e32 v5, s27
3713; GFX8-NEXT:    s_add_u32 s26, s0, 0x60
3714; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
3715; GFX8-NEXT:    s_addc_u32 s27, s1, 0
3716; GFX8-NEXT:    v_mov_b32_e32 v4, s26
3717; GFX8-NEXT:    v_mov_b32_e32 v0, s31
3718; GFX8-NEXT:    v_mov_b32_e32 v1, s30
3719; GFX8-NEXT:    v_mov_b32_e32 v2, s29
3720; GFX8-NEXT:    v_mov_b32_e32 v3, s28
3721; GFX8-NEXT:    v_mov_b32_e32 v5, s27
3722; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
3723; GFX8-NEXT:    s_nop 0
3724; GFX8-NEXT:    v_mov_b32_e32 v3, s22
3725; GFX8-NEXT:    s_add_u32 s22, s0, 0x50
3726; GFX8-NEXT:    v_mov_b32_e32 v2, s23
3727; GFX8-NEXT:    s_addc_u32 s23, s1, 0
3728; GFX8-NEXT:    v_mov_b32_e32 v4, s22
3729; GFX8-NEXT:    v_mov_b32_e32 v0, s25
3730; GFX8-NEXT:    v_mov_b32_e32 v1, s24
3731; GFX8-NEXT:    v_mov_b32_e32 v5, s23
3732; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
3733; GFX8-NEXT:    s_nop 0
3734; GFX8-NEXT:    v_mov_b32_e32 v3, s18
3735; GFX8-NEXT:    s_add_u32 s18, s0, 64
3736; GFX8-NEXT:    v_mov_b32_e32 v2, s19
3737; GFX8-NEXT:    s_addc_u32 s19, s1, 0
3738; GFX8-NEXT:    v_mov_b32_e32 v4, s18
3739; GFX8-NEXT:    v_mov_b32_e32 v0, s21
3740; GFX8-NEXT:    v_mov_b32_e32 v1, s20
3741; GFX8-NEXT:    v_mov_b32_e32 v5, s19
3742; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
3743; GFX8-NEXT:    s_nop 0
3744; GFX8-NEXT:    v_mov_b32_e32 v3, s14
3745; GFX8-NEXT:    s_add_u32 s14, s0, 48
3746; GFX8-NEXT:    v_mov_b32_e32 v2, s15
3747; GFX8-NEXT:    s_addc_u32 s15, s1, 0
3748; GFX8-NEXT:    v_mov_b32_e32 v4, s14
3749; GFX8-NEXT:    v_mov_b32_e32 v0, s17
3750; GFX8-NEXT:    v_mov_b32_e32 v1, s16
3751; GFX8-NEXT:    v_mov_b32_e32 v5, s15
3752; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
3753; GFX8-NEXT:    s_nop 0
3754; GFX8-NEXT:    v_mov_b32_e32 v3, s10
3755; GFX8-NEXT:    s_add_u32 s10, s0, 32
3756; GFX8-NEXT:    v_mov_b32_e32 v2, s11
3757; GFX8-NEXT:    s_addc_u32 s11, s1, 0
3758; GFX8-NEXT:    v_mov_b32_e32 v4, s10
3759; GFX8-NEXT:    v_mov_b32_e32 v0, s13
3760; GFX8-NEXT:    v_mov_b32_e32 v1, s12
3761; GFX8-NEXT:    v_mov_b32_e32 v5, s11
3762; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
3763; GFX8-NEXT:    s_nop 0
3764; GFX8-NEXT:    v_mov_b32_e32 v3, s6
3765; GFX8-NEXT:    s_add_u32 s6, s0, 16
3766; GFX8-NEXT:    v_mov_b32_e32 v2, s7
3767; GFX8-NEXT:    s_addc_u32 s7, s1, 0
3768; GFX8-NEXT:    v_mov_b32_e32 v4, s6
3769; GFX8-NEXT:    v_mov_b32_e32 v0, s9
3770; GFX8-NEXT:    v_mov_b32_e32 v1, s8
3771; GFX8-NEXT:    v_mov_b32_e32 v5, s7
3772; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
3773; GFX8-NEXT:    v_mov_b32_e32 v5, s1
3774; GFX8-NEXT:    v_mov_b32_e32 v0, s5
3775; GFX8-NEXT:    v_mov_b32_e32 v1, s4
3776; GFX8-NEXT:    v_mov_b32_e32 v2, s3
3777; GFX8-NEXT:    v_mov_b32_e32 v3, s2
3778; GFX8-NEXT:    v_mov_b32_e32 v4, s0
3779; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
3780; GFX8-NEXT:    s_endpgm
3781;
3782; EG-LABEL: constant_sextload_v64i1_to_v64i32:
3783; EG:       ; %bb.0:
3784; EG-NEXT:    ALU 0, @24, KC0[CB0:0-32], KC1[]
3785; EG-NEXT:    TEX 0 @22
3786; EG-NEXT:    ALU 99, @25, KC0[CB0:0-32], KC1[]
3787; EG-NEXT:    ALU 98, @125, KC0[CB0:0-32], KC1[]
3788; EG-NEXT:    ALU 13, @224, KC0[CB0:0-32], KC1[]
3789; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T48.XYZW, T50.X, 0
3790; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T45.XYZW, T49.X, 0
3791; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T43.XYZW, T46.X, 0
3792; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T41.XYZW, T44.X, 0
3793; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T39.XYZW, T42.X, 0
3794; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T37.XYZW, T40.X, 0
3795; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T34.XYZW, T38.X, 0
3796; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T19.XYZW, T36.X, 0
3797; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T32.XYZW, T35.X, 0
3798; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T30.XYZW, T33.X, 0
3799; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T28.XYZW, T31.X, 0
3800; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T26.XYZW, T29.X, 0
3801; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T24.XYZW, T27.X, 0
3802; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T22.XYZW, T25.X, 0
3803; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T20.XYZW, T23.X, 0
3804; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T47.XYZW, T21.X, 1
3805; EG-NEXT:    CF_END
3806; EG-NEXT:    Fetch clause starting at 22:
3807; EG-NEXT:     VTX_READ_64 T19.XY, T19.X, 0, #1
3808; EG-NEXT:    ALU clause starting at 24:
3809; EG-NEXT:     MOV * T19.X, KC0[2].Z,
3810; EG-NEXT:    ALU clause starting at 25:
3811; EG-NEXT:     LSHR * T0.W, T19.X, literal.x,
3812; EG-NEXT:    7(9.809089e-45), 0(0.000000e+00)
3813; EG-NEXT:     BFE_INT T20.W, PV.W, 0.0, 1,
3814; EG-NEXT:     LSHR * T0.W, T19.X, literal.x,
3815; EG-NEXT:    6(8.407791e-45), 0(0.000000e+00)
3816; EG-NEXT:     BFE_INT T20.Z, PS, 0.0, 1,
3817; EG-NEXT:     LSHR T0.W, T19.X, literal.x,
3818; EG-NEXT:     LSHR * T1.W, T19.X, literal.y,
3819; EG-NEXT:    11(1.541428e-44), 5(7.006492e-45)
3820; EG-NEXT:     LSHR T21.X, KC0[2].Y, literal.x,
3821; EG-NEXT:     BFE_INT T20.Y, PS, 0.0, 1,
3822; EG-NEXT:     LSHR T0.Z, T19.X, literal.y,
3823; EG-NEXT:     BFE_INT T22.W, PV.W, 0.0, 1,
3824; EG-NEXT:     LSHR * T0.W, T19.X, literal.z,
3825; EG-NEXT:    2(2.802597e-45), 10(1.401298e-44)
3826; EG-NEXT:    4(5.605194e-45), 0(0.000000e+00)
3827; EG-NEXT:     BFE_INT T20.X, PS, 0.0, 1,
3828; EG-NEXT:     LSHR T0.Y, T19.X, literal.x,
3829; EG-NEXT:     BFE_INT T22.Z, PV.Z, 0.0, 1,
3830; EG-NEXT:     LSHR T0.W, T19.X, literal.y,
3831; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.z,
3832; EG-NEXT:    15(2.101948e-44), 9(1.261169e-44)
3833; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
3834; EG-NEXT:     LSHR T23.X, PS, literal.x,
3835; EG-NEXT:     BFE_INT T22.Y, PV.W, 0.0, 1,
3836; EG-NEXT:     LSHR T0.Z, T19.X, literal.y,
3837; EG-NEXT:     BFE_INT T24.W, PV.Y, 0.0, 1,
3838; EG-NEXT:     LSHR * T0.W, T19.X, literal.z,
3839; EG-NEXT:    2(2.802597e-45), 14(1.961818e-44)
3840; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
3841; EG-NEXT:     BFE_INT T22.X, PS, 0.0, 1,
3842; EG-NEXT:     LSHR T0.Y, T19.X, literal.x,
3843; EG-NEXT:     BFE_INT T24.Z, PV.Z, 0.0, 1,
3844; EG-NEXT:     LSHR T0.W, T19.X, literal.y,
3845; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.z,
3846; EG-NEXT:    19(2.662467e-44), 13(1.821688e-44)
3847; EG-NEXT:    32(4.484155e-44), 0(0.000000e+00)
3848; EG-NEXT:     LSHR T25.X, PS, literal.x,
3849; EG-NEXT:     BFE_INT T24.Y, PV.W, 0.0, 1,
3850; EG-NEXT:     LSHR T0.Z, T19.X, literal.y,
3851; EG-NEXT:     BFE_INT T26.W, PV.Y, 0.0, 1,
3852; EG-NEXT:     LSHR * T0.W, T19.X, literal.z,
3853; EG-NEXT:    2(2.802597e-45), 18(2.522337e-44)
3854; EG-NEXT:    12(1.681558e-44), 0(0.000000e+00)
3855; EG-NEXT:     BFE_INT T24.X, PS, 0.0, 1,
3856; EG-NEXT:     LSHR T0.Y, T19.X, literal.x,
3857; EG-NEXT:     BFE_INT T26.Z, PV.Z, 0.0, 1,
3858; EG-NEXT:     LSHR T0.W, T19.X, literal.y,
3859; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.z,
3860; EG-NEXT:    23(3.222986e-44), 17(2.382207e-44)
3861; EG-NEXT:    48(6.726233e-44), 0(0.000000e+00)
3862; EG-NEXT:     LSHR T27.X, PS, literal.x,
3863; EG-NEXT:     BFE_INT T26.Y, PV.W, 0.0, 1,
3864; EG-NEXT:     LSHR T0.Z, T19.X, literal.y,
3865; EG-NEXT:     BFE_INT T28.W, PV.Y, 0.0, 1,
3866; EG-NEXT:     LSHR * T0.W, T19.X, literal.z,
3867; EG-NEXT:    2(2.802597e-45), 22(3.082857e-44)
3868; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
3869; EG-NEXT:     BFE_INT T26.X, PS, 0.0, 1,
3870; EG-NEXT:     LSHR T0.Y, T19.X, literal.x,
3871; EG-NEXT:     BFE_INT T28.Z, PV.Z, 0.0, 1,
3872; EG-NEXT:     LSHR T0.W, T19.X, literal.y,
3873; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.z,
3874; EG-NEXT:    27(3.783506e-44), 21(2.942727e-44)
3875; EG-NEXT:    64(8.968310e-44), 0(0.000000e+00)
3876; EG-NEXT:     LSHR T29.X, PS, literal.x,
3877; EG-NEXT:     BFE_INT T28.Y, PV.W, 0.0, 1,
3878; EG-NEXT:     LSHR T0.Z, T19.X, literal.y,
3879; EG-NEXT:     BFE_INT T30.W, PV.Y, 0.0, 1,
3880; EG-NEXT:     LSHR * T0.W, T19.X, literal.z,
3881; EG-NEXT:    2(2.802597e-45), 26(3.643376e-44)
3882; EG-NEXT:    20(2.802597e-44), 0(0.000000e+00)
3883; EG-NEXT:     BFE_INT T28.X, PS, 0.0, 1,
3884; EG-NEXT:     BFE_INT T30.Z, PV.Z, 0.0, 1,
3885; EG-NEXT:     LSHR T0.W, T19.X, literal.x,
3886; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.y,
3887; EG-NEXT:    25(3.503246e-44), 80(1.121039e-43)
3888; EG-NEXT:     LSHR T31.X, PS, literal.x,
3889; EG-NEXT:     BFE_INT T30.Y, PV.W, 0.0, 1,
3890; EG-NEXT:     LSHR T0.Z, T19.X, literal.y,
3891; EG-NEXT:     LSHR T0.W, T19.X, literal.z,
3892; EG-NEXT:     ASHR * T32.W, T19.X, literal.w,
3893; EG-NEXT:    2(2.802597e-45), 30(4.203895e-44)
3894; EG-NEXT:    24(3.363116e-44), 31(4.344025e-44)
3895; EG-NEXT:     BFE_INT T30.X, PV.W, 0.0, 1,
3896; EG-NEXT:     BFE_INT T32.Z, PV.Z, 0.0, 1,
3897; EG-NEXT:     LSHR T0.W, T19.X, literal.x,
3898; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.y,
3899; EG-NEXT:    29(4.063766e-44), 96(1.345247e-43)
3900; EG-NEXT:     LSHR T33.X, PS, literal.x,
3901; EG-NEXT:     BFE_INT T32.Y, PV.W, 0.0, 1,
3902; EG-NEXT:     LSHR T0.W, T19.Y, literal.y,
3903; EG-NEXT:     LSHR * T1.W, T19.X, literal.z,
3904; EG-NEXT:    2(2.802597e-45), 7(9.809089e-45)
3905; EG-NEXT:    28(3.923636e-44), 0(0.000000e+00)
3906; EG-NEXT:     BFE_INT T32.X, PS, 0.0, 1,
3907; EG-NEXT:     LSHR T0.Z, T19.Y, literal.x,
3908; EG-NEXT:     BFE_INT T34.W, PV.W, 0.0, 1,
3909; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
3910; EG-NEXT:    6(8.407791e-45), 112(1.569454e-43)
3911; EG-NEXT:    ALU clause starting at 125:
3912; EG-NEXT:     LSHR T35.X, T0.W, literal.x,
3913; EG-NEXT:     LSHR T0.Y, T19.Y, literal.y,
3914; EG-NEXT:     BFE_INT T34.Z, T0.Z, 0.0, 1,
3915; EG-NEXT:     LSHR T0.W, T19.Y, literal.z,
3916; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.w,
3917; EG-NEXT:    2(2.802597e-45), 11(1.541428e-44)
3918; EG-NEXT:    5(7.006492e-45), 128(1.793662e-43)
3919; EG-NEXT:     LSHR T36.X, PS, literal.x,
3920; EG-NEXT:     BFE_INT T34.Y, PV.W, 0.0, 1,
3921; EG-NEXT:     LSHR T0.Z, T19.Y, literal.y,
3922; EG-NEXT:     BFE_INT T37.W, PV.Y, 0.0, 1,
3923; EG-NEXT:     LSHR * T0.W, T19.Y, literal.z,
3924; EG-NEXT:    2(2.802597e-45), 10(1.401298e-44)
3925; EG-NEXT:    4(5.605194e-45), 0(0.000000e+00)
3926; EG-NEXT:     BFE_INT T34.X, PS, 0.0, 1,
3927; EG-NEXT:     LSHR T0.Y, T19.Y, literal.x,
3928; EG-NEXT:     BFE_INT T37.Z, PV.Z, 0.0, 1,
3929; EG-NEXT:     LSHR T0.W, T19.Y, literal.y,
3930; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.z,
3931; EG-NEXT:    15(2.101948e-44), 9(1.261169e-44)
3932; EG-NEXT:    144(2.017870e-43), 0(0.000000e+00)
3933; EG-NEXT:     LSHR T38.X, PS, literal.x,
3934; EG-NEXT:     BFE_INT T37.Y, PV.W, 0.0, 1,
3935; EG-NEXT:     LSHR T0.Z, T19.Y, literal.y,
3936; EG-NEXT:     BFE_INT T39.W, PV.Y, 0.0, 1,
3937; EG-NEXT:     LSHR * T0.W, T19.Y, literal.z,
3938; EG-NEXT:    2(2.802597e-45), 14(1.961818e-44)
3939; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
3940; EG-NEXT:     BFE_INT T37.X, PS, 0.0, 1,
3941; EG-NEXT:     LSHR T0.Y, T19.Y, literal.x,
3942; EG-NEXT:     BFE_INT T39.Z, PV.Z, 0.0, 1,
3943; EG-NEXT:     LSHR T0.W, T19.Y, literal.y,
3944; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.z,
3945; EG-NEXT:    19(2.662467e-44), 13(1.821688e-44)
3946; EG-NEXT:    160(2.242078e-43), 0(0.000000e+00)
3947; EG-NEXT:     LSHR T40.X, PS, literal.x,
3948; EG-NEXT:     BFE_INT T39.Y, PV.W, 0.0, 1,
3949; EG-NEXT:     LSHR T0.Z, T19.Y, literal.y,
3950; EG-NEXT:     BFE_INT T41.W, PV.Y, 0.0, 1,
3951; EG-NEXT:     LSHR * T0.W, T19.Y, literal.z,
3952; EG-NEXT:    2(2.802597e-45), 18(2.522337e-44)
3953; EG-NEXT:    12(1.681558e-44), 0(0.000000e+00)
3954; EG-NEXT:     BFE_INT T39.X, PS, 0.0, 1,
3955; EG-NEXT:     LSHR T0.Y, T19.Y, literal.x,
3956; EG-NEXT:     BFE_INT T41.Z, PV.Z, 0.0, 1,
3957; EG-NEXT:     LSHR T0.W, T19.Y, literal.y,
3958; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.z,
3959; EG-NEXT:    23(3.222986e-44), 17(2.382207e-44)
3960; EG-NEXT:    176(2.466285e-43), 0(0.000000e+00)
3961; EG-NEXT:     LSHR T42.X, PS, literal.x,
3962; EG-NEXT:     BFE_INT T41.Y, PV.W, 0.0, 1,
3963; EG-NEXT:     LSHR T0.Z, T19.Y, literal.y,
3964; EG-NEXT:     BFE_INT T43.W, PV.Y, 0.0, 1,
3965; EG-NEXT:     LSHR * T0.W, T19.Y, literal.z,
3966; EG-NEXT:    2(2.802597e-45), 22(3.082857e-44)
3967; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
3968; EG-NEXT:     BFE_INT T41.X, PS, 0.0, 1,
3969; EG-NEXT:     LSHR T0.Y, T19.Y, literal.x,
3970; EG-NEXT:     BFE_INT T43.Z, PV.Z, 0.0, 1,
3971; EG-NEXT:     LSHR T0.W, T19.Y, literal.y,
3972; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.z,
3973; EG-NEXT:    27(3.783506e-44), 21(2.942727e-44)
3974; EG-NEXT:    192(2.690493e-43), 0(0.000000e+00)
3975; EG-NEXT:     LSHR T44.X, PS, literal.x,
3976; EG-NEXT:     BFE_INT T43.Y, PV.W, 0.0, 1,
3977; EG-NEXT:     LSHR T0.Z, T19.Y, literal.y,
3978; EG-NEXT:     BFE_INT T45.W, PV.Y, 0.0, 1,
3979; EG-NEXT:     LSHR * T0.W, T19.Y, literal.z,
3980; EG-NEXT:    2(2.802597e-45), 26(3.643376e-44)
3981; EG-NEXT:    20(2.802597e-44), 0(0.000000e+00)
3982; EG-NEXT:     BFE_INT T43.X, PS, 0.0, 1,
3983; EG-NEXT:     BFE_INT T45.Z, PV.Z, 0.0, 1,
3984; EG-NEXT:     LSHR T0.W, T19.Y, literal.x,
3985; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.y,
3986; EG-NEXT:    25(3.503246e-44), 208(2.914701e-43)
3987; EG-NEXT:     LSHR T46.X, PS, literal.x,
3988; EG-NEXT:     BFE_INT T45.Y, PV.W, 0.0, 1,
3989; EG-NEXT:     LSHR * T0.W, T19.Y, literal.y,
3990; EG-NEXT:    2(2.802597e-45), 24(3.363116e-44)
3991; EG-NEXT:     BFE_INT T45.X, PV.W, 0.0, 1,
3992; EG-NEXT:     LSHR T0.Z, T19.Y, literal.x,
3993; EG-NEXT:     LSHR T0.W, T19.X, 1,
3994; EG-NEXT:     LSHR * T1.W, T19.Y, literal.y,
3995; EG-NEXT:    2(2.802597e-45), 3(4.203895e-45)
3996; EG-NEXT:     BFE_INT T47.X, T19.X, 0.0, 1,
3997; EG-NEXT:     LSHR T0.Y, T19.X, literal.x,
3998; EG-NEXT:     LSHR T1.Z, T19.X, literal.y,
3999; EG-NEXT:     LSHR T2.W, T19.Y, literal.z,
4000; EG-NEXT:     ASHR * T48.W, T19.Y, literal.w,
4001; EG-NEXT:    2(2.802597e-45), 3(4.203895e-45)
4002; EG-NEXT:    30(4.203895e-44), 31(4.344025e-44)
4003; EG-NEXT:     BFE_INT T19.X, T19.Y, 0.0, 1,
4004; EG-NEXT:     LSHR T1.Y, T19.Y, literal.x,
4005; EG-NEXT:     BFE_INT T48.Z, PV.W, 0.0, 1,
4006; EG-NEXT:     BFE_INT T47.W, PV.Z, 0.0, 1,
4007; EG-NEXT:     ADD_INT * T2.W, KC0[2].Y, literal.y,
4008; EG-NEXT:    29(4.063766e-44), 224(3.138909e-43)
4009; EG-NEXT:     LSHR * T49.X, PS, literal.x,
4010; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
4011; EG-NEXT:    ALU clause starting at 224:
4012; EG-NEXT:     BFE_INT T48.Y, T1.Y, 0.0, 1,
4013; EG-NEXT:     BFE_INT T47.Z, T0.Y, 0.0, 1, BS:VEC_120/SCL_212
4014; EG-NEXT:     BFE_INT T19.W, T1.W, 0.0, 1,
4015; EG-NEXT:     LSHR * T1.W, T19.Y, literal.x,
4016; EG-NEXT:    28(3.923636e-44), 0(0.000000e+00)
4017; EG-NEXT:     BFE_INT T48.X, PS, 0.0, 1,
4018; EG-NEXT:     BFE_INT T47.Y, T0.W, 0.0, 1,
4019; EG-NEXT:     BFE_INT T19.Z, T0.Z, 0.0, 1,
4020; EG-NEXT:     LSHR T0.W, T19.Y, 1,
4021; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.x,
4022; EG-NEXT:    240(3.363116e-43), 0(0.000000e+00)
4023; EG-NEXT:     LSHR T50.X, PS, literal.x,
4024; EG-NEXT:     BFE_INT * T19.Y, PV.W, 0.0, 1,
4025; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
4026;
4027; GFX12-LABEL: constant_sextload_v64i1_to_v64i32:
4028; GFX12:       ; %bb.0:
4029; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
4030; GFX12-NEXT:    s_wait_kmcnt 0x0
4031; GFX12-NEXT:    s_load_b64 s[2:3], s[2:3], 0x0
4032; GFX12-NEXT:    s_wait_kmcnt 0x0
4033; GFX12-NEXT:    s_ashr_i32 s63, s3, 31
4034; GFX12-NEXT:    s_bfe_i32 s64, s3, 0x1001e
4035; GFX12-NEXT:    s_bfe_i32 s65, s3, 0x1001c
4036; GFX12-NEXT:    s_bfe_i32 s66, s3, 0x1001d
4037; GFX12-NEXT:    s_bfe_i32 s59, s3, 0x1001b
4038; GFX12-NEXT:    s_bfe_i32 s60, s3, 0x1001a
4039; GFX12-NEXT:    s_bfe_i32 s61, s3, 0x10019
4040; GFX12-NEXT:    s_bfe_i32 s62, s3, 0x10018
4041; GFX12-NEXT:    v_dual_mov_b32 v24, 0 :: v_dual_mov_b32 v1, s66
4042; GFX12-NEXT:    s_bfe_i32 s55, s3, 0x10017
4043; GFX12-NEXT:    s_bfe_i32 s56, s3, 0x10016
4044; GFX12-NEXT:    s_bfe_i32 s57, s3, 0x10015
4045; GFX12-NEXT:    s_bfe_i32 s58, s3, 0x10014
4046; GFX12-NEXT:    v_dual_mov_b32 v0, s65 :: v_dual_mov_b32 v3, s63
4047; GFX12-NEXT:    v_dual_mov_b32 v2, s64 :: v_dual_mov_b32 v5, s61
4048; GFX12-NEXT:    s_bfe_i32 s51, s3, 0x10013
4049; GFX12-NEXT:    s_bfe_i32 s52, s3, 0x10012
4050; GFX12-NEXT:    s_bfe_i32 s53, s3, 0x10011
4051; GFX12-NEXT:    s_bfe_i32 s54, s3, 0x10010
4052; GFX12-NEXT:    v_dual_mov_b32 v4, s62 :: v_dual_mov_b32 v7, s59
4053; GFX12-NEXT:    v_dual_mov_b32 v6, s60 :: v_dual_mov_b32 v9, s57
4054; GFX12-NEXT:    v_dual_mov_b32 v8, s58 :: v_dual_mov_b32 v11, s55
4055; GFX12-NEXT:    v_dual_mov_b32 v10, s56 :: v_dual_mov_b32 v13, s53
4056; GFX12-NEXT:    s_bfe_i32 s36, s3, 0x10003
4057; GFX12-NEXT:    s_bfe_i32 s37, s3, 0x10002
4058; GFX12-NEXT:    s_bfe_i32 s38, s3, 0x10001
4059; GFX12-NEXT:    s_bfe_i32 s39, s3, 0x10000
4060; GFX12-NEXT:    s_bfe_i32 s40, s3, 0x10007
4061; GFX12-NEXT:    s_bfe_i32 s41, s3, 0x10006
4062; GFX12-NEXT:    s_bfe_i32 s42, s3, 0x10005
4063; GFX12-NEXT:    s_bfe_i32 s43, s3, 0x10004
4064; GFX12-NEXT:    s_bfe_i32 s44, s3, 0x1000b
4065; GFX12-NEXT:    s_bfe_i32 s45, s3, 0x1000a
4066; GFX12-NEXT:    s_bfe_i32 s46, s3, 0x10009
4067; GFX12-NEXT:    s_bfe_i32 s47, s3, 0x10008
4068; GFX12-NEXT:    s_bfe_i32 s48, s3, 0x1000f
4069; GFX12-NEXT:    s_bfe_i32 s49, s3, 0x1000e
4070; GFX12-NEXT:    s_bfe_i32 s50, s3, 0x1000d
4071; GFX12-NEXT:    v_dual_mov_b32 v12, s54 :: v_dual_mov_b32 v15, s51
4072; GFX12-NEXT:    v_mov_b32_e32 v14, s52
4073; GFX12-NEXT:    s_bfe_i32 s3, s3, 0x1000c
4074; GFX12-NEXT:    s_clause 0x3
4075; GFX12-NEXT:    global_store_b128 v24, v[0:3], s[0:1] offset:240
4076; GFX12-NEXT:    global_store_b128 v24, v[4:7], s[0:1] offset:224
4077; GFX12-NEXT:    global_store_b128 v24, v[8:11], s[0:1] offset:208
4078; GFX12-NEXT:    global_store_b128 v24, v[12:15], s[0:1] offset:192
4079; GFX12-NEXT:    v_dual_mov_b32 v1, s50 :: v_dual_mov_b32 v0, s3
4080; GFX12-NEXT:    v_dual_mov_b32 v3, s48 :: v_dual_mov_b32 v2, s49
4081; GFX12-NEXT:    v_dual_mov_b32 v5, s46 :: v_dual_mov_b32 v4, s47
4082; GFX12-NEXT:    v_dual_mov_b32 v7, s44 :: v_dual_mov_b32 v6, s45
4083; GFX12-NEXT:    v_mov_b32_e32 v9, s42
4084; GFX12-NEXT:    s_bfe_i32 s4, s2, 0x10003
4085; GFX12-NEXT:    s_bfe_i32 s5, s2, 0x10002
4086; GFX12-NEXT:    s_bfe_i32 s6, s2, 0x10001
4087; GFX12-NEXT:    s_bfe_i32 s7, s2, 0x10000
4088; GFX12-NEXT:    s_bfe_i32 s8, s2, 0x10007
4089; GFX12-NEXT:    s_bfe_i32 s9, s2, 0x10006
4090; GFX12-NEXT:    s_bfe_i32 s10, s2, 0x10005
4091; GFX12-NEXT:    s_bfe_i32 s11, s2, 0x10004
4092; GFX12-NEXT:    s_bfe_i32 s12, s2, 0x1000b
4093; GFX12-NEXT:    s_bfe_i32 s13, s2, 0x1000a
4094; GFX12-NEXT:    s_bfe_i32 s14, s2, 0x10009
4095; GFX12-NEXT:    s_bfe_i32 s15, s2, 0x10008
4096; GFX12-NEXT:    s_bfe_i32 s16, s2, 0x1000f
4097; GFX12-NEXT:    s_bfe_i32 s17, s2, 0x1000e
4098; GFX12-NEXT:    s_bfe_i32 s18, s2, 0x1000d
4099; GFX12-NEXT:    s_bfe_i32 s19, s2, 0x1000c
4100; GFX12-NEXT:    s_bfe_i32 s20, s2, 0x10013
4101; GFX12-NEXT:    s_bfe_i32 s21, s2, 0x10012
4102; GFX12-NEXT:    s_bfe_i32 s22, s2, 0x10011
4103; GFX12-NEXT:    s_bfe_i32 s23, s2, 0x10010
4104; GFX12-NEXT:    s_bfe_i32 s24, s2, 0x10017
4105; GFX12-NEXT:    s_bfe_i32 s25, s2, 0x10016
4106; GFX12-NEXT:    s_bfe_i32 s26, s2, 0x10015
4107; GFX12-NEXT:    s_bfe_i32 s27, s2, 0x10014
4108; GFX12-NEXT:    s_bfe_i32 s28, s2, 0x1001b
4109; GFX12-NEXT:    s_bfe_i32 s29, s2, 0x1001a
4110; GFX12-NEXT:    s_bfe_i32 s30, s2, 0x10019
4111; GFX12-NEXT:    s_bfe_i32 s31, s2, 0x10018
4112; GFX12-NEXT:    s_ashr_i32 s33, s2, 31
4113; GFX12-NEXT:    s_bfe_i32 s34, s2, 0x1001e
4114; GFX12-NEXT:    s_bfe_i32 s35, s2, 0x1001d
4115; GFX12-NEXT:    s_bfe_i32 s2, s2, 0x1001c
4116; GFX12-NEXT:    v_dual_mov_b32 v8, s43 :: v_dual_mov_b32 v11, s40
4117; GFX12-NEXT:    v_dual_mov_b32 v10, s41 :: v_dual_mov_b32 v13, s38
4118; GFX12-NEXT:    v_dual_mov_b32 v12, s39 :: v_dual_mov_b32 v15, s36
4119; GFX12-NEXT:    v_dual_mov_b32 v14, s37 :: v_dual_mov_b32 v17, s35
4120; GFX12-NEXT:    s_wait_alu 0xfffe
4121; GFX12-NEXT:    v_dual_mov_b32 v16, s2 :: v_dual_mov_b32 v19, s33
4122; GFX12-NEXT:    v_dual_mov_b32 v18, s34 :: v_dual_mov_b32 v21, s30
4123; GFX12-NEXT:    v_dual_mov_b32 v20, s31 :: v_dual_mov_b32 v23, s28
4124; GFX12-NEXT:    v_mov_b32_e32 v22, s29
4125; GFX12-NEXT:    s_clause 0x5
4126; GFX12-NEXT:    global_store_b128 v24, v[0:3], s[0:1] offset:176
4127; GFX12-NEXT:    global_store_b128 v24, v[4:7], s[0:1] offset:160
4128; GFX12-NEXT:    global_store_b128 v24, v[8:11], s[0:1] offset:144
4129; GFX12-NEXT:    global_store_b128 v24, v[12:15], s[0:1] offset:128
4130; GFX12-NEXT:    global_store_b128 v24, v[16:19], s[0:1] offset:112
4131; GFX12-NEXT:    global_store_b128 v24, v[20:23], s[0:1] offset:96
4132; GFX12-NEXT:    v_dual_mov_b32 v1, s26 :: v_dual_mov_b32 v0, s27
4133; GFX12-NEXT:    v_dual_mov_b32 v3, s24 :: v_dual_mov_b32 v2, s25
4134; GFX12-NEXT:    v_dual_mov_b32 v5, s22 :: v_dual_mov_b32 v4, s23
4135; GFX12-NEXT:    v_dual_mov_b32 v7, s20 :: v_dual_mov_b32 v6, s21
4136; GFX12-NEXT:    v_dual_mov_b32 v9, s18 :: v_dual_mov_b32 v8, s19
4137; GFX12-NEXT:    v_dual_mov_b32 v11, s16 :: v_dual_mov_b32 v10, s17
4138; GFX12-NEXT:    v_dual_mov_b32 v13, s14 :: v_dual_mov_b32 v12, s15
4139; GFX12-NEXT:    v_dual_mov_b32 v15, s12 :: v_dual_mov_b32 v14, s13
4140; GFX12-NEXT:    v_dual_mov_b32 v17, s10 :: v_dual_mov_b32 v16, s11
4141; GFX12-NEXT:    v_dual_mov_b32 v19, s8 :: v_dual_mov_b32 v18, s9
4142; GFX12-NEXT:    v_dual_mov_b32 v21, s6 :: v_dual_mov_b32 v20, s7
4143; GFX12-NEXT:    v_dual_mov_b32 v23, s4 :: v_dual_mov_b32 v22, s5
4144; GFX12-NEXT:    s_clause 0x5
4145; GFX12-NEXT:    global_store_b128 v24, v[0:3], s[0:1] offset:80
4146; GFX12-NEXT:    global_store_b128 v24, v[4:7], s[0:1] offset:64
4147; GFX12-NEXT:    global_store_b128 v24, v[8:11], s[0:1] offset:48
4148; GFX12-NEXT:    global_store_b128 v24, v[12:15], s[0:1] offset:32
4149; GFX12-NEXT:    global_store_b128 v24, v[16:19], s[0:1] offset:16
4150; GFX12-NEXT:    global_store_b128 v24, v[20:23], s[0:1]
4151; GFX12-NEXT:    s_endpgm
4152  %load = load <64 x i1>, ptr addrspace(4) %in
4153  %ext = sext <64 x i1> %load to <64 x i32>
4154  store <64 x i32> %ext, ptr addrspace(1) %out
4155  ret void
4156}
4157
; Zero-extending load of a single i1: the kernel reads an i1 through the
; addrspace(4) pointer %in, widens it to i64 with zext, and stores the i64
; through the addrspace(1) pointer %out.
;
; The assertions below are grouped by FileCheck prefix (GFX6, GFX8, EG and
; GFX12), matching the llc invocations at the top of the file. For the GFX6,
; GFX8 and GFX12 groups the expected code masks the loaded byte with 1 and
; writes a literal 0 as the upper half of the 64-bit store.
;
; These assertions are autogenerated by utils/update_llc_test_checks.py;
; regenerate them with that script instead of editing them by hand.
4158define amdgpu_kernel void @constant_zextload_i1_to_i64(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
4159; GFX6-LABEL: constant_zextload_i1_to_i64:
4160; GFX6:       ; %bb.0:
4161; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
4162; GFX6-NEXT:    s_mov_b32 s7, 0xf000
4163; GFX6-NEXT:    s_mov_b32 s6, -1
4164; GFX6-NEXT:    s_mov_b32 s10, s6
4165; GFX6-NEXT:    s_mov_b32 s11, s7
4166; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
4167; GFX6-NEXT:    s_mov_b32 s8, s2
4168; GFX6-NEXT:    s_mov_b32 s9, s3
4169; GFX6-NEXT:    buffer_load_ubyte v0, off, s[8:11], 0
4170; GFX6-NEXT:    s_mov_b32 s4, s0
4171; GFX6-NEXT:    s_mov_b32 s5, s1
4172; GFX6-NEXT:    s_waitcnt vmcnt(0)
4173; GFX6-NEXT:    v_and_b32_e32 v0, 1, v0
4174; GFX6-NEXT:    v_mov_b32_e32 v1, 0
4175; GFX6-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
4176; GFX6-NEXT:    s_endpgm
4177;
4178; GFX8-LABEL: constant_zextload_i1_to_i64:
4179; GFX8:       ; %bb.0:
4180; GFX8-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
4181; GFX8-NEXT:    v_mov_b32_e32 v3, 0
4182; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
4183; GFX8-NEXT:    v_mov_b32_e32 v0, s2
4184; GFX8-NEXT:    v_mov_b32_e32 v1, s3
4185; GFX8-NEXT:    flat_load_ubyte v2, v[0:1]
4186; GFX8-NEXT:    v_mov_b32_e32 v0, s0
4187; GFX8-NEXT:    v_mov_b32_e32 v1, s1
4188; GFX8-NEXT:    s_waitcnt vmcnt(0)
4189; GFX8-NEXT:    v_and_b32_e32 v2, 1, v2
4190; GFX8-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
4191; GFX8-NEXT:    s_endpgm
4192;
4193; EG-LABEL: constant_zextload_i1_to_i64:
4194; EG:       ; %bb.0:
4195; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
4196; EG-NEXT:    TEX 0 @6
4197; EG-NEXT:    ALU 2, @9, KC0[CB0:0-32], KC1[]
4198; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
4199; EG-NEXT:    CF_END
4200; EG-NEXT:    PAD
4201; EG-NEXT:    Fetch clause starting at 6:
4202; EG-NEXT:     VTX_READ_8 T0.X, T0.X, 0, #1
4203; EG-NEXT:    ALU clause starting at 8:
4204; EG-NEXT:     MOV * T0.X, KC0[2].Z,
4205; EG-NEXT:    ALU clause starting at 9:
4206; EG-NEXT:     MOV * T0.Y, 0.0,
4207; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
4208; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
4209;
4210; GFX12-LABEL: constant_zextload_i1_to_i64:
4211; GFX12:       ; %bb.0:
4212; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
4213; GFX12-NEXT:    s_wait_kmcnt 0x0
4214; GFX12-NEXT:    s_load_u8 s2, s[2:3], 0x0
4215; GFX12-NEXT:    s_wait_kmcnt 0x0
4216; GFX12-NEXT:    s_and_b32 s2, s2, 1
4217; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
4218; GFX12-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2
4219; GFX12-NEXT:    global_store_b64 v1, v[0:1], s[0:1]
4220; GFX12-NEXT:    s_endpgm
; IR under test: load the i1 from %in, zero-extend it to i64, store it to %out.
4221  %a = load i1, ptr addrspace(4) %in
4222  %ext = zext i1 %a to i64
4223  store i64 %ext, ptr addrspace(1) %out
4224  ret void
4225}
4226
; Test: load a single i1 through the addrspace(4) pointer %in, sign-extend it
; to i64 and store the result through the addrspace(1) pointer %out.
; The check lines are autogenerated (see the note at the top of the file);
; regenerate them with utils/update_llc_test_checks.py rather than editing.
; The checks expect a 1-bit signed bitfield extract at bit 0 (v_bfe_i32,
; BFE_INT or s_bfe_i64); GFX6 and GFX8 derive the upper dword with an
; arithmetic shift right by 31 of the lower dword.
4227define amdgpu_kernel void @constant_sextload_i1_to_i64(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
4228; GFX6-LABEL: constant_sextload_i1_to_i64:
4229; GFX6:       ; %bb.0:
4230; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
4231; GFX6-NEXT:    s_mov_b32 s7, 0xf000
4232; GFX6-NEXT:    s_mov_b32 s6, -1
4233; GFX6-NEXT:    s_mov_b32 s10, s6
4234; GFX6-NEXT:    s_mov_b32 s11, s7
4235; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
4236; GFX6-NEXT:    s_mov_b32 s8, s2
4237; GFX6-NEXT:    s_mov_b32 s9, s3
4238; GFX6-NEXT:    buffer_load_ubyte v0, off, s[8:11], 0
4239; GFX6-NEXT:    s_mov_b32 s4, s0
4240; GFX6-NEXT:    s_mov_b32 s5, s1
4241; GFX6-NEXT:    s_waitcnt vmcnt(0)
4242; GFX6-NEXT:    v_bfe_i32 v0, v0, 0, 1
4243; GFX6-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
4244; GFX6-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
4245; GFX6-NEXT:    s_endpgm
4246;
4247; GFX8-LABEL: constant_sextload_i1_to_i64:
4248; GFX8:       ; %bb.0:
4249; GFX8-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
4250; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
4251; GFX8-NEXT:    v_mov_b32_e32 v0, s2
4252; GFX8-NEXT:    v_mov_b32_e32 v1, s3
4253; GFX8-NEXT:    flat_load_ubyte v2, v[0:1]
4254; GFX8-NEXT:    v_mov_b32_e32 v0, s0
4255; GFX8-NEXT:    v_mov_b32_e32 v1, s1
4256; GFX8-NEXT:    s_waitcnt vmcnt(0)
4257; GFX8-NEXT:    v_bfe_i32 v2, v2, 0, 1
4258; GFX8-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
4259; GFX8-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
4260; GFX8-NEXT:    s_endpgm
4261;
4262; EG-LABEL: constant_sextload_i1_to_i64:
4263; EG:       ; %bb.0:
4264; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
4265; EG-NEXT:    TEX 0 @6
4266; EG-NEXT:    ALU 3, @9, KC0[CB0:0-32], KC1[]
4267; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
4268; EG-NEXT:    CF_END
4269; EG-NEXT:    PAD
4270; EG-NEXT:    Fetch clause starting at 6:
4271; EG-NEXT:     VTX_READ_8 T0.X, T0.X, 0, #1
4272; EG-NEXT:    ALU clause starting at 8:
4273; EG-NEXT:     MOV * T0.X, KC0[2].Z,
4274; EG-NEXT:    ALU clause starting at 9:
4275; EG-NEXT:     BFE_INT T0.X, T0.X, 0.0, 1,
4276; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
4277; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
4278; EG-NEXT:     MOV * T0.Y, PV.X,
4279;
4280; GFX12-LABEL: constant_sextload_i1_to_i64:
4281; GFX12:       ; %bb.0:
4282; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
4283; GFX12-NEXT:    s_wait_kmcnt 0x0
4284; GFX12-NEXT:    s_load_u8 s2, s[2:3], 0x0
4285; GFX12-NEXT:    s_wait_kmcnt 0x0
4286; GFX12-NEXT:    s_bfe_i64 s[2:3], s[2:3], 0x10000
4287; GFX12-NEXT:    v_mov_b32_e32 v2, 0
4288; GFX12-NEXT:    v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
4289; GFX12-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
4290; GFX12-NEXT:    s_endpgm
4291  %a = load i1, ptr addrspace(4) %in
4292  %ext = sext i1 %a to i64
4293  store i64 %ext, ptr addrspace(1) %out
4294  ret void
4295}
4296
; Test: load a <1 x i1> vector through the addrspace(4) pointer %in,
; zero-extend it to <1 x i64> and store it through the addrspace(1) pointer
; %out.
; The check lines are autogenerated (see the note at the top of the file);
; regenerate them with utils/update_llc_test_checks.py rather than editing.
; The expected instruction sequences are the same as those checked for
; @constant_zextload_i1_to_i64 (the scalar form of this test).
4297define amdgpu_kernel void @constant_zextload_v1i1_to_v1i64(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
4298; GFX6-LABEL: constant_zextload_v1i1_to_v1i64:
4299; GFX6:       ; %bb.0:
4300; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
4301; GFX6-NEXT:    s_mov_b32 s7, 0xf000
4302; GFX6-NEXT:    s_mov_b32 s6, -1
4303; GFX6-NEXT:    s_mov_b32 s10, s6
4304; GFX6-NEXT:    s_mov_b32 s11, s7
4305; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
4306; GFX6-NEXT:    s_mov_b32 s8, s2
4307; GFX6-NEXT:    s_mov_b32 s9, s3
4308; GFX6-NEXT:    buffer_load_ubyte v0, off, s[8:11], 0
4309; GFX6-NEXT:    s_mov_b32 s4, s0
4310; GFX6-NEXT:    s_mov_b32 s5, s1
4311; GFX6-NEXT:    s_waitcnt vmcnt(0)
4312; GFX6-NEXT:    v_and_b32_e32 v0, 1, v0
4313; GFX6-NEXT:    v_mov_b32_e32 v1, 0
4314; GFX6-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
4315; GFX6-NEXT:    s_endpgm
4316;
4317; GFX8-LABEL: constant_zextload_v1i1_to_v1i64:
4318; GFX8:       ; %bb.0:
4319; GFX8-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
4320; GFX8-NEXT:    v_mov_b32_e32 v3, 0
4321; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
4322; GFX8-NEXT:    v_mov_b32_e32 v0, s2
4323; GFX8-NEXT:    v_mov_b32_e32 v1, s3
4324; GFX8-NEXT:    flat_load_ubyte v2, v[0:1]
4325; GFX8-NEXT:    v_mov_b32_e32 v0, s0
4326; GFX8-NEXT:    v_mov_b32_e32 v1, s1
4327; GFX8-NEXT:    s_waitcnt vmcnt(0)
4328; GFX8-NEXT:    v_and_b32_e32 v2, 1, v2
4329; GFX8-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
4330; GFX8-NEXT:    s_endpgm
4331;
4332; EG-LABEL: constant_zextload_v1i1_to_v1i64:
4333; EG:       ; %bb.0:
4334; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
4335; EG-NEXT:    TEX 0 @6
4336; EG-NEXT:    ALU 2, @9, KC0[CB0:0-32], KC1[]
4337; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
4338; EG-NEXT:    CF_END
4339; EG-NEXT:    PAD
4340; EG-NEXT:    Fetch clause starting at 6:
4341; EG-NEXT:     VTX_READ_8 T0.X, T0.X, 0, #1
4342; EG-NEXT:    ALU clause starting at 8:
4343; EG-NEXT:     MOV * T0.X, KC0[2].Z,
4344; EG-NEXT:    ALU clause starting at 9:
4345; EG-NEXT:     MOV * T0.Y, 0.0,
4346; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
4347; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
4348;
4349; GFX12-LABEL: constant_zextload_v1i1_to_v1i64:
4350; GFX12:       ; %bb.0:
4351; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
4352; GFX12-NEXT:    s_wait_kmcnt 0x0
4353; GFX12-NEXT:    s_load_u8 s2, s[2:3], 0x0
4354; GFX12-NEXT:    s_wait_kmcnt 0x0
4355; GFX12-NEXT:    s_and_b32 s2, s2, 1
4356; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
4357; GFX12-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2
4358; GFX12-NEXT:    global_store_b64 v1, v[0:1], s[0:1]
4359; GFX12-NEXT:    s_endpgm
4360  %load = load <1 x i1>, ptr addrspace(4) %in
4361  %ext = zext <1 x i1> %load to <1 x i64>
4362  store <1 x i64> %ext, ptr addrspace(1) %out
4363  ret void
4364}
4365
; Test: load a <1 x i1> vector through the addrspace(4) pointer %in,
; sign-extend it to <1 x i64> and store it through the addrspace(1) pointer
; %out.
; The check lines are autogenerated (see the note at the top of the file);
; regenerate them with utils/update_llc_test_checks.py rather than editing.
; The expected instruction sequences are the same as those checked for
; @constant_sextload_i1_to_i64 (the scalar form of this test).
4366define amdgpu_kernel void @constant_sextload_v1i1_to_v1i64(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
4367; GFX6-LABEL: constant_sextload_v1i1_to_v1i64:
4368; GFX6:       ; %bb.0:
4369; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
4370; GFX6-NEXT:    s_mov_b32 s7, 0xf000
4371; GFX6-NEXT:    s_mov_b32 s6, -1
4372; GFX6-NEXT:    s_mov_b32 s10, s6
4373; GFX6-NEXT:    s_mov_b32 s11, s7
4374; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
4375; GFX6-NEXT:    s_mov_b32 s8, s2
4376; GFX6-NEXT:    s_mov_b32 s9, s3
4377; GFX6-NEXT:    buffer_load_ubyte v0, off, s[8:11], 0
4378; GFX6-NEXT:    s_mov_b32 s4, s0
4379; GFX6-NEXT:    s_mov_b32 s5, s1
4380; GFX6-NEXT:    s_waitcnt vmcnt(0)
4381; GFX6-NEXT:    v_bfe_i32 v0, v0, 0, 1
4382; GFX6-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
4383; GFX6-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
4384; GFX6-NEXT:    s_endpgm
4385;
4386; GFX8-LABEL: constant_sextload_v1i1_to_v1i64:
4387; GFX8:       ; %bb.0:
4388; GFX8-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
4389; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
4390; GFX8-NEXT:    v_mov_b32_e32 v0, s2
4391; GFX8-NEXT:    v_mov_b32_e32 v1, s3
4392; GFX8-NEXT:    flat_load_ubyte v2, v[0:1]
4393; GFX8-NEXT:    v_mov_b32_e32 v0, s0
4394; GFX8-NEXT:    v_mov_b32_e32 v1, s1
4395; GFX8-NEXT:    s_waitcnt vmcnt(0)
4396; GFX8-NEXT:    v_bfe_i32 v2, v2, 0, 1
4397; GFX8-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
4398; GFX8-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
4399; GFX8-NEXT:    s_endpgm
4400;
4401; EG-LABEL: constant_sextload_v1i1_to_v1i64:
4402; EG:       ; %bb.0:
4403; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
4404; EG-NEXT:    TEX 0 @6
4405; EG-NEXT:    ALU 3, @9, KC0[CB0:0-32], KC1[]
4406; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
4407; EG-NEXT:    CF_END
4408; EG-NEXT:    PAD
4409; EG-NEXT:    Fetch clause starting at 6:
4410; EG-NEXT:     VTX_READ_8 T0.X, T0.X, 0, #1
4411; EG-NEXT:    ALU clause starting at 8:
4412; EG-NEXT:     MOV * T0.X, KC0[2].Z,
4413; EG-NEXT:    ALU clause starting at 9:
4414; EG-NEXT:     BFE_INT T0.X, T0.X, 0.0, 1,
4415; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
4416; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
4417; EG-NEXT:     MOV * T0.Y, PV.X,
4418;
4419; GFX12-LABEL: constant_sextload_v1i1_to_v1i64:
4420; GFX12:       ; %bb.0:
4421; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
4422; GFX12-NEXT:    s_wait_kmcnt 0x0
4423; GFX12-NEXT:    s_load_u8 s2, s[2:3], 0x0
4424; GFX12-NEXT:    s_wait_kmcnt 0x0
4425; GFX12-NEXT:    s_bfe_i64 s[2:3], s[2:3], 0x10000
4426; GFX12-NEXT:    v_mov_b32_e32 v2, 0
4427; GFX12-NEXT:    v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
4428; GFX12-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
4429; GFX12-NEXT:    s_endpgm
4430  %load = load <1 x i1>, ptr addrspace(4) %in
4431  %ext = sext <1 x i1> %load to <1 x i64>
4432  store <1 x i64> %ext, ptr addrspace(1) %out
4433  ret void
4434}
4435
; Test: load a <2 x i1> vector through the addrspace(4) pointer %in,
; zero-extend it to <2 x i64> and store it through the addrspace(1) pointer
; %out.
; The check lines are autogenerated (see the note at the top of the file);
; regenerate them with utils/update_llc_test_checks.py rather than editing.
; Per the checks, a single byte is loaded; element 0 is that byte ANDed with
; 1 and element 1 is taken from bit 1 (a right shift by 1, or BFE_UINT on
; EG). The GFX12 checks use a vector global_load_u8 for this load.
4436define amdgpu_kernel void @constant_zextload_v2i1_to_v2i64(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
4437; GFX6-LABEL: constant_zextload_v2i1_to_v2i64:
4438; GFX6:       ; %bb.0:
4439; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
4440; GFX6-NEXT:    s_mov_b32 s7, 0xf000
4441; GFX6-NEXT:    s_mov_b32 s6, -1
4442; GFX6-NEXT:    s_mov_b32 s10, s6
4443; GFX6-NEXT:    s_mov_b32 s11, s7
4444; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
4445; GFX6-NEXT:    s_mov_b32 s8, s2
4446; GFX6-NEXT:    s_mov_b32 s9, s3
4447; GFX6-NEXT:    buffer_load_ubyte v0, off, s[8:11], 0
4448; GFX6-NEXT:    v_mov_b32_e32 v1, 0
4449; GFX6-NEXT:    s_mov_b32 s4, s0
4450; GFX6-NEXT:    s_mov_b32 s5, s1
4451; GFX6-NEXT:    s_waitcnt vmcnt(0)
4452; GFX6-NEXT:    v_lshrrev_b32_e32 v2, 1, v0
4453; GFX6-NEXT:    v_and_b32_e32 v0, 1, v0
4454; GFX6-NEXT:    v_mov_b32_e32 v3, v1
4455; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
4456; GFX6-NEXT:    s_endpgm
4457;
4458; GFX8-LABEL: constant_zextload_v2i1_to_v2i64:
4459; GFX8:       ; %bb.0:
4460; GFX8-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
4461; GFX8-NEXT:    v_mov_b32_e32 v2, 1
4462; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
4463; GFX8-NEXT:    v_mov_b32_e32 v0, s2
4464; GFX8-NEXT:    v_mov_b32_e32 v1, s3
4465; GFX8-NEXT:    flat_load_ubyte v0, v[0:1]
4466; GFX8-NEXT:    v_mov_b32_e32 v1, 0
4467; GFX8-NEXT:    v_mov_b32_e32 v4, s0
4468; GFX8-NEXT:    v_mov_b32_e32 v5, s1
4469; GFX8-NEXT:    v_mov_b32_e32 v3, v1
4470; GFX8-NEXT:    s_waitcnt vmcnt(0)
4471; GFX8-NEXT:    v_lshrrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
4472; GFX8-NEXT:    v_and_b32_e32 v0, 1, v0
4473; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
4474; GFX8-NEXT:    s_endpgm
4475;
4476; EG-LABEL: constant_zextload_v2i1_to_v2i64:
4477; EG:       ; %bb.0:
4478; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
4479; EG-NEXT:    TEX 0 @6
4480; EG-NEXT:    ALU 5, @9, KC0[CB0:0-32], KC1[]
4481; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T1.X, 1
4482; EG-NEXT:    CF_END
4483; EG-NEXT:    PAD
4484; EG-NEXT:    Fetch clause starting at 6:
4485; EG-NEXT:     VTX_READ_8 T0.X, T0.X, 0, #1
4486; EG-NEXT:    ALU clause starting at 8:
4487; EG-NEXT:     MOV * T0.X, KC0[2].Z,
4488; EG-NEXT:    ALU clause starting at 9:
4489; EG-NEXT:     BFE_UINT * T0.Z, T0.X, 1, 1,
4490; EG-NEXT:     AND_INT T0.X, T0.X, 1,
4491; EG-NEXT:     MOV T0.Y, 0.0,
4492; EG-NEXT:     MOV T0.W, 0.0,
4493; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
4494; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
4495;
4496; GFX12-LABEL: constant_zextload_v2i1_to_v2i64:
4497; GFX12:       ; %bb.0:
4498; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
4499; GFX12-NEXT:    v_mov_b32_e32 v1, 0
4500; GFX12-NEXT:    s_wait_kmcnt 0x0
4501; GFX12-NEXT:    global_load_u8 v0, v1, s[2:3]
4502; GFX12-NEXT:    s_wait_loadcnt 0x0
4503; GFX12-NEXT:    v_and_b32_e32 v2, 0xffff, v0
4504; GFX12-NEXT:    v_dual_mov_b32 v3, v1 :: v_dual_and_b32 v0, 1, v0
4505; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
4506; GFX12-NEXT:    v_lshrrev_b32_e32 v2, 1, v2
4507; GFX12-NEXT:    v_and_b32_e32 v0, 0xffff, v0
4508; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2)
4509; GFX12-NEXT:    v_and_b32_e32 v2, 0xffff, v2
4510; GFX12-NEXT:    global_store_b128 v1, v[0:3], s[0:1]
4511; GFX12-NEXT:    s_endpgm
4512  %load = load <2 x i1>, ptr addrspace(4) %in
4513  %ext = zext <2 x i1> %load to <2 x i64>
4514  store <2 x i64> %ext, ptr addrspace(1) %out
4515  ret void
4516}
4517
; Test: load a <2 x i1> vector through the addrspace(4) pointer %in,
; sign-extend it to <2 x i64> and store it through the addrspace(1) pointer
; %out.
; The check lines are autogenerated (see the note at the top of the file);
; regenerate them with utils/update_llc_test_checks.py rather than editing.
; Per the checks, a single byte is loaded and each element is produced by a
; 1-bit signed bitfield extract (bit 0 directly, bit 1 after a right shift by
; 1); the GCN targets fill each upper dword with an arithmetic shift right by
; 31, while EG copies the extracted value into the upper component.
4518define amdgpu_kernel void @constant_sextload_v2i1_to_v2i64(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
4519; GFX6-LABEL: constant_sextload_v2i1_to_v2i64:
4520; GFX6:       ; %bb.0:
4521; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
4522; GFX6-NEXT:    s_mov_b32 s7, 0xf000
4523; GFX6-NEXT:    s_mov_b32 s6, -1
4524; GFX6-NEXT:    s_mov_b32 s10, s6
4525; GFX6-NEXT:    s_mov_b32 s11, s7
4526; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
4527; GFX6-NEXT:    s_mov_b32 s8, s2
4528; GFX6-NEXT:    s_mov_b32 s9, s3
4529; GFX6-NEXT:    buffer_load_ubyte v0, off, s[8:11], 0
4530; GFX6-NEXT:    s_mov_b32 s4, s0
4531; GFX6-NEXT:    s_mov_b32 s5, s1
4532; GFX6-NEXT:    s_waitcnt vmcnt(0)
4533; GFX6-NEXT:    v_lshrrev_b32_e32 v2, 1, v0
4534; GFX6-NEXT:    v_bfe_i32 v0, v0, 0, 1
4535; GFX6-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
4536; GFX6-NEXT:    v_bfe_i32 v2, v2, 0, 1
4537; GFX6-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
4538; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
4539; GFX6-NEXT:    s_endpgm
4540;
4541; GFX8-LABEL: constant_sextload_v2i1_to_v2i64:
4542; GFX8:       ; %bb.0:
4543; GFX8-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
4544; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
4545; GFX8-NEXT:    v_mov_b32_e32 v0, s2
4546; GFX8-NEXT:    v_mov_b32_e32 v1, s3
4547; GFX8-NEXT:    flat_load_ubyte v0, v[0:1]
4548; GFX8-NEXT:    v_mov_b32_e32 v4, s0
4549; GFX8-NEXT:    v_mov_b32_e32 v5, s1
4550; GFX8-NEXT:    s_waitcnt vmcnt(0)
4551; GFX8-NEXT:    v_lshrrev_b32_e32 v2, 1, v0
4552; GFX8-NEXT:    v_bfe_i32 v0, v0, 0, 1
4553; GFX8-NEXT:    v_bfe_i32 v2, v2, 0, 1
4554; GFX8-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
4555; GFX8-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
4556; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
4557; GFX8-NEXT:    s_endpgm
4558;
4559; EG-LABEL: constant_sextload_v2i1_to_v2i64:
4560; EG:       ; %bb.0:
4561; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
4562; EG-NEXT:    TEX 0 @6
4563; EG-NEXT:    ALU 6, @9, KC0[CB0:0-32], KC1[]
4564; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T1.XYZW, T0.X, 1
4565; EG-NEXT:    CF_END
4566; EG-NEXT:    PAD
4567; EG-NEXT:    Fetch clause starting at 6:
4568; EG-NEXT:     VTX_READ_8 T0.X, T0.X, 0, #1
4569; EG-NEXT:    ALU clause starting at 8:
4570; EG-NEXT:     MOV * T0.X, KC0[2].Z,
4571; EG-NEXT:    ALU clause starting at 9:
4572; EG-NEXT:     BFE_INT T1.X, T0.X, 0.0, 1,
4573; EG-NEXT:     LSHR * T0.W, T0.X, 1,
4574; EG-NEXT:     BFE_INT * T1.Z, PV.W, 0.0, 1,
4575; EG-NEXT:     MOV * T1.Y, T1.X,
4576; EG-NEXT:     LSHR T0.X, KC0[2].Y, literal.x,
4577; EG-NEXT:     MOV * T1.W, T1.Z,
4578; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
4579;
4580; GFX12-LABEL: constant_sextload_v2i1_to_v2i64:
4581; GFX12:       ; %bb.0:
4582; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
4583; GFX12-NEXT:    v_mov_b32_e32 v4, 0
4584; GFX12-NEXT:    s_wait_kmcnt 0x0
4585; GFX12-NEXT:    global_load_u8 v0, v4, s[2:3]
4586; GFX12-NEXT:    s_wait_loadcnt 0x0
4587; GFX12-NEXT:    v_lshrrev_b32_e32 v1, 1, v0
4588; GFX12-NEXT:    v_bfe_i32 v0, v0, 0, 1
4589; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
4590; GFX12-NEXT:    v_bfe_i32 v2, v1, 0, 1
4591; GFX12-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
4592; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2)
4593; GFX12-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
4594; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[0:1]
4595; GFX12-NEXT:    s_endpgm
4596  %load = load <2 x i1>, ptr addrspace(4) %in
4597  %ext = sext <2 x i1> %load to <2 x i64>
4598  store <2 x i64> %ext, ptr addrspace(1) %out
4599  ret void
4600}
4601
; Test: load a <3 x i1> vector through the addrspace(4) pointer %in,
; zero-extend it to <3 x i64> and store it through the addrspace(1) pointer
; %out.
; The check lines are autogenerated (see the note at the top of the file);
; regenerate them with utils/update_llc_test_checks.py rather than editing.
; Per the checks, a single byte is loaded and the result is written with two
; stores: a 16-byte store at offset 0 for elements 0 and 1, and an 8-byte
; store at byte offset 16 for element 2.
4602define amdgpu_kernel void @constant_zextload_v3i1_to_v3i64(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
4603; GFX6-LABEL: constant_zextload_v3i1_to_v3i64:
4604; GFX6:       ; %bb.0:
4605; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
4606; GFX6-NEXT:    s_mov_b32 s7, 0xf000
4607; GFX6-NEXT:    s_mov_b32 s6, -1
4608; GFX6-NEXT:    s_mov_b32 s10, s6
4609; GFX6-NEXT:    s_mov_b32 s11, s7
4610; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
4611; GFX6-NEXT:    s_mov_b32 s8, s2
4612; GFX6-NEXT:    s_mov_b32 s9, s3
4613; GFX6-NEXT:    buffer_load_ubyte v4, off, s[8:11], 0
4614; GFX6-NEXT:    v_mov_b32_e32 v5, 0
4615; GFX6-NEXT:    v_mov_b32_e32 v1, v5
4616; GFX6-NEXT:    v_mov_b32_e32 v3, v5
4617; GFX6-NEXT:    s_mov_b32 s4, s0
4618; GFX6-NEXT:    s_mov_b32 s5, s1
4619; GFX6-NEXT:    s_waitcnt vmcnt(0)
4620; GFX6-NEXT:    v_and_b32_e32 v0, 1, v4
4621; GFX6-NEXT:    v_bfe_u32 v2, v4, 1, 1
4622; GFX6-NEXT:    v_lshrrev_b32_e32 v4, 2, v4
4623; GFX6-NEXT:    buffer_store_dwordx2 v[4:5], off, s[4:7], 0 offset:16
4624; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
4625; GFX6-NEXT:    s_endpgm
4626;
4627; GFX8-LABEL: constant_zextload_v3i1_to_v3i64:
4628; GFX8:       ; %bb.0:
4629; GFX8-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
4630; GFX8-NEXT:    v_mov_b32_e32 v10, 2
4631; GFX8-NEXT:    v_mov_b32_e32 v5, 0
4632; GFX8-NEXT:    v_mov_b32_e32 v3, v5
4633; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
4634; GFX8-NEXT:    v_mov_b32_e32 v0, s2
4635; GFX8-NEXT:    v_mov_b32_e32 v1, s3
4636; GFX8-NEXT:    flat_load_ubyte v4, v[0:1]
4637; GFX8-NEXT:    s_add_u32 s2, s0, 16
4638; GFX8-NEXT:    s_addc_u32 s3, s1, 0
4639; GFX8-NEXT:    v_mov_b32_e32 v9, s3
4640; GFX8-NEXT:    v_mov_b32_e32 v7, s1
4641; GFX8-NEXT:    v_mov_b32_e32 v8, s2
4642; GFX8-NEXT:    v_mov_b32_e32 v1, v5
4643; GFX8-NEXT:    v_mov_b32_e32 v6, s0
4644; GFX8-NEXT:    s_waitcnt vmcnt(0)
4645; GFX8-NEXT:    v_and_b32_e32 v0, 1, v4
4646; GFX8-NEXT:    v_bfe_u32 v2, v4, 1, 1
4647; GFX8-NEXT:    v_lshrrev_b32_sdwa v4, v10, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
4648; GFX8-NEXT:    flat_store_dwordx2 v[8:9], v[4:5]
4649; GFX8-NEXT:    flat_store_dwordx4 v[6:7], v[0:3]
4650; GFX8-NEXT:    s_endpgm
4651;
4652; EG-LABEL: constant_zextload_v3i1_to_v3i64:
4653; EG:       ; %bb.0:
4654; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
4655; EG-NEXT:    TEX 0 @6
4656; EG-NEXT:    ALU 11, @9, KC0[CB0:0-32], KC1[]
4657; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XY, T3.X, 0
4658; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T1.XYZW, T2.X, 1
4659; EG-NEXT:    CF_END
4660; EG-NEXT:    Fetch clause starting at 6:
4661; EG-NEXT:     VTX_READ_8 T0.X, T0.X, 0, #1
4662; EG-NEXT:    ALU clause starting at 8:
4663; EG-NEXT:     MOV * T0.X, KC0[2].Z,
4664; EG-NEXT:    ALU clause starting at 9:
4665; EG-NEXT:     BFE_UINT * T1.Z, T0.X, 1, 1,
4666; EG-NEXT:     AND_INT T1.X, T0.X, 1,
4667; EG-NEXT:     MOV T1.Y, 0.0,
4668; EG-NEXT:     LSHR * T0.X, T0.X, literal.x,
4669; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
4670; EG-NEXT:     MOV T0.Y, 0.0,
4671; EG-NEXT:     MOV * T1.W, 0.0,
4672; EG-NEXT:     LSHR T2.X, KC0[2].Y, literal.x,
4673; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
4674; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
4675; EG-NEXT:     LSHR * T3.X, PV.W, literal.x,
4676; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
4677;
4678; GFX12-LABEL: constant_zextload_v3i1_to_v3i64:
4679; GFX12:       ; %bb.0:
4680; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
4681; GFX12-NEXT:    v_mov_b32_e32 v5, 0
4682; GFX12-NEXT:    s_wait_kmcnt 0x0
4683; GFX12-NEXT:    global_load_u8 v0, v5, s[2:3]
4684; GFX12-NEXT:    s_wait_loadcnt 0x0
4685; GFX12-NEXT:    v_and_b32_e32 v1, 0xffff, v0
4686; GFX12-NEXT:    v_bfe_u32 v2, v0, 1, 1
4687; GFX12-NEXT:    v_and_b32_e32 v0, 1, v0
4688; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_4)
4689; GFX12-NEXT:    v_lshrrev_b32_e32 v4, 2, v1
4690; GFX12-NEXT:    v_mov_b32_e32 v3, v5
4691; GFX12-NEXT:    v_dual_mov_b32 v1, v5 :: v_dual_and_b32 v2, 0xffff, v2
4692; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
4693; GFX12-NEXT:    v_and_b32_e32 v0, 0xffff, v0
4694; GFX12-NEXT:    v_and_b32_e32 v4, 0xffff, v4
4695; GFX12-NEXT:    s_clause 0x1
4696; GFX12-NEXT:    global_store_b64 v5, v[4:5], s[0:1] offset:16
4697; GFX12-NEXT:    global_store_b128 v5, v[0:3], s[0:1]
4698; GFX12-NEXT:    s_endpgm
4699  %load = load <3 x i1>, ptr addrspace(4) %in
4700  %ext = zext <3 x i1> %load to <3 x i64>
4701  store <3 x i64> %ext, ptr addrspace(1) %out
4702  ret void
4703}
4704
; Test: load a <3 x i1> vector through the addrspace(4) pointer %in,
; sign-extend it to <3 x i64> and store it through the addrspace(1) pointer
; %out.
; The check lines are autogenerated (see the note at the top of the file);
; regenerate them with utils/update_llc_test_checks.py rather than editing.
; Per the checks, a single byte is loaded, each element is a 1-bit signed
; bitfield extract of bit 0, 1 or 2 (after a right shift for bits 1 and 2),
; and the result is written as a 16-byte store at offset 0 plus an 8-byte
; store at byte offset 16.
4705define amdgpu_kernel void @constant_sextload_v3i1_to_v3i64(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
4706; GFX6-LABEL: constant_sextload_v3i1_to_v3i64:
4707; GFX6:       ; %bb.0:
4708; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
4709; GFX6-NEXT:    s_mov_b32 s7, 0xf000
4710; GFX6-NEXT:    s_mov_b32 s6, -1
4711; GFX6-NEXT:    s_mov_b32 s10, s6
4712; GFX6-NEXT:    s_mov_b32 s11, s7
4713; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
4714; GFX6-NEXT:    s_mov_b32 s8, s2
4715; GFX6-NEXT:    s_mov_b32 s9, s3
4716; GFX6-NEXT:    buffer_load_ubyte v0, off, s[8:11], 0
4717; GFX6-NEXT:    s_mov_b32 s4, s0
4718; GFX6-NEXT:    s_mov_b32 s5, s1
4719; GFX6-NEXT:    s_waitcnt vmcnt(0)
4720; GFX6-NEXT:    v_lshrrev_b32_e32 v3, 2, v0
4721; GFX6-NEXT:    v_lshrrev_b32_e32 v2, 1, v0
4722; GFX6-NEXT:    v_bfe_i32 v0, v0, 0, 1
4723; GFX6-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
4724; GFX6-NEXT:    v_bfe_i32 v2, v2, 0, 1
4725; GFX6-NEXT:    v_bfe_i32 v4, v3, 0, 1
4726; GFX6-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
4727; GFX6-NEXT:    v_ashrrev_i32_e32 v5, 31, v4
4728; GFX6-NEXT:    buffer_store_dwordx2 v[4:5], off, s[4:7], 0 offset:16
4729; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
4730; GFX6-NEXT:    s_endpgm
4731;
4732; GFX8-LABEL: constant_sextload_v3i1_to_v3i64:
4733; GFX8:       ; %bb.0:
4734; GFX8-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
4735; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
4736; GFX8-NEXT:    v_mov_b32_e32 v0, s2
4737; GFX8-NEXT:    v_mov_b32_e32 v1, s3
4738; GFX8-NEXT:    flat_load_ubyte v0, v[0:1]
4739; GFX8-NEXT:    s_add_u32 s2, s0, 16
4740; GFX8-NEXT:    s_addc_u32 s3, s1, 0
4741; GFX8-NEXT:    v_mov_b32_e32 v7, s3
4742; GFX8-NEXT:    v_mov_b32_e32 v5, s1
4743; GFX8-NEXT:    v_mov_b32_e32 v6, s2
4744; GFX8-NEXT:    v_mov_b32_e32 v4, s0
4745; GFX8-NEXT:    s_waitcnt vmcnt(0)
4746; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 2, v0
4747; GFX8-NEXT:    v_lshrrev_b32_e32 v2, 1, v0
4748; GFX8-NEXT:    v_bfe_i32 v8, v3, 0, 1
4749; GFX8-NEXT:    v_bfe_i32 v0, v0, 0, 1
4750; GFX8-NEXT:    v_bfe_i32 v2, v2, 0, 1
4751; GFX8-NEXT:    v_ashrrev_i32_e32 v9, 31, v8
4752; GFX8-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
4753; GFX8-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
4754; GFX8-NEXT:    flat_store_dwordx2 v[6:7], v[8:9]
4755; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
4756; GFX8-NEXT:    s_endpgm
4757;
4758; EG-LABEL: constant_sextload_v3i1_to_v3i64:
4759; EG:       ; %bb.0:
4760; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
4761; EG-NEXT:    TEX 0 @6
4762; EG-NEXT:    ALU 14, @9, KC0[CB0:0-32], KC1[]
4763; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XY, T3.X, 0
4764; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T1.XYZW, T2.X, 1
4765; EG-NEXT:    CF_END
4766; EG-NEXT:    Fetch clause starting at 6:
4767; EG-NEXT:     VTX_READ_8 T0.X, T0.X, 0, #1
4768; EG-NEXT:    ALU clause starting at 8:
4769; EG-NEXT:     MOV * T0.X, KC0[2].Z,
4770; EG-NEXT:    ALU clause starting at 9:
4771; EG-NEXT:     BFE_INT T1.X, T0.X, 0.0, 1,
4772; EG-NEXT:     LSHR T0.W, T0.X, 1,
4773; EG-NEXT:     LSHR * T2.X, KC0[2].Y, literal.x,
4774; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
4775; EG-NEXT:     BFE_INT T1.Z, PV.W, 0.0, 1,
4776; EG-NEXT:     LSHR * T0.W, T0.X, literal.x,
4777; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
4778; EG-NEXT:     BFE_INT T0.X, PV.W, 0.0, 1,
4779; EG-NEXT:     MOV T1.Y, T1.X,
4780; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.x,
4781; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
4782; EG-NEXT:     LSHR T3.X, PV.W, literal.x,
4783; EG-NEXT:     MOV T0.Y, PV.X,
4784; EG-NEXT:     MOV * T1.W, T1.Z,
4785; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
4786;
4787; GFX12-LABEL: constant_sextload_v3i1_to_v3i64:
4788; GFX12:       ; %bb.0:
4789; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
4790; GFX12-NEXT:    v_mov_b32_e32 v6, 0
4791; GFX12-NEXT:    s_wait_kmcnt 0x0
4792; GFX12-NEXT:    global_load_u8 v0, v6, s[2:3]
4793; GFX12-NEXT:    s_wait_loadcnt 0x0
4794; GFX12-NEXT:    v_lshrrev_b32_e32 v1, 2, v0
4795; GFX12-NEXT:    v_lshrrev_b32_e32 v2, 1, v0
4796; GFX12-NEXT:    v_bfe_i32 v0, v0, 0, 1
4797; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
4798; GFX12-NEXT:    v_bfe_i32 v4, v1, 0, 1
4799; GFX12-NEXT:    v_bfe_i32 v2, v2, 0, 1
4800; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
4801; GFX12-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
4802; GFX12-NEXT:    v_ashrrev_i32_e32 v5, 31, v4
4803; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_3)
4804; GFX12-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
4805; GFX12-NEXT:    s_clause 0x1
4806; GFX12-NEXT:    global_store_b64 v6, v[4:5], s[0:1] offset:16
4807; GFX12-NEXT:    global_store_b128 v6, v[0:3], s[0:1]
4808; GFX12-NEXT:    s_endpgm
4809  %load = load <3 x i1>, ptr addrspace(4) %in
4810  %ext = sext <3 x i1> %load to <3 x i64>
4811  store <3 x i64> %ext, ptr addrspace(1) %out
4812  ret void
4813}
4814
; Test: load a <4 x i1> vector through the addrspace(4) pointer %in,
; zero-extend it to <4 x i64> and store it through the addrspace(1) pointer
; %out.
; The check lines are autogenerated (see the note at the top of the file);
; regenerate them with utils/update_llc_test_checks.py rather than editing.
; Per the checks, a single byte is loaded and the result is written with two
; 16-byte stores: elements 0 and 1 (bits 0 and 1) at offset 0, and elements 2
; and 3 (bits 2 and 3) at byte offset 16. The GFX12 checks move the loaded
; byte to a scalar register with v_readfirstlane_b32 and extract bits 0-2
; with scalar instructions.
4815define amdgpu_kernel void @constant_zextload_v4i1_to_v4i64(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
4816; GFX6-LABEL: constant_zextload_v4i1_to_v4i64:
4817; GFX6:       ; %bb.0:
4818; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
4819; GFX6-NEXT:    s_mov_b32 s7, 0xf000
4820; GFX6-NEXT:    s_mov_b32 s6, -1
4821; GFX6-NEXT:    s_mov_b32 s10, s6
4822; GFX6-NEXT:    s_mov_b32 s11, s7
4823; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
4824; GFX6-NEXT:    s_mov_b32 s8, s2
4825; GFX6-NEXT:    s_mov_b32 s9, s3
4826; GFX6-NEXT:    buffer_load_ubyte v0, off, s[8:11], 0
4827; GFX6-NEXT:    v_mov_b32_e32 v1, 0
4828; GFX6-NEXT:    v_mov_b32_e32 v3, v1
4829; GFX6-NEXT:    v_mov_b32_e32 v5, v1
4830; GFX6-NEXT:    v_mov_b32_e32 v7, v1
4831; GFX6-NEXT:    s_mov_b32 s4, s0
4832; GFX6-NEXT:    s_mov_b32 s5, s1
4833; GFX6-NEXT:    s_waitcnt vmcnt(0)
4834; GFX6-NEXT:    v_and_b32_e32 v4, 1, v0
4835; GFX6-NEXT:    v_bfe_u32 v6, v0, 1, 1
4836; GFX6-NEXT:    v_lshrrev_b32_e32 v2, 3, v0
4837; GFX6-NEXT:    v_bfe_u32 v0, v0, 2, 1
4838; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0 offset:16
4839; GFX6-NEXT:    buffer_store_dwordx4 v[4:7], off, s[4:7], 0
4840; GFX6-NEXT:    s_endpgm
4841;
4842; GFX8-LABEL: constant_zextload_v4i1_to_v4i64:
4843; GFX8:       ; %bb.0:
4844; GFX8-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
4845; GFX8-NEXT:    v_mov_b32_e32 v2, 3
4846; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
4847; GFX8-NEXT:    v_mov_b32_e32 v0, s2
4848; GFX8-NEXT:    v_mov_b32_e32 v1, s3
4849; GFX8-NEXT:    flat_load_ubyte v0, v[0:1]
4850; GFX8-NEXT:    s_add_u32 s2, s0, 16
4851; GFX8-NEXT:    s_addc_u32 s3, s1, 0
4852; GFX8-NEXT:    v_mov_b32_e32 v1, 0
4853; GFX8-NEXT:    v_mov_b32_e32 v11, s3
4854; GFX8-NEXT:    v_mov_b32_e32 v3, v1
4855; GFX8-NEXT:    v_mov_b32_e32 v9, s1
4856; GFX8-NEXT:    v_mov_b32_e32 v10, s2
4857; GFX8-NEXT:    v_mov_b32_e32 v5, v1
4858; GFX8-NEXT:    v_mov_b32_e32 v7, v1
4859; GFX8-NEXT:    v_mov_b32_e32 v8, s0
4860; GFX8-NEXT:    s_waitcnt vmcnt(0)
4861; GFX8-NEXT:    v_and_b32_e32 v4, 1, v0
4862; GFX8-NEXT:    v_bfe_u32 v6, v0, 1, 1
4863; GFX8-NEXT:    v_lshrrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
4864; GFX8-NEXT:    v_bfe_u32 v0, v0, 2, 1
4865; GFX8-NEXT:    flat_store_dwordx4 v[10:11], v[0:3]
4866; GFX8-NEXT:    flat_store_dwordx4 v[8:9], v[4:7]
4867; GFX8-NEXT:    s_endpgm
4868;
4869; EG-LABEL: constant_zextload_v4i1_to_v4i64:
4870; EG:       ; %bb.0:
4871; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
4872; EG-NEXT:    TEX 0 @6
4873; EG-NEXT:    ALU 14, @9, KC0[CB0:0-32], KC1[]
4874; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T1.XYZW, T3.X, 0
4875; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T2.X, 1
4876; EG-NEXT:    CF_END
4877; EG-NEXT:    Fetch clause starting at 6:
4878; EG-NEXT:     VTX_READ_8 T0.X, T0.X, 0, #1
4879; EG-NEXT:    ALU clause starting at 8:
4880; EG-NEXT:     MOV * T0.X, KC0[2].Z,
4881; EG-NEXT:    ALU clause starting at 9:
4882; EG-NEXT:     BFE_UINT * T1.Z, T0.X, literal.x, 1,
4883; EG-NEXT:    3(4.203895e-45), 0(0.000000e+00)
4884; EG-NEXT:     BFE_UINT T1.X, T0.X, literal.x, 1,
4885; EG-NEXT:     MOV T1.Y, 0.0,
4886; EG-NEXT:     BFE_UINT T0.Z, T0.X, 1, 1,
4887; EG-NEXT:     AND_INT * T0.X, T0.X, 1,
4888; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
4889; EG-NEXT:     MOV T0.Y, 0.0,
4890; EG-NEXT:     MOV T1.W, 0.0,
4891; EG-NEXT:     MOV * T0.W, 0.0,
4892; EG-NEXT:     LSHR T2.X, KC0[2].Y, literal.x,
4893; EG-NEXT:     ADD_INT * T2.W, KC0[2].Y, literal.y,
4894; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
4895; EG-NEXT:     LSHR * T3.X, PV.W, literal.x,
4896; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
4897;
4898; GFX12-LABEL: constant_zextload_v4i1_to_v4i64:
4899; GFX12:       ; %bb.0:
4900; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
4901; GFX12-NEXT:    v_mov_b32_e32 v1, 0
4902; GFX12-NEXT:    s_wait_kmcnt 0x0
4903; GFX12-NEXT:    global_load_u8 v0, v1, s[2:3]
4904; GFX12-NEXT:    s_wait_loadcnt 0x0
4905; GFX12-NEXT:    v_readfirstlane_b32 s2, v0
4906; GFX12-NEXT:    v_and_b32_e32 v0, 0xffff, v0
4907; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
4908; GFX12-NEXT:    s_bfe_u32 s3, s2, 0x10002
4909; GFX12-NEXT:    v_lshrrev_b32_e32 v2, 3, v0
4910; GFX12-NEXT:    s_and_b32 s3, 0xffff, s3
4911; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
4912; GFX12-NEXT:    v_dual_mov_b32 v3, v1 :: v_dual_mov_b32 v0, s3
4913; GFX12-NEXT:    s_bfe_u32 s3, s2, 0x10001
4914; GFX12-NEXT:    v_and_b32_e32 v2, 0xffff, v2
4915; GFX12-NEXT:    s_and_b32 s2, s2, 1
4916; GFX12-NEXT:    s_wait_alu 0xfffe
4917; GFX12-NEXT:    s_and_b32 s3, 0xffff, s3
4918; GFX12-NEXT:    s_and_b32 s2, 0xffff, s2
4919; GFX12-NEXT:    global_store_b128 v1, v[0:3], s[0:1] offset:16
4920; GFX12-NEXT:    s_wait_alu 0xfffe
4921; GFX12-NEXT:    v_mov_b32_e32 v0, s2
4922; GFX12-NEXT:    v_mov_b32_e32 v2, s3
4923; GFX12-NEXT:    global_store_b128 v1, v[0:3], s[0:1]
4924; GFX12-NEXT:    s_endpgm
4925  %load = load <4 x i1>, ptr addrspace(4) %in
4926  %ext = zext <4 x i1> %load to <4 x i64>
4927  store <4 x i64> %ext, ptr addrspace(1) %out
4928  ret void
4929}
4930
; Kernel under test: load a <4 x i1> vector through the addrspace(4) pointer
; %in, sign-extend every lane to i64, and store the resulting <4 x i64>
; (32 bytes) through the addrspace(1) pointer %out.
;
; The check lines inside this function are autogenerated (see the NOTE on the
; first line of the file); regenerate them with utils/update_llc_test_checks.py
; instead of editing them by hand. There is one group of checks per llc RUN
; line in the file header (the GFX6, GFX8, EG and GFX12 prefixes).
;
; What the recorded output shows: every target issues a single byte-sized load;
; the GFX6, GFX8 and GFX12 sequences sign-extend each bit with v_bfe_i32 and
; build the upper dword with v_ashrrev_i32 by 31; EG uses BFE_INT and copies
; the result into the upper component with MOV. All four targets finish with
; two 128-bit stores.
4931define amdgpu_kernel void @constant_sextload_v4i1_to_v4i64(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
4932; GFX6-LABEL: constant_sextload_v4i1_to_v4i64:
4933; GFX6:       ; %bb.0:
4934; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
4935; GFX6-NEXT:    s_mov_b32 s7, 0xf000
4936; GFX6-NEXT:    s_mov_b32 s6, -1
4937; GFX6-NEXT:    s_mov_b32 s10, s6
4938; GFX6-NEXT:    s_mov_b32 s11, s7
4939; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
4940; GFX6-NEXT:    s_mov_b32 s8, s2
4941; GFX6-NEXT:    s_mov_b32 s9, s3
4942; GFX6-NEXT:    buffer_load_ubyte v0, off, s[8:11], 0
4943; GFX6-NEXT:    s_mov_b32 s4, s0
4944; GFX6-NEXT:    s_mov_b32 s5, s1
4945; GFX6-NEXT:    s_waitcnt vmcnt(0)
4946; GFX6-NEXT:    v_lshrrev_b32_e32 v3, 2, v0
4947; GFX6-NEXT:    v_lshrrev_b32_e32 v4, 3, v0
4948; GFX6-NEXT:    v_lshrrev_b32_e32 v2, 1, v0
4949; GFX6-NEXT:    v_bfe_i32 v0, v0, 0, 1
4950; GFX6-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
4951; GFX6-NEXT:    v_bfe_i32 v2, v2, 0, 1
4952; GFX6-NEXT:    v_bfe_i32 v6, v4, 0, 1
4953; GFX6-NEXT:    v_bfe_i32 v4, v3, 0, 1
4954; GFX6-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
4955; GFX6-NEXT:    v_ashrrev_i32_e32 v7, 31, v6
4956; GFX6-NEXT:    v_ashrrev_i32_e32 v5, 31, v4
4957; GFX6-NEXT:    buffer_store_dwordx4 v[4:7], off, s[4:7], 0 offset:16
4958; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
4959; GFX6-NEXT:    s_endpgm
4960;
4961; GFX8-LABEL: constant_sextload_v4i1_to_v4i64:
4962; GFX8:       ; %bb.0:
4963; GFX8-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
4964; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
4965; GFX8-NEXT:    v_mov_b32_e32 v0, s2
4966; GFX8-NEXT:    v_mov_b32_e32 v1, s3
4967; GFX8-NEXT:    flat_load_ubyte v0, v[0:1]
4968; GFX8-NEXT:    s_add_u32 s2, s0, 16
4969; GFX8-NEXT:    s_addc_u32 s3, s1, 0
4970; GFX8-NEXT:    v_mov_b32_e32 v11, s3
4971; GFX8-NEXT:    v_mov_b32_e32 v9, s1
4972; GFX8-NEXT:    v_mov_b32_e32 v10, s2
4973; GFX8-NEXT:    v_mov_b32_e32 v8, s0
4974; GFX8-NEXT:    s_waitcnt vmcnt(0)
4975; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 2, v0
4976; GFX8-NEXT:    v_lshrrev_b32_e32 v4, 3, v0
4977; GFX8-NEXT:    v_lshrrev_b32_e32 v2, 1, v0
4978; GFX8-NEXT:    v_bfe_i32 v6, v4, 0, 1
4979; GFX8-NEXT:    v_bfe_i32 v4, v3, 0, 1
4980; GFX8-NEXT:    v_bfe_i32 v0, v0, 0, 1
4981; GFX8-NEXT:    v_bfe_i32 v2, v2, 0, 1
4982; GFX8-NEXT:    v_ashrrev_i32_e32 v7, 31, v6
4983; GFX8-NEXT:    v_ashrrev_i32_e32 v5, 31, v4
4984; GFX8-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
4985; GFX8-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
4986; GFX8-NEXT:    flat_store_dwordx4 v[10:11], v[4:7]
4987; GFX8-NEXT:    flat_store_dwordx4 v[8:9], v[0:3]
4988; GFX8-NEXT:    s_endpgm
4989;
4990; EG-LABEL: constant_sextload_v4i1_to_v4i64:
4991; EG:       ; %bb.0:
4992; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
4993; EG-NEXT:    TEX 0 @6
4994; EG-NEXT:    ALU 17, @9, KC0[CB0:0-32], KC1[]
4995; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T2.XYZW, T3.X, 0
4996; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T1.XYZW, T0.X, 1
4997; EG-NEXT:    CF_END
4998; EG-NEXT:    Fetch clause starting at 6:
4999; EG-NEXT:     VTX_READ_8 T0.X, T0.X, 0, #1
5000; EG-NEXT:    ALU clause starting at 8:
5001; EG-NEXT:     MOV * T0.X, KC0[2].Z,
5002; EG-NEXT:    ALU clause starting at 9:
5003; EG-NEXT:     LSHR * T0.W, T0.X, literal.x,
5004; EG-NEXT:    3(4.203895e-45), 0(0.000000e+00)
5005; EG-NEXT:     BFE_INT T1.X, T0.X, 0.0, 1,
5006; EG-NEXT:     BFE_INT T2.Z, PV.W, 0.0, 1,
5007; EG-NEXT:     LSHR * T0.W, T0.X, literal.x,
5008; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
5009; EG-NEXT:     BFE_INT T2.X, PV.W, 0.0, 1,
5010; EG-NEXT:     LSHR * T0.W, T0.X, 1,
5011; EG-NEXT:     MOV T2.Y, PV.X,
5012; EG-NEXT:     BFE_INT * T1.Z, PV.W, 0.0, 1,
5013; EG-NEXT:     LSHR T0.X, KC0[2].Y, literal.x,
5014; EG-NEXT:     MOV T1.Y, T1.X,
5015; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
5016; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
5017; EG-NEXT:     LSHR T3.X, PV.W, literal.x,
5018; EG-NEXT:     MOV T1.W, T1.Z,
5019; EG-NEXT:     MOV * T2.W, T2.Z,
5020; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
5021;
5022; GFX12-LABEL: constant_sextload_v4i1_to_v4i64:
5023; GFX12:       ; %bb.0:
5024; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
5025; GFX12-NEXT:    v_mov_b32_e32 v8, 0
5026; GFX12-NEXT:    s_wait_kmcnt 0x0
5027; GFX12-NEXT:    global_load_u8 v0, v8, s[2:3]
5028; GFX12-NEXT:    s_wait_loadcnt 0x0
5029; GFX12-NEXT:    v_lshrrev_b32_e32 v1, 3, v0
5030; GFX12-NEXT:    v_lshrrev_b32_e32 v2, 2, v0
5031; GFX12-NEXT:    v_lshrrev_b32_e32 v3, 1, v0
5032; GFX12-NEXT:    v_bfe_i32 v0, v0, 0, 1
5033; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
5034; GFX12-NEXT:    v_bfe_i32 v6, v1, 0, 1
5035; GFX12-NEXT:    v_bfe_i32 v4, v2, 0, 1
5036; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
5037; GFX12-NEXT:    v_bfe_i32 v2, v3, 0, 1
5038; GFX12-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
5039; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
5040; GFX12-NEXT:    v_ashrrev_i32_e32 v7, 31, v6
5041; GFX12-NEXT:    v_ashrrev_i32_e32 v5, 31, v4
5042; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_4)
5043; GFX12-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
5044; GFX12-NEXT:    s_clause 0x1
5045; GFX12-NEXT:    global_store_b128 v8, v[4:7], s[0:1] offset:16
5046; GFX12-NEXT:    global_store_b128 v8, v[0:3], s[0:1]
5047; GFX12-NEXT:    s_endpgm
; IR body: vector i1 load, lane-wise sext to i64, full-vector store.
5048  %load = load <4 x i1>, ptr addrspace(4) %in
5049  %ext = sext <4 x i1> %load to <4 x i64>
5050  store <4 x i64> %ext, ptr addrspace(1) %out
5051  ret void
5052}
5053
; Kernel under test: load a <8 x i1> vector through the addrspace(4) pointer
; %in, zero-extend every lane to i64, and store the resulting <8 x i64>
; (64 bytes) through the addrspace(1) pointer %out.
;
; The check lines inside this function are autogenerated (see the NOTE on the
; first line of the file); regenerate them with utils/update_llc_test_checks.py
; instead of editing them by hand. There is one group of checks per llc RUN
; line in the file header (the GFX6, GFX8, EG and GFX12 prefixes).
;
; What the recorded output shows: every target issues a single byte-sized load
; and extracts the individual bits with bitfield-extract / and / shift
; instructions (v_bfe_u32, v_and_b32, v_lshrrev_b32 on the GCN targets;
; BFE_UINT and AND_INT on EG). The upper dword of each i64 is a copy of a zero
; register. The 64-byte result is written with four 128-bit stores at byte
; offsets 0, 16, 32 and 48 from %out.
5054define amdgpu_kernel void @constant_zextload_v8i1_to_v8i64(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
5055; GFX6-LABEL: constant_zextload_v8i1_to_v8i64:
5056; GFX6:       ; %bb.0:
5057; GFX6-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x9
5058; GFX6-NEXT:    s_mov_b32 s3, 0xf000
5059; GFX6-NEXT:    s_mov_b32 s2, -1
5060; GFX6-NEXT:    s_mov_b32 s10, s2
5061; GFX6-NEXT:    s_mov_b32 s11, s3
5062; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
5063; GFX6-NEXT:    s_mov_b32 s8, s6
5064; GFX6-NEXT:    s_mov_b32 s9, s7
5065; GFX6-NEXT:    buffer_load_ubyte v0, off, s[8:11], 0
5066; GFX6-NEXT:    v_mov_b32_e32 v1, 0
5067; GFX6-NEXT:    v_mov_b32_e32 v3, v1
5068; GFX6-NEXT:    v_mov_b32_e32 v5, v1
5069; GFX6-NEXT:    v_mov_b32_e32 v7, v1
5070; GFX6-NEXT:    v_mov_b32_e32 v9, v1
5071; GFX6-NEXT:    v_mov_b32_e32 v11, v1
5072; GFX6-NEXT:    v_mov_b32_e32 v13, v1
5073; GFX6-NEXT:    v_mov_b32_e32 v15, v1
5074; GFX6-NEXT:    s_mov_b32 s0, s4
5075; GFX6-NEXT:    s_mov_b32 s1, s5
5076; GFX6-NEXT:    s_waitcnt vmcnt(0)
5077; GFX6-NEXT:    v_bfe_u32 v14, v0, 1, 1
5078; GFX6-NEXT:    v_bfe_u32 v10, v0, 3, 1
5079; GFX6-NEXT:    v_bfe_u32 v6, v0, 5, 1
5080; GFX6-NEXT:    v_lshrrev_b32_e32 v2, 7, v0
5081; GFX6-NEXT:    v_and_b32_e32 v12, 1, v0
5082; GFX6-NEXT:    v_bfe_u32 v8, v0, 2, 1
5083; GFX6-NEXT:    v_bfe_u32 v4, v0, 4, 1
5084; GFX6-NEXT:    v_bfe_u32 v0, v0, 6, 1
5085; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
5086; GFX6-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:32
5087; GFX6-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:16
5088; GFX6-NEXT:    buffer_store_dwordx4 v[12:15], off, s[0:3], 0
5089; GFX6-NEXT:    s_endpgm
5090;
5091; GFX8-LABEL: constant_zextload_v8i1_to_v8i64:
5092; GFX8:       ; %bb.0:
5093; GFX8-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
5094; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
5095; GFX8-NEXT:    v_mov_b32_e32 v0, s2
5096; GFX8-NEXT:    v_mov_b32_e32 v1, s3
5097; GFX8-NEXT:    flat_load_ubyte v0, v[0:1]
5098; GFX8-NEXT:    s_add_u32 s2, s0, 48
5099; GFX8-NEXT:    s_addc_u32 s3, s1, 0
5100; GFX8-NEXT:    s_add_u32 s4, s0, 32
5101; GFX8-NEXT:    s_addc_u32 s5, s1, 0
5102; GFX8-NEXT:    v_mov_b32_e32 v1, 0
5103; GFX8-NEXT:    v_mov_b32_e32 v16, s5
5104; GFX8-NEXT:    v_mov_b32_e32 v5, v1
5105; GFX8-NEXT:    v_mov_b32_e32 v7, v1
5106; GFX8-NEXT:    v_mov_b32_e32 v15, s4
5107; GFX8-NEXT:    v_mov_b32_e32 v8, v1
5108; GFX8-NEXT:    v_mov_b32_e32 v10, v1
5109; GFX8-NEXT:    v_mov_b32_e32 v3, v1
5110; GFX8-NEXT:    v_mov_b32_e32 v12, v1
5111; GFX8-NEXT:    v_mov_b32_e32 v14, v1
5112; GFX8-NEXT:    s_waitcnt vmcnt(0)
5113; GFX8-NEXT:    v_bfe_u32 v6, v0, 5, 1
5114; GFX8-NEXT:    v_bfe_u32 v4, v0, 4, 1
5115; GFX8-NEXT:    flat_store_dwordx4 v[15:16], v[4:7]
5116; GFX8-NEXT:    v_mov_b32_e32 v16, s3
5117; GFX8-NEXT:    v_mov_b32_e32 v5, s1
5118; GFX8-NEXT:    v_mov_b32_e32 v4, s0
5119; GFX8-NEXT:    s_add_u32 s0, s0, 16
5120; GFX8-NEXT:    s_addc_u32 s1, s1, 0
5121; GFX8-NEXT:    v_mov_b32_e32 v18, s1
5122; GFX8-NEXT:    v_mov_b32_e32 v17, s0
5123; GFX8-NEXT:    v_and_b32_e32 v6, 0xffff, v0
5124; GFX8-NEXT:    v_bfe_u32 v9, v0, 3, 1
5125; GFX8-NEXT:    v_bfe_u32 v7, v0, 2, 1
5126; GFX8-NEXT:    v_mov_b32_e32 v15, s2
5127; GFX8-NEXT:    v_bfe_u32 v13, v0, 1, 1
5128; GFX8-NEXT:    v_and_b32_e32 v11, 1, v0
5129; GFX8-NEXT:    v_lshrrev_b32_e32 v2, 7, v6
5130; GFX8-NEXT:    v_bfe_u32 v0, v6, 6, 1
5131; GFX8-NEXT:    flat_store_dwordx4 v[17:18], v[7:10]
5132; GFX8-NEXT:    flat_store_dwordx4 v[15:16], v[0:3]
5133; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[11:14]
5134; GFX8-NEXT:    s_endpgm
5135;
5136; EG-LABEL: constant_zextload_v8i1_to_v8i64:
5137; EG:       ; %bb.0:
5138; EG-NEXT:    ALU 0, @10, KC0[CB0:0-32], KC1[]
5139; EG-NEXT:    TEX 0 @8
5140; EG-NEXT:    ALU 30, @11, KC0[CB0:0-32], KC1[]
5141; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T6.XYZW, T12.X, 0
5142; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T7.XYZW, T11.X, 0
5143; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T8.XYZW, T10.X, 0
5144; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T5.XYZW, T9.X, 1
5145; EG-NEXT:    CF_END
5146; EG-NEXT:    Fetch clause starting at 8:
5147; EG-NEXT:     VTX_READ_8 T5.X, T5.X, 0, #1
5148; EG-NEXT:    ALU clause starting at 10:
5149; EG-NEXT:     MOV * T5.X, KC0[2].Z,
5150; EG-NEXT:    ALU clause starting at 11:
5151; EG-NEXT:     BFE_UINT * T6.Z, T5.X, literal.x, 1,
5152; EG-NEXT:    7(9.809089e-45), 0(0.000000e+00)
5153; EG-NEXT:     BFE_UINT T6.X, T5.X, literal.x, 1,
5154; EG-NEXT:     MOV T6.Y, 0.0,
5155; EG-NEXT:     BFE_UINT * T7.Z, T5.X, literal.y, 1,
5156; EG-NEXT:    6(8.407791e-45), 5(7.006492e-45)
5157; EG-NEXT:     BFE_UINT T7.X, T5.X, literal.x, 1,
5158; EG-NEXT:     MOV T7.Y, 0.0,
5159; EG-NEXT:     BFE_UINT * T8.Z, T5.X, literal.y, 1,
5160; EG-NEXT:    4(5.605194e-45), 3(4.203895e-45)
5161; EG-NEXT:     BFE_UINT T8.X, T5.X, literal.x, 1,
5162; EG-NEXT:     MOV T8.Y, 0.0,
5163; EG-NEXT:     BFE_UINT T5.Z, T5.X, 1, 1,
5164; EG-NEXT:     AND_INT * T5.X, T5.X, 1,
5165; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
5166; EG-NEXT:     MOV T5.Y, 0.0,
5167; EG-NEXT:     MOV T6.W, 0.0,
5168; EG-NEXT:     MOV * T7.W, 0.0,
5169; EG-NEXT:     MOV T8.W, 0.0,
5170; EG-NEXT:     MOV * T5.W, 0.0,
5171; EG-NEXT:     LSHR T9.X, KC0[2].Y, literal.x,
5172; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
5173; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
5174; EG-NEXT:     LSHR T10.X, PV.W, literal.x,
5175; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
5176; EG-NEXT:    2(2.802597e-45), 32(4.484155e-44)
5177; EG-NEXT:     LSHR T11.X, PV.W, literal.x,
5178; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
5179; EG-NEXT:    2(2.802597e-45), 48(6.726233e-44)
5180; EG-NEXT:     LSHR * T12.X, PV.W, literal.x,
5181; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
5182;
5183; GFX12-LABEL: constant_zextload_v8i1_to_v8i64:
5184; GFX12:       ; %bb.0:
5185; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
5186; GFX12-NEXT:    v_mov_b32_e32 v1, 0
5187; GFX12-NEXT:    s_wait_kmcnt 0x0
5188; GFX12-NEXT:    global_load_u8 v12, v1, s[2:3]
5189; GFX12-NEXT:    s_wait_loadcnt 0x0
5190; GFX12-NEXT:    v_dual_mov_b32 v3, v1 :: v_dual_and_b32 v0, 0xffff, v12
5191; GFX12-NEXT:    v_mov_b32_e32 v5, v1
5192; GFX12-NEXT:    v_mov_b32_e32 v7, v1
5193; GFX12-NEXT:    v_bfe_u32 v6, v12, 5, 1
5194; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_4)
5195; GFX12-NEXT:    v_lshrrev_b32_e32 v2, 7, v0
5196; GFX12-NEXT:    v_bfe_u32 v0, v0, 6, 1
5197; GFX12-NEXT:    v_bfe_u32 v4, v12, 4, 1
5198; GFX12-NEXT:    v_mov_b32_e32 v9, v1
5199; GFX12-NEXT:    v_mov_b32_e32 v11, v1
5200; GFX12-NEXT:    v_bfe_u32 v10, v12, 3, 1
5201; GFX12-NEXT:    v_bfe_u32 v8, v12, 2, 1
5202; GFX12-NEXT:    v_mov_b32_e32 v13, v1
5203; GFX12-NEXT:    v_mov_b32_e32 v15, v1
5204; GFX12-NEXT:    v_bfe_u32 v14, v12, 1, 1
5205; GFX12-NEXT:    v_and_b32_e32 v12, 1, v12
5206; GFX12-NEXT:    s_clause 0x3
5207; GFX12-NEXT:    global_store_b128 v1, v[0:3], s[0:1] offset:48
5208; GFX12-NEXT:    global_store_b128 v1, v[4:7], s[0:1] offset:32
5209; GFX12-NEXT:    global_store_b128 v1, v[8:11], s[0:1] offset:16
5210; GFX12-NEXT:    global_store_b128 v1, v[12:15], s[0:1]
5211; GFX12-NEXT:    s_endpgm
; IR body: vector i1 load, lane-wise zext to i64, full-vector store.
5212  %load = load <8 x i1>, ptr addrspace(4) %in
5213  %ext = zext <8 x i1> %load to <8 x i64>
5214  store <8 x i64> %ext, ptr addrspace(1) %out
5215  ret void
5216}
5217
; Kernel under test: load a <8 x i1> vector through the addrspace(4) pointer
; %in, sign-extend every lane to i64, and store the resulting <8 x i64>
; (64 bytes) through the addrspace(1) pointer %out.
;
; The check lines inside this function are autogenerated (see the NOTE on the
; first line of the file); regenerate them with utils/update_llc_test_checks.py
; instead of editing them by hand. There is one group of checks per llc RUN
; line in the file header (the GFX6, GFX8, EG and GFX12 prefixes).
;
; What the recorded output shows:
;  * GFX6 keeps the whole computation in vector registers: v_lshrrev_b32 to
;    position each bit, v_bfe_i32 to sign-extend it, and v_ashrrev_i32 by 31
;    for the upper dword.
;  * GFX8 and GFX12 move the loaded byte to a scalar register with
;    v_readfirstlane_b32 and handle lanes 1-7 with s_lshr_b32 + s_bfe_i64;
;    lane 0 is still done with v_bfe_i32 + v_ashrrev_i32.
;  * EG uses LSHR + BFE_INT and copies each result into the upper component
;    with MOV.
;  * All targets issue a single byte-sized load and four 128-bit stores.
5218define amdgpu_kernel void @constant_sextload_v8i1_to_v8i64(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
5219; GFX6-LABEL: constant_sextload_v8i1_to_v8i64:
5220; GFX6:       ; %bb.0:
5221; GFX6-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x9
5222; GFX6-NEXT:    s_mov_b32 s3, 0xf000
5223; GFX6-NEXT:    s_mov_b32 s2, -1
5224; GFX6-NEXT:    s_mov_b32 s10, s2
5225; GFX6-NEXT:    s_mov_b32 s11, s3
5226; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
5227; GFX6-NEXT:    s_mov_b32 s8, s6
5228; GFX6-NEXT:    s_mov_b32 s9, s7
5229; GFX6-NEXT:    buffer_load_ubyte v0, off, s[8:11], 0
5230; GFX6-NEXT:    s_mov_b32 s0, s4
5231; GFX6-NEXT:    s_mov_b32 s1, s5
5232; GFX6-NEXT:    s_waitcnt vmcnt(0)
5233; GFX6-NEXT:    v_lshrrev_b32_e32 v3, 6, v0
5234; GFX6-NEXT:    v_lshrrev_b32_e32 v5, 7, v0
5235; GFX6-NEXT:    v_lshrrev_b32_e32 v7, 4, v0
5236; GFX6-NEXT:    v_lshrrev_b32_e32 v8, 5, v0
5237; GFX6-NEXT:    v_lshrrev_b32_e32 v4, 2, v0
5238; GFX6-NEXT:    v_lshrrev_b32_e32 v6, 3, v0
5239; GFX6-NEXT:    v_lshrrev_b32_e32 v2, 1, v0
5240; GFX6-NEXT:    v_bfe_i32 v0, v0, 0, 1
5241; GFX6-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
5242; GFX6-NEXT:    v_bfe_i32 v2, v2, 0, 1
5243; GFX6-NEXT:    v_bfe_i32 v6, v6, 0, 1
5244; GFX6-NEXT:    v_bfe_i32 v4, v4, 0, 1
5245; GFX6-NEXT:    v_bfe_i32 v10, v8, 0, 1
5246; GFX6-NEXT:    v_bfe_i32 v8, v7, 0, 1
5247; GFX6-NEXT:    v_bfe_i32 v14, v5, 0, 1
5248; GFX6-NEXT:    v_bfe_i32 v12, v3, 0, 1
5249; GFX6-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
5250; GFX6-NEXT:    v_ashrrev_i32_e32 v7, 31, v6
5251; GFX6-NEXT:    v_ashrrev_i32_e32 v5, 31, v4
5252; GFX6-NEXT:    v_ashrrev_i32_e32 v11, 31, v10
5253; GFX6-NEXT:    v_ashrrev_i32_e32 v9, 31, v8
5254; GFX6-NEXT:    v_ashrrev_i32_e32 v15, 31, v14
5255; GFX6-NEXT:    v_ashrrev_i32_e32 v13, 31, v12
5256; GFX6-NEXT:    buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:48
5257; GFX6-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:32
5258; GFX6-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16
5259; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
5260; GFX6-NEXT:    s_endpgm
5261;
5262; GFX8-LABEL: constant_sextload_v8i1_to_v8i64:
5263; GFX8:       ; %bb.0:
5264; GFX8-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
5265; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
5266; GFX8-NEXT:    v_mov_b32_e32 v0, s2
5267; GFX8-NEXT:    v_mov_b32_e32 v1, s3
5268; GFX8-NEXT:    flat_load_ubyte v0, v[0:1]
5269; GFX8-NEXT:    v_mov_b32_e32 v17, s1
5270; GFX8-NEXT:    v_mov_b32_e32 v16, s0
5271; GFX8-NEXT:    s_waitcnt vmcnt(0)
5272; GFX8-NEXT:    v_readfirstlane_b32 s3, v0
5273; GFX8-NEXT:    s_lshr_b32 s2, s3, 6
5274; GFX8-NEXT:    s_lshr_b32 s4, s3, 7
5275; GFX8-NEXT:    s_lshr_b32 s6, s3, 4
5276; GFX8-NEXT:    s_lshr_b32 s8, s3, 5
5277; GFX8-NEXT:    s_lshr_b32 s10, s3, 2
5278; GFX8-NEXT:    s_lshr_b32 s12, s3, 3
5279; GFX8-NEXT:    s_lshr_b32 s14, s3, 1
5280; GFX8-NEXT:    v_mov_b32_e32 v0, s3
5281; GFX8-NEXT:    s_bfe_i64 s[14:15], s[14:15], 0x10000
5282; GFX8-NEXT:    s_bfe_i64 s[12:13], s[12:13], 0x10000
5283; GFX8-NEXT:    s_bfe_i64 s[10:11], s[10:11], 0x10000
5284; GFX8-NEXT:    s_bfe_i64 s[8:9], s[8:9], 0x10000
5285; GFX8-NEXT:    s_bfe_i64 s[6:7], s[6:7], 0x10000
5286; GFX8-NEXT:    s_bfe_i64 s[4:5], s[4:5], 0x10000
5287; GFX8-NEXT:    s_bfe_i64 s[2:3], s[2:3], 0x10000
5288; GFX8-NEXT:    v_mov_b32_e32 v4, s2
5289; GFX8-NEXT:    s_add_u32 s2, s0, 48
5290; GFX8-NEXT:    v_mov_b32_e32 v5, s3
5291; GFX8-NEXT:    s_addc_u32 s3, s1, 0
5292; GFX8-NEXT:    v_mov_b32_e32 v19, s3
5293; GFX8-NEXT:    v_mov_b32_e32 v18, s2
5294; GFX8-NEXT:    s_add_u32 s2, s0, 32
5295; GFX8-NEXT:    v_mov_b32_e32 v6, s4
5296; GFX8-NEXT:    v_mov_b32_e32 v7, s5
5297; GFX8-NEXT:    s_addc_u32 s3, s1, 0
5298; GFX8-NEXT:    flat_store_dwordx4 v[18:19], v[4:7]
5299; GFX8-NEXT:    s_add_u32 s0, s0, 16
5300; GFX8-NEXT:    v_mov_b32_e32 v5, s3
5301; GFX8-NEXT:    v_mov_b32_e32 v8, s6
5302; GFX8-NEXT:    v_mov_b32_e32 v9, s7
5303; GFX8-NEXT:    v_mov_b32_e32 v10, s8
5304; GFX8-NEXT:    v_mov_b32_e32 v11, s9
5305; GFX8-NEXT:    v_mov_b32_e32 v4, s2
5306; GFX8-NEXT:    s_addc_u32 s1, s1, 0
5307; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[8:11]
5308; GFX8-NEXT:    v_mov_b32_e32 v5, s1
5309; GFX8-NEXT:    v_bfe_i32 v0, v0, 0, 1
5310; GFX8-NEXT:    v_mov_b32_e32 v12, s10
5311; GFX8-NEXT:    v_mov_b32_e32 v13, s11
5312; GFX8-NEXT:    v_mov_b32_e32 v14, s12
5313; GFX8-NEXT:    v_mov_b32_e32 v15, s13
5314; GFX8-NEXT:    v_mov_b32_e32 v4, s0
5315; GFX8-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
5316; GFX8-NEXT:    v_mov_b32_e32 v2, s14
5317; GFX8-NEXT:    v_mov_b32_e32 v3, s15
5318; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[12:15]
5319; GFX8-NEXT:    flat_store_dwordx4 v[16:17], v[0:3]
5320; GFX8-NEXT:    s_endpgm
5321;
5322; EG-LABEL: constant_sextload_v8i1_to_v8i64:
5323; EG:       ; %bb.0:
5324; EG-NEXT:    ALU 0, @10, KC0[CB0:0-32], KC1[]
5325; EG-NEXT:    TEX 0 @8
5326; EG-NEXT:    ALU 37, @11, KC0[CB0:0-32], KC1[]
5327; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T8.XYZW, T12.X, 0
5328; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T5.XYZW, T11.X, 0
5329; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T9.XYZW, T10.X, 0
5330; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T7.XYZW, T6.X, 1
5331; EG-NEXT:    CF_END
5332; EG-NEXT:    Fetch clause starting at 8:
5333; EG-NEXT:     VTX_READ_8 T5.X, T5.X, 0, #1
5334; EG-NEXT:    ALU clause starting at 10:
5335; EG-NEXT:     MOV * T5.X, KC0[2].Z,
5336; EG-NEXT:    ALU clause starting at 11:
5337; EG-NEXT:     LSHR T6.X, KC0[2].Y, literal.x,
5338; EG-NEXT:     LSHR * T0.W, T5.X, literal.y,
5339; EG-NEXT:    2(2.802597e-45), 7(9.809089e-45)
5340; EG-NEXT:     BFE_INT T7.X, T5.X, 0.0, 1,
5341; EG-NEXT:     BFE_INT T8.Z, PV.W, 0.0, 1,
5342; EG-NEXT:     LSHR T0.W, T5.X, literal.x,
5343; EG-NEXT:     LSHR * T1.W, T5.X, literal.y,
5344; EG-NEXT:    3(4.203895e-45), 6(8.407791e-45)
5345; EG-NEXT:     BFE_INT T8.X, PS, 0.0, 1,
5346; EG-NEXT:     BFE_INT T9.Z, PV.W, 0.0, 1,
5347; EG-NEXT:     LSHR T0.W, T5.X, 1,
5348; EG-NEXT:     LSHR * T1.W, T5.X, literal.x,
5349; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
5350; EG-NEXT:     BFE_INT T9.X, PS, 0.0, 1,
5351; EG-NEXT:     MOV T8.Y, PV.X,
5352; EG-NEXT:     BFE_INT T7.Z, PV.W, 0.0, 1,
5353; EG-NEXT:     LSHR T0.W, T5.X, literal.x,
5354; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.y,
5355; EG-NEXT:    5(7.006492e-45), 16(2.242078e-44)
5356; EG-NEXT:     LSHR T10.X, PS, literal.x,
5357; EG-NEXT:     MOV T9.Y, PV.X,
5358; EG-NEXT:     BFE_INT T5.Z, PV.W, 0.0, 1,
5359; EG-NEXT:     LSHR * T0.W, T5.X, literal.y,
5360; EG-NEXT:    2(2.802597e-45), 4(5.605194e-45)
5361; EG-NEXT:     BFE_INT T5.X, PV.W, 0.0, 1,
5362; EG-NEXT:     MOV T7.Y, T7.X,
5363; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.x,
5364; EG-NEXT:    32(4.484155e-44), 0(0.000000e+00)
5365; EG-NEXT:     LSHR T11.X, PV.W, literal.x,
5366; EG-NEXT:     MOV T5.Y, PV.X,
5367; EG-NEXT:     ADD_INT T0.Z, KC0[2].Y, literal.y,
5368; EG-NEXT:     MOV T7.W, T7.Z,
5369; EG-NEXT:     MOV * T9.W, T9.Z,
5370; EG-NEXT:    2(2.802597e-45), 48(6.726233e-44)
5371; EG-NEXT:     LSHR T12.X, PV.Z, literal.x,
5372; EG-NEXT:     MOV T5.W, T5.Z,
5373; EG-NEXT:     MOV * T8.W, T8.Z,
5374; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
5375;
5376; GFX12-LABEL: constant_sextload_v8i1_to_v8i64:
5377; GFX12:       ; %bb.0:
5378; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
5379; GFX12-NEXT:    v_mov_b32_e32 v16, 0
5380; GFX12-NEXT:    s_wait_kmcnt 0x0
5381; GFX12-NEXT:    global_load_u8 v0, v16, s[2:3]
5382; GFX12-NEXT:    s_wait_loadcnt 0x0
5383; GFX12-NEXT:    v_readfirstlane_b32 s3, v0
5384; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
5385; GFX12-NEXT:    v_mov_b32_e32 v9, s3
5386; GFX12-NEXT:    s_lshr_b32 s2, s3, 6
5387; GFX12-NEXT:    s_lshr_b32 s4, s3, 7
5388; GFX12-NEXT:    s_lshr_b32 s6, s3, 4
5389; GFX12-NEXT:    s_lshr_b32 s8, s3, 5
5390; GFX12-NEXT:    s_lshr_b32 s10, s3, 2
5391; GFX12-NEXT:    s_lshr_b32 s12, s3, 3
5392; GFX12-NEXT:    s_lshr_b32 s14, s3, 1
5393; GFX12-NEXT:    s_wait_alu 0xfffe
5394; GFX12-NEXT:    s_bfe_i64 s[2:3], s[2:3], 0x10000
5395; GFX12-NEXT:    s_bfe_i64 s[4:5], s[4:5], 0x10000
5396; GFX12-NEXT:    v_bfe_i32 v12, v9, 0, 1
5397; GFX12-NEXT:    s_bfe_i64 s[8:9], s[8:9], 0x10000
5398; GFX12-NEXT:    s_bfe_i64 s[6:7], s[6:7], 0x10000
5399; GFX12-NEXT:    s_bfe_i64 s[12:13], s[12:13], 0x10000
5400; GFX12-NEXT:    s_bfe_i64 s[10:11], s[10:11], 0x10000
5401; GFX12-NEXT:    s_wait_alu 0xfffe
5402; GFX12-NEXT:    v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
5403; GFX12-NEXT:    v_dual_mov_b32 v2, s4 :: v_dual_mov_b32 v3, s5
5404; GFX12-NEXT:    s_bfe_i64 s[14:15], s[14:15], 0x10000
5405; GFX12-NEXT:    v_dual_mov_b32 v4, s6 :: v_dual_mov_b32 v5, s7
5406; GFX12-NEXT:    v_dual_mov_b32 v6, s8 :: v_dual_mov_b32 v7, s9
5407; GFX12-NEXT:    v_dual_mov_b32 v8, s10 :: v_dual_mov_b32 v9, s11
5408; GFX12-NEXT:    v_dual_mov_b32 v10, s12 :: v_dual_mov_b32 v11, s13
5409; GFX12-NEXT:    v_dual_mov_b32 v14, s14 :: v_dual_mov_b32 v15, s15
5410; GFX12-NEXT:    v_ashrrev_i32_e32 v13, 31, v12
5411; GFX12-NEXT:    s_clause 0x3
5412; GFX12-NEXT:    global_store_b128 v16, v[0:3], s[0:1] offset:48
5413; GFX12-NEXT:    global_store_b128 v16, v[4:7], s[0:1] offset:32
5414; GFX12-NEXT:    global_store_b128 v16, v[8:11], s[0:1] offset:16
5415; GFX12-NEXT:    global_store_b128 v16, v[12:15], s[0:1]
5416; GFX12-NEXT:    s_endpgm
; IR body: vector i1 load, lane-wise sext to i64, full-vector store.
5417  %load = load <8 x i1>, ptr addrspace(4) %in
5418  %ext = sext <8 x i1> %load to <8 x i64>
5419  store <8 x i64> %ext, ptr addrspace(1) %out
5420  ret void
5421}
5422
5423define amdgpu_kernel void @constant_zextload_v16i1_to_v16i64(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
5424; GFX6-LABEL: constant_zextload_v16i1_to_v16i64:
5425; GFX6:       ; %bb.0:
5426; GFX6-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x9
5427; GFX6-NEXT:    s_mov_b32 s3, 0xf000
5428; GFX6-NEXT:    s_mov_b32 s2, -1
5429; GFX6-NEXT:    s_mov_b32 s10, s2
5430; GFX6-NEXT:    s_mov_b32 s11, s3
5431; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
5432; GFX6-NEXT:    s_mov_b32 s8, s6
5433; GFX6-NEXT:    s_mov_b32 s9, s7
5434; GFX6-NEXT:    buffer_load_ushort v29, off, s[8:11], 0
5435; GFX6-NEXT:    v_mov_b32_e32 v1, 0
5436; GFX6-NEXT:    v_mov_b32_e32 v3, v1
5437; GFX6-NEXT:    v_mov_b32_e32 v4, v1
5438; GFX6-NEXT:    v_mov_b32_e32 v6, v1
5439; GFX6-NEXT:    v_mov_b32_e32 v7, v1
5440; GFX6-NEXT:    v_mov_b32_e32 v9, v1
5441; GFX6-NEXT:    v_mov_b32_e32 v10, v1
5442; GFX6-NEXT:    v_mov_b32_e32 v12, v1
5443; GFX6-NEXT:    v_mov_b32_e32 v14, v1
5444; GFX6-NEXT:    v_mov_b32_e32 v16, v1
5445; GFX6-NEXT:    v_mov_b32_e32 v18, v1
5446; GFX6-NEXT:    v_mov_b32_e32 v20, v1
5447; GFX6-NEXT:    v_mov_b32_e32 v22, v1
5448; GFX6-NEXT:    v_mov_b32_e32 v24, v1
5449; GFX6-NEXT:    v_mov_b32_e32 v26, v1
5450; GFX6-NEXT:    v_mov_b32_e32 v28, v1
5451; GFX6-NEXT:    s_mov_b32 s0, s4
5452; GFX6-NEXT:    s_mov_b32 s1, s5
5453; GFX6-NEXT:    s_waitcnt vmcnt(0)
5454; GFX6-NEXT:    v_bfe_u32 v2, v29, 11, 1
5455; GFX6-NEXT:    v_bfe_u32 v0, v29, 10, 1
5456; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80
5457; GFX6-NEXT:    v_bfe_u32 v5, v29, 9, 1
5458; GFX6-NEXT:    s_waitcnt expcnt(0)
5459; GFX6-NEXT:    v_bfe_u32 v3, v29, 8, 1
5460; GFX6-NEXT:    buffer_store_dwordx4 v[3:6], off, s[0:3], 0 offset:64
5461; GFX6-NEXT:    v_lshrrev_b32_e32 v8, 15, v29
5462; GFX6-NEXT:    s_waitcnt expcnt(0)
5463; GFX6-NEXT:    v_bfe_u32 v6, v29, 14, 1
5464; GFX6-NEXT:    buffer_store_dwordx4 v[6:9], off, s[0:3], 0 offset:112
5465; GFX6-NEXT:    v_bfe_u32 v27, v29, 5, 1
5466; GFX6-NEXT:    v_bfe_u32 v23, v29, 7, 1
5467; GFX6-NEXT:    v_bfe_u32 v19, v29, 1, 1
5468; GFX6-NEXT:    v_bfe_u32 v15, v29, 3, 1
5469; GFX6-NEXT:    v_bfe_u32 v11, v29, 13, 1
5470; GFX6-NEXT:    v_bfe_u32 v25, v29, 4, 1
5471; GFX6-NEXT:    v_bfe_u32 v21, v29, 6, 1
5472; GFX6-NEXT:    v_and_b32_e32 v17, 1, v29
5473; GFX6-NEXT:    v_bfe_u32 v13, v29, 2, 1
5474; GFX6-NEXT:    s_waitcnt expcnt(0)
5475; GFX6-NEXT:    v_bfe_u32 v9, v29, 12, 1
5476; GFX6-NEXT:    buffer_store_dwordx4 v[9:12], off, s[0:3], 0 offset:96
5477; GFX6-NEXT:    buffer_store_dwordx4 v[13:16], off, s[0:3], 0 offset:16
5478; GFX6-NEXT:    buffer_store_dwordx4 v[17:20], off, s[0:3], 0
5479; GFX6-NEXT:    buffer_store_dwordx4 v[21:24], off, s[0:3], 0 offset:48
5480; GFX6-NEXT:    buffer_store_dwordx4 v[25:28], off, s[0:3], 0 offset:32
5481; GFX6-NEXT:    s_endpgm
5482;
5483; GFX8-LABEL: constant_zextload_v16i1_to_v16i64:
5484; GFX8:       ; %bb.0:
5485; GFX8-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
5486; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
5487; GFX8-NEXT:    v_mov_b32_e32 v0, s2
5488; GFX8-NEXT:    v_mov_b32_e32 v1, s3
5489; GFX8-NEXT:    flat_load_ushort v0, v[0:1]
5490; GFX8-NEXT:    v_mov_b32_e32 v1, 0
5491; GFX8-NEXT:    v_mov_b32_e32 v3, v1
5492; GFX8-NEXT:    v_mov_b32_e32 v5, v1
5493; GFX8-NEXT:    v_mov_b32_e32 v7, v1
5494; GFX8-NEXT:    v_mov_b32_e32 v9, v1
5495; GFX8-NEXT:    v_mov_b32_e32 v11, v1
5496; GFX8-NEXT:    s_waitcnt vmcnt(0)
5497; GFX8-NEXT:    v_readfirstlane_b32 s2, v0
5498; GFX8-NEXT:    s_bfe_u32 s3, s2, 0x10009
5499; GFX8-NEXT:    s_bfe_u32 s4, s2, 0x1000d
5500; GFX8-NEXT:    s_bfe_u32 s5, s2, 0x10007
5501; GFX8-NEXT:    s_bfe_u32 s6, s2, 0x10003
5502; GFX8-NEXT:    s_bfe_u32 s7, s2, 0x10001
5503; GFX8-NEXT:    s_and_b32 s8, s2, 1
5504; GFX8-NEXT:    s_bfe_u32 s9, s2, 0x10002
5505; GFX8-NEXT:    s_bfe_u32 s10, s2, 0x10004
5506; GFX8-NEXT:    s_bfe_u32 s11, s2, 0x10006
5507; GFX8-NEXT:    s_bfe_u32 s12, s2, 0x1000c
5508; GFX8-NEXT:    s_bfe_u32 s2, s2, 0x1000a
5509; GFX8-NEXT:    v_and_b32_e32 v4, 0xffff, v0
5510; GFX8-NEXT:    v_mov_b32_e32 v0, s2
5511; GFX8-NEXT:    s_add_u32 s2, s0, 0x50
5512; GFX8-NEXT:    v_mov_b32_e32 v6, s3
5513; GFX8-NEXT:    s_addc_u32 s3, s1, 0
5514; GFX8-NEXT:    v_mov_b32_e32 v13, s3
5515; GFX8-NEXT:    v_mov_b32_e32 v12, s2
5516; GFX8-NEXT:    s_add_u32 s2, s0, 64
5517; GFX8-NEXT:    v_bfe_u32 v2, v4, 11, 1
5518; GFX8-NEXT:    s_addc_u32 s3, s1, 0
5519; GFX8-NEXT:    flat_store_dwordx4 v[12:13], v[0:3]
5520; GFX8-NEXT:    v_mov_b32_e32 v13, s3
5521; GFX8-NEXT:    v_mov_b32_e32 v12, s2
5522; GFX8-NEXT:    s_add_u32 s2, s0, 0x70
5523; GFX8-NEXT:    v_lshrrev_b32_e32 v10, 15, v4
5524; GFX8-NEXT:    v_bfe_u32 v14, v4, 5, 1
5525; GFX8-NEXT:    v_bfe_u32 v8, v4, 14, 1
5526; GFX8-NEXT:    v_bfe_u32 v4, v4, 8, 1
5527; GFX8-NEXT:    s_addc_u32 s3, s1, 0
5528; GFX8-NEXT:    flat_store_dwordx4 v[12:13], v[4:7]
5529; GFX8-NEXT:    v_mov_b32_e32 v0, s12
5530; GFX8-NEXT:    v_mov_b32_e32 v5, s3
5531; GFX8-NEXT:    v_mov_b32_e32 v4, s2
5532; GFX8-NEXT:    s_add_u32 s2, s0, 0x60
5533; GFX8-NEXT:    s_addc_u32 s3, s1, 0
5534; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[8:11]
5535; GFX8-NEXT:    v_mov_b32_e32 v5, s3
5536; GFX8-NEXT:    v_mov_b32_e32 v4, s2
5537; GFX8-NEXT:    s_add_u32 s2, s0, 48
5538; GFX8-NEXT:    v_mov_b32_e32 v2, s4
5539; GFX8-NEXT:    s_addc_u32 s3, s1, 0
5540; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
5541; GFX8-NEXT:    v_mov_b32_e32 v5, s3
5542; GFX8-NEXT:    v_mov_b32_e32 v4, s2
5543; GFX8-NEXT:    s_add_u32 s2, s0, 32
5544; GFX8-NEXT:    v_mov_b32_e32 v0, s11
5545; GFX8-NEXT:    v_mov_b32_e32 v2, s5
5546; GFX8-NEXT:    s_addc_u32 s3, s1, 0
5547; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
5548; GFX8-NEXT:    v_mov_b32_e32 v5, s3
5549; GFX8-NEXT:    v_mov_b32_e32 v4, s2
5550; GFX8-NEXT:    s_add_u32 s2, s0, 16
5551; GFX8-NEXT:    v_mov_b32_e32 v0, s10
5552; GFX8-NEXT:    v_mov_b32_e32 v2, v14
5553; GFX8-NEXT:    s_addc_u32 s3, s1, 0
5554; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
5555; GFX8-NEXT:    v_mov_b32_e32 v5, s3
5556; GFX8-NEXT:    v_mov_b32_e32 v0, s9
5557; GFX8-NEXT:    v_mov_b32_e32 v2, s6
5558; GFX8-NEXT:    v_mov_b32_e32 v4, s2
5559; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
5560; GFX8-NEXT:    v_mov_b32_e32 v5, s1
5561; GFX8-NEXT:    v_mov_b32_e32 v0, s8
5562; GFX8-NEXT:    v_mov_b32_e32 v2, s7
5563; GFX8-NEXT:    v_mov_b32_e32 v4, s0
5564; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
5565; GFX8-NEXT:    s_endpgm
5566;
5567; EG-LABEL: constant_zextload_v16i1_to_v16i64:
5568; EG:       ; %bb.0:
5569; EG-NEXT:    ALU 0, @14, KC0[CB0:0-32], KC1[]
5570; EG-NEXT:    TEX 0 @12
5571; EG-NEXT:    ALU 62, @15, KC0[CB0:0-32], KC1[]
5572; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T8.XYZW, T22.X, 0
5573; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T9.XYZW, T21.X, 0
5574; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T10.XYZW, T20.X, 0
5575; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T11.XYZW, T19.X, 0
5576; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T12.XYZW, T18.X, 0
5577; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T13.XYZW, T17.X, 0
5578; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T14.XYZW, T16.X, 0
5579; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T7.XYZW, T15.X, 1
5580; EG-NEXT:    CF_END
5581; EG-NEXT:    Fetch clause starting at 12:
5582; EG-NEXT:     VTX_READ_16 T7.X, T7.X, 0, #1
5583; EG-NEXT:    ALU clause starting at 14:
5584; EG-NEXT:     MOV * T7.X, KC0[2].Z,
5585; EG-NEXT:    ALU clause starting at 15:
5586; EG-NEXT:     LSHR * T8.Z, T7.X, literal.x,
5587; EG-NEXT:    15(2.101948e-44), 0(0.000000e+00)
5588; EG-NEXT:     BFE_UINT T8.X, T7.X, literal.x, 1,
5589; EG-NEXT:     MOV T8.Y, 0.0,
5590; EG-NEXT:     BFE_UINT * T9.Z, T7.X, literal.y, 1,
5591; EG-NEXT:    14(1.961818e-44), 13(1.821688e-44)
5592; EG-NEXT:     BFE_UINT T9.X, T7.X, literal.x, 1,
5593; EG-NEXT:     MOV T9.Y, 0.0,
5594; EG-NEXT:     BFE_UINT * T10.Z, T7.X, literal.y, 1,
5595; EG-NEXT:    12(1.681558e-44), 11(1.541428e-44)
5596; EG-NEXT:     BFE_UINT T10.X, T7.X, literal.x, 1,
5597; EG-NEXT:     MOV T10.Y, 0.0,
5598; EG-NEXT:     BFE_UINT * T11.Z, T7.X, literal.y, 1,
5599; EG-NEXT:    10(1.401298e-44), 9(1.261169e-44)
5600; EG-NEXT:     BFE_UINT T11.X, T7.X, literal.x, 1,
5601; EG-NEXT:     MOV T11.Y, 0.0,
5602; EG-NEXT:     BFE_UINT * T12.Z, T7.X, literal.y, 1,
5603; EG-NEXT:    8(1.121039e-44), 7(9.809089e-45)
5604; EG-NEXT:     BFE_UINT T12.X, T7.X, literal.x, 1,
5605; EG-NEXT:     MOV T12.Y, 0.0,
5606; EG-NEXT:     BFE_UINT * T13.Z, T7.X, literal.y, 1,
5607; EG-NEXT:    6(8.407791e-45), 5(7.006492e-45)
5608; EG-NEXT:     BFE_UINT T13.X, T7.X, literal.x, 1,
5609; EG-NEXT:     MOV T13.Y, 0.0,
5610; EG-NEXT:     BFE_UINT * T14.Z, T7.X, literal.y, 1,
5611; EG-NEXT:    4(5.605194e-45), 3(4.203895e-45)
5612; EG-NEXT:     BFE_UINT T14.X, T7.X, literal.x, 1,
5613; EG-NEXT:     MOV T14.Y, 0.0,
5614; EG-NEXT:     BFE_UINT T7.Z, T7.X, 1, 1,
5615; EG-NEXT:     AND_INT * T7.X, T7.X, 1,
5616; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
5617; EG-NEXT:     MOV T7.Y, 0.0,
5618; EG-NEXT:     MOV T8.W, 0.0,
5619; EG-NEXT:     MOV * T9.W, 0.0,
5620; EG-NEXT:     MOV T10.W, 0.0,
5621; EG-NEXT:     MOV * T11.W, 0.0,
5622; EG-NEXT:     MOV T12.W, 0.0,
5623; EG-NEXT:     MOV * T13.W, 0.0,
5624; EG-NEXT:     MOV T14.W, 0.0,
5625; EG-NEXT:     MOV * T7.W, 0.0,
5626; EG-NEXT:     LSHR T15.X, KC0[2].Y, literal.x,
5627; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
5628; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
5629; EG-NEXT:     LSHR T16.X, PV.W, literal.x,
5630; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
5631; EG-NEXT:    2(2.802597e-45), 32(4.484155e-44)
5632; EG-NEXT:     LSHR T17.X, PV.W, literal.x,
5633; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
5634; EG-NEXT:    2(2.802597e-45), 48(6.726233e-44)
5635; EG-NEXT:     LSHR T18.X, PV.W, literal.x,
5636; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
5637; EG-NEXT:    2(2.802597e-45), 64(8.968310e-44)
5638; EG-NEXT:     LSHR T19.X, PV.W, literal.x,
5639; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
5640; EG-NEXT:    2(2.802597e-45), 80(1.121039e-43)
5641; EG-NEXT:     LSHR T20.X, PV.W, literal.x,
5642; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
5643; EG-NEXT:    2(2.802597e-45), 96(1.345247e-43)
5644; EG-NEXT:     LSHR T21.X, PV.W, literal.x,
5645; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
5646; EG-NEXT:    2(2.802597e-45), 112(1.569454e-43)
5647; EG-NEXT:     LSHR * T22.X, PV.W, literal.x,
5648; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
5649;
5650; GFX12-LABEL: constant_zextload_v16i1_to_v16i64:
5651; GFX12:       ; %bb.0:
5652; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
5653; GFX12-NEXT:    v_mov_b32_e32 v1, 0
5654; GFX12-NEXT:    s_wait_kmcnt 0x0
5655; GFX12-NEXT:    global_load_u16 v0, v1, s[2:3]
5656; GFX12-NEXT:    s_wait_loadcnt 0x0
5657; GFX12-NEXT:    v_and_b32_e32 v4, 0xffff, v0
5658; GFX12-NEXT:    v_readfirstlane_b32 s2, v0
5659; GFX12-NEXT:    v_mov_b32_e32 v7, v1
5660; GFX12-NEXT:    v_mov_b32_e32 v11, v1
5661; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
5662; GFX12-NEXT:    v_bfe_u32 v2, v4, 11, 1
5663; GFX12-NEXT:    s_bfe_u32 s3, s2, 0x1000a
5664; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
5665; GFX12-NEXT:    v_dual_mov_b32 v3, v1 :: v_dual_mov_b32 v0, s3
5666; GFX12-NEXT:    s_bfe_u32 s3, s2, 0x1000d
5667; GFX12-NEXT:    s_bfe_u32 s4, s2, 0x1000c
5668; GFX12-NEXT:    v_mov_b32_e32 v5, v1
5669; GFX12-NEXT:    v_bfe_u32 v6, v4, 5, 1
5670; GFX12-NEXT:    global_store_b128 v1, v[0:3], s[0:1] offset:80
5671; GFX12-NEXT:    v_mov_b32_e32 v0, s4
5672; GFX12-NEXT:    s_wait_alu 0xfffe
5673; GFX12-NEXT:    v_mov_b32_e32 v2, s3
5674; GFX12-NEXT:    s_bfe_u32 s3, s2, 0x10007
5675; GFX12-NEXT:    s_bfe_u32 s4, s2, 0x10006
5676; GFX12-NEXT:    v_mov_b32_e32 v9, v1
5677; GFX12-NEXT:    s_bfe_u32 s6, s2, 0x10002
5678; GFX12-NEXT:    global_store_b128 v1, v[0:3], s[0:1] offset:96
5679; GFX12-NEXT:    s_wait_alu 0xfffe
5680; GFX12-NEXT:    v_mov_b32_e32 v0, s4
5681; GFX12-NEXT:    v_mov_b32_e32 v2, s3
5682; GFX12-NEXT:    s_bfe_u32 s4, s2, 0x10004
5683; GFX12-NEXT:    s_bfe_u32 s3, s2, 0x10009
5684; GFX12-NEXT:    s_bfe_u32 s5, s2, 0x10001
5685; GFX12-NEXT:    v_lshrrev_b32_e32 v10, 15, v4
5686; GFX12-NEXT:    global_store_b128 v1, v[0:3], s[0:1] offset:48
5687; GFX12-NEXT:    s_wait_alu 0xfffe
5688; GFX12-NEXT:    v_mov_b32_e32 v0, s4
5689; GFX12-NEXT:    v_mov_b32_e32 v2, v6
5690; GFX12-NEXT:    s_bfe_u32 s4, s2, 0x10003
5691; GFX12-NEXT:    s_and_b32 s2, s2, 1
5692; GFX12-NEXT:    v_bfe_u32 v8, v4, 14, 1
5693; GFX12-NEXT:    v_bfe_u32 v4, v4, 8, 1
5694; GFX12-NEXT:    global_store_b128 v1, v[0:3], s[0:1] offset:32
5695; GFX12-NEXT:    v_mov_b32_e32 v0, s6
5696; GFX12-NEXT:    s_wait_alu 0xfffe
5697; GFX12-NEXT:    v_mov_b32_e32 v2, s4
5698; GFX12-NEXT:    v_mov_b32_e32 v6, s3
5699; GFX12-NEXT:    global_store_b128 v1, v[0:3], s[0:1] offset:16
5700; GFX12-NEXT:    v_mov_b32_e32 v0, s2
5701; GFX12-NEXT:    v_mov_b32_e32 v2, s5
5702; GFX12-NEXT:    s_clause 0x2
5703; GFX12-NEXT:    global_store_b128 v1, v[8:11], s[0:1] offset:112
5704; GFX12-NEXT:    global_store_b128 v1, v[4:7], s[0:1] offset:64
5705; GFX12-NEXT:    global_store_b128 v1, v[0:3], s[0:1]
5706; GFX12-NEXT:    s_endpgm
5707  %load = load <16 x i1>, ptr addrspace(4) %in
5708  %ext = zext <16 x i1> %load to <16 x i64>
5709  store <16 x i64> %ext, ptr addrspace(1) %out
5710  ret void
5711}
5712
; Codegen test: sign-extending load of <16 x i1> to <16 x i64>.
; The kernel loads a <16 x i1> through the addrspace(4) pointer %in, sign-extends
; every lane to i64, and stores the resulting <16 x i64> (128 bytes) through the
; addrspace(1) pointer %out. The expected output below writes that result as
; eight 16-byte stores at offsets 0 through 112.
; The GFX6 / GFX8 / EG / GFX12 assertions correspond to the RUN lines at the top
; of the file. They are autogenerated by utils/update_llc_test_checks.py, so
; regenerate them with that script instead of editing them by hand.
5713define amdgpu_kernel void @constant_sextload_v16i1_to_v16i64(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
5714; GFX6-LABEL: constant_sextload_v16i1_to_v16i64:
5715; GFX6:       ; %bb.0:
5716; GFX6-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x9
5717; GFX6-NEXT:    s_mov_b32 s3, 0xf000
5718; GFX6-NEXT:    s_mov_b32 s2, -1
5719; GFX6-NEXT:    s_mov_b32 s10, s2
5720; GFX6-NEXT:    s_mov_b32 s11, s3
5721; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
5722; GFX6-NEXT:    s_mov_b32 s8, s6
5723; GFX6-NEXT:    s_mov_b32 s9, s7
5724; GFX6-NEXT:    buffer_load_ushort v1, off, s[8:11], 0
5725; GFX6-NEXT:    s_mov_b32 s0, s4
5726; GFX6-NEXT:    s_mov_b32 s1, s5
5727; GFX6-NEXT:    s_waitcnt vmcnt(0)
5728; GFX6-NEXT:    v_lshrrev_b32_e32 v3, 14, v1
5729; GFX6-NEXT:    v_lshrrev_b32_e32 v4, 15, v1
5730; GFX6-NEXT:    v_lshrrev_b32_e32 v7, 12, v1
5731; GFX6-NEXT:    v_lshrrev_b32_e32 v8, 13, v1
5732; GFX6-NEXT:    v_lshrrev_b32_e32 v11, 10, v1
5733; GFX6-NEXT:    v_lshrrev_b32_e32 v12, 11, v1
5734; GFX6-NEXT:    v_lshrrev_b32_e32 v14, 8, v1
5735; GFX6-NEXT:    v_lshrrev_b32_e32 v16, 9, v1
5736; GFX6-NEXT:    v_lshrrev_b32_e32 v15, 6, v1
5737; GFX6-NEXT:    v_lshrrev_b32_e32 v9, 4, v1
5738; GFX6-NEXT:    v_lshrrev_b32_e32 v10, 5, v1
5739; GFX6-NEXT:    v_lshrrev_b32_e32 v0, 2, v1
5740; GFX6-NEXT:    v_lshrrev_b32_e32 v2, 3, v1
5741; GFX6-NEXT:    v_lshrrev_b32_e32 v13, 1, v1
5742; GFX6-NEXT:    v_bfe_i32 v2, v2, 0, 1
5743; GFX6-NEXT:    v_bfe_i32 v0, v0, 0, 1
5744; GFX6-NEXT:    v_bfe_i32 v5, v4, 0, 1
5745; GFX6-NEXT:    v_bfe_i32 v3, v3, 0, 1
5746; GFX6-NEXT:    v_ashrrev_i32_e32 v6, 31, v5
5747; GFX6-NEXT:    v_ashrrev_i32_e32 v4, 31, v3
5748; GFX6-NEXT:    buffer_store_dwordx4 v[3:6], off, s[0:3], 0 offset:112
5749; GFX6-NEXT:    s_waitcnt expcnt(0)
5750; GFX6-NEXT:    v_bfe_i32 v6, v10, 0, 1
5751; GFX6-NEXT:    v_bfe_i32 v4, v9, 0, 1
5752; GFX6-NEXT:    v_bfe_i32 v9, v8, 0, 1
5753; GFX6-NEXT:    v_bfe_i32 v7, v7, 0, 1
5754; GFX6-NEXT:    v_ashrrev_i32_e32 v10, 31, v9
5755; GFX6-NEXT:    v_ashrrev_i32_e32 v8, 31, v7
5756; GFX6-NEXT:    buffer_store_dwordx4 v[7:10], off, s[0:3], 0 offset:96
5757; GFX6-NEXT:    s_waitcnt expcnt(0)
5758; GFX6-NEXT:    v_bfe_i32 v9, v12, 0, 1
5759; GFX6-NEXT:    v_bfe_i32 v7, v11, 0, 1
5760; GFX6-NEXT:    v_bfe_i32 v13, v13, 0, 1
5761; GFX6-NEXT:    v_bfe_i32 v11, v1, 0, 1
5762; GFX6-NEXT:    v_lshrrev_b32_e32 v1, 7, v1
5763; GFX6-NEXT:    v_ashrrev_i32_e32 v10, 31, v9
5764; GFX6-NEXT:    v_ashrrev_i32_e32 v8, 31, v7
5765; GFX6-NEXT:    buffer_store_dwordx4 v[7:10], off, s[0:3], 0 offset:80
5766; GFX6-NEXT:    v_bfe_i32 v17, v1, 0, 1
5767; GFX6-NEXT:    v_bfe_i32 v15, v15, 0, 1
5768; GFX6-NEXT:    v_bfe_i32 v21, v16, 0, 1
5769; GFX6-NEXT:    v_bfe_i32 v19, v14, 0, 1
5770; GFX6-NEXT:    v_ashrrev_i32_e32 v12, 31, v11
5771; GFX6-NEXT:    v_ashrrev_i32_e32 v14, 31, v13
5772; GFX6-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
5773; GFX6-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
5774; GFX6-NEXT:    s_waitcnt expcnt(0)
5775; GFX6-NEXT:    v_ashrrev_i32_e32 v7, 31, v6
5776; GFX6-NEXT:    v_ashrrev_i32_e32 v5, 31, v4
5777; GFX6-NEXT:    v_ashrrev_i32_e32 v18, 31, v17
5778; GFX6-NEXT:    v_ashrrev_i32_e32 v16, 31, v15
5779; GFX6-NEXT:    v_ashrrev_i32_e32 v22, 31, v21
5780; GFX6-NEXT:    v_ashrrev_i32_e32 v20, 31, v19
5781; GFX6-NEXT:    buffer_store_dwordx4 v[19:22], off, s[0:3], 0 offset:64
5782; GFX6-NEXT:    buffer_store_dwordx4 v[15:18], off, s[0:3], 0 offset:48
5783; GFX6-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:32
5784; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
5785; GFX6-NEXT:    buffer_store_dwordx4 v[11:14], off, s[0:3], 0
5786; GFX6-NEXT:    s_endpgm
5787;
5788; GFX8-LABEL: constant_sextload_v16i1_to_v16i64:
5789; GFX8:       ; %bb.0:
5790; GFX8-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
5791; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
5792; GFX8-NEXT:    v_mov_b32_e32 v0, s2
5793; GFX8-NEXT:    v_mov_b32_e32 v1, s3
5794; GFX8-NEXT:    flat_load_ushort v0, v[0:1]
5795; GFX8-NEXT:    v_mov_b32_e32 v19, s1
5796; GFX8-NEXT:    v_mov_b32_e32 v18, s0
5797; GFX8-NEXT:    s_waitcnt vmcnt(0)
5798; GFX8-NEXT:    v_readfirstlane_b32 s3, v0
5799; GFX8-NEXT:    s_lshr_b32 s2, s3, 14
5800; GFX8-NEXT:    s_lshr_b32 s4, s3, 15
5801; GFX8-NEXT:    s_lshr_b32 s6, s3, 12
5802; GFX8-NEXT:    s_lshr_b32 s8, s3, 13
5803; GFX8-NEXT:    s_lshr_b32 s10, s3, 10
5804; GFX8-NEXT:    s_lshr_b32 s12, s3, 11
5805; GFX8-NEXT:    s_lshr_b32 s14, s3, 8
5806; GFX8-NEXT:    s_lshr_b32 s16, s3, 9
5807; GFX8-NEXT:    s_lshr_b32 s18, s3, 6
5808; GFX8-NEXT:    s_lshr_b32 s20, s3, 7
5809; GFX8-NEXT:    s_lshr_b32 s22, s3, 4
5810; GFX8-NEXT:    s_lshr_b32 s24, s3, 5
5811; GFX8-NEXT:    s_lshr_b32 s26, s3, 2
5812; GFX8-NEXT:    s_lshr_b32 s28, s3, 3
5813; GFX8-NEXT:    s_lshr_b32 s30, s3, 1
5814; GFX8-NEXT:    v_mov_b32_e32 v0, s3
5815; GFX8-NEXT:    s_bfe_i64 s[30:31], s[30:31], 0x10000
5816; GFX8-NEXT:    s_bfe_i64 s[28:29], s[28:29], 0x10000
5817; GFX8-NEXT:    s_bfe_i64 s[26:27], s[26:27], 0x10000
5818; GFX8-NEXT:    s_bfe_i64 s[24:25], s[24:25], 0x10000
5819; GFX8-NEXT:    s_bfe_i64 s[22:23], s[22:23], 0x10000
5820; GFX8-NEXT:    s_bfe_i64 s[20:21], s[20:21], 0x10000
5821; GFX8-NEXT:    s_bfe_i64 s[18:19], s[18:19], 0x10000
5822; GFX8-NEXT:    s_bfe_i64 s[16:17], s[16:17], 0x10000
5823; GFX8-NEXT:    s_bfe_i64 s[14:15], s[14:15], 0x10000
5824; GFX8-NEXT:    s_bfe_i64 s[12:13], s[12:13], 0x10000
5825; GFX8-NEXT:    s_bfe_i64 s[10:11], s[10:11], 0x10000
5826; GFX8-NEXT:    s_bfe_i64 s[8:9], s[8:9], 0x10000
5827; GFX8-NEXT:    s_bfe_i64 s[6:7], s[6:7], 0x10000
5828; GFX8-NEXT:    s_bfe_i64 s[4:5], s[4:5], 0x10000
5829; GFX8-NEXT:    s_bfe_i64 s[2:3], s[2:3], 0x10000
5830; GFX8-NEXT:    v_mov_b32_e32 v2, s2
5831; GFX8-NEXT:    s_add_u32 s2, s0, 0x70
5832; GFX8-NEXT:    v_mov_b32_e32 v3, s3
5833; GFX8-NEXT:    s_addc_u32 s3, s1, 0
5834; GFX8-NEXT:    v_mov_b32_e32 v15, s3
5835; GFX8-NEXT:    v_mov_b32_e32 v14, s2
5836; GFX8-NEXT:    s_add_u32 s2, s0, 0x60
5837; GFX8-NEXT:    v_mov_b32_e32 v4, s4
5838; GFX8-NEXT:    v_mov_b32_e32 v5, s5
5839; GFX8-NEXT:    s_addc_u32 s3, s1, 0
5840; GFX8-NEXT:    flat_store_dwordx4 v[14:15], v[2:5]
5841; GFX8-NEXT:    v_mov_b32_e32 v15, s3
5842; GFX8-NEXT:    v_mov_b32_e32 v14, s2
5843; GFX8-NEXT:    s_add_u32 s2, s0, 0x50
5844; GFX8-NEXT:    v_mov_b32_e32 v6, s6
5845; GFX8-NEXT:    v_mov_b32_e32 v7, s7
5846; GFX8-NEXT:    v_mov_b32_e32 v8, s8
5847; GFX8-NEXT:    v_mov_b32_e32 v9, s9
5848; GFX8-NEXT:    s_addc_u32 s3, s1, 0
5849; GFX8-NEXT:    flat_store_dwordx4 v[14:15], v[6:9]
5850; GFX8-NEXT:    v_mov_b32_e32 v15, s3
5851; GFX8-NEXT:    v_mov_b32_e32 v14, s2
5852; GFX8-NEXT:    s_add_u32 s2, s0, 64
5853; GFX8-NEXT:    v_mov_b32_e32 v10, s10
5854; GFX8-NEXT:    v_mov_b32_e32 v11, s11
5855; GFX8-NEXT:    v_mov_b32_e32 v12, s12
5856; GFX8-NEXT:    v_mov_b32_e32 v13, s13
5857; GFX8-NEXT:    s_addc_u32 s3, s1, 0
5858; GFX8-NEXT:    flat_store_dwordx4 v[14:15], v[10:13]
5859; GFX8-NEXT:    v_mov_b32_e32 v15, s3
5860; GFX8-NEXT:    v_mov_b32_e32 v14, s2
5861; GFX8-NEXT:    s_add_u32 s2, s0, 48
5862; GFX8-NEXT:    v_mov_b32_e32 v2, s14
5863; GFX8-NEXT:    v_mov_b32_e32 v3, s15
5864; GFX8-NEXT:    v_mov_b32_e32 v4, s16
5865; GFX8-NEXT:    v_mov_b32_e32 v5, s17
5866; GFX8-NEXT:    s_addc_u32 s3, s1, 0
5867; GFX8-NEXT:    flat_store_dwordx4 v[14:15], v[2:5]
5868; GFX8-NEXT:    v_mov_b32_e32 v6, s18
5869; GFX8-NEXT:    v_mov_b32_e32 v5, s3
5870; GFX8-NEXT:    v_mov_b32_e32 v4, s2
5871; GFX8-NEXT:    s_add_u32 s2, s0, 32
5872; GFX8-NEXT:    v_mov_b32_e32 v7, s19
5873; GFX8-NEXT:    v_mov_b32_e32 v8, s20
5874; GFX8-NEXT:    v_mov_b32_e32 v9, s21
5875; GFX8-NEXT:    s_addc_u32 s3, s1, 0
5876; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[6:9]
5877; GFX8-NEXT:    v_mov_b32_e32 v5, s3
5878; GFX8-NEXT:    s_add_u32 s0, s0, 16
5879; GFX8-NEXT:    v_mov_b32_e32 v10, s22
5880; GFX8-NEXT:    v_mov_b32_e32 v11, s23
5881; GFX8-NEXT:    v_mov_b32_e32 v12, s24
5882; GFX8-NEXT:    v_mov_b32_e32 v13, s25
5883; GFX8-NEXT:    v_mov_b32_e32 v4, s2
5884; GFX8-NEXT:    s_addc_u32 s1, s1, 0
5885; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[10:13]
5886; GFX8-NEXT:    v_mov_b32_e32 v5, s1
5887; GFX8-NEXT:    v_bfe_i32 v0, v0, 0, 1
5888; GFX8-NEXT:    v_mov_b32_e32 v14, s26
5889; GFX8-NEXT:    v_mov_b32_e32 v15, s27
5890; GFX8-NEXT:    v_mov_b32_e32 v16, s28
5891; GFX8-NEXT:    v_mov_b32_e32 v17, s29
5892; GFX8-NEXT:    v_mov_b32_e32 v4, s0
5893; GFX8-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
5894; GFX8-NEXT:    v_mov_b32_e32 v2, s30
5895; GFX8-NEXT:    v_mov_b32_e32 v3, s31
5896; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[14:17]
5897; GFX8-NEXT:    flat_store_dwordx4 v[18:19], v[0:3]
5898; GFX8-NEXT:    s_endpgm
5899;
5900; EG-LABEL: constant_sextload_v16i1_to_v16i64:
5901; EG:       ; %bb.0:
5902; EG-NEXT:    ALU 0, @14, KC0[CB0:0-32], KC1[]
5903; EG-NEXT:    TEX 0 @12
5904; EG-NEXT:    ALU 78, @15, KC0[CB0:0-32], KC1[]
5905; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T14.XYZW, T22.X, 0
5906; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T20.XYZW, T21.X, 0
5907; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T15.XYZW, T18.X, 0
5908; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T7.XYZW, T12.X, 0
5909; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T16.XYZW, T11.X, 0
5910; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T19.XYZW, T10.X, 0
5911; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T17.XYZW, T9.X, 0
5912; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T13.XYZW, T8.X, 1
5913; EG-NEXT:    CF_END
5914; EG-NEXT:    Fetch clause starting at 12:
5915; EG-NEXT:     VTX_READ_16 T7.X, T7.X, 0, #1
5916; EG-NEXT:    ALU clause starting at 14:
5917; EG-NEXT:     MOV * T7.X, KC0[2].Z,
5918; EG-NEXT:    ALU clause starting at 15:
5919; EG-NEXT:     LSHR T8.X, KC0[2].Y, literal.x,
5920; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
5921; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
5922; EG-NEXT:     LSHR T9.X, PV.W, literal.x,
5923; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
5924; EG-NEXT:    2(2.802597e-45), 32(4.484155e-44)
5925; EG-NEXT:     LSHR T10.X, PV.W, literal.x,
5926; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
5927; EG-NEXT:    2(2.802597e-45), 48(6.726233e-44)
5928; EG-NEXT:     LSHR T11.X, PV.W, literal.x,
5929; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
5930; EG-NEXT:    2(2.802597e-45), 64(8.968310e-44)
5931; EG-NEXT:     LSHR T12.X, PV.W, literal.x,
5932; EG-NEXT:     LSHR * T0.W, T7.X, literal.y,
5933; EG-NEXT:    2(2.802597e-45), 15(2.101948e-44)
5934; EG-NEXT:     BFE_INT T13.X, T7.X, 0.0, 1,
5935; EG-NEXT:     BFE_INT T14.Z, PV.W, 0.0, 1,
5936; EG-NEXT:     LSHR T0.W, T7.X, literal.x,
5937; EG-NEXT:     LSHR * T1.W, T7.X, literal.y,
5938; EG-NEXT:    11(1.541428e-44), 14(1.961818e-44)
5939; EG-NEXT:     BFE_INT T14.X, PS, 0.0, 1,
5940; EG-NEXT:     LSHR T0.Y, T7.X, literal.x,
5941; EG-NEXT:     BFE_INT T15.Z, PV.W, 0.0, 1,
5942; EG-NEXT:     LSHR T0.W, T7.X, literal.y,
5943; EG-NEXT:     LSHR * T1.W, T7.X, literal.z,
5944; EG-NEXT:    12(1.681558e-44), 7(9.809089e-45)
5945; EG-NEXT:    10(1.401298e-44), 0(0.000000e+00)
5946; EG-NEXT:     BFE_INT T15.X, PS, 0.0, 1,
5947; EG-NEXT:     MOV T14.Y, PV.X,
5948; EG-NEXT:     BFE_INT T16.Z, PV.W, 0.0, 1,
5949; EG-NEXT:     LSHR T0.W, T7.X, literal.x,
5950; EG-NEXT:     LSHR * T1.W, T7.X, literal.y,
5951; EG-NEXT:    3(4.203895e-45), 6(8.407791e-45)
5952; EG-NEXT:     BFE_INT T16.X, PS, 0.0, 1,
5953; EG-NEXT:     MOV T15.Y, PV.X,
5954; EG-NEXT:     BFE_INT T17.Z, PV.W, 0.0, 1,
5955; EG-NEXT:     LSHR T0.W, T7.X, 1,
5956; EG-NEXT:     LSHR * T1.W, T7.X, literal.x,
5957; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
5958; EG-NEXT:     BFE_INT T17.X, PS, 0.0, 1,
5959; EG-NEXT:     MOV T16.Y, PV.X,
5960; EG-NEXT:     BFE_INT T13.Z, PV.W, 0.0, 1,
5961; EG-NEXT:     LSHR T0.W, T7.X, literal.x,
5962; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.y,
5963; EG-NEXT:    5(7.006492e-45), 80(1.121039e-43)
5964; EG-NEXT:     LSHR T18.X, PS, literal.x,
5965; EG-NEXT:     MOV T17.Y, PV.X,
5966; EG-NEXT:     BFE_INT T19.Z, PV.W, 0.0, 1,
5967; EG-NEXT:     LSHR T0.W, T7.X, literal.y,
5968; EG-NEXT:     LSHR * T1.W, T7.X, literal.z,
5969; EG-NEXT:    2(2.802597e-45), 9(1.261169e-44)
5970; EG-NEXT:    4(5.605194e-45), 0(0.000000e+00)
5971; EG-NEXT:     BFE_INT T19.X, PS, 0.0, 1,
5972; EG-NEXT:     MOV T13.Y, T13.X,
5973; EG-NEXT:     BFE_INT T7.Z, PV.W, 0.0, 1,
5974; EG-NEXT:     LSHR T0.W, T7.X, literal.x, BS:VEC_120/SCL_212
5975; EG-NEXT:     LSHR * T1.W, T7.X, literal.y,
5976; EG-NEXT:    13(1.821688e-44), 8(1.121039e-44)
5977; EG-NEXT:     BFE_INT T7.X, PS, 0.0, 1,
5978; EG-NEXT:     MOV T19.Y, PV.X,
5979; EG-NEXT:     BFE_INT T20.Z, PV.W, 0.0, 1,
5980; EG-NEXT:     MOV T13.W, T13.Z,
5981; EG-NEXT:     MOV * T17.W, T17.Z,
5982; EG-NEXT:     BFE_INT T20.X, T0.Y, 0.0, 1,
5983; EG-NEXT:     MOV T7.Y, PV.X,
5984; EG-NEXT:     ADD_INT T0.Z, KC0[2].Y, literal.x,
5985; EG-NEXT:     MOV T19.W, T19.Z,
5986; EG-NEXT:     MOV * T16.W, T16.Z,
5987; EG-NEXT:    96(1.345247e-43), 0(0.000000e+00)
5988; EG-NEXT:     LSHR T21.X, PV.Z, literal.x,
5989; EG-NEXT:     MOV T20.Y, PV.X,
5990; EG-NEXT:     ADD_INT T0.Z, KC0[2].Y, literal.y,
5991; EG-NEXT:     MOV T7.W, T7.Z,
5992; EG-NEXT:     MOV * T15.W, T15.Z,
5993; EG-NEXT:    2(2.802597e-45), 112(1.569454e-43)
5994; EG-NEXT:     LSHR T22.X, PV.Z, literal.x,
5995; EG-NEXT:     MOV T20.W, T20.Z,
5996; EG-NEXT:     MOV * T14.W, T14.Z,
5997; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
5998;
5999; GFX12-LABEL: constant_sextload_v16i1_to_v16i64:
6000; GFX12:       ; %bb.0:
6001; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
6002; GFX12-NEXT:    v_mov_b32_e32 v32, 0
6003; GFX12-NEXT:    s_wait_kmcnt 0x0
6004; GFX12-NEXT:    global_load_u16 v0, v32, s[2:3]
6005; GFX12-NEXT:    s_wait_loadcnt 0x0
6006; GFX12-NEXT:    v_readfirstlane_b32 s3, v0
6007; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
6008; GFX12-NEXT:    s_lshr_b32 s4, s3, 15
6009; GFX12-NEXT:    s_lshr_b32 s2, s3, 14
6010; GFX12-NEXT:    s_bfe_i64 s[4:5], s[4:5], 0x10000
6011; GFX12-NEXT:    v_dual_mov_b32 v28, s3 :: v_dual_mov_b32 v3, s5
6012; GFX12-NEXT:    s_lshr_b32 s6, s3, 12
6013; GFX12-NEXT:    s_lshr_b32 s8, s3, 13
6014; GFX12-NEXT:    s_lshr_b32 s10, s3, 10
6015; GFX12-NEXT:    s_lshr_b32 s12, s3, 11
6016; GFX12-NEXT:    s_lshr_b32 s14, s3, 8
6017; GFX12-NEXT:    s_lshr_b32 s16, s3, 9
6018; GFX12-NEXT:    s_lshr_b32 s18, s3, 6
6019; GFX12-NEXT:    s_lshr_b32 s20, s3, 7
6020; GFX12-NEXT:    s_lshr_b32 s22, s3, 4
6021; GFX12-NEXT:    s_lshr_b32 s24, s3, 5
6022; GFX12-NEXT:    s_lshr_b32 s26, s3, 2
6023; GFX12-NEXT:    s_lshr_b32 s28, s3, 3
6024; GFX12-NEXT:    s_lshr_b32 s30, s3, 1
6025; GFX12-NEXT:    s_bfe_i64 s[12:13], s[12:13], 0x10000
6026; GFX12-NEXT:    s_bfe_i64 s[10:11], s[10:11], 0x10000
6027; GFX12-NEXT:    s_bfe_i64 s[8:9], s[8:9], 0x10000
6028; GFX12-NEXT:    s_bfe_i64 s[6:7], s[6:7], 0x10000
6029; GFX12-NEXT:    s_wait_alu 0xfffe
6030; GFX12-NEXT:    s_bfe_i64 s[2:3], s[2:3], 0x10000
6031; GFX12-NEXT:    s_bfe_i64 s[16:17], s[16:17], 0x10000
6032; GFX12-NEXT:    s_bfe_i64 s[14:15], s[14:15], 0x10000
6033; GFX12-NEXT:    s_bfe_i64 s[20:21], s[20:21], 0x10000
6034; GFX12-NEXT:    s_bfe_i64 s[18:19], s[18:19], 0x10000
6035; GFX12-NEXT:    s_wait_alu 0xfffe
6036; GFX12-NEXT:    v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v5, s7
6037; GFX12-NEXT:    v_dual_mov_b32 v1, s3 :: v_dual_mov_b32 v2, s4
6038; GFX12-NEXT:    v_dual_mov_b32 v7, s9 :: v_dual_mov_b32 v4, s6
6039; GFX12-NEXT:    v_dual_mov_b32 v9, s11 :: v_dual_mov_b32 v6, s8
6040; GFX12-NEXT:    v_dual_mov_b32 v11, s13 :: v_dual_mov_b32 v8, s10
6041; GFX12-NEXT:    v_dual_mov_b32 v13, s15 :: v_dual_mov_b32 v10, s12
6042; GFX12-NEXT:    v_mov_b32_e32 v15, s17
6043; GFX12-NEXT:    v_bfe_i32 v28, v28, 0, 1
6044; GFX12-NEXT:    s_bfe_i64 s[24:25], s[24:25], 0x10000
6045; GFX12-NEXT:    s_bfe_i64 s[22:23], s[22:23], 0x10000
6046; GFX12-NEXT:    v_dual_mov_b32 v12, s14 :: v_dual_mov_b32 v17, s19
6047; GFX12-NEXT:    v_dual_mov_b32 v14, s16 :: v_dual_mov_b32 v19, s21
6048; GFX12-NEXT:    s_bfe_i64 s[28:29], s[28:29], 0x10000
6049; GFX12-NEXT:    s_bfe_i64 s[26:27], s[26:27], 0x10000
6050; GFX12-NEXT:    v_dual_mov_b32 v16, s18 :: v_dual_mov_b32 v21, s23
6051; GFX12-NEXT:    v_dual_mov_b32 v18, s20 :: v_dual_mov_b32 v23, s25
6052; GFX12-NEXT:    s_bfe_i64 s[30:31], s[30:31], 0x10000
6053; GFX12-NEXT:    v_dual_mov_b32 v20, s22 :: v_dual_mov_b32 v25, s27
6054; GFX12-NEXT:    v_dual_mov_b32 v22, s24 :: v_dual_mov_b32 v27, s29
6055; GFX12-NEXT:    v_dual_mov_b32 v24, s26 :: v_dual_mov_b32 v31, s31
6056; GFX12-NEXT:    v_mov_b32_e32 v26, s28
6057; GFX12-NEXT:    v_mov_b32_e32 v30, s30
6058; GFX12-NEXT:    s_clause 0x1
6059; GFX12-NEXT:    global_store_b128 v32, v[0:3], s[0:1] offset:112
6060; GFX12-NEXT:    global_store_b128 v32, v[4:7], s[0:1] offset:96
6061; GFX12-NEXT:    v_ashrrev_i32_e32 v29, 31, v28
6062; GFX12-NEXT:    s_clause 0x5
6063; GFX12-NEXT:    global_store_b128 v32, v[8:11], s[0:1] offset:80
6064; GFX12-NEXT:    global_store_b128 v32, v[12:15], s[0:1] offset:64
6065; GFX12-NEXT:    global_store_b128 v32, v[16:19], s[0:1] offset:48
6066; GFX12-NEXT:    global_store_b128 v32, v[20:23], s[0:1] offset:32
6067; GFX12-NEXT:    global_store_b128 v32, v[24:27], s[0:1] offset:16
6068; GFX12-NEXT:    global_store_b128 v32, v[28:31], s[0:1]
6069; GFX12-NEXT:    s_endpgm
6070  %load = load <16 x i1>, ptr addrspace(4) %in
6071  %ext = sext <16 x i1> %load to <16 x i64>
6072  store <16 x i64> %ext, ptr addrspace(1) %out
6073  ret void
6074}
6075
6076define amdgpu_kernel void @constant_zextload_v32i1_to_v32i64(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
6077; GFX6-LABEL: constant_zextload_v32i1_to_v32i64:
6078; GFX6:       ; %bb.0:
6079; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
6080; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
6081; GFX6-NEXT:    s_load_dword s4, s[2:3], 0x0
6082; GFX6-NEXT:    s_mov_b32 s3, 0xf000
6083; GFX6-NEXT:    v_mov_b32_e32 v1, 0
6084; GFX6-NEXT:    s_mov_b32 s2, -1
6085; GFX6-NEXT:    v_mov_b32_e32 v3, v1
6086; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
6087; GFX6-NEXT:    s_bfe_u32 s5, s4, 0x10001
6088; GFX6-NEXT:    s_bfe_u32 s6, s4, 0x10003
6089; GFX6-NEXT:    s_bfe_u32 s7, s4, 0x10005
6090; GFX6-NEXT:    s_bfe_u32 s8, s4, 0x10007
6091; GFX6-NEXT:    s_bfe_u32 s9, s4, 0x10009
6092; GFX6-NEXT:    s_bfe_u32 s10, s4, 0x1000b
6093; GFX6-NEXT:    s_bfe_u32 s11, s4, 0x1000d
6094; GFX6-NEXT:    s_bfe_u32 s12, s4, 0x1000f
6095; GFX6-NEXT:    s_bfe_u32 s13, s4, 0x10011
6096; GFX6-NEXT:    s_bfe_u32 s14, s4, 0x10013
6097; GFX6-NEXT:    s_bfe_u32 s15, s4, 0x10015
6098; GFX6-NEXT:    s_bfe_u32 s16, s4, 0x10017
6099; GFX6-NEXT:    s_bfe_u32 s17, s4, 0x10019
6100; GFX6-NEXT:    s_bfe_u32 s18, s4, 0x1001b
6101; GFX6-NEXT:    s_bfe_u32 s19, s4, 0x1001d
6102; GFX6-NEXT:    s_lshr_b32 s20, s4, 31
6103; GFX6-NEXT:    s_and_b32 s21, s4, 1
6104; GFX6-NEXT:    s_bfe_u32 s22, s4, 0x10002
6105; GFX6-NEXT:    s_bfe_u32 s23, s4, 0x10004
6106; GFX6-NEXT:    s_bfe_u32 s24, s4, 0x10006
6107; GFX6-NEXT:    s_bfe_u32 s25, s4, 0x10008
6108; GFX6-NEXT:    s_bfe_u32 s26, s4, 0x1000a
6109; GFX6-NEXT:    s_bfe_u32 s27, s4, 0x1000c
6110; GFX6-NEXT:    s_bfe_u32 s28, s4, 0x1000e
6111; GFX6-NEXT:    s_bfe_u32 s29, s4, 0x10010
6112; GFX6-NEXT:    s_bfe_u32 s30, s4, 0x10012
6113; GFX6-NEXT:    s_bfe_u32 s31, s4, 0x10014
6114; GFX6-NEXT:    s_bfe_u32 s33, s4, 0x10016
6115; GFX6-NEXT:    s_bfe_u32 s34, s4, 0x10018
6116; GFX6-NEXT:    s_bfe_u32 s35, s4, 0x1001a
6117; GFX6-NEXT:    s_bfe_u32 s36, s4, 0x1001e
6118; GFX6-NEXT:    s_bfe_u32 s4, s4, 0x1001c
6119; GFX6-NEXT:    v_mov_b32_e32 v0, s36
6120; GFX6-NEXT:    v_mov_b32_e32 v2, s20
6121; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:240
6122; GFX6-NEXT:    s_waitcnt expcnt(0)
6123; GFX6-NEXT:    v_mov_b32_e32 v0, s4
6124; GFX6-NEXT:    v_mov_b32_e32 v2, s19
6125; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:224
6126; GFX6-NEXT:    s_waitcnt expcnt(0)
6127; GFX6-NEXT:    v_mov_b32_e32 v0, s35
6128; GFX6-NEXT:    v_mov_b32_e32 v2, s18
6129; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:208
6130; GFX6-NEXT:    s_waitcnt expcnt(0)
6131; GFX6-NEXT:    v_mov_b32_e32 v0, s34
6132; GFX6-NEXT:    v_mov_b32_e32 v2, s17
6133; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:192
6134; GFX6-NEXT:    s_waitcnt expcnt(0)
6135; GFX6-NEXT:    v_mov_b32_e32 v0, s33
6136; GFX6-NEXT:    v_mov_b32_e32 v2, s16
6137; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:176
6138; GFX6-NEXT:    s_waitcnt expcnt(0)
6139; GFX6-NEXT:    v_mov_b32_e32 v0, s31
6140; GFX6-NEXT:    v_mov_b32_e32 v2, s15
6141; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:160
6142; GFX6-NEXT:    s_waitcnt expcnt(0)
6143; GFX6-NEXT:    v_mov_b32_e32 v0, s30
6144; GFX6-NEXT:    v_mov_b32_e32 v2, s14
6145; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:144
6146; GFX6-NEXT:    s_waitcnt expcnt(0)
6147; GFX6-NEXT:    v_mov_b32_e32 v0, s29
6148; GFX6-NEXT:    v_mov_b32_e32 v2, s13
6149; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:128
6150; GFX6-NEXT:    s_waitcnt expcnt(0)
6151; GFX6-NEXT:    v_mov_b32_e32 v0, s28
6152; GFX6-NEXT:    v_mov_b32_e32 v2, s12
6153; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112
6154; GFX6-NEXT:    s_waitcnt expcnt(0)
6155; GFX6-NEXT:    v_mov_b32_e32 v0, s27
6156; GFX6-NEXT:    v_mov_b32_e32 v2, s11
6157; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96
6158; GFX6-NEXT:    s_waitcnt expcnt(0)
6159; GFX6-NEXT:    v_mov_b32_e32 v0, s26
6160; GFX6-NEXT:    v_mov_b32_e32 v2, s10
6161; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80
6162; GFX6-NEXT:    s_waitcnt expcnt(0)
6163; GFX6-NEXT:    v_mov_b32_e32 v0, s25
6164; GFX6-NEXT:    v_mov_b32_e32 v2, s9
6165; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64
6166; GFX6-NEXT:    s_waitcnt expcnt(0)
6167; GFX6-NEXT:    v_mov_b32_e32 v0, s24
6168; GFX6-NEXT:    v_mov_b32_e32 v2, s8
6169; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
6170; GFX6-NEXT:    s_waitcnt expcnt(0)
6171; GFX6-NEXT:    v_mov_b32_e32 v0, s23
6172; GFX6-NEXT:    v_mov_b32_e32 v2, s7
6173; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
6174; GFX6-NEXT:    s_waitcnt expcnt(0)
6175; GFX6-NEXT:    v_mov_b32_e32 v0, s22
6176; GFX6-NEXT:    v_mov_b32_e32 v2, s6
6177; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
6178; GFX6-NEXT:    s_waitcnt expcnt(0)
6179; GFX6-NEXT:    v_mov_b32_e32 v0, s21
6180; GFX6-NEXT:    v_mov_b32_e32 v2, s5
6181; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
6182; GFX6-NEXT:    s_endpgm
6183;
6184; GFX8-LABEL: constant_zextload_v32i1_to_v32i64:
6185; GFX8:       ; %bb.0:
6186; GFX8-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
6187; GFX8-NEXT:    v_mov_b32_e32 v1, 0
6188; GFX8-NEXT:    v_mov_b32_e32 v3, v1
6189; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
6190; GFX8-NEXT:    s_load_dword s6, s[2:3], 0x0
6191; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
6192; GFX8-NEXT:    s_lshr_b32 s7, s6, 31
6193; GFX8-NEXT:    s_bfe_u32 s8, s6, 0x1001d
6194; GFX8-NEXT:    s_bfe_u32 s9, s6, 0x1001b
6195; GFX8-NEXT:    s_bfe_u32 s10, s6, 0x10019
6196; GFX8-NEXT:    s_bfe_u32 s11, s6, 0x10017
6197; GFX8-NEXT:    s_bfe_u32 s12, s6, 0x10013
6198; GFX8-NEXT:    s_bfe_u32 s13, s6, 0x10011
6199; GFX8-NEXT:    s_bfe_u32 s14, s6, 0x1000f
6200; GFX8-NEXT:    s_bfe_u32 s15, s6, 0x1000d
6201; GFX8-NEXT:    s_bfe_u32 s16, s6, 0x1000b
6202; GFX8-NEXT:    s_bfe_u32 s17, s6, 0x10009
6203; GFX8-NEXT:    s_bfe_u32 s18, s6, 0x10007
6204; GFX8-NEXT:    s_bfe_u32 s19, s6, 0x10005
6205; GFX8-NEXT:    s_bfe_u32 s4, s6, 0x10003
6206; GFX8-NEXT:    s_bfe_u32 s2, s6, 0x10001
6207; GFX8-NEXT:    s_and_b32 s3, s6, 1
6208; GFX8-NEXT:    s_bfe_u32 s5, s6, 0x10002
6209; GFX8-NEXT:    s_bfe_u32 s20, s6, 0x10004
6210; GFX8-NEXT:    s_bfe_u32 s21, s6, 0x10006
6211; GFX8-NEXT:    s_bfe_u32 s22, s6, 0x10008
6212; GFX8-NEXT:    s_bfe_u32 s23, s6, 0x1000a
6213; GFX8-NEXT:    s_bfe_u32 s24, s6, 0x1000c
6214; GFX8-NEXT:    s_bfe_u32 s25, s6, 0x1000e
6215; GFX8-NEXT:    s_bfe_u32 s26, s6, 0x10010
6216; GFX8-NEXT:    s_bfe_u32 s27, s6, 0x10012
6217; GFX8-NEXT:    s_bfe_u32 s28, s6, 0x10014
6218; GFX8-NEXT:    s_bfe_u32 s29, s6, 0x10015
6219; GFX8-NEXT:    s_bfe_u32 s30, s6, 0x10016
6220; GFX8-NEXT:    s_bfe_u32 s31, s6, 0x10018
6221; GFX8-NEXT:    s_bfe_u32 s33, s6, 0x1001a
6222; GFX8-NEXT:    s_bfe_u32 s34, s6, 0x1001c
6223; GFX8-NEXT:    s_bfe_u32 s6, s6, 0x1001e
6224; GFX8-NEXT:    v_mov_b32_e32 v0, s6
6225; GFX8-NEXT:    s_add_u32 s6, s0, 0xf0
6226; GFX8-NEXT:    v_mov_b32_e32 v2, s7
6227; GFX8-NEXT:    s_addc_u32 s7, s1, 0
6228; GFX8-NEXT:    v_mov_b32_e32 v4, s6
6229; GFX8-NEXT:    v_mov_b32_e32 v5, s7
6230; GFX8-NEXT:    s_add_u32 s6, s0, 0xe0
6231; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6232; GFX8-NEXT:    s_addc_u32 s7, s1, 0
6233; GFX8-NEXT:    v_mov_b32_e32 v4, s6
6234; GFX8-NEXT:    v_mov_b32_e32 v0, s34
6235; GFX8-NEXT:    v_mov_b32_e32 v2, s8
6236; GFX8-NEXT:    v_mov_b32_e32 v5, s7
6237; GFX8-NEXT:    s_add_u32 s6, s0, 0xd0
6238; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6239; GFX8-NEXT:    s_addc_u32 s7, s1, 0
6240; GFX8-NEXT:    v_mov_b32_e32 v4, s6
6241; GFX8-NEXT:    v_mov_b32_e32 v0, s33
6242; GFX8-NEXT:    v_mov_b32_e32 v2, s9
6243; GFX8-NEXT:    v_mov_b32_e32 v5, s7
6244; GFX8-NEXT:    s_add_u32 s6, s0, 0xc0
6245; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6246; GFX8-NEXT:    s_addc_u32 s7, s1, 0
6247; GFX8-NEXT:    v_mov_b32_e32 v4, s6
6248; GFX8-NEXT:    v_mov_b32_e32 v0, s31
6249; GFX8-NEXT:    v_mov_b32_e32 v2, s10
6250; GFX8-NEXT:    v_mov_b32_e32 v5, s7
6251; GFX8-NEXT:    s_add_u32 s6, s0, 0xb0
6252; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6253; GFX8-NEXT:    s_addc_u32 s7, s1, 0
6254; GFX8-NEXT:    v_mov_b32_e32 v4, s6
6255; GFX8-NEXT:    v_mov_b32_e32 v0, s30
6256; GFX8-NEXT:    v_mov_b32_e32 v2, s11
6257; GFX8-NEXT:    v_mov_b32_e32 v5, s7
6258; GFX8-NEXT:    s_add_u32 s6, s0, 0xa0
6259; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6260; GFX8-NEXT:    s_addc_u32 s7, s1, 0
6261; GFX8-NEXT:    v_mov_b32_e32 v4, s6
6262; GFX8-NEXT:    v_mov_b32_e32 v0, s28
6263; GFX8-NEXT:    v_mov_b32_e32 v2, s29
6264; GFX8-NEXT:    v_mov_b32_e32 v5, s7
6265; GFX8-NEXT:    s_add_u32 s6, s0, 0x90
6266; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6267; GFX8-NEXT:    s_addc_u32 s7, s1, 0
6268; GFX8-NEXT:    v_mov_b32_e32 v4, s6
6269; GFX8-NEXT:    v_mov_b32_e32 v0, s27
6270; GFX8-NEXT:    v_mov_b32_e32 v2, s12
6271; GFX8-NEXT:    v_mov_b32_e32 v5, s7
6272; GFX8-NEXT:    s_add_u32 s6, s0, 0x80
6273; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6274; GFX8-NEXT:    s_addc_u32 s7, s1, 0
6275; GFX8-NEXT:    v_mov_b32_e32 v4, s6
6276; GFX8-NEXT:    v_mov_b32_e32 v0, s26
6277; GFX8-NEXT:    v_mov_b32_e32 v2, s13
6278; GFX8-NEXT:    v_mov_b32_e32 v5, s7
6279; GFX8-NEXT:    s_add_u32 s6, s0, 0x70
6280; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6281; GFX8-NEXT:    s_addc_u32 s7, s1, 0
6282; GFX8-NEXT:    v_mov_b32_e32 v4, s6
6283; GFX8-NEXT:    v_mov_b32_e32 v0, s25
6284; GFX8-NEXT:    v_mov_b32_e32 v2, s14
6285; GFX8-NEXT:    v_mov_b32_e32 v5, s7
6286; GFX8-NEXT:    s_add_u32 s6, s0, 0x60
6287; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6288; GFX8-NEXT:    s_addc_u32 s7, s1, 0
6289; GFX8-NEXT:    v_mov_b32_e32 v4, s6
6290; GFX8-NEXT:    v_mov_b32_e32 v0, s24
6291; GFX8-NEXT:    v_mov_b32_e32 v2, s15
6292; GFX8-NEXT:    v_mov_b32_e32 v5, s7
6293; GFX8-NEXT:    s_add_u32 s6, s0, 0x50
6294; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6295; GFX8-NEXT:    s_addc_u32 s7, s1, 0
6296; GFX8-NEXT:    v_mov_b32_e32 v4, s6
6297; GFX8-NEXT:    v_mov_b32_e32 v0, s23
6298; GFX8-NEXT:    v_mov_b32_e32 v2, s16
6299; GFX8-NEXT:    v_mov_b32_e32 v5, s7
6300; GFX8-NEXT:    s_add_u32 s6, s0, 64
6301; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6302; GFX8-NEXT:    s_addc_u32 s7, s1, 0
6303; GFX8-NEXT:    v_mov_b32_e32 v4, s6
6304; GFX8-NEXT:    v_mov_b32_e32 v0, s22
6305; GFX8-NEXT:    v_mov_b32_e32 v2, s17
6306; GFX8-NEXT:    v_mov_b32_e32 v5, s7
6307; GFX8-NEXT:    s_add_u32 s6, s0, 48
6308; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6309; GFX8-NEXT:    s_addc_u32 s7, s1, 0
6310; GFX8-NEXT:    v_mov_b32_e32 v4, s6
6311; GFX8-NEXT:    v_mov_b32_e32 v0, s21
6312; GFX8-NEXT:    v_mov_b32_e32 v2, s18
6313; GFX8-NEXT:    v_mov_b32_e32 v5, s7
6314; GFX8-NEXT:    s_add_u32 s6, s0, 32
6315; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6316; GFX8-NEXT:    s_addc_u32 s7, s1, 0
6317; GFX8-NEXT:    v_mov_b32_e32 v4, s6
6318; GFX8-NEXT:    v_mov_b32_e32 v0, s20
6319; GFX8-NEXT:    v_mov_b32_e32 v2, s19
6320; GFX8-NEXT:    v_mov_b32_e32 v5, s7
6321; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6322; GFX8-NEXT:    s_nop 0
6323; GFX8-NEXT:    v_mov_b32_e32 v2, s4
6324; GFX8-NEXT:    s_add_u32 s4, s0, 16
6325; GFX8-NEXT:    v_mov_b32_e32 v0, s5
6326; GFX8-NEXT:    s_addc_u32 s5, s1, 0
6327; GFX8-NEXT:    v_mov_b32_e32 v4, s4
6328; GFX8-NEXT:    v_mov_b32_e32 v5, s5
6329; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6330; GFX8-NEXT:    v_mov_b32_e32 v5, s1
6331; GFX8-NEXT:    v_mov_b32_e32 v0, s3
6332; GFX8-NEXT:    v_mov_b32_e32 v2, s2
6333; GFX8-NEXT:    v_mov_b32_e32 v4, s0
6334; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6335; GFX8-NEXT:    s_endpgm
6336;
6337; EG-LABEL: constant_zextload_v32i1_to_v32i64:
6338; EG:       ; %bb.0:
6339; EG-NEXT:    ALU 0, @24, KC0[CB0:0-32], KC1[]
6340; EG-NEXT:    TEX 0 @22
6341; EG-NEXT:    ALU 96, @25, KC0[CB0:0-32], KC1[]
6342; EG-NEXT:    ALU 30, @122, KC0[CB0:0-32], KC1[]
6343; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T12.XYZW, T42.X, 0
6344; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T13.XYZW, T41.X, 0
6345; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T14.XYZW, T40.X, 0
6346; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T15.XYZW, T39.X, 0
6347; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T16.XYZW, T38.X, 0
6348; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T17.XYZW, T37.X, 0
6349; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T18.XYZW, T36.X, 0
6350; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T19.XYZW, T35.X, 0
6351; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T20.XYZW, T34.X, 0
6352; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T21.XYZW, T33.X, 0
6353; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T22.XYZW, T32.X, 0
6354; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T23.XYZW, T31.X, 0
6355; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T24.XYZW, T30.X, 0
6356; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T25.XYZW, T29.X, 0
6357; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T26.XYZW, T28.X, 0
6358; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T11.XYZW, T27.X, 1
6359; EG-NEXT:    CF_END
6360; EG-NEXT:    PAD
6361; EG-NEXT:    Fetch clause starting at 22:
6362; EG-NEXT:     VTX_READ_32 T11.X, T11.X, 0, #1
6363; EG-NEXT:    ALU clause starting at 24:
6364; EG-NEXT:     MOV * T11.X, KC0[2].Z,
6365; EG-NEXT:    ALU clause starting at 25:
6366; EG-NEXT:     LSHR * T12.Z, T11.X, literal.x,
6367; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
6368; EG-NEXT:     BFE_UINT T12.X, T11.X, literal.x, 1,
6369; EG-NEXT:     MOV T12.Y, 0.0,
6370; EG-NEXT:     BFE_UINT * T13.Z, T11.X, literal.y, 1,
6371; EG-NEXT:    30(4.203895e-44), 29(4.063766e-44)
6372; EG-NEXT:     BFE_UINT T13.X, T11.X, literal.x, 1,
6373; EG-NEXT:     MOV T13.Y, 0.0,
6374; EG-NEXT:     BFE_UINT * T14.Z, T11.X, literal.y, 1,
6375; EG-NEXT:    28(3.923636e-44), 27(3.783506e-44)
6376; EG-NEXT:     BFE_UINT T14.X, T11.X, literal.x, 1,
6377; EG-NEXT:     MOV T14.Y, 0.0,
6378; EG-NEXT:     BFE_UINT * T15.Z, T11.X, literal.y, 1,
6379; EG-NEXT:    26(3.643376e-44), 25(3.503246e-44)
6380; EG-NEXT:     BFE_UINT T15.X, T11.X, literal.x, 1,
6381; EG-NEXT:     MOV T15.Y, 0.0,
6382; EG-NEXT:     BFE_UINT * T16.Z, T11.X, literal.y, 1,
6383; EG-NEXT:    24(3.363116e-44), 23(3.222986e-44)
6384; EG-NEXT:     BFE_UINT T16.X, T11.X, literal.x, 1,
6385; EG-NEXT:     MOV T16.Y, 0.0,
6386; EG-NEXT:     BFE_UINT * T17.Z, T11.X, literal.y, 1,
6387; EG-NEXT:    22(3.082857e-44), 21(2.942727e-44)
6388; EG-NEXT:     BFE_UINT T17.X, T11.X, literal.x, 1,
6389; EG-NEXT:     MOV T17.Y, 0.0,
6390; EG-NEXT:     BFE_UINT * T18.Z, T11.X, literal.y, 1,
6391; EG-NEXT:    20(2.802597e-44), 19(2.662467e-44)
6392; EG-NEXT:     BFE_UINT T18.X, T11.X, literal.x, 1,
6393; EG-NEXT:     MOV T18.Y, 0.0,
6394; EG-NEXT:     BFE_UINT * T19.Z, T11.X, literal.y, 1,
6395; EG-NEXT:    18(2.522337e-44), 17(2.382207e-44)
6396; EG-NEXT:     BFE_UINT T19.X, T11.X, literal.x, 1,
6397; EG-NEXT:     MOV T19.Y, 0.0,
6398; EG-NEXT:     BFE_UINT * T20.Z, T11.X, literal.y, 1,
6399; EG-NEXT:    16(2.242078e-44), 15(2.101948e-44)
6400; EG-NEXT:     BFE_UINT T20.X, T11.X, literal.x, 1,
6401; EG-NEXT:     MOV T20.Y, 0.0,
6402; EG-NEXT:     BFE_UINT * T21.Z, T11.X, literal.y, 1,
6403; EG-NEXT:    14(1.961818e-44), 13(1.821688e-44)
6404; EG-NEXT:     BFE_UINT T21.X, T11.X, literal.x, 1,
6405; EG-NEXT:     MOV T21.Y, 0.0,
6406; EG-NEXT:     BFE_UINT * T22.Z, T11.X, literal.y, 1,
6407; EG-NEXT:    12(1.681558e-44), 11(1.541428e-44)
6408; EG-NEXT:     BFE_UINT T22.X, T11.X, literal.x, 1,
6409; EG-NEXT:     MOV T22.Y, 0.0,
6410; EG-NEXT:     BFE_UINT * T23.Z, T11.X, literal.y, 1,
6411; EG-NEXT:    10(1.401298e-44), 9(1.261169e-44)
6412; EG-NEXT:     BFE_UINT T23.X, T11.X, literal.x, 1,
6413; EG-NEXT:     MOV T23.Y, 0.0,
6414; EG-NEXT:     BFE_UINT * T24.Z, T11.X, literal.y, 1,
6415; EG-NEXT:    8(1.121039e-44), 7(9.809089e-45)
6416; EG-NEXT:     BFE_UINT T24.X, T11.X, literal.x, 1,
6417; EG-NEXT:     MOV T24.Y, 0.0,
6418; EG-NEXT:     BFE_UINT * T25.Z, T11.X, literal.y, 1,
6419; EG-NEXT:    6(8.407791e-45), 5(7.006492e-45)
6420; EG-NEXT:     BFE_UINT T25.X, T11.X, literal.x, 1,
6421; EG-NEXT:     MOV T25.Y, 0.0,
6422; EG-NEXT:     BFE_UINT * T26.Z, T11.X, literal.y, 1,
6423; EG-NEXT:    4(5.605194e-45), 3(4.203895e-45)
6424; EG-NEXT:     BFE_UINT T26.X, T11.X, literal.x, 1,
6425; EG-NEXT:     MOV T26.Y, 0.0,
6426; EG-NEXT:     BFE_UINT T11.Z, T11.X, 1, 1,
6427; EG-NEXT:     AND_INT * T11.X, T11.X, 1,
6428; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
6429; EG-NEXT:     MOV T11.Y, 0.0,
6430; EG-NEXT:     MOV T12.W, 0.0,
6431; EG-NEXT:     MOV * T13.W, 0.0,
6432; EG-NEXT:     MOV T14.W, 0.0,
6433; EG-NEXT:     MOV * T15.W, 0.0,
6434; EG-NEXT:     MOV T16.W, 0.0,
6435; EG-NEXT:     MOV * T17.W, 0.0,
6436; EG-NEXT:     MOV T18.W, 0.0,
6437; EG-NEXT:     MOV * T19.W, 0.0,
6438; EG-NEXT:     MOV T20.W, 0.0,
6439; EG-NEXT:     MOV * T21.W, 0.0,
6440; EG-NEXT:     MOV T22.W, 0.0,
6441; EG-NEXT:     MOV * T23.W, 0.0,
6442; EG-NEXT:     MOV T24.W, 0.0,
6443; EG-NEXT:     MOV * T25.W, 0.0,
6444; EG-NEXT:     MOV T26.W, 0.0,
6445; EG-NEXT:     MOV * T11.W, 0.0,
6446; EG-NEXT:     LSHR T27.X, KC0[2].Y, literal.x,
6447; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
6448; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
6449; EG-NEXT:     LSHR T28.X, PV.W, literal.x,
6450; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
6451; EG-NEXT:    2(2.802597e-45), 32(4.484155e-44)
6452; EG-NEXT:     LSHR T29.X, PV.W, literal.x,
6453; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
6454; EG-NEXT:    2(2.802597e-45), 48(6.726233e-44)
6455; EG-NEXT:     LSHR T30.X, PV.W, literal.x,
6456; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
6457; EG-NEXT:    2(2.802597e-45), 64(8.968310e-44)
6458; EG-NEXT:     LSHR T31.X, PV.W, literal.x,
6459; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
6460; EG-NEXT:    2(2.802597e-45), 80(1.121039e-43)
6461; EG-NEXT:     LSHR * T32.X, PV.W, literal.x,
6462; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
6463; EG-NEXT:    ALU clause starting at 122:
6464; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.x,
6465; EG-NEXT:    96(1.345247e-43), 0(0.000000e+00)
6466; EG-NEXT:     LSHR T33.X, PV.W, literal.x,
6467; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
6468; EG-NEXT:    2(2.802597e-45), 112(1.569454e-43)
6469; EG-NEXT:     LSHR T34.X, PV.W, literal.x,
6470; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
6471; EG-NEXT:    2(2.802597e-45), 128(1.793662e-43)
6472; EG-NEXT:     LSHR T35.X, PV.W, literal.x,
6473; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
6474; EG-NEXT:    2(2.802597e-45), 144(2.017870e-43)
6475; EG-NEXT:     LSHR T36.X, PV.W, literal.x,
6476; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
6477; EG-NEXT:    2(2.802597e-45), 160(2.242078e-43)
6478; EG-NEXT:     LSHR T37.X, PV.W, literal.x,
6479; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
6480; EG-NEXT:    2(2.802597e-45), 176(2.466285e-43)
6481; EG-NEXT:     LSHR T38.X, PV.W, literal.x,
6482; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
6483; EG-NEXT:    2(2.802597e-45), 192(2.690493e-43)
6484; EG-NEXT:     LSHR T39.X, PV.W, literal.x,
6485; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
6486; EG-NEXT:    2(2.802597e-45), 208(2.914701e-43)
6487; EG-NEXT:     LSHR T40.X, PV.W, literal.x,
6488; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
6489; EG-NEXT:    2(2.802597e-45), 224(3.138909e-43)
6490; EG-NEXT:     LSHR T41.X, PV.W, literal.x,
6491; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
6492; EG-NEXT:    2(2.802597e-45), 240(3.363116e-43)
6493; EG-NEXT:     LSHR * T42.X, PV.W, literal.x,
6494; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
6495;
6496; GFX12-LABEL: constant_zextload_v32i1_to_v32i64:
6497; GFX12:       ; %bb.0:
6498; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
6499; GFX12-NEXT:    s_wait_kmcnt 0x0
6500; GFX12-NEXT:    s_load_b32 s2, s[2:3], 0x0
6501; GFX12-NEXT:    s_wait_kmcnt 0x0
6502; GFX12-NEXT:    s_bfe_u32 s3, s2, 0x1001e
6503; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
6504; GFX12-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s3
6505; GFX12-NEXT:    s_lshr_b32 s4, s2, 31
6506; GFX12-NEXT:    s_bfe_u32 s3, s2, 0x1001d
6507; GFX12-NEXT:    v_dual_mov_b32 v2, s4 :: v_dual_mov_b32 v3, v1
6508; GFX12-NEXT:    s_bfe_u32 s4, s2, 0x1001c
6509; GFX12-NEXT:    global_store_b128 v1, v[0:3], s[0:1] offset:240
6510; GFX12-NEXT:    s_wait_alu 0xfffe
6511; GFX12-NEXT:    v_mov_b32_e32 v0, s4
6512; GFX12-NEXT:    v_mov_b32_e32 v2, s3
6513; GFX12-NEXT:    s_bfe_u32 s3, s2, 0x1001b
6514; GFX12-NEXT:    s_bfe_u32 s4, s2, 0x1001a
6515; GFX12-NEXT:    global_store_b128 v1, v[0:3], s[0:1] offset:224
6516; GFX12-NEXT:    s_wait_alu 0xfffe
6517; GFX12-NEXT:    v_mov_b32_e32 v0, s4
6518; GFX12-NEXT:    v_mov_b32_e32 v2, s3
6519; GFX12-NEXT:    s_bfe_u32 s3, s2, 0x10019
6520; GFX12-NEXT:    s_bfe_u32 s4, s2, 0x10018
6521; GFX12-NEXT:    global_store_b128 v1, v[0:3], s[0:1] offset:208
6522; GFX12-NEXT:    s_wait_alu 0xfffe
6523; GFX12-NEXT:    v_mov_b32_e32 v0, s4
6524; GFX12-NEXT:    v_mov_b32_e32 v2, s3
6525; GFX12-NEXT:    s_bfe_u32 s3, s2, 0x10017
6526; GFX12-NEXT:    s_bfe_u32 s4, s2, 0x10016
6527; GFX12-NEXT:    global_store_b128 v1, v[0:3], s[0:1] offset:192
6528; GFX12-NEXT:    s_wait_alu 0xfffe
6529; GFX12-NEXT:    v_mov_b32_e32 v0, s4
6530; GFX12-NEXT:    v_mov_b32_e32 v2, s3
6531; GFX12-NEXT:    s_bfe_u32 s3, s2, 0x10014
6532; GFX12-NEXT:    s_bfe_u32 s4, s2, 0x10015
6533; GFX12-NEXT:    global_store_b128 v1, v[0:3], s[0:1] offset:176
6534; GFX12-NEXT:    s_wait_alu 0xfffe
6535; GFX12-NEXT:    v_mov_b32_e32 v0, s3
6536; GFX12-NEXT:    v_mov_b32_e32 v2, s4
6537; GFX12-NEXT:    s_bfe_u32 s3, s2, 0x10013
6538; GFX12-NEXT:    s_bfe_u32 s4, s2, 0x10012
6539; GFX12-NEXT:    global_store_b128 v1, v[0:3], s[0:1] offset:160
6540; GFX12-NEXT:    s_wait_alu 0xfffe
6541; GFX12-NEXT:    v_mov_b32_e32 v0, s4
6542; GFX12-NEXT:    v_mov_b32_e32 v2, s3
6543; GFX12-NEXT:    s_bfe_u32 s3, s2, 0x10011
6544; GFX12-NEXT:    s_bfe_u32 s4, s2, 0x10010
6545; GFX12-NEXT:    global_store_b128 v1, v[0:3], s[0:1] offset:144
6546; GFX12-NEXT:    s_wait_alu 0xfffe
6547; GFX12-NEXT:    v_mov_b32_e32 v0, s4
6548; GFX12-NEXT:    v_mov_b32_e32 v2, s3
6549; GFX12-NEXT:    s_bfe_u32 s3, s2, 0x1000f
6550; GFX12-NEXT:    s_bfe_u32 s4, s2, 0x1000e
6551; GFX12-NEXT:    global_store_b128 v1, v[0:3], s[0:1] offset:128
6552; GFX12-NEXT:    s_wait_alu 0xfffe
6553; GFX12-NEXT:    v_mov_b32_e32 v0, s4
6554; GFX12-NEXT:    v_mov_b32_e32 v2, s3
6555; GFX12-NEXT:    s_bfe_u32 s3, s2, 0x1000d
6556; GFX12-NEXT:    s_bfe_u32 s4, s2, 0x1000c
6557; GFX12-NEXT:    global_store_b128 v1, v[0:3], s[0:1] offset:112
6558; GFX12-NEXT:    s_wait_alu 0xfffe
6559; GFX12-NEXT:    v_mov_b32_e32 v0, s4
6560; GFX12-NEXT:    v_mov_b32_e32 v2, s3
6561; GFX12-NEXT:    s_bfe_u32 s3, s2, 0x1000b
6562; GFX12-NEXT:    s_bfe_u32 s4, s2, 0x1000a
6563; GFX12-NEXT:    global_store_b128 v1, v[0:3], s[0:1] offset:96
6564; GFX12-NEXT:    s_wait_alu 0xfffe
6565; GFX12-NEXT:    v_mov_b32_e32 v0, s4
6566; GFX12-NEXT:    v_mov_b32_e32 v2, s3
6567; GFX12-NEXT:    s_bfe_u32 s3, s2, 0x10009
6568; GFX12-NEXT:    s_bfe_u32 s4, s2, 0x10008
6569; GFX12-NEXT:    global_store_b128 v1, v[0:3], s[0:1] offset:80
6570; GFX12-NEXT:    s_wait_alu 0xfffe
6571; GFX12-NEXT:    v_mov_b32_e32 v0, s4
6572; GFX12-NEXT:    v_mov_b32_e32 v2, s3
6573; GFX12-NEXT:    s_bfe_u32 s3, s2, 0x10007
6574; GFX12-NEXT:    s_bfe_u32 s4, s2, 0x10006
6575; GFX12-NEXT:    global_store_b128 v1, v[0:3], s[0:1] offset:64
6576; GFX12-NEXT:    s_wait_alu 0xfffe
6577; GFX12-NEXT:    v_mov_b32_e32 v0, s4
6578; GFX12-NEXT:    v_mov_b32_e32 v2, s3
6579; GFX12-NEXT:    s_bfe_u32 s3, s2, 0x10005
6580; GFX12-NEXT:    s_bfe_u32 s4, s2, 0x10004
6581; GFX12-NEXT:    global_store_b128 v1, v[0:3], s[0:1] offset:48
6582; GFX12-NEXT:    s_wait_alu 0xfffe
6583; GFX12-NEXT:    v_mov_b32_e32 v0, s4
6584; GFX12-NEXT:    v_mov_b32_e32 v2, s3
6585; GFX12-NEXT:    s_bfe_u32 s3, s2, 0x10003
6586; GFX12-NEXT:    s_bfe_u32 s4, s2, 0x10002
6587; GFX12-NEXT:    global_store_b128 v1, v[0:3], s[0:1] offset:32
6588; GFX12-NEXT:    s_wait_alu 0xfffe
6589; GFX12-NEXT:    v_mov_b32_e32 v0, s4
6590; GFX12-NEXT:    v_mov_b32_e32 v2, s3
6591; GFX12-NEXT:    s_bfe_u32 s3, s2, 0x10001
6592; GFX12-NEXT:    s_and_b32 s2, s2, 1
6593; GFX12-NEXT:    global_store_b128 v1, v[0:3], s[0:1] offset:16
6594; GFX12-NEXT:    s_wait_alu 0xfffe
6595; GFX12-NEXT:    v_mov_b32_e32 v0, s2
6596; GFX12-NEXT:    v_mov_b32_e32 v2, s3
6597; GFX12-NEXT:    global_store_b128 v1, v[0:3], s[0:1]
6598; GFX12-NEXT:    s_endpgm
6599  %load = load <32 x i1>, ptr addrspace(4) %in
6600  %ext = zext <32 x i1> %load to <32 x i64>
6601  store <32 x i64> %ext, ptr addrspace(1) %out
6602  ret void
6603}
6604
6605define amdgpu_kernel void @constant_sextload_v32i1_to_v32i64(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
6606; GFX6-LABEL: constant_sextload_v32i1_to_v32i64:
6607; GFX6:       ; %bb.0:
6608; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
6609; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
6610; GFX6-NEXT:    s_load_dword s4, s[2:3], 0x0
6611; GFX6-NEXT:    s_mov_b32 s3, 0xf000
6612; GFX6-NEXT:    s_mov_b32 s2, -1
6613; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
6614; GFX6-NEXT:    s_lshr_b32 s38, s4, 30
6615; GFX6-NEXT:    s_lshr_b32 s40, s4, 31
6616; GFX6-NEXT:    s_lshr_b32 s34, s4, 28
6617; GFX6-NEXT:    s_lshr_b32 s36, s4, 29
6618; GFX6-NEXT:    s_lshr_b32 s28, s4, 26
6619; GFX6-NEXT:    s_lshr_b32 s30, s4, 27
6620; GFX6-NEXT:    s_lshr_b32 s24, s4, 24
6621; GFX6-NEXT:    s_lshr_b32 s26, s4, 25
6622; GFX6-NEXT:    s_lshr_b32 s20, s4, 22
6623; GFX6-NEXT:    s_lshr_b32 s22, s4, 23
6624; GFX6-NEXT:    s_lshr_b32 s18, s4, 20
6625; GFX6-NEXT:    s_lshr_b32 s6, s4, 21
6626; GFX6-NEXT:    s_lshr_b32 s8, s4, 18
6627; GFX6-NEXT:    s_lshr_b32 s10, s4, 19
6628; GFX6-NEXT:    s_lshr_b32 s12, s4, 16
6629; GFX6-NEXT:    s_lshr_b32 s14, s4, 17
6630; GFX6-NEXT:    s_lshr_b32 s16, s4, 14
6631; GFX6-NEXT:    s_bfe_i64 s[44:45], s[4:5], 0x10000
6632; GFX6-NEXT:    s_lshr_b32 s42, s4, 15
6633; GFX6-NEXT:    v_mov_b32_e32 v0, s44
6634; GFX6-NEXT:    v_mov_b32_e32 v1, s45
6635; GFX6-NEXT:    s_lshr_b32 s44, s4, 12
6636; GFX6-NEXT:    s_bfe_i64 s[38:39], s[38:39], 0x10000
6637; GFX6-NEXT:    s_bfe_i64 s[40:41], s[40:41], 0x10000
6638; GFX6-NEXT:    v_mov_b32_e32 v2, s38
6639; GFX6-NEXT:    v_mov_b32_e32 v3, s39
6640; GFX6-NEXT:    s_lshr_b32 s38, s4, 13
6641; GFX6-NEXT:    v_mov_b32_e32 v4, s40
6642; GFX6-NEXT:    v_mov_b32_e32 v5, s41
6643; GFX6-NEXT:    s_lshr_b32 s40, s4, 10
6644; GFX6-NEXT:    s_bfe_i64 s[34:35], s[34:35], 0x10000
6645; GFX6-NEXT:    s_bfe_i64 s[36:37], s[36:37], 0x10000
6646; GFX6-NEXT:    v_mov_b32_e32 v6, s34
6647; GFX6-NEXT:    v_mov_b32_e32 v7, s35
6648; GFX6-NEXT:    s_lshr_b32 s34, s4, 11
6649; GFX6-NEXT:    v_mov_b32_e32 v8, s36
6650; GFX6-NEXT:    v_mov_b32_e32 v9, s37
6651; GFX6-NEXT:    s_lshr_b32 s36, s4, 8
6652; GFX6-NEXT:    s_bfe_i64 s[28:29], s[28:29], 0x10000
6653; GFX6-NEXT:    s_bfe_i64 s[30:31], s[30:31], 0x10000
6654; GFX6-NEXT:    v_mov_b32_e32 v10, s28
6655; GFX6-NEXT:    v_mov_b32_e32 v11, s29
6656; GFX6-NEXT:    s_lshr_b32 s28, s4, 9
6657; GFX6-NEXT:    v_mov_b32_e32 v12, s30
6658; GFX6-NEXT:    v_mov_b32_e32 v13, s31
6659; GFX6-NEXT:    s_lshr_b32 s30, s4, 6
6660; GFX6-NEXT:    s_bfe_i64 s[24:25], s[24:25], 0x10000
6661; GFX6-NEXT:    s_bfe_i64 s[26:27], s[26:27], 0x10000
6662; GFX6-NEXT:    v_mov_b32_e32 v14, s24
6663; GFX6-NEXT:    v_mov_b32_e32 v15, s25
6664; GFX6-NEXT:    s_lshr_b32 s24, s4, 7
6665; GFX6-NEXT:    v_mov_b32_e32 v16, s26
6666; GFX6-NEXT:    v_mov_b32_e32 v17, s27
6667; GFX6-NEXT:    s_lshr_b32 s26, s4, 4
6668; GFX6-NEXT:    s_bfe_i64 s[22:23], s[22:23], 0x10000
6669; GFX6-NEXT:    s_bfe_i64 s[20:21], s[20:21], 0x10000
6670; GFX6-NEXT:    buffer_store_dwordx4 v[2:5], off, s[0:3], 0 offset:240
6671; GFX6-NEXT:    s_waitcnt expcnt(0)
6672; GFX6-NEXT:    v_mov_b32_e32 v2, s20
6673; GFX6-NEXT:    v_mov_b32_e32 v3, s21
6674; GFX6-NEXT:    s_lshr_b32 s20, s4, 5
6675; GFX6-NEXT:    v_mov_b32_e32 v4, s22
6676; GFX6-NEXT:    v_mov_b32_e32 v5, s23
6677; GFX6-NEXT:    s_lshr_b32 s22, s4, 2
6678; GFX6-NEXT:    s_bfe_i64 s[18:19], s[18:19], 0x10000
6679; GFX6-NEXT:    buffer_store_dwordx4 v[6:9], off, s[0:3], 0 offset:224
6680; GFX6-NEXT:    s_waitcnt expcnt(0)
6681; GFX6-NEXT:    v_mov_b32_e32 v6, s18
6682; GFX6-NEXT:    v_mov_b32_e32 v7, s19
6683; GFX6-NEXT:    s_lshr_b32 s18, s4, 3
6684; GFX6-NEXT:    s_lshr_b32 s4, s4, 1
6685; GFX6-NEXT:    s_bfe_i64 s[4:5], s[4:5], 0x10000
6686; GFX6-NEXT:    s_bfe_i64 s[18:19], s[18:19], 0x10000
6687; GFX6-NEXT:    s_bfe_i64 s[22:23], s[22:23], 0x10000
6688; GFX6-NEXT:    s_bfe_i64 s[20:21], s[20:21], 0x10000
6689; GFX6-NEXT:    s_bfe_i64 s[26:27], s[26:27], 0x10000
6690; GFX6-NEXT:    s_bfe_i64 s[24:25], s[24:25], 0x10000
6691; GFX6-NEXT:    s_bfe_i64 s[30:31], s[30:31], 0x10000
6692; GFX6-NEXT:    s_bfe_i64 s[28:29], s[28:29], 0x10000
6693; GFX6-NEXT:    s_bfe_i64 s[36:37], s[36:37], 0x10000
6694; GFX6-NEXT:    s_bfe_i64 s[34:35], s[34:35], 0x10000
6695; GFX6-NEXT:    s_bfe_i64 s[40:41], s[40:41], 0x10000
6696; GFX6-NEXT:    s_bfe_i64 s[38:39], s[38:39], 0x10000
6697; GFX6-NEXT:    s_bfe_i64 s[44:45], s[44:45], 0x10000
6698; GFX6-NEXT:    s_bfe_i64 s[42:43], s[42:43], 0x10000
6699; GFX6-NEXT:    s_bfe_i64 s[16:17], s[16:17], 0x10000
6700; GFX6-NEXT:    s_bfe_i64 s[14:15], s[14:15], 0x10000
6701; GFX6-NEXT:    s_bfe_i64 s[12:13], s[12:13], 0x10000
6702; GFX6-NEXT:    s_bfe_i64 s[10:11], s[10:11], 0x10000
6703; GFX6-NEXT:    s_bfe_i64 s[8:9], s[8:9], 0x10000
6704; GFX6-NEXT:    s_bfe_i64 s[6:7], s[6:7], 0x10000
6705; GFX6-NEXT:    buffer_store_dwordx4 v[10:13], off, s[0:3], 0 offset:208
6706; GFX6-NEXT:    buffer_store_dwordx4 v[14:17], off, s[0:3], 0 offset:192
6707; GFX6-NEXT:    buffer_store_dwordx4 v[2:5], off, s[0:3], 0 offset:176
6708; GFX6-NEXT:    v_mov_b32_e32 v8, s6
6709; GFX6-NEXT:    v_mov_b32_e32 v9, s7
6710; GFX6-NEXT:    buffer_store_dwordx4 v[6:9], off, s[0:3], 0 offset:160
6711; GFX6-NEXT:    s_waitcnt expcnt(1)
6712; GFX6-NEXT:    v_mov_b32_e32 v2, s8
6713; GFX6-NEXT:    v_mov_b32_e32 v3, s9
6714; GFX6-NEXT:    v_mov_b32_e32 v4, s10
6715; GFX6-NEXT:    v_mov_b32_e32 v5, s11
6716; GFX6-NEXT:    buffer_store_dwordx4 v[2:5], off, s[0:3], 0 offset:144
6717; GFX6-NEXT:    s_waitcnt expcnt(0)
6718; GFX6-NEXT:    v_mov_b32_e32 v2, s12
6719; GFX6-NEXT:    v_mov_b32_e32 v3, s13
6720; GFX6-NEXT:    v_mov_b32_e32 v4, s14
6721; GFX6-NEXT:    v_mov_b32_e32 v5, s15
6722; GFX6-NEXT:    buffer_store_dwordx4 v[2:5], off, s[0:3], 0 offset:128
6723; GFX6-NEXT:    s_waitcnt expcnt(0)
6724; GFX6-NEXT:    v_mov_b32_e32 v2, s16
6725; GFX6-NEXT:    v_mov_b32_e32 v3, s17
6726; GFX6-NEXT:    v_mov_b32_e32 v4, s42
6727; GFX6-NEXT:    v_mov_b32_e32 v5, s43
6728; GFX6-NEXT:    buffer_store_dwordx4 v[2:5], off, s[0:3], 0 offset:112
6729; GFX6-NEXT:    s_waitcnt expcnt(0)
6730; GFX6-NEXT:    v_mov_b32_e32 v2, s44
6731; GFX6-NEXT:    v_mov_b32_e32 v3, s45
6732; GFX6-NEXT:    v_mov_b32_e32 v4, s38
6733; GFX6-NEXT:    v_mov_b32_e32 v5, s39
6734; GFX6-NEXT:    buffer_store_dwordx4 v[2:5], off, s[0:3], 0 offset:96
6735; GFX6-NEXT:    s_waitcnt expcnt(0)
6736; GFX6-NEXT:    v_mov_b32_e32 v2, s40
6737; GFX6-NEXT:    v_mov_b32_e32 v3, s41
6738; GFX6-NEXT:    v_mov_b32_e32 v4, s34
6739; GFX6-NEXT:    v_mov_b32_e32 v5, s35
6740; GFX6-NEXT:    buffer_store_dwordx4 v[2:5], off, s[0:3], 0 offset:80
6741; GFX6-NEXT:    s_waitcnt expcnt(0)
6742; GFX6-NEXT:    v_mov_b32_e32 v2, s36
6743; GFX6-NEXT:    v_mov_b32_e32 v3, s37
6744; GFX6-NEXT:    v_mov_b32_e32 v4, s28
6745; GFX6-NEXT:    v_mov_b32_e32 v5, s29
6746; GFX6-NEXT:    buffer_store_dwordx4 v[2:5], off, s[0:3], 0 offset:64
6747; GFX6-NEXT:    s_waitcnt expcnt(0)
6748; GFX6-NEXT:    v_mov_b32_e32 v2, s30
6749; GFX6-NEXT:    v_mov_b32_e32 v3, s31
6750; GFX6-NEXT:    v_mov_b32_e32 v4, s24
6751; GFX6-NEXT:    v_mov_b32_e32 v5, s25
6752; GFX6-NEXT:    buffer_store_dwordx4 v[2:5], off, s[0:3], 0 offset:48
6753; GFX6-NEXT:    s_waitcnt expcnt(0)
6754; GFX6-NEXT:    v_mov_b32_e32 v2, s26
6755; GFX6-NEXT:    v_mov_b32_e32 v3, s27
6756; GFX6-NEXT:    v_mov_b32_e32 v4, s20
6757; GFX6-NEXT:    v_mov_b32_e32 v5, s21
6758; GFX6-NEXT:    buffer_store_dwordx4 v[2:5], off, s[0:3], 0 offset:32
6759; GFX6-NEXT:    s_waitcnt expcnt(0)
6760; GFX6-NEXT:    v_mov_b32_e32 v2, s22
6761; GFX6-NEXT:    v_mov_b32_e32 v3, s23
6762; GFX6-NEXT:    v_mov_b32_e32 v4, s18
6763; GFX6-NEXT:    v_mov_b32_e32 v5, s19
6764; GFX6-NEXT:    buffer_store_dwordx4 v[2:5], off, s[0:3], 0 offset:16
6765; GFX6-NEXT:    s_waitcnt expcnt(0)
6766; GFX6-NEXT:    v_mov_b32_e32 v2, s4
6767; GFX6-NEXT:    v_mov_b32_e32 v3, s5
6768; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
6769; GFX6-NEXT:    s_endpgm
6770;
6771; GFX8-LABEL: constant_sextload_v32i1_to_v32i64:
6772; GFX8:       ; %bb.0:
6773; GFX8-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
6774; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
6775; GFX8-NEXT:    s_load_dword s2, s[2:3], 0x0
6776; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
6777; GFX8-NEXT:    s_lshr_b32 s44, s2, 30
6778; GFX8-NEXT:    s_lshr_b32 s46, s2, 31
6779; GFX8-NEXT:    s_lshr_b32 s48, s2, 28
6780; GFX8-NEXT:    s_lshr_b32 s50, s2, 29
6781; GFX8-NEXT:    s_lshr_b32 s52, s2, 26
6782; GFX8-NEXT:    s_lshr_b32 s54, s2, 27
6783; GFX8-NEXT:    s_lshr_b32 s56, s2, 24
6784; GFX8-NEXT:    s_lshr_b32 s58, s2, 25
6785; GFX8-NEXT:    s_lshr_b32 s60, s2, 22
6786; GFX8-NEXT:    s_lshr_b32 s62, s2, 23
6787; GFX8-NEXT:    s_lshr_b32 s64, s2, 20
6788; GFX8-NEXT:    s_lshr_b32 s66, s2, 21
6789; GFX8-NEXT:    s_lshr_b32 s42, s2, 18
6790; GFX8-NEXT:    s_lshr_b32 s40, s2, 19
6791; GFX8-NEXT:    s_lshr_b32 s38, s2, 16
6792; GFX8-NEXT:    s_lshr_b32 s36, s2, 17
6793; GFX8-NEXT:    s_lshr_b32 s34, s2, 14
6794; GFX8-NEXT:    s_lshr_b32 s30, s2, 15
6795; GFX8-NEXT:    s_lshr_b32 s28, s2, 12
6796; GFX8-NEXT:    s_lshr_b32 s26, s2, 13
6797; GFX8-NEXT:    s_lshr_b32 s24, s2, 10
6798; GFX8-NEXT:    s_lshr_b32 s22, s2, 11
6799; GFX8-NEXT:    s_lshr_b32 s20, s2, 8
6800; GFX8-NEXT:    s_lshr_b32 s18, s2, 9
6801; GFX8-NEXT:    s_lshr_b32 s16, s2, 6
6802; GFX8-NEXT:    s_lshr_b32 s14, s2, 7
6803; GFX8-NEXT:    s_lshr_b32 s12, s2, 4
6804; GFX8-NEXT:    s_lshr_b32 s10, s2, 5
6805; GFX8-NEXT:    s_lshr_b32 s8, s2, 2
6806; GFX8-NEXT:    s_lshr_b32 s6, s2, 3
6807; GFX8-NEXT:    s_lshr_b32 s68, s2, 1
6808; GFX8-NEXT:    s_bfe_i64 s[4:5], s[2:3], 0x10000
6809; GFX8-NEXT:    s_bfe_i64 s[2:3], s[68:69], 0x10000
6810; GFX8-NEXT:    s_bfe_i64 s[6:7], s[6:7], 0x10000
6811; GFX8-NEXT:    s_bfe_i64 s[8:9], s[8:9], 0x10000
6812; GFX8-NEXT:    s_bfe_i64 s[10:11], s[10:11], 0x10000
6813; GFX8-NEXT:    s_bfe_i64 s[12:13], s[12:13], 0x10000
6814; GFX8-NEXT:    s_bfe_i64 s[14:15], s[14:15], 0x10000
6815; GFX8-NEXT:    s_bfe_i64 s[16:17], s[16:17], 0x10000
6816; GFX8-NEXT:    s_bfe_i64 s[18:19], s[18:19], 0x10000
6817; GFX8-NEXT:    s_bfe_i64 s[20:21], s[20:21], 0x10000
6818; GFX8-NEXT:    s_bfe_i64 s[22:23], s[22:23], 0x10000
6819; GFX8-NEXT:    s_bfe_i64 s[24:25], s[24:25], 0x10000
6820; GFX8-NEXT:    s_bfe_i64 s[26:27], s[26:27], 0x10000
6821; GFX8-NEXT:    s_bfe_i64 s[28:29], s[28:29], 0x10000
6822; GFX8-NEXT:    s_bfe_i64 s[30:31], s[30:31], 0x10000
6823; GFX8-NEXT:    s_bfe_i64 s[34:35], s[34:35], 0x10000
6824; GFX8-NEXT:    s_bfe_i64 s[36:37], s[36:37], 0x10000
6825; GFX8-NEXT:    s_bfe_i64 s[38:39], s[38:39], 0x10000
6826; GFX8-NEXT:    s_bfe_i64 s[40:41], s[40:41], 0x10000
6827; GFX8-NEXT:    s_bfe_i64 s[42:43], s[42:43], 0x10000
6828; GFX8-NEXT:    s_bfe_i64 s[66:67], s[66:67], 0x10000
6829; GFX8-NEXT:    s_bfe_i64 s[64:65], s[64:65], 0x10000
6830; GFX8-NEXT:    s_bfe_i64 s[62:63], s[62:63], 0x10000
6831; GFX8-NEXT:    s_bfe_i64 s[60:61], s[60:61], 0x10000
6832; GFX8-NEXT:    s_bfe_i64 s[58:59], s[58:59], 0x10000
6833; GFX8-NEXT:    s_bfe_i64 s[56:57], s[56:57], 0x10000
6834; GFX8-NEXT:    s_bfe_i64 s[54:55], s[54:55], 0x10000
6835; GFX8-NEXT:    s_bfe_i64 s[52:53], s[52:53], 0x10000
6836; GFX8-NEXT:    s_bfe_i64 s[50:51], s[50:51], 0x10000
6837; GFX8-NEXT:    s_bfe_i64 s[48:49], s[48:49], 0x10000
6838; GFX8-NEXT:    s_bfe_i64 s[46:47], s[46:47], 0x10000
6839; GFX8-NEXT:    s_bfe_i64 s[44:45], s[44:45], 0x10000
6840; GFX8-NEXT:    v_mov_b32_e32 v0, s44
6841; GFX8-NEXT:    s_add_u32 s44, s0, 0xf0
6842; GFX8-NEXT:    v_mov_b32_e32 v1, s45
6843; GFX8-NEXT:    s_addc_u32 s45, s1, 0
6844; GFX8-NEXT:    v_mov_b32_e32 v4, s44
6845; GFX8-NEXT:    v_mov_b32_e32 v2, s46
6846; GFX8-NEXT:    v_mov_b32_e32 v3, s47
6847; GFX8-NEXT:    v_mov_b32_e32 v5, s45
6848; GFX8-NEXT:    s_add_u32 s44, s0, 0xe0
6849; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6850; GFX8-NEXT:    s_addc_u32 s45, s1, 0
6851; GFX8-NEXT:    v_mov_b32_e32 v4, s44
6852; GFX8-NEXT:    v_mov_b32_e32 v0, s48
6853; GFX8-NEXT:    v_mov_b32_e32 v1, s49
6854; GFX8-NEXT:    v_mov_b32_e32 v2, s50
6855; GFX8-NEXT:    v_mov_b32_e32 v3, s51
6856; GFX8-NEXT:    v_mov_b32_e32 v5, s45
6857; GFX8-NEXT:    s_add_u32 s44, s0, 0xd0
6858; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6859; GFX8-NEXT:    s_addc_u32 s45, s1, 0
6860; GFX8-NEXT:    v_mov_b32_e32 v4, s44
6861; GFX8-NEXT:    v_mov_b32_e32 v0, s52
6862; GFX8-NEXT:    v_mov_b32_e32 v1, s53
6863; GFX8-NEXT:    v_mov_b32_e32 v2, s54
6864; GFX8-NEXT:    v_mov_b32_e32 v3, s55
6865; GFX8-NEXT:    v_mov_b32_e32 v5, s45
6866; GFX8-NEXT:    s_add_u32 s44, s0, 0xc0
6867; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6868; GFX8-NEXT:    s_addc_u32 s45, s1, 0
6869; GFX8-NEXT:    v_mov_b32_e32 v4, s44
6870; GFX8-NEXT:    v_mov_b32_e32 v0, s56
6871; GFX8-NEXT:    v_mov_b32_e32 v1, s57
6872; GFX8-NEXT:    v_mov_b32_e32 v2, s58
6873; GFX8-NEXT:    v_mov_b32_e32 v3, s59
6874; GFX8-NEXT:    v_mov_b32_e32 v5, s45
6875; GFX8-NEXT:    s_add_u32 s44, s0, 0xb0
6876; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6877; GFX8-NEXT:    s_addc_u32 s45, s1, 0
6878; GFX8-NEXT:    v_mov_b32_e32 v4, s44
6879; GFX8-NEXT:    v_mov_b32_e32 v0, s60
6880; GFX8-NEXT:    v_mov_b32_e32 v1, s61
6881; GFX8-NEXT:    v_mov_b32_e32 v2, s62
6882; GFX8-NEXT:    v_mov_b32_e32 v3, s63
6883; GFX8-NEXT:    v_mov_b32_e32 v5, s45
6884; GFX8-NEXT:    s_add_u32 s44, s0, 0xa0
6885; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6886; GFX8-NEXT:    s_addc_u32 s45, s1, 0
6887; GFX8-NEXT:    v_mov_b32_e32 v4, s44
6888; GFX8-NEXT:    v_mov_b32_e32 v0, s64
6889; GFX8-NEXT:    v_mov_b32_e32 v1, s65
6890; GFX8-NEXT:    v_mov_b32_e32 v2, s66
6891; GFX8-NEXT:    v_mov_b32_e32 v3, s67
6892; GFX8-NEXT:    v_mov_b32_e32 v5, s45
6893; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6894; GFX8-NEXT:    s_nop 0
6895; GFX8-NEXT:    v_mov_b32_e32 v2, s40
6896; GFX8-NEXT:    s_add_u32 s40, s0, 0x90
6897; GFX8-NEXT:    v_mov_b32_e32 v3, s41
6898; GFX8-NEXT:    s_addc_u32 s41, s1, 0
6899; GFX8-NEXT:    v_mov_b32_e32 v4, s40
6900; GFX8-NEXT:    v_mov_b32_e32 v0, s42
6901; GFX8-NEXT:    v_mov_b32_e32 v1, s43
6902; GFX8-NEXT:    v_mov_b32_e32 v5, s41
6903; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6904; GFX8-NEXT:    s_nop 0
6905; GFX8-NEXT:    v_mov_b32_e32 v2, s36
6906; GFX8-NEXT:    s_add_u32 s36, s0, 0x80
6907; GFX8-NEXT:    v_mov_b32_e32 v3, s37
6908; GFX8-NEXT:    s_addc_u32 s37, s1, 0
6909; GFX8-NEXT:    v_mov_b32_e32 v4, s36
6910; GFX8-NEXT:    v_mov_b32_e32 v0, s38
6911; GFX8-NEXT:    v_mov_b32_e32 v1, s39
6912; GFX8-NEXT:    v_mov_b32_e32 v5, s37
6913; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6914; GFX8-NEXT:    s_nop 0
6915; GFX8-NEXT:    v_mov_b32_e32 v2, s30
6916; GFX8-NEXT:    s_add_u32 s30, s0, 0x70
6917; GFX8-NEXT:    v_mov_b32_e32 v3, s31
6918; GFX8-NEXT:    s_addc_u32 s31, s1, 0
6919; GFX8-NEXT:    v_mov_b32_e32 v4, s30
6920; GFX8-NEXT:    v_mov_b32_e32 v0, s34
6921; GFX8-NEXT:    v_mov_b32_e32 v1, s35
6922; GFX8-NEXT:    v_mov_b32_e32 v5, s31
6923; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6924; GFX8-NEXT:    s_nop 0
6925; GFX8-NEXT:    v_mov_b32_e32 v2, s26
6926; GFX8-NEXT:    s_add_u32 s26, s0, 0x60
6927; GFX8-NEXT:    v_mov_b32_e32 v3, s27
6928; GFX8-NEXT:    s_addc_u32 s27, s1, 0
6929; GFX8-NEXT:    v_mov_b32_e32 v4, s26
6930; GFX8-NEXT:    v_mov_b32_e32 v0, s28
6931; GFX8-NEXT:    v_mov_b32_e32 v1, s29
6932; GFX8-NEXT:    v_mov_b32_e32 v5, s27
6933; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6934; GFX8-NEXT:    s_nop 0
6935; GFX8-NEXT:    v_mov_b32_e32 v2, s22
6936; GFX8-NEXT:    s_add_u32 s22, s0, 0x50
6937; GFX8-NEXT:    v_mov_b32_e32 v3, s23
6938; GFX8-NEXT:    s_addc_u32 s23, s1, 0
6939; GFX8-NEXT:    v_mov_b32_e32 v4, s22
6940; GFX8-NEXT:    v_mov_b32_e32 v0, s24
6941; GFX8-NEXT:    v_mov_b32_e32 v1, s25
6942; GFX8-NEXT:    v_mov_b32_e32 v5, s23
6943; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6944; GFX8-NEXT:    s_nop 0
6945; GFX8-NEXT:    v_mov_b32_e32 v2, s18
6946; GFX8-NEXT:    s_add_u32 s18, s0, 64
6947; GFX8-NEXT:    v_mov_b32_e32 v3, s19
6948; GFX8-NEXT:    s_addc_u32 s19, s1, 0
6949; GFX8-NEXT:    v_mov_b32_e32 v4, s18
6950; GFX8-NEXT:    v_mov_b32_e32 v0, s20
6951; GFX8-NEXT:    v_mov_b32_e32 v1, s21
6952; GFX8-NEXT:    v_mov_b32_e32 v5, s19
6953; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6954; GFX8-NEXT:    s_nop 0
6955; GFX8-NEXT:    v_mov_b32_e32 v2, s14
6956; GFX8-NEXT:    s_add_u32 s14, s0, 48
6957; GFX8-NEXT:    v_mov_b32_e32 v3, s15
6958; GFX8-NEXT:    s_addc_u32 s15, s1, 0
6959; GFX8-NEXT:    v_mov_b32_e32 v4, s14
6960; GFX8-NEXT:    v_mov_b32_e32 v0, s16
6961; GFX8-NEXT:    v_mov_b32_e32 v1, s17
6962; GFX8-NEXT:    v_mov_b32_e32 v5, s15
6963; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6964; GFX8-NEXT:    s_nop 0
6965; GFX8-NEXT:    v_mov_b32_e32 v2, s10
6966; GFX8-NEXT:    s_add_u32 s10, s0, 32
6967; GFX8-NEXT:    v_mov_b32_e32 v3, s11
6968; GFX8-NEXT:    s_addc_u32 s11, s1, 0
6969; GFX8-NEXT:    v_mov_b32_e32 v4, s10
6970; GFX8-NEXT:    v_mov_b32_e32 v0, s12
6971; GFX8-NEXT:    v_mov_b32_e32 v1, s13
6972; GFX8-NEXT:    v_mov_b32_e32 v5, s11
6973; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6974; GFX8-NEXT:    s_nop 0
6975; GFX8-NEXT:    v_mov_b32_e32 v2, s6
6976; GFX8-NEXT:    s_add_u32 s6, s0, 16
6977; GFX8-NEXT:    v_mov_b32_e32 v3, s7
6978; GFX8-NEXT:    s_addc_u32 s7, s1, 0
6979; GFX8-NEXT:    v_mov_b32_e32 v4, s6
6980; GFX8-NEXT:    v_mov_b32_e32 v0, s8
6981; GFX8-NEXT:    v_mov_b32_e32 v1, s9
6982; GFX8-NEXT:    v_mov_b32_e32 v5, s7
6983; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6984; GFX8-NEXT:    v_mov_b32_e32 v5, s1
6985; GFX8-NEXT:    v_mov_b32_e32 v0, s4
6986; GFX8-NEXT:    v_mov_b32_e32 v1, s5
6987; GFX8-NEXT:    v_mov_b32_e32 v2, s2
6988; GFX8-NEXT:    v_mov_b32_e32 v3, s3
6989; GFX8-NEXT:    v_mov_b32_e32 v4, s0
6990; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6991; GFX8-NEXT:    s_endpgm
6992;
6993; EG-LABEL: constant_sextload_v32i1_to_v32i64:
6994; EG:       ; %bb.0:
6995; EG-NEXT:    ALU 0, @24, KC0[CB0:0-32], KC1[]
6996; EG-NEXT:    TEX 0 @22
6997; EG-NEXT:    ALU 92, @25, KC0[CB0:0-32], KC1[]
6998; EG-NEXT:    ALU 65, @118, KC0[CB0:0-32], KC1[]
6999; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T26.XYZW, T42.X, 0
7000; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T40.XYZW, T41.X, 0
7001; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T27.XYZW, T34.X, 0
7002; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T39.XYZW, T24.X, 0
7003; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T28.XYZW, T23.X, 0
7004; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T38.XYZW, T22.X, 0
7005; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T29.XYZW, T21.X, 0
7006; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T37.XYZW, T20.X, 0
7007; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T30.XYZW, T19.X, 0
7008; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T36.XYZW, T18.X, 0
7009; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T31.XYZW, T17.X, 0
7010; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T11.XYZW, T16.X, 0
7011; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T32.XYZW, T15.X, 0
7012; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T35.XYZW, T14.X, 0
7013; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T33.XYZW, T13.X, 0
7014; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T25.XYZW, T12.X, 1
7015; EG-NEXT:    CF_END
7016; EG-NEXT:    PAD
7017; EG-NEXT:    Fetch clause starting at 22:
7018; EG-NEXT:     VTX_READ_32 T11.X, T11.X, 0, #1
7019; EG-NEXT:    ALU clause starting at 24:
7020; EG-NEXT:     MOV * T11.X, KC0[2].Z,
7021; EG-NEXT:    ALU clause starting at 25:
7022; EG-NEXT:     LSHR T12.X, KC0[2].Y, literal.x,
7023; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7024; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
7025; EG-NEXT:     LSHR T13.X, PV.W, literal.x,
7026; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7027; EG-NEXT:    2(2.802597e-45), 32(4.484155e-44)
7028; EG-NEXT:     LSHR T14.X, PV.W, literal.x,
7029; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7030; EG-NEXT:    2(2.802597e-45), 48(6.726233e-44)
7031; EG-NEXT:     LSHR T15.X, PV.W, literal.x,
7032; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7033; EG-NEXT:    2(2.802597e-45), 64(8.968310e-44)
7034; EG-NEXT:     LSHR T16.X, PV.W, literal.x,
7035; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7036; EG-NEXT:    2(2.802597e-45), 80(1.121039e-43)
7037; EG-NEXT:     LSHR T17.X, PV.W, literal.x,
7038; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7039; EG-NEXT:    2(2.802597e-45), 96(1.345247e-43)
7040; EG-NEXT:     LSHR T18.X, PV.W, literal.x,
7041; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7042; EG-NEXT:    2(2.802597e-45), 112(1.569454e-43)
7043; EG-NEXT:     LSHR T19.X, PV.W, literal.x,
7044; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7045; EG-NEXT:    2(2.802597e-45), 128(1.793662e-43)
7046; EG-NEXT:     LSHR T20.X, PV.W, literal.x,
7047; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7048; EG-NEXT:    2(2.802597e-45), 144(2.017870e-43)
7049; EG-NEXT:     LSHR T21.X, PV.W, literal.x,
7050; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7051; EG-NEXT:    2(2.802597e-45), 160(2.242078e-43)
7052; EG-NEXT:     LSHR T22.X, PV.W, literal.x,
7053; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7054; EG-NEXT:    2(2.802597e-45), 176(2.466285e-43)
7055; EG-NEXT:     LSHR T23.X, PV.W, literal.x,
7056; EG-NEXT:     LSHR T0.Y, T11.X, literal.y,
7057; EG-NEXT:     LSHR T0.Z, T11.X, literal.z,
7058; EG-NEXT:     LSHR * T0.W, T11.X, literal.w,
7059; EG-NEXT:    2(2.802597e-45), 28(3.923636e-44)
7060; EG-NEXT:    29(4.063766e-44), 24(3.363116e-44)
7061; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.x,
7062; EG-NEXT:    192(2.690493e-43), 0(0.000000e+00)
7063; EG-NEXT:     LSHR T24.X, PV.W, literal.x,
7064; EG-NEXT:     LSHR T1.Y, T11.X, literal.y,
7065; EG-NEXT:     LSHR T1.Z, T11.X, literal.z,
7066; EG-NEXT:     LSHR * T1.W, T11.X, literal.w,
7067; EG-NEXT:    2(2.802597e-45), 25(3.503246e-44)
7068; EG-NEXT:    20(2.802597e-44), 21(2.942727e-44)
7069; EG-NEXT:     LSHR * T2.W, T11.X, literal.x,
7070; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
7071; EG-NEXT:     BFE_INT T25.X, T11.X, 0.0, 1,
7072; EG-NEXT:     LSHR T2.Y, T11.X, literal.x,
7073; EG-NEXT:     ASHR T26.Z, T11.X, literal.y,
7074; EG-NEXT:     LSHR T3.W, T11.X, literal.z,
7075; EG-NEXT:     LSHR * T4.W, T11.X, literal.w,
7076; EG-NEXT:    17(2.382207e-44), 31(4.344025e-44)
7077; EG-NEXT:    27(3.783506e-44), 30(4.203895e-44)
7078; EG-NEXT:     BFE_INT T26.X, PS, 0.0, 1,
7079; EG-NEXT:     LSHR T3.Y, T11.X, literal.x,
7080; EG-NEXT:     BFE_INT T27.Z, PV.W, 0.0, 1,
7081; EG-NEXT:     LSHR T3.W, T11.X, literal.y,
7082; EG-NEXT:     LSHR * T4.W, T11.X, literal.z,
7083; EG-NEXT:    12(1.681558e-44), 23(3.222986e-44)
7084; EG-NEXT:    26(3.643376e-44), 0(0.000000e+00)
7085; EG-NEXT:     BFE_INT T27.X, PS, 0.0, 1,
7086; EG-NEXT:     MOV T26.Y, PV.X,
7087; EG-NEXT:     BFE_INT T28.Z, PV.W, 0.0, 1,
7088; EG-NEXT:     LSHR T3.W, T11.X, literal.x,
7089; EG-NEXT:     LSHR * T4.W, T11.X, literal.y,
7090; EG-NEXT:    19(2.662467e-44), 22(3.082857e-44)
7091; EG-NEXT:     BFE_INT T28.X, PS, 0.0, 1,
7092; EG-NEXT:     MOV T27.Y, PV.X,
7093; EG-NEXT:     BFE_INT T29.Z, PV.W, 0.0, 1,
7094; EG-NEXT:     LSHR T3.W, T11.X, literal.x,
7095; EG-NEXT:     LSHR * T4.W, T11.X, literal.y,
7096; EG-NEXT:    15(2.101948e-44), 18(2.522337e-44)
7097; EG-NEXT:     BFE_INT T29.X, PS, 0.0, 1,
7098; EG-NEXT:     MOV T28.Y, PV.X,
7099; EG-NEXT:     BFE_INT T30.Z, PV.W, 0.0, 1,
7100; EG-NEXT:     LSHR T3.W, T11.X, literal.x,
7101; EG-NEXT:     LSHR * T4.W, T11.X, literal.y,
7102; EG-NEXT:    11(1.541428e-44), 14(1.961818e-44)
7103; EG-NEXT:     BFE_INT T30.X, PS, 0.0, 1,
7104; EG-NEXT:     MOV T29.Y, PV.X,
7105; EG-NEXT:     BFE_INT T31.Z, PV.W, 0.0, 1,
7106; EG-NEXT:     LSHR T3.W, T11.X, literal.x,
7107; EG-NEXT:     LSHR * T4.W, T11.X, literal.y,
7108; EG-NEXT:    7(9.809089e-45), 10(1.401298e-44)
7109; EG-NEXT:     BFE_INT T31.X, PS, 0.0, 1,
7110; EG-NEXT:     MOV T30.Y, PV.X,
7111; EG-NEXT:     BFE_INT T32.Z, PV.W, 0.0, 1,
7112; EG-NEXT:     LSHR T3.W, T11.X, literal.x,
7113; EG-NEXT:     LSHR * T4.W, T11.X, literal.y,
7114; EG-NEXT:    3(4.203895e-45), 6(8.407791e-45)
7115; EG-NEXT:    ALU clause starting at 118:
7116; EG-NEXT:     BFE_INT T32.X, T4.W, 0.0, 1,
7117; EG-NEXT:     MOV T31.Y, T31.X,
7118; EG-NEXT:     BFE_INT T33.Z, T3.W, 0.0, 1, BS:VEC_120/SCL_212
7119; EG-NEXT:     LSHR T3.W, T11.X, 1, BS:VEC_120/SCL_212
7120; EG-NEXT:     LSHR * T4.W, T11.X, literal.x,
7121; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
7122; EG-NEXT:     BFE_INT T33.X, PS, 0.0, 1,
7123; EG-NEXT:     MOV T32.Y, PV.X,
7124; EG-NEXT:     BFE_INT T25.Z, PV.W, 0.0, 1,
7125; EG-NEXT:     LSHR T3.W, T11.X, literal.x,
7126; EG-NEXT:     ADD_INT * T4.W, KC0[2].Y, literal.y,
7127; EG-NEXT:    5(7.006492e-45), 208(2.914701e-43)
7128; EG-NEXT:     LSHR T34.X, PS, literal.x,
7129; EG-NEXT:     MOV T33.Y, PV.X,
7130; EG-NEXT:     BFE_INT T35.Z, PV.W, 0.0, 1,
7131; EG-NEXT:     LSHR T3.W, T11.X, literal.y,
7132; EG-NEXT:     LSHR * T4.W, T11.X, literal.z,
7133; EG-NEXT:    2(2.802597e-45), 9(1.261169e-44)
7134; EG-NEXT:    4(5.605194e-45), 0(0.000000e+00)
7135; EG-NEXT:     BFE_INT T35.X, PS, 0.0, 1,
7136; EG-NEXT:     MOV T25.Y, T25.X,
7137; EG-NEXT:     BFE_INT T11.Z, PV.W, 0.0, 1,
7138; EG-NEXT:     LSHR T3.W, T11.X, literal.x, BS:VEC_120/SCL_212
7139; EG-NEXT:     LSHR * T4.W, T11.X, literal.y,
7140; EG-NEXT:    13(1.821688e-44), 8(1.121039e-44)
7141; EG-NEXT:     BFE_INT T11.X, PS, 0.0, 1,
7142; EG-NEXT:     MOV T35.Y, PV.X,
7143; EG-NEXT:     BFE_INT T36.Z, PV.W, 0.0, 1,
7144; EG-NEXT:     MOV T25.W, T25.Z,
7145; EG-NEXT:     MOV * T33.W, T33.Z,
7146; EG-NEXT:     BFE_INT T36.X, T3.Y, 0.0, 1,
7147; EG-NEXT:     MOV T11.Y, PV.X,
7148; EG-NEXT:     BFE_INT T37.Z, T2.Y, 0.0, 1, BS:VEC_120/SCL_212
7149; EG-NEXT:     MOV T35.W, T35.Z,
7150; EG-NEXT:     MOV * T32.W, T32.Z,
7151; EG-NEXT:     BFE_INT T37.X, T2.W, 0.0, 1,
7152; EG-NEXT:     MOV T36.Y, PV.X,
7153; EG-NEXT:     BFE_INT T38.Z, T1.W, 0.0, 1, BS:VEC_120/SCL_212
7154; EG-NEXT:     MOV T11.W, T11.Z,
7155; EG-NEXT:     MOV * T31.W, T31.Z,
7156; EG-NEXT:     BFE_INT T38.X, T1.Z, 0.0, 1,
7157; EG-NEXT:     MOV T37.Y, PV.X,
7158; EG-NEXT:     BFE_INT T39.Z, T1.Y, 0.0, 1,
7159; EG-NEXT:     MOV T36.W, T36.Z, BS:VEC_120/SCL_212
7160; EG-NEXT:     MOV * T30.W, T30.Z,
7161; EG-NEXT:     BFE_INT T39.X, T0.W, 0.0, 1,
7162; EG-NEXT:     MOV T38.Y, PV.X,
7163; EG-NEXT:     BFE_INT T40.Z, T0.Z, 0.0, 1,
7164; EG-NEXT:     MOV T37.W, T37.Z, BS:VEC_120/SCL_212
7165; EG-NEXT:     MOV * T29.W, T29.Z,
7166; EG-NEXT:     BFE_INT T40.X, T0.Y, 0.0, 1,
7167; EG-NEXT:     MOV T39.Y, PV.X,
7168; EG-NEXT:     ADD_INT T0.Z, KC0[2].Y, literal.x,
7169; EG-NEXT:     MOV T38.W, T38.Z,
7170; EG-NEXT:     MOV * T28.W, T28.Z,
7171; EG-NEXT:    224(3.138909e-43), 0(0.000000e+00)
7172; EG-NEXT:     LSHR T41.X, PV.Z, literal.x,
7173; EG-NEXT:     MOV T40.Y, PV.X,
7174; EG-NEXT:     ADD_INT T0.Z, KC0[2].Y, literal.y,
7175; EG-NEXT:     MOV T39.W, T39.Z,
7176; EG-NEXT:     MOV * T27.W, T27.Z,
7177; EG-NEXT:    2(2.802597e-45), 240(3.363116e-43)
7178; EG-NEXT:     LSHR T42.X, PV.Z, literal.x,
7179; EG-NEXT:     MOV T40.W, T40.Z,
7180; EG-NEXT:     MOV * T26.W, T26.Z,
7181; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
7182;
7183; GFX12-LABEL: constant_sextload_v32i1_to_v32i64:
7184; GFX12:       ; %bb.0:
7185; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
7186; GFX12-NEXT:    s_wait_kmcnt 0x0
7187; GFX12-NEXT:    s_load_b32 s2, s[2:3], 0x0
7188; GFX12-NEXT:    s_wait_kmcnt 0x0
7189; GFX12-NEXT:    s_lshr_b32 s34, s2, 30
7190; GFX12-NEXT:    s_lshr_b32 s36, s2, 31
7191; GFX12-NEXT:    s_lshr_b32 s38, s2, 28
7192; GFX12-NEXT:    s_lshr_b32 s40, s2, 29
7193; GFX12-NEXT:    s_lshr_b32 s42, s2, 26
7194; GFX12-NEXT:    s_lshr_b32 s44, s2, 27
7195; GFX12-NEXT:    s_bfe_i64 s[34:35], s[34:35], 0x10000
7196; GFX12-NEXT:    s_bfe_i64 s[36:37], s[36:37], 0x10000
7197; GFX12-NEXT:    s_lshr_b32 s46, s2, 24
7198; GFX12-NEXT:    s_lshr_b32 s48, s2, 25
7199; GFX12-NEXT:    s_bfe_i64 s[40:41], s[40:41], 0x10000
7200; GFX12-NEXT:    s_bfe_i64 s[38:39], s[38:39], 0x10000
7201; GFX12-NEXT:    v_dual_mov_b32 v24, 0 :: v_dual_mov_b32 v1, s35
7202; GFX12-NEXT:    s_bfe_i64 s[44:45], s[44:45], 0x10000
7203; GFX12-NEXT:    s_bfe_i64 s[42:43], s[42:43], 0x10000
7204; GFX12-NEXT:    v_dual_mov_b32 v0, s34 :: v_dual_mov_b32 v3, s37
7205; GFX12-NEXT:    v_dual_mov_b32 v2, s36 :: v_dual_mov_b32 v5, s39
7206; GFX12-NEXT:    s_lshr_b32 s26, s2, 22
7207; GFX12-NEXT:    s_lshr_b32 s50, s2, 23
7208; GFX12-NEXT:    s_bfe_i64 s[48:49], s[48:49], 0x10000
7209; GFX12-NEXT:    s_bfe_i64 s[46:47], s[46:47], 0x10000
7210; GFX12-NEXT:    v_dual_mov_b32 v4, s38 :: v_dual_mov_b32 v7, s41
7211; GFX12-NEXT:    v_dual_mov_b32 v6, s40 :: v_dual_mov_b32 v9, s43
7212; GFX12-NEXT:    s_lshr_b32 s52, s2, 20
7213; GFX12-NEXT:    s_lshr_b32 s54, s2, 21
7214; GFX12-NEXT:    v_dual_mov_b32 v8, s42 :: v_dual_mov_b32 v11, s45
7215; GFX12-NEXT:    v_dual_mov_b32 v10, s44 :: v_dual_mov_b32 v13, s47
7216; GFX12-NEXT:    s_lshr_b32 s56, s2, 18
7217; GFX12-NEXT:    s_lshr_b32 s58, s2, 19
7218; GFX12-NEXT:    s_bfe_i64 s[50:51], s[50:51], 0x10000
7219; GFX12-NEXT:    v_dual_mov_b32 v12, s46 :: v_dual_mov_b32 v15, s49
7220; GFX12-NEXT:    s_bfe_i64 s[26:27], s[26:27], 0x10000
7221; GFX12-NEXT:    v_mov_b32_e32 v14, s48
7222; GFX12-NEXT:    s_lshr_b32 s60, s2, 16
7223; GFX12-NEXT:    s_lshr_b32 s62, s2, 17
7224; GFX12-NEXT:    s_bfe_i64 s[54:55], s[54:55], 0x10000
7225; GFX12-NEXT:    s_bfe_i64 s[52:53], s[52:53], 0x10000
7226; GFX12-NEXT:    s_lshr_b32 s64, s2, 14
7227; GFX12-NEXT:    s_lshr_b32 s66, s2, 15
7228; GFX12-NEXT:    s_bfe_i64 s[58:59], s[58:59], 0x10000
7229; GFX12-NEXT:    s_bfe_i64 s[56:57], s[56:57], 0x10000
7230; GFX12-NEXT:    s_clause 0x3
7231; GFX12-NEXT:    global_store_b128 v24, v[0:3], s[0:1] offset:240
7232; GFX12-NEXT:    global_store_b128 v24, v[4:7], s[0:1] offset:224
7233; GFX12-NEXT:    global_store_b128 v24, v[8:11], s[0:1] offset:208
7234; GFX12-NEXT:    global_store_b128 v24, v[12:15], s[0:1] offset:192
7235; GFX12-NEXT:    v_dual_mov_b32 v1, s27 :: v_dual_mov_b32 v0, s26
7236; GFX12-NEXT:    v_dual_mov_b32 v3, s51 :: v_dual_mov_b32 v2, s50
7237; GFX12-NEXT:    v_mov_b32_e32 v5, s53
7238; GFX12-NEXT:    s_lshr_b32 s30, s2, 12
7239; GFX12-NEXT:    s_lshr_b32 s28, s2, 13
7240; GFX12-NEXT:    s_lshr_b32 s24, s2, 10
7241; GFX12-NEXT:    s_lshr_b32 s22, s2, 11
7242; GFX12-NEXT:    s_bfe_i64 s[62:63], s[62:63], 0x10000
7243; GFX12-NEXT:    s_bfe_i64 s[60:61], s[60:61], 0x10000
7244; GFX12-NEXT:    v_dual_mov_b32 v4, s52 :: v_dual_mov_b32 v7, s55
7245; GFX12-NEXT:    v_dual_mov_b32 v6, s54 :: v_dual_mov_b32 v9, s57
7246; GFX12-NEXT:    s_lshr_b32 s20, s2, 8
7247; GFX12-NEXT:    s_lshr_b32 s18, s2, 9
7248; GFX12-NEXT:    s_bfe_i64 s[66:67], s[66:67], 0x10000
7249; GFX12-NEXT:    s_bfe_i64 s[64:65], s[64:65], 0x10000
7250; GFX12-NEXT:    v_dual_mov_b32 v8, s56 :: v_dual_mov_b32 v11, s59
7251; GFX12-NEXT:    v_dual_mov_b32 v10, s58 :: v_dual_mov_b32 v13, s61
7252; GFX12-NEXT:    s_lshr_b32 s16, s2, 6
7253; GFX12-NEXT:    s_lshr_b32 s14, s2, 7
7254; GFX12-NEXT:    s_bfe_i64 s[22:23], s[22:23], 0x10000
7255; GFX12-NEXT:    s_bfe_i64 s[24:25], s[24:25], 0x10000
7256; GFX12-NEXT:    s_bfe_i64 s[28:29], s[28:29], 0x10000
7257; GFX12-NEXT:    s_bfe_i64 s[30:31], s[30:31], 0x10000
7258; GFX12-NEXT:    v_dual_mov_b32 v12, s60 :: v_dual_mov_b32 v15, s63
7259; GFX12-NEXT:    v_dual_mov_b32 v14, s62 :: v_dual_mov_b32 v17, s65
7260; GFX12-NEXT:    s_lshr_b32 s12, s2, 4
7261; GFX12-NEXT:    s_lshr_b32 s10, s2, 5
7262; GFX12-NEXT:    s_bfe_i64 s[18:19], s[18:19], 0x10000
7263; GFX12-NEXT:    s_bfe_i64 s[20:21], s[20:21], 0x10000
7264; GFX12-NEXT:    v_dual_mov_b32 v16, s64 :: v_dual_mov_b32 v19, s67
7265; GFX12-NEXT:    v_dual_mov_b32 v18, s66 :: v_dual_mov_b32 v21, s31
7266; GFX12-NEXT:    s_lshr_b32 s8, s2, 2
7267; GFX12-NEXT:    s_lshr_b32 s6, s2, 3
7268; GFX12-NEXT:    s_bfe_i64 s[14:15], s[14:15], 0x10000
7269; GFX12-NEXT:    s_bfe_i64 s[16:17], s[16:17], 0x10000
7270; GFX12-NEXT:    v_dual_mov_b32 v20, s30 :: v_dual_mov_b32 v23, s29
7271; GFX12-NEXT:    v_mov_b32_e32 v22, s28
7272; GFX12-NEXT:    s_clause 0x5
7273; GFX12-NEXT:    global_store_b128 v24, v[0:3], s[0:1] offset:176
7274; GFX12-NEXT:    global_store_b128 v24, v[4:7], s[0:1] offset:160
7275; GFX12-NEXT:    global_store_b128 v24, v[8:11], s[0:1] offset:144
7276; GFX12-NEXT:    global_store_b128 v24, v[12:15], s[0:1] offset:128
7277; GFX12-NEXT:    global_store_b128 v24, v[16:19], s[0:1] offset:112
7278; GFX12-NEXT:    global_store_b128 v24, v[20:23], s[0:1] offset:96
7279; GFX12-NEXT:    v_dual_mov_b32 v1, s25 :: v_dual_mov_b32 v0, s24
7280; GFX12-NEXT:    v_dual_mov_b32 v3, s23 :: v_dual_mov_b32 v2, s22
7281; GFX12-NEXT:    v_mov_b32_e32 v5, s21
7282; GFX12-NEXT:    s_lshr_b32 s68, s2, 1
7283; GFX12-NEXT:    s_bfe_i64 s[10:11], s[10:11], 0x10000
7284; GFX12-NEXT:    s_bfe_i64 s[12:13], s[12:13], 0x10000
7285; GFX12-NEXT:    v_dual_mov_b32 v4, s20 :: v_dual_mov_b32 v7, s19
7286; GFX12-NEXT:    v_dual_mov_b32 v6, s18 :: v_dual_mov_b32 v9, s17
7287; GFX12-NEXT:    s_bfe_i64 s[6:7], s[6:7], 0x10000
7288; GFX12-NEXT:    s_bfe_i64 s[8:9], s[8:9], 0x10000
7289; GFX12-NEXT:    v_dual_mov_b32 v8, s16 :: v_dual_mov_b32 v11, s15
7290; GFX12-NEXT:    v_dual_mov_b32 v10, s14 :: v_dual_mov_b32 v13, s13
7291; GFX12-NEXT:    s_bfe_i64 s[4:5], s[2:3], 0x10000
7292; GFX12-NEXT:    s_bfe_i64 s[2:3], s[68:69], 0x10000
7293; GFX12-NEXT:    v_dual_mov_b32 v12, s12 :: v_dual_mov_b32 v15, s11
7294; GFX12-NEXT:    v_dual_mov_b32 v14, s10 :: v_dual_mov_b32 v17, s9
7295; GFX12-NEXT:    v_dual_mov_b32 v16, s8 :: v_dual_mov_b32 v19, s7
7296; GFX12-NEXT:    v_dual_mov_b32 v18, s6 :: v_dual_mov_b32 v21, s5
7297; GFX12-NEXT:    v_dual_mov_b32 v20, s4 :: v_dual_mov_b32 v23, s3
7298; GFX12-NEXT:    v_mov_b32_e32 v22, s2
7299; GFX12-NEXT:    s_clause 0x5
7300; GFX12-NEXT:    global_store_b128 v24, v[0:3], s[0:1] offset:80
7301; GFX12-NEXT:    global_store_b128 v24, v[4:7], s[0:1] offset:64
7302; GFX12-NEXT:    global_store_b128 v24, v[8:11], s[0:1] offset:48
7303; GFX12-NEXT:    global_store_b128 v24, v[12:15], s[0:1] offset:32
7304; GFX12-NEXT:    global_store_b128 v24, v[16:19], s[0:1] offset:16
7305; GFX12-NEXT:    global_store_b128 v24, v[20:23], s[0:1]
7306; GFX12-NEXT:    s_endpgm
7307  %load = load <32 x i1>, ptr addrspace(4) %in
7308  %ext = sext <32 x i1> %load to <32 x i64>
7309  store <32 x i64> %ext, ptr addrspace(1) %out
7310  ret void
7311}
7312
7313define amdgpu_kernel void @constant_zextload_v64i1_to_v64i64(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
7314; GFX6-LABEL: constant_zextload_v64i1_to_v64i64:
7315; GFX6:       ; %bb.0:
7316; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
7317; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
7318; GFX6-NEXT:    s_load_dwordx2 s[2:3], s[2:3], 0x0
7319; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
7320; GFX6-NEXT:    s_bfe_u32 s4, s2, 0x10003
7321; GFX6-NEXT:    s_bfe_u32 s5, s2, 0x10005
7322; GFX6-NEXT:    s_bfe_u32 s8, s2, 0x10007
7323; GFX6-NEXT:    s_bfe_u32 s11, s2, 0x10009
7324; GFX6-NEXT:    s_bfe_u32 s13, s2, 0x1000b
7325; GFX6-NEXT:    s_bfe_u32 s15, s2, 0x1000d
7326; GFX6-NEXT:    s_bfe_u32 s17, s2, 0x1000f
7327; GFX6-NEXT:    s_bfe_u32 s19, s2, 0x10011
7328; GFX6-NEXT:    s_bfe_u32 s21, s2, 0x10013
7329; GFX6-NEXT:    s_bfe_u32 s23, s2, 0x10015
7330; GFX6-NEXT:    s_bfe_u32 s25, s2, 0x10017
7331; GFX6-NEXT:    s_bfe_u32 s27, s2, 0x10019
7332; GFX6-NEXT:    s_bfe_u32 s29, s2, 0x1001b
7333; GFX6-NEXT:    s_bfe_u32 s31, s2, 0x1001d
7334; GFX6-NEXT:    s_lshr_b32 s34, s2, 31
7335; GFX6-NEXT:    s_bfe_u32 s35, s3, 0x10003
7336; GFX6-NEXT:    s_bfe_u32 s36, s3, 0x10005
7337; GFX6-NEXT:    s_bfe_u32 s37, s3, 0x10007
7338; GFX6-NEXT:    s_bfe_u32 s38, s3, 0x10009
7339; GFX6-NEXT:    s_bfe_u32 s39, s3, 0x1000b
7340; GFX6-NEXT:    s_bfe_u32 s40, s3, 0x1000d
7341; GFX6-NEXT:    s_bfe_u32 s41, s3, 0x1000f
7342; GFX6-NEXT:    s_bfe_u32 s42, s3, 0x10011
7343; GFX6-NEXT:    s_bfe_u32 s43, s3, 0x10013
7344; GFX6-NEXT:    s_bfe_u32 s44, s3, 0x10015
7345; GFX6-NEXT:    s_bfe_u32 s45, s3, 0x10017
7346; GFX6-NEXT:    s_bfe_u32 s46, s3, 0x10019
7347; GFX6-NEXT:    s_bfe_u32 s47, s3, 0x1001b
7348; GFX6-NEXT:    s_bfe_u32 s48, s3, 0x1001d
7349; GFX6-NEXT:    s_lshr_b32 s49, s3, 31
7350; GFX6-NEXT:    s_bfe_u32 s9, s3, 0x10001
7351; GFX6-NEXT:    s_bfe_u32 s6, s2, 0x10001
7352; GFX6-NEXT:    s_and_b32 s7, s2, 1
7353; GFX6-NEXT:    s_and_b32 s10, s3, 1
7354; GFX6-NEXT:    s_bfe_u32 s12, s2, 0x10002
7355; GFX6-NEXT:    s_bfe_u32 s14, s2, 0x10004
7356; GFX6-NEXT:    s_bfe_u32 s16, s2, 0x10006
7357; GFX6-NEXT:    s_bfe_u32 s18, s2, 0x10008
7358; GFX6-NEXT:    s_bfe_u32 s20, s2, 0x1000a
7359; GFX6-NEXT:    s_bfe_u32 s22, s2, 0x1000c
7360; GFX6-NEXT:    s_bfe_u32 s24, s2, 0x1000e
7361; GFX6-NEXT:    s_bfe_u32 s26, s2, 0x10010
7362; GFX6-NEXT:    s_bfe_u32 s28, s2, 0x10012
7363; GFX6-NEXT:    s_bfe_u32 s30, s2, 0x10014
7364; GFX6-NEXT:    s_bfe_u32 s33, s2, 0x10016
7365; GFX6-NEXT:    s_bfe_u32 s50, s2, 0x10018
7366; GFX6-NEXT:    s_bfe_u32 s51, s2, 0x1001a
7367; GFX6-NEXT:    s_bfe_u32 s52, s2, 0x1001c
7368; GFX6-NEXT:    s_bfe_u32 s53, s2, 0x1001e
7369; GFX6-NEXT:    s_bfe_u32 s54, s3, 0x10002
7370; GFX6-NEXT:    s_bfe_u32 s55, s3, 0x10004
7371; GFX6-NEXT:    s_bfe_u32 s56, s3, 0x10006
7372; GFX6-NEXT:    s_bfe_u32 s57, s3, 0x10008
7373; GFX6-NEXT:    s_bfe_u32 s58, s3, 0x1000a
7374; GFX6-NEXT:    s_bfe_u32 s59, s3, 0x1000c
7375; GFX6-NEXT:    s_bfe_u32 s60, s3, 0x1000e
7376; GFX6-NEXT:    s_bfe_u32 s61, s3, 0x10010
7377; GFX6-NEXT:    s_bfe_u32 s62, s3, 0x10012
7378; GFX6-NEXT:    s_bfe_u32 s63, s3, 0x10014
7379; GFX6-NEXT:    s_bfe_u32 s64, s3, 0x10016
7380; GFX6-NEXT:    s_bfe_u32 s65, s3, 0x10018
7381; GFX6-NEXT:    s_bfe_u32 s66, s3, 0x1001a
7382; GFX6-NEXT:    s_bfe_u32 s67, s3, 0x1001e
7383; GFX6-NEXT:    s_bfe_u32 s68, s3, 0x1001c
7384; GFX6-NEXT:    s_mov_b32 s3, 0xf000
7385; GFX6-NEXT:    v_mov_b32_e32 v1, 0
7386; GFX6-NEXT:    s_mov_b32 s2, -1
7387; GFX6-NEXT:    v_mov_b32_e32 v3, v1
7388; GFX6-NEXT:    v_mov_b32_e32 v0, s67
7389; GFX6-NEXT:    v_mov_b32_e32 v2, s49
7390; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:496
7391; GFX6-NEXT:    s_waitcnt expcnt(0)
7392; GFX6-NEXT:    v_mov_b32_e32 v0, s68
7393; GFX6-NEXT:    v_mov_b32_e32 v2, s48
7394; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:480
7395; GFX6-NEXT:    s_waitcnt expcnt(0)
7396; GFX6-NEXT:    v_mov_b32_e32 v0, s66
7397; GFX6-NEXT:    v_mov_b32_e32 v2, s47
7398; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:464
7399; GFX6-NEXT:    s_waitcnt expcnt(0)
7400; GFX6-NEXT:    v_mov_b32_e32 v0, s65
7401; GFX6-NEXT:    v_mov_b32_e32 v2, s46
7402; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:448
7403; GFX6-NEXT:    s_waitcnt expcnt(0)
7404; GFX6-NEXT:    v_mov_b32_e32 v0, s64
7405; GFX6-NEXT:    v_mov_b32_e32 v2, s45
7406; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:432
7407; GFX6-NEXT:    s_waitcnt expcnt(0)
7408; GFX6-NEXT:    v_mov_b32_e32 v0, s63
7409; GFX6-NEXT:    v_mov_b32_e32 v2, s44
7410; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:416
7411; GFX6-NEXT:    s_waitcnt expcnt(0)
7412; GFX6-NEXT:    v_mov_b32_e32 v0, s62
7413; GFX6-NEXT:    v_mov_b32_e32 v2, s43
7414; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:400
7415; GFX6-NEXT:    s_waitcnt expcnt(0)
7416; GFX6-NEXT:    v_mov_b32_e32 v0, s61
7417; GFX6-NEXT:    v_mov_b32_e32 v2, s42
7418; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:384
7419; GFX6-NEXT:    s_waitcnt expcnt(0)
7420; GFX6-NEXT:    v_mov_b32_e32 v0, s60
7421; GFX6-NEXT:    v_mov_b32_e32 v2, s41
7422; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:368
7423; GFX6-NEXT:    s_waitcnt expcnt(0)
7424; GFX6-NEXT:    v_mov_b32_e32 v0, s59
7425; GFX6-NEXT:    v_mov_b32_e32 v2, s40
7426; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:352
7427; GFX6-NEXT:    s_waitcnt expcnt(0)
7428; GFX6-NEXT:    v_mov_b32_e32 v0, s58
7429; GFX6-NEXT:    v_mov_b32_e32 v2, s39
7430; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:336
7431; GFX6-NEXT:    s_waitcnt expcnt(0)
7432; GFX6-NEXT:    v_mov_b32_e32 v0, s57
7433; GFX6-NEXT:    v_mov_b32_e32 v2, s38
7434; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:320
7435; GFX6-NEXT:    s_waitcnt expcnt(0)
7436; GFX6-NEXT:    v_mov_b32_e32 v0, s56
7437; GFX6-NEXT:    v_mov_b32_e32 v2, s37
7438; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:304
7439; GFX6-NEXT:    s_waitcnt expcnt(0)
7440; GFX6-NEXT:    v_mov_b32_e32 v0, s55
7441; GFX6-NEXT:    v_mov_b32_e32 v2, s36
7442; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:288
7443; GFX6-NEXT:    s_waitcnt expcnt(0)
7444; GFX6-NEXT:    v_mov_b32_e32 v0, s54
7445; GFX6-NEXT:    v_mov_b32_e32 v2, s35
7446; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:272
7447; GFX6-NEXT:    s_waitcnt expcnt(0)
7448; GFX6-NEXT:    v_mov_b32_e32 v0, s53
7449; GFX6-NEXT:    v_mov_b32_e32 v2, s34
7450; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:240
7451; GFX6-NEXT:    s_waitcnt expcnt(0)
7452; GFX6-NEXT:    v_mov_b32_e32 v0, s52
7453; GFX6-NEXT:    v_mov_b32_e32 v2, s31
7454; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:224
7455; GFX6-NEXT:    s_waitcnt expcnt(0)
7456; GFX6-NEXT:    v_mov_b32_e32 v0, s51
7457; GFX6-NEXT:    v_mov_b32_e32 v2, s29
7458; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:208
7459; GFX6-NEXT:    s_waitcnt expcnt(0)
7460; GFX6-NEXT:    v_mov_b32_e32 v0, s50
7461; GFX6-NEXT:    v_mov_b32_e32 v2, s27
7462; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:192
7463; GFX6-NEXT:    s_waitcnt expcnt(0)
7464; GFX6-NEXT:    v_mov_b32_e32 v0, s33
7465; GFX6-NEXT:    v_mov_b32_e32 v2, s25
7466; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:176
7467; GFX6-NEXT:    s_waitcnt expcnt(0)
7468; GFX6-NEXT:    v_mov_b32_e32 v0, s30
7469; GFX6-NEXT:    v_mov_b32_e32 v2, s23
7470; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:160
7471; GFX6-NEXT:    s_waitcnt expcnt(0)
7472; GFX6-NEXT:    v_mov_b32_e32 v0, s28
7473; GFX6-NEXT:    v_mov_b32_e32 v2, s21
7474; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:144
7475; GFX6-NEXT:    s_waitcnt expcnt(0)
7476; GFX6-NEXT:    v_mov_b32_e32 v0, s26
7477; GFX6-NEXT:    v_mov_b32_e32 v2, s19
7478; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:128
7479; GFX6-NEXT:    s_waitcnt expcnt(0)
7480; GFX6-NEXT:    v_mov_b32_e32 v0, s24
7481; GFX6-NEXT:    v_mov_b32_e32 v2, s17
7482; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112
7483; GFX6-NEXT:    s_waitcnt expcnt(0)
7484; GFX6-NEXT:    v_mov_b32_e32 v0, s22
7485; GFX6-NEXT:    v_mov_b32_e32 v2, s15
7486; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96
7487; GFX6-NEXT:    s_waitcnt expcnt(0)
7488; GFX6-NEXT:    v_mov_b32_e32 v0, s20
7489; GFX6-NEXT:    v_mov_b32_e32 v2, s13
7490; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80
7491; GFX6-NEXT:    s_waitcnt expcnt(0)
7492; GFX6-NEXT:    v_mov_b32_e32 v0, s18
7493; GFX6-NEXT:    v_mov_b32_e32 v2, s11
7494; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64
7495; GFX6-NEXT:    s_waitcnt expcnt(0)
7496; GFX6-NEXT:    v_mov_b32_e32 v0, s16
7497; GFX6-NEXT:    v_mov_b32_e32 v2, s8
7498; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
7499; GFX6-NEXT:    s_waitcnt expcnt(0)
7500; GFX6-NEXT:    v_mov_b32_e32 v0, s14
7501; GFX6-NEXT:    v_mov_b32_e32 v2, s5
7502; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
7503; GFX6-NEXT:    s_waitcnt expcnt(0)
7504; GFX6-NEXT:    v_mov_b32_e32 v0, s12
7505; GFX6-NEXT:    v_mov_b32_e32 v2, s4
7506; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
7507; GFX6-NEXT:    s_waitcnt expcnt(0)
7508; GFX6-NEXT:    v_mov_b32_e32 v0, s10
7509; GFX6-NEXT:    v_mov_b32_e32 v2, s9
7510; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:256
7511; GFX6-NEXT:    s_waitcnt expcnt(0)
7512; GFX6-NEXT:    v_mov_b32_e32 v0, s7
7513; GFX6-NEXT:    v_mov_b32_e32 v2, s6
7514; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
7515; GFX6-NEXT:    s_endpgm
7516;
7517; GFX8-LABEL: constant_zextload_v64i1_to_v64i64:
7518; GFX8:       ; %bb.0:
7519; GFX8-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
7520; GFX8-NEXT:    v_mov_b32_e32 v1, 0
7521; GFX8-NEXT:    v_mov_b32_e32 v3, v1
7522; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
7523; GFX8-NEXT:    s_load_dwordx2 s[42:43], s[2:3], 0x0
7524; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
7525; GFX8-NEXT:    s_lshr_b32 s44, s43, 31
7526; GFX8-NEXT:    s_bfe_u32 s45, s43, 0x1001d
7527; GFX8-NEXT:    s_bfe_u32 s46, s43, 0x1001b
7528; GFX8-NEXT:    s_bfe_u32 s47, s43, 0x10019
7529; GFX8-NEXT:    s_bfe_u32 s48, s43, 0x10017
7530; GFX8-NEXT:    s_bfe_u32 s49, s43, 0x10013
7531; GFX8-NEXT:    s_bfe_u32 s50, s43, 0x10011
7532; GFX8-NEXT:    s_bfe_u32 s51, s43, 0x1000f
7533; GFX8-NEXT:    s_bfe_u32 s52, s43, 0x1000d
7534; GFX8-NEXT:    s_bfe_u32 s53, s43, 0x1000b
7535; GFX8-NEXT:    s_bfe_u32 s40, s43, 0x10009
7536; GFX8-NEXT:    s_bfe_u32 s38, s43, 0x10007
7537; GFX8-NEXT:    s_bfe_u32 s37, s43, 0x10005
7538; GFX8-NEXT:    s_bfe_u32 s35, s43, 0x10003
7539; GFX8-NEXT:    s_bfe_u32 s33, s43, 0x10001
7540; GFX8-NEXT:    s_lshr_b32 s30, s42, 31
7541; GFX8-NEXT:    s_bfe_u32 s28, s42, 0x1001d
7542; GFX8-NEXT:    s_bfe_u32 s26, s42, 0x1001b
7543; GFX8-NEXT:    s_bfe_u32 s25, s42, 0x10019
7544; GFX8-NEXT:    s_bfe_u32 s22, s42, 0x10017
7545; GFX8-NEXT:    s_bfe_u32 s19, s42, 0x10013
7546; GFX8-NEXT:    s_bfe_u32 s17, s42, 0x10011
7547; GFX8-NEXT:    s_bfe_u32 s15, s42, 0x1000f
7548; GFX8-NEXT:    s_bfe_u32 s13, s42, 0x1000d
7549; GFX8-NEXT:    s_bfe_u32 s12, s42, 0x1000b
7550; GFX8-NEXT:    s_bfe_u32 s10, s42, 0x10009
7551; GFX8-NEXT:    s_bfe_u32 s8, s42, 0x10007
7552; GFX8-NEXT:    s_bfe_u32 s6, s42, 0x10005
7553; GFX8-NEXT:    s_bfe_u32 s4, s42, 0x10003
7554; GFX8-NEXT:    s_bfe_u32 s2, s42, 0x10001
7555; GFX8-NEXT:    s_and_b32 s3, s42, 1
7556; GFX8-NEXT:    s_bfe_u32 s5, s42, 0x10002
7557; GFX8-NEXT:    s_bfe_u32 s7, s42, 0x10004
7558; GFX8-NEXT:    s_bfe_u32 s9, s42, 0x10006
7559; GFX8-NEXT:    s_bfe_u32 s11, s42, 0x10008
7560; GFX8-NEXT:    s_bfe_u32 s14, s42, 0x1000a
7561; GFX8-NEXT:    s_bfe_u32 s16, s42, 0x1000c
7562; GFX8-NEXT:    s_bfe_u32 s18, s42, 0x1000e
7563; GFX8-NEXT:    s_bfe_u32 s20, s42, 0x10010
7564; GFX8-NEXT:    s_bfe_u32 s21, s42, 0x10012
7565; GFX8-NEXT:    s_bfe_u32 s23, s42, 0x10014
7566; GFX8-NEXT:    s_bfe_u32 s24, s42, 0x10015
7567; GFX8-NEXT:    s_bfe_u32 s27, s42, 0x10016
7568; GFX8-NEXT:    s_bfe_u32 s29, s42, 0x10018
7569; GFX8-NEXT:    s_bfe_u32 s31, s42, 0x1001a
7570; GFX8-NEXT:    s_bfe_u32 s34, s42, 0x1001c
7571; GFX8-NEXT:    s_bfe_u32 s36, s42, 0x1001e
7572; GFX8-NEXT:    s_and_b32 s39, s43, 1
7573; GFX8-NEXT:    s_bfe_u32 s41, s43, 0x10002
7574; GFX8-NEXT:    s_bfe_u32 s54, s43, 0x10004
7575; GFX8-NEXT:    s_bfe_u32 s55, s43, 0x10006
7576; GFX8-NEXT:    s_bfe_u32 s56, s43, 0x10008
7577; GFX8-NEXT:    s_bfe_u32 s57, s43, 0x1000a
7578; GFX8-NEXT:    s_bfe_u32 s58, s43, 0x1000c
7579; GFX8-NEXT:    s_bfe_u32 s59, s43, 0x1000e
7580; GFX8-NEXT:    s_bfe_u32 s60, s43, 0x10010
7581; GFX8-NEXT:    s_bfe_u32 s61, s43, 0x10012
7582; GFX8-NEXT:    s_bfe_u32 s62, s43, 0x10016
7583; GFX8-NEXT:    s_bfe_u32 s63, s43, 0x10018
7584; GFX8-NEXT:    s_bfe_u32 s64, s43, 0x1001a
7585; GFX8-NEXT:    s_bfe_u32 s65, s43, 0x1001c
7586; GFX8-NEXT:    s_bfe_u32 s66, s43, 0x1001e
7587; GFX8-NEXT:    s_bfe_u32 s42, s43, 0x10015
7588; GFX8-NEXT:    s_bfe_u32 s43, s43, 0x10014
7589; GFX8-NEXT:    v_mov_b32_e32 v2, s42
7590; GFX8-NEXT:    s_add_u32 s42, s0, 0x1a0
7591; GFX8-NEXT:    v_mov_b32_e32 v0, s43
7592; GFX8-NEXT:    s_addc_u32 s43, s1, 0
7593; GFX8-NEXT:    v_mov_b32_e32 v4, s42
7594; GFX8-NEXT:    v_mov_b32_e32 v5, s43
7595; GFX8-NEXT:    s_add_u32 s42, s0, 0x1f0
7596; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
7597; GFX8-NEXT:    s_addc_u32 s43, s1, 0
7598; GFX8-NEXT:    v_mov_b32_e32 v4, s42
7599; GFX8-NEXT:    v_mov_b32_e32 v0, s66
7600; GFX8-NEXT:    v_mov_b32_e32 v2, s44
7601; GFX8-NEXT:    v_mov_b32_e32 v5, s43
7602; GFX8-NEXT:    s_add_u32 s42, s0, 0x1e0
7603; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
7604; GFX8-NEXT:    s_addc_u32 s43, s1, 0
7605; GFX8-NEXT:    v_mov_b32_e32 v4, s42
7606; GFX8-NEXT:    v_mov_b32_e32 v0, s65
7607; GFX8-NEXT:    v_mov_b32_e32 v2, s45
7608; GFX8-NEXT:    v_mov_b32_e32 v5, s43
7609; GFX8-NEXT:    s_add_u32 s42, s0, 0x1d0
7610; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
7611; GFX8-NEXT:    s_addc_u32 s43, s1, 0
7612; GFX8-NEXT:    v_mov_b32_e32 v4, s42
7613; GFX8-NEXT:    v_mov_b32_e32 v0, s64
7614; GFX8-NEXT:    v_mov_b32_e32 v2, s46
7615; GFX8-NEXT:    v_mov_b32_e32 v5, s43
7616; GFX8-NEXT:    s_add_u32 s42, s0, 0x1c0
7617; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
7618; GFX8-NEXT:    s_addc_u32 s43, s1, 0
7619; GFX8-NEXT:    v_mov_b32_e32 v4, s42
7620; GFX8-NEXT:    v_mov_b32_e32 v0, s63
7621; GFX8-NEXT:    v_mov_b32_e32 v2, s47
7622; GFX8-NEXT:    v_mov_b32_e32 v5, s43
7623; GFX8-NEXT:    s_add_u32 s42, s0, 0x1b0
7624; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
7625; GFX8-NEXT:    s_addc_u32 s43, s1, 0
7626; GFX8-NEXT:    v_mov_b32_e32 v4, s42
7627; GFX8-NEXT:    v_mov_b32_e32 v0, s62
7628; GFX8-NEXT:    v_mov_b32_e32 v2, s48
7629; GFX8-NEXT:    v_mov_b32_e32 v5, s43
7630; GFX8-NEXT:    s_add_u32 s42, s0, 0x190
7631; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
7632; GFX8-NEXT:    s_addc_u32 s43, s1, 0
7633; GFX8-NEXT:    v_mov_b32_e32 v4, s42
7634; GFX8-NEXT:    v_mov_b32_e32 v0, s61
7635; GFX8-NEXT:    v_mov_b32_e32 v2, s49
7636; GFX8-NEXT:    v_mov_b32_e32 v5, s43
7637; GFX8-NEXT:    s_add_u32 s42, s0, 0x180
7638; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
7639; GFX8-NEXT:    s_addc_u32 s43, s1, 0
7640; GFX8-NEXT:    v_mov_b32_e32 v4, s42
7641; GFX8-NEXT:    v_mov_b32_e32 v0, s60
7642; GFX8-NEXT:    v_mov_b32_e32 v2, s50
7643; GFX8-NEXT:    v_mov_b32_e32 v5, s43
7644; GFX8-NEXT:    s_add_u32 s42, s0, 0x170
7645; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
7646; GFX8-NEXT:    s_addc_u32 s43, s1, 0
7647; GFX8-NEXT:    v_mov_b32_e32 v4, s42
7648; GFX8-NEXT:    v_mov_b32_e32 v0, s59
7649; GFX8-NEXT:    v_mov_b32_e32 v2, s51
7650; GFX8-NEXT:    v_mov_b32_e32 v5, s43
7651; GFX8-NEXT:    s_add_u32 s42, s0, 0x160
7652; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
7653; GFX8-NEXT:    s_addc_u32 s43, s1, 0
7654; GFX8-NEXT:    v_mov_b32_e32 v4, s42
7655; GFX8-NEXT:    v_mov_b32_e32 v0, s58
7656; GFX8-NEXT:    v_mov_b32_e32 v2, s52
7657; GFX8-NEXT:    v_mov_b32_e32 v5, s43
7658; GFX8-NEXT:    s_add_u32 s42, s0, 0x150
7659; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
7660; GFX8-NEXT:    s_addc_u32 s43, s1, 0
7661; GFX8-NEXT:    v_mov_b32_e32 v4, s42
7662; GFX8-NEXT:    v_mov_b32_e32 v0, s57
7663; GFX8-NEXT:    v_mov_b32_e32 v2, s53
7664; GFX8-NEXT:    v_mov_b32_e32 v5, s43
7665; GFX8-NEXT:    s_add_u32 s42, s0, 0x140
7666; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
7667; GFX8-NEXT:    s_addc_u32 s43, s1, 0
7668; GFX8-NEXT:    v_mov_b32_e32 v4, s42
7669; GFX8-NEXT:    v_mov_b32_e32 v0, s56
7670; GFX8-NEXT:    v_mov_b32_e32 v2, s40
7671; GFX8-NEXT:    v_mov_b32_e32 v5, s43
7672; GFX8-NEXT:    s_add_u32 s42, s0, 0x130
7673; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
7674; GFX8-NEXT:    s_addc_u32 s43, s1, 0
7675; GFX8-NEXT:    v_mov_b32_e32 v4, s42
7676; GFX8-NEXT:    v_mov_b32_e32 v0, s55
7677; GFX8-NEXT:    v_mov_b32_e32 v2, s38
7678; GFX8-NEXT:    v_mov_b32_e32 v5, s43
7679; GFX8-NEXT:    s_add_u32 s42, s0, 0x120
7680; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
7681; GFX8-NEXT:    s_addc_u32 s43, s1, 0
7682; GFX8-NEXT:    v_mov_b32_e32 v4, s42
7683; GFX8-NEXT:    v_mov_b32_e32 v0, s54
7684; GFX8-NEXT:    v_mov_b32_e32 v2, s37
7685; GFX8-NEXT:    v_mov_b32_e32 v5, s43
7686; GFX8-NEXT:    s_add_u32 s40, s0, 0x110
7687; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
7688; GFX8-NEXT:    s_nop 0
7689; GFX8-NEXT:    v_mov_b32_e32 v0, s41
7690; GFX8-NEXT:    s_addc_u32 s41, s1, 0
7691; GFX8-NEXT:    v_mov_b32_e32 v4, s40
7692; GFX8-NEXT:    v_mov_b32_e32 v2, s35
7693; GFX8-NEXT:    v_mov_b32_e32 v5, s41
7694; GFX8-NEXT:    s_add_u32 s38, s0, 0x100
7695; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
7696; GFX8-NEXT:    s_nop 0
7697; GFX8-NEXT:    v_mov_b32_e32 v0, s39
7698; GFX8-NEXT:    s_addc_u32 s39, s1, 0
7699; GFX8-NEXT:    v_mov_b32_e32 v4, s38
7700; GFX8-NEXT:    v_mov_b32_e32 v2, s33
7701; GFX8-NEXT:    v_mov_b32_e32 v5, s39
7702; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
7703; GFX8-NEXT:    s_nop 0
7704; GFX8-NEXT:    v_mov_b32_e32 v0, s36
7705; GFX8-NEXT:    s_add_u32 s36, s0, 0xf0
7706; GFX8-NEXT:    s_addc_u32 s37, s1, 0
7707; GFX8-NEXT:    v_mov_b32_e32 v4, s36
7708; GFX8-NEXT:    v_mov_b32_e32 v2, s30
7709; GFX8-NEXT:    v_mov_b32_e32 v5, s37
7710; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
7711; GFX8-NEXT:    s_nop 0
7712; GFX8-NEXT:    v_mov_b32_e32 v0, s34
7713; GFX8-NEXT:    s_add_u32 s34, s0, 0xe0
7714; GFX8-NEXT:    s_addc_u32 s35, s1, 0
7715; GFX8-NEXT:    v_mov_b32_e32 v4, s34
7716; GFX8-NEXT:    v_mov_b32_e32 v2, s28
7717; GFX8-NEXT:    v_mov_b32_e32 v5, s35
7718; GFX8-NEXT:    s_add_u32 s30, s0, 0xd0
7719; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
7720; GFX8-NEXT:    s_nop 0
7721; GFX8-NEXT:    v_mov_b32_e32 v0, s31
7722; GFX8-NEXT:    s_addc_u32 s31, s1, 0
7723; GFX8-NEXT:    v_mov_b32_e32 v4, s30
7724; GFX8-NEXT:    v_mov_b32_e32 v2, s26
7725; GFX8-NEXT:    v_mov_b32_e32 v5, s31
7726; GFX8-NEXT:    s_add_u32 s28, s0, 0xc0
7727; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
7728; GFX8-NEXT:    s_nop 0
7729; GFX8-NEXT:    v_mov_b32_e32 v0, s29
7730; GFX8-NEXT:    s_addc_u32 s29, s1, 0
7731; GFX8-NEXT:    v_mov_b32_e32 v4, s28
7732; GFX8-NEXT:    v_mov_b32_e32 v2, s25
7733; GFX8-NEXT:    v_mov_b32_e32 v5, s29
7734; GFX8-NEXT:    s_add_u32 s26, s0, 0xb0
7735; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
7736; GFX8-NEXT:    s_nop 0
7737; GFX8-NEXT:    v_mov_b32_e32 v0, s27
7738; GFX8-NEXT:    s_addc_u32 s27, s1, 0
7739; GFX8-NEXT:    v_mov_b32_e32 v4, s26
7740; GFX8-NEXT:    v_mov_b32_e32 v2, s22
7741; GFX8-NEXT:    v_mov_b32_e32 v5, s27
7742; GFX8-NEXT:    s_add_u32 s22, s0, 0xa0
7743; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
7744; GFX8-NEXT:    s_nop 0
7745; GFX8-NEXT:    v_mov_b32_e32 v0, s23
7746; GFX8-NEXT:    s_addc_u32 s23, s1, 0
7747; GFX8-NEXT:    v_mov_b32_e32 v4, s22
7748; GFX8-NEXT:    v_mov_b32_e32 v2, s24
7749; GFX8-NEXT:    v_mov_b32_e32 v5, s23
7750; GFX8-NEXT:    s_add_u32 s22, s0, 0x90
7751; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
7752; GFX8-NEXT:    s_addc_u32 s23, s1, 0
7753; GFX8-NEXT:    v_mov_b32_e32 v4, s22
7754; GFX8-NEXT:    v_mov_b32_e32 v0, s21
7755; GFX8-NEXT:    v_mov_b32_e32 v2, s19
7756; GFX8-NEXT:    v_mov_b32_e32 v5, s23
7757; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
7758; GFX8-NEXT:    s_nop 0
7759; GFX8-NEXT:    v_mov_b32_e32 v0, s20
7760; GFX8-NEXT:    s_add_u32 s20, s0, 0x80
7761; GFX8-NEXT:    s_addc_u32 s21, s1, 0
7762; GFX8-NEXT:    v_mov_b32_e32 v4, s20
7763; GFX8-NEXT:    v_mov_b32_e32 v2, s17
7764; GFX8-NEXT:    v_mov_b32_e32 v5, s21
7765; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
7766; GFX8-NEXT:    s_nop 0
7767; GFX8-NEXT:    v_mov_b32_e32 v0, s18
7768; GFX8-NEXT:    s_add_u32 s18, s0, 0x70
7769; GFX8-NEXT:    s_addc_u32 s19, s1, 0
7770; GFX8-NEXT:    v_mov_b32_e32 v4, s18
7771; GFX8-NEXT:    v_mov_b32_e32 v2, s15
7772; GFX8-NEXT:    v_mov_b32_e32 v5, s19
7773; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
7774; GFX8-NEXT:    s_nop 0
7775; GFX8-NEXT:    v_mov_b32_e32 v0, s16
7776; GFX8-NEXT:    s_add_u32 s16, s0, 0x60
7777; GFX8-NEXT:    s_addc_u32 s17, s1, 0
7778; GFX8-NEXT:    v_mov_b32_e32 v4, s16
7779; GFX8-NEXT:    v_mov_b32_e32 v2, s13
7780; GFX8-NEXT:    v_mov_b32_e32 v5, s17
7781; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
7782; GFX8-NEXT:    s_nop 0
7783; GFX8-NEXT:    v_mov_b32_e32 v2, s12
7784; GFX8-NEXT:    s_add_u32 s12, s0, 0x50
7785; GFX8-NEXT:    s_addc_u32 s13, s1, 0
7786; GFX8-NEXT:    v_mov_b32_e32 v4, s12
7787; GFX8-NEXT:    v_mov_b32_e32 v0, s14
7788; GFX8-NEXT:    v_mov_b32_e32 v5, s13
7789; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
7790; GFX8-NEXT:    s_nop 0
7791; GFX8-NEXT:    v_mov_b32_e32 v2, s10
7792; GFX8-NEXT:    s_add_u32 s10, s0, 64
7793; GFX8-NEXT:    v_mov_b32_e32 v0, s11
7794; GFX8-NEXT:    s_addc_u32 s11, s1, 0
7795; GFX8-NEXT:    v_mov_b32_e32 v4, s10
7796; GFX8-NEXT:    v_mov_b32_e32 v5, s11
7797; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
7798; GFX8-NEXT:    s_nop 0
7799; GFX8-NEXT:    v_mov_b32_e32 v2, s8
7800; GFX8-NEXT:    s_add_u32 s8, s0, 48
7801; GFX8-NEXT:    v_mov_b32_e32 v0, s9
7802; GFX8-NEXT:    s_addc_u32 s9, s1, 0
7803; GFX8-NEXT:    v_mov_b32_e32 v4, s8
7804; GFX8-NEXT:    v_mov_b32_e32 v5, s9
7805; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
7806; GFX8-NEXT:    s_nop 0
7807; GFX8-NEXT:    v_mov_b32_e32 v2, s6
7808; GFX8-NEXT:    s_add_u32 s6, s0, 32
7809; GFX8-NEXT:    v_mov_b32_e32 v0, s7
7810; GFX8-NEXT:    s_addc_u32 s7, s1, 0
7811; GFX8-NEXT:    v_mov_b32_e32 v4, s6
7812; GFX8-NEXT:    v_mov_b32_e32 v5, s7
7813; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
7814; GFX8-NEXT:    s_nop 0
7815; GFX8-NEXT:    v_mov_b32_e32 v2, s4
7816; GFX8-NEXT:    s_add_u32 s4, s0, 16
7817; GFX8-NEXT:    v_mov_b32_e32 v0, s5
7818; GFX8-NEXT:    s_addc_u32 s5, s1, 0
7819; GFX8-NEXT:    v_mov_b32_e32 v4, s4
7820; GFX8-NEXT:    v_mov_b32_e32 v5, s5
7821; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
7822; GFX8-NEXT:    v_mov_b32_e32 v5, s1
7823; GFX8-NEXT:    v_mov_b32_e32 v0, s3
7824; GFX8-NEXT:    v_mov_b32_e32 v2, s2
7825; GFX8-NEXT:    v_mov_b32_e32 v4, s0
7826; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
7827; GFX8-NEXT:    s_endpgm
7828;
7829; EG-LABEL: constant_zextload_v64i1_to_v64i64:
7830; EG:       ; %bb.0:
7831; EG-NEXT:    ALU 0, @40, KC0[CB0:0-32], KC1[]
7832; EG-NEXT:    TEX 0 @38
7833; EG-NEXT:    ALU 95, @41, KC0[], KC1[]
7834; EG-NEXT:    ALU 99, @137, KC0[CB0:0-32], KC1[]
7835; EG-NEXT:    ALU 60, @237, KC0[CB0:0-32], KC1[]
7836; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T19.XYZW, T82.X, 0
7837; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T20.XYZW, T81.X, 0
7838; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T21.XYZW, T80.X, 0
7839; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T22.XYZW, T79.X, 0
7840; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T23.XYZW, T78.X, 0
7841; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T24.XYZW, T77.X, 0
7842; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T26.XYZW, T76.X, 0
7843; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T27.XYZW, T75.X, 0
7844; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T28.XYZW, T74.X, 0
7845; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T29.XYZW, T73.X, 0
7846; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T30.XYZW, T72.X, 0
7847; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T31.XYZW, T71.X, 0
7848; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T32.XYZW, T70.X, 0
7849; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T33.XYZW, T69.X, 0
7850; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T34.XYZW, T68.X, 0
7851; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T35.XYZW, T67.X, 0
7852; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T36.XYZW, T66.X, 0
7853; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T37.XYZW, T65.X, 0
7854; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T38.XYZW, T64.X, 0
7855; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T39.XYZW, T63.X, 0
7856; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T40.XYZW, T62.X, 0
7857; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T41.XYZW, T61.X, 0
7858; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T42.XYZW, T60.X, 0
7859; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T43.XYZW, T59.X, 0
7860; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T44.XYZW, T58.X, 0
7861; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T45.XYZW, T57.X, 0
7862; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T46.XYZW, T56.X, 0
7863; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T47.XYZW, T55.X, 0
7864; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T48.XYZW, T54.X, 0
7865; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T49.XYZW, T53.X, 0
7866; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T50.XYZW, T52.X, 0
7867; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T25.XYZW, T51.X, 1
7868; EG-NEXT:    CF_END
7869; EG-NEXT:    Fetch clause starting at 38:
7870; EG-NEXT:     VTX_READ_64 T25.XY, T19.X, 0, #1
7871; EG-NEXT:    ALU clause starting at 40:
7872; EG-NEXT:     MOV * T19.X, KC0[2].Z,
7873; EG-NEXT:    ALU clause starting at 41:
7874; EG-NEXT:     LSHR * T19.Z, T25.Y, literal.x,
7875; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
7876; EG-NEXT:     BFE_UINT T19.X, T25.Y, literal.x, 1,
7877; EG-NEXT:     MOV T19.Y, 0.0,
7878; EG-NEXT:     BFE_UINT * T20.Z, T25.Y, literal.y, 1,
7879; EG-NEXT:    30(4.203895e-44), 29(4.063766e-44)
7880; EG-NEXT:     BFE_UINT T20.X, T25.Y, literal.x, 1,
7881; EG-NEXT:     MOV T20.Y, 0.0,
7882; EG-NEXT:     BFE_UINT * T21.Z, T25.Y, literal.y, 1,
7883; EG-NEXT:    28(3.923636e-44), 27(3.783506e-44)
7884; EG-NEXT:     BFE_UINT T21.X, T25.Y, literal.x, 1,
7885; EG-NEXT:     MOV T21.Y, 0.0,
7886; EG-NEXT:     BFE_UINT * T22.Z, T25.Y, literal.y, 1,
7887; EG-NEXT:    26(3.643376e-44), 25(3.503246e-44)
7888; EG-NEXT:     BFE_UINT T22.X, T25.Y, literal.x, 1,
7889; EG-NEXT:     MOV T22.Y, 0.0,
7890; EG-NEXT:     BFE_UINT * T23.Z, T25.Y, literal.y, 1,
7891; EG-NEXT:    24(3.363116e-44), 23(3.222986e-44)
7892; EG-NEXT:     BFE_UINT T23.X, T25.Y, literal.x, 1,
7893; EG-NEXT:     MOV T23.Y, 0.0,
7894; EG-NEXT:     BFE_UINT * T24.Z, T25.Y, literal.y, 1,
7895; EG-NEXT:    22(3.082857e-44), 21(2.942727e-44)
7896; EG-NEXT:     BFE_UINT T24.X, T25.Y, literal.x, 1,
7897; EG-NEXT:     MOV T24.Y, 0.0,
7898; EG-NEXT:     BFE_UINT * T26.Z, T25.Y, literal.y, 1,
7899; EG-NEXT:    20(2.802597e-44), 19(2.662467e-44)
7900; EG-NEXT:     BFE_UINT T26.X, T25.Y, literal.x, 1,
7901; EG-NEXT:     MOV T26.Y, 0.0,
7902; EG-NEXT:     BFE_UINT * T27.Z, T25.Y, literal.y, 1,
7903; EG-NEXT:    18(2.522337e-44), 17(2.382207e-44)
7904; EG-NEXT:     BFE_UINT T27.X, T25.Y, literal.x, 1,
7905; EG-NEXT:     MOV T27.Y, 0.0,
7906; EG-NEXT:     BFE_UINT * T28.Z, T25.Y, literal.y, 1,
7907; EG-NEXT:    16(2.242078e-44), 15(2.101948e-44)
7908; EG-NEXT:     BFE_UINT T28.X, T25.Y, literal.x, 1,
7909; EG-NEXT:     MOV T28.Y, 0.0,
7910; EG-NEXT:     BFE_UINT * T29.Z, T25.Y, literal.y, 1,
7911; EG-NEXT:    14(1.961818e-44), 13(1.821688e-44)
7912; EG-NEXT:     BFE_UINT T29.X, T25.Y, literal.x, 1,
7913; EG-NEXT:     MOV T29.Y, 0.0,
7914; EG-NEXT:     BFE_UINT * T30.Z, T25.Y, literal.y, 1,
7915; EG-NEXT:    12(1.681558e-44), 11(1.541428e-44)
7916; EG-NEXT:     BFE_UINT T30.X, T25.Y, literal.x, 1,
7917; EG-NEXT:     MOV T30.Y, 0.0,
7918; EG-NEXT:     BFE_UINT * T31.Z, T25.Y, literal.y, 1,
7919; EG-NEXT:    10(1.401298e-44), 9(1.261169e-44)
7920; EG-NEXT:     BFE_UINT T31.X, T25.Y, literal.x, 1,
7921; EG-NEXT:     MOV T31.Y, 0.0,
7922; EG-NEXT:     BFE_UINT * T32.Z, T25.Y, literal.y, 1,
7923; EG-NEXT:    8(1.121039e-44), 7(9.809089e-45)
7924; EG-NEXT:     BFE_UINT T32.X, T25.Y, literal.x, 1,
7925; EG-NEXT:     MOV T32.Y, 0.0,
7926; EG-NEXT:     BFE_UINT * T33.Z, T25.Y, literal.y, 1,
7927; EG-NEXT:    6(8.407791e-45), 5(7.006492e-45)
7928; EG-NEXT:     BFE_UINT T33.X, T25.Y, literal.x, 1,
7929; EG-NEXT:     MOV T33.Y, 0.0,
7930; EG-NEXT:     BFE_UINT * T34.Z, T25.Y, literal.y, 1,
7931; EG-NEXT:    4(5.605194e-45), 3(4.203895e-45)
7932; EG-NEXT:     BFE_UINT T34.X, T25.Y, literal.x, 1,
7933; EG-NEXT:     MOV T34.Y, 0.0,
7934; EG-NEXT:     BFE_UINT T35.Z, T25.Y, 1, 1,
7935; EG-NEXT:     AND_INT * T35.X, T25.Y, 1,
7936; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
7937; EG-NEXT:     MOV T35.Y, 0.0,
7938; EG-NEXT:     LSHR * T36.Z, T25.X, literal.x,
7939; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
7940; EG-NEXT:     BFE_UINT T36.X, T25.X, literal.x, 1,
7941; EG-NEXT:     MOV T36.Y, 0.0,
7942; EG-NEXT:     BFE_UINT * T37.Z, T25.X, literal.y, 1,
7943; EG-NEXT:    30(4.203895e-44), 29(4.063766e-44)
7944; EG-NEXT:     BFE_UINT T37.X, T25.X, literal.x, 1,
7945; EG-NEXT:     MOV T37.Y, 0.0,
7946; EG-NEXT:     BFE_UINT * T38.Z, T25.X, literal.y, 1,
7947; EG-NEXT:    28(3.923636e-44), 27(3.783506e-44)
7948; EG-NEXT:     BFE_UINT T38.X, T25.X, literal.x, 1,
7949; EG-NEXT:     MOV T38.Y, 0.0,
7950; EG-NEXT:     BFE_UINT * T39.Z, T25.X, literal.y, 1,
7951; EG-NEXT:    26(3.643376e-44), 25(3.503246e-44)
7952; EG-NEXT:     BFE_UINT T39.X, T25.X, literal.x, 1,
7953; EG-NEXT:     MOV T39.Y, 0.0,
7954; EG-NEXT:     BFE_UINT * T40.Z, T25.X, literal.y, 1,
7955; EG-NEXT:    24(3.363116e-44), 23(3.222986e-44)
7956; EG-NEXT:     BFE_UINT T40.X, T25.X, literal.x, 1,
7957; EG-NEXT:     MOV T40.Y, 0.0,
7958; EG-NEXT:     BFE_UINT * T41.Z, T25.X, literal.y, 1,
7959; EG-NEXT:    22(3.082857e-44), 21(2.942727e-44)
7960; EG-NEXT:     BFE_UINT T41.X, T25.X, literal.x, 1,
7961; EG-NEXT:     MOV T41.Y, 0.0,
7962; EG-NEXT:     BFE_UINT * T42.Z, T25.X, literal.y, 1,
7963; EG-NEXT:    20(2.802597e-44), 19(2.662467e-44)
7964; EG-NEXT:     BFE_UINT T42.X, T25.X, literal.x, 1,
7965; EG-NEXT:     MOV T42.Y, 0.0,
7966; EG-NEXT:     BFE_UINT * T43.Z, T25.X, literal.y, 1,
7967; EG-NEXT:    18(2.522337e-44), 17(2.382207e-44)
7968; EG-NEXT:     BFE_UINT * T43.X, T25.X, literal.x, 1,
7969; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
7970; EG-NEXT:    ALU clause starting at 137:
7971; EG-NEXT:     MOV T43.Y, 0.0,
7972; EG-NEXT:     BFE_UINT * T44.Z, T25.X, literal.x, 1,
7973; EG-NEXT:    15(2.101948e-44), 0(0.000000e+00)
7974; EG-NEXT:     BFE_UINT T44.X, T25.X, literal.x, 1,
7975; EG-NEXT:     MOV T44.Y, 0.0,
7976; EG-NEXT:     BFE_UINT * T45.Z, T25.X, literal.y, 1,
7977; EG-NEXT:    14(1.961818e-44), 13(1.821688e-44)
7978; EG-NEXT:     BFE_UINT T45.X, T25.X, literal.x, 1,
7979; EG-NEXT:     MOV T45.Y, 0.0,
7980; EG-NEXT:     BFE_UINT * T46.Z, T25.X, literal.y, 1,
7981; EG-NEXT:    12(1.681558e-44), 11(1.541428e-44)
7982; EG-NEXT:     BFE_UINT T46.X, T25.X, literal.x, 1,
7983; EG-NEXT:     MOV T46.Y, 0.0,
7984; EG-NEXT:     BFE_UINT * T47.Z, T25.X, literal.y, 1,
7985; EG-NEXT:    10(1.401298e-44), 9(1.261169e-44)
7986; EG-NEXT:     BFE_UINT T47.X, T25.X, literal.x, 1,
7987; EG-NEXT:     MOV T47.Y, 0.0,
7988; EG-NEXT:     BFE_UINT * T48.Z, T25.X, literal.y, 1,
7989; EG-NEXT:    8(1.121039e-44), 7(9.809089e-45)
7990; EG-NEXT:     BFE_UINT T48.X, T25.X, literal.x, 1,
7991; EG-NEXT:     MOV T48.Y, 0.0,
7992; EG-NEXT:     BFE_UINT * T49.Z, T25.X, literal.y, 1,
7993; EG-NEXT:    6(8.407791e-45), 5(7.006492e-45)
7994; EG-NEXT:     BFE_UINT T49.X, T25.X, literal.x, 1,
7995; EG-NEXT:     MOV T49.Y, 0.0,
7996; EG-NEXT:     BFE_UINT * T50.Z, T25.X, literal.y, 1,
7997; EG-NEXT:    4(5.605194e-45), 3(4.203895e-45)
7998; EG-NEXT:     BFE_UINT T50.X, T25.X, literal.x, 1,
7999; EG-NEXT:     MOV T50.Y, 0.0,
8000; EG-NEXT:     BFE_UINT T25.Z, T25.X, 1, 1,
8001; EG-NEXT:     AND_INT * T25.X, T25.X, 1,
8002; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
8003; EG-NEXT:     MOV T25.Y, 0.0,
8004; EG-NEXT:     MOV T19.W, 0.0,
8005; EG-NEXT:     MOV * T20.W, 0.0,
8006; EG-NEXT:     MOV T21.W, 0.0,
8007; EG-NEXT:     MOV * T22.W, 0.0,
8008; EG-NEXT:     MOV T23.W, 0.0,
8009; EG-NEXT:     MOV * T24.W, 0.0,
8010; EG-NEXT:     MOV T26.W, 0.0,
8011; EG-NEXT:     MOV * T27.W, 0.0,
8012; EG-NEXT:     MOV T28.W, 0.0,
8013; EG-NEXT:     MOV * T29.W, 0.0,
8014; EG-NEXT:     MOV T30.W, 0.0,
8015; EG-NEXT:     MOV * T31.W, 0.0,
8016; EG-NEXT:     MOV T32.W, 0.0,
8017; EG-NEXT:     MOV * T33.W, 0.0,
8018; EG-NEXT:     MOV T34.W, 0.0,
8019; EG-NEXT:     MOV * T35.W, 0.0,
8020; EG-NEXT:     MOV T36.W, 0.0,
8021; EG-NEXT:     MOV * T37.W, 0.0,
8022; EG-NEXT:     MOV T38.W, 0.0,
8023; EG-NEXT:     MOV * T39.W, 0.0,
8024; EG-NEXT:     MOV T40.W, 0.0,
8025; EG-NEXT:     MOV * T41.W, 0.0,
8026; EG-NEXT:     MOV T42.W, 0.0,
8027; EG-NEXT:     MOV * T43.W, 0.0,
8028; EG-NEXT:     MOV T44.W, 0.0,
8029; EG-NEXT:     MOV * T45.W, 0.0,
8030; EG-NEXT:     MOV T46.W, 0.0,
8031; EG-NEXT:     MOV * T47.W, 0.0,
8032; EG-NEXT:     MOV T48.W, 0.0,
8033; EG-NEXT:     MOV * T49.W, 0.0,
8034; EG-NEXT:     MOV T50.W, 0.0,
8035; EG-NEXT:     MOV * T25.W, 0.0,
8036; EG-NEXT:     LSHR T51.X, KC0[2].Y, literal.x,
8037; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
8038; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
8039; EG-NEXT:     LSHR T52.X, PV.W, literal.x,
8040; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
8041; EG-NEXT:    2(2.802597e-45), 32(4.484155e-44)
8042; EG-NEXT:     LSHR T53.X, PV.W, literal.x,
8043; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
8044; EG-NEXT:    2(2.802597e-45), 48(6.726233e-44)
8045; EG-NEXT:     LSHR T54.X, PV.W, literal.x,
8046; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
8047; EG-NEXT:    2(2.802597e-45), 64(8.968310e-44)
8048; EG-NEXT:     LSHR T55.X, PV.W, literal.x,
8049; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
8050; EG-NEXT:    2(2.802597e-45), 80(1.121039e-43)
8051; EG-NEXT:     LSHR T56.X, PV.W, literal.x,
8052; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
8053; EG-NEXT:    2(2.802597e-45), 96(1.345247e-43)
8054; EG-NEXT:     LSHR T57.X, PV.W, literal.x,
8055; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
8056; EG-NEXT:    2(2.802597e-45), 112(1.569454e-43)
8057; EG-NEXT:     LSHR T58.X, PV.W, literal.x,
8058; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
8059; EG-NEXT:    2(2.802597e-45), 128(1.793662e-43)
8060; EG-NEXT:     LSHR T59.X, PV.W, literal.x,
8061; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
8062; EG-NEXT:    2(2.802597e-45), 144(2.017870e-43)
8063; EG-NEXT:     LSHR T60.X, PV.W, literal.x,
8064; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
8065; EG-NEXT:    2(2.802597e-45), 160(2.242078e-43)
8066; EG-NEXT:     LSHR T61.X, PV.W, literal.x,
8067; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
8068; EG-NEXT:    2(2.802597e-45), 176(2.466285e-43)
8069; EG-NEXT:     LSHR * T62.X, PV.W, literal.x,
8070; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
8071; EG-NEXT:    ALU clause starting at 237:
8072; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.x,
8073; EG-NEXT:    192(2.690493e-43), 0(0.000000e+00)
8074; EG-NEXT:     LSHR T63.X, PV.W, literal.x,
8075; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
8076; EG-NEXT:    2(2.802597e-45), 208(2.914701e-43)
8077; EG-NEXT:     LSHR T64.X, PV.W, literal.x,
8078; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
8079; EG-NEXT:    2(2.802597e-45), 224(3.138909e-43)
8080; EG-NEXT:     LSHR T65.X, PV.W, literal.x,
8081; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
8082; EG-NEXT:    2(2.802597e-45), 240(3.363116e-43)
8083; EG-NEXT:     LSHR T66.X, PV.W, literal.x,
8084; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
8085; EG-NEXT:    2(2.802597e-45), 256(3.587324e-43)
8086; EG-NEXT:     LSHR T67.X, PV.W, literal.x,
8087; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
8088; EG-NEXT:    2(2.802597e-45), 272(3.811532e-43)
8089; EG-NEXT:     LSHR T68.X, PV.W, literal.x,
8090; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
8091; EG-NEXT:    2(2.802597e-45), 288(4.035740e-43)
8092; EG-NEXT:     LSHR T69.X, PV.W, literal.x,
8093; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
8094; EG-NEXT:    2(2.802597e-45), 304(4.259947e-43)
8095; EG-NEXT:     LSHR T70.X, PV.W, literal.x,
8096; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
8097; EG-NEXT:    2(2.802597e-45), 320(4.484155e-43)
8098; EG-NEXT:     LSHR T71.X, PV.W, literal.x,
8099; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
8100; EG-NEXT:    2(2.802597e-45), 336(4.708363e-43)
8101; EG-NEXT:     LSHR T72.X, PV.W, literal.x,
8102; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
8103; EG-NEXT:    2(2.802597e-45), 352(4.932571e-43)
8104; EG-NEXT:     LSHR T73.X, PV.W, literal.x,
8105; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
8106; EG-NEXT:    2(2.802597e-45), 368(5.156778e-43)
8107; EG-NEXT:     LSHR T74.X, PV.W, literal.x,
8108; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
8109; EG-NEXT:    2(2.802597e-45), 384(5.380986e-43)
8110; EG-NEXT:     LSHR T75.X, PV.W, literal.x,
8111; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
8112; EG-NEXT:    2(2.802597e-45), 400(5.605194e-43)
8113; EG-NEXT:     LSHR T76.X, PV.W, literal.x,
8114; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
8115; EG-NEXT:    2(2.802597e-45), 416(5.829402e-43)
8116; EG-NEXT:     LSHR T77.X, PV.W, literal.x,
8117; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
8118; EG-NEXT:    2(2.802597e-45), 432(6.053609e-43)
8119; EG-NEXT:     LSHR T78.X, PV.W, literal.x,
8120; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
8121; EG-NEXT:    2(2.802597e-45), 448(6.277817e-43)
8122; EG-NEXT:     LSHR T79.X, PV.W, literal.x,
8123; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
8124; EG-NEXT:    2(2.802597e-45), 464(6.502025e-43)
8125; EG-NEXT:     LSHR T80.X, PV.W, literal.x,
8126; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
8127; EG-NEXT:    2(2.802597e-45), 480(6.726233e-43)
8128; EG-NEXT:     LSHR T81.X, PV.W, literal.x,
8129; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
8130; EG-NEXT:    2(2.802597e-45), 496(6.950440e-43)
8131; EG-NEXT:     LSHR * T82.X, PV.W, literal.x,
8132; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
8133;
8134; GFX12-LABEL: constant_zextload_v64i1_to_v64i64:
8135; GFX12:       ; %bb.0:
8136; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
8137; GFX12-NEXT:    s_wait_kmcnt 0x0
8138; GFX12-NEXT:    s_load_b64 s[2:3], s[2:3], 0x0
8139; GFX12-NEXT:    s_wait_kmcnt 0x0
8140; GFX12-NEXT:    s_bfe_u32 s4, s3, 0x10014
8141; GFX12-NEXT:    s_wait_alu 0xfffe
8142; GFX12-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s4
8143; GFX12-NEXT:    s_bfe_u32 s5, s3, 0x10015
8144; GFX12-NEXT:    s_lshr_b32 s4, s3, 31
8145; GFX12-NEXT:    s_wait_alu 0xfffe
8146; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
8147; GFX12-NEXT:    v_dual_mov_b32 v2, s5 :: v_dual_mov_b32 v3, v1
8148; GFX12-NEXT:    s_bfe_u32 s5, s3, 0x1001e
8149; GFX12-NEXT:    global_store_b128 v1, v[0:3], s[0:1] offset:416
8150; GFX12-NEXT:    s_wait_alu 0xfffe
8151; GFX12-NEXT:    v_mov_b32_e32 v0, s5
8152; GFX12-NEXT:    v_mov_b32_e32 v2, s4
8153; GFX12-NEXT:    s_bfe_u32 s4, s3, 0x1001d
8154; GFX12-NEXT:    s_bfe_u32 s5, s3, 0x1001c
8155; GFX12-NEXT:    global_store_b128 v1, v[0:3], s[0:1] offset:496
8156; GFX12-NEXT:    s_wait_alu 0xfffe
8157; GFX12-NEXT:    v_mov_b32_e32 v0, s5
8158; GFX12-NEXT:    v_mov_b32_e32 v2, s4
8159; GFX12-NEXT:    s_bfe_u32 s4, s3, 0x1001b
8160; GFX12-NEXT:    s_bfe_u32 s5, s3, 0x1001a
8161; GFX12-NEXT:    global_store_b128 v1, v[0:3], s[0:1] offset:480
8162; GFX12-NEXT:    s_wait_alu 0xfffe
8163; GFX12-NEXT:    v_mov_b32_e32 v0, s5
8164; GFX12-NEXT:    v_mov_b32_e32 v2, s4
8165; GFX12-NEXT:    s_bfe_u32 s4, s3, 0x10019
8166; GFX12-NEXT:    s_bfe_u32 s5, s3, 0x10018
8167; GFX12-NEXT:    global_store_b128 v1, v[0:3], s[0:1] offset:464
8168; GFX12-NEXT:    s_wait_alu 0xfffe
8169; GFX12-NEXT:    v_mov_b32_e32 v0, s5
8170; GFX12-NEXT:    v_mov_b32_e32 v2, s4
8171; GFX12-NEXT:    s_bfe_u32 s4, s3, 0x10017
8172; GFX12-NEXT:    s_bfe_u32 s5, s3, 0x10016
8173; GFX12-NEXT:    global_store_b128 v1, v[0:3], s[0:1] offset:448
8174; GFX12-NEXT:    s_wait_alu 0xfffe
8175; GFX12-NEXT:    v_mov_b32_e32 v0, s5
8176; GFX12-NEXT:    v_mov_b32_e32 v2, s4
8177; GFX12-NEXT:    s_bfe_u32 s4, s3, 0x10013
8178; GFX12-NEXT:    s_bfe_u32 s5, s3, 0x10012
8179; GFX12-NEXT:    global_store_b128 v1, v[0:3], s[0:1] offset:432
8180; GFX12-NEXT:    s_wait_alu 0xfffe
8181; GFX12-NEXT:    v_mov_b32_e32 v0, s5
8182; GFX12-NEXT:    v_mov_b32_e32 v2, s4
8183; GFX12-NEXT:    s_bfe_u32 s4, s3, 0x10011
8184; GFX12-NEXT:    s_bfe_u32 s5, s3, 0x10010
8185; GFX12-NEXT:    global_store_b128 v1, v[0:3], s[0:1] offset:400
8186; GFX12-NEXT:    s_wait_alu 0xfffe
8187; GFX12-NEXT:    v_mov_b32_e32 v0, s5
8188; GFX12-NEXT:    v_mov_b32_e32 v2, s4
8189; GFX12-NEXT:    s_bfe_u32 s4, s3, 0x1000f
8190; GFX12-NEXT:    s_bfe_u32 s5, s3, 0x1000e
8191; GFX12-NEXT:    global_store_b128 v1, v[0:3], s[0:1] offset:384
8192; GFX12-NEXT:    s_wait_alu 0xfffe
8193; GFX12-NEXT:    v_mov_b32_e32 v0, s5
8194; GFX12-NEXT:    v_mov_b32_e32 v2, s4
8195; GFX12-NEXT:    s_bfe_u32 s4, s3, 0x1000d
8196; GFX12-NEXT:    s_bfe_u32 s5, s3, 0x1000c
8197; GFX12-NEXT:    global_store_b128 v1, v[0:3], s[0:1] offset:368
8198; GFX12-NEXT:    s_wait_alu 0xfffe
8199; GFX12-NEXT:    v_mov_b32_e32 v0, s5
8200; GFX12-NEXT:    v_mov_b32_e32 v2, s4
8201; GFX12-NEXT:    s_bfe_u32 s4, s3, 0x1000b
8202; GFX12-NEXT:    s_bfe_u32 s5, s3, 0x1000a
8203; GFX12-NEXT:    global_store_b128 v1, v[0:3], s[0:1] offset:352
8204; GFX12-NEXT:    s_wait_alu 0xfffe
8205; GFX12-NEXT:    v_mov_b32_e32 v0, s5
8206; GFX12-NEXT:    v_mov_b32_e32 v2, s4
8207; GFX12-NEXT:    s_bfe_u32 s4, s3, 0x10009
8208; GFX12-NEXT:    s_bfe_u32 s5, s3, 0x10008
8209; GFX12-NEXT:    global_store_b128 v1, v[0:3], s[0:1] offset:336
8210; GFX12-NEXT:    s_wait_alu 0xfffe
8211; GFX12-NEXT:    v_mov_b32_e32 v0, s5
8212; GFX12-NEXT:    v_mov_b32_e32 v2, s4
8213; GFX12-NEXT:    s_bfe_u32 s4, s3, 0x10007
8214; GFX12-NEXT:    s_bfe_u32 s5, s3, 0x10006
8215; GFX12-NEXT:    global_store_b128 v1, v[0:3], s[0:1] offset:320
8216; GFX12-NEXT:    s_wait_alu 0xfffe
8217; GFX12-NEXT:    v_mov_b32_e32 v0, s5
8218; GFX12-NEXT:    v_mov_b32_e32 v2, s4
8219; GFX12-NEXT:    s_bfe_u32 s4, s3, 0x10005
8220; GFX12-NEXT:    s_bfe_u32 s5, s3, 0x10004
8221; GFX12-NEXT:    global_store_b128 v1, v[0:3], s[0:1] offset:304
8222; GFX12-NEXT:    s_wait_alu 0xfffe
8223; GFX12-NEXT:    v_mov_b32_e32 v0, s5
8224; GFX12-NEXT:    v_mov_b32_e32 v2, s4
8225; GFX12-NEXT:    s_bfe_u32 s4, s3, 0x10003
8226; GFX12-NEXT:    s_bfe_u32 s5, s3, 0x10002
8227; GFX12-NEXT:    global_store_b128 v1, v[0:3], s[0:1] offset:288
8228; GFX12-NEXT:    s_wait_alu 0xfffe
8229; GFX12-NEXT:    v_mov_b32_e32 v0, s5
8230; GFX12-NEXT:    v_mov_b32_e32 v2, s4
8231; GFX12-NEXT:    s_bfe_u32 s4, s3, 0x10001
8232; GFX12-NEXT:    s_and_b32 s3, s3, 1
8233; GFX12-NEXT:    global_store_b128 v1, v[0:3], s[0:1] offset:272
8234; GFX12-NEXT:    s_wait_alu 0xfffe
8235; GFX12-NEXT:    v_mov_b32_e32 v0, s3
8236; GFX12-NEXT:    v_mov_b32_e32 v2, s4
8237; GFX12-NEXT:    s_lshr_b32 s3, s2, 31
8238; GFX12-NEXT:    s_bfe_u32 s4, s2, 0x1001e
8239; GFX12-NEXT:    global_store_b128 v1, v[0:3], s[0:1] offset:256
8240; GFX12-NEXT:    s_wait_alu 0xfffe
8241; GFX12-NEXT:    v_mov_b32_e32 v0, s4
8242; GFX12-NEXT:    v_mov_b32_e32 v2, s3
8243; GFX12-NEXT:    s_bfe_u32 s3, s2, 0x1001d
8244; GFX12-NEXT:    s_bfe_u32 s4, s2, 0x1001c
8245; GFX12-NEXT:    global_store_b128 v1, v[0:3], s[0:1] offset:240
8246; GFX12-NEXT:    s_wait_alu 0xfffe
8247; GFX12-NEXT:    v_mov_b32_e32 v0, s4
8248; GFX12-NEXT:    v_mov_b32_e32 v2, s3
8249; GFX12-NEXT:    s_bfe_u32 s3, s2, 0x1001b
8250; GFX12-NEXT:    s_bfe_u32 s4, s2, 0x1001a
8251; GFX12-NEXT:    global_store_b128 v1, v[0:3], s[0:1] offset:224
8252; GFX12-NEXT:    s_wait_alu 0xfffe
8253; GFX12-NEXT:    v_mov_b32_e32 v0, s4
8254; GFX12-NEXT:    v_mov_b32_e32 v2, s3
8255; GFX12-NEXT:    s_bfe_u32 s3, s2, 0x10019
8256; GFX12-NEXT:    s_bfe_u32 s4, s2, 0x10018
8257; GFX12-NEXT:    global_store_b128 v1, v[0:3], s[0:1] offset:208
8258; GFX12-NEXT:    s_wait_alu 0xfffe
8259; GFX12-NEXT:    v_mov_b32_e32 v0, s4
8260; GFX12-NEXT:    v_mov_b32_e32 v2, s3
8261; GFX12-NEXT:    s_bfe_u32 s3, s2, 0x10017
8262; GFX12-NEXT:    s_bfe_u32 s4, s2, 0x10016
8263; GFX12-NEXT:    global_store_b128 v1, v[0:3], s[0:1] offset:192
8264; GFX12-NEXT:    s_wait_alu 0xfffe
8265; GFX12-NEXT:    v_mov_b32_e32 v0, s4
8266; GFX12-NEXT:    v_mov_b32_e32 v2, s3
8267; GFX12-NEXT:    s_bfe_u32 s3, s2, 0x10014
8268; GFX12-NEXT:    s_bfe_u32 s4, s2, 0x10015
8269; GFX12-NEXT:    global_store_b128 v1, v[0:3], s[0:1] offset:176
8270; GFX12-NEXT:    s_wait_alu 0xfffe
8271; GFX12-NEXT:    v_mov_b32_e32 v0, s3
8272; GFX12-NEXT:    v_mov_b32_e32 v2, s4
8273; GFX12-NEXT:    s_bfe_u32 s3, s2, 0x10013
8274; GFX12-NEXT:    s_bfe_u32 s4, s2, 0x10012
8275; GFX12-NEXT:    global_store_b128 v1, v[0:3], s[0:1] offset:160
8276; GFX12-NEXT:    s_wait_alu 0xfffe
8277; GFX12-NEXT:    v_mov_b32_e32 v0, s4
8278; GFX12-NEXT:    v_mov_b32_e32 v2, s3
8279; GFX12-NEXT:    s_bfe_u32 s3, s2, 0x10011
8280; GFX12-NEXT:    s_bfe_u32 s4, s2, 0x10010
8281; GFX12-NEXT:    global_store_b128 v1, v[0:3], s[0:1] offset:144
8282; GFX12-NEXT:    s_wait_alu 0xfffe
8283; GFX12-NEXT:    v_mov_b32_e32 v0, s4
8284; GFX12-NEXT:    v_mov_b32_e32 v2, s3
8285; GFX12-NEXT:    s_bfe_u32 s3, s2, 0x1000f
8286; GFX12-NEXT:    s_bfe_u32 s4, s2, 0x1000e
8287; GFX12-NEXT:    global_store_b128 v1, v[0:3], s[0:1] offset:128
8288; GFX12-NEXT:    s_wait_alu 0xfffe
8289; GFX12-NEXT:    v_mov_b32_e32 v0, s4
8290; GFX12-NEXT:    v_mov_b32_e32 v2, s3
8291; GFX12-NEXT:    s_bfe_u32 s3, s2, 0x1000d
8292; GFX12-NEXT:    s_bfe_u32 s4, s2, 0x1000c
8293; GFX12-NEXT:    global_store_b128 v1, v[0:3], s[0:1] offset:112
8294; GFX12-NEXT:    s_wait_alu 0xfffe
8295; GFX12-NEXT:    v_mov_b32_e32 v0, s4
8296; GFX12-NEXT:    v_mov_b32_e32 v2, s3
8297; GFX12-NEXT:    s_bfe_u32 s3, s2, 0x1000b
8298; GFX12-NEXT:    s_bfe_u32 s4, s2, 0x1000a
8299; GFX12-NEXT:    global_store_b128 v1, v[0:3], s[0:1] offset:96
8300; GFX12-NEXT:    s_wait_alu 0xfffe
8301; GFX12-NEXT:    v_mov_b32_e32 v0, s4
8302; GFX12-NEXT:    v_mov_b32_e32 v2, s3
8303; GFX12-NEXT:    s_bfe_u32 s3, s2, 0x10009
8304; GFX12-NEXT:    s_bfe_u32 s4, s2, 0x10008
8305; GFX12-NEXT:    global_store_b128 v1, v[0:3], s[0:1] offset:80
8306; GFX12-NEXT:    s_wait_alu 0xfffe
8307; GFX12-NEXT:    v_mov_b32_e32 v0, s4
8308; GFX12-NEXT:    v_mov_b32_e32 v2, s3
8309; GFX12-NEXT:    s_bfe_u32 s3, s2, 0x10007
8310; GFX12-NEXT:    s_bfe_u32 s4, s2, 0x10006
8311; GFX12-NEXT:    global_store_b128 v1, v[0:3], s[0:1] offset:64
8312; GFX12-NEXT:    s_wait_alu 0xfffe
8313; GFX12-NEXT:    v_mov_b32_e32 v0, s4
8314; GFX12-NEXT:    v_mov_b32_e32 v2, s3
8315; GFX12-NEXT:    s_bfe_u32 s3, s2, 0x10005
8316; GFX12-NEXT:    s_bfe_u32 s4, s2, 0x10004
8317; GFX12-NEXT:    global_store_b128 v1, v[0:3], s[0:1] offset:48
8318; GFX12-NEXT:    s_wait_alu 0xfffe
8319; GFX12-NEXT:    v_mov_b32_e32 v0, s4
8320; GFX12-NEXT:    v_mov_b32_e32 v2, s3
8321; GFX12-NEXT:    s_bfe_u32 s3, s2, 0x10003
8322; GFX12-NEXT:    s_bfe_u32 s4, s2, 0x10002
8323; GFX12-NEXT:    global_store_b128 v1, v[0:3], s[0:1] offset:32
8324; GFX12-NEXT:    s_wait_alu 0xfffe
8325; GFX12-NEXT:    v_mov_b32_e32 v0, s4
8326; GFX12-NEXT:    v_mov_b32_e32 v2, s3
8327; GFX12-NEXT:    s_bfe_u32 s3, s2, 0x10001
8328; GFX12-NEXT:    s_and_b32 s2, s2, 1
8329; GFX12-NEXT:    global_store_b128 v1, v[0:3], s[0:1] offset:16
8330; GFX12-NEXT:    s_wait_alu 0xfffe
8331; GFX12-NEXT:    v_mov_b32_e32 v0, s2
8332; GFX12-NEXT:    v_mov_b32_e32 v2, s3
8333; GFX12-NEXT:    global_store_b128 v1, v[0:3], s[0:1]
8334; GFX12-NEXT:    s_endpgm
8335  %load = load <64 x i1>, ptr addrspace(4) %in
8336  %ext = zext <64 x i1> %load to <64 x i64>
8337  store <64 x i64> %ext, ptr addrspace(1) %out
8338  ret void
8339}
8340
8341define amdgpu_kernel void @constant_sextload_v64i1_to_v64i64(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
8342; GFX6-LABEL: constant_sextload_v64i1_to_v64i64:
8343; GFX6:       ; %bb.0:
8344; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
8345; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
8346; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[2:3], 0x0
8347; GFX6-NEXT:    s_mov_b32 s3, 0xf000
8348; GFX6-NEXT:    s_mov_b32 s2, -1
8349; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
8350; GFX6-NEXT:    s_lshr_b32 s42, s5, 30
8351; GFX6-NEXT:    s_lshr_b32 s36, s5, 28
8352; GFX6-NEXT:    s_lshr_b32 s38, s5, 29
8353; GFX6-NEXT:    s_lshr_b32 s30, s5, 26
8354; GFX6-NEXT:    s_lshr_b32 s34, s5, 27
8355; GFX6-NEXT:    s_lshr_b32 s26, s5, 24
8356; GFX6-NEXT:    s_lshr_b32 s28, s5, 25
8357; GFX6-NEXT:    s_lshr_b32 s22, s5, 22
8358; GFX6-NEXT:    s_lshr_b32 s24, s5, 23
8359; GFX6-NEXT:    s_lshr_b32 s18, s5, 20
8360; GFX6-NEXT:    s_lshr_b32 s20, s5, 21
8361; GFX6-NEXT:    s_lshr_b32 s14, s5, 18
8362; GFX6-NEXT:    s_lshr_b32 s16, s5, 19
8363; GFX6-NEXT:    s_lshr_b32 s10, s5, 16
8364; GFX6-NEXT:    s_lshr_b32 s12, s5, 17
8365; GFX6-NEXT:    s_lshr_b32 s6, s5, 14
8366; GFX6-NEXT:    s_lshr_b32 s8, s5, 15
8367; GFX6-NEXT:    s_mov_b32 s40, s5
8368; GFX6-NEXT:    s_ashr_i32 s7, s5, 31
8369; GFX6-NEXT:    s_bfe_i64 s[44:45], s[40:41], 0x10000
8370; GFX6-NEXT:    v_mov_b32_e32 v4, s7
8371; GFX6-NEXT:    s_lshr_b32 s40, s5, 12
8372; GFX6-NEXT:    v_mov_b32_e32 v0, s44
8373; GFX6-NEXT:    v_mov_b32_e32 v1, s45
8374; GFX6-NEXT:    s_bfe_i64 s[44:45], s[4:5], 0x10000
8375; GFX6-NEXT:    s_bfe_i64 s[42:43], s[42:43], 0x10000
8376; GFX6-NEXT:    v_mov_b32_e32 v6, s44
8377; GFX6-NEXT:    v_mov_b32_e32 v7, s45
8378; GFX6-NEXT:    s_lshr_b32 s44, s5, 13
8379; GFX6-NEXT:    v_mov_b32_e32 v2, s42
8380; GFX6-NEXT:    v_mov_b32_e32 v3, s43
8381; GFX6-NEXT:    s_lshr_b32 s42, s5, 10
8382; GFX6-NEXT:    s_bfe_i64 s[36:37], s[36:37], 0x10000
8383; GFX6-NEXT:    s_bfe_i64 s[38:39], s[38:39], 0x10000
8384; GFX6-NEXT:    v_mov_b32_e32 v8, s36
8385; GFX6-NEXT:    v_mov_b32_e32 v9, s37
8386; GFX6-NEXT:    s_lshr_b32 s36, s5, 11
8387; GFX6-NEXT:    v_mov_b32_e32 v10, s38
8388; GFX6-NEXT:    v_mov_b32_e32 v11, s39
8389; GFX6-NEXT:    s_lshr_b32 s38, s5, 8
8390; GFX6-NEXT:    s_bfe_i64 s[30:31], s[30:31], 0x10000
8391; GFX6-NEXT:    s_bfe_i64 s[34:35], s[34:35], 0x10000
8392; GFX6-NEXT:    v_mov_b32_e32 v12, s30
8393; GFX6-NEXT:    v_mov_b32_e32 v13, s31
8394; GFX6-NEXT:    s_lshr_b32 s30, s5, 9
8395; GFX6-NEXT:    v_mov_b32_e32 v14, s34
8396; GFX6-NEXT:    v_mov_b32_e32 v15, s35
8397; GFX6-NEXT:    s_lshr_b32 s34, s5, 6
8398; GFX6-NEXT:    s_bfe_i64 s[28:29], s[28:29], 0x10000
8399; GFX6-NEXT:    s_bfe_i64 s[26:27], s[26:27], 0x10000
8400; GFX6-NEXT:    v_mov_b32_e32 v5, s7
8401; GFX6-NEXT:    buffer_store_dwordx4 v[2:5], off, s[0:3], 0 offset:496
8402; GFX6-NEXT:    s_waitcnt expcnt(0)
8403; GFX6-NEXT:    v_mov_b32_e32 v2, s26
8404; GFX6-NEXT:    v_mov_b32_e32 v3, s27
8405; GFX6-NEXT:    s_lshr_b32 s26, s5, 7
8406; GFX6-NEXT:    v_mov_b32_e32 v4, s28
8407; GFX6-NEXT:    v_mov_b32_e32 v5, s29
8408; GFX6-NEXT:    s_lshr_b32 s28, s5, 4
8409; GFX6-NEXT:    s_bfe_i64 s[24:25], s[24:25], 0x10000
8410; GFX6-NEXT:    s_bfe_i64 s[22:23], s[22:23], 0x10000
8411; GFX6-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:480
8412; GFX6-NEXT:    s_waitcnt expcnt(0)
8413; GFX6-NEXT:    v_mov_b32_e32 v8, s22
8414; GFX6-NEXT:    v_mov_b32_e32 v9, s23
8415; GFX6-NEXT:    s_lshr_b32 s22, s5, 5
8416; GFX6-NEXT:    v_mov_b32_e32 v10, s24
8417; GFX6-NEXT:    v_mov_b32_e32 v11, s25
8418; GFX6-NEXT:    s_lshr_b32 s24, s5, 2
8419; GFX6-NEXT:    s_bfe_i64 s[20:21], s[20:21], 0x10000
8420; GFX6-NEXT:    s_bfe_i64 s[18:19], s[18:19], 0x10000
8421; GFX6-NEXT:    buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:464
8422; GFX6-NEXT:    s_waitcnt expcnt(0)
8423; GFX6-NEXT:    v_mov_b32_e32 v12, s18
8424; GFX6-NEXT:    v_mov_b32_e32 v13, s19
8425; GFX6-NEXT:    s_lshr_b32 s18, s5, 3
8426; GFX6-NEXT:    v_mov_b32_e32 v14, s20
8427; GFX6-NEXT:    v_mov_b32_e32 v15, s21
8428; GFX6-NEXT:    s_lshr_b32 s20, s5, 1
8429; GFX6-NEXT:    s_bfe_i64 s[16:17], s[16:17], 0x10000
8430; GFX6-NEXT:    s_bfe_i64 s[14:15], s[14:15], 0x10000
8431; GFX6-NEXT:    buffer_store_dwordx4 v[2:5], off, s[0:3], 0 offset:448
8432; GFX6-NEXT:    s_waitcnt expcnt(0)
8433; GFX6-NEXT:    v_mov_b32_e32 v2, s14
8434; GFX6-NEXT:    v_mov_b32_e32 v3, s15
8435; GFX6-NEXT:    s_lshr_b32 s14, s4, 30
8436; GFX6-NEXT:    v_mov_b32_e32 v4, s16
8437; GFX6-NEXT:    v_mov_b32_e32 v5, s17
8438; GFX6-NEXT:    s_lshr_b32 s16, s4, 31
8439; GFX6-NEXT:    s_bfe_i64 s[12:13], s[12:13], 0x10000
8440; GFX6-NEXT:    s_bfe_i64 s[10:11], s[10:11], 0x10000
8441; GFX6-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:432
8442; GFX6-NEXT:    s_waitcnt expcnt(0)
8443; GFX6-NEXT:    v_mov_b32_e32 v8, s10
8444; GFX6-NEXT:    v_mov_b32_e32 v9, s11
8445; GFX6-NEXT:    s_lshr_b32 s10, s4, 28
8446; GFX6-NEXT:    v_mov_b32_e32 v10, s12
8447; GFX6-NEXT:    v_mov_b32_e32 v11, s13
8448; GFX6-NEXT:    s_lshr_b32 s12, s4, 29
8449; GFX6-NEXT:    s_bfe_i64 s[8:9], s[8:9], 0x10000
8450; GFX6-NEXT:    s_bfe_i64 s[6:7], s[6:7], 0x10000
8451; GFX6-NEXT:    buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:416
8452; GFX6-NEXT:    s_waitcnt expcnt(0)
8453; GFX6-NEXT:    v_mov_b32_e32 v12, s6
8454; GFX6-NEXT:    v_mov_b32_e32 v13, s7
8455; GFX6-NEXT:    s_lshr_b32 s46, s4, 26
8456; GFX6-NEXT:    v_mov_b32_e32 v14, s8
8457; GFX6-NEXT:    v_mov_b32_e32 v15, s9
8458; GFX6-NEXT:    s_lshr_b32 s8, s4, 27
8459; GFX6-NEXT:    s_bfe_i64 s[6:7], s[44:45], 0x10000
8460; GFX6-NEXT:    s_bfe_i64 s[40:41], s[40:41], 0x10000
8461; GFX6-NEXT:    buffer_store_dwordx4 v[2:5], off, s[0:3], 0 offset:400
8462; GFX6-NEXT:    s_waitcnt expcnt(0)
8463; GFX6-NEXT:    v_mov_b32_e32 v2, s40
8464; GFX6-NEXT:    v_mov_b32_e32 v3, s41
8465; GFX6-NEXT:    s_lshr_b32 s40, s4, 24
8466; GFX6-NEXT:    v_mov_b32_e32 v4, s6
8467; GFX6-NEXT:    v_mov_b32_e32 v5, s7
8468; GFX6-NEXT:    s_lshr_b32 s44, s4, 25
8469; GFX6-NEXT:    s_bfe_i64 s[6:7], s[36:37], 0x10000
8470; GFX6-NEXT:    s_bfe_i64 s[36:37], s[42:43], 0x10000
8471; GFX6-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:384
8472; GFX6-NEXT:    s_waitcnt expcnt(0)
8473; GFX6-NEXT:    v_mov_b32_e32 v8, s36
8474; GFX6-NEXT:    v_mov_b32_e32 v9, s37
8475; GFX6-NEXT:    s_lshr_b32 s36, s4, 22
8476; GFX6-NEXT:    v_mov_b32_e32 v10, s6
8477; GFX6-NEXT:    v_mov_b32_e32 v11, s7
8478; GFX6-NEXT:    s_lshr_b32 s42, s4, 23
8479; GFX6-NEXT:    s_bfe_i64 s[6:7], s[30:31], 0x10000
8480; GFX6-NEXT:    s_bfe_i64 s[30:31], s[38:39], 0x10000
8481; GFX6-NEXT:    buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:368
8482; GFX6-NEXT:    s_waitcnt expcnt(0)
8483; GFX6-NEXT:    v_mov_b32_e32 v12, s30
8484; GFX6-NEXT:    v_mov_b32_e32 v13, s31
8485; GFX6-NEXT:    s_lshr_b32 s30, s4, 20
8486; GFX6-NEXT:    v_mov_b32_e32 v14, s6
8487; GFX6-NEXT:    v_mov_b32_e32 v15, s7
8488; GFX6-NEXT:    s_lshr_b32 s6, s4, 21
8489; GFX6-NEXT:    s_bfe_i64 s[26:27], s[26:27], 0x10000
8490; GFX6-NEXT:    s_bfe_i64 s[34:35], s[34:35], 0x10000
8491; GFX6-NEXT:    buffer_store_dwordx4 v[2:5], off, s[0:3], 0 offset:352
8492; GFX6-NEXT:    v_mov_b32_e32 v16, s34
8493; GFX6-NEXT:    v_mov_b32_e32 v17, s35
8494; GFX6-NEXT:    s_lshr_b32 s34, s4, 18
8495; GFX6-NEXT:    v_mov_b32_e32 v18, s26
8496; GFX6-NEXT:    v_mov_b32_e32 v19, s27
8497; GFX6-NEXT:    s_lshr_b32 s26, s4, 19
8498; GFX6-NEXT:    s_bfe_i64 s[22:23], s[22:23], 0x10000
8499; GFX6-NEXT:    s_bfe_i64 s[28:29], s[28:29], 0x10000
8500; GFX6-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:336
8501; GFX6-NEXT:    s_waitcnt expcnt(0)
8502; GFX6-NEXT:    v_mov_b32_e32 v8, s28
8503; GFX6-NEXT:    v_mov_b32_e32 v9, s29
8504; GFX6-NEXT:    s_lshr_b32 s28, s4, 16
8505; GFX6-NEXT:    v_mov_b32_e32 v10, s22
8506; GFX6-NEXT:    v_mov_b32_e32 v11, s23
8507; GFX6-NEXT:    s_lshr_b32 s22, s4, 17
8508; GFX6-NEXT:    s_bfe_i64 s[24:25], s[24:25], 0x10000
8509; GFX6-NEXT:    buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:320
8510; GFX6-NEXT:    s_waitcnt expcnt(0)
8511; GFX6-NEXT:    v_mov_b32_e32 v12, s24
8512; GFX6-NEXT:    v_mov_b32_e32 v13, s25
8513; GFX6-NEXT:    s_lshr_b32 s24, s4, 14
8514; GFX6-NEXT:    s_bfe_i64 s[18:19], s[18:19], 0x10000
8515; GFX6-NEXT:    s_bfe_i64 s[20:21], s[20:21], 0x10000
8516; GFX6-NEXT:    v_mov_b32_e32 v14, s18
8517; GFX6-NEXT:    v_mov_b32_e32 v15, s19
8518; GFX6-NEXT:    s_lshr_b32 s18, s4, 15
8519; GFX6-NEXT:    v_mov_b32_e32 v2, s20
8520; GFX6-NEXT:    v_mov_b32_e32 v3, s21
8521; GFX6-NEXT:    s_lshr_b32 s20, s4, 12
8522; GFX6-NEXT:    s_bfe_i64 s[16:17], s[16:17], 0x10000
8523; GFX6-NEXT:    s_bfe_i64 s[14:15], s[14:15], 0x10000
8524; GFX6-NEXT:    buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:304
8525; GFX6-NEXT:    s_waitcnt expcnt(0)
8526; GFX6-NEXT:    v_mov_b32_e32 v16, s14
8527; GFX6-NEXT:    v_mov_b32_e32 v17, s15
8528; GFX6-NEXT:    s_lshr_b32 s14, s4, 13
8529; GFX6-NEXT:    v_mov_b32_e32 v18, s16
8530; GFX6-NEXT:    v_mov_b32_e32 v19, s17
8531; GFX6-NEXT:    s_lshr_b32 s16, s4, 10
8532; GFX6-NEXT:    s_bfe_i64 s[12:13], s[12:13], 0x10000
8533; GFX6-NEXT:    s_bfe_i64 s[10:11], s[10:11], 0x10000
8534; GFX6-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:288
8535; GFX6-NEXT:    s_waitcnt expcnt(0)
8536; GFX6-NEXT:    v_mov_b32_e32 v8, s10
8537; GFX6-NEXT:    v_mov_b32_e32 v9, s11
8538; GFX6-NEXT:    s_lshr_b32 s10, s4, 11
8539; GFX6-NEXT:    v_mov_b32_e32 v10, s12
8540; GFX6-NEXT:    v_mov_b32_e32 v11, s13
8541; GFX6-NEXT:    s_lshr_b32 s12, s4, 8
8542; GFX6-NEXT:    s_bfe_i64 s[8:9], s[8:9], 0x10000
8543; GFX6-NEXT:    s_bfe_i64 s[38:39], s[46:47], 0x10000
8544; GFX6-NEXT:    buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:272
8545; GFX6-NEXT:    s_waitcnt expcnt(0)
8546; GFX6-NEXT:    v_mov_b32_e32 v12, s38
8547; GFX6-NEXT:    v_mov_b32_e32 v13, s39
8548; GFX6-NEXT:    s_lshr_b32 s38, s4, 9
8549; GFX6-NEXT:    v_mov_b32_e32 v14, s8
8550; GFX6-NEXT:    v_mov_b32_e32 v15, s9
8551; GFX6-NEXT:    s_lshr_b32 s8, s4, 6
8552; GFX6-NEXT:    s_bfe_i64 s[44:45], s[44:45], 0x10000
8553; GFX6-NEXT:    s_bfe_i64 s[40:41], s[40:41], 0x10000
8554; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:256
8555; GFX6-NEXT:    s_waitcnt expcnt(0)
8556; GFX6-NEXT:    v_mov_b32_e32 v0, s40
8557; GFX6-NEXT:    v_mov_b32_e32 v1, s41
8558; GFX6-NEXT:    s_lshr_b32 s40, s4, 7
8559; GFX6-NEXT:    v_mov_b32_e32 v2, s44
8560; GFX6-NEXT:    v_mov_b32_e32 v3, s45
8561; GFX6-NEXT:    s_lshr_b32 s44, s4, 4
8562; GFX6-NEXT:    s_bfe_i64 s[42:43], s[42:43], 0x10000
8563; GFX6-NEXT:    s_bfe_i64 s[36:37], s[36:37], 0x10000
8564; GFX6-NEXT:    buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:240
8565; GFX6-NEXT:    s_waitcnt expcnt(0)
8566; GFX6-NEXT:    v_mov_b32_e32 v16, s36
8567; GFX6-NEXT:    v_mov_b32_e32 v17, s37
8568; GFX6-NEXT:    s_lshr_b32 s36, s4, 5
8569; GFX6-NEXT:    v_mov_b32_e32 v18, s42
8570; GFX6-NEXT:    v_mov_b32_e32 v19, s43
8571; GFX6-NEXT:    s_lshr_b32 s42, s4, 2
8572; GFX6-NEXT:    s_bfe_i64 s[30:31], s[30:31], 0x10000
8573; GFX6-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:224
8574; GFX6-NEXT:    s_waitcnt expcnt(0)
8575; GFX6-NEXT:    v_mov_b32_e32 v8, s30
8576; GFX6-NEXT:    v_mov_b32_e32 v9, s31
8577; GFX6-NEXT:    s_lshr_b32 s30, s4, 3
8578; GFX6-NEXT:    s_lshr_b32 s4, s4, 1
8579; GFX6-NEXT:    s_bfe_i64 s[4:5], s[4:5], 0x10000
8580; GFX6-NEXT:    s_bfe_i64 s[30:31], s[30:31], 0x10000
8581; GFX6-NEXT:    s_bfe_i64 s[42:43], s[42:43], 0x10000
8582; GFX6-NEXT:    s_bfe_i64 s[36:37], s[36:37], 0x10000
8583; GFX6-NEXT:    s_bfe_i64 s[44:45], s[44:45], 0x10000
8584; GFX6-NEXT:    s_bfe_i64 s[40:41], s[40:41], 0x10000
8585; GFX6-NEXT:    s_bfe_i64 s[8:9], s[8:9], 0x10000
8586; GFX6-NEXT:    s_bfe_i64 s[38:39], s[38:39], 0x10000
8587; GFX6-NEXT:    s_bfe_i64 s[12:13], s[12:13], 0x10000
8588; GFX6-NEXT:    s_bfe_i64 s[10:11], s[10:11], 0x10000
8589; GFX6-NEXT:    s_bfe_i64 s[16:17], s[16:17], 0x10000
8590; GFX6-NEXT:    s_bfe_i64 s[14:15], s[14:15], 0x10000
8591; GFX6-NEXT:    s_bfe_i64 s[20:21], s[20:21], 0x10000
8592; GFX6-NEXT:    s_bfe_i64 s[18:19], s[18:19], 0x10000
8593; GFX6-NEXT:    s_bfe_i64 s[24:25], s[24:25], 0x10000
8594; GFX6-NEXT:    s_bfe_i64 s[22:23], s[22:23], 0x10000
8595; GFX6-NEXT:    s_bfe_i64 s[28:29], s[28:29], 0x10000
8596; GFX6-NEXT:    s_bfe_i64 s[26:27], s[26:27], 0x10000
8597; GFX6-NEXT:    s_bfe_i64 s[34:35], s[34:35], 0x10000
8598; GFX6-NEXT:    s_bfe_i64 s[6:7], s[6:7], 0x10000
8599; GFX6-NEXT:    buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:208
8600; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:192
8601; GFX6-NEXT:    buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:176
8602; GFX6-NEXT:    v_mov_b32_e32 v10, s6
8603; GFX6-NEXT:    v_mov_b32_e32 v11, s7
8604; GFX6-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:160
8605; GFX6-NEXT:    s_waitcnt expcnt(2)
8606; GFX6-NEXT:    v_mov_b32_e32 v0, s34
8607; GFX6-NEXT:    v_mov_b32_e32 v1, s35
8608; GFX6-NEXT:    v_mov_b32_e32 v2, s26
8609; GFX6-NEXT:    v_mov_b32_e32 v3, s27
8610; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:144
8611; GFX6-NEXT:    s_waitcnt expcnt(0)
8612; GFX6-NEXT:    v_mov_b32_e32 v0, s28
8613; GFX6-NEXT:    v_mov_b32_e32 v1, s29
8614; GFX6-NEXT:    v_mov_b32_e32 v2, s22
8615; GFX6-NEXT:    v_mov_b32_e32 v3, s23
8616; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:128
8617; GFX6-NEXT:    s_waitcnt expcnt(0)
8618; GFX6-NEXT:    v_mov_b32_e32 v0, s24
8619; GFX6-NEXT:    v_mov_b32_e32 v1, s25
8620; GFX6-NEXT:    v_mov_b32_e32 v2, s18
8621; GFX6-NEXT:    v_mov_b32_e32 v3, s19
8622; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112
8623; GFX6-NEXT:    s_waitcnt expcnt(0)
8624; GFX6-NEXT:    v_mov_b32_e32 v0, s20
8625; GFX6-NEXT:    v_mov_b32_e32 v1, s21
8626; GFX6-NEXT:    v_mov_b32_e32 v2, s14
8627; GFX6-NEXT:    v_mov_b32_e32 v3, s15
8628; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96
8629; GFX6-NEXT:    s_waitcnt expcnt(0)
8630; GFX6-NEXT:    v_mov_b32_e32 v0, s16
8631; GFX6-NEXT:    v_mov_b32_e32 v1, s17
8632; GFX6-NEXT:    v_mov_b32_e32 v2, s10
8633; GFX6-NEXT:    v_mov_b32_e32 v3, s11
8634; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80
8635; GFX6-NEXT:    s_waitcnt expcnt(0)
8636; GFX6-NEXT:    v_mov_b32_e32 v0, s12
8637; GFX6-NEXT:    v_mov_b32_e32 v1, s13
8638; GFX6-NEXT:    v_mov_b32_e32 v2, s38
8639; GFX6-NEXT:    v_mov_b32_e32 v3, s39
8640; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64
8641; GFX6-NEXT:    s_waitcnt expcnt(0)
8642; GFX6-NEXT:    v_mov_b32_e32 v0, s8
8643; GFX6-NEXT:    v_mov_b32_e32 v1, s9
8644; GFX6-NEXT:    v_mov_b32_e32 v2, s40
8645; GFX6-NEXT:    v_mov_b32_e32 v3, s41
8646; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
8647; GFX6-NEXT:    s_waitcnt expcnt(0)
8648; GFX6-NEXT:    v_mov_b32_e32 v0, s44
8649; GFX6-NEXT:    v_mov_b32_e32 v1, s45
8650; GFX6-NEXT:    v_mov_b32_e32 v2, s36
8651; GFX6-NEXT:    v_mov_b32_e32 v3, s37
8652; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
8653; GFX6-NEXT:    s_waitcnt expcnt(0)
8654; GFX6-NEXT:    v_mov_b32_e32 v0, s42
8655; GFX6-NEXT:    v_mov_b32_e32 v1, s43
8656; GFX6-NEXT:    v_mov_b32_e32 v2, s30
8657; GFX6-NEXT:    v_mov_b32_e32 v3, s31
8658; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
8659; GFX6-NEXT:    v_mov_b32_e32 v8, s4
8660; GFX6-NEXT:    v_mov_b32_e32 v9, s5
8661; GFX6-NEXT:    buffer_store_dwordx4 v[6:9], off, s[0:3], 0
8662; GFX6-NEXT:    s_endpgm
8663;
8664; GFX8-LABEL: constant_sextload_v64i1_to_v64i64:
8665; GFX8:       ; %bb.0:
8666; GFX8-NEXT:    s_load_dwordx4 s[8:11], s[4:5], 0x24
8667; GFX8-NEXT:    ; implicit-def: $vgpr62 : SGPR spill to VGPR lane
8668; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
8669; GFX8-NEXT:    s_load_dwordx2 s[2:3], s[10:11], 0x0
8670; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
8671; GFX8-NEXT:    s_lshr_b32 s0, s3, 8
8672; GFX8-NEXT:    s_lshr_b32 s48, s3, 15
8673; GFX8-NEXT:    v_writelane_b32 v62, s0, 0
8674; GFX8-NEXT:    s_lshr_b32 s74, s3, 30
8675; GFX8-NEXT:    s_lshr_b32 s30, s3, 31
8676; GFX8-NEXT:    s_lshr_b32 s72, s3, 28
8677; GFX8-NEXT:    s_lshr_b32 s34, s3, 29
8678; GFX8-NEXT:    s_lshr_b32 s70, s3, 26
8679; GFX8-NEXT:    s_lshr_b32 s36, s3, 27
8680; GFX8-NEXT:    s_lshr_b32 s68, s3, 24
8681; GFX8-NEXT:    s_lshr_b32 s38, s3, 25
8682; GFX8-NEXT:    s_lshr_b32 s64, s3, 22
8683; GFX8-NEXT:    s_lshr_b32 s40, s3, 23
8684; GFX8-NEXT:    s_lshr_b32 s60, s3, 20
8685; GFX8-NEXT:    s_lshr_b32 s42, s3, 21
8686; GFX8-NEXT:    s_lshr_b32 s66, s3, 18
8687; GFX8-NEXT:    s_lshr_b32 s44, s3, 19
8688; GFX8-NEXT:    s_lshr_b32 s56, s3, 16
8689; GFX8-NEXT:    s_lshr_b32 s46, s3, 17
8690; GFX8-NEXT:    s_lshr_b32 s58, s3, 14
8691; GFX8-NEXT:    s_lshr_b32 s62, s3, 12
8692; GFX8-NEXT:    s_lshr_b32 s54, s3, 10
8693; GFX8-NEXT:    v_writelane_b32 v62, s1, 1
8694; GFX8-NEXT:    s_lshr_b32 s0, s3, 9
8695; GFX8-NEXT:    s_bfe_i64 s[48:49], s[48:49], 0x10000
8696; GFX8-NEXT:    s_lshr_b32 s52, s3, 11
8697; GFX8-NEXT:    v_writelane_b32 v62, s0, 2
8698; GFX8-NEXT:    s_bfe_i64 s[60:61], s[60:61], 0x10000
8699; GFX8-NEXT:    s_bfe_i64 s[64:65], s[64:65], 0x10000
8700; GFX8-NEXT:    s_bfe_i64 s[68:69], s[68:69], 0x10000
8701; GFX8-NEXT:    s_bfe_i64 s[70:71], s[70:71], 0x10000
8702; GFX8-NEXT:    s_bfe_i64 s[72:73], s[72:73], 0x10000
8703; GFX8-NEXT:    s_bfe_i64 s[74:75], s[74:75], 0x10000
8704; GFX8-NEXT:    s_bfe_i64 s[66:67], s[66:67], 0x10000
8705; GFX8-NEXT:    s_bfe_i64 s[62:63], s[62:63], 0x10000
8706; GFX8-NEXT:    s_bfe_i64 s[58:59], s[58:59], 0x10000
8707; GFX8-NEXT:    s_bfe_i64 s[56:57], s[56:57], 0x10000
8708; GFX8-NEXT:    s_bfe_i64 s[54:55], s[54:55], 0x10000
8709; GFX8-NEXT:    s_bfe_i64 s[46:47], s[46:47], 0x10000
8710; GFX8-NEXT:    s_bfe_i64 s[44:45], s[44:45], 0x10000
8711; GFX8-NEXT:    s_bfe_i64 s[42:43], s[42:43], 0x10000
8712; GFX8-NEXT:    s_bfe_i64 s[40:41], s[40:41], 0x10000
8713; GFX8-NEXT:    s_bfe_i64 s[38:39], s[38:39], 0x10000
8714; GFX8-NEXT:    s_bfe_i64 s[36:37], s[36:37], 0x10000
8715; GFX8-NEXT:    s_bfe_i64 s[34:35], s[34:35], 0x10000
8716; GFX8-NEXT:    s_bfe_i64 s[30:31], s[30:31], 0x10000
8717; GFX8-NEXT:    v_mov_b32_e32 v34, s48
8718; GFX8-NEXT:    s_lshr_b32 s48, s2, 1
8719; GFX8-NEXT:    s_lshr_b32 s50, s3, 13
8720; GFX8-NEXT:    v_writelane_b32 v62, s1, 3
8721; GFX8-NEXT:    s_lshr_b32 s6, s3, 6
8722; GFX8-NEXT:    s_lshr_b32 s10, s3, 7
8723; GFX8-NEXT:    s_lshr_b32 s12, s3, 4
8724; GFX8-NEXT:    s_lshr_b32 s14, s3, 5
8725; GFX8-NEXT:    s_lshr_b32 s16, s3, 2
8726; GFX8-NEXT:    s_lshr_b32 s18, s3, 3
8727; GFX8-NEXT:    s_lshr_b32 s20, s3, 1
8728; GFX8-NEXT:    s_mov_b32 s22, s3
8729; GFX8-NEXT:    s_lshr_b32 s24, s2, 30
8730; GFX8-NEXT:    s_lshr_b32 s26, s2, 31
8731; GFX8-NEXT:    s_lshr_b32 s28, s2, 28
8732; GFX8-NEXT:    v_mov_b32_e32 v4, s74
8733; GFX8-NEXT:    v_mov_b32_e32 v12, s72
8734; GFX8-NEXT:    v_mov_b32_e32 v0, s70
8735; GFX8-NEXT:    v_mov_b32_e32 v8, s68
8736; GFX8-NEXT:    v_mov_b32_e32 v16, s64
8737; GFX8-NEXT:    v_mov_b32_e32 v20, s60
8738; GFX8-NEXT:    v_mov_b32_e32 v24, s66
8739; GFX8-NEXT:    v_mov_b32_e32 v28, s56
8740; GFX8-NEXT:    v_mov_b32_e32 v32, s58
8741; GFX8-NEXT:    v_mov_b32_e32 v36, s62
8742; GFX8-NEXT:    s_lshr_b32 s86, s2, 29
8743; GFX8-NEXT:    v_mov_b32_e32 v40, s54
8744; GFX8-NEXT:    s_lshr_b32 s84, s2, 26
8745; GFX8-NEXT:    s_lshr_b32 s82, s2, 27
8746; GFX8-NEXT:    s_bfe_i64 vcc, s[52:53], 0x10000
8747; GFX8-NEXT:    s_lshr_b32 s80, s2, 24
8748; GFX8-NEXT:    v_mov_b32_e32 v6, s30
8749; GFX8-NEXT:    v_mov_b32_e32 v7, s31
8750; GFX8-NEXT:    s_lshr_b32 s78, s2, 25
8751; GFX8-NEXT:    s_lshr_b32 s76, s2, 22
8752; GFX8-NEXT:    v_mov_b32_e32 v14, s34
8753; GFX8-NEXT:    s_lshr_b32 s74, s2, 23
8754; GFX8-NEXT:    s_lshr_b32 s72, s2, 20
8755; GFX8-NEXT:    v_mov_b32_e32 v2, s36
8756; GFX8-NEXT:    s_lshr_b32 s70, s2, 21
8757; GFX8-NEXT:    s_lshr_b32 s68, s2, 18
8758; GFX8-NEXT:    v_mov_b32_e32 v10, s38
8759; GFX8-NEXT:    s_lshr_b32 s66, s2, 19
8760; GFX8-NEXT:    s_lshr_b32 s64, s2, 16
8761; GFX8-NEXT:    v_mov_b32_e32 v18, s40
8762; GFX8-NEXT:    s_lshr_b32 s62, s2, 17
8763; GFX8-NEXT:    s_lshr_b32 s60, s2, 14
8764; GFX8-NEXT:    v_mov_b32_e32 v22, s42
8765; GFX8-NEXT:    s_lshr_b32 s58, s2, 15
8766; GFX8-NEXT:    s_lshr_b32 s56, s2, 12
8767; GFX8-NEXT:    v_mov_b32_e32 v26, s44
8768; GFX8-NEXT:    s_lshr_b32 s54, s2, 13
8769; GFX8-NEXT:    s_lshr_b32 s52, s2, 10
8770; GFX8-NEXT:    v_mov_b32_e32 v30, s46
8771; GFX8-NEXT:    s_lshr_b32 s4, s2, 11
8772; GFX8-NEXT:    s_lshr_b32 s0, s2, 8
8773; GFX8-NEXT:    s_lshr_b32 s46, s2, 9
8774; GFX8-NEXT:    s_lshr_b32 s44, s2, 6
8775; GFX8-NEXT:    s_lshr_b32 s42, s2, 7
8776; GFX8-NEXT:    s_lshr_b32 s40, s2, 4
8777; GFX8-NEXT:    s_lshr_b32 s38, s2, 5
8778; GFX8-NEXT:    s_lshr_b32 s36, s2, 2
8779; GFX8-NEXT:    s_lshr_b32 s34, s2, 3
8780; GFX8-NEXT:    s_bfe_i64 s[30:31], s[2:3], 0x10000
8781; GFX8-NEXT:    s_bfe_i64 s[2:3], s[48:49], 0x10000
8782; GFX8-NEXT:    v_writelane_b32 v62, s2, 4
8783; GFX8-NEXT:    v_writelane_b32 v62, s3, 5
8784; GFX8-NEXT:    v_readlane_b32 s2, v62, 2
8785; GFX8-NEXT:    s_bfe_i64 s[50:51], s[50:51], 0x10000
8786; GFX8-NEXT:    v_readlane_b32 s3, v62, 3
8787; GFX8-NEXT:    v_mov_b32_e32 v38, s50
8788; GFX8-NEXT:    v_mov_b32_e32 v39, s51
8789; GFX8-NEXT:    s_bfe_i64 s[50:51], s[4:5], 0x10000
8790; GFX8-NEXT:    s_bfe_i64 s[4:5], s[6:7], 0x10000
8791; GFX8-NEXT:    s_bfe_i64 s[6:7], s[2:3], 0x10000
8792; GFX8-NEXT:    v_readlane_b32 s2, v62, 0
8793; GFX8-NEXT:    v_readlane_b32 s3, v62, 1
8794; GFX8-NEXT:    v_mov_b32_e32 v5, s75
8795; GFX8-NEXT:    v_mov_b32_e32 v13, s73
8796; GFX8-NEXT:    v_mov_b32_e32 v15, s35
8797; GFX8-NEXT:    v_mov_b32_e32 v1, s71
8798; GFX8-NEXT:    v_mov_b32_e32 v3, s37
8799; GFX8-NEXT:    v_mov_b32_e32 v9, s69
8800; GFX8-NEXT:    v_mov_b32_e32 v11, s39
8801; GFX8-NEXT:    v_mov_b32_e32 v17, s65
8802; GFX8-NEXT:    v_mov_b32_e32 v19, s41
8803; GFX8-NEXT:    v_mov_b32_e32 v21, s61
8804; GFX8-NEXT:    v_mov_b32_e32 v23, s43
8805; GFX8-NEXT:    v_mov_b32_e32 v25, s67
8806; GFX8-NEXT:    v_mov_b32_e32 v27, s45
8807; GFX8-NEXT:    v_mov_b32_e32 v29, s57
8808; GFX8-NEXT:    v_mov_b32_e32 v31, s47
8809; GFX8-NEXT:    v_mov_b32_e32 v33, s59
8810; GFX8-NEXT:    v_mov_b32_e32 v35, s49
8811; GFX8-NEXT:    v_mov_b32_e32 v37, s63
8812; GFX8-NEXT:    v_mov_b32_e32 v41, s55
8813; GFX8-NEXT:    s_bfe_i64 s[34:35], s[34:35], 0x10000
8814; GFX8-NEXT:    s_bfe_i64 s[36:37], s[36:37], 0x10000
8815; GFX8-NEXT:    s_bfe_i64 s[38:39], s[38:39], 0x10000
8816; GFX8-NEXT:    s_bfe_i64 s[40:41], s[40:41], 0x10000
8817; GFX8-NEXT:    s_bfe_i64 s[42:43], s[42:43], 0x10000
8818; GFX8-NEXT:    s_bfe_i64 s[44:45], s[44:45], 0x10000
8819; GFX8-NEXT:    s_bfe_i64 s[46:47], s[46:47], 0x10000
8820; GFX8-NEXT:    s_bfe_i64 s[48:49], s[0:1], 0x10000
8821; GFX8-NEXT:    s_bfe_i64 s[52:53], s[52:53], 0x10000
8822; GFX8-NEXT:    s_bfe_i64 s[54:55], s[54:55], 0x10000
8823; GFX8-NEXT:    s_bfe_i64 s[56:57], s[56:57], 0x10000
8824; GFX8-NEXT:    s_bfe_i64 s[58:59], s[58:59], 0x10000
8825; GFX8-NEXT:    s_bfe_i64 s[60:61], s[60:61], 0x10000
8826; GFX8-NEXT:    s_bfe_i64 s[62:63], s[62:63], 0x10000
8827; GFX8-NEXT:    s_bfe_i64 s[64:65], s[64:65], 0x10000
8828; GFX8-NEXT:    s_bfe_i64 s[66:67], s[66:67], 0x10000
8829; GFX8-NEXT:    s_bfe_i64 s[68:69], s[68:69], 0x10000
8830; GFX8-NEXT:    s_bfe_i64 s[70:71], s[70:71], 0x10000
8831; GFX8-NEXT:    s_bfe_i64 s[72:73], s[72:73], 0x10000
8832; GFX8-NEXT:    s_bfe_i64 s[74:75], s[74:75], 0x10000
8833; GFX8-NEXT:    s_bfe_i64 s[76:77], s[76:77], 0x10000
8834; GFX8-NEXT:    s_bfe_i64 s[78:79], s[78:79], 0x10000
8835; GFX8-NEXT:    s_bfe_i64 s[80:81], s[80:81], 0x10000
8836; GFX8-NEXT:    s_bfe_i64 s[82:83], s[82:83], 0x10000
8837; GFX8-NEXT:    s_bfe_i64 s[84:85], s[84:85], 0x10000
8838; GFX8-NEXT:    s_bfe_i64 s[86:87], s[86:87], 0x10000
8839; GFX8-NEXT:    s_bfe_i64 s[28:29], s[28:29], 0x10000
8840; GFX8-NEXT:    s_bfe_i64 s[26:27], s[26:27], 0x10000
8841; GFX8-NEXT:    s_bfe_i64 s[24:25], s[24:25], 0x10000
8842; GFX8-NEXT:    s_bfe_i64 s[22:23], s[22:23], 0x10000
8843; GFX8-NEXT:    s_bfe_i64 s[20:21], s[20:21], 0x10000
8844; GFX8-NEXT:    s_bfe_i64 s[18:19], s[18:19], 0x10000
8845; GFX8-NEXT:    s_bfe_i64 s[16:17], s[16:17], 0x10000
8846; GFX8-NEXT:    s_bfe_i64 s[14:15], s[14:15], 0x10000
8847; GFX8-NEXT:    s_bfe_i64 s[12:13], s[12:13], 0x10000
8848; GFX8-NEXT:    s_bfe_i64 s[0:1], s[10:11], 0x10000
8849; GFX8-NEXT:    s_bfe_i64 s[10:11], s[2:3], 0x10000
8850; GFX8-NEXT:    s_add_u32 s2, s8, 0x1f0
8851; GFX8-NEXT:    s_addc_u32 s3, s9, 0
8852; GFX8-NEXT:    v_mov_b32_e32 v43, s3
8853; GFX8-NEXT:    v_mov_b32_e32 v42, s2
8854; GFX8-NEXT:    s_add_u32 s2, s8, 0x1e0
8855; GFX8-NEXT:    s_addc_u32 s3, s9, 0
8856; GFX8-NEXT:    v_mov_b32_e32 v45, s3
8857; GFX8-NEXT:    v_mov_b32_e32 v44, s2
8858; GFX8-NEXT:    s_add_u32 s2, s8, 0x1d0
8859; GFX8-NEXT:    s_addc_u32 s3, s9, 0
8860; GFX8-NEXT:    v_mov_b32_e32 v47, s3
8861; GFX8-NEXT:    v_mov_b32_e32 v46, s2
8862; GFX8-NEXT:    s_add_u32 s2, s8, 0x1c0
8863; GFX8-NEXT:    s_addc_u32 s3, s9, 0
8864; GFX8-NEXT:    v_mov_b32_e32 v49, s3
8865; GFX8-NEXT:    v_mov_b32_e32 v48, s2
8866; GFX8-NEXT:    s_add_u32 s2, s8, 0x1b0
8867; GFX8-NEXT:    s_addc_u32 s3, s9, 0
8868; GFX8-NEXT:    v_mov_b32_e32 v51, s3
8869; GFX8-NEXT:    v_mov_b32_e32 v50, s2
8870; GFX8-NEXT:    s_add_u32 s2, s8, 0x1a0
8871; GFX8-NEXT:    s_addc_u32 s3, s9, 0
8872; GFX8-NEXT:    v_mov_b32_e32 v53, s3
8873; GFX8-NEXT:    v_mov_b32_e32 v52, s2
8874; GFX8-NEXT:    s_add_u32 s2, s8, 0x190
8875; GFX8-NEXT:    s_addc_u32 s3, s9, 0
8876; GFX8-NEXT:    v_mov_b32_e32 v55, s3
8877; GFX8-NEXT:    v_mov_b32_e32 v54, s2
8878; GFX8-NEXT:    s_add_u32 s2, s8, 0x180
8879; GFX8-NEXT:    s_addc_u32 s3, s9, 0
8880; GFX8-NEXT:    v_mov_b32_e32 v57, s3
8881; GFX8-NEXT:    v_mov_b32_e32 v56, s2
8882; GFX8-NEXT:    s_add_u32 s2, s8, 0x170
8883; GFX8-NEXT:    s_addc_u32 s3, s9, 0
8884; GFX8-NEXT:    v_mov_b32_e32 v59, s3
8885; GFX8-NEXT:    v_mov_b32_e32 v58, s2
8886; GFX8-NEXT:    s_add_u32 s2, s8, 0x160
8887; GFX8-NEXT:    s_addc_u32 s3, s9, 0
8888; GFX8-NEXT:    v_mov_b32_e32 v61, s3
8889; GFX8-NEXT:    v_mov_b32_e32 v60, s2
8890; GFX8-NEXT:    s_add_u32 s2, s8, 0x150
8891; GFX8-NEXT:    s_addc_u32 s3, s9, 0
8892; GFX8-NEXT:    flat_store_dwordx4 v[44:45], v[12:15]
8893; GFX8-NEXT:    flat_store_dwordx4 v[46:47], v[0:3]
8894; GFX8-NEXT:    v_mov_b32_e32 v13, s3
8895; GFX8-NEXT:    v_mov_b32_e32 v12, s2
8896; GFX8-NEXT:    s_add_u32 s2, s8, 0x140
8897; GFX8-NEXT:    s_addc_u32 s3, s9, 0
8898; GFX8-NEXT:    v_mov_b32_e32 v2, s0
8899; GFX8-NEXT:    s_add_u32 s0, s8, 0x130
8900; GFX8-NEXT:    v_mov_b32_e32 v3, s1
8901; GFX8-NEXT:    s_addc_u32 s1, s9, 0
8902; GFX8-NEXT:    flat_store_dwordx4 v[42:43], v[4:7]
8903; GFX8-NEXT:    flat_store_dwordx4 v[48:49], v[8:11]
8904; GFX8-NEXT:    flat_store_dwordx4 v[50:51], v[16:19]
8905; GFX8-NEXT:    v_mov_b32_e32 v4, s10
8906; GFX8-NEXT:    v_mov_b32_e32 v17, s1
8907; GFX8-NEXT:    v_mov_b32_e32 v16, s0
8908; GFX8-NEXT:    s_add_u32 s0, s8, 0x120
8909; GFX8-NEXT:    s_addc_u32 s1, s9, 0
8910; GFX8-NEXT:    v_mov_b32_e32 v19, s1
8911; GFX8-NEXT:    v_mov_b32_e32 v18, s0
8912; GFX8-NEXT:    s_add_u32 s0, s8, 0x110
8913; GFX8-NEXT:    v_mov_b32_e32 v5, s11
8914; GFX8-NEXT:    v_mov_b32_e32 v15, s3
8915; GFX8-NEXT:    s_addc_u32 s1, s9, 0
8916; GFX8-NEXT:    v_mov_b32_e32 v42, vcc_lo
8917; GFX8-NEXT:    v_mov_b32_e32 v43, vcc_hi
8918; GFX8-NEXT:    v_mov_b32_e32 v14, s2
8919; GFX8-NEXT:    v_mov_b32_e32 v6, s6
8920; GFX8-NEXT:    v_mov_b32_e32 v7, s7
8921; GFX8-NEXT:    v_mov_b32_e32 v0, s4
8922; GFX8-NEXT:    v_mov_b32_e32 v1, s5
8923; GFX8-NEXT:    v_mov_b32_e32 v8, s12
8924; GFX8-NEXT:    flat_store_dwordx4 v[52:53], v[20:23]
8925; GFX8-NEXT:    v_mov_b32_e32 v9, s13
8926; GFX8-NEXT:    flat_store_dwordx4 v[54:55], v[24:27]
8927; GFX8-NEXT:    v_mov_b32_e32 v10, s14
8928; GFX8-NEXT:    v_mov_b32_e32 v11, s15
8929; GFX8-NEXT:    flat_store_dwordx4 v[56:57], v[28:31]
8930; GFX8-NEXT:    flat_store_dwordx4 v[58:59], v[32:35]
8931; GFX8-NEXT:    flat_store_dwordx4 v[60:61], v[36:39]
8932; GFX8-NEXT:    flat_store_dwordx4 v[12:13], v[40:43]
8933; GFX8-NEXT:    flat_store_dwordx4 v[14:15], v[4:7]
8934; GFX8-NEXT:    flat_store_dwordx4 v[16:17], v[0:3]
8935; GFX8-NEXT:    flat_store_dwordx4 v[18:19], v[8:11]
8936; GFX8-NEXT:    v_mov_b32_e32 v5, s1
8937; GFX8-NEXT:    v_mov_b32_e32 v4, s0
8938; GFX8-NEXT:    s_add_u32 s0, s8, 0x100
8939; GFX8-NEXT:    v_mov_b32_e32 v0, s16
8940; GFX8-NEXT:    v_mov_b32_e32 v1, s17
8941; GFX8-NEXT:    v_mov_b32_e32 v2, s18
8942; GFX8-NEXT:    v_mov_b32_e32 v3, s19
8943; GFX8-NEXT:    s_addc_u32 s1, s9, 0
8944; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
8945; GFX8-NEXT:    v_mov_b32_e32 v5, s1
8946; GFX8-NEXT:    v_mov_b32_e32 v4, s0
8947; GFX8-NEXT:    s_add_u32 s0, s8, 0xf0
8948; GFX8-NEXT:    v_mov_b32_e32 v0, s22
8949; GFX8-NEXT:    v_mov_b32_e32 v1, s23
8950; GFX8-NEXT:    v_mov_b32_e32 v2, s20
8951; GFX8-NEXT:    v_mov_b32_e32 v3, s21
8952; GFX8-NEXT:    s_addc_u32 s1, s9, 0
8953; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
8954; GFX8-NEXT:    v_mov_b32_e32 v5, s1
8955; GFX8-NEXT:    v_mov_b32_e32 v4, s0
8956; GFX8-NEXT:    s_add_u32 s0, s8, 0xe0
8957; GFX8-NEXT:    v_mov_b32_e32 v0, s24
8958; GFX8-NEXT:    v_mov_b32_e32 v1, s25
8959; GFX8-NEXT:    v_mov_b32_e32 v2, s26
8960; GFX8-NEXT:    v_mov_b32_e32 v3, s27
8961; GFX8-NEXT:    s_addc_u32 s1, s9, 0
8962; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
8963; GFX8-NEXT:    v_mov_b32_e32 v5, s1
8964; GFX8-NEXT:    v_mov_b32_e32 v4, s0
8965; GFX8-NEXT:    s_add_u32 s0, s8, 0xd0
8966; GFX8-NEXT:    v_mov_b32_e32 v0, s28
8967; GFX8-NEXT:    v_mov_b32_e32 v1, s29
8968; GFX8-NEXT:    v_mov_b32_e32 v2, s86
8969; GFX8-NEXT:    v_mov_b32_e32 v3, s87
8970; GFX8-NEXT:    s_addc_u32 s1, s9, 0
8971; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
8972; GFX8-NEXT:    v_mov_b32_e32 v5, s1
8973; GFX8-NEXT:    v_mov_b32_e32 v4, s0
8974; GFX8-NEXT:    s_add_u32 s0, s8, 0xc0
8975; GFX8-NEXT:    v_mov_b32_e32 v0, s84
8976; GFX8-NEXT:    v_mov_b32_e32 v1, s85
8977; GFX8-NEXT:    v_mov_b32_e32 v2, s82
8978; GFX8-NEXT:    v_mov_b32_e32 v3, s83
8979; GFX8-NEXT:    s_addc_u32 s1, s9, 0
8980; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
8981; GFX8-NEXT:    v_mov_b32_e32 v5, s1
8982; GFX8-NEXT:    v_mov_b32_e32 v4, s0
8983; GFX8-NEXT:    s_add_u32 s0, s8, 0xb0
8984; GFX8-NEXT:    v_mov_b32_e32 v0, s80
8985; GFX8-NEXT:    v_mov_b32_e32 v1, s81
8986; GFX8-NEXT:    v_mov_b32_e32 v2, s78
8987; GFX8-NEXT:    v_mov_b32_e32 v3, s79
8988; GFX8-NEXT:    s_addc_u32 s1, s9, 0
8989; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
8990; GFX8-NEXT:    v_mov_b32_e32 v5, s1
8991; GFX8-NEXT:    v_mov_b32_e32 v4, s0
8992; GFX8-NEXT:    s_add_u32 s0, s8, 0xa0
8993; GFX8-NEXT:    v_mov_b32_e32 v0, s76
8994; GFX8-NEXT:    v_mov_b32_e32 v1, s77
8995; GFX8-NEXT:    v_mov_b32_e32 v2, s74
8996; GFX8-NEXT:    v_mov_b32_e32 v3, s75
8997; GFX8-NEXT:    s_addc_u32 s1, s9, 0
8998; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
8999; GFX8-NEXT:    v_mov_b32_e32 v5, s1
9000; GFX8-NEXT:    v_mov_b32_e32 v4, s0
9001; GFX8-NEXT:    s_add_u32 s0, s8, 0x90
9002; GFX8-NEXT:    v_mov_b32_e32 v0, s72
9003; GFX8-NEXT:    v_mov_b32_e32 v1, s73
9004; GFX8-NEXT:    v_mov_b32_e32 v2, s70
9005; GFX8-NEXT:    v_mov_b32_e32 v3, s71
9006; GFX8-NEXT:    s_addc_u32 s1, s9, 0
9007; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
9008; GFX8-NEXT:    v_mov_b32_e32 v5, s1
9009; GFX8-NEXT:    v_mov_b32_e32 v4, s0
9010; GFX8-NEXT:    s_add_u32 s0, s8, 0x80
9011; GFX8-NEXT:    v_mov_b32_e32 v0, s68
9012; GFX8-NEXT:    v_mov_b32_e32 v1, s69
9013; GFX8-NEXT:    v_mov_b32_e32 v2, s66
9014; GFX8-NEXT:    v_mov_b32_e32 v3, s67
9015; GFX8-NEXT:    s_addc_u32 s1, s9, 0
9016; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
9017; GFX8-NEXT:    v_mov_b32_e32 v5, s1
9018; GFX8-NEXT:    v_mov_b32_e32 v4, s0
9019; GFX8-NEXT:    s_add_u32 s0, s8, 0x70
9020; GFX8-NEXT:    v_mov_b32_e32 v0, s64
9021; GFX8-NEXT:    v_mov_b32_e32 v1, s65
9022; GFX8-NEXT:    v_mov_b32_e32 v2, s62
9023; GFX8-NEXT:    v_mov_b32_e32 v3, s63
9024; GFX8-NEXT:    s_addc_u32 s1, s9, 0
9025; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
9026; GFX8-NEXT:    v_mov_b32_e32 v5, s1
9027; GFX8-NEXT:    v_mov_b32_e32 v4, s0
9028; GFX8-NEXT:    s_add_u32 s0, s8, 0x60
9029; GFX8-NEXT:    v_mov_b32_e32 v0, s60
9030; GFX8-NEXT:    v_mov_b32_e32 v1, s61
9031; GFX8-NEXT:    v_mov_b32_e32 v2, s58
9032; GFX8-NEXT:    v_mov_b32_e32 v3, s59
9033; GFX8-NEXT:    s_addc_u32 s1, s9, 0
9034; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
9035; GFX8-NEXT:    v_mov_b32_e32 v5, s1
9036; GFX8-NEXT:    v_mov_b32_e32 v4, s0
9037; GFX8-NEXT:    s_add_u32 s0, s8, 0x50
9038; GFX8-NEXT:    v_mov_b32_e32 v0, s56
9039; GFX8-NEXT:    v_mov_b32_e32 v1, s57
9040; GFX8-NEXT:    v_mov_b32_e32 v2, s54
9041; GFX8-NEXT:    v_mov_b32_e32 v3, s55
9042; GFX8-NEXT:    s_addc_u32 s1, s9, 0
9043; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
9044; GFX8-NEXT:    v_mov_b32_e32 v5, s1
9045; GFX8-NEXT:    v_mov_b32_e32 v4, s0
9046; GFX8-NEXT:    s_add_u32 s0, s8, 64
9047; GFX8-NEXT:    v_mov_b32_e32 v0, s52
9048; GFX8-NEXT:    v_mov_b32_e32 v1, s53
9049; GFX8-NEXT:    v_mov_b32_e32 v2, s50
9050; GFX8-NEXT:    v_mov_b32_e32 v3, s51
9051; GFX8-NEXT:    s_addc_u32 s1, s9, 0
9052; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
9053; GFX8-NEXT:    v_mov_b32_e32 v5, s1
9054; GFX8-NEXT:    v_mov_b32_e32 v4, s0
9055; GFX8-NEXT:    s_add_u32 s0, s8, 48
9056; GFX8-NEXT:    v_mov_b32_e32 v0, s48
9057; GFX8-NEXT:    v_mov_b32_e32 v1, s49
9058; GFX8-NEXT:    v_mov_b32_e32 v2, s46
9059; GFX8-NEXT:    v_mov_b32_e32 v3, s47
9060; GFX8-NEXT:    s_addc_u32 s1, s9, 0
9061; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
9062; GFX8-NEXT:    v_mov_b32_e32 v5, s1
9063; GFX8-NEXT:    v_mov_b32_e32 v4, s0
9064; GFX8-NEXT:    s_add_u32 s0, s8, 32
9065; GFX8-NEXT:    v_mov_b32_e32 v0, s44
9066; GFX8-NEXT:    v_mov_b32_e32 v1, s45
9067; GFX8-NEXT:    v_mov_b32_e32 v2, s42
9068; GFX8-NEXT:    v_mov_b32_e32 v3, s43
9069; GFX8-NEXT:    s_addc_u32 s1, s9, 0
9070; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
9071; GFX8-NEXT:    v_mov_b32_e32 v5, s1
9072; GFX8-NEXT:    v_mov_b32_e32 v4, s0
9073; GFX8-NEXT:    s_add_u32 s0, s8, 16
9074; GFX8-NEXT:    v_mov_b32_e32 v0, s40
9075; GFX8-NEXT:    v_mov_b32_e32 v1, s41
9076; GFX8-NEXT:    v_mov_b32_e32 v2, s38
9077; GFX8-NEXT:    v_mov_b32_e32 v3, s39
9078; GFX8-NEXT:    s_addc_u32 s1, s9, 0
9079; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
9080; GFX8-NEXT:    v_mov_b32_e32 v5, s1
9081; GFX8-NEXT:    v_mov_b32_e32 v0, s36
9082; GFX8-NEXT:    v_mov_b32_e32 v1, s37
9083; GFX8-NEXT:    v_mov_b32_e32 v2, s34
9084; GFX8-NEXT:    v_mov_b32_e32 v3, s35
9085; GFX8-NEXT:    v_mov_b32_e32 v4, s0
9086; GFX8-NEXT:    v_readlane_b32 s0, v62, 4
9087; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
9088; GFX8-NEXT:    v_readlane_b32 s1, v62, 5
9089; GFX8-NEXT:    v_mov_b32_e32 v4, s8
9090; GFX8-NEXT:    v_mov_b32_e32 v0, s30
9091; GFX8-NEXT:    v_mov_b32_e32 v1, s31
9092; GFX8-NEXT:    v_mov_b32_e32 v2, s0
9093; GFX8-NEXT:    v_mov_b32_e32 v3, s1
9094; GFX8-NEXT:    v_mov_b32_e32 v5, s9
9095; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
9096; GFX8-NEXT:    s_endpgm
9097;
9098; EG-LABEL: constant_sextload_v64i1_to_v64i64:
9099; EG:       ; %bb.0:
9100; EG-NEXT:    ALU 22, @40, KC0[CB0:0-32], KC1[]
9101; EG-NEXT:    TEX 0 @38
9102; EG-NEXT:    ALU 89, @63, KC0[CB0:0-32], KC1[]
9103; EG-NEXT:    ALU 99, @153, KC0[], KC1[]
9104; EG-NEXT:    ALU 107, @253, KC0[CB0:0-32], KC1[]
9105; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T50.XYZW, T82.X, 0
9106; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T80.XYZW, T81.X, 0
9107; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T51.XYZW, T73.X, 0
9108; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T79.XYZW, T48.X, 0
9109; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T52.XYZW, T47.X, 0
9110; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T78.XYZW, T46.X, 0
9111; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T53.XYZW, T45.X, 0
9112; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T77.XYZW, T44.X, 0
9113; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T54.XYZW, T43.X, 0
9114; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T76.XYZW, T42.X, 0
9115; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T55.XYZW, T41.X, 0
9116; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T75.XYZW, T39.X, 0
9117; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T56.XYZW, T38.X, 0
9118; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T74.XYZW, T37.X, 0
9119; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T57.XYZW, T36.X, 0
9120; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T66.XYZW, T35.X, 0
9121; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T58.XYZW, T34.X, 0
9122; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T72.XYZW, T33.X, 0
9123; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T59.XYZW, T32.X, 0
9124; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T71.XYZW, T31.X, 0
9125; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T60.XYZW, T30.X, 0
9126; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T70.XYZW, T29.X, 0
9127; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T61.XYZW, T28.X, 0
9128; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T69.XYZW, T27.X, 0
9129; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T62.XYZW, T26.X, 0
9130; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T68.XYZW, T25.X, 0
9131; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T63.XYZW, T24.X, 0
9132; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T40.XYZW, T23.X, 0
9133; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T64.XYZW, T22.X, 0
9134; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T67.XYZW, T21.X, 0
9135; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T65.XYZW, T20.X, 0
9136; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T49.XYZW, T19.X, 1
9137; EG-NEXT:    CF_END
9138; EG-NEXT:    Fetch clause starting at 38:
9139; EG-NEXT:     VTX_READ_64 T40.XY, T26.X, 0, #1
9140; EG-NEXT:    ALU clause starting at 40:
9141; EG-NEXT:     LSHR T19.X, KC0[2].Y, literal.x,
9142; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
9143; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
9144; EG-NEXT:     LSHR T20.X, PV.W, literal.x,
9145; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
9146; EG-NEXT:    2(2.802597e-45), 32(4.484155e-44)
9147; EG-NEXT:     LSHR T21.X, PV.W, literal.x,
9148; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
9149; EG-NEXT:    2(2.802597e-45), 48(6.726233e-44)
9150; EG-NEXT:     LSHR T22.X, PV.W, literal.x,
9151; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
9152; EG-NEXT:    2(2.802597e-45), 64(8.968310e-44)
9153; EG-NEXT:     LSHR T23.X, PV.W, literal.x,
9154; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
9155; EG-NEXT:    2(2.802597e-45), 80(1.121039e-43)
9156; EG-NEXT:     LSHR T24.X, PV.W, literal.x,
9157; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
9158; EG-NEXT:    2(2.802597e-45), 96(1.345247e-43)
9159; EG-NEXT:     LSHR T25.X, PV.W, literal.x,
9160; EG-NEXT:     MOV * T26.X, KC0[2].Z,
9161; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
9162; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.x,
9163; EG-NEXT:    112(1.569454e-43), 0(0.000000e+00)
9164; EG-NEXT:    ALU clause starting at 63:
9165; EG-NEXT:     LSHR T26.X, T0.W, literal.x,
9166; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
9167; EG-NEXT:    2(2.802597e-45), 128(1.793662e-43)
9168; EG-NEXT:     LSHR T27.X, PV.W, literal.x,
9169; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
9170; EG-NEXT:    2(2.802597e-45), 144(2.017870e-43)
9171; EG-NEXT:     LSHR T28.X, PV.W, literal.x,
9172; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
9173; EG-NEXT:    2(2.802597e-45), 160(2.242078e-43)
9174; EG-NEXT:     LSHR T29.X, PV.W, literal.x,
9175; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
9176; EG-NEXT:    2(2.802597e-45), 176(2.466285e-43)
9177; EG-NEXT:     LSHR T30.X, PV.W, literal.x,
9178; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
9179; EG-NEXT:    2(2.802597e-45), 192(2.690493e-43)
9180; EG-NEXT:     LSHR T31.X, PV.W, literal.x,
9181; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
9182; EG-NEXT:    2(2.802597e-45), 208(2.914701e-43)
9183; EG-NEXT:     LSHR T32.X, PV.W, literal.x,
9184; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
9185; EG-NEXT:    2(2.802597e-45), 224(3.138909e-43)
9186; EG-NEXT:     LSHR T33.X, PV.W, literal.x,
9187; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
9188; EG-NEXT:    2(2.802597e-45), 240(3.363116e-43)
9189; EG-NEXT:     LSHR T34.X, PV.W, literal.x,
9190; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
9191; EG-NEXT:    2(2.802597e-45), 256(3.587324e-43)
9192; EG-NEXT:     LSHR T35.X, PV.W, literal.x,
9193; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
9194; EG-NEXT:    2(2.802597e-45), 272(3.811532e-43)
9195; EG-NEXT:     LSHR T36.X, PV.W, literal.x,
9196; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
9197; EG-NEXT:    2(2.802597e-45), 288(4.035740e-43)
9198; EG-NEXT:     LSHR T37.X, PV.W, literal.x,
9199; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
9200; EG-NEXT:    2(2.802597e-45), 304(4.259947e-43)
9201; EG-NEXT:     LSHR T38.X, PV.W, literal.x,
9202; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
9203; EG-NEXT:    2(2.802597e-45), 320(4.484155e-43)
9204; EG-NEXT:     LSHR T39.X, PV.W, literal.x,
9205; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
9206; EG-NEXT:    2(2.802597e-45), 336(4.708363e-43)
9207; EG-NEXT:     LSHR T41.X, PV.W, literal.x,
9208; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
9209; EG-NEXT:    2(2.802597e-45), 352(4.932571e-43)
9210; EG-NEXT:     LSHR T42.X, PV.W, literal.x,
9211; EG-NEXT:     LSHR T0.Z, T40.Y, literal.y,
9212; EG-NEXT:     LSHR T0.W, T40.Y, literal.z,
9213; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.w,
9214; EG-NEXT:    2(2.802597e-45), 28(3.923636e-44)
9215; EG-NEXT:    29(4.063766e-44), 368(5.156778e-43)
9216; EG-NEXT:     LSHR T43.X, PS, literal.x,
9217; EG-NEXT:     LSHR T0.Y, T40.Y, literal.y,
9218; EG-NEXT:     LSHR T1.Z, T40.Y, literal.z,
9219; EG-NEXT:     LSHR * T1.W, T40.Y, literal.w,
9220; EG-NEXT:    2(2.802597e-45), 24(3.363116e-44)
9221; EG-NEXT:    25(3.503246e-44), 20(2.802597e-44)
9222; EG-NEXT:     ADD_INT * T2.W, KC0[2].Y, literal.x,
9223; EG-NEXT:    384(5.380986e-43), 0(0.000000e+00)
9224; EG-NEXT:     LSHR T44.X, PV.W, literal.x,
9225; EG-NEXT:     LSHR T1.Y, T40.Y, literal.y,
9226; EG-NEXT:     LSHR T2.Z, T40.Y, literal.z,
9227; EG-NEXT:     LSHR * T2.W, T40.Y, literal.w,
9228; EG-NEXT:    2(2.802597e-45), 21(2.942727e-44)
9229; EG-NEXT:    16(2.242078e-44), 17(2.382207e-44)
9230; EG-NEXT:     ADD_INT * T3.W, KC0[2].Y, literal.x,
9231; EG-NEXT:    400(5.605194e-43), 0(0.000000e+00)
9232; EG-NEXT:     LSHR T45.X, PV.W, literal.x,
9233; EG-NEXT:     LSHR T2.Y, T40.Y, literal.y,
9234; EG-NEXT:     LSHR T3.Z, T40.Y, literal.z,
9235; EG-NEXT:     LSHR * T3.W, T40.Y, literal.w,
9236; EG-NEXT:    2(2.802597e-45), 12(1.681558e-44)
9237; EG-NEXT:    13(1.821688e-44), 8(1.121039e-44)
9238; EG-NEXT:     ADD_INT * T4.W, KC0[2].Y, literal.x,
9239; EG-NEXT:    416(5.829402e-43), 0(0.000000e+00)
9240; EG-NEXT:     LSHR T46.X, PV.W, literal.x,
9241; EG-NEXT:     LSHR T3.Y, T40.Y, literal.y,
9242; EG-NEXT:     LSHR T4.Z, T40.Y, literal.z,
9243; EG-NEXT:     LSHR * T4.W, T40.Y, literal.w,
9244; EG-NEXT:    2(2.802597e-45), 9(1.261169e-44)
9245; EG-NEXT:    4(5.605194e-45), 5(7.006492e-45)
9246; EG-NEXT:     ADD_INT * T5.W, KC0[2].Y, literal.x,
9247; EG-NEXT:    432(6.053609e-43), 0(0.000000e+00)
9248; EG-NEXT:     LSHR T47.X, PV.W, literal.x,
9249; EG-NEXT:     ADD_INT T4.Y, KC0[2].Y, literal.y,
9250; EG-NEXT:     LSHR T5.Z, T40.Y, 1,
9251; EG-NEXT:     LSHR T5.W, T40.X, literal.z,
9252; EG-NEXT:     ADD_INT * T6.W, KC0[2].Y, literal.w,
9253; EG-NEXT:    2(2.802597e-45), 464(6.502025e-43)
9254; EG-NEXT:    28(3.923636e-44), 448(6.277817e-43)
9255; EG-NEXT:    ALU clause starting at 153:
9256; EG-NEXT:     LSHR T48.X, T6.W, literal.x,
9257; EG-NEXT:     LSHR T5.Y, T40.X, literal.y,
9258; EG-NEXT:     LSHR T6.Z, T40.X, literal.z,
9259; EG-NEXT:     LSHR * T6.W, T40.X, literal.w,
9260; EG-NEXT:    2(2.802597e-45), 29(4.063766e-44)
9261; EG-NEXT:    24(3.363116e-44), 25(3.503246e-44)
9262; EG-NEXT:     LSHR * T7.W, T40.X, literal.x,
9263; EG-NEXT:    20(2.802597e-44), 0(0.000000e+00)
9264; EG-NEXT:     BFE_INT T49.X, T40.X, 0.0, 1,
9265; EG-NEXT:     LSHR T6.Y, T40.X, literal.x,
9266; EG-NEXT:     ASHR T50.Z, T40.Y, literal.y,
9267; EG-NEXT:     LSHR T8.W, T40.Y, literal.z,
9268; EG-NEXT:     LSHR * T9.W, T40.Y, literal.w,
9269; EG-NEXT:    21(2.942727e-44), 31(4.344025e-44)
9270; EG-NEXT:    27(3.783506e-44), 30(4.203895e-44)
9271; EG-NEXT:     BFE_INT T50.X, PS, 0.0, 1,
9272; EG-NEXT:     LSHR T7.Y, T40.X, literal.x,
9273; EG-NEXT:     BFE_INT T51.Z, PV.W, 0.0, 1,
9274; EG-NEXT:     LSHR T8.W, T40.Y, literal.y,
9275; EG-NEXT:     LSHR * T9.W, T40.Y, literal.z,
9276; EG-NEXT:    16(2.242078e-44), 23(3.222986e-44)
9277; EG-NEXT:    26(3.643376e-44), 0(0.000000e+00)
9278; EG-NEXT:     BFE_INT T51.X, PS, 0.0, 1,
9279; EG-NEXT:     MOV T50.Y, PV.X,
9280; EG-NEXT:     BFE_INT T52.Z, PV.W, 0.0, 1,
9281; EG-NEXT:     LSHR T8.W, T40.Y, literal.x,
9282; EG-NEXT:     LSHR * T9.W, T40.Y, literal.y,
9283; EG-NEXT:    19(2.662467e-44), 22(3.082857e-44)
9284; EG-NEXT:     BFE_INT T52.X, PS, 0.0, 1,
9285; EG-NEXT:     MOV T51.Y, PV.X,
9286; EG-NEXT:     BFE_INT T53.Z, PV.W, 0.0, 1,
9287; EG-NEXT:     LSHR T8.W, T40.Y, literal.x,
9288; EG-NEXT:     LSHR * T9.W, T40.Y, literal.y,
9289; EG-NEXT:    15(2.101948e-44), 18(2.522337e-44)
9290; EG-NEXT:     BFE_INT T53.X, PS, 0.0, 1,
9291; EG-NEXT:     MOV T52.Y, PV.X,
9292; EG-NEXT:     BFE_INT T54.Z, PV.W, 0.0, 1,
9293; EG-NEXT:     LSHR T8.W, T40.Y, literal.x,
9294; EG-NEXT:     LSHR * T9.W, T40.Y, literal.y,
9295; EG-NEXT:    11(1.541428e-44), 14(1.961818e-44)
9296; EG-NEXT:     BFE_INT T54.X, PS, 0.0, 1,
9297; EG-NEXT:     MOV T53.Y, PV.X,
9298; EG-NEXT:     BFE_INT T55.Z, PV.W, 0.0, 1,
9299; EG-NEXT:     LSHR T8.W, T40.Y, literal.x,
9300; EG-NEXT:     LSHR * T9.W, T40.Y, literal.y,
9301; EG-NEXT:    7(9.809089e-45), 10(1.401298e-44)
9302; EG-NEXT:     BFE_INT T55.X, PS, 0.0, 1,
9303; EG-NEXT:     MOV T54.Y, PV.X,
9304; EG-NEXT:     BFE_INT T56.Z, PV.W, 0.0, 1,
9305; EG-NEXT:     LSHR T8.W, T40.Y, literal.x,
9306; EG-NEXT:     LSHR * T9.W, T40.Y, literal.y,
9307; EG-NEXT:    3(4.203895e-45), 6(8.407791e-45)
9308; EG-NEXT:     BFE_INT T56.X, PS, 0.0, 1,
9309; EG-NEXT:     MOV T55.Y, PV.X,
9310; EG-NEXT:     BFE_INT T57.Z, PV.W, 0.0, 1,
9311; EG-NEXT:     LSHR T8.W, T40.X, literal.x,
9312; EG-NEXT:     LSHR * T9.W, T40.Y, literal.y,
9313; EG-NEXT:    17(2.382207e-44), 2(2.802597e-45)
9314; EG-NEXT:     BFE_INT T57.X, PS, 0.0, 1,
9315; EG-NEXT:     MOV T56.Y, PV.X,
9316; EG-NEXT:     ASHR T58.Z, T40.X, literal.x,
9317; EG-NEXT:     LSHR T9.W, T40.X, literal.y,
9318; EG-NEXT:     LSHR * T10.W, T40.X, literal.z,
9319; EG-NEXT:    31(4.344025e-44), 27(3.783506e-44)
9320; EG-NEXT:    30(4.203895e-44), 0(0.000000e+00)
9321; EG-NEXT:     BFE_INT T58.X, PS, 0.0, 1,
9322; EG-NEXT:     MOV T57.Y, PV.X,
9323; EG-NEXT:     BFE_INT T59.Z, PV.W, 0.0, 1,
9324; EG-NEXT:     LSHR T9.W, T40.X, literal.x,
9325; EG-NEXT:     LSHR * T10.W, T40.X, literal.y,
9326; EG-NEXT:    23(3.222986e-44), 26(3.643376e-44)
9327; EG-NEXT:     BFE_INT T59.X, PS, 0.0, 1,
9328; EG-NEXT:     MOV T58.Y, PV.X,
9329; EG-NEXT:     BFE_INT T60.Z, PV.W, 0.0, 1,
9330; EG-NEXT:     LSHR T9.W, T40.X, literal.x,
9331; EG-NEXT:     LSHR * T10.W, T40.X, literal.y,
9332; EG-NEXT:    19(2.662467e-44), 22(3.082857e-44)
9333; EG-NEXT:     BFE_INT T60.X, PS, 0.0, 1,
9334; EG-NEXT:     MOV T59.Y, PV.X,
9335; EG-NEXT:     BFE_INT T61.Z, PV.W, 0.0, 1,
9336; EG-NEXT:     LSHR T9.W, T40.X, literal.x,
9337; EG-NEXT:     LSHR * T10.W, T40.X, literal.y,
9338; EG-NEXT:    15(2.101948e-44), 18(2.522337e-44)
9339; EG-NEXT:     BFE_INT T61.X, PS, 0.0, 1,
9340; EG-NEXT:     MOV T60.Y, PV.X,
9341; EG-NEXT:     BFE_INT T62.Z, PV.W, 0.0, 1,
9342; EG-NEXT:     LSHR T9.W, T40.X, literal.x,
9343; EG-NEXT:     LSHR * T10.W, T40.X, literal.y,
9344; EG-NEXT:    11(1.541428e-44), 14(1.961818e-44)
9345; EG-NEXT:     BFE_INT T62.X, PS, 0.0, 1,
9346; EG-NEXT:     MOV T61.Y, PV.X,
9347; EG-NEXT:     BFE_INT T63.Z, PV.W, 0.0, 1,
9348; EG-NEXT:     LSHR T9.W, T40.X, literal.x,
9349; EG-NEXT:     LSHR * T10.W, T40.X, literal.y,
9350; EG-NEXT:    7(9.809089e-45), 10(1.401298e-44)
9351; EG-NEXT:     BFE_INT T63.X, PS, 0.0, 1,
9352; EG-NEXT:     MOV T62.Y, PV.X,
9353; EG-NEXT:     BFE_INT T64.Z, PV.W, 0.0, 1,
9354; EG-NEXT:     LSHR * T9.W, T40.X, literal.x,
9355; EG-NEXT:    3(4.203895e-45), 0(0.000000e+00)
9356; EG-NEXT:    ALU clause starting at 253:
9357; EG-NEXT:     LSHR * T10.W, T40.X, literal.x,
9358; EG-NEXT:    6(8.407791e-45), 0(0.000000e+00)
9359; EG-NEXT:     BFE_INT T64.X, PV.W, 0.0, 1,
9360; EG-NEXT:     MOV T63.Y, T63.X,
9361; EG-NEXT:     BFE_INT T65.Z, T9.W, 0.0, 1,
9362; EG-NEXT:     LSHR T9.W, T40.X, 1, BS:VEC_120/SCL_212
9363; EG-NEXT:     LSHR * T10.W, T40.X, literal.x,
9364; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
9365; EG-NEXT:     BFE_INT T65.X, PS, 0.0, 1,
9366; EG-NEXT:     MOV T64.Y, PV.X,
9367; EG-NEXT:     BFE_INT T49.Z, PV.W, 0.0, 1,
9368; EG-NEXT:     LSHR T9.W, T40.X, literal.x,
9369; EG-NEXT:     LSHR * T10.W, T40.X, literal.y,
9370; EG-NEXT:    12(1.681558e-44), 5(7.006492e-45)
9371; EG-NEXT:     BFE_INT T66.X, T40.Y, 0.0, 1,
9372; EG-NEXT:     MOV T65.Y, PV.X,
9373; EG-NEXT:     BFE_INT T67.Z, PS, 0.0, 1,
9374; EG-NEXT:     LSHR T10.W, T40.X, literal.x,
9375; EG-NEXT:     LSHR * T11.W, T40.X, literal.y,
9376; EG-NEXT:    9(1.261169e-44), 4(5.605194e-45)
9377; EG-NEXT:     BFE_INT T67.X, PS, 0.0, 1,
9378; EG-NEXT:     MOV T49.Y, T49.X,
9379; EG-NEXT:     BFE_INT T40.Z, PV.W, 0.0, 1,
9380; EG-NEXT:     LSHR T10.W, T40.X, literal.x, BS:VEC_120/SCL_212
9381; EG-NEXT:     LSHR * T11.W, T40.X, literal.y,
9382; EG-NEXT:    13(1.821688e-44), 8(1.121039e-44)
9383; EG-NEXT:     BFE_INT T40.X, PS, 0.0, 1,
9384; EG-NEXT:     MOV T67.Y, PV.X,
9385; EG-NEXT:     BFE_INT T68.Z, PV.W, 0.0, 1,
9386; EG-NEXT:     MOV T49.W, T49.Z,
9387; EG-NEXT:     MOV * T65.W, T65.Z,
9388; EG-NEXT:     BFE_INT T68.X, T9.W, 0.0, 1,
9389; EG-NEXT:     MOV T40.Y, PV.X,
9390; EG-NEXT:     BFE_INT T69.Z, T8.W, 0.0, 1, BS:VEC_120/SCL_212
9391; EG-NEXT:     MOV T67.W, T67.Z,
9392; EG-NEXT:     MOV * T64.W, T64.Z,
9393; EG-NEXT:     BFE_INT T69.X, T7.Y, 0.0, 1,
9394; EG-NEXT:     MOV T68.Y, PV.X,
9395; EG-NEXT:     BFE_INT T70.Z, T6.Y, 0.0, 1, BS:VEC_120/SCL_212
9396; EG-NEXT:     MOV T40.W, T40.Z,
9397; EG-NEXT:     MOV * T63.W, T63.Z,
9398; EG-NEXT:     BFE_INT T70.X, T7.W, 0.0, 1,
9399; EG-NEXT:     MOV T69.Y, PV.X,
9400; EG-NEXT:     BFE_INT T71.Z, T6.W, 0.0, 1, BS:VEC_120/SCL_212
9401; EG-NEXT:     MOV T68.W, T68.Z,
9402; EG-NEXT:     MOV * T62.W, T62.Z,
9403; EG-NEXT:     BFE_INT T71.X, T6.Z, 0.0, 1,
9404; EG-NEXT:     MOV T70.Y, PV.X,
9405; EG-NEXT:     BFE_INT T72.Z, T5.Y, 0.0, 1,
9406; EG-NEXT:     MOV T69.W, T69.Z, BS:VEC_120/SCL_212
9407; EG-NEXT:     MOV * T61.W, T61.Z,
9408; EG-NEXT:     BFE_INT T72.X, T5.W, 0.0, 1,
9409; EG-NEXT:     MOV T71.Y, PV.X,
9410; EG-NEXT:     BFE_INT T66.Z, T5.Z, 0.0, 1,
9411; EG-NEXT:     MOV T70.W, T70.Z, BS:VEC_120/SCL_212
9412; EG-NEXT:     MOV * T60.W, T60.Z,
9413; EG-NEXT:     LSHR T73.X, T4.Y, literal.x,
9414; EG-NEXT:     MOV T72.Y, PV.X,
9415; EG-NEXT:     BFE_INT T74.Z, T4.W, 0.0, 1,
9416; EG-NEXT:     MOV T71.W, T71.Z,
9417; EG-NEXT:     MOV * T59.W, T59.Z,
9418; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
9419; EG-NEXT:     BFE_INT T74.X, T4.Z, 0.0, 1,
9420; EG-NEXT:     MOV T66.Y, T66.X,
9421; EG-NEXT:     BFE_INT T75.Z, T3.Y, 0.0, 1,
9422; EG-NEXT:     MOV T72.W, T72.Z, BS:VEC_120/SCL_212
9423; EG-NEXT:     MOV * T58.W, T58.Z,
9424; EG-NEXT:     BFE_INT T75.X, T3.W, 0.0, 1,
9425; EG-NEXT:     MOV T74.Y, PV.X,
9426; EG-NEXT:     BFE_INT T76.Z, T3.Z, 0.0, 1,
9427; EG-NEXT:     MOV T66.W, T66.Z, BS:VEC_120/SCL_212
9428; EG-NEXT:     MOV * T57.W, T57.Z,
9429; EG-NEXT:     BFE_INT T76.X, T2.Y, 0.0, 1,
9430; EG-NEXT:     MOV T75.Y, PV.X,
9431; EG-NEXT:     BFE_INT T77.Z, T2.W, 0.0, 1,
9432; EG-NEXT:     MOV T74.W, T74.Z,
9433; EG-NEXT:     MOV * T56.W, T56.Z,
9434; EG-NEXT:     BFE_INT T77.X, T2.Z, 0.0, 1,
9435; EG-NEXT:     MOV T76.Y, PV.X,
9436; EG-NEXT:     BFE_INT T78.Z, T1.Y, 0.0, 1,
9437; EG-NEXT:     MOV T75.W, T75.Z, BS:VEC_120/SCL_212
9438; EG-NEXT:     MOV * T55.W, T55.Z,
9439; EG-NEXT:     BFE_INT T78.X, T1.W, 0.0, 1,
9440; EG-NEXT:     MOV T77.Y, PV.X,
9441; EG-NEXT:     BFE_INT T79.Z, T1.Z, 0.0, 1,
9442; EG-NEXT:     MOV T76.W, T76.Z, BS:VEC_120/SCL_212
9443; EG-NEXT:     MOV * T54.W, T54.Z,
9444; EG-NEXT:     BFE_INT T79.X, T0.Y, 0.0, 1,
9445; EG-NEXT:     MOV T78.Y, PV.X,
9446; EG-NEXT:     BFE_INT T80.Z, T0.W, 0.0, 1,
9447; EG-NEXT:     MOV T77.W, T77.Z,
9448; EG-NEXT:     MOV * T53.W, T53.Z,
9449; EG-NEXT:     BFE_INT T80.X, T0.Z, 0.0, 1,
9450; EG-NEXT:     MOV T79.Y, PV.X,
9451; EG-NEXT:     ADD_INT T0.Z, KC0[2].Y, literal.x,
9452; EG-NEXT:     MOV T78.W, T78.Z, BS:VEC_120/SCL_212
9453; EG-NEXT:     MOV * T52.W, T52.Z,
9454; EG-NEXT:    480(6.726233e-43), 0(0.000000e+00)
9455; EG-NEXT:     LSHR T81.X, PV.Z, literal.x,
9456; EG-NEXT:     MOV T80.Y, PV.X,
9457; EG-NEXT:     ADD_INT T0.Z, KC0[2].Y, literal.y,
9458; EG-NEXT:     MOV T79.W, T79.Z,
9459; EG-NEXT:     MOV * T51.W, T51.Z,
9460; EG-NEXT:    2(2.802597e-45), 496(6.950440e-43)
9461; EG-NEXT:     LSHR T82.X, PV.Z, literal.x,
9462; EG-NEXT:     MOV T80.W, T80.Z,
9463; EG-NEXT:     MOV * T50.W, T50.Z,
9464; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
9465;
9466; GFX12-LABEL: constant_sextload_v64i1_to_v64i64:
9467; GFX12:       ; %bb.0:
9468; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
9469; GFX12-NEXT:    s_wait_kmcnt 0x0
9470; GFX12-NEXT:    s_load_b64 s[12:13], s[2:3], 0x0
9471; GFX12-NEXT:    s_wait_kmcnt 0x0
9472; GFX12-NEXT:    s_lshr_b32 s96, s13, 30
9473; GFX12-NEXT:    s_lshr_b32 s98, s13, 31
9474; GFX12-NEXT:    s_lshr_b32 s92, s13, 28
9475; GFX12-NEXT:    s_lshr_b32 s94, s13, 29
9476; GFX12-NEXT:    s_lshr_b32 s78, s13, 26
9477; GFX12-NEXT:    s_lshr_b32 s88, s13, 27
9478; GFX12-NEXT:    s_wait_alu 0xfffe
9479; GFX12-NEXT:    s_bfe_i64 s[96:97], s[96:97], 0x10000
9480; GFX12-NEXT:    s_bfe_i64 s[100:101], s[98:99], 0x10000
9481; GFX12-NEXT:    s_lshr_b32 s66, s13, 24
9482; GFX12-NEXT:    s_lshr_b32 s74, s13, 25
9483; GFX12-NEXT:    s_bfe_i64 s[92:93], s[92:93], 0x10000
9484; GFX12-NEXT:    s_bfe_i64 s[94:95], s[94:95], 0x10000
9485; GFX12-NEXT:    s_wait_alu 0xfffe
9486; GFX12-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s96
9487; GFX12-NEXT:    s_lshr_b32 s56, s13, 22
9488; GFX12-NEXT:    s_lshr_b32 s62, s13, 23
9489; GFX12-NEXT:    v_dual_mov_b32 v2, s97 :: v_dual_mov_b32 v3, s100
9490; GFX12-NEXT:    v_dual_mov_b32 v4, s101 :: v_dual_mov_b32 v5, s92
9491; GFX12-NEXT:    s_bfe_i64 s[78:79], s[78:79], 0x10000
9492; GFX12-NEXT:    s_bfe_i64 s[88:89], s[88:89], 0x10000
9493; GFX12-NEXT:    s_lshr_b32 s44, s13, 20
9494; GFX12-NEXT:    s_lshr_b32 s52, s13, 21
9495; GFX12-NEXT:    s_lshr_b32 s30, s13, 18
9496; GFX12-NEXT:    s_lshr_b32 s40, s13, 19
9497; GFX12-NEXT:    s_lshr_b32 s18, s13, 16
9498; GFX12-NEXT:    s_lshr_b32 s26, s13, 17
9499; GFX12-NEXT:    s_lshr_b32 s2, s13, 14
9500; GFX12-NEXT:    s_lshr_b32 s4, s13, 15
9501; GFX12-NEXT:    v_dual_mov_b32 v6, s93 :: v_dual_mov_b32 v7, s94
9502; GFX12-NEXT:    s_wait_alu 0xfffe
9503; GFX12-NEXT:    v_dual_mov_b32 v8, s95 :: v_dual_mov_b32 v9, s78
9504; GFX12-NEXT:    s_bfe_i64 s[66:67], s[66:67], 0x10000
9505; GFX12-NEXT:    s_bfe_i64 s[74:75], s[74:75], 0x10000
9506; GFX12-NEXT:    s_lshr_b32 s6, s13, 12
9507; GFX12-NEXT:    s_lshr_b32 s8, s13, 13
9508; GFX12-NEXT:    v_dual_mov_b32 v10, s79 :: v_dual_mov_b32 v11, s88
9509; GFX12-NEXT:    s_wait_alu 0xfffe
9510; GFX12-NEXT:    v_dual_mov_b32 v12, s89 :: v_dual_mov_b32 v13, s66
9511; GFX12-NEXT:    s_bfe_i64 s[56:57], s[56:57], 0x10000
9512; GFX12-NEXT:    s_bfe_i64 s[62:63], s[62:63], 0x10000
9513; GFX12-NEXT:    s_lshr_b32 s10, s13, 10
9514; GFX12-NEXT:    s_lshr_b32 s14, s13, 11
9515; GFX12-NEXT:    v_dual_mov_b32 v14, s67 :: v_dual_mov_b32 v15, s74
9516; GFX12-NEXT:    s_wait_alu 0xfffe
9517; GFX12-NEXT:    v_dual_mov_b32 v16, s75 :: v_dual_mov_b32 v17, s56
9518; GFX12-NEXT:    s_bfe_i64 s[44:45], s[44:45], 0x10000
9519; GFX12-NEXT:    s_bfe_i64 s[52:53], s[52:53], 0x10000
9520; GFX12-NEXT:    s_bfe_i64 s[30:31], s[30:31], 0x10000
9521; GFX12-NEXT:    s_bfe_i64 s[40:41], s[40:41], 0x10000
9522; GFX12-NEXT:    s_bfe_i64 s[18:19], s[18:19], 0x10000
9523; GFX12-NEXT:    s_bfe_i64 s[26:27], s[26:27], 0x10000
9524; GFX12-NEXT:    s_bfe_i64 s[4:5], s[4:5], 0x10000
9525; GFX12-NEXT:    s_bfe_i64 s[2:3], s[2:3], 0x10000
9526; GFX12-NEXT:    s_lshr_b32 s16, s13, 8
9527; GFX12-NEXT:    s_lshr_b32 s20, s13, 9
9528; GFX12-NEXT:    v_dual_mov_b32 v18, s57 :: v_dual_mov_b32 v19, s62
9529; GFX12-NEXT:    s_wait_alu 0xfffe
9530; GFX12-NEXT:    v_dual_mov_b32 v20, s63 :: v_dual_mov_b32 v21, s44
9531; GFX12-NEXT:    s_bfe_i64 s[8:9], s[8:9], 0x10000
9532; GFX12-NEXT:    s_bfe_i64 s[6:7], s[6:7], 0x10000
9533; GFX12-NEXT:    s_lshr_b32 s22, s13, 6
9534; GFX12-NEXT:    s_lshr_b32 s24, s13, 7
9535; GFX12-NEXT:    v_dual_mov_b32 v22, s45 :: v_dual_mov_b32 v23, s52
9536; GFX12-NEXT:    v_dual_mov_b32 v24, s53 :: v_dual_mov_b32 v25, s30
9537; GFX12-NEXT:    v_dual_mov_b32 v26, s31 :: v_dual_mov_b32 v27, s40
9538; GFX12-NEXT:    v_dual_mov_b32 v28, s41 :: v_dual_mov_b32 v29, s18
9539; GFX12-NEXT:    v_dual_mov_b32 v30, s19 :: v_dual_mov_b32 v31, s26
9540; GFX12-NEXT:    v_mov_b32_e32 v32, s27
9541; GFX12-NEXT:    s_bfe_i64 s[14:15], s[14:15], 0x10000
9542; GFX12-NEXT:    s_bfe_i64 s[10:11], s[10:11], 0x10000
9543; GFX12-NEXT:    s_clause 0x7
9544; GFX12-NEXT:    global_store_b128 v0, v[1:4], s[0:1] offset:496
9545; GFX12-NEXT:    global_store_b128 v0, v[5:8], s[0:1] offset:480
9546; GFX12-NEXT:    global_store_b128 v0, v[9:12], s[0:1] offset:464
9547; GFX12-NEXT:    global_store_b128 v0, v[13:16], s[0:1] offset:448
9548; GFX12-NEXT:    global_store_b128 v0, v[17:20], s[0:1] offset:432
9549; GFX12-NEXT:    global_store_b128 v0, v[21:24], s[0:1] offset:416
9550; GFX12-NEXT:    global_store_b128 v0, v[25:28], s[0:1] offset:400
9551; GFX12-NEXT:    global_store_b128 v0, v[29:32], s[0:1] offset:384
9552; GFX12-NEXT:    v_dual_mov_b32 v1, s2 :: v_dual_mov_b32 v2, s3
9553; GFX12-NEXT:    v_dual_mov_b32 v3, s4 :: v_dual_mov_b32 v4, s5
9554; GFX12-NEXT:    v_mov_b32_e32 v5, s6
9555; GFX12-NEXT:    s_lshr_b32 s28, s13, 4
9556; GFX12-NEXT:    s_lshr_b32 s34, s13, 5
9557; GFX12-NEXT:    s_lshr_b32 s36, s13, 2
9558; GFX12-NEXT:    s_lshr_b32 s38, s13, 3
9559; GFX12-NEXT:    s_bfe_i64 s[20:21], s[20:21], 0x10000
9560; GFX12-NEXT:    s_bfe_i64 s[16:17], s[16:17], 0x10000
9561; GFX12-NEXT:    v_dual_mov_b32 v6, s7 :: v_dual_mov_b32 v7, s8
9562; GFX12-NEXT:    v_dual_mov_b32 v8, s9 :: v_dual_mov_b32 v9, s10
9563; GFX12-NEXT:    s_lshr_b32 s42, s13, 1
9564; GFX12-NEXT:    s_mov_b32 s46, s13
9565; GFX12-NEXT:    s_bfe_i64 s[24:25], s[24:25], 0x10000
9566; GFX12-NEXT:    s_bfe_i64 s[22:23], s[22:23], 0x10000
9567; GFX12-NEXT:    v_dual_mov_b32 v10, s11 :: v_dual_mov_b32 v11, s14
9568; GFX12-NEXT:    v_dual_mov_b32 v12, s15 :: v_dual_mov_b32 v13, s16
9569; GFX12-NEXT:    s_lshr_b32 s48, s12, 30
9570; GFX12-NEXT:    s_lshr_b32 s50, s12, 31
9571; GFX12-NEXT:    s_bfe_i64 s[38:39], s[38:39], 0x10000
9572; GFX12-NEXT:    s_bfe_i64 s[36:37], s[36:37], 0x10000
9573; GFX12-NEXT:    s_bfe_i64 s[34:35], s[34:35], 0x10000
9574; GFX12-NEXT:    s_bfe_i64 s[28:29], s[28:29], 0x10000
9575; GFX12-NEXT:    v_dual_mov_b32 v14, s17 :: v_dual_mov_b32 v15, s20
9576; GFX12-NEXT:    v_dual_mov_b32 v16, s21 :: v_dual_mov_b32 v17, s22
9577; GFX12-NEXT:    s_lshr_b32 s54, s12, 28
9578; GFX12-NEXT:    s_lshr_b32 s58, s12, 29
9579; GFX12-NEXT:    s_bfe_i64 s[46:47], s[46:47], 0x10000
9580; GFX12-NEXT:    s_bfe_i64 s[42:43], s[42:43], 0x10000
9581; GFX12-NEXT:    v_dual_mov_b32 v18, s23 :: v_dual_mov_b32 v19, s24
9582; GFX12-NEXT:    v_dual_mov_b32 v20, s25 :: v_dual_mov_b32 v21, s28
9583; GFX12-NEXT:    s_lshr_b32 s60, s12, 26
9584; GFX12-NEXT:    s_lshr_b32 s64, s12, 27
9585; GFX12-NEXT:    s_bfe_i64 s[50:51], s[50:51], 0x10000
9586; GFX12-NEXT:    s_bfe_i64 s[48:49], s[48:49], 0x10000
9587; GFX12-NEXT:    v_dual_mov_b32 v22, s29 :: v_dual_mov_b32 v23, s34
9588; GFX12-NEXT:    v_mov_b32_e32 v24, s35
9589; GFX12-NEXT:    s_clause 0x5
9590; GFX12-NEXT:    global_store_b128 v0, v[1:4], s[0:1] offset:368
9591; GFX12-NEXT:    global_store_b128 v0, v[5:8], s[0:1] offset:352
9592; GFX12-NEXT:    global_store_b128 v0, v[9:12], s[0:1] offset:336
9593; GFX12-NEXT:    global_store_b128 v0, v[13:16], s[0:1] offset:320
9594; GFX12-NEXT:    global_store_b128 v0, v[17:20], s[0:1] offset:304
9595; GFX12-NEXT:    global_store_b128 v0, v[21:24], s[0:1] offset:288
9596; GFX12-NEXT:    v_dual_mov_b32 v1, s36 :: v_dual_mov_b32 v2, s37
9597; GFX12-NEXT:    v_dual_mov_b32 v3, s38 :: v_dual_mov_b32 v4, s39
9598; GFX12-NEXT:    v_mov_b32_e32 v5, s46
9599; GFX12-NEXT:    s_lshr_b32 s68, s12, 24
9600; GFX12-NEXT:    s_lshr_b32 s70, s12, 25
9601; GFX12-NEXT:    s_lshr_b32 s72, s12, 22
9602; GFX12-NEXT:    s_lshr_b32 s76, s12, 23
9603; GFX12-NEXT:    s_bfe_i64 s[58:59], s[58:59], 0x10000
9604; GFX12-NEXT:    s_bfe_i64 s[54:55], s[54:55], 0x10000
9605; GFX12-NEXT:    v_dual_mov_b32 v6, s47 :: v_dual_mov_b32 v7, s42
9606; GFX12-NEXT:    v_dual_mov_b32 v8, s43 :: v_dual_mov_b32 v9, s48
9607; GFX12-NEXT:    s_lshr_b32 s80, s12, 20
9608; GFX12-NEXT:    s_lshr_b32 s82, s12, 21
9609; GFX12-NEXT:    s_bfe_i64 s[64:65], s[64:65], 0x10000
9610; GFX12-NEXT:    s_bfe_i64 s[60:61], s[60:61], 0x10000
9611; GFX12-NEXT:    v_dual_mov_b32 v10, s49 :: v_dual_mov_b32 v11, s50
9612; GFX12-NEXT:    v_dual_mov_b32 v12, s51 :: v_dual_mov_b32 v13, s54
9613; GFX12-NEXT:    s_lshr_b32 s84, s12, 18
9614; GFX12-NEXT:    s_lshr_b32 s86, s12, 19
9615; GFX12-NEXT:    s_bfe_i64 s[76:77], s[76:77], 0x10000
9616; GFX12-NEXT:    s_bfe_i64 s[72:73], s[72:73], 0x10000
9617; GFX12-NEXT:    s_bfe_i64 s[70:71], s[70:71], 0x10000
9618; GFX12-NEXT:    s_bfe_i64 s[68:69], s[68:69], 0x10000
9619; GFX12-NEXT:    v_dual_mov_b32 v14, s55 :: v_dual_mov_b32 v15, s58
9620; GFX12-NEXT:    v_dual_mov_b32 v16, s59 :: v_dual_mov_b32 v17, s60
9621; GFX12-NEXT:    s_lshr_b32 s90, s12, 16
9622; GFX12-NEXT:    s_lshr_b32 s98, s12, 17
9623; GFX12-NEXT:    s_bfe_i64 s[82:83], s[82:83], 0x10000
9624; GFX12-NEXT:    s_bfe_i64 s[80:81], s[80:81], 0x10000
9625; GFX12-NEXT:    v_dual_mov_b32 v18, s61 :: v_dual_mov_b32 v19, s64
9626; GFX12-NEXT:    v_dual_mov_b32 v20, s65 :: v_dual_mov_b32 v21, s68
9627; GFX12-NEXT:    s_lshr_b32 s96, s12, 14
9628; GFX12-NEXT:    s_lshr_b32 s100, s12, 15
9629; GFX12-NEXT:    s_lshr_b32 s94, s12, 13
9630; GFX12-NEXT:    s_lshr_b32 s88, s12, 11
9631; GFX12-NEXT:    s_lshr_b32 s74, s12, 9
9632; GFX12-NEXT:    s_lshr_b32 s62, s12, 7
9633; GFX12-NEXT:    s_lshr_b32 s52, s12, 5
9634; GFX12-NEXT:    s_lshr_b32 s40, s12, 3
9635; GFX12-NEXT:    s_lshr_b32 s26, s12, 1
9636; GFX12-NEXT:    s_bfe_i64 s[86:87], s[86:87], 0x10000
9637; GFX12-NEXT:    s_bfe_i64 s[84:85], s[84:85], 0x10000
9638; GFX12-NEXT:    v_dual_mov_b32 v22, s69 :: v_dual_mov_b32 v23, s70
9639; GFX12-NEXT:    v_mov_b32_e32 v24, s71
9640; GFX12-NEXT:    s_clause 0x5
9641; GFX12-NEXT:    global_store_b128 v0, v[1:4], s[0:1] offset:272
9642; GFX12-NEXT:    global_store_b128 v0, v[5:8], s[0:1] offset:256
9643; GFX12-NEXT:    global_store_b128 v0, v[9:12], s[0:1] offset:240
9644; GFX12-NEXT:    global_store_b128 v0, v[13:16], s[0:1] offset:224
9645; GFX12-NEXT:    global_store_b128 v0, v[17:20], s[0:1] offset:208
9646; GFX12-NEXT:    global_store_b128 v0, v[21:24], s[0:1] offset:192
9647; GFX12-NEXT:    v_dual_mov_b32 v1, s72 :: v_dual_mov_b32 v2, s73
9648; GFX12-NEXT:    v_dual_mov_b32 v3, s76 :: v_dual_mov_b32 v4, s77
9649; GFX12-NEXT:    v_mov_b32_e32 v5, s80
9650; GFX12-NEXT:    s_lshr_b32 s92, s12, 12
9651; GFX12-NEXT:    s_lshr_b32 s78, s12, 10
9652; GFX12-NEXT:    s_bfe_i64 s[98:99], s[98:99], 0x10000
9653; GFX12-NEXT:    s_bfe_i64 s[90:91], s[90:91], 0x10000
9654; GFX12-NEXT:    v_dual_mov_b32 v6, s81 :: v_dual_mov_b32 v7, s82
9655; GFX12-NEXT:    v_dual_mov_b32 v8, s83 :: v_dual_mov_b32 v9, s84
9656; GFX12-NEXT:    s_lshr_b32 s66, s12, 8
9657; GFX12-NEXT:    s_lshr_b32 s56, s12, 6
9658; GFX12-NEXT:    s_lshr_b32 s44, s12, 4
9659; GFX12-NEXT:    s_lshr_b32 s30, s12, 2
9660; GFX12-NEXT:    s_bfe_i64 s[18:19], s[12:13], 0x10000
9661; GFX12-NEXT:    s_bfe_i64 s[12:13], s[26:27], 0x10000
9662; GFX12-NEXT:    s_bfe_i64 s[26:27], s[40:41], 0x10000
9663; GFX12-NEXT:    s_bfe_i64 s[40:41], s[52:53], 0x10000
9664; GFX12-NEXT:    s_bfe_i64 s[52:53], s[62:63], 0x10000
9665; GFX12-NEXT:    s_bfe_i64 s[62:63], s[74:75], 0x10000
9666; GFX12-NEXT:    s_bfe_i64 s[74:75], s[88:89], 0x10000
9667; GFX12-NEXT:    s_bfe_i64 s[88:89], s[94:95], 0x10000
9668; GFX12-NEXT:    s_bfe_i64 s[94:95], s[100:101], 0x10000
9669; GFX12-NEXT:    s_bfe_i64 s[96:97], s[96:97], 0x10000
9670; GFX12-NEXT:    v_dual_mov_b32 v10, s85 :: v_dual_mov_b32 v11, s86
9671; GFX12-NEXT:    v_dual_mov_b32 v12, s87 :: v_dual_mov_b32 v13, s90
9672; GFX12-NEXT:    s_bfe_i64 s[78:79], s[78:79], 0x10000
9673; GFX12-NEXT:    s_bfe_i64 s[92:93], s[92:93], 0x10000
9674; GFX12-NEXT:    v_dual_mov_b32 v14, s91 :: v_dual_mov_b32 v15, s98
9675; GFX12-NEXT:    s_wait_alu 0xfffe
9676; GFX12-NEXT:    v_dual_mov_b32 v16, s99 :: v_dual_mov_b32 v17, s96
9677; GFX12-NEXT:    s_bfe_i64 s[66:67], s[66:67], 0x10000
9678; GFX12-NEXT:    v_dual_mov_b32 v18, s97 :: v_dual_mov_b32 v19, s94
9679; GFX12-NEXT:    v_dual_mov_b32 v20, s95 :: v_dual_mov_b32 v21, s92
9680; GFX12-NEXT:    s_bfe_i64 s[56:57], s[56:57], 0x10000
9681; GFX12-NEXT:    v_dual_mov_b32 v22, s93 :: v_dual_mov_b32 v23, s88
9682; GFX12-NEXT:    v_mov_b32_e32 v24, s89
9683; GFX12-NEXT:    s_clause 0x5
9684; GFX12-NEXT:    global_store_b128 v0, v[1:4], s[0:1] offset:176
9685; GFX12-NEXT:    global_store_b128 v0, v[5:8], s[0:1] offset:160
9686; GFX12-NEXT:    global_store_b128 v0, v[9:12], s[0:1] offset:144
9687; GFX12-NEXT:    global_store_b128 v0, v[13:16], s[0:1] offset:128
9688; GFX12-NEXT:    global_store_b128 v0, v[17:20], s[0:1] offset:112
9689; GFX12-NEXT:    global_store_b128 v0, v[21:24], s[0:1] offset:96
9690; GFX12-NEXT:    v_dual_mov_b32 v1, s78 :: v_dual_mov_b32 v2, s79
9691; GFX12-NEXT:    v_dual_mov_b32 v3, s74 :: v_dual_mov_b32 v4, s75
9692; GFX12-NEXT:    s_wait_alu 0xfffe
9693; GFX12-NEXT:    v_mov_b32_e32 v5, s66
9694; GFX12-NEXT:    s_bfe_i64 s[44:45], s[44:45], 0x10000
9695; GFX12-NEXT:    v_dual_mov_b32 v6, s67 :: v_dual_mov_b32 v7, s62
9696; GFX12-NEXT:    v_dual_mov_b32 v8, s63 :: v_dual_mov_b32 v9, s56
9697; GFX12-NEXT:    s_bfe_i64 s[30:31], s[30:31], 0x10000
9698; GFX12-NEXT:    v_dual_mov_b32 v10, s57 :: v_dual_mov_b32 v11, s52
9699; GFX12-NEXT:    s_wait_alu 0xfffe
9700; GFX12-NEXT:    v_dual_mov_b32 v12, s53 :: v_dual_mov_b32 v13, s44
9701; GFX12-NEXT:    v_dual_mov_b32 v14, s45 :: v_dual_mov_b32 v15, s40
9702; GFX12-NEXT:    v_dual_mov_b32 v16, s41 :: v_dual_mov_b32 v17, s30
9703; GFX12-NEXT:    v_dual_mov_b32 v18, s31 :: v_dual_mov_b32 v19, s26
9704; GFX12-NEXT:    v_dual_mov_b32 v20, s27 :: v_dual_mov_b32 v21, s18
9705; GFX12-NEXT:    v_dual_mov_b32 v22, s19 :: v_dual_mov_b32 v23, s12
9706; GFX12-NEXT:    v_mov_b32_e32 v24, s13
9707; GFX12-NEXT:    s_clause 0x5
9708; GFX12-NEXT:    global_store_b128 v0, v[1:4], s[0:1] offset:80
9709; GFX12-NEXT:    global_store_b128 v0, v[5:8], s[0:1] offset:64
9710; GFX12-NEXT:    global_store_b128 v0, v[9:12], s[0:1] offset:48
9711; GFX12-NEXT:    global_store_b128 v0, v[13:16], s[0:1] offset:32
9712; GFX12-NEXT:    global_store_b128 v0, v[17:20], s[0:1] offset:16
9713; GFX12-NEXT:    global_store_b128 v0, v[21:24], s[0:1]
9714; GFX12-NEXT:    s_endpgm
9715  %load = load <64 x i1>, ptr addrspace(4) %in
9716  %ext = sext <64 x i1> %load to <64 x i64>
9717  store <64 x i64> %ext, ptr addrspace(1) %out
9718  ret void
9719}
9720
; Attribute group #0, attached via `#0` on function definitions in this file (e.g. @constant_load_i1); marks them nounwind.
9721attributes #0 = { nounwind }
9722