xref: /llvm-project/llvm/test/CodeGen/AMDGPU/load-constant-i16.ll (revision 6206f5444fc0732e6495703c75a67f1f90f5b418)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck --check-prefix=GCN-NOHSA-SI %s
3; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck --check-prefix=GCN-HSA %s
4; RUN: llc -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck --check-prefix=GCN-NOHSA-VI %s
5; RUN: llc -mtriple=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck --check-prefix=EG %s
6; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX12 %s
7
; Scalar case: load one i16 through the addrspace(4) pointer %in and store it
; through the addrspace(1) pointer %out.
;
; The assertions inside this function are autogenerated (see the NOTE at the
; top of the file); regenerate them with utils/update_llc_test_checks.py
; instead of editing them by hand. Each check prefix corresponds to one of the
; llc invocations at the top of the file.
;
; What the expected output pins down: the i16 value itself is fetched with a
; 16-bit vector-memory load in every configuration (buffer_load_ushort,
; flat_load_ushort, VTX_READ_16, global_load_u16) and written back with a
; 16-bit store (a masked MEM_RAT MSKOR write for the r600 run).
; NOTE(review): the leading s_load_dwordx4 / s_load_b128 presumably fetches the
; two kernel-argument pointers -- confirm against the AMDGPU kernel ABI.
8define amdgpu_kernel void @constant_load_i16(ptr addrspace(1) %out, ptr addrspace(4) %in) {
9; GCN-NOHSA-SI-LABEL: constant_load_i16:
10; GCN-NOHSA-SI:       ; %bb.0: ; %entry
11; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
12; GCN-NOHSA-SI-NEXT:    s_mov_b32 s7, 0xf000
13; GCN-NOHSA-SI-NEXT:    s_mov_b32 s6, -1
14; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, s6
15; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, s7
16; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
17; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s2
18; GCN-NOHSA-SI-NEXT:    s_mov_b32 s9, s3
19; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v0, off, s[8:11], 0
20; GCN-NOHSA-SI-NEXT:    s_mov_b32 s4, s0
21; GCN-NOHSA-SI-NEXT:    s_mov_b32 s5, s1
22; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
23; GCN-NOHSA-SI-NEXT:    buffer_store_short v0, off, s[4:7], 0
24; GCN-NOHSA-SI-NEXT:    s_endpgm
25;
26; GCN-HSA-LABEL: constant_load_i16:
27; GCN-HSA:       ; %bb.0: ; %entry
28; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
29; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
30; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
31; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
32; GCN-HSA-NEXT:    flat_load_ushort v2, v[0:1]
33; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s0
34; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s1
35; GCN-HSA-NEXT:    s_waitcnt vmcnt(0)
36; GCN-HSA-NEXT:    flat_store_short v[0:1], v2
37; GCN-HSA-NEXT:    s_endpgm
38;
39; GCN-NOHSA-VI-LABEL: constant_load_i16:
40; GCN-NOHSA-VI:       ; %bb.0: ; %entry
41; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
42; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
43; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s2
44; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s3
45; GCN-NOHSA-VI-NEXT:    flat_load_ushort v2, v[0:1]
46; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s0
47; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s1
48; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
49; GCN-NOHSA-VI-NEXT:    flat_store_short v[0:1], v2
50; GCN-NOHSA-VI-NEXT:    s_endpgm
51;
52; EG-LABEL: constant_load_i16:
53; EG:       ; %bb.0: ; %entry
54; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
55; EG-NEXT:    TEX 0 @6
56; EG-NEXT:    ALU 11, @9, KC0[CB0:0-32], KC1[]
57; EG-NEXT:    MEM_RAT MSKOR T0.XW, T1.X
58; EG-NEXT:    CF_END
59; EG-NEXT:    PAD
60; EG-NEXT:    Fetch clause starting at 6:
61; EG-NEXT:     VTX_READ_16 T0.X, T0.X, 0, #1
62; EG-NEXT:    ALU clause starting at 8:
63; EG-NEXT:     MOV * T0.X, KC0[2].Z,
64; EG-NEXT:    ALU clause starting at 9:
65; EG-NEXT:     AND_INT T0.W, KC0[2].Y, literal.x,
66; EG-NEXT:     AND_INT * T1.W, T0.X, literal.y,
67; EG-NEXT:    3(4.203895e-45), 65535(9.183409e-41)
68; EG-NEXT:     LSHL * T0.W, PV.W, literal.x,
69; EG-NEXT:    3(4.203895e-45), 0(0.000000e+00)
70; EG-NEXT:     LSHL T0.X, T1.W, PV.W,
71; EG-NEXT:     LSHL * T0.W, literal.x, PV.W,
72; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
73; EG-NEXT:     MOV T0.Y, 0.0,
74; EG-NEXT:     MOV * T0.Z, 0.0,
75; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
76; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
77;
78; GFX12-LABEL: constant_load_i16:
79; GFX12:       ; %bb.0: ; %entry
80; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
81; GFX12-NEXT:    v_mov_b32_e32 v0, 0
82; GFX12-NEXT:    s_wait_kmcnt 0x0
83; GFX12-NEXT:    global_load_u16 v1, v0, s[2:3]
84; GFX12-NEXT:    s_wait_loadcnt 0x0
85; GFX12-NEXT:    global_store_b16 v0, v1, s[0:1]
86; GFX12-NEXT:    s_endpgm
87entry:
88  %ld = load i16, ptr addrspace(4) %in
89  store i16 %ld, ptr addrspace(1) %out
90  ret void
91}
92
; <2 x i16> case (32 bits of data): load the vector through the addrspace(4)
; pointer %in and store it through the addrspace(1) pointer %out.
;
; The assertions inside this function are autogenerated (see the NOTE at the
; top of the file); regenerate them with utils/update_llc_test_checks.py
; instead of editing them by hand.
;
; What the expected output pins down: on the amdgcn runs the data is fetched
; with a single 32-bit scalar load (s_load_dword / s_load_b32 from s[2:3]),
; copied into a VGPR and written with one dword store; the r600 run uses one
; VTX_READ_32 and one MEM_RAT_CACHELESS STORE_RAW.
93define amdgpu_kernel void @constant_load_v2i16(ptr addrspace(1) %out, ptr addrspace(4) %in) {
94; GCN-NOHSA-SI-LABEL: constant_load_v2i16:
95; GCN-NOHSA-SI:       ; %bb.0: ; %entry
96; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
97; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
98; GCN-NOHSA-SI-NEXT:    s_load_dword s4, s[2:3], 0x0
99; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
100; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
101; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
102; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s4
103; GCN-NOHSA-SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
104; GCN-NOHSA-SI-NEXT:    s_endpgm
105;
106; GCN-HSA-LABEL: constant_load_v2i16:
107; GCN-HSA:       ; %bb.0: ; %entry
108; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
109; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
110; GCN-HSA-NEXT:    s_load_dword s2, s[2:3], 0x0
111; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s0
112; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s1
113; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
114; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s2
115; GCN-HSA-NEXT:    flat_store_dword v[0:1], v2
116; GCN-HSA-NEXT:    s_endpgm
117;
118; GCN-NOHSA-VI-LABEL: constant_load_v2i16:
119; GCN-NOHSA-VI:       ; %bb.0: ; %entry
120; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
121; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
122; GCN-NOHSA-VI-NEXT:    s_load_dword s2, s[2:3], 0x0
123; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s0
124; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s1
125; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
126; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s2
127; GCN-NOHSA-VI-NEXT:    flat_store_dword v[0:1], v2
128; GCN-NOHSA-VI-NEXT:    s_endpgm
129;
130; EG-LABEL: constant_load_v2i16:
131; EG:       ; %bb.0: ; %entry
132; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
133; EG-NEXT:    TEX 0 @6
134; EG-NEXT:    ALU 1, @9, KC0[CB0:0-32], KC1[]
135; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
136; EG-NEXT:    CF_END
137; EG-NEXT:    PAD
138; EG-NEXT:    Fetch clause starting at 6:
139; EG-NEXT:     VTX_READ_32 T0.X, T0.X, 0, #1
140; EG-NEXT:    ALU clause starting at 8:
141; EG-NEXT:     MOV * T0.X, KC0[2].Z,
142; EG-NEXT:    ALU clause starting at 9:
143; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
144; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
145;
146; GFX12-LABEL: constant_load_v2i16:
147; GFX12:       ; %bb.0: ; %entry
148; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
149; GFX12-NEXT:    s_wait_kmcnt 0x0
150; GFX12-NEXT:    s_load_b32 s2, s[2:3], 0x0
151; GFX12-NEXT:    s_wait_kmcnt 0x0
152; GFX12-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
153; GFX12-NEXT:    global_store_b32 v0, v1, s[0:1]
154; GFX12-NEXT:    s_endpgm
155entry:
156  %ld = load <2 x i16>, ptr addrspace(4) %in
157  store <2 x i16> %ld, ptr addrspace(1) %out
158  ret void
159}
160
; <3 x i16> case (48 bits of data, an odd element count): load the vector
; through the addrspace(4) pointer %in and store it through the addrspace(1)
; pointer %out.
;
; The assertions inside this function are autogenerated (see the NOTE at the
; top of the file); regenerate them with utils/update_llc_test_checks.py
; instead of editing them by hand.
;
; What the expected output pins down: on the amdgcn runs the data is fetched
; with one 64-bit scalar load (s_load_dwordx2 / s_load_b64), and the store is
; split in two -- a 16-bit store of the third element at byte offset 4 (an
; explicit s_add_u32/s_addc_u32 address on the flat-store runs) followed by a
; dword store of the first two elements at offset 0. The r600 run reads the
; three elements with separate VTX_READ_16 fetches and writes them with one
; STORE_RAW plus one masked MEM_RAT MSKOR write.
161define amdgpu_kernel void @constant_load_v3i16(ptr addrspace(1) %out, ptr addrspace(4) %in) {
162; GCN-NOHSA-SI-LABEL: constant_load_v3i16:
163; GCN-NOHSA-SI:       ; %bb.0: ; %entry
164; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
165; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
166; GCN-NOHSA-SI-NEXT:    s_load_dwordx2 s[4:5], s[2:3], 0x0
167; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
168; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
169; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
170; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s5
171; GCN-NOHSA-SI-NEXT:    buffer_store_short v0, off, s[0:3], 0 offset:4
172; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
173; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s4
174; GCN-NOHSA-SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
175; GCN-NOHSA-SI-NEXT:    s_endpgm
176;
177; GCN-HSA-LABEL: constant_load_v3i16:
178; GCN-HSA:       ; %bb.0: ; %entry
179; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
180; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
181; GCN-HSA-NEXT:    s_load_dwordx2 s[2:3], s[2:3], 0x0
182; GCN-HSA-NEXT:    s_add_u32 s4, s0, 4
183; GCN-HSA-NEXT:    s_addc_u32 s5, s1, 0
184; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s4
185; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s0
186; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s5
187; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
188; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s3
189; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s1
190; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s2
191; GCN-HSA-NEXT:    flat_store_short v[2:3], v4
192; GCN-HSA-NEXT:    flat_store_dword v[0:1], v5
193; GCN-HSA-NEXT:    s_endpgm
194;
195; GCN-NOHSA-VI-LABEL: constant_load_v3i16:
196; GCN-NOHSA-VI:       ; %bb.0: ; %entry
197; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
198; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
199; GCN-NOHSA-VI-NEXT:    s_load_dwordx2 s[2:3], s[2:3], 0x0
200; GCN-NOHSA-VI-NEXT:    s_add_u32 s4, s0, 4
201; GCN-NOHSA-VI-NEXT:    s_addc_u32 s5, s1, 0
202; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s4
203; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s0
204; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s5
205; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
206; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s3
207; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s1
208; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s2
209; GCN-NOHSA-VI-NEXT:    flat_store_short v[2:3], v4
210; GCN-NOHSA-VI-NEXT:    flat_store_dword v[0:1], v5
211; GCN-NOHSA-VI-NEXT:    s_endpgm
212;
213; EG-LABEL: constant_load_v3i16:
214; EG:       ; %bb.0: ; %entry
215; EG-NEXT:    ALU 0, @12, KC0[CB0:0-32], KC1[]
216; EG-NEXT:    TEX 2 @6
217; EG-NEXT:    ALU 19, @13, KC0[CB0:0-32], KC1[]
218; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T6.X, T7.X, 0
219; EG-NEXT:    MEM_RAT MSKOR T5.XW, T8.X
220; EG-NEXT:    CF_END
221; EG-NEXT:    Fetch clause starting at 6:
222; EG-NEXT:     VTX_READ_16 T6.X, T5.X, 0, #1
223; EG-NEXT:     VTX_READ_16 T7.X, T5.X, 2, #1
224; EG-NEXT:     VTX_READ_16 T5.X, T5.X, 4, #1
225; EG-NEXT:    ALU clause starting at 12:
226; EG-NEXT:     MOV * T5.X, KC0[2].Z,
227; EG-NEXT:    ALU clause starting at 13:
228; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.x,
229; EG-NEXT:    4(5.605194e-45), 0(0.000000e+00)
230; EG-NEXT:     AND_INT T1.W, PV.W, literal.x,
231; EG-NEXT:     AND_INT * T2.W, T5.X, literal.y,
232; EG-NEXT:    3(4.203895e-45), 65535(9.183409e-41)
233; EG-NEXT:     LSHL * T1.W, PV.W, literal.x,
234; EG-NEXT:    3(4.203895e-45), 0(0.000000e+00)
235; EG-NEXT:     LSHL T5.X, T2.W, PV.W,
236; EG-NEXT:     LSHL * T5.W, literal.x, PV.W,
237; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
238; EG-NEXT:     MOV T5.Y, 0.0,
239; EG-NEXT:     MOV * T5.Z, 0.0,
240; EG-NEXT:     LSHR T8.X, T0.W, literal.x,
241; EG-NEXT:     LSHL T0.W, T7.X, literal.y,
242; EG-NEXT:     AND_INT * T1.W, T6.X, literal.z,
243; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
244; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
245; EG-NEXT:     OR_INT T6.X, PV.W, PS,
246; EG-NEXT:     LSHR * T7.X, KC0[2].Y, literal.x,
247; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
248;
249; GFX12-LABEL: constant_load_v3i16:
250; GFX12:       ; %bb.0: ; %entry
251; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
252; GFX12-NEXT:    s_wait_kmcnt 0x0
253; GFX12-NEXT:    s_load_b64 s[2:3], s[2:3], 0x0
254; GFX12-NEXT:    s_wait_kmcnt 0x0
255; GFX12-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s3
256; GFX12-NEXT:    v_mov_b32_e32 v2, s2
257; GFX12-NEXT:    s_clause 0x1
258; GFX12-NEXT:    global_store_b16 v0, v1, s[0:1] offset:4
259; GFX12-NEXT:    global_store_b32 v0, v2, s[0:1]
260; GFX12-NEXT:    s_endpgm
261entry:
262  %ld = load <3 x i16>, ptr addrspace(4) %in
263  store <3 x i16> %ld, ptr addrspace(1) %out
264  ret void
265}
266
; <4 x i16> case (64 bits of data): load the vector through the addrspace(4)
; pointer %in and store it through the addrspace(1) pointer %out.
;
; The assertions inside this function are autogenerated (see the NOTE at the
; top of the file); regenerate them with utils/update_llc_test_checks.py
; instead of editing them by hand.
;
; What the expected output pins down: on the amdgcn runs the data is fetched
; with one 64-bit scalar load (s_load_dwordx2 / s_load_b64) and written with a
; single 64-bit store (buffer_store_dwordx2, flat_store_dwordx2,
; global_store_b64); the r600 run uses one VTX_READ_64 and one STORE_RAW of
; T0.XY.
267define amdgpu_kernel void @constant_load_v4i16(ptr addrspace(1) %out, ptr addrspace(4) %in) {
268; GCN-NOHSA-SI-LABEL: constant_load_v4i16:
269; GCN-NOHSA-SI:       ; %bb.0: ; %entry
270; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
271; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
272; GCN-NOHSA-SI-NEXT:    s_load_dwordx2 s[4:5], s[2:3], 0x0
273; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
274; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
275; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
276; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s4
277; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s5
278; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
279; GCN-NOHSA-SI-NEXT:    s_endpgm
280;
281; GCN-HSA-LABEL: constant_load_v4i16:
282; GCN-HSA:       ; %bb.0: ; %entry
283; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
284; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
285; GCN-HSA-NEXT:    s_load_dwordx2 s[2:3], s[2:3], 0x0
286; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s0
287; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s1
288; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
289; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s2
290; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s3
291; GCN-HSA-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
292; GCN-HSA-NEXT:    s_endpgm
293;
294; GCN-NOHSA-VI-LABEL: constant_load_v4i16:
295; GCN-NOHSA-VI:       ; %bb.0: ; %entry
296; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
297; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
298; GCN-NOHSA-VI-NEXT:    s_load_dwordx2 s[2:3], s[2:3], 0x0
299; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s0
300; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s1
301; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
302; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s2
303; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s3
304; GCN-NOHSA-VI-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
305; GCN-NOHSA-VI-NEXT:    s_endpgm
306;
307; EG-LABEL: constant_load_v4i16:
308; EG:       ; %bb.0: ; %entry
309; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
310; EG-NEXT:    TEX 0 @6
311; EG-NEXT:    ALU 1, @9, KC0[CB0:0-32], KC1[]
312; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
313; EG-NEXT:    CF_END
314; EG-NEXT:    PAD
315; EG-NEXT:    Fetch clause starting at 6:
316; EG-NEXT:     VTX_READ_64 T0.XY, T0.X, 0, #1
317; EG-NEXT:    ALU clause starting at 8:
318; EG-NEXT:     MOV * T0.X, KC0[2].Z,
319; EG-NEXT:    ALU clause starting at 9:
320; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
321; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
322;
323; GFX12-LABEL: constant_load_v4i16:
324; GFX12:       ; %bb.0: ; %entry
325; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
326; GFX12-NEXT:    s_wait_kmcnt 0x0
327; GFX12-NEXT:    s_load_b64 s[2:3], s[2:3], 0x0
328; GFX12-NEXT:    v_mov_b32_e32 v2, 0
329; GFX12-NEXT:    s_wait_kmcnt 0x0
330; GFX12-NEXT:    v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
331; GFX12-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
332; GFX12-NEXT:    s_endpgm
333entry:
334  %ld = load <4 x i16>, ptr addrspace(4) %in
335  store <4 x i16> %ld, ptr addrspace(1) %out
336  ret void
337}
338
; <8 x i16> case (128 bits of data): load the vector through the addrspace(4)
; pointer %in and store it through the addrspace(1) pointer %out.
;
; The assertions inside this function are autogenerated (see the NOTE at the
; top of the file); regenerate them with utils/update_llc_test_checks.py
; instead of editing them by hand.
;
; What the expected output pins down: on the amdgcn runs the data is fetched
; with one 128-bit scalar load (s_load_dwordx4 / s_load_b128 from s[2:3]) and
; written with a single 128-bit store (buffer_store_dwordx4,
; flat_store_dwordx4, global_store_b128); the r600 run uses one VTX_READ_128
; and one STORE_RAW of T0.XYZW.
339define amdgpu_kernel void @constant_load_v8i16(ptr addrspace(1) %out, ptr addrspace(4) %in) {
340; GCN-NOHSA-SI-LABEL: constant_load_v8i16:
341; GCN-NOHSA-SI:       ; %bb.0: ; %entry
342; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
343; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
344; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x0
345; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
346; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
347; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
348; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s4
349; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s5
350; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s6
351; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s7
352; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
353; GCN-NOHSA-SI-NEXT:    s_endpgm
354;
355; GCN-HSA-LABEL: constant_load_v8i16:
356; GCN-HSA:       ; %bb.0: ; %entry
357; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
358; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
359; GCN-HSA-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x0
360; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
361; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
362; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
363; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s4
364; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s5
365; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s6
366; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s7
367; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
368; GCN-HSA-NEXT:    s_endpgm
369;
370; GCN-NOHSA-VI-LABEL: constant_load_v8i16:
371; GCN-NOHSA-VI:       ; %bb.0: ; %entry
372; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
373; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
374; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x0
375; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s0
376; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s1
377; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
378; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s4
379; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s5
380; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s6
381; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s7
382; GCN-NOHSA-VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
383; GCN-NOHSA-VI-NEXT:    s_endpgm
384;
385; EG-LABEL: constant_load_v8i16:
386; EG:       ; %bb.0: ; %entry
387; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
388; EG-NEXT:    TEX 0 @6
389; EG-NEXT:    ALU 1, @9, KC0[CB0:0-32], KC1[]
390; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T1.X, 1
391; EG-NEXT:    CF_END
392; EG-NEXT:    PAD
393; EG-NEXT:    Fetch clause starting at 6:
394; EG-NEXT:     VTX_READ_128 T0.XYZW, T0.X, 0, #1
395; EG-NEXT:    ALU clause starting at 8:
396; EG-NEXT:     MOV * T0.X, KC0[2].Z,
397; EG-NEXT:    ALU clause starting at 9:
398; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
399; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
400;
401; GFX12-LABEL: constant_load_v8i16:
402; GFX12:       ; %bb.0: ; %entry
403; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
404; GFX12-NEXT:    s_wait_kmcnt 0x0
405; GFX12-NEXT:    s_load_b128 s[4:7], s[2:3], 0x0
406; GFX12-NEXT:    v_mov_b32_e32 v4, 0
407; GFX12-NEXT:    s_wait_kmcnt 0x0
408; GFX12-NEXT:    v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v3, s7
409; GFX12-NEXT:    v_dual_mov_b32 v1, s5 :: v_dual_mov_b32 v2, s6
410; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[0:1]
411; GFX12-NEXT:    s_endpgm
412entry:
413  %ld = load <8 x i16>, ptr addrspace(4) %in
414  store <8 x i16> %ld, ptr addrspace(1) %out
415  ret void
416}
417
; <16 x i16> case (256 bits of data): load the vector through the addrspace(4)
; pointer %in and store it through the addrspace(1) pointer %out.
;
; The assertions inside this function are autogenerated (see the NOTE at the
; top of the file); regenerate them with utils/update_llc_test_checks.py
; instead of editing them by hand.
;
; What the expected output pins down: on the amdgcn runs the data is fetched
; with one 256-bit scalar load (s_load_dwordx8 / s_load_b256 from s[10:11]),
; and the store is split into two 128-bit stores -- the upper half at byte
; offset 16 first (an explicit s_add_u32/s_addc_u32 address on the flat-store
; runs), then the lower half at offset 0. The r600 run issues two VTX_READ_128
; fetches (offsets 16 and 0) and two STORE_RAW writes.
418define amdgpu_kernel void @constant_load_v16i16(ptr addrspace(1) %out, ptr addrspace(4) %in) {
419; GCN-NOHSA-SI-LABEL: constant_load_v16i16:
420; GCN-NOHSA-SI:       ; %bb.0: ; %entry
421; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[8:11], s[4:5], 0x9
422; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
423; GCN-NOHSA-SI-NEXT:    s_load_dwordx8 s[0:7], s[10:11], 0x0
424; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, 0xf000
425; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, -1
426; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
427; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s4
428; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s5
429; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s6
430; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s7
431; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:16
432; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
433; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s0
434; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s1
435; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s2
436; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s3
437; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0
438; GCN-NOHSA-SI-NEXT:    s_endpgm
439;
440; GCN-HSA-LABEL: constant_load_v16i16:
441; GCN-HSA:       ; %bb.0: ; %entry
442; GCN-HSA-NEXT:    s_load_dwordx4 s[8:11], s[8:9], 0x0
443; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
444; GCN-HSA-NEXT:    s_load_dwordx8 s[0:7], s[10:11], 0x0
445; GCN-HSA-NEXT:    s_add_u32 s10, s8, 16
446; GCN-HSA-NEXT:    s_addc_u32 s11, s9, 0
447; GCN-HSA-NEXT:    v_mov_b32_e32 v6, s10
448; GCN-HSA-NEXT:    v_mov_b32_e32 v7, s11
449; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
450; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s4
451; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s5
452; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s6
453; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s7
454; GCN-HSA-NEXT:    flat_store_dwordx4 v[6:7], v[0:3]
455; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
456; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s8
457; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
458; GCN-HSA-NEXT:    v_mov_b32_e32 v6, s2
459; GCN-HSA-NEXT:    v_mov_b32_e32 v7, s3
460; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s9
461; GCN-HSA-NEXT:    flat_store_dwordx4 v[0:1], v[4:7]
462; GCN-HSA-NEXT:    s_endpgm
463;
464; GCN-NOHSA-VI-LABEL: constant_load_v16i16:
465; GCN-NOHSA-VI:       ; %bb.0: ; %entry
466; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[8:11], s[4:5], 0x24
467; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
468; GCN-NOHSA-VI-NEXT:    s_load_dwordx8 s[0:7], s[10:11], 0x0
469; GCN-NOHSA-VI-NEXT:    s_add_u32 s10, s8, 16
470; GCN-NOHSA-VI-NEXT:    s_addc_u32 s11, s9, 0
471; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v6, s10
472; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v7, s11
473; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
474; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s4
475; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s5
476; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s6
477; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s7
478; GCN-NOHSA-VI-NEXT:    flat_store_dwordx4 v[6:7], v[0:3]
479; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s0
480; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s8
481; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s1
482; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v6, s2
483; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v7, s3
484; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s9
485; GCN-NOHSA-VI-NEXT:    flat_store_dwordx4 v[0:1], v[4:7]
486; GCN-NOHSA-VI-NEXT:    s_endpgm
487;
488; EG-LABEL: constant_load_v16i16:
489; EG:       ; %bb.0: ; %entry
490; EG-NEXT:    ALU 0, @12, KC0[CB0:0-32], KC1[]
491; EG-NEXT:    TEX 0 @8
492; EG-NEXT:    ALU 3, @13, KC0[CB0:0-32], KC1[]
493; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T1.XYZW, T2.X, 0
494; EG-NEXT:    TEX 0 @10
495; EG-NEXT:    ALU 1, @17, KC0[CB0:0-32], KC1[]
496; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T1.X, 1
497; EG-NEXT:    CF_END
498; EG-NEXT:    Fetch clause starting at 8:
499; EG-NEXT:     VTX_READ_128 T1.XYZW, T0.X, 16, #1
500; EG-NEXT:    Fetch clause starting at 10:
501; EG-NEXT:     VTX_READ_128 T0.XYZW, T0.X, 0, #1
502; EG-NEXT:    ALU clause starting at 12:
503; EG-NEXT:     MOV * T0.X, KC0[2].Z,
504; EG-NEXT:    ALU clause starting at 13:
505; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.x,
506; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
507; EG-NEXT:     LSHR * T2.X, PV.W, literal.x,
508; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
509; EG-NEXT:    ALU clause starting at 17:
510; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
511; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
512;
513; GFX12-LABEL: constant_load_v16i16:
514; GFX12:       ; %bb.0: ; %entry
515; GFX12-NEXT:    s_load_b128 s[8:11], s[4:5], 0x24
516; GFX12-NEXT:    s_wait_kmcnt 0x0
517; GFX12-NEXT:    s_load_b256 s[0:7], s[10:11], 0x0
518; GFX12-NEXT:    s_wait_kmcnt 0x0
519; GFX12-NEXT:    v_dual_mov_b32 v8, 0 :: v_dual_mov_b32 v1, s5
520; GFX12-NEXT:    v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v3, s7
521; GFX12-NEXT:    v_dual_mov_b32 v2, s6 :: v_dual_mov_b32 v5, s1
522; GFX12-NEXT:    v_dual_mov_b32 v4, s0 :: v_dual_mov_b32 v7, s3
523; GFX12-NEXT:    v_mov_b32_e32 v6, s2
524; GFX12-NEXT:    s_clause 0x1
525; GFX12-NEXT:    global_store_b128 v8, v[0:3], s[8:9] offset:16
526; GFX12-NEXT:    global_store_b128 v8, v[4:7], s[8:9]
527; GFX12-NEXT:    s_endpgm
528entry:
529  %ld = load <16 x i16>, ptr addrspace(4) %in
530  store <16 x i16> %ld, ptr addrspace(1) %out
531  ret void
532}
533
534define amdgpu_kernel void @constant_load_v16i16_align2(ptr addrspace(4) %ptr0) #0 {
535; GCN-NOHSA-SI-LABEL: constant_load_v16i16_align2:
536; GCN-NOHSA-SI:       ; %bb.0: ; %entry
537; GCN-NOHSA-SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
538; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
539; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
540; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
541; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v0, off, s[0:3], 0
542; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v1, off, s[0:3], 0 offset:2
543; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v4, off, s[0:3], 0 offset:4
544; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v2, off, s[0:3], 0 offset:6
545; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v5, off, s[0:3], 0 offset:8
546; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v3, off, s[0:3], 0 offset:10
547; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v6, off, s[0:3], 0 offset:12
548; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v7, off, s[0:3], 0 offset:14
549; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v8, off, s[0:3], 0 offset:16
550; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v9, off, s[0:3], 0 offset:18
551; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v10, off, s[0:3], 0 offset:20
552; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v11, off, s[0:3], 0 offset:22
553; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v12, off, s[0:3], 0 offset:24
554; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v13, off, s[0:3], 0 offset:26
555; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v14, off, s[0:3], 0 offset:28
556; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v15, off, s[0:3], 0 offset:30
557; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(8)
558; GCN-NOHSA-SI-NEXT:    v_lshlrev_b32_e32 v7, 16, v7
559; GCN-NOHSA-SI-NEXT:    v_lshlrev_b32_e32 v16, 16, v3
560; GCN-NOHSA-SI-NEXT:    v_lshlrev_b32_e32 v17, 16, v2
561; GCN-NOHSA-SI-NEXT:    v_lshlrev_b32_e32 v18, 16, v1
562; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
563; GCN-NOHSA-SI-NEXT:    v_lshlrev_b32_e32 v15, 16, v15
564; GCN-NOHSA-SI-NEXT:    v_lshlrev_b32_e32 v13, 16, v13
565; GCN-NOHSA-SI-NEXT:    v_lshlrev_b32_e32 v11, 16, v11
566; GCN-NOHSA-SI-NEXT:    v_lshlrev_b32_e32 v9, 16, v9
567; GCN-NOHSA-SI-NEXT:    v_or_b32_e32 v3, v7, v6
568; GCN-NOHSA-SI-NEXT:    v_or_b32_e32 v2, v16, v5
569; GCN-NOHSA-SI-NEXT:    v_or_b32_e32 v1, v17, v4
570; GCN-NOHSA-SI-NEXT:    v_or_b32_e32 v0, v18, v0
571; GCN-NOHSA-SI-NEXT:    v_or_b32_e32 v7, v15, v14
572; GCN-NOHSA-SI-NEXT:    v_or_b32_e32 v6, v13, v12
573; GCN-NOHSA-SI-NEXT:    v_or_b32_e32 v5, v11, v10
574; GCN-NOHSA-SI-NEXT:    v_or_b32_e32 v4, v9, v8
575; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0
576; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
577; GCN-NOHSA-SI-NEXT:    s_endpgm
578;
579; GCN-HSA-LABEL: constant_load_v16i16_align2:
580; GCN-HSA:       ; %bb.0: ; %entry
581; GCN-HSA-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
582; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
583; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s0
584; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s1
585; GCN-HSA-NEXT:    s_add_u32 s0, s0, 16
586; GCN-HSA-NEXT:    s_addc_u32 s1, s1, 0
587; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
588; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
589; GCN-HSA-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
590; GCN-HSA-NEXT:    flat_load_dwordx4 v[4:7], v[4:5]
591; GCN-HSA-NEXT:    s_waitcnt vmcnt(1)
592; GCN-HSA-NEXT:    flat_store_dwordx4 v[0:1], v[0:3]
593; GCN-HSA-NEXT:    s_waitcnt vmcnt(1)
594; GCN-HSA-NEXT:    flat_store_dwordx4 v[0:1], v[4:7]
595; GCN-HSA-NEXT:    s_endpgm
596;
597; GCN-NOHSA-VI-LABEL: constant_load_v16i16_align2:
598; GCN-NOHSA-VI:       ; %bb.0: ; %entry
599; GCN-NOHSA-VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
600; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
601; GCN-NOHSA-VI-NEXT:    s_add_u32 s2, s0, 14
602; GCN-NOHSA-VI-NEXT:    s_addc_u32 s3, s1, 0
603; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s2
604; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s3
605; GCN-NOHSA-VI-NEXT:    s_add_u32 s2, s0, 12
606; GCN-NOHSA-VI-NEXT:    s_addc_u32 s3, s1, 0
607; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s2
608; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s3
609; GCN-NOHSA-VI-NEXT:    s_add_u32 s2, s0, 10
610; GCN-NOHSA-VI-NEXT:    s_addc_u32 s3, s1, 0
611; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s3
612; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s2
613; GCN-NOHSA-VI-NEXT:    s_add_u32 s2, s0, 8
614; GCN-NOHSA-VI-NEXT:    s_addc_u32 s3, s1, 0
615; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v7, s3
616; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v6, s2
617; GCN-NOHSA-VI-NEXT:    s_add_u32 s2, s0, 6
618; GCN-NOHSA-VI-NEXT:    s_addc_u32 s3, s1, 0
619; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v9, s3
620; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v8, s2
621; GCN-NOHSA-VI-NEXT:    s_add_u32 s2, s0, 4
622; GCN-NOHSA-VI-NEXT:    s_addc_u32 s3, s1, 0
623; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v11, s3
624; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v10, s2
625; GCN-NOHSA-VI-NEXT:    s_add_u32 s2, s0, 30
626; GCN-NOHSA-VI-NEXT:    s_addc_u32 s3, s1, 0
627; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v13, s3
628; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v12, s2
629; GCN-NOHSA-VI-NEXT:    s_add_u32 s2, s0, 28
630; GCN-NOHSA-VI-NEXT:    s_addc_u32 s3, s1, 0
631; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v15, s3
632; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v14, s2
633; GCN-NOHSA-VI-NEXT:    s_add_u32 s2, s0, 26
634; GCN-NOHSA-VI-NEXT:    flat_load_ushort v16, v[0:1]
635; GCN-NOHSA-VI-NEXT:    flat_load_ushort v17, v[2:3]
636; GCN-NOHSA-VI-NEXT:    flat_load_ushort v18, v[4:5]
637; GCN-NOHSA-VI-NEXT:    flat_load_ushort v19, v[6:7]
638; GCN-NOHSA-VI-NEXT:    flat_load_ushort v20, v[8:9]
639; GCN-NOHSA-VI-NEXT:    flat_load_ushort v21, v[10:11]
640; GCN-NOHSA-VI-NEXT:    flat_load_ushort v12, v[12:13]
641; GCN-NOHSA-VI-NEXT:    flat_load_ushort v13, v[14:15]
642; GCN-NOHSA-VI-NEXT:    s_addc_u32 s3, s1, 0
643; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s2
644; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s3
645; GCN-NOHSA-VI-NEXT:    s_add_u32 s2, s0, 24
646; GCN-NOHSA-VI-NEXT:    s_addc_u32 s3, s1, 0
647; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s2
648; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s3
649; GCN-NOHSA-VI-NEXT:    s_add_u32 s2, s0, 22
650; GCN-NOHSA-VI-NEXT:    s_addc_u32 s3, s1, 0
651; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s3
652; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s2
653; GCN-NOHSA-VI-NEXT:    s_add_u32 s2, s0, 20
654; GCN-NOHSA-VI-NEXT:    s_addc_u32 s3, s1, 0
655; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v7, s3
656; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v6, s2
657; GCN-NOHSA-VI-NEXT:    s_add_u32 s2, s0, 18
658; GCN-NOHSA-VI-NEXT:    s_addc_u32 s3, s1, 0
659; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v9, s3
660; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v8, s2
661; GCN-NOHSA-VI-NEXT:    s_add_u32 s2, s0, 16
662; GCN-NOHSA-VI-NEXT:    s_addc_u32 s3, s1, 0
663; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v11, s3
664; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v10, s2
665; GCN-NOHSA-VI-NEXT:    s_add_u32 s2, s0, 2
666; GCN-NOHSA-VI-NEXT:    s_addc_u32 s3, s1, 0
667; GCN-NOHSA-VI-NEXT:    flat_load_ushort v14, v[0:1]
668; GCN-NOHSA-VI-NEXT:    flat_load_ushort v15, v[2:3]
669; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s2
670; GCN-NOHSA-VI-NEXT:    flat_load_ushort v4, v[4:5]
671; GCN-NOHSA-VI-NEXT:    flat_load_ushort v5, v[6:7]
672; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s1
673; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s3
674; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s0
675; GCN-NOHSA-VI-NEXT:    flat_load_ushort v8, v[8:9]
676; GCN-NOHSA-VI-NEXT:    flat_load_ushort v9, v[10:11]
677; GCN-NOHSA-VI-NEXT:    flat_load_ushort v0, v[0:1]
678; GCN-NOHSA-VI-NEXT:    flat_load_ushort v10, v[2:3]
679; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(14)
680; GCN-NOHSA-VI-NEXT:    v_lshlrev_b32_e32 v1, 16, v16
681; GCN-NOHSA-VI-NEXT:    v_or_b32_e32 v3, v17, v1
682; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(13)
683; GCN-NOHSA-VI-NEXT:    v_lshlrev_b32_e32 v1, 16, v18
684; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(12)
685; GCN-NOHSA-VI-NEXT:    v_or_b32_e32 v2, v19, v1
686; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(11)
687; GCN-NOHSA-VI-NEXT:    v_lshlrev_b32_e32 v1, 16, v20
688; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(10)
689; GCN-NOHSA-VI-NEXT:    v_or_b32_e32 v1, v21, v1
690; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(9)
691; GCN-NOHSA-VI-NEXT:    v_lshlrev_b32_e32 v6, 16, v12
692; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(8)
693; GCN-NOHSA-VI-NEXT:    v_or_b32_e32 v7, v13, v6
694; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(7)
695; GCN-NOHSA-VI-NEXT:    v_lshlrev_b32_e32 v6, 16, v14
696; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(6)
697; GCN-NOHSA-VI-NEXT:    v_or_b32_e32 v6, v15, v6
698; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(5)
699; GCN-NOHSA-VI-NEXT:    v_lshlrev_b32_e32 v4, 16, v4
700; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(4)
701; GCN-NOHSA-VI-NEXT:    v_or_b32_e32 v5, v5, v4
702; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(3)
703; GCN-NOHSA-VI-NEXT:    v_lshlrev_b32_e32 v4, 16, v8
704; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(2)
705; GCN-NOHSA-VI-NEXT:    v_or_b32_e32 v4, v9, v4
706; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(1)
707; GCN-NOHSA-VI-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
708; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
709; GCN-NOHSA-VI-NEXT:    v_or_b32_e32 v0, v10, v0
710; GCN-NOHSA-VI-NEXT:    flat_store_dwordx4 v[0:1], v[4:7]
711; GCN-NOHSA-VI-NEXT:    flat_store_dwordx4 v[0:1], v[0:3]
712; GCN-NOHSA-VI-NEXT:    s_endpgm
713;
714; EG-LABEL: constant_load_v16i16_align2:
715; EG:       ; %bb.0: ; %entry
716; EG-NEXT:    ALU 0, @10, KC0[CB0:0-32], KC1[]
717; EG-NEXT:    TEX 1 @6
718; EG-NEXT:    ALU 1, @11, KC0[], KC1[]
719; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T1.XYZW, T2.X, 0
720; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T2.X, 1
721; EG-NEXT:    CF_END
722; EG-NEXT:    Fetch clause starting at 6:
723; EG-NEXT:     VTX_READ_128 T1.XYZW, T0.X, 16, #1
724; EG-NEXT:     VTX_READ_128 T0.XYZW, T0.X, 0, #1
725; EG-NEXT:    ALU clause starting at 10:
726; EG-NEXT:     MOV * T0.X, KC0[2].Y,
727; EG-NEXT:    ALU clause starting at 11:
728; EG-NEXT:     MOV * T2.X, literal.x,
729; EG-NEXT:    0(0.000000e+00), 0(0.000000e+00)
730;
731; GFX12-LABEL: constant_load_v16i16_align2:
732; GFX12:       ; %bb.0: ; %entry
733; GFX12-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
734; GFX12-NEXT:    v_mov_b32_e32 v8, 0
735; GFX12-NEXT:    s_wait_kmcnt 0x0
736; GFX12-NEXT:    s_clause 0x7
737; GFX12-NEXT:    global_load_u16 v3, v8, s[0:1] offset:28
738; GFX12-NEXT:    global_load_u16 v2, v8, s[0:1] offset:24
739; GFX12-NEXT:    global_load_u16 v1, v8, s[0:1] offset:20
740; GFX12-NEXT:    global_load_u16 v0, v8, s[0:1] offset:16
741; GFX12-NEXT:    global_load_u16 v7, v8, s[0:1] offset:12
742; GFX12-NEXT:    global_load_u16 v6, v8, s[0:1] offset:8
743; GFX12-NEXT:    global_load_u16 v5, v8, s[0:1] offset:4
744; GFX12-NEXT:    global_load_u16 v4, v8, s[0:1]
745; GFX12-NEXT:    s_wait_loadcnt 0x7
746; GFX12-NEXT:    global_load_d16_hi_b16 v3, v8, s[0:1] offset:30
747; GFX12-NEXT:    s_wait_loadcnt 0x7
748; GFX12-NEXT:    global_load_d16_hi_b16 v2, v8, s[0:1] offset:26
749; GFX12-NEXT:    s_wait_loadcnt 0x7
750; GFX12-NEXT:    global_load_d16_hi_b16 v1, v8, s[0:1] offset:22
751; GFX12-NEXT:    s_wait_loadcnt 0x7
752; GFX12-NEXT:    global_load_d16_hi_b16 v0, v8, s[0:1] offset:18
753; GFX12-NEXT:    s_wait_loadcnt 0x7
754; GFX12-NEXT:    global_load_d16_hi_b16 v7, v8, s[0:1] offset:14
755; GFX12-NEXT:    s_wait_loadcnt 0x7
756; GFX12-NEXT:    global_load_d16_hi_b16 v6, v8, s[0:1] offset:10
757; GFX12-NEXT:    s_wait_loadcnt 0x7
758; GFX12-NEXT:    global_load_d16_hi_b16 v5, v8, s[0:1] offset:6
759; GFX12-NEXT:    s_wait_loadcnt 0x7
760; GFX12-NEXT:    global_load_d16_hi_b16 v4, v8, s[0:1] offset:2
761; GFX12-NEXT:    s_wait_loadcnt 0x4
762; GFX12-NEXT:    global_store_b128 v[0:1], v[0:3], off
763; GFX12-NEXT:    s_wait_loadcnt 0x0
764; GFX12-NEXT:    global_store_b128 v[0:1], v[4:7], off
765; GFX12-NEXT:    s_endpgm
766entry:
767  %ld =  load <16 x i16>, ptr addrspace(4) %ptr0, align 2
768  store <16 x i16> %ld, ptr addrspace(1) undef, align 32
769  ret void
770}
771
; Test: load one i16 through the addrspace(4) pointer %in, zero-extend it to
; i32 and store the result through the addrspace(1) pointer %out.
; The per-prefix check lines are autogenerated (see the NOTE on the first line
; of this file); regenerate them rather than editing by hand.
; Expected load per prefix: buffer_load_ushort (GCN-NOHSA-SI), flat_load_ushort
; (GCN-HSA, GCN-NOHSA-VI), VTX_READ_16 (EG), s_load_u16 (GFX12).
772define amdgpu_kernel void @constant_zextload_i16_to_i32(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
773; GCN-NOHSA-SI-LABEL: constant_zextload_i16_to_i32:
774; GCN-NOHSA-SI:       ; %bb.0:
775; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
776; GCN-NOHSA-SI-NEXT:    s_mov_b32 s7, 0xf000
777; GCN-NOHSA-SI-NEXT:    s_mov_b32 s6, -1
778; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, s6
779; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, s7
780; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
781; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s2
782; GCN-NOHSA-SI-NEXT:    s_mov_b32 s9, s3
783; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v0, off, s[8:11], 0
784; GCN-NOHSA-SI-NEXT:    s_mov_b32 s4, s0
785; GCN-NOHSA-SI-NEXT:    s_mov_b32 s5, s1
786; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
787; GCN-NOHSA-SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
788; GCN-NOHSA-SI-NEXT:    s_endpgm
789;
790; GCN-HSA-LABEL: constant_zextload_i16_to_i32:
791; GCN-HSA:       ; %bb.0:
792; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
793; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
794; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
795; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
796; GCN-HSA-NEXT:    flat_load_ushort v2, v[0:1]
797; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s0
798; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s1
799; GCN-HSA-NEXT:    s_waitcnt vmcnt(0)
800; GCN-HSA-NEXT:    flat_store_dword v[0:1], v2
801; GCN-HSA-NEXT:    s_endpgm
802;
803; GCN-NOHSA-VI-LABEL: constant_zextload_i16_to_i32:
804; GCN-NOHSA-VI:       ; %bb.0:
805; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
806; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
807; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s2
808; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s3
809; GCN-NOHSA-VI-NEXT:    flat_load_ushort v2, v[0:1]
810; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s0
811; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s1
812; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
813; GCN-NOHSA-VI-NEXT:    flat_store_dword v[0:1], v2
814; GCN-NOHSA-VI-NEXT:    s_endpgm
815;
816; EG-LABEL: constant_zextload_i16_to_i32:
817; EG:       ; %bb.0:
818; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
819; EG-NEXT:    TEX 0 @6
820; EG-NEXT:    ALU 1, @9, KC0[CB0:0-32], KC1[]
821; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
822; EG-NEXT:    CF_END
823; EG-NEXT:    PAD
824; EG-NEXT:    Fetch clause starting at 6:
825; EG-NEXT:     VTX_READ_16 T0.X, T0.X, 0, #1
826; EG-NEXT:    ALU clause starting at 8:
827; EG-NEXT:     MOV * T0.X, KC0[2].Z,
828; EG-NEXT:    ALU clause starting at 9:
829; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
830; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
831;
832; GFX12-LABEL: constant_zextload_i16_to_i32:
833; GFX12:       ; %bb.0:
834; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
835; GFX12-NEXT:    s_wait_kmcnt 0x0
836; GFX12-NEXT:    s_load_u16 s2, s[2:3], 0x0
837; GFX12-NEXT:    s_wait_kmcnt 0x0
838; GFX12-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
839; GFX12-NEXT:    global_store_b32 v0, v1, s[0:1]
840; GFX12-NEXT:    s_endpgm
; IR under test: i16 load -> zext to i32 -> i32 store.
841  %a = load i16, ptr addrspace(4) %in
842  %ext = zext i16 %a to i32
843  store i32 %ext, ptr addrspace(1) %out
844  ret void
845}
846
; Test: load one i16 through the addrspace(4) pointer %in, sign-extend it to
; i32 and store the result through the addrspace(1) pointer %out.
; The per-prefix check lines are autogenerated (see the NOTE on the first line
; of this file); regenerate them rather than editing by hand.
; Expected load per prefix: buffer_load_sshort (GCN-NOHSA-SI), flat_load_sshort
; (GCN-HSA, GCN-NOHSA-VI), VTX_READ_16 followed by BFE_INT (EG),
; s_load_i16 (GFX12).
847define amdgpu_kernel void @constant_sextload_i16_to_i32(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
848; GCN-NOHSA-SI-LABEL: constant_sextload_i16_to_i32:
849; GCN-NOHSA-SI:       ; %bb.0:
850; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
851; GCN-NOHSA-SI-NEXT:    s_mov_b32 s7, 0xf000
852; GCN-NOHSA-SI-NEXT:    s_mov_b32 s6, -1
853; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, s6
854; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, s7
855; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
856; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s2
857; GCN-NOHSA-SI-NEXT:    s_mov_b32 s9, s3
858; GCN-NOHSA-SI-NEXT:    buffer_load_sshort v0, off, s[8:11], 0
859; GCN-NOHSA-SI-NEXT:    s_mov_b32 s4, s0
860; GCN-NOHSA-SI-NEXT:    s_mov_b32 s5, s1
861; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
862; GCN-NOHSA-SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
863; GCN-NOHSA-SI-NEXT:    s_endpgm
864;
865; GCN-HSA-LABEL: constant_sextload_i16_to_i32:
866; GCN-HSA:       ; %bb.0:
867; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
868; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
869; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
870; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
871; GCN-HSA-NEXT:    flat_load_sshort v2, v[0:1]
872; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s0
873; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s1
874; GCN-HSA-NEXT:    s_waitcnt vmcnt(0)
875; GCN-HSA-NEXT:    flat_store_dword v[0:1], v2
876; GCN-HSA-NEXT:    s_endpgm
877;
878; GCN-NOHSA-VI-LABEL: constant_sextload_i16_to_i32:
879; GCN-NOHSA-VI:       ; %bb.0:
880; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
881; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
882; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s2
883; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s3
884; GCN-NOHSA-VI-NEXT:    flat_load_sshort v2, v[0:1]
885; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s0
886; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s1
887; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
888; GCN-NOHSA-VI-NEXT:    flat_store_dword v[0:1], v2
889; GCN-NOHSA-VI-NEXT:    s_endpgm
890;
891; EG-LABEL: constant_sextload_i16_to_i32:
892; EG:       ; %bb.0:
893; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
894; EG-NEXT:    TEX 0 @6
895; EG-NEXT:    ALU 2, @9, KC0[CB0:0-32], KC1[]
896; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
897; EG-NEXT:    CF_END
898; EG-NEXT:    PAD
899; EG-NEXT:    Fetch clause starting at 6:
900; EG-NEXT:     VTX_READ_16 T0.X, T0.X, 0, #1
901; EG-NEXT:    ALU clause starting at 8:
902; EG-NEXT:     MOV * T0.X, KC0[2].Z,
903; EG-NEXT:    ALU clause starting at 9:
904; EG-NEXT:     BFE_INT T0.X, T0.X, 0.0, literal.x,
905; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.y,
906; EG-NEXT:    16(2.242078e-44), 2(2.802597e-45)
907;
908; GFX12-LABEL: constant_sextload_i16_to_i32:
909; GFX12:       ; %bb.0:
910; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
911; GFX12-NEXT:    s_wait_kmcnt 0x0
912; GFX12-NEXT:    s_load_i16 s2, s[2:3], 0x0
913; GFX12-NEXT:    s_wait_kmcnt 0x0
914; GFX12-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
915; GFX12-NEXT:    global_store_b32 v0, v1, s[0:1]
916; GFX12-NEXT:    s_endpgm
; IR under test: i16 load -> sext to i32 -> i32 store.
917  %a = load i16, ptr addrspace(4) %in
918  %ext = sext i16 %a to i32
919  store i32 %ext, ptr addrspace(1) %out
920  ret void
921}
922
; Test: single-element vector form of the zero-extending load. Loads a
; <1 x i16> through the addrspace(4) pointer %in, zero-extends it to <1 x i32>
; and stores the result through the addrspace(1) pointer %out.
; The per-prefix check lines are autogenerated (see the NOTE on the first line
; of this file); regenerate them rather than editing by hand.
; Expected load per prefix: buffer_load_ushort (GCN-NOHSA-SI), flat_load_ushort
; (GCN-HSA, GCN-NOHSA-VI), VTX_READ_16 (EG), s_load_u16 (GFX12).
923define amdgpu_kernel void @constant_zextload_v1i16_to_v1i32(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
924; GCN-NOHSA-SI-LABEL: constant_zextload_v1i16_to_v1i32:
925; GCN-NOHSA-SI:       ; %bb.0:
926; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
927; GCN-NOHSA-SI-NEXT:    s_mov_b32 s7, 0xf000
928; GCN-NOHSA-SI-NEXT:    s_mov_b32 s6, -1
929; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, s6
930; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, s7
931; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
932; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s2
933; GCN-NOHSA-SI-NEXT:    s_mov_b32 s9, s3
934; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v0, off, s[8:11], 0
935; GCN-NOHSA-SI-NEXT:    s_mov_b32 s4, s0
936; GCN-NOHSA-SI-NEXT:    s_mov_b32 s5, s1
937; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
938; GCN-NOHSA-SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
939; GCN-NOHSA-SI-NEXT:    s_endpgm
940;
941; GCN-HSA-LABEL: constant_zextload_v1i16_to_v1i32:
942; GCN-HSA:       ; %bb.0:
943; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
944; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
945; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
946; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
947; GCN-HSA-NEXT:    flat_load_ushort v2, v[0:1]
948; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s0
949; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s1
950; GCN-HSA-NEXT:    s_waitcnt vmcnt(0)
951; GCN-HSA-NEXT:    flat_store_dword v[0:1], v2
952; GCN-HSA-NEXT:    s_endpgm
953;
954; GCN-NOHSA-VI-LABEL: constant_zextload_v1i16_to_v1i32:
955; GCN-NOHSA-VI:       ; %bb.0:
956; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
957; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
958; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s2
959; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s3
960; GCN-NOHSA-VI-NEXT:    flat_load_ushort v2, v[0:1]
961; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s0
962; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s1
963; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
964; GCN-NOHSA-VI-NEXT:    flat_store_dword v[0:1], v2
965; GCN-NOHSA-VI-NEXT:    s_endpgm
966;
967; EG-LABEL: constant_zextload_v1i16_to_v1i32:
968; EG:       ; %bb.0:
969; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
970; EG-NEXT:    TEX 0 @6
971; EG-NEXT:    ALU 1, @9, KC0[CB0:0-32], KC1[]
972; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
973; EG-NEXT:    CF_END
974; EG-NEXT:    PAD
975; EG-NEXT:    Fetch clause starting at 6:
976; EG-NEXT:     VTX_READ_16 T0.X, T0.X, 0, #1
977; EG-NEXT:    ALU clause starting at 8:
978; EG-NEXT:     MOV * T0.X, KC0[2].Z,
979; EG-NEXT:    ALU clause starting at 9:
980; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
981; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
982;
983; GFX12-LABEL: constant_zextload_v1i16_to_v1i32:
984; GFX12:       ; %bb.0:
985; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
986; GFX12-NEXT:    s_wait_kmcnt 0x0
987; GFX12-NEXT:    s_load_u16 s2, s[2:3], 0x0
988; GFX12-NEXT:    s_wait_kmcnt 0x0
989; GFX12-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
990; GFX12-NEXT:    global_store_b32 v0, v1, s[0:1]
991; GFX12-NEXT:    s_endpgm
; IR under test: <1 x i16> load -> zext to <1 x i32> -> vector store.
992  %load = load <1 x i16>, ptr addrspace(4) %in
993  %ext = zext <1 x i16> %load to <1 x i32>
994  store <1 x i32> %ext, ptr addrspace(1) %out
995  ret void
996}
997
; Test: single-element vector form of the sign-extending load. Loads a
; <1 x i16> through the addrspace(4) pointer %in, sign-extends it to <1 x i32>
; and stores the result through the addrspace(1) pointer %out.
; The per-prefix check lines are autogenerated (see the NOTE on the first line
; of this file); regenerate them rather than editing by hand.
; Expected load per prefix: buffer_load_sshort (GCN-NOHSA-SI), flat_load_sshort
; (GCN-HSA, GCN-NOHSA-VI), VTX_READ_16 followed by BFE_INT (EG),
; s_load_i16 (GFX12).
998define amdgpu_kernel void @constant_sextload_v1i16_to_v1i32(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
999; GCN-NOHSA-SI-LABEL: constant_sextload_v1i16_to_v1i32:
1000; GCN-NOHSA-SI:       ; %bb.0:
1001; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
1002; GCN-NOHSA-SI-NEXT:    s_mov_b32 s7, 0xf000
1003; GCN-NOHSA-SI-NEXT:    s_mov_b32 s6, -1
1004; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, s6
1005; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, s7
1006; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
1007; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s2
1008; GCN-NOHSA-SI-NEXT:    s_mov_b32 s9, s3
1009; GCN-NOHSA-SI-NEXT:    buffer_load_sshort v0, off, s[8:11], 0
1010; GCN-NOHSA-SI-NEXT:    s_mov_b32 s4, s0
1011; GCN-NOHSA-SI-NEXT:    s_mov_b32 s5, s1
1012; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
1013; GCN-NOHSA-SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
1014; GCN-NOHSA-SI-NEXT:    s_endpgm
1015;
1016; GCN-HSA-LABEL: constant_sextload_v1i16_to_v1i32:
1017; GCN-HSA:       ; %bb.0:
1018; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
1019; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
1020; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
1021; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
1022; GCN-HSA-NEXT:    flat_load_sshort v2, v[0:1]
1023; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s0
1024; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s1
1025; GCN-HSA-NEXT:    s_waitcnt vmcnt(0)
1026; GCN-HSA-NEXT:    flat_store_dword v[0:1], v2
1027; GCN-HSA-NEXT:    s_endpgm
1028;
1029; GCN-NOHSA-VI-LABEL: constant_sextload_v1i16_to_v1i32:
1030; GCN-NOHSA-VI:       ; %bb.0:
1031; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
1032; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
1033; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s2
1034; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s3
1035; GCN-NOHSA-VI-NEXT:    flat_load_sshort v2, v[0:1]
1036; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s0
1037; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s1
1038; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
1039; GCN-NOHSA-VI-NEXT:    flat_store_dword v[0:1], v2
1040; GCN-NOHSA-VI-NEXT:    s_endpgm
1041;
1042; EG-LABEL: constant_sextload_v1i16_to_v1i32:
1043; EG:       ; %bb.0:
1044; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
1045; EG-NEXT:    TEX 0 @6
1046; EG-NEXT:    ALU 2, @9, KC0[CB0:0-32], KC1[]
1047; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
1048; EG-NEXT:    CF_END
1049; EG-NEXT:    PAD
1050; EG-NEXT:    Fetch clause starting at 6:
1051; EG-NEXT:     VTX_READ_16 T0.X, T0.X, 0, #1
1052; EG-NEXT:    ALU clause starting at 8:
1053; EG-NEXT:     MOV * T0.X, KC0[2].Z,
1054; EG-NEXT:    ALU clause starting at 9:
1055; EG-NEXT:     BFE_INT T0.X, T0.X, 0.0, literal.x,
1056; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.y,
1057; EG-NEXT:    16(2.242078e-44), 2(2.802597e-45)
1058;
1059; GFX12-LABEL: constant_sextload_v1i16_to_v1i32:
1060; GFX12:       ; %bb.0:
1061; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
1062; GFX12-NEXT:    s_wait_kmcnt 0x0
1063; GFX12-NEXT:    s_load_i16 s2, s[2:3], 0x0
1064; GFX12-NEXT:    s_wait_kmcnt 0x0
1065; GFX12-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
1066; GFX12-NEXT:    global_store_b32 v0, v1, s[0:1]
1067; GFX12-NEXT:    s_endpgm
; IR under test: <1 x i16> load -> sext to <1 x i32> -> vector store.
1068  %load = load <1 x i16>, ptr addrspace(4) %in
1069  %ext = sext <1 x i16> %load to <1 x i32>
1070  store <1 x i32> %ext, ptr addrspace(1) %out
1071  ret void
1072}
1073
; Test: load a <2 x i16> through the addrspace(4) pointer %in, zero-extend each
; element to i32 and store the <2 x i32> through the addrspace(1) pointer %out.
; The per-prefix check lines are autogenerated (see the NOTE on the first line
; of this file); regenerate them rather than editing by hand.
; Expected shape: one 32-bit load (s_load_dword on the GCN prefixes, s_load_b32
; on GFX12, VTX_READ_32 on EG), then the two halves are split with a
; right shift by 16 and an AND with 0xffff (65535 on EG).
1074define amdgpu_kernel void @constant_zextload_v2i16_to_v2i32(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
1075; GCN-NOHSA-SI-LABEL: constant_zextload_v2i16_to_v2i32:
1076; GCN-NOHSA-SI:       ; %bb.0:
1077; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
1078; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
1079; GCN-NOHSA-SI-NEXT:    s_load_dword s2, s[2:3], 0x0
1080; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
1081; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
1082; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s4, s2, 16
1083; GCN-NOHSA-SI-NEXT:    s_and_b32 s5, s2, 0xffff
1084; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
1085; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s5
1086; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s4
1087; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
1088; GCN-NOHSA-SI-NEXT:    s_endpgm
1089;
1090; GCN-HSA-LABEL: constant_zextload_v2i16_to_v2i32:
1091; GCN-HSA:       ; %bb.0:
1092; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
1093; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
1094; GCN-HSA-NEXT:    s_load_dword s2, s[2:3], 0x0
1095; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s0
1096; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s1
1097; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
1098; GCN-HSA-NEXT:    s_lshr_b32 s0, s2, 16
1099; GCN-HSA-NEXT:    s_and_b32 s1, s2, 0xffff
1100; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s1
1101; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s0
1102; GCN-HSA-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
1103; GCN-HSA-NEXT:    s_endpgm
1104;
1105; GCN-NOHSA-VI-LABEL: constant_zextload_v2i16_to_v2i32:
1106; GCN-NOHSA-VI:       ; %bb.0:
1107; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
1108; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
1109; GCN-NOHSA-VI-NEXT:    s_load_dword s2, s[2:3], 0x0
1110; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s0
1111; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s1
1112; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
1113; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s0, s2, 16
1114; GCN-NOHSA-VI-NEXT:    s_and_b32 s1, s2, 0xffff
1115; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s1
1116; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s0
1117; GCN-NOHSA-VI-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
1118; GCN-NOHSA-VI-NEXT:    s_endpgm
1119;
1120; EG-LABEL: constant_zextload_v2i16_to_v2i32:
1121; EG:       ; %bb.0:
1122; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
1123; EG-NEXT:    TEX 0 @6
1124; EG-NEXT:    ALU 4, @9, KC0[CB0:0-32], KC1[]
1125; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T4.XY, T5.X, 1
1126; EG-NEXT:    CF_END
1127; EG-NEXT:    PAD
1128; EG-NEXT:    Fetch clause starting at 6:
1129; EG-NEXT:     VTX_READ_32 T4.X, T4.X, 0, #1
1130; EG-NEXT:    ALU clause starting at 8:
1131; EG-NEXT:     MOV * T4.X, KC0[2].Z,
1132; EG-NEXT:    ALU clause starting at 9:
1133; EG-NEXT:     LSHR * T4.Y, T4.X, literal.x,
1134; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
1135; EG-NEXT:     AND_INT T4.X, T4.X, literal.x,
1136; EG-NEXT:     LSHR * T5.X, KC0[2].Y, literal.y,
1137; EG-NEXT:    65535(9.183409e-41), 2(2.802597e-45)
1138;
1139; GFX12-LABEL: constant_zextload_v2i16_to_v2i32:
1140; GFX12:       ; %bb.0:
1141; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
1142; GFX12-NEXT:    s_wait_kmcnt 0x0
1143; GFX12-NEXT:    s_load_b32 s2, s[2:3], 0x0
1144; GFX12-NEXT:    s_wait_kmcnt 0x0
1145; GFX12-NEXT:    s_and_b32 s3, s2, 0xffff
1146; GFX12-NEXT:    s_lshr_b32 s2, s2, 16
1147; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
1148; GFX12-NEXT:    v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s2
1149; GFX12-NEXT:    v_mov_b32_e32 v0, s3
1150; GFX12-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
1151; GFX12-NEXT:    s_endpgm
; IR under test: <2 x i16> load -> zext to <2 x i32> -> vector store.
1152  %load = load <2 x i16>, ptr addrspace(4) %in
1153  %ext = zext <2 x i16> %load to <2 x i32>
1154  store <2 x i32> %ext, ptr addrspace(1) %out
1155  ret void
1156}
1157
1158; TODO: We should use ASHR instead of LSHR + BFE
; Test: load a <2 x i16> through the addrspace(4) pointer %in, sign-extend each
; element to i32 and store the <2 x i32> through the addrspace(1) pointer %out.
; The per-prefix check lines are autogenerated (see the NOTE on the first line
; of this file); regenerate them rather than editing by hand.
; Expected shape: one 32-bit load, then on the GCN and GFX12 prefixes the low
; half goes through s_sext_i32_i16 and the high half through s_ashr_i32 by 16.
; The EG output currently extracts the high half with LSHR followed by BFE_INT.
1159define amdgpu_kernel void @constant_sextload_v2i16_to_v2i32(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
1160; GCN-NOHSA-SI-LABEL: constant_sextload_v2i16_to_v2i32:
1161; GCN-NOHSA-SI:       ; %bb.0:
1162; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
1163; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
1164; GCN-NOHSA-SI-NEXT:    s_load_dword s2, s[2:3], 0x0
1165; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
1166; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
1167; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s4, s2, 16
1168; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s5, s2
1169; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
1170; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s5
1171; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s4
1172; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
1173; GCN-NOHSA-SI-NEXT:    s_endpgm
1174;
1175; GCN-HSA-LABEL: constant_sextload_v2i16_to_v2i32:
1176; GCN-HSA:       ; %bb.0:
1177; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
1178; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
1179; GCN-HSA-NEXT:    s_load_dword s2, s[2:3], 0x0
1180; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s0
1181; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s1
1182; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
1183; GCN-HSA-NEXT:    s_ashr_i32 s0, s2, 16
1184; GCN-HSA-NEXT:    s_sext_i32_i16 s1, s2
1185; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s1
1186; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s0
1187; GCN-HSA-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
1188; GCN-HSA-NEXT:    s_endpgm
1189;
1190; GCN-NOHSA-VI-LABEL: constant_sextload_v2i16_to_v2i32:
1191; GCN-NOHSA-VI:       ; %bb.0:
1192; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
1193; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
1194; GCN-NOHSA-VI-NEXT:    s_load_dword s2, s[2:3], 0x0
1195; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s0
1196; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s1
1197; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
1198; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s0, s2, 16
1199; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s1, s2
1200; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s1
1201; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s0
1202; GCN-NOHSA-VI-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
1203; GCN-NOHSA-VI-NEXT:    s_endpgm
1204;
1205; EG-LABEL: constant_sextload_v2i16_to_v2i32:
1206; EG:       ; %bb.0:
1207; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
1208; EG-NEXT:    TEX 0 @6
1209; EG-NEXT:    ALU 5, @9, KC0[CB0:0-32], KC1[]
1210; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T5.XY, T4.X, 1
1211; EG-NEXT:    CF_END
1212; EG-NEXT:    PAD
1213; EG-NEXT:    Fetch clause starting at 6:
1214; EG-NEXT:     VTX_READ_32 T4.X, T4.X, 0, #1
1215; EG-NEXT:    ALU clause starting at 8:
1216; EG-NEXT:     MOV * T4.X, KC0[2].Z,
1217; EG-NEXT:    ALU clause starting at 9:
1218; EG-NEXT:     BFE_INT T5.X, T4.X, 0.0, literal.x,
1219; EG-NEXT:     LSHR T0.W, T4.X, literal.x,
1220; EG-NEXT:     LSHR * T4.X, KC0[2].Y, literal.y,
1221; EG-NEXT:    16(2.242078e-44), 2(2.802597e-45)
1222; EG-NEXT:     BFE_INT * T5.Y, PV.W, 0.0, literal.x,
1223; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
1224;
1225; GFX12-LABEL: constant_sextload_v2i16_to_v2i32:
1226; GFX12:       ; %bb.0:
1227; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
1228; GFX12-NEXT:    s_wait_kmcnt 0x0
1229; GFX12-NEXT:    s_load_b32 s2, s[2:3], 0x0
1230; GFX12-NEXT:    s_wait_kmcnt 0x0
1231; GFX12-NEXT:    s_sext_i32_i16 s3, s2
1232; GFX12-NEXT:    s_ashr_i32 s2, s2, 16
1233; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
1234; GFX12-NEXT:    v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s2
1235; GFX12-NEXT:    v_mov_b32_e32 v0, s3
1236; GFX12-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
1237; GFX12-NEXT:    s_endpgm
; IR under test: <2 x i16> load -> sext to <2 x i32> -> vector store.
1238  %load = load <2 x i16>, ptr addrspace(4) %in
1239  %ext = sext <2 x i16> %load to <2 x i32>
1240  store <2 x i32> %ext, ptr addrspace(1) %out
1241  ret void
1242}
1243
; Test: load a <3 x i16> through the addrspace(4) pointer %in, zero-extend each
; element to i32 and store the <3 x i32> through the addrspace(1) pointer %out.
; The per-prefix check lines are autogenerated (see the NOTE on the first line
; of this file); regenerate them rather than editing by hand.
; Expected shape: the GCN and GFX12 prefixes fetch 64 bits with one scalar load
; (s_load_dwordx2 / s_load_b64) and split it with s_lshr_b32 / s_and_b32, while
; EG issues three VTX_READ_16 fetches. The result is written as dword (offset 8)
; plus dwordx2 on GCN-NOHSA-SI, flat_store_dwordx3 on GCN-HSA and GCN-NOHSA-VI,
; global_store_b96 on GFX12 and two MEM_RAT_CACHELESS stores on EG.
1244define amdgpu_kernel void @constant_zextload_v3i16_to_v3i32(ptr addrspace(1) %out, ptr addrspace(4) %in) {
1245; GCN-NOHSA-SI-LABEL: constant_zextload_v3i16_to_v3i32:
1246; GCN-NOHSA-SI:       ; %bb.0: ; %entry
1247; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
1248; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
1249; GCN-NOHSA-SI-NEXT:    s_load_dwordx2 s[4:5], s[2:3], 0x0
1250; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
1251; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
1252; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
1253; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s6, s4, 16
1254; GCN-NOHSA-SI-NEXT:    s_and_b32 s5, s5, 0xffff
1255; GCN-NOHSA-SI-NEXT:    s_and_b32 s4, s4, 0xffff
1256; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s5
1257; GCN-NOHSA-SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0 offset:8
1258; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
1259; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s4
1260; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s6
1261; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
1262; GCN-NOHSA-SI-NEXT:    s_endpgm
1263;
1264; GCN-HSA-LABEL: constant_zextload_v3i16_to_v3i32:
1265; GCN-HSA:       ; %bb.0: ; %entry
1266; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
1267; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
1268; GCN-HSA-NEXT:    s_load_dwordx2 s[2:3], s[2:3], 0x0
1269; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s0
1270; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s1
1271; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
1272; GCN-HSA-NEXT:    s_lshr_b32 s0, s2, 16
1273; GCN-HSA-NEXT:    s_and_b32 s1, s3, 0xffff
1274; GCN-HSA-NEXT:    s_and_b32 s2, s2, 0xffff
1275; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
1276; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s0
1277; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s1
1278; GCN-HSA-NEXT:    flat_store_dwordx3 v[3:4], v[0:2]
1279; GCN-HSA-NEXT:    s_endpgm
1280;
1281; GCN-NOHSA-VI-LABEL: constant_zextload_v3i16_to_v3i32:
1282; GCN-NOHSA-VI:       ; %bb.0: ; %entry
1283; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
1284; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
1285; GCN-NOHSA-VI-NEXT:    s_load_dwordx2 s[2:3], s[2:3], 0x0
1286; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s0
1287; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s1
1288; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
1289; GCN-NOHSA-VI-NEXT:    s_and_b32 s0, s3, 0xffff
1290; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s1, s2, 16
1291; GCN-NOHSA-VI-NEXT:    s_and_b32 s2, s2, 0xffff
1292; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s2
1293; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s1
1294; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s0
1295; GCN-NOHSA-VI-NEXT:    flat_store_dwordx3 v[3:4], v[0:2]
1296; GCN-NOHSA-VI-NEXT:    s_endpgm
1297;
1298; EG-LABEL: constant_zextload_v3i16_to_v3i32:
1299; EG:       ; %bb.0: ; %entry
1300; EG-NEXT:    ALU 4, @12, KC0[CB0:0-32], KC1[]
1301; EG-NEXT:    TEX 2 @6
1302; EG-NEXT:    ALU 2, @17, KC0[], KC1[]
1303; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T2.X, T4.X, 0
1304; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T3.XY, T0.X, 1
1305; EG-NEXT:    CF_END
1306; EG-NEXT:    Fetch clause starting at 6:
1307; EG-NEXT:     VTX_READ_16 T2.X, T1.X, 4, #1
1308; EG-NEXT:     VTX_READ_16 T3.X, T1.X, 0, #1
1309; EG-NEXT:     VTX_READ_16 T1.X, T1.X, 2, #1
1310; EG-NEXT:    ALU clause starting at 12:
1311; EG-NEXT:     LSHR T0.X, KC0[2].Y, literal.x,
1312; EG-NEXT:     MOV * T1.X, KC0[2].Z,
1313; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
1314; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.x,
1315; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
1316; EG-NEXT:    ALU clause starting at 17:
1317; EG-NEXT:     LSHR T4.X, T0.W, literal.x,
1318; EG-NEXT:     MOV * T3.Y, T1.X,
1319; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
1320;
1321; GFX12-LABEL: constant_zextload_v3i16_to_v3i32:
1322; GFX12:       ; %bb.0: ; %entry
1323; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
1324; GFX12-NEXT:    s_wait_kmcnt 0x0
1325; GFX12-NEXT:    s_load_b64 s[2:3], s[2:3], 0x0
1326; GFX12-NEXT:    s_wait_kmcnt 0x0
1327; GFX12-NEXT:    s_and_b32 s3, s3, 0xffff
1328; GFX12-NEXT:    s_and_b32 s4, s2, 0xffff
1329; GFX12-NEXT:    s_lshr_b32 s2, s2, 16
1330; GFX12-NEXT:    v_dual_mov_b32 v3, 0 :: v_dual_mov_b32 v0, s4
1331; GFX12-NEXT:    v_dual_mov_b32 v1, s2 :: v_dual_mov_b32 v2, s3
1332; GFX12-NEXT:    global_store_b96 v3, v[0:2], s[0:1]
1333; GFX12-NEXT:    s_endpgm
; IR under test: <3 x i16> load -> zext to <3 x i32> -> vector store.
; The explicit "entry" label is what appears as "%entry" in the check lines.
1334entry:
1335  %ld = load <3 x i16>, ptr addrspace(4) %in
1336  %ext = zext <3 x i16> %ld to <3 x i32>
1337  store <3 x i32> %ext, ptr addrspace(1) %out
1338  ret void
1339}
1340
; Test: load a <3 x i16> through the addrspace(4) pointer %in, sign-extend each
; element to i32 and store the <3 x i32> through the addrspace(1) pointer %out.
; The per-prefix check lines are autogenerated (see the NOTE on the first line
; of this file); regenerate them rather than editing by hand.
; Expected shape: the GCN and GFX12 prefixes fetch 64 bits with one scalar load
; (s_load_dwordx2 / s_load_b64) and extend with s_sext_i32_i16 / s_ashr_i32,
; while EG issues three VTX_READ_16 fetches, each followed by a BFE_INT. The
; result is written as dword (offset 8) plus dwordx2 on GCN-NOHSA-SI,
; flat_store_dwordx3 on GCN-HSA and GCN-NOHSA-VI, global_store_b96 on GFX12 and
; two MEM_RAT_CACHELESS stores on EG.
1341define amdgpu_kernel void @constant_sextload_v3i16_to_v3i32(ptr addrspace(1) %out, ptr addrspace(4) %in) {
1342; GCN-NOHSA-SI-LABEL: constant_sextload_v3i16_to_v3i32:
1343; GCN-NOHSA-SI:       ; %bb.0: ; %entry
1344; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
1345; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
1346; GCN-NOHSA-SI-NEXT:    s_load_dwordx2 s[4:5], s[2:3], 0x0
1347; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
1348; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
1349; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
1350; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s6, s4, 16
1351; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s5, s5
1352; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s4, s4
1353; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s5
1354; GCN-NOHSA-SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0 offset:8
1355; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
1356; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s4
1357; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s6
1358; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
1359; GCN-NOHSA-SI-NEXT:    s_endpgm
1360;
1361; GCN-HSA-LABEL: constant_sextload_v3i16_to_v3i32:
1362; GCN-HSA:       ; %bb.0: ; %entry
1363; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
1364; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
1365; GCN-HSA-NEXT:    s_load_dwordx2 s[2:3], s[2:3], 0x0
1366; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s0
1367; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s1
1368; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
1369; GCN-HSA-NEXT:    s_ashr_i32 s0, s2, 16
1370; GCN-HSA-NEXT:    s_sext_i32_i16 s1, s3
1371; GCN-HSA-NEXT:    s_sext_i32_i16 s2, s2
1372; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
1373; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s0
1374; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s1
1375; GCN-HSA-NEXT:    flat_store_dwordx3 v[3:4], v[0:2]
1376; GCN-HSA-NEXT:    s_endpgm
1377;
1378; GCN-NOHSA-VI-LABEL: constant_sextload_v3i16_to_v3i32:
1379; GCN-NOHSA-VI:       ; %bb.0: ; %entry
1380; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
1381; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
1382; GCN-NOHSA-VI-NEXT:    s_load_dwordx2 s[2:3], s[2:3], 0x0
1383; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s0
1384; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s1
1385; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
1386; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s0, s2, 16
1387; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s1, s3
1388; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s2, s2
1389; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s2
1390; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s0
1391; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s1
1392; GCN-NOHSA-VI-NEXT:    flat_store_dwordx3 v[3:4], v[0:2]
1393; GCN-NOHSA-VI-NEXT:    s_endpgm
1394;
1395; EG-LABEL: constant_sextload_v3i16_to_v3i32:
1396; EG:       ; %bb.0: ; %entry
1397; EG-NEXT:    ALU 0, @12, KC0[CB0:0-32], KC1[]
1398; EG-NEXT:    TEX 2 @6
1399; EG-NEXT:    ALU 9, @13, KC0[CB0:0-32], KC1[]
1400; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T2.X, T3.X, 0
1401; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
1402; EG-NEXT:    CF_END
1403; EG-NEXT:    Fetch clause starting at 6:
1404; EG-NEXT:     VTX_READ_16 T1.X, T0.X, 2, #1
1405; EG-NEXT:     VTX_READ_16 T2.X, T0.X, 4, #1
1406; EG-NEXT:     VTX_READ_16 T0.X, T0.X, 0, #1
1407; EG-NEXT:    ALU clause starting at 12:
1408; EG-NEXT:     MOV * T0.X, KC0[2].Z,
1409; EG-NEXT:    ALU clause starting at 13:
1410; EG-NEXT:     BFE_INT * T0.Y, T1.X, 0.0, literal.x,
1411; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
1412; EG-NEXT:     BFE_INT T0.X, T0.X, 0.0, literal.x,
1413; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.y,
1414; EG-NEXT:    16(2.242078e-44), 2(2.802597e-45)
1415; EG-NEXT:     BFE_INT T2.X, T2.X, 0.0, literal.x,
1416; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
1417; EG-NEXT:    16(2.242078e-44), 8(1.121039e-44)
1418; EG-NEXT:     LSHR * T3.X, PV.W, literal.x,
1419; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
1420;
1421; GFX12-LABEL: constant_sextload_v3i16_to_v3i32:
1422; GFX12:       ; %bb.0: ; %entry
1423; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
1424; GFX12-NEXT:    s_wait_kmcnt 0x0
1425; GFX12-NEXT:    s_load_b64 s[2:3], s[2:3], 0x0
1426; GFX12-NEXT:    s_wait_kmcnt 0x0
1427; GFX12-NEXT:    s_ashr_i32 s4, s2, 16
1428; GFX12-NEXT:    s_sext_i32_i16 s2, s2
1429; GFX12-NEXT:    s_sext_i32_i16 s3, s3
1430; GFX12-NEXT:    v_dual_mov_b32 v3, 0 :: v_dual_mov_b32 v0, s2
1431; GFX12-NEXT:    v_dual_mov_b32 v1, s4 :: v_dual_mov_b32 v2, s3
1432; GFX12-NEXT:    global_store_b96 v3, v[0:2], s[0:1]
1433; GFX12-NEXT:    s_endpgm
; IR under test: <3 x i16> load -> sext to <3 x i32> -> vector store.
; The explicit "entry" label is what appears as "%entry" in the check lines.
1434entry:
1435  %ld = load <3 x i16>, ptr addrspace(4) %in
1436  %ext = sext <3 x i16> %ld to <3 x i32>
1437  store <3 x i32> %ext, ptr addrspace(1) %out
1438  ret void
1439}
1440
1441; v4i16 is naturally 8 byte aligned
1442; TODO: This should use LD, but for some reason there are redundant MOVs
; Loads a <4 x i16> vector through the addrspace(4) pointer %in, zero-extends
; every element to i32 and stores the <4 x i32> result through the
; addrspace(1) pointer %out. The per-target check lines in the body are
; autogenerated by utils/update_llc_test_checks.py (see the NOTE on the first
; line of the file); regenerate them with that script instead of editing them
; by hand.
1443define amdgpu_kernel void @constant_zextload_v4i16_to_v4i32(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
1444; GCN-NOHSA-SI-LABEL: constant_zextload_v4i16_to_v4i32:
1445; GCN-NOHSA-SI:       ; %bb.0:
1446; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
1447; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
1448; GCN-NOHSA-SI-NEXT:    s_load_dwordx2 s[4:5], s[2:3], 0x0
1449; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
1450; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
1451; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s6, s5, 16
1452; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s7, s4, 16
1453; GCN-NOHSA-SI-NEXT:    s_and_b32 s5, s5, 0xffff
1454; GCN-NOHSA-SI-NEXT:    s_and_b32 s4, s4, 0xffff
1455; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
1456; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s4
1457; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s7
1458; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s5
1459; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s6
1460; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
1461; GCN-NOHSA-SI-NEXT:    s_endpgm
1462;
1463; GCN-HSA-LABEL: constant_zextload_v4i16_to_v4i32:
1464; GCN-HSA:       ; %bb.0:
1465; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
1466; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
1467; GCN-HSA-NEXT:    s_load_dwordx2 s[2:3], s[2:3], 0x0
1468; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
1469; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
1470; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
1471; GCN-HSA-NEXT:    s_lshr_b32 s0, s3, 16
1472; GCN-HSA-NEXT:    s_lshr_b32 s1, s2, 16
1473; GCN-HSA-NEXT:    s_and_b32 s3, s3, 0xffff
1474; GCN-HSA-NEXT:    s_and_b32 s2, s2, 0xffff
1475; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
1476; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s1
1477; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s3
1478; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s0
1479; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
1480; GCN-HSA-NEXT:    s_endpgm
1481;
1482; GCN-NOHSA-VI-LABEL: constant_zextload_v4i16_to_v4i32:
1483; GCN-NOHSA-VI:       ; %bb.0:
1484; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
1485; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
1486; GCN-NOHSA-VI-NEXT:    s_load_dwordx2 s[2:3], s[2:3], 0x0
1487; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s0
1488; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s1
1489; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
1490; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s0, s3, 16
1491; GCN-NOHSA-VI-NEXT:    s_and_b32 s1, s3, 0xffff
1492; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s3, s2, 16
1493; GCN-NOHSA-VI-NEXT:    s_and_b32 s2, s2, 0xffff
1494; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s2
1495; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s3
1496; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s1
1497; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s0
1498; GCN-NOHSA-VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
1499; GCN-NOHSA-VI-NEXT:    s_endpgm
1500;
1501; EG-LABEL: constant_zextload_v4i16_to_v4i32:
1502; EG:       ; %bb.0:
1503; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
1504; EG-NEXT:    TEX 0 @6
1505; EG-NEXT:    ALU 8, @9, KC0[CB0:0-32], KC1[]
1506; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T5.XYZW, T6.X, 1
1507; EG-NEXT:    CF_END
1508; EG-NEXT:    PAD
1509; EG-NEXT:    Fetch clause starting at 6:
1510; EG-NEXT:     VTX_READ_64 T5.XY, T5.X, 0, #1
1511; EG-NEXT:    ALU clause starting at 8:
1512; EG-NEXT:     MOV * T5.X, KC0[2].Z,
1513; EG-NEXT:    ALU clause starting at 9:
1514; EG-NEXT:     LSHR * T5.W, T5.Y, literal.x,
1515; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
1516; EG-NEXT:     AND_INT * T5.Z, T5.Y, literal.x,
1517; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
1518; EG-NEXT:     LSHR * T5.Y, T5.X, literal.x,
1519; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
1520; EG-NEXT:     AND_INT T5.X, T5.X, literal.x,
1521; EG-NEXT:     LSHR * T6.X, KC0[2].Y, literal.y,
1522; EG-NEXT:    65535(9.183409e-41), 2(2.802597e-45)
1523;
1524; GFX12-LABEL: constant_zextload_v4i16_to_v4i32:
1525; GFX12:       ; %bb.0:
1526; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
1527; GFX12-NEXT:    s_wait_kmcnt 0x0
1528; GFX12-NEXT:    s_load_b64 s[2:3], s[2:3], 0x0
1529; GFX12-NEXT:    s_wait_kmcnt 0x0
1530; GFX12-NEXT:    s_lshr_b32 s4, s3, 16
1531; GFX12-NEXT:    s_and_b32 s3, s3, 0xffff
1532; GFX12-NEXT:    s_and_b32 s5, s2, 0xffff
1533; GFX12-NEXT:    s_lshr_b32 s2, s2, 16
1534; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
1535; GFX12-NEXT:    v_dual_mov_b32 v4, 0 :: v_dual_mov_b32 v1, s2
1536; GFX12-NEXT:    v_dual_mov_b32 v0, s5 :: v_dual_mov_b32 v3, s4
1537; GFX12-NEXT:    v_mov_b32_e32 v2, s3
1538; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[0:1]
1539; GFX12-NEXT:    s_endpgm
1540  %load = load <4 x i16>, ptr addrspace(4) %in
1541  %ext = zext <4 x i16> %load to <4 x i32>
1542  store <4 x i32> %ext, ptr addrspace(1) %out
1543  ret void
1544}
1545
1546; v4i16 is naturally 8 byte aligned
1547; TODO: This should use LD, but for some reason there are redundant MOVs
1548; TODO: We should use ASHR instead of LSHR + BFE
; Loads a <4 x i16> vector through the addrspace(4) pointer %in, sign-extends
; every element to i32 and stores the <4 x i32> result through the
; addrspace(1) pointer %out. The per-target check lines in the body are
; autogenerated by utils/update_llc_test_checks.py (see the NOTE on the first
; line of the file); regenerate them with that script instead of editing them
; by hand.
1549define amdgpu_kernel void @constant_sextload_v4i16_to_v4i32(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
1550; GCN-NOHSA-SI-LABEL: constant_sextload_v4i16_to_v4i32:
1551; GCN-NOHSA-SI:       ; %bb.0:
1552; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
1553; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
1554; GCN-NOHSA-SI-NEXT:    s_load_dwordx2 s[4:5], s[2:3], 0x0
1555; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
1556; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
1557; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s8, s4, 16
1558; GCN-NOHSA-SI-NEXT:    s_ashr_i64 s[6:7], s[4:5], 48
1559; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s5, s5
1560; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s4, s4
1561; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
1562; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s4
1563; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s8
1564; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s5
1565; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s6
1566; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
1567; GCN-NOHSA-SI-NEXT:    s_endpgm
1568;
1569; GCN-HSA-LABEL: constant_sextload_v4i16_to_v4i32:
1570; GCN-HSA:       ; %bb.0:
1571; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
1572; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
1573; GCN-HSA-NEXT:    s_load_dwordx2 s[2:3], s[2:3], 0x0
1574; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
1575; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
1576; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
1577; GCN-HSA-NEXT:    s_ashr_i64 s[0:1], s[2:3], 48
1578; GCN-HSA-NEXT:    s_ashr_i32 s4, s2, 16
1579; GCN-HSA-NEXT:    s_sext_i32_i16 s1, s3
1580; GCN-HSA-NEXT:    s_sext_i32_i16 s2, s2
1581; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
1582; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s4
1583; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s1
1584; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s0
1585; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
1586; GCN-HSA-NEXT:    s_endpgm
1587;
1588; GCN-NOHSA-VI-LABEL: constant_sextload_v4i16_to_v4i32:
1589; GCN-NOHSA-VI:       ; %bb.0:
1590; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
1591; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
1592; GCN-NOHSA-VI-NEXT:    s_load_dwordx2 s[2:3], s[2:3], 0x0
1593; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s0
1594; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s1
1595; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
1596; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s0, s3, 16
1597; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s1, s2, 16
1598; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s3, s3
1599; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s2, s2
1600; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s2
1601; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s1
1602; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s3
1603; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s0
1604; GCN-NOHSA-VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
1605; GCN-NOHSA-VI-NEXT:    s_endpgm
1606;
1607; EG-LABEL: constant_sextload_v4i16_to_v4i32:
1608; EG:       ; %bb.0:
1609; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
1610; EG-NEXT:    TEX 0 @6
1611; EG-NEXT:    ALU 10, @9, KC0[CB0:0-32], KC1[]
1612; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T6.XYZW, T5.X, 1
1613; EG-NEXT:    CF_END
1614; EG-NEXT:    PAD
1615; EG-NEXT:    Fetch clause starting at 6:
1616; EG-NEXT:     VTX_READ_64 T5.XY, T5.X, 0, #1
1617; EG-NEXT:    ALU clause starting at 8:
1618; EG-NEXT:     MOV * T5.X, KC0[2].Z,
1619; EG-NEXT:    ALU clause starting at 9:
1620; EG-NEXT:     BFE_INT * T6.Z, T5.Y, 0.0, literal.x,
1621; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
1622; EG-NEXT:     BFE_INT T6.X, T5.X, 0.0, literal.x,
1623; EG-NEXT:     LSHR * T0.W, T5.Y, literal.x,
1624; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
1625; EG-NEXT:     BFE_INT T6.W, PV.W, 0.0, literal.x,
1626; EG-NEXT:     LSHR * T0.W, T5.X, literal.x,
1627; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
1628; EG-NEXT:     LSHR T5.X, KC0[2].Y, literal.x,
1629; EG-NEXT:     BFE_INT * T6.Y, PS, 0.0, literal.y,
1630; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
1631;
1632; GFX12-LABEL: constant_sextload_v4i16_to_v4i32:
1633; GFX12:       ; %bb.0:
1634; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
1635; GFX12-NEXT:    s_wait_kmcnt 0x0
1636; GFX12-NEXT:    s_load_b64 s[2:3], s[2:3], 0x0
1637; GFX12-NEXT:    s_wait_kmcnt 0x0
1638; GFX12-NEXT:    s_ashr_i32 s4, s3, 16
1639; GFX12-NEXT:    s_ashr_i32 s5, s2, 16
1640; GFX12-NEXT:    s_sext_i32_i16 s2, s2
1641; GFX12-NEXT:    s_sext_i32_i16 s3, s3
1642; GFX12-NEXT:    v_dual_mov_b32 v4, 0 :: v_dual_mov_b32 v1, s5
1643; GFX12-NEXT:    v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v3, s4
1644; GFX12-NEXT:    v_mov_b32_e32 v2, s3
1645; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[0:1]
1646; GFX12-NEXT:    s_endpgm
1647  %load = load <4 x i16>, ptr addrspace(4) %in
1648  %ext = sext <4 x i16> %load to <4 x i32>
1649  store <4 x i32> %ext, ptr addrspace(1) %out
1650  ret void
1651}
1652
1653; v8i16 is naturally 16 byte aligned
1654; TODO: These should use LSHR instead of BFE_UINT
1655; TODO: This should use DST, but for some reason there are redundant MOVs
; Loads an <8 x i16> vector through the addrspace(4) pointer %in, zero-extends
; every element to i32 and stores the <8 x i32> result through the
; addrspace(1) pointer %out. The per-target check lines in the body are
; autogenerated by utils/update_llc_test_checks.py (see the NOTE on the first
; line of the file); regenerate them with that script instead of editing them
; by hand.
1656define amdgpu_kernel void @constant_zextload_v8i16_to_v8i32(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
1657; GCN-NOHSA-SI-LABEL: constant_zextload_v8i16_to_v8i32:
1658; GCN-NOHSA-SI:       ; %bb.0:
1659; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
1660; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
1661; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x0
1662; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
1663; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
1664; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
1665; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s8, s5, 16
1666; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s9, s4, 16
1667; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s10, s7, 16
1668; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s11, s6, 16
1669; GCN-NOHSA-SI-NEXT:    s_and_b32 s5, s5, 0xffff
1670; GCN-NOHSA-SI-NEXT:    s_and_b32 s7, s7, 0xffff
1671; GCN-NOHSA-SI-NEXT:    s_and_b32 s6, s6, 0xffff
1672; GCN-NOHSA-SI-NEXT:    s_and_b32 s4, s4, 0xffff
1673; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s6
1674; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s11
1675; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s7
1676; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s10
1677; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
1678; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
1679; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s4
1680; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s9
1681; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s5
1682; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s8
1683; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
1684; GCN-NOHSA-SI-NEXT:    s_endpgm
1685;
1686; GCN-HSA-LABEL: constant_zextload_v8i16_to_v8i32:
1687; GCN-HSA:       ; %bb.0:
1688; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
1689; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
1690; GCN-HSA-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x0
1691; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
1692; GCN-HSA-NEXT:    s_lshr_b32 s8, s5, 16
1693; GCN-HSA-NEXT:    s_lshr_b32 s9, s4, 16
1694; GCN-HSA-NEXT:    s_lshr_b32 s2, s7, 16
1695; GCN-HSA-NEXT:    s_lshr_b32 s3, s6, 16
1696; GCN-HSA-NEXT:    s_and_b32 s5, s5, 0xffff
1697; GCN-HSA-NEXT:    s_and_b32 s4, s4, 0xffff
1698; GCN-HSA-NEXT:    s_and_b32 s7, s7, 0xffff
1699; GCN-HSA-NEXT:    s_and_b32 s6, s6, 0xffff
1700; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s2
1701; GCN-HSA-NEXT:    s_add_u32 s2, s0, 16
1702; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
1703; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
1704; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
1705; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s6
1706; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s7
1707; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
1708; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
1709; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
1710; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s4
1711; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s9
1712; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s5
1713; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s8
1714; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
1715; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
1716; GCN-HSA-NEXT:    s_endpgm
1717;
1718; GCN-NOHSA-VI-LABEL: constant_zextload_v8i16_to_v8i32:
1719; GCN-NOHSA-VI:       ; %bb.0:
1720; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
1721; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
1722; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x0
1723; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
1724; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s8, s5, 16
1725; GCN-NOHSA-VI-NEXT:    s_and_b32 s5, s5, 0xffff
1726; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s9, s4, 16
1727; GCN-NOHSA-VI-NEXT:    s_and_b32 s4, s4, 0xffff
1728; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s2, s7, 16
1729; GCN-NOHSA-VI-NEXT:    s_and_b32 s3, s7, 0xffff
1730; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s7, s6, 16
1731; GCN-NOHSA-VI-NEXT:    s_and_b32 s6, s6, 0xffff
1732; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s2
1733; GCN-NOHSA-VI-NEXT:    s_add_u32 s2, s0, 16
1734; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s3
1735; GCN-NOHSA-VI-NEXT:    s_addc_u32 s3, s1, 0
1736; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s3
1737; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s6
1738; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s7
1739; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s2
1740; GCN-NOHSA-VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
1741; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s1
1742; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s4
1743; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s9
1744; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s5
1745; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s8
1746; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s0
1747; GCN-NOHSA-VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
1748; GCN-NOHSA-VI-NEXT:    s_endpgm
1749;
1750; EG-LABEL: constant_zextload_v8i16_to_v8i32:
1751; EG:       ; %bb.0:
1752; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
1753; EG-NEXT:    TEX 0 @6
1754; EG-NEXT:    ALU 17, @9, KC0[CB0:0-32], KC1[]
1755; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T9.XYZW, T10.X, 0
1756; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T8.XYZW, T7.X, 1
1757; EG-NEXT:    CF_END
1758; EG-NEXT:    Fetch clause starting at 6:
1759; EG-NEXT:     VTX_READ_128 T7.XYZW, T7.X, 0, #1
1760; EG-NEXT:    ALU clause starting at 8:
1761; EG-NEXT:     MOV * T7.X, KC0[2].Z,
1762; EG-NEXT:    ALU clause starting at 9:
1763; EG-NEXT:     LSHR * T8.W, T7.Y, literal.x,
1764; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
1765; EG-NEXT:     AND_INT * T8.Z, T7.Y, literal.x,
1766; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
1767; EG-NEXT:     LSHR T8.Y, T7.X, literal.x,
1768; EG-NEXT:     LSHR * T9.W, T7.W, literal.x,
1769; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
1770; EG-NEXT:     AND_INT T8.X, T7.X, literal.x,
1771; EG-NEXT:     AND_INT T9.Z, T7.W, literal.x,
1772; EG-NEXT:     LSHR * T7.X, KC0[2].Y, literal.y,
1773; EG-NEXT:    65535(9.183409e-41), 2(2.802597e-45)
1774; EG-NEXT:     LSHR * T9.Y, T7.Z, literal.x,
1775; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
1776; EG-NEXT:     AND_INT T9.X, T7.Z, literal.x,
1777; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
1778; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
1779; EG-NEXT:     LSHR * T10.X, PV.W, literal.x,
1780; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
1781;
1782; GFX12-LABEL: constant_zextload_v8i16_to_v8i32:
1783; GFX12:       ; %bb.0:
1784; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
1785; GFX12-NEXT:    s_wait_kmcnt 0x0
1786; GFX12-NEXT:    s_load_b128 s[4:7], s[2:3], 0x0
1787; GFX12-NEXT:    s_wait_kmcnt 0x0
1788; GFX12-NEXT:    s_lshr_b32 s8, s7, 16
1789; GFX12-NEXT:    s_and_b32 s7, s7, 0xffff
1790; GFX12-NEXT:    s_and_b32 s9, s6, 0xffff
1791; GFX12-NEXT:    s_lshr_b32 s6, s6, 16
1792; GFX12-NEXT:    s_lshr_b32 s2, s5, 16
1793; GFX12-NEXT:    s_and_b32 s3, s5, 0xffff
1794; GFX12-NEXT:    s_lshr_b32 s5, s4, 16
1795; GFX12-NEXT:    s_and_b32 s4, s4, 0xffff
1796; GFX12-NEXT:    v_dual_mov_b32 v8, 0 :: v_dual_mov_b32 v1, s6
1797; GFX12-NEXT:    v_dual_mov_b32 v0, s9 :: v_dual_mov_b32 v3, s8
1798; GFX12-NEXT:    v_dual_mov_b32 v2, s7 :: v_dual_mov_b32 v5, s5
1799; GFX12-NEXT:    v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v7, s2
1800; GFX12-NEXT:    v_mov_b32_e32 v6, s3
1801; GFX12-NEXT:    s_clause 0x1
1802; GFX12-NEXT:    global_store_b128 v8, v[0:3], s[0:1] offset:16
1803; GFX12-NEXT:    global_store_b128 v8, v[4:7], s[0:1]
1804; GFX12-NEXT:    s_endpgm
1805  %load = load <8 x i16>, ptr addrspace(4) %in
1806  %ext = zext <8 x i16> %load to <8 x i32>
1807  store <8 x i32> %ext, ptr addrspace(1) %out
1808  ret void
1809}
1810
1811; v8i16 is naturally 16 byte aligned
1812; TODO: 4 of these should use ASHR instead of LSHR + BFE_INT
1813; TODO: This should use DST, but for some reason there are redundant MOVs
; Loads an <8 x i16> vector through the addrspace(4) pointer %in, sign-extends
; every element to i32 and stores the <8 x i32> result through the
; addrspace(1) pointer %out. The per-target check lines in the body are
; autogenerated by utils/update_llc_test_checks.py (see the NOTE on the first
; line of the file); regenerate them with that script instead of editing them
; by hand.
1814define amdgpu_kernel void @constant_sextload_v8i16_to_v8i32(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
1815; GCN-NOHSA-SI-LABEL: constant_sextload_v8i16_to_v8i32:
1816; GCN-NOHSA-SI:       ; %bb.0:
1817; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
1818; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
1819; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x0
1820; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
1821; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
1822; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
1823; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s8, s5, 16
1824; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s9, s4, 16
1825; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s5, s5
1826; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s10, s7, 16
1827; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s11, s6, 16
1828; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s7, s7
1829; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s6, s6
1830; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s4, s4
1831; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s6
1832; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s11
1833; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s7
1834; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s10
1835; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
1836; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
1837; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s4
1838; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s9
1839; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s5
1840; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s8
1841; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
1842; GCN-NOHSA-SI-NEXT:    s_endpgm
1843;
1844; GCN-HSA-LABEL: constant_sextload_v8i16_to_v8i32:
1845; GCN-HSA:       ; %bb.0:
1846; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
1847; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
1848; GCN-HSA-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x0
1849; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
1850; GCN-HSA-NEXT:    s_ashr_i32 s8, s5, 16
1851; GCN-HSA-NEXT:    s_ashr_i32 s9, s4, 16
1852; GCN-HSA-NEXT:    s_ashr_i32 s2, s7, 16
1853; GCN-HSA-NEXT:    s_ashr_i32 s3, s6, 16
1854; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s2
1855; GCN-HSA-NEXT:    s_add_u32 s2, s0, 16
1856; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
1857; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
1858; GCN-HSA-NEXT:    s_sext_i32_i16 s7, s7
1859; GCN-HSA-NEXT:    s_sext_i32_i16 s6, s6
1860; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
1861; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s6
1862; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s7
1863; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
1864; GCN-HSA-NEXT:    s_sext_i32_i16 s5, s5
1865; GCN-HSA-NEXT:    s_sext_i32_i16 s4, s4
1866; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
1867; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
1868; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s4
1869; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s9
1870; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s5
1871; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s8
1872; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
1873; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
1874; GCN-HSA-NEXT:    s_endpgm
1875;
1876; GCN-NOHSA-VI-LABEL: constant_sextload_v8i16_to_v8i32:
1877; GCN-NOHSA-VI:       ; %bb.0:
1878; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
1879; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
1880; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x0
1881; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
1882; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s8, s5, 16
1883; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s9, s4, 16
1884; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s2, s7, 16
1885; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s3, s6, 16
1886; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s2
1887; GCN-NOHSA-VI-NEXT:    s_add_u32 s2, s0, 16
1888; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s3
1889; GCN-NOHSA-VI-NEXT:    s_addc_u32 s3, s1, 0
1890; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s7, s7
1891; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s6, s6
1892; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s3
1893; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s6
1894; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s7
1895; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s2
1896; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s5, s5
1897; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s4, s4
1898; GCN-NOHSA-VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
1899; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s1
1900; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s4
1901; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s9
1902; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s5
1903; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s8
1904; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s0
1905; GCN-NOHSA-VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
1906; GCN-NOHSA-VI-NEXT:    s_endpgm
1907;
1908; EG-LABEL: constant_sextload_v8i16_to_v8i32:
1909; EG:       ; %bb.0:
1910; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
1911; EG-NEXT:    TEX 0 @6
1912; EG-NEXT:    ALU 19, @9, KC0[CB0:0-32], KC1[]
1913; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T9.XYZW, T10.X, 0
1914; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T8.XYZW, T7.X, 1
1915; EG-NEXT:    CF_END
1916; EG-NEXT:    Fetch clause starting at 6:
1917; EG-NEXT:     VTX_READ_128 T7.XYZW, T7.X, 0, #1
1918; EG-NEXT:    ALU clause starting at 8:
1919; EG-NEXT:     MOV * T7.X, KC0[2].Z,
1920; EG-NEXT:    ALU clause starting at 9:
1921; EG-NEXT:     BFE_INT * T8.Z, T7.Y, 0.0, literal.x,
1922; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
1923; EG-NEXT:     BFE_INT T8.X, T7.X, 0.0, literal.x,
1924; EG-NEXT:     BFE_INT T9.Z, T7.W, 0.0, literal.x,
1925; EG-NEXT:     LSHR * T0.W, T7.Y, literal.x,
1926; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
1927; EG-NEXT:     BFE_INT T9.X, T7.Z, 0.0, literal.x,
1928; EG-NEXT:     LSHR T0.Z, T7.W, literal.x,
1929; EG-NEXT:     BFE_INT T8.W, PV.W, 0.0, literal.x,
1930; EG-NEXT:     LSHR * T0.W, T7.X, literal.x,
1931; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
1932; EG-NEXT:     LSHR T7.X, KC0[2].Y, literal.x,
1933; EG-NEXT:     BFE_INT T8.Y, PS, 0.0, literal.y,
1934; EG-NEXT:     LSHR T1.Z, T7.Z, literal.y,
1935; EG-NEXT:     BFE_INT T9.W, PV.Z, 0.0, literal.y,
1936; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
1937; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
1938; EG-NEXT:     LSHR T10.X, PS, literal.x,
1939; EG-NEXT:     BFE_INT * T9.Y, PV.Z, 0.0, literal.y,
1940; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
1941;
1942; GFX12-LABEL: constant_sextload_v8i16_to_v8i32:
1943; GFX12:       ; %bb.0:
1944; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
1945; GFX12-NEXT:    s_wait_kmcnt 0x0
1946; GFX12-NEXT:    s_load_b128 s[4:7], s[2:3], 0x0
1947; GFX12-NEXT:    s_wait_kmcnt 0x0
1948; GFX12-NEXT:    s_ashr_i32 s8, s7, 16
1949; GFX12-NEXT:    s_ashr_i32 s9, s6, 16
1950; GFX12-NEXT:    s_sext_i32_i16 s6, s6
1951; GFX12-NEXT:    s_sext_i32_i16 s7, s7
1952; GFX12-NEXT:    s_ashr_i32 s2, s5, 16
1953; GFX12-NEXT:    s_ashr_i32 s3, s4, 16
1954; GFX12-NEXT:    s_sext_i32_i16 s5, s5
1955; GFX12-NEXT:    s_sext_i32_i16 s4, s4
1956; GFX12-NEXT:    v_dual_mov_b32 v8, 0 :: v_dual_mov_b32 v1, s9
1957; GFX12-NEXT:    v_dual_mov_b32 v0, s6 :: v_dual_mov_b32 v3, s8
1958; GFX12-NEXT:    v_dual_mov_b32 v2, s7 :: v_dual_mov_b32 v5, s3
1959; GFX12-NEXT:    v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v7, s2
1960; GFX12-NEXT:    v_mov_b32_e32 v6, s5
1961; GFX12-NEXT:    s_clause 0x1
1962; GFX12-NEXT:    global_store_b128 v8, v[0:3], s[0:1] offset:16
1963; GFX12-NEXT:    global_store_b128 v8, v[4:7], s[0:1]
1964; GFX12-NEXT:    s_endpgm
1965  %load = load <8 x i16>, ptr addrspace(4) %in
1966  %ext = sext <8 x i16> %load to <8 x i32>
1967  store <8 x i32> %ext, ptr addrspace(1) %out
1968  ret void
1969}
1970
1971define amdgpu_kernel void @constant_zextload_v16i16_to_v16i32(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
1972; GCN-NOHSA-SI-LABEL: constant_zextload_v16i16_to_v16i32:
1973; GCN-NOHSA-SI:       ; %bb.0:
1974; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
1975; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
1976; GCN-NOHSA-SI-NEXT:    s_load_dwordx8 s[4:11], s[2:3], 0x0
1977; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
1978; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
1979; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
1980; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s12, s5, 16
1981; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s13, s4, 16
1982; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s14, s7, 16
1983; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s15, s6, 16
1984; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s16, s9, 16
1985; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s17, s8, 16
1986; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s18, s11, 16
1987; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s19, s10, 16
1988; GCN-NOHSA-SI-NEXT:    s_and_b32 s5, s5, 0xffff
1989; GCN-NOHSA-SI-NEXT:    s_and_b32 s4, s4, 0xffff
1990; GCN-NOHSA-SI-NEXT:    s_and_b32 s7, s7, 0xffff
1991; GCN-NOHSA-SI-NEXT:    s_and_b32 s6, s6, 0xffff
1992; GCN-NOHSA-SI-NEXT:    s_and_b32 s9, s9, 0xffff
1993; GCN-NOHSA-SI-NEXT:    s_and_b32 s11, s11, 0xffff
1994; GCN-NOHSA-SI-NEXT:    s_and_b32 s10, s10, 0xffff
1995; GCN-NOHSA-SI-NEXT:    s_and_b32 s8, s8, 0xffff
1996; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s10
1997; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s19
1998; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s11
1999; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s18
2000; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
2001; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
2002; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s8
2003; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s17
2004; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s9
2005; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s16
2006; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
2007; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
2008; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s6
2009; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s15
2010; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s7
2011; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s14
2012; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
2013; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
2014; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s4
2015; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s13
2016; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s5
2017; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s12
2018; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
2019; GCN-NOHSA-SI-NEXT:    s_endpgm
2020;
2021; GCN-HSA-LABEL: constant_zextload_v16i16_to_v16i32:
2022; GCN-HSA:       ; %bb.0:
2023; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
2024; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
2025; GCN-HSA-NEXT:    s_load_dwordx8 s[4:11], s[2:3], 0x0
2026; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
2027; GCN-HSA-NEXT:    s_lshr_b32 s12, s5, 16
2028; GCN-HSA-NEXT:    s_lshr_b32 s13, s4, 16
2029; GCN-HSA-NEXT:    s_lshr_b32 s14, s7, 16
2030; GCN-HSA-NEXT:    s_lshr_b32 s15, s6, 16
2031; GCN-HSA-NEXT:    s_lshr_b32 s16, s9, 16
2032; GCN-HSA-NEXT:    s_lshr_b32 s17, s8, 16
2033; GCN-HSA-NEXT:    s_lshr_b32 s2, s11, 16
2034; GCN-HSA-NEXT:    s_lshr_b32 s3, s10, 16
2035; GCN-HSA-NEXT:    s_and_b32 s5, s5, 0xffff
2036; GCN-HSA-NEXT:    s_and_b32 s4, s4, 0xffff
2037; GCN-HSA-NEXT:    s_and_b32 s7, s7, 0xffff
2038; GCN-HSA-NEXT:    s_and_b32 s6, s6, 0xffff
2039; GCN-HSA-NEXT:    s_and_b32 s9, s9, 0xffff
2040; GCN-HSA-NEXT:    s_and_b32 s8, s8, 0xffff
2041; GCN-HSA-NEXT:    s_and_b32 s11, s11, 0xffff
2042; GCN-HSA-NEXT:    s_and_b32 s10, s10, 0xffff
2043; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s2
2044; GCN-HSA-NEXT:    s_add_u32 s2, s0, 48
2045; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
2046; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
2047; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
2048; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
2049; GCN-HSA-NEXT:    s_add_u32 s2, s0, 32
2050; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s10
2051; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s11
2052; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
2053; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
2054; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
2055; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
2056; GCN-HSA-NEXT:    s_add_u32 s2, s0, 16
2057; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s8
2058; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s17
2059; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s9
2060; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s16
2061; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
2062; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
2063; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
2064; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s6
2065; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s15
2066; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s7
2067; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s14
2068; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
2069; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
2070; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
2071; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s4
2072; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s13
2073; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s5
2074; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s12
2075; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
2076; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
2077; GCN-HSA-NEXT:    s_endpgm
2078;
2079; GCN-NOHSA-VI-LABEL: constant_zextload_v16i16_to_v16i32:
2080; GCN-NOHSA-VI:       ; %bb.0:
2081; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
2082; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
2083; GCN-NOHSA-VI-NEXT:    s_load_dwordx8 s[4:11], s[2:3], 0x0
2084; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
2085; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s12, s5, 16
2086; GCN-NOHSA-VI-NEXT:    s_and_b32 s5, s5, 0xffff
2087; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s13, s4, 16
2088; GCN-NOHSA-VI-NEXT:    s_and_b32 s4, s4, 0xffff
2089; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s14, s7, 16
2090; GCN-NOHSA-VI-NEXT:    s_and_b32 s7, s7, 0xffff
2091; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s15, s6, 16
2092; GCN-NOHSA-VI-NEXT:    s_and_b32 s6, s6, 0xffff
2093; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s16, s9, 16
2094; GCN-NOHSA-VI-NEXT:    s_and_b32 s9, s9, 0xffff
2095; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s17, s8, 16
2096; GCN-NOHSA-VI-NEXT:    s_and_b32 s8, s8, 0xffff
2097; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s2, s11, 16
2098; GCN-NOHSA-VI-NEXT:    s_and_b32 s3, s11, 0xffff
2099; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s11, s10, 16
2100; GCN-NOHSA-VI-NEXT:    s_and_b32 s10, s10, 0xffff
2101; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s2
2102; GCN-NOHSA-VI-NEXT:    s_add_u32 s2, s0, 48
2103; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s3
2104; GCN-NOHSA-VI-NEXT:    s_addc_u32 s3, s1, 0
2105; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s3
2106; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s2
2107; GCN-NOHSA-VI-NEXT:    s_add_u32 s2, s0, 32
2108; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s10
2109; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s11
2110; GCN-NOHSA-VI-NEXT:    s_addc_u32 s3, s1, 0
2111; GCN-NOHSA-VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
2112; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s3
2113; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s2
2114; GCN-NOHSA-VI-NEXT:    s_add_u32 s2, s0, 16
2115; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s8
2116; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s17
2117; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s9
2118; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s16
2119; GCN-NOHSA-VI-NEXT:    s_addc_u32 s3, s1, 0
2120; GCN-NOHSA-VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
2121; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s3
2122; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s6
2123; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s15
2124; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s7
2125; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s14
2126; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s2
2127; GCN-NOHSA-VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
2128; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s1
2129; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s4
2130; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s13
2131; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s5
2132; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s12
2133; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s0
2134; GCN-NOHSA-VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
2135; GCN-NOHSA-VI-NEXT:    s_endpgm
2136;
2137; EG-LABEL: constant_zextload_v16i16_to_v16i32:
2138; EG:       ; %bb.0:
2139; EG-NEXT:    ALU 0, @12, KC0[CB0:0-32], KC1[]
2140; EG-NEXT:    TEX 1 @8
2141; EG-NEXT:    ALU 35, @13, KC0[CB0:0-32], KC1[]
2142; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T17.XYZW, T18.X, 0
2143; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T15.XYZW, T11.X, 0
2144; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T14.XYZW, T16.X, 0
2145; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T13.XYZW, T12.X, 1
2146; EG-NEXT:    CF_END
2147; EG-NEXT:    Fetch clause starting at 8:
2148; EG-NEXT:     VTX_READ_128 T12.XYZW, T11.X, 0, #1
2149; EG-NEXT:     VTX_READ_128 T11.XYZW, T11.X, 16, #1
2150; EG-NEXT:    ALU clause starting at 12:
2151; EG-NEXT:     MOV * T11.X, KC0[2].Z,
2152; EG-NEXT:    ALU clause starting at 13:
2153; EG-NEXT:     LSHR * T13.W, T12.Y, literal.x,
2154; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
2155; EG-NEXT:     AND_INT * T13.Z, T12.Y, literal.x,
2156; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
2157; EG-NEXT:     LSHR T13.Y, T12.X, literal.x,
2158; EG-NEXT:     LSHR * T14.W, T12.W, literal.x,
2159; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
2160; EG-NEXT:     AND_INT T13.X, T12.X, literal.x,
2161; EG-NEXT:     AND_INT T14.Z, T12.W, literal.x,
2162; EG-NEXT:     LSHR * T12.X, KC0[2].Y, literal.y,
2163; EG-NEXT:    65535(9.183409e-41), 2(2.802597e-45)
2164; EG-NEXT:     LSHR T14.Y, T12.Z, literal.x,
2165; EG-NEXT:     LSHR * T15.W, T11.Y, literal.x,
2166; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
2167; EG-NEXT:     AND_INT T14.X, T12.Z, literal.x,
2168; EG-NEXT:     AND_INT T15.Z, T11.Y, literal.x,
2169; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
2170; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
2171; EG-NEXT:     LSHR T16.X, PV.W, literal.x,
2172; EG-NEXT:     LSHR T15.Y, T11.X, literal.y,
2173; EG-NEXT:     LSHR T17.W, T11.W, literal.y,
2174; EG-NEXT:     AND_INT * T15.X, T11.X, literal.z,
2175; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
2176; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
2177; EG-NEXT:     AND_INT T17.Z, T11.W, literal.x,
2178; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
2179; EG-NEXT:    65535(9.183409e-41), 32(4.484155e-44)
2180; EG-NEXT:     LSHR T11.X, PV.W, literal.x,
2181; EG-NEXT:     LSHR T17.Y, T11.Z, literal.y,
2182; EG-NEXT:     AND_INT * T17.X, T11.Z, literal.z,
2183; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
2184; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
2185; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.x,
2186; EG-NEXT:    48(6.726233e-44), 0(0.000000e+00)
2187; EG-NEXT:     LSHR * T18.X, PV.W, literal.x,
2188; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
2189;
2190; GFX12-LABEL: constant_zextload_v16i16_to_v16i32:
2191; GFX12:       ; %bb.0:
2192; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
2193; GFX12-NEXT:    s_wait_kmcnt 0x0
2194; GFX12-NEXT:    s_load_b256 s[4:11], s[2:3], 0x0
2195; GFX12-NEXT:    s_wait_kmcnt 0x0
2196; GFX12-NEXT:    s_lshr_b32 s16, s11, 16
2197; GFX12-NEXT:    s_and_b32 s11, s11, 0xffff
2198; GFX12-NEXT:    s_and_b32 s17, s10, 0xffff
2199; GFX12-NEXT:    s_lshr_b32 s10, s10, 16
2200; GFX12-NEXT:    s_lshr_b32 s14, s9, 16
2201; GFX12-NEXT:    s_and_b32 s9, s9, 0xffff
2202; GFX12-NEXT:    s_lshr_b32 s15, s8, 16
2203; GFX12-NEXT:    s_and_b32 s8, s8, 0xffff
2204; GFX12-NEXT:    v_dual_mov_b32 v16, 0 :: v_dual_mov_b32 v1, s10
2205; GFX12-NEXT:    s_lshr_b32 s12, s7, 16
2206; GFX12-NEXT:    s_and_b32 s7, s7, 0xffff
2207; GFX12-NEXT:    s_lshr_b32 s13, s6, 16
2208; GFX12-NEXT:    s_and_b32 s6, s6, 0xffff
2209; GFX12-NEXT:    v_dual_mov_b32 v0, s17 :: v_dual_mov_b32 v3, s16
2210; GFX12-NEXT:    v_dual_mov_b32 v2, s11 :: v_dual_mov_b32 v5, s15
2211; GFX12-NEXT:    s_lshr_b32 s2, s5, 16
2212; GFX12-NEXT:    s_and_b32 s3, s5, 0xffff
2213; GFX12-NEXT:    s_lshr_b32 s5, s4, 16
2214; GFX12-NEXT:    s_and_b32 s4, s4, 0xffff
2215; GFX12-NEXT:    v_dual_mov_b32 v4, s8 :: v_dual_mov_b32 v7, s14
2216; GFX12-NEXT:    v_dual_mov_b32 v6, s9 :: v_dual_mov_b32 v9, s13
2217; GFX12-NEXT:    v_dual_mov_b32 v8, s6 :: v_dual_mov_b32 v11, s12
2218; GFX12-NEXT:    v_dual_mov_b32 v10, s7 :: v_dual_mov_b32 v13, s5
2219; GFX12-NEXT:    v_dual_mov_b32 v12, s4 :: v_dual_mov_b32 v15, s2
2220; GFX12-NEXT:    v_mov_b32_e32 v14, s3
2221; GFX12-NEXT:    s_clause 0x3
2222; GFX12-NEXT:    global_store_b128 v16, v[0:3], s[0:1] offset:48
2223; GFX12-NEXT:    global_store_b128 v16, v[4:7], s[0:1] offset:32
2224; GFX12-NEXT:    global_store_b128 v16, v[8:11], s[0:1] offset:16
2225; GFX12-NEXT:    global_store_b128 v16, v[12:15], s[0:1]
2226; GFX12-NEXT:    s_endpgm
2227  %load = load <16 x i16>, ptr addrspace(4) %in
2228  %ext = zext <16 x i16> %load to <16 x i32>
2229  store <16 x i32> %ext, ptr addrspace(1) %out
2230  ret void
2231}
2232
; Sign-extending load test: reads a <16 x i16> vector from %in (addrspace 4),
; widens every lane to i32 with sext, and stores the <16 x i32> result to
; %out (addrspace 1).
; The assertion lines inside the body are the expected llc output for each RUN
; configuration listed at the top of this file. They are produced by
; utils/update_llc_test_checks.py, so regenerate them with that script rather
; than editing them by hand.
; In the expected output of all four amdgcn RUN lines, each loaded dword is
; split with an s_ashr_i32 by 16 plus an s_sext_i32_i16; the expected r600
; output uses LSHR and BFE_INT with a literal of 16 instead.
2233define amdgpu_kernel void @constant_sextload_v16i16_to_v16i32(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
2234; GCN-NOHSA-SI-LABEL: constant_sextload_v16i16_to_v16i32:
2235; GCN-NOHSA-SI:       ; %bb.0:
2236; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
2237; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
2238; GCN-NOHSA-SI-NEXT:    s_load_dwordx8 s[4:11], s[2:3], 0x0
2239; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
2240; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
2241; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
2242; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s12, s5, 16
2243; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s13, s4, 16
2244; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s5, s5
2245; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s4, s4
2246; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s14, s7, 16
2247; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s15, s6, 16
2248; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s7, s7
2249; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s6, s6
2250; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s16, s9, 16
2251; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s17, s8, 16
2252; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s9, s9
2253; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s18, s11, 16
2254; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s19, s10, 16
2255; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s11, s11
2256; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s10, s10
2257; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s8, s8
2258; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s10
2259; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s19
2260; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s11
2261; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s18
2262; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
2263; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
2264; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s8
2265; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s17
2266; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s9
2267; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s16
2268; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
2269; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
2270; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s6
2271; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s15
2272; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s7
2273; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s14
2274; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
2275; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
2276; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s4
2277; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s13
2278; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s5
2279; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s12
2280; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
2281; GCN-NOHSA-SI-NEXT:    s_endpgm
2282;
2283; GCN-HSA-LABEL: constant_sextload_v16i16_to_v16i32:
2284; GCN-HSA:       ; %bb.0:
2285; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
2286; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
2287; GCN-HSA-NEXT:    s_load_dwordx8 s[4:11], s[2:3], 0x0
2288; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
2289; GCN-HSA-NEXT:    s_ashr_i32 s12, s5, 16
2290; GCN-HSA-NEXT:    s_ashr_i32 s13, s4, 16
2291; GCN-HSA-NEXT:    s_ashr_i32 s14, s7, 16
2292; GCN-HSA-NEXT:    s_ashr_i32 s15, s6, 16
2293; GCN-HSA-NEXT:    s_ashr_i32 s16, s9, 16
2294; GCN-HSA-NEXT:    s_ashr_i32 s17, s8, 16
2295; GCN-HSA-NEXT:    s_ashr_i32 s2, s11, 16
2296; GCN-HSA-NEXT:    s_ashr_i32 s3, s10, 16
2297; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s2
2298; GCN-HSA-NEXT:    s_add_u32 s2, s0, 48
2299; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
2300; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
2301; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
2302; GCN-HSA-NEXT:    s_sext_i32_i16 s11, s11
2303; GCN-HSA-NEXT:    s_sext_i32_i16 s10, s10
2304; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
2305; GCN-HSA-NEXT:    s_add_u32 s2, s0, 32
2306; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s10
2307; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s11
2308; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
2309; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
2310; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
2311; GCN-HSA-NEXT:    s_sext_i32_i16 s9, s9
2312; GCN-HSA-NEXT:    s_sext_i32_i16 s8, s8
2313; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
2314; GCN-HSA-NEXT:    s_add_u32 s2, s0, 16
2315; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s8
2316; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s17
2317; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s9
2318; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s16
2319; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
2320; GCN-HSA-NEXT:    s_sext_i32_i16 s7, s7
2321; GCN-HSA-NEXT:    s_sext_i32_i16 s6, s6
2322; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
2323; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
2324; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s6
2325; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s15
2326; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s7
2327; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s14
2328; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
2329; GCN-HSA-NEXT:    s_sext_i32_i16 s5, s5
2330; GCN-HSA-NEXT:    s_sext_i32_i16 s4, s4
2331; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
2332; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
2333; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s4
2334; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s13
2335; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s5
2336; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s12
2337; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
2338; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
2339; GCN-HSA-NEXT:    s_endpgm
2340;
2341; GCN-NOHSA-VI-LABEL: constant_sextload_v16i16_to_v16i32:
2342; GCN-NOHSA-VI:       ; %bb.0:
2343; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
2344; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
2345; GCN-NOHSA-VI-NEXT:    s_load_dwordx8 s[4:11], s[2:3], 0x0
2346; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
2347; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s12, s5, 16
2348; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s13, s4, 16
2349; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s14, s7, 16
2350; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s15, s6, 16
2351; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s16, s9, 16
2352; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s17, s8, 16
2353; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s2, s11, 16
2354; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s3, s10, 16
2355; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s2
2356; GCN-NOHSA-VI-NEXT:    s_add_u32 s2, s0, 48
2357; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s3
2358; GCN-NOHSA-VI-NEXT:    s_addc_u32 s3, s1, 0
2359; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s3
2360; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s11, s11
2361; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s10, s10
2362; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s2
2363; GCN-NOHSA-VI-NEXT:    s_add_u32 s2, s0, 32
2364; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s10
2365; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s11
2366; GCN-NOHSA-VI-NEXT:    s_addc_u32 s3, s1, 0
2367; GCN-NOHSA-VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
2368; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s3
2369; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s9, s9
2370; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s8, s8
2371; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s2
2372; GCN-NOHSA-VI-NEXT:    s_add_u32 s2, s0, 16
2373; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s8
2374; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s17
2375; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s9
2376; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s16
2377; GCN-NOHSA-VI-NEXT:    s_addc_u32 s3, s1, 0
2378; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s7, s7
2379; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s6, s6
2380; GCN-NOHSA-VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
2381; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s3
2382; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s6
2383; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s15
2384; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s7
2385; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s14
2386; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s2
2387; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s5, s5
2388; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s4, s4
2389; GCN-NOHSA-VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
2390; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s1
2391; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s4
2392; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s13
2393; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s5
2394; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s12
2395; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s0
2396; GCN-NOHSA-VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
2397; GCN-NOHSA-VI-NEXT:    s_endpgm
2398;
2399; EG-LABEL: constant_sextload_v16i16_to_v16i32:
2400; EG:       ; %bb.0:
2401; EG-NEXT:    ALU 0, @12, KC0[CB0:0-32], KC1[]
2402; EG-NEXT:    TEX 1 @8
2403; EG-NEXT:    ALU 39, @13, KC0[CB0:0-32], KC1[]
2404; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T18.XYZW, T12.X, 0
2405; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T17.XYZW, T11.X, 0
2406; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T16.XYZW, T14.X, 0
2407; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T15.XYZW, T13.X, 1
2408; EG-NEXT:    CF_END
2409; EG-NEXT:    Fetch clause starting at 8:
2410; EG-NEXT:     VTX_READ_128 T12.XYZW, T11.X, 16, #1
2411; EG-NEXT:     VTX_READ_128 T11.XYZW, T11.X, 0, #1
2412; EG-NEXT:    ALU clause starting at 12:
2413; EG-NEXT:     MOV * T11.X, KC0[2].Z,
2414; EG-NEXT:    ALU clause starting at 13:
2415; EG-NEXT:     LSHR T13.X, KC0[2].Y, literal.x,
2416; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
2417; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
2418; EG-NEXT:     LSHR T14.X, PV.W, literal.x,
2419; EG-NEXT:     BFE_INT * T15.Z, T11.Y, 0.0, literal.y,
2420; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
2421; EG-NEXT:     BFE_INT T15.X, T11.X, 0.0, literal.x,
2422; EG-NEXT:     LSHR T0.Y, T12.W, literal.x,
2423; EG-NEXT:     BFE_INT T16.Z, T11.W, 0.0, literal.x, BS:VEC_120/SCL_212
2424; EG-NEXT:     LSHR T0.W, T12.Y, literal.x,
2425; EG-NEXT:     LSHR * T1.W, T11.Y, literal.x,
2426; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
2427; EG-NEXT:     BFE_INT T16.X, T11.Z, 0.0, literal.x,
2428; EG-NEXT:     LSHR T1.Y, T11.W, literal.x,
2429; EG-NEXT:     BFE_INT T17.Z, T12.Y, 0.0, literal.x,
2430; EG-NEXT:     BFE_INT T15.W, PS, 0.0, literal.x,
2431; EG-NEXT:     LSHR * T1.W, T11.X, literal.x,
2432; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
2433; EG-NEXT:     BFE_INT T17.X, T12.X, 0.0, literal.x,
2434; EG-NEXT:     BFE_INT T15.Y, PS, 0.0, literal.x,
2435; EG-NEXT:     BFE_INT T18.Z, T12.W, 0.0, literal.x,
2436; EG-NEXT:     BFE_INT T16.W, PV.Y, 0.0, literal.x,
2437; EG-NEXT:     LSHR * T1.W, T11.Z, literal.x,
2438; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
2439; EG-NEXT:     BFE_INT T18.X, T12.Z, 0.0, literal.x,
2440; EG-NEXT:     BFE_INT T16.Y, PS, 0.0, literal.x,
2441; EG-NEXT:     LSHR T0.Z, T12.X, literal.x,
2442; EG-NEXT:     BFE_INT T17.W, T0.W, 0.0, literal.x,
2443; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
2444; EG-NEXT:    16(2.242078e-44), 32(4.484155e-44)
2445; EG-NEXT:     LSHR T11.X, PS, literal.x,
2446; EG-NEXT:     BFE_INT T17.Y, PV.Z, 0.0, literal.y,
2447; EG-NEXT:     LSHR T0.Z, T12.Z, literal.y,
2448; EG-NEXT:     BFE_INT T18.W, T0.Y, 0.0, literal.y,
2449; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.z,
2450; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
2451; EG-NEXT:    48(6.726233e-44), 0(0.000000e+00)
2452; EG-NEXT:     LSHR T12.X, PS, literal.x,
2453; EG-NEXT:     BFE_INT * T18.Y, PV.Z, 0.0, literal.y,
2454; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
2455;
2456; GFX12-LABEL: constant_sextload_v16i16_to_v16i32:
2457; GFX12:       ; %bb.0:
2458; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
2459; GFX12-NEXT:    s_wait_kmcnt 0x0
2460; GFX12-NEXT:    s_load_b256 s[4:11], s[2:3], 0x0
2461; GFX12-NEXT:    s_wait_kmcnt 0x0
2462; GFX12-NEXT:    s_ashr_i32 s16, s11, 16
2463; GFX12-NEXT:    s_ashr_i32 s17, s10, 16
2464; GFX12-NEXT:    s_sext_i32_i16 s10, s10
2465; GFX12-NEXT:    s_sext_i32_i16 s11, s11
2466; GFX12-NEXT:    s_ashr_i32 s14, s9, 16
2467; GFX12-NEXT:    s_ashr_i32 s15, s8, 16
2468; GFX12-NEXT:    s_sext_i32_i16 s9, s9
2469; GFX12-NEXT:    s_sext_i32_i16 s8, s8
2470; GFX12-NEXT:    v_dual_mov_b32 v16, 0 :: v_dual_mov_b32 v1, s17
2471; GFX12-NEXT:    s_ashr_i32 s12, s7, 16
2472; GFX12-NEXT:    s_ashr_i32 s13, s6, 16
2473; GFX12-NEXT:    s_sext_i32_i16 s7, s7
2474; GFX12-NEXT:    s_sext_i32_i16 s6, s6
2475; GFX12-NEXT:    v_dual_mov_b32 v0, s10 :: v_dual_mov_b32 v3, s16
2476; GFX12-NEXT:    v_dual_mov_b32 v2, s11 :: v_dual_mov_b32 v5, s15
2477; GFX12-NEXT:    s_ashr_i32 s2, s5, 16
2478; GFX12-NEXT:    s_ashr_i32 s3, s4, 16
2479; GFX12-NEXT:    s_sext_i32_i16 s5, s5
2480; GFX12-NEXT:    s_sext_i32_i16 s4, s4
2481; GFX12-NEXT:    v_dual_mov_b32 v4, s8 :: v_dual_mov_b32 v7, s14
2482; GFX12-NEXT:    v_dual_mov_b32 v6, s9 :: v_dual_mov_b32 v9, s13
2483; GFX12-NEXT:    v_dual_mov_b32 v8, s6 :: v_dual_mov_b32 v11, s12
2484; GFX12-NEXT:    v_dual_mov_b32 v10, s7 :: v_dual_mov_b32 v13, s3
2485; GFX12-NEXT:    v_dual_mov_b32 v12, s4 :: v_dual_mov_b32 v15, s2
2486; GFX12-NEXT:    v_mov_b32_e32 v14, s5
2487; GFX12-NEXT:    s_clause 0x3
2488; GFX12-NEXT:    global_store_b128 v16, v[0:3], s[0:1] offset:48
2489; GFX12-NEXT:    global_store_b128 v16, v[4:7], s[0:1] offset:32
2490; GFX12-NEXT:    global_store_b128 v16, v[8:11], s[0:1] offset:16
2491; GFX12-NEXT:    global_store_b128 v16, v[12:15], s[0:1]
2492; GFX12-NEXT:    s_endpgm
2493  %load = load <16 x i16>, ptr addrspace(4) %in
2494  %ext = sext <16 x i16> %load to <16 x i32>
2495  store <16 x i32> %ext, ptr addrspace(1) %out
2496  ret void
2497}
2498
2499define amdgpu_kernel void @constant_zextload_v32i16_to_v32i32(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
2500; GCN-NOHSA-SI-LABEL: constant_zextload_v32i16_to_v32i32:
2501; GCN-NOHSA-SI:       ; %bb.0:
2502; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[16:19], s[4:5], 0x9
2503; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
2504; GCN-NOHSA-SI-NEXT:    s_load_dwordx16 s[0:15], s[18:19], 0x0
2505; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
2506; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s18, s1, 16
2507; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s19, s0, 16
2508; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s20, s3, 16
2509; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s21, s2, 16
2510; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s22, s5, 16
2511; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s23, s4, 16
2512; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s24, s7, 16
2513; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s25, s6, 16
2514; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s26, s9, 16
2515; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s27, s8, 16
2516; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s28, s11, 16
2517; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s29, s10, 16
2518; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s30, s13, 16
2519; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s31, s12, 16
2520; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s33, s15, 16
2521; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s34, s14, 16
2522; GCN-NOHSA-SI-NEXT:    s_and_b32 s35, s1, 0xffff
2523; GCN-NOHSA-SI-NEXT:    s_and_b32 s36, s0, 0xffff
2524; GCN-NOHSA-SI-NEXT:    s_and_b32 s37, s3, 0xffff
2525; GCN-NOHSA-SI-NEXT:    s_and_b32 s38, s2, 0xffff
2526; GCN-NOHSA-SI-NEXT:    s_and_b32 s5, s5, 0xffff
2527; GCN-NOHSA-SI-NEXT:    s_and_b32 s4, s4, 0xffff
2528; GCN-NOHSA-SI-NEXT:    s_and_b32 s7, s7, 0xffff
2529; GCN-NOHSA-SI-NEXT:    s_and_b32 s6, s6, 0xffff
2530; GCN-NOHSA-SI-NEXT:    s_and_b32 s9, s9, 0xffff
2531; GCN-NOHSA-SI-NEXT:    s_and_b32 s8, s8, 0xffff
2532; GCN-NOHSA-SI-NEXT:    s_and_b32 s11, s11, 0xffff
2533; GCN-NOHSA-SI-NEXT:    s_and_b32 s10, s10, 0xffff
2534; GCN-NOHSA-SI-NEXT:    s_and_b32 s13, s13, 0xffff
2535; GCN-NOHSA-SI-NEXT:    s_and_b32 s12, s12, 0xffff
2536; GCN-NOHSA-SI-NEXT:    s_and_b32 s15, s15, 0xffff
2537; GCN-NOHSA-SI-NEXT:    s_and_b32 s14, s14, 0xffff
2538; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
2539; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
2540; GCN-NOHSA-SI-NEXT:    s_mov_b32 s0, s16
2541; GCN-NOHSA-SI-NEXT:    s_mov_b32 s1, s17
2542; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s14
2543; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s34
2544; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s15
2545; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s33
2546; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112
2547; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
2548; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s12
2549; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s31
2550; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s13
2551; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s30
2552; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96
2553; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
2554; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s10
2555; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s29
2556; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s11
2557; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s28
2558; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80
2559; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
2560; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s8
2561; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s27
2562; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s9
2563; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s26
2564; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64
2565; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
2566; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s6
2567; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s25
2568; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s7
2569; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s24
2570; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
2571; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
2572; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s4
2573; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s23
2574; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s5
2575; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s22
2576; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
2577; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
2578; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s38
2579; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s21
2580; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s37
2581; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s20
2582; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
2583; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
2584; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s36
2585; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s19
2586; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s35
2587; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s18
2588; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
2589; GCN-NOHSA-SI-NEXT:    s_endpgm
2590;
2591; GCN-HSA-LABEL: constant_zextload_v32i16_to_v32i32:
2592; GCN-HSA:       ; %bb.0:
2593; GCN-HSA-NEXT:    s_load_dwordx4 s[16:19], s[8:9], 0x0
2594; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
2595; GCN-HSA-NEXT:    s_load_dwordx16 s[0:15], s[18:19], 0x0
2596; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
2597; GCN-HSA-NEXT:    s_lshr_b32 s18, s1, 16
2598; GCN-HSA-NEXT:    s_lshr_b32 s19, s0, 16
2599; GCN-HSA-NEXT:    s_lshr_b32 s20, s3, 16
2600; GCN-HSA-NEXT:    s_lshr_b32 s21, s2, 16
2601; GCN-HSA-NEXT:    s_lshr_b32 s22, s5, 16
2602; GCN-HSA-NEXT:    s_lshr_b32 s23, s4, 16
2603; GCN-HSA-NEXT:    s_lshr_b32 s24, s7, 16
2604; GCN-HSA-NEXT:    s_lshr_b32 s25, s6, 16
2605; GCN-HSA-NEXT:    s_lshr_b32 s26, s9, 16
2606; GCN-HSA-NEXT:    s_lshr_b32 s27, s8, 16
2607; GCN-HSA-NEXT:    s_lshr_b32 s28, s11, 16
2608; GCN-HSA-NEXT:    s_lshr_b32 s29, s10, 16
2609; GCN-HSA-NEXT:    s_lshr_b32 s30, s13, 16
2610; GCN-HSA-NEXT:    s_lshr_b32 s31, s12, 16
2611; GCN-HSA-NEXT:    s_lshr_b32 s33, s15, 16
2612; GCN-HSA-NEXT:    s_lshr_b32 s34, s14, 16
2613; GCN-HSA-NEXT:    s_and_b32 s35, s1, 0xffff
2614; GCN-HSA-NEXT:    s_and_b32 s36, s0, 0xffff
2615; GCN-HSA-NEXT:    s_and_b32 s3, s3, 0xffff
2616; GCN-HSA-NEXT:    s_and_b32 s2, s2, 0xffff
2617; GCN-HSA-NEXT:    s_and_b32 s5, s5, 0xffff
2618; GCN-HSA-NEXT:    s_and_b32 s4, s4, 0xffff
2619; GCN-HSA-NEXT:    s_and_b32 s7, s7, 0xffff
2620; GCN-HSA-NEXT:    s_and_b32 s6, s6, 0xffff
2621; GCN-HSA-NEXT:    s_and_b32 s9, s9, 0xffff
2622; GCN-HSA-NEXT:    s_and_b32 s8, s8, 0xffff
2623; GCN-HSA-NEXT:    s_and_b32 s11, s11, 0xffff
2624; GCN-HSA-NEXT:    s_and_b32 s10, s10, 0xffff
2625; GCN-HSA-NEXT:    s_and_b32 s13, s13, 0xffff
2626; GCN-HSA-NEXT:    s_and_b32 s12, s12, 0xffff
2627; GCN-HSA-NEXT:    s_and_b32 s15, s15, 0xffff
2628; GCN-HSA-NEXT:    s_and_b32 s14, s14, 0xffff
2629; GCN-HSA-NEXT:    s_add_u32 s0, s16, 0x70
2630; GCN-HSA-NEXT:    s_addc_u32 s1, s17, 0
2631; GCN-HSA-NEXT:    v_mov_b32_e32 v9, s1
2632; GCN-HSA-NEXT:    v_mov_b32_e32 v8, s0
2633; GCN-HSA-NEXT:    s_add_u32 s0, s16, 0x60
2634; GCN-HSA-NEXT:    s_addc_u32 s1, s17, 0
2635; GCN-HSA-NEXT:    v_mov_b32_e32 v11, s1
2636; GCN-HSA-NEXT:    v_mov_b32_e32 v10, s0
2637; GCN-HSA-NEXT:    s_add_u32 s0, s16, 0x50
2638; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s14
2639; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s34
2640; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s15
2641; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s33
2642; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s12
2643; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s31
2644; GCN-HSA-NEXT:    s_addc_u32 s1, s17, 0
2645; GCN-HSA-NEXT:    v_mov_b32_e32 v6, s13
2646; GCN-HSA-NEXT:    v_mov_b32_e32 v7, s30
2647; GCN-HSA-NEXT:    flat_store_dwordx4 v[8:9], v[0:3]
2648; GCN-HSA-NEXT:    flat_store_dwordx4 v[10:11], v[4:7]
2649; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s10
2650; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
2651; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
2652; GCN-HSA-NEXT:    s_add_u32 s0, s16, 64
2653; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s29
2654; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s11
2655; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s28
2656; GCN-HSA-NEXT:    s_addc_u32 s1, s17, 0
2657; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
2658; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
2659; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
2660; GCN-HSA-NEXT:    s_add_u32 s0, s16, 48
2661; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s8
2662; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s27
2663; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s9
2664; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s26
2665; GCN-HSA-NEXT:    s_addc_u32 s1, s17, 0
2666; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
2667; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
2668; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
2669; GCN-HSA-NEXT:    s_add_u32 s0, s16, 32
2670; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s6
2671; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s25
2672; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s7
2673; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s24
2674; GCN-HSA-NEXT:    s_addc_u32 s1, s17, 0
2675; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
2676; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
2677; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
2678; GCN-HSA-NEXT:    s_add_u32 s0, s16, 16
2679; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s4
2680; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s23
2681; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s5
2682; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s22
2683; GCN-HSA-NEXT:    s_addc_u32 s1, s17, 0
2684; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
2685; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
2686; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
2687; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s21
2688; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s3
2689; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s20
2690; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
2691; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
2692; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s16
2693; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s36
2694; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s19
2695; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s35
2696; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s18
2697; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s17
2698; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
2699; GCN-HSA-NEXT:    s_endpgm
2700;
2701; GCN-NOHSA-VI-LABEL: constant_zextload_v32i16_to_v32i32:
2702; GCN-NOHSA-VI:       ; %bb.0:
2703; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[16:19], s[4:5], 0x24
2704; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
2705; GCN-NOHSA-VI-NEXT:    s_load_dwordx16 s[0:15], s[18:19], 0x0
2706; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
2707; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s18, s1, 16
2708; GCN-NOHSA-VI-NEXT:    s_and_b32 s19, s1, 0xffff
2709; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s20, s0, 16
2710; GCN-NOHSA-VI-NEXT:    s_and_b32 s21, s0, 0xffff
2711; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s22, s3, 16
2712; GCN-NOHSA-VI-NEXT:    s_and_b32 s3, s3, 0xffff
2713; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s23, s2, 16
2714; GCN-NOHSA-VI-NEXT:    s_and_b32 s2, s2, 0xffff
2715; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s24, s5, 16
2716; GCN-NOHSA-VI-NEXT:    s_and_b32 s5, s5, 0xffff
2717; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s25, s4, 16
2718; GCN-NOHSA-VI-NEXT:    s_and_b32 s4, s4, 0xffff
2719; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s26, s7, 16
2720; GCN-NOHSA-VI-NEXT:    s_and_b32 s7, s7, 0xffff
2721; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s27, s6, 16
2722; GCN-NOHSA-VI-NEXT:    s_and_b32 s6, s6, 0xffff
2723; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s28, s9, 16
2724; GCN-NOHSA-VI-NEXT:    s_and_b32 s9, s9, 0xffff
2725; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s29, s8, 16
2726; GCN-NOHSA-VI-NEXT:    s_and_b32 s8, s8, 0xffff
2727; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s30, s11, 16
2728; GCN-NOHSA-VI-NEXT:    s_and_b32 s11, s11, 0xffff
2729; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s31, s10, 16
2730; GCN-NOHSA-VI-NEXT:    s_and_b32 s10, s10, 0xffff
2731; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s33, s13, 16
2732; GCN-NOHSA-VI-NEXT:    s_and_b32 s13, s13, 0xffff
2733; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s34, s12, 16
2734; GCN-NOHSA-VI-NEXT:    s_and_b32 s12, s12, 0xffff
2735; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s0, s15, 16
2736; GCN-NOHSA-VI-NEXT:    s_and_b32 s1, s15, 0xffff
2737; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s15, s14, 16
2738; GCN-NOHSA-VI-NEXT:    s_and_b32 s14, s14, 0xffff
2739; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s0
2740; GCN-NOHSA-VI-NEXT:    s_add_u32 s0, s16, 0x70
2741; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s1
2742; GCN-NOHSA-VI-NEXT:    s_addc_u32 s1, s17, 0
2743; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s1
2744; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s0
2745; GCN-NOHSA-VI-NEXT:    s_add_u32 s0, s16, 0x60
2746; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s14
2747; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s15
2748; GCN-NOHSA-VI-NEXT:    s_addc_u32 s1, s17, 0
2749; GCN-NOHSA-VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
2750; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s1
2751; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s0
2752; GCN-NOHSA-VI-NEXT:    s_add_u32 s0, s16, 0x50
2753; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s12
2754; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s34
2755; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s13
2756; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s33
2757; GCN-NOHSA-VI-NEXT:    s_addc_u32 s1, s17, 0
2758; GCN-NOHSA-VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
2759; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s1
2760; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s0
2761; GCN-NOHSA-VI-NEXT:    s_add_u32 s0, s16, 64
2762; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s10
2763; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s31
2764; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s11
2765; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s30
2766; GCN-NOHSA-VI-NEXT:    s_addc_u32 s1, s17, 0
2767; GCN-NOHSA-VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
2768; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s1
2769; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s0
2770; GCN-NOHSA-VI-NEXT:    s_add_u32 s0, s16, 48
2771; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s8
2772; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s29
2773; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s9
2774; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s28
2775; GCN-NOHSA-VI-NEXT:    s_addc_u32 s1, s17, 0
2776; GCN-NOHSA-VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
2777; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s1
2778; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s0
2779; GCN-NOHSA-VI-NEXT:    s_add_u32 s0, s16, 32
2780; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s6
2781; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s27
2782; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s7
2783; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s26
2784; GCN-NOHSA-VI-NEXT:    s_addc_u32 s1, s17, 0
2785; GCN-NOHSA-VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
2786; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s1
2787; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s0
2788; GCN-NOHSA-VI-NEXT:    s_add_u32 s0, s16, 16
2789; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s4
2790; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s25
2791; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s5
2792; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s24
2793; GCN-NOHSA-VI-NEXT:    s_addc_u32 s1, s17, 0
2794; GCN-NOHSA-VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
2795; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s1
2796; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s2
2797; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s23
2798; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s3
2799; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s22
2800; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s0
2801; GCN-NOHSA-VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
2802; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s16
2803; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s21
2804; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s20
2805; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s19
2806; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s18
2807; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s17
2808; GCN-NOHSA-VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
2809; GCN-NOHSA-VI-NEXT:    s_endpgm
2810;
2811; EG-LABEL: constant_zextload_v32i16_to_v32i32:
2812; EG:       ; %bb.0:
2813; EG-NEXT:    ALU 0, @20, KC0[CB0:0-32], KC1[]
2814; EG-NEXT:    TEX 3 @12
2815; EG-NEXT:    ALU 71, @21, KC0[CB0:0-32], KC1[]
2816; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T33.XYZW, T34.X, 0
2817; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T31.XYZW, T21.X, 0
2818; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T30.XYZW, T32.X, 0
2819; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T28.XYZW, T22.X, 0
2820; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T27.XYZW, T29.X, 0
2821; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T25.XYZW, T19.X, 0
2822; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T24.XYZW, T26.X, 0
2823; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T23.XYZW, T20.X, 1
2824; EG-NEXT:    CF_END
2825; EG-NEXT:    Fetch clause starting at 12:
2826; EG-NEXT:     VTX_READ_128 T20.XYZW, T19.X, 0, #1
2827; EG-NEXT:     VTX_READ_128 T21.XYZW, T19.X, 48, #1
2828; EG-NEXT:     VTX_READ_128 T22.XYZW, T19.X, 32, #1
2829; EG-NEXT:     VTX_READ_128 T19.XYZW, T19.X, 16, #1
2830; EG-NEXT:    ALU clause starting at 20:
2831; EG-NEXT:     MOV * T19.X, KC0[2].Z,
2832; EG-NEXT:    ALU clause starting at 21:
2833; EG-NEXT:     LSHR * T23.W, T20.Y, literal.x,
2834; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
2835; EG-NEXT:     AND_INT * T23.Z, T20.Y, literal.x,
2836; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
2837; EG-NEXT:     LSHR T23.Y, T20.X, literal.x,
2838; EG-NEXT:     LSHR * T24.W, T20.W, literal.x,
2839; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
2840; EG-NEXT:     AND_INT T23.X, T20.X, literal.x,
2841; EG-NEXT:     AND_INT T24.Z, T20.W, literal.x,
2842; EG-NEXT:     LSHR * T20.X, KC0[2].Y, literal.y,
2843; EG-NEXT:    65535(9.183409e-41), 2(2.802597e-45)
2844; EG-NEXT:     LSHR T24.Y, T20.Z, literal.x,
2845; EG-NEXT:     LSHR * T25.W, T19.Y, literal.x,
2846; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
2847; EG-NEXT:     AND_INT T24.X, T20.Z, literal.x,
2848; EG-NEXT:     AND_INT T25.Z, T19.Y, literal.x,
2849; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
2850; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
2851; EG-NEXT:     LSHR T26.X, PV.W, literal.x,
2852; EG-NEXT:     LSHR T25.Y, T19.X, literal.y,
2853; EG-NEXT:     LSHR T27.W, T19.W, literal.y,
2854; EG-NEXT:     AND_INT * T25.X, T19.X, literal.z,
2855; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
2856; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
2857; EG-NEXT:     AND_INT T27.Z, T19.W, literal.x,
2858; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
2859; EG-NEXT:    65535(9.183409e-41), 32(4.484155e-44)
2860; EG-NEXT:     LSHR T19.X, PV.W, literal.x,
2861; EG-NEXT:     LSHR T27.Y, T19.Z, literal.y,
2862; EG-NEXT:     LSHR T28.W, T22.Y, literal.y,
2863; EG-NEXT:     AND_INT * T27.X, T19.Z, literal.z,
2864; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
2865; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
2866; EG-NEXT:     AND_INT T28.Z, T22.Y, literal.x,
2867; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
2868; EG-NEXT:    65535(9.183409e-41), 48(6.726233e-44)
2869; EG-NEXT:     LSHR T29.X, PV.W, literal.x,
2870; EG-NEXT:     LSHR T28.Y, T22.X, literal.y,
2871; EG-NEXT:     LSHR T30.W, T22.W, literal.y,
2872; EG-NEXT:     AND_INT * T28.X, T22.X, literal.z,
2873; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
2874; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
2875; EG-NEXT:     AND_INT T30.Z, T22.W, literal.x,
2876; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
2877; EG-NEXT:    65535(9.183409e-41), 64(8.968310e-44)
2878; EG-NEXT:     LSHR T22.X, PV.W, literal.x,
2879; EG-NEXT:     LSHR T30.Y, T22.Z, literal.y,
2880; EG-NEXT:     LSHR T31.W, T21.Y, literal.y,
2881; EG-NEXT:     AND_INT * T30.X, T22.Z, literal.z,
2882; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
2883; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
2884; EG-NEXT:     AND_INT T31.Z, T21.Y, literal.x,
2885; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
2886; EG-NEXT:    65535(9.183409e-41), 80(1.121039e-43)
2887; EG-NEXT:     LSHR T32.X, PV.W, literal.x,
2888; EG-NEXT:     LSHR T31.Y, T21.X, literal.y,
2889; EG-NEXT:     LSHR T33.W, T21.W, literal.y,
2890; EG-NEXT:     AND_INT * T31.X, T21.X, literal.z,
2891; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
2892; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
2893; EG-NEXT:     AND_INT T33.Z, T21.W, literal.x,
2894; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
2895; EG-NEXT:    65535(9.183409e-41), 96(1.345247e-43)
2896; EG-NEXT:     LSHR T21.X, PV.W, literal.x,
2897; EG-NEXT:     LSHR T33.Y, T21.Z, literal.y,
2898; EG-NEXT:     AND_INT * T33.X, T21.Z, literal.z,
2899; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
2900; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
2901; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.x,
2902; EG-NEXT:    112(1.569454e-43), 0(0.000000e+00)
2903; EG-NEXT:     LSHR * T34.X, PV.W, literal.x,
2904; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
2905;
2906; GFX12-LABEL: constant_zextload_v32i16_to_v32i32:
2907; GFX12:       ; %bb.0:
2908; GFX12-NEXT:    s_load_b128 s[16:19], s[4:5], 0x24
2909; GFX12-NEXT:    s_wait_kmcnt 0x0
2910; GFX12-NEXT:    s_load_b512 s[0:15], s[18:19], 0x0
2911; GFX12-NEXT:    s_wait_kmcnt 0x0
2912; GFX12-NEXT:    s_lshr_b32 s33, s15, 16
2913; GFX12-NEXT:    s_and_b32 s15, s15, 0xffff
2914; GFX12-NEXT:    s_and_b32 s34, s14, 0xffff
2915; GFX12-NEXT:    s_lshr_b32 s14, s14, 16
2916; GFX12-NEXT:    s_lshr_b32 s30, s13, 16
2917; GFX12-NEXT:    s_and_b32 s13, s13, 0xffff
2918; GFX12-NEXT:    s_lshr_b32 s31, s12, 16
2919; GFX12-NEXT:    s_and_b32 s12, s12, 0xffff
2920; GFX12-NEXT:    v_dual_mov_b32 v24, 0 :: v_dual_mov_b32 v1, s14
2921; GFX12-NEXT:    v_dual_mov_b32 v0, s34 :: v_dual_mov_b32 v3, s33
2922; GFX12-NEXT:    v_dual_mov_b32 v2, s15 :: v_dual_mov_b32 v5, s31
2923; GFX12-NEXT:    s_lshr_b32 s29, s10, 16
2924; GFX12-NEXT:    v_dual_mov_b32 v4, s12 :: v_dual_mov_b32 v7, s30
2925; GFX12-NEXT:    v_dual_mov_b32 v6, s13 :: v_dual_mov_b32 v9, s29
2926; GFX12-NEXT:    s_lshr_b32 s28, s11, 16
2927; GFX12-NEXT:    s_and_b32 s11, s11, 0xffff
2928; GFX12-NEXT:    s_and_b32 s10, s10, 0xffff
2929; GFX12-NEXT:    s_lshr_b32 s26, s9, 16
2930; GFX12-NEXT:    s_and_b32 s9, s9, 0xffff
2931; GFX12-NEXT:    s_lshr_b32 s27, s8, 16
2932; GFX12-NEXT:    s_and_b32 s8, s8, 0xffff
2933; GFX12-NEXT:    s_lshr_b32 s24, s7, 16
2934; GFX12-NEXT:    s_and_b32 s7, s7, 0xffff
2935; GFX12-NEXT:    s_lshr_b32 s25, s6, 16
2936; GFX12-NEXT:    s_and_b32 s6, s6, 0xffff
2937; GFX12-NEXT:    s_wait_alu 0xfffe
2938; GFX12-NEXT:    v_dual_mov_b32 v8, s10 :: v_dual_mov_b32 v11, s28
2939; GFX12-NEXT:    v_mov_b32_e32 v10, s11
2940; GFX12-NEXT:    s_lshr_b32 s22, s5, 16
2941; GFX12-NEXT:    s_and_b32 s5, s5, 0xffff
2942; GFX12-NEXT:    s_lshr_b32 s23, s4, 16
2943; GFX12-NEXT:    s_and_b32 s4, s4, 0xffff
2944; GFX12-NEXT:    s_clause 0x1
2945; GFX12-NEXT:    global_store_b128 v24, v[0:3], s[16:17] offset:112
2946; GFX12-NEXT:    global_store_b128 v24, v[4:7], s[16:17] offset:96
2947; GFX12-NEXT:    v_dual_mov_b32 v1, s27 :: v_dual_mov_b32 v0, s8
2948; GFX12-NEXT:    v_dual_mov_b32 v3, s26 :: v_dual_mov_b32 v2, s9
2949; GFX12-NEXT:    v_mov_b32_e32 v5, s25
2950; GFX12-NEXT:    s_lshr_b32 s20, s3, 16
2951; GFX12-NEXT:    s_and_b32 s3, s3, 0xffff
2952; GFX12-NEXT:    s_lshr_b32 s21, s2, 16
2953; GFX12-NEXT:    s_and_b32 s2, s2, 0xffff
2954; GFX12-NEXT:    v_dual_mov_b32 v4, s6 :: v_dual_mov_b32 v7, s24
2955; GFX12-NEXT:    v_dual_mov_b32 v6, s7 :: v_dual_mov_b32 v13, s23
2956; GFX12-NEXT:    s_lshr_b32 s18, s1, 16
2957; GFX12-NEXT:    s_and_b32 s1, s1, 0xffff
2958; GFX12-NEXT:    s_lshr_b32 s19, s0, 16
2959; GFX12-NEXT:    s_and_b32 s0, s0, 0xffff
2960; GFX12-NEXT:    v_dual_mov_b32 v12, s4 :: v_dual_mov_b32 v15, s22
2961; GFX12-NEXT:    v_dual_mov_b32 v14, s5 :: v_dual_mov_b32 v17, s21
2962; GFX12-NEXT:    v_dual_mov_b32 v16, s2 :: v_dual_mov_b32 v19, s20
2963; GFX12-NEXT:    v_dual_mov_b32 v18, s3 :: v_dual_mov_b32 v21, s19
2964; GFX12-NEXT:    v_dual_mov_b32 v20, s0 :: v_dual_mov_b32 v23, s18
2965; GFX12-NEXT:    v_mov_b32_e32 v22, s1
2966; GFX12-NEXT:    s_clause 0x5
2967; GFX12-NEXT:    global_store_b128 v24, v[8:11], s[16:17] offset:80
2968; GFX12-NEXT:    global_store_b128 v24, v[0:3], s[16:17] offset:64
2969; GFX12-NEXT:    global_store_b128 v24, v[4:7], s[16:17] offset:48
2970; GFX12-NEXT:    global_store_b128 v24, v[12:15], s[16:17] offset:32
2971; GFX12-NEXT:    global_store_b128 v24, v[16:19], s[16:17] offset:16
2972; GFX12-NEXT:    global_store_b128 v24, v[20:23], s[16:17]
2973; GFX12-NEXT:    s_endpgm
2974  %load = load <32 x i16>, ptr addrspace(4) %in
2975  %ext = zext <32 x i16> %load to <32 x i32>
2976  store <32 x i32> %ext, ptr addrspace(1) %out
2977  ret void
2978}
2979
2980define amdgpu_kernel void @constant_sextload_v32i16_to_v32i32(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
2981; GCN-NOHSA-SI-LABEL: constant_sextload_v32i16_to_v32i32:
2982; GCN-NOHSA-SI:       ; %bb.0:
2983; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[16:19], s[4:5], 0x9
2984; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
2985; GCN-NOHSA-SI-NEXT:    s_load_dwordx16 s[0:15], s[18:19], 0x0
2986; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
2987; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s18, s1, 16
2988; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s19, s0, 16
2989; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s20, s1
2990; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s21, s0
2991; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s22, s3, 16
2992; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s23, s2, 16
2993; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s24, s3
2994; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s25, s2
2995; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s26, s5, 16
2996; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s27, s4, 16
2997; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s5, s5
2998; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s4, s4
2999; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s28, s7, 16
3000; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s29, s6, 16
3001; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s7, s7
3002; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s6, s6
3003; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s30, s9, 16
3004; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s31, s8, 16
3005; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s9, s9
3006; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s8, s8
3007; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s33, s11, 16
3008; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s34, s10, 16
3009; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s11, s11
3010; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s10, s10
3011; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s35, s13, 16
3012; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s36, s12, 16
3013; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s13, s13
3014; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s12, s12
3015; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s37, s15, 16
3016; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s38, s14, 16
3017; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s15, s15
3018; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s14, s14
3019; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
3020; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
3021; GCN-NOHSA-SI-NEXT:    s_mov_b32 s0, s16
3022; GCN-NOHSA-SI-NEXT:    s_mov_b32 s1, s17
3023; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s14
3024; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s38
3025; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s15
3026; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s37
3027; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112
3028; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
3029; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s12
3030; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s36
3031; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s13
3032; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s35
3033; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96
3034; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
3035; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s10
3036; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s34
3037; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s11
3038; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s33
3039; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80
3040; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
3041; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s8
3042; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s31
3043; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s9
3044; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s30
3045; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64
3046; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
3047; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s6
3048; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s29
3049; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s7
3050; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s28
3051; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
3052; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
3053; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s4
3054; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s27
3055; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s5
3056; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s26
3057; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
3058; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
3059; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s25
3060; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s23
3061; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s24
3062; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s22
3063; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
3064; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
3065; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s21
3066; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s19
3067; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s20
3068; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s18
3069; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
3070; GCN-NOHSA-SI-NEXT:    s_endpgm
3071;
3072; GCN-HSA-LABEL: constant_sextload_v32i16_to_v32i32:
3073; GCN-HSA:       ; %bb.0:
3074; GCN-HSA-NEXT:    s_load_dwordx4 s[16:19], s[8:9], 0x0
3075; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
3076; GCN-HSA-NEXT:    s_load_dwordx16 s[0:15], s[18:19], 0x0
3077; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
3078; GCN-HSA-NEXT:    s_ashr_i32 s18, s1, 16
3079; GCN-HSA-NEXT:    s_ashr_i32 s19, s0, 16
3080; GCN-HSA-NEXT:    s_ashr_i32 s22, s3, 16
3081; GCN-HSA-NEXT:    s_ashr_i32 s23, s2, 16
3082; GCN-HSA-NEXT:    s_ashr_i32 s24, s5, 16
3083; GCN-HSA-NEXT:    s_ashr_i32 s25, s4, 16
3084; GCN-HSA-NEXT:    s_ashr_i32 s26, s7, 16
3085; GCN-HSA-NEXT:    s_ashr_i32 s27, s6, 16
3086; GCN-HSA-NEXT:    s_ashr_i32 s28, s9, 16
3087; GCN-HSA-NEXT:    s_ashr_i32 s29, s8, 16
3088; GCN-HSA-NEXT:    s_ashr_i32 s30, s11, 16
3089; GCN-HSA-NEXT:    s_ashr_i32 s31, s10, 16
3090; GCN-HSA-NEXT:    s_ashr_i32 s33, s13, 16
3091; GCN-HSA-NEXT:    s_ashr_i32 s34, s12, 16
3092; GCN-HSA-NEXT:    s_ashr_i32 s35, s15, 16
3093; GCN-HSA-NEXT:    s_ashr_i32 s36, s14, 16
3094; GCN-HSA-NEXT:    s_sext_i32_i16 s21, s0
3095; GCN-HSA-NEXT:    s_add_u32 s0, s16, 0x70
3096; GCN-HSA-NEXT:    s_sext_i32_i16 s20, s1
3097; GCN-HSA-NEXT:    s_addc_u32 s1, s17, 0
3098; GCN-HSA-NEXT:    v_mov_b32_e32 v9, s1
3099; GCN-HSA-NEXT:    v_mov_b32_e32 v8, s0
3100; GCN-HSA-NEXT:    s_add_u32 s0, s16, 0x60
3101; GCN-HSA-NEXT:    s_addc_u32 s1, s17, 0
3102; GCN-HSA-NEXT:    v_mov_b32_e32 v11, s1
3103; GCN-HSA-NEXT:    s_sext_i32_i16 s12, s12
3104; GCN-HSA-NEXT:    s_sext_i32_i16 s15, s15
3105; GCN-HSA-NEXT:    s_sext_i32_i16 s14, s14
3106; GCN-HSA-NEXT:    v_mov_b32_e32 v10, s0
3107; GCN-HSA-NEXT:    s_add_u32 s0, s16, 0x50
3108; GCN-HSA-NEXT:    s_sext_i32_i16 s13, s13
3109; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s14
3110; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s36
3111; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s15
3112; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s35
3113; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s12
3114; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s34
3115; GCN-HSA-NEXT:    s_addc_u32 s1, s17, 0
3116; GCN-HSA-NEXT:    v_mov_b32_e32 v6, s13
3117; GCN-HSA-NEXT:    v_mov_b32_e32 v7, s33
3118; GCN-HSA-NEXT:    flat_store_dwordx4 v[8:9], v[0:3]
3119; GCN-HSA-NEXT:    flat_store_dwordx4 v[10:11], v[4:7]
3120; GCN-HSA-NEXT:    s_sext_i32_i16 s11, s11
3121; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
3122; GCN-HSA-NEXT:    s_sext_i32_i16 s10, s10
3123; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
3124; GCN-HSA-NEXT:    s_add_u32 s0, s16, 64
3125; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s10
3126; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s31
3127; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s11
3128; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s30
3129; GCN-HSA-NEXT:    s_addc_u32 s1, s17, 0
3130; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
3131; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
3132; GCN-HSA-NEXT:    s_sext_i32_i16 s9, s9
3133; GCN-HSA-NEXT:    s_sext_i32_i16 s8, s8
3134; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
3135; GCN-HSA-NEXT:    s_add_u32 s0, s16, 48
3136; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s8
3137; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s29
3138; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s9
3139; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s28
3140; GCN-HSA-NEXT:    s_addc_u32 s1, s17, 0
3141; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
3142; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
3143; GCN-HSA-NEXT:    s_sext_i32_i16 s7, s7
3144; GCN-HSA-NEXT:    s_sext_i32_i16 s6, s6
3145; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
3146; GCN-HSA-NEXT:    s_add_u32 s0, s16, 32
3147; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s6
3148; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s27
3149; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s7
3150; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s26
3151; GCN-HSA-NEXT:    s_addc_u32 s1, s17, 0
3152; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
3153; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
3154; GCN-HSA-NEXT:    s_sext_i32_i16 s5, s5
3155; GCN-HSA-NEXT:    s_sext_i32_i16 s4, s4
3156; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
3157; GCN-HSA-NEXT:    s_add_u32 s0, s16, 16
3158; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s4
3159; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s25
3160; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s5
3161; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s24
3162; GCN-HSA-NEXT:    s_addc_u32 s1, s17, 0
3163; GCN-HSA-NEXT:    s_sext_i32_i16 s3, s3
3164; GCN-HSA-NEXT:    s_sext_i32_i16 s2, s2
3165; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
3166; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
3167; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
3168; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s23
3169; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s3
3170; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s22
3171; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
3172; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
3173; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s16
3174; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s21
3175; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s19
3176; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s20
3177; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s18
3178; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s17
3179; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
3180; GCN-HSA-NEXT:    s_endpgm
3181;
3182; GCN-NOHSA-VI-LABEL: constant_sextload_v32i16_to_v32i32:
3183; GCN-NOHSA-VI:       ; %bb.0:
3184; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[16:19], s[4:5], 0x24
3185; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
3186; GCN-NOHSA-VI-NEXT:    s_load_dwordx16 s[0:15], s[18:19], 0x0
3187; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
3188; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s18, s1, 16
3189; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s19, s0, 16
3190; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s20, s1
3191; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s21, s0
3192; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s22, s3, 16
3193; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s23, s2, 16
3194; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s24, s5, 16
3195; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s25, s4, 16
3196; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s26, s7, 16
3197; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s27, s6, 16
3198; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s28, s9, 16
3199; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s29, s8, 16
3200; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s30, s11, 16
3201; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s31, s10, 16
3202; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s33, s13, 16
3203; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s34, s12, 16
3204; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s0, s15, 16
3205; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s1, s14, 16
3206; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s0
3207; GCN-NOHSA-VI-NEXT:    s_add_u32 s0, s16, 0x70
3208; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s1
3209; GCN-NOHSA-VI-NEXT:    s_addc_u32 s1, s17, 0
3210; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s1
3211; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s15, s15
3212; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s14, s14
3213; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s0
3214; GCN-NOHSA-VI-NEXT:    s_add_u32 s0, s16, 0x60
3215; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s14
3216; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s15
3217; GCN-NOHSA-VI-NEXT:    s_addc_u32 s1, s17, 0
3218; GCN-NOHSA-VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
3219; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s1
3220; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s13, s13
3221; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s12, s12
3222; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s0
3223; GCN-NOHSA-VI-NEXT:    s_add_u32 s0, s16, 0x50
3224; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s12
3225; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s34
3226; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s13
3227; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s33
3228; GCN-NOHSA-VI-NEXT:    s_addc_u32 s1, s17, 0
3229; GCN-NOHSA-VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
3230; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s1
3231; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s11, s11
3232; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s10, s10
3233; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s0
3234; GCN-NOHSA-VI-NEXT:    s_add_u32 s0, s16, 64
3235; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s10
3236; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s31
3237; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s11
3238; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s30
3239; GCN-NOHSA-VI-NEXT:    s_addc_u32 s1, s17, 0
3240; GCN-NOHSA-VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
3241; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s1
3242; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s9, s9
3243; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s8, s8
3244; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s0
3245; GCN-NOHSA-VI-NEXT:    s_add_u32 s0, s16, 48
3246; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s8
3247; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s29
3248; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s9
3249; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s28
3250; GCN-NOHSA-VI-NEXT:    s_addc_u32 s1, s17, 0
3251; GCN-NOHSA-VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
3252; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s1
3253; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s7, s7
3254; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s6, s6
3255; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s0
3256; GCN-NOHSA-VI-NEXT:    s_add_u32 s0, s16, 32
3257; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s6
3258; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s27
3259; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s7
3260; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s26
3261; GCN-NOHSA-VI-NEXT:    s_addc_u32 s1, s17, 0
3262; GCN-NOHSA-VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
3263; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s1
3264; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s5, s5
3265; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s4, s4
3266; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s0
3267; GCN-NOHSA-VI-NEXT:    s_add_u32 s0, s16, 16
3268; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s4
3269; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s25
3270; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s5
3271; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s24
3272; GCN-NOHSA-VI-NEXT:    s_addc_u32 s1, s17, 0
3273; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s3, s3
3274; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s2, s2
3275; GCN-NOHSA-VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
3276; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s1
3277; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s2
3278; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s23
3279; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s3
3280; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s22
3281; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s0
3282; GCN-NOHSA-VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
3283; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s16
3284; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s21
3285; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s19
3286; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s20
3287; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s18
3288; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s17
3289; GCN-NOHSA-VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
3290; GCN-NOHSA-VI-NEXT:    s_endpgm
3291;
3292; EG-LABEL: constant_sextload_v32i16_to_v32i32:
3293; EG:       ; %bb.0:
3294; EG-NEXT:    ALU 8, @20, KC0[CB0:0-32], KC1[]
3295; EG-NEXT:    TEX 3 @12
3296; EG-NEXT:    ALU 73, @29, KC0[CB0:0-32], KC1[]
3297; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T34.XYZW, T24.X, 0
3298; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T23.XYZW, T22.X, 0
3299; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T33.XYZW, T28.X, 0
3300; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T25.XYZW, T27.X, 0
3301; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T32.XYZW, T26.X, 0
3302; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T31.XYZW, T21.X, 0
3303; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T30.XYZW, T20.X, 0
3304; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T29.XYZW, T19.X, 1
3305; EG-NEXT:    CF_END
3306; EG-NEXT:    Fetch clause starting at 12:
3307; EG-NEXT:     VTX_READ_128 T23.XYZW, T22.X, 16, #1
3308; EG-NEXT:     VTX_READ_128 T24.XYZW, T22.X, 32, #1
3309; EG-NEXT:     VTX_READ_128 T25.XYZW, T22.X, 0, #1
3310; EG-NEXT:     VTX_READ_128 T22.XYZW, T22.X, 48, #1
3311; EG-NEXT:    ALU clause starting at 20:
3312; EG-NEXT:     LSHR T19.X, KC0[2].Y, literal.x,
3313; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
3314; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
3315; EG-NEXT:     LSHR T20.X, PV.W, literal.x,
3316; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
3317; EG-NEXT:    2(2.802597e-45), 32(4.484155e-44)
3318; EG-NEXT:     LSHR T21.X, PV.W, literal.x,
3319; EG-NEXT:     MOV * T22.X, KC0[2].Z,
3320; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
3321; EG-NEXT:    ALU clause starting at 29:
3322; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.x,
3323; EG-NEXT:    48(6.726233e-44), 0(0.000000e+00)
3324; EG-NEXT:     LSHR T26.X, PV.W, literal.x,
3325; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
3326; EG-NEXT:    2(2.802597e-45), 64(8.968310e-44)
3327; EG-NEXT:     LSHR T27.X, PV.W, literal.x,
3328; EG-NEXT:     LSHR T0.W, T22.W, literal.y,
3329; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.z,
3330; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
3331; EG-NEXT:    80(1.121039e-43), 0(0.000000e+00)
3332; EG-NEXT:     LSHR T28.X, PS, literal.x,
3333; EG-NEXT:     LSHR T0.Y, T22.Y, literal.y,
3334; EG-NEXT:     BFE_INT T29.Z, T25.Y, 0.0, literal.y, BS:VEC_120/SCL_212
3335; EG-NEXT:     LSHR T1.W, T24.W, literal.y,
3336; EG-NEXT:     LSHR * T2.W, T24.Y, literal.y,
3337; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
3338; EG-NEXT:     BFE_INT T29.X, T25.X, 0.0, literal.x,
3339; EG-NEXT:     LSHR T1.Y, T23.W, literal.x,
3340; EG-NEXT:     BFE_INT T30.Z, T25.W, 0.0, literal.x, BS:VEC_120/SCL_212
3341; EG-NEXT:     LSHR T3.W, T23.Y, literal.x,
3342; EG-NEXT:     LSHR * T4.W, T25.Y, literal.x,
3343; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
3344; EG-NEXT:     BFE_INT T30.X, T25.Z, 0.0, literal.x,
3345; EG-NEXT:     LSHR T2.Y, T25.W, literal.x,
3346; EG-NEXT:     BFE_INT T31.Z, T23.Y, 0.0, literal.x,
3347; EG-NEXT:     BFE_INT T29.W, PS, 0.0, literal.x,
3348; EG-NEXT:     LSHR * T4.W, T25.X, literal.x,
3349; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
3350; EG-NEXT:     BFE_INT T31.X, T23.X, 0.0, literal.x,
3351; EG-NEXT:     BFE_INT T29.Y, PS, 0.0, literal.x,
3352; EG-NEXT:     BFE_INT T32.Z, T23.W, 0.0, literal.x,
3353; EG-NEXT:     BFE_INT T30.W, PV.Y, 0.0, literal.x,
3354; EG-NEXT:     LSHR * T4.W, T25.Z, literal.x,
3355; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
3356; EG-NEXT:     BFE_INT T32.X, T23.Z, 0.0, literal.x,
3357; EG-NEXT:     BFE_INT T30.Y, PS, 0.0, literal.x,
3358; EG-NEXT:     BFE_INT T25.Z, T24.Y, 0.0, literal.x,
3359; EG-NEXT:     BFE_INT T31.W, T3.W, 0.0, literal.x,
3360; EG-NEXT:     LSHR * T3.W, T23.X, literal.x,
3361; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
3362; EG-NEXT:     BFE_INT T25.X, T24.X, 0.0, literal.x,
3363; EG-NEXT:     BFE_INT T31.Y, PS, 0.0, literal.x,
3364; EG-NEXT:     BFE_INT T33.Z, T24.W, 0.0, literal.x,
3365; EG-NEXT:     BFE_INT T32.W, T1.Y, 0.0, literal.x,
3366; EG-NEXT:     LSHR * T3.W, T23.Z, literal.x,
3367; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
3368; EG-NEXT:     BFE_INT T33.X, T24.Z, 0.0, literal.x,
3369; EG-NEXT:     BFE_INT T32.Y, PS, 0.0, literal.x,
3370; EG-NEXT:     BFE_INT T23.Z, T22.Y, 0.0, literal.x,
3371; EG-NEXT:     BFE_INT T25.W, T2.W, 0.0, literal.x,
3372; EG-NEXT:     LSHR * T2.W, T24.X, literal.x,
3373; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
3374; EG-NEXT:     BFE_INT T23.X, T22.X, 0.0, literal.x,
3375; EG-NEXT:     BFE_INT T25.Y, PS, 0.0, literal.x,
3376; EG-NEXT:     BFE_INT T34.Z, T22.W, 0.0, literal.x,
3377; EG-NEXT:     BFE_INT T33.W, T1.W, 0.0, literal.x, BS:VEC_120/SCL_212
3378; EG-NEXT:     LSHR * T1.W, T24.Z, literal.x,
3379; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
3380; EG-NEXT:     BFE_INT T34.X, T22.Z, 0.0, literal.x,
3381; EG-NEXT:     BFE_INT T33.Y, PS, 0.0, literal.x,
3382; EG-NEXT:     LSHR T0.Z, T22.X, literal.x,
3383; EG-NEXT:     BFE_INT T23.W, T0.Y, 0.0, literal.x,
3384; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.y,
3385; EG-NEXT:    16(2.242078e-44), 96(1.345247e-43)
3386; EG-NEXT:     LSHR T22.X, PS, literal.x,
3387; EG-NEXT:     BFE_INT T23.Y, PV.Z, 0.0, literal.y,
3388; EG-NEXT:     LSHR T0.Z, T22.Z, literal.y,
3389; EG-NEXT:     BFE_INT T34.W, T0.W, 0.0, literal.y,
3390; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.z,
3391; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
3392; EG-NEXT:    112(1.569454e-43), 0(0.000000e+00)
3393; EG-NEXT:     LSHR T24.X, PS, literal.x,
3394; EG-NEXT:     BFE_INT * T34.Y, PV.Z, 0.0, literal.y,
3395; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
3396;
3397; GFX12-LABEL: constant_sextload_v32i16_to_v32i32:
3398; GFX12:       ; %bb.0:
3399; GFX12-NEXT:    s_load_b128 s[16:19], s[4:5], 0x24
3400; GFX12-NEXT:    s_wait_kmcnt 0x0
3401; GFX12-NEXT:    s_load_b512 s[0:15], s[18:19], 0x0
3402; GFX12-NEXT:    s_wait_kmcnt 0x0
3403; GFX12-NEXT:    s_ashr_i32 s33, s15, 16
3404; GFX12-NEXT:    s_ashr_i32 s34, s14, 16
3405; GFX12-NEXT:    s_sext_i32_i16 s14, s14
3406; GFX12-NEXT:    s_sext_i32_i16 s15, s15
3407; GFX12-NEXT:    s_ashr_i32 s30, s13, 16
3408; GFX12-NEXT:    s_ashr_i32 s31, s12, 16
3409; GFX12-NEXT:    s_sext_i32_i16 s13, s13
3410; GFX12-NEXT:    s_sext_i32_i16 s12, s12
3411; GFX12-NEXT:    v_dual_mov_b32 v24, 0 :: v_dual_mov_b32 v1, s34
3412; GFX12-NEXT:    v_dual_mov_b32 v0, s14 :: v_dual_mov_b32 v3, s33
3413; GFX12-NEXT:    v_dual_mov_b32 v2, s15 :: v_dual_mov_b32 v5, s31
3414; GFX12-NEXT:    s_ashr_i32 s29, s10, 16
3415; GFX12-NEXT:    v_dual_mov_b32 v4, s12 :: v_dual_mov_b32 v7, s30
3416; GFX12-NEXT:    v_dual_mov_b32 v6, s13 :: v_dual_mov_b32 v9, s29
3417; GFX12-NEXT:    s_ashr_i32 s28, s11, 16
3418; GFX12-NEXT:    s_sext_i32_i16 s11, s11
3419; GFX12-NEXT:    s_sext_i32_i16 s10, s10
3420; GFX12-NEXT:    s_ashr_i32 s26, s9, 16
3421; GFX12-NEXT:    s_ashr_i32 s27, s8, 16
3422; GFX12-NEXT:    s_sext_i32_i16 s9, s9
3423; GFX12-NEXT:    s_sext_i32_i16 s8, s8
3424; GFX12-NEXT:    s_ashr_i32 s24, s7, 16
3425; GFX12-NEXT:    s_ashr_i32 s25, s6, 16
3426; GFX12-NEXT:    s_sext_i32_i16 s7, s7
3427; GFX12-NEXT:    s_sext_i32_i16 s6, s6
3428; GFX12-NEXT:    s_wait_alu 0xfffe
3429; GFX12-NEXT:    v_dual_mov_b32 v8, s10 :: v_dual_mov_b32 v11, s28
3430; GFX12-NEXT:    v_mov_b32_e32 v10, s11
3431; GFX12-NEXT:    s_ashr_i32 s22, s5, 16
3432; GFX12-NEXT:    s_ashr_i32 s23, s4, 16
3433; GFX12-NEXT:    s_sext_i32_i16 s5, s5
3434; GFX12-NEXT:    s_sext_i32_i16 s4, s4
3435; GFX12-NEXT:    s_clause 0x1
3436; GFX12-NEXT:    global_store_b128 v24, v[0:3], s[16:17] offset:112
3437; GFX12-NEXT:    global_store_b128 v24, v[4:7], s[16:17] offset:96
3438; GFX12-NEXT:    v_dual_mov_b32 v1, s27 :: v_dual_mov_b32 v0, s8
3439; GFX12-NEXT:    v_dual_mov_b32 v3, s26 :: v_dual_mov_b32 v2, s9
3440; GFX12-NEXT:    v_mov_b32_e32 v5, s25
3441; GFX12-NEXT:    s_ashr_i32 s20, s3, 16
3442; GFX12-NEXT:    s_ashr_i32 s21, s2, 16
3443; GFX12-NEXT:    s_sext_i32_i16 s3, s3
3444; GFX12-NEXT:    s_sext_i32_i16 s2, s2
3445; GFX12-NEXT:    v_dual_mov_b32 v4, s6 :: v_dual_mov_b32 v7, s24
3446; GFX12-NEXT:    v_dual_mov_b32 v6, s7 :: v_dual_mov_b32 v13, s23
3447; GFX12-NEXT:    s_ashr_i32 s18, s1, 16
3448; GFX12-NEXT:    s_ashr_i32 s19, s0, 16
3449; GFX12-NEXT:    s_sext_i32_i16 s1, s1
3450; GFX12-NEXT:    s_sext_i32_i16 s0, s0
3451; GFX12-NEXT:    v_dual_mov_b32 v12, s4 :: v_dual_mov_b32 v15, s22
3452; GFX12-NEXT:    v_dual_mov_b32 v14, s5 :: v_dual_mov_b32 v17, s21
3453; GFX12-NEXT:    v_dual_mov_b32 v16, s2 :: v_dual_mov_b32 v19, s20
3454; GFX12-NEXT:    v_dual_mov_b32 v18, s3 :: v_dual_mov_b32 v21, s19
3455; GFX12-NEXT:    v_dual_mov_b32 v20, s0 :: v_dual_mov_b32 v23, s18
3456; GFX12-NEXT:    v_mov_b32_e32 v22, s1
3457; GFX12-NEXT:    s_clause 0x5
3458; GFX12-NEXT:    global_store_b128 v24, v[8:11], s[16:17] offset:80
3459; GFX12-NEXT:    global_store_b128 v24, v[0:3], s[16:17] offset:64
3460; GFX12-NEXT:    global_store_b128 v24, v[4:7], s[16:17] offset:48
3461; GFX12-NEXT:    global_store_b128 v24, v[12:15], s[16:17] offset:32
3462; GFX12-NEXT:    global_store_b128 v24, v[16:19], s[16:17] offset:16
3463; GFX12-NEXT:    global_store_b128 v24, v[20:23], s[16:17]
3464; GFX12-NEXT:    s_endpgm
3465  %load = load <32 x i16>, ptr addrspace(4) %in
3466  %ext = sext <32 x i16> %load to <32 x i32>
3467  store <32 x i32> %ext, ptr addrspace(1) %out
3468  ret void
3469}
3470
3471define amdgpu_kernel void @constant_zextload_v64i16_to_v64i32(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
3472; GCN-NOHSA-SI-LABEL: constant_zextload_v64i16_to_v64i32:
3473; GCN-NOHSA-SI:       ; %bb.0:
3474; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[36:39], s[4:5], 0x9
3475; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
3476; GCN-NOHSA-SI-NEXT:    s_load_dwordx16 s[0:15], s[38:39], 0x0
3477; GCN-NOHSA-SI-NEXT:    s_load_dwordx16 s[16:31], s[38:39], 0x10
3478; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
3479; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s33, s1, 16
3480; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s34, s0, 16
3481; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s35, s3, 16
3482; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s38, s2, 16
3483; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s41, s5, 16
3484; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s42, s4, 16
3485; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s45, s7, 16
3486; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s46, s6, 16
3487; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s47, s9, 16
3488; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s48, s8, 16
3489; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s49, s11, 16
3490; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s50, s10, 16
3491; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s51, s13, 16
3492; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s52, s12, 16
3493; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s53, s15, 16
3494; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s54, s14, 16
3495; GCN-NOHSA-SI-NEXT:    s_and_b32 s39, s1, 0xffff
3496; GCN-NOHSA-SI-NEXT:    s_and_b32 s40, s0, 0xffff
3497; GCN-NOHSA-SI-NEXT:    s_and_b32 s43, s3, 0xffff
3498; GCN-NOHSA-SI-NEXT:    s_and_b32 s44, s2, 0xffff
3499; GCN-NOHSA-SI-NEXT:    s_and_b32 s5, s5, 0xffff
3500; GCN-NOHSA-SI-NEXT:    s_and_b32 s4, s4, 0xffff
3501; GCN-NOHSA-SI-NEXT:    s_and_b32 s7, s7, 0xffff
3502; GCN-NOHSA-SI-NEXT:    s_and_b32 s6, s6, 0xffff
3503; GCN-NOHSA-SI-NEXT:    s_and_b32 s9, s9, 0xffff
3504; GCN-NOHSA-SI-NEXT:    s_and_b32 s8, s8, 0xffff
3505; GCN-NOHSA-SI-NEXT:    s_and_b32 s11, s11, 0xffff
3506; GCN-NOHSA-SI-NEXT:    s_and_b32 s10, s10, 0xffff
3507; GCN-NOHSA-SI-NEXT:    s_and_b32 s13, s13, 0xffff
3508; GCN-NOHSA-SI-NEXT:    s_and_b32 s12, s12, 0xffff
3509; GCN-NOHSA-SI-NEXT:    s_and_b32 s15, s15, 0xffff
3510; GCN-NOHSA-SI-NEXT:    s_and_b32 s14, s14, 0xffff
3511; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s55, s17, 16
3512; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s56, s16, 16
3513; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s57, s19, 16
3514; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s58, s18, 16
3515; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s59, s21, 16
3516; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s60, s20, 16
3517; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s61, s23, 16
3518; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s62, s22, 16
3519; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s63, s25, 16
3520; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s64, s24, 16
3521; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s65, s27, 16
3522; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s66, s26, 16
3523; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s67, s29, 16
3524; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s68, s28, 16
3525; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s69, s31, 16
3526; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s70, s30, 16
3527; GCN-NOHSA-SI-NEXT:    s_and_b32 s17, s17, 0xffff
3528; GCN-NOHSA-SI-NEXT:    s_and_b32 s16, s16, 0xffff
3529; GCN-NOHSA-SI-NEXT:    s_and_b32 s19, s19, 0xffff
3530; GCN-NOHSA-SI-NEXT:    s_and_b32 s18, s18, 0xffff
3531; GCN-NOHSA-SI-NEXT:    s_and_b32 s20, s20, 0xffff
3532; GCN-NOHSA-SI-NEXT:    s_and_b32 s23, s23, 0xffff
3533; GCN-NOHSA-SI-NEXT:    s_and_b32 s22, s22, 0xffff
3534; GCN-NOHSA-SI-NEXT:    s_and_b32 s25, s25, 0xffff
3535; GCN-NOHSA-SI-NEXT:    s_and_b32 s24, s24, 0xffff
3536; GCN-NOHSA-SI-NEXT:    s_and_b32 s27, s27, 0xffff
3537; GCN-NOHSA-SI-NEXT:    s_and_b32 s26, s26, 0xffff
3538; GCN-NOHSA-SI-NEXT:    s_and_b32 s29, s29, 0xffff
3539; GCN-NOHSA-SI-NEXT:    s_and_b32 s28, s28, 0xffff
3540; GCN-NOHSA-SI-NEXT:    s_and_b32 s31, s31, 0xffff
3541; GCN-NOHSA-SI-NEXT:    s_and_b32 s30, s30, 0xffff
3542; GCN-NOHSA-SI-NEXT:    s_and_b32 s21, s21, 0xffff
3543; GCN-NOHSA-SI-NEXT:    s_mov_b32 s0, s36
3544; GCN-NOHSA-SI-NEXT:    s_mov_b32 s1, s37
3545; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
3546; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
3547; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s30
3548; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s70
3549; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s31
3550; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s69
3551; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v4, s28
3552; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v5, s68
3553; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v6, s29
3554; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v7, s67
3555; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v8, s26
3556; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v9, s66
3557; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v10, s27
3558; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v11, s65
3559; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v12, s24
3560; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v13, s64
3561; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v14, s25
3562; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v15, s63
3563; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v16, s22
3564; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v17, s62
3565; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v18, s23
3566; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:240
3567; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
3568; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s20
3569; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v19, s61
3570; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s60
3571; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s21
3572; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s59
3573; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:224
3574; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:208
3575; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:192
3576; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:176
3577; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:160
3578; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
3579; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s18
3580; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s58
3581; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s19
3582; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s57
3583; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:144
3584; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
3585; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s16
3586; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s56
3587; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s17
3588; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s55
3589; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:128
3590; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
3591; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s14
3592; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s54
3593; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s15
3594; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s53
3595; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112
3596; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
3597; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s12
3598; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s52
3599; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s13
3600; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s51
3601; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96
3602; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
3603; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s10
3604; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s50
3605; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s11
3606; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s49
3607; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80
3608; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
3609; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s8
3610; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s48
3611; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s9
3612; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s47
3613; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64
3614; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
3615; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s6
3616; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s46
3617; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s7
3618; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s45
3619; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
3620; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
3621; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s4
3622; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s42
3623; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s5
3624; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s41
3625; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
3626; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
3627; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s44
3628; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s38
3629; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s43
3630; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s35
3631; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
3632; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
3633; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s40
3634; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s34
3635; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s39
3636; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s33
3637; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
3638; GCN-NOHSA-SI-NEXT:    s_endpgm
3639;
3640; GCN-HSA-LABEL: constant_zextload_v64i16_to_v64i32:
3641; GCN-HSA:       ; %bb.0:
3642; GCN-HSA-NEXT:    s_load_dwordx4 s[16:19], s[8:9], 0x0
3643; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
3644; GCN-HSA-NEXT:    s_load_dwordx16 s[0:15], s[18:19], 0x0
3645; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
3646; GCN-HSA-NEXT:    s_lshr_b32 s20, s1, 16
3647; GCN-HSA-NEXT:    s_lshr_b32 s21, s0, 16
3648; GCN-HSA-NEXT:    s_lshr_b32 s22, s3, 16
3649; GCN-HSA-NEXT:    s_lshr_b32 s23, s2, 16
3650; GCN-HSA-NEXT:    s_lshr_b32 s24, s5, 16
3651; GCN-HSA-NEXT:    s_lshr_b32 s26, s4, 16
3652; GCN-HSA-NEXT:    s_lshr_b32 s28, s7, 16
3653; GCN-HSA-NEXT:    s_lshr_b32 s30, s6, 16
3654; GCN-HSA-NEXT:    s_lshr_b32 s33, s9, 16
3655; GCN-HSA-NEXT:    s_lshr_b32 s35, s8, 16
3656; GCN-HSA-NEXT:    s_lshr_b32 s37, s11, 16
3657; GCN-HSA-NEXT:    s_lshr_b32 s39, s10, 16
3658; GCN-HSA-NEXT:    s_lshr_b32 s42, s13, 16
3659; GCN-HSA-NEXT:    s_lshr_b32 s44, s12, 16
3660; GCN-HSA-NEXT:    s_lshr_b32 s45, s15, 16
3661; GCN-HSA-NEXT:    s_lshr_b32 s46, s14, 16
3662; GCN-HSA-NEXT:    s_and_b32 s25, s1, 0xffff
3663; GCN-HSA-NEXT:    s_and_b32 s27, s0, 0xffff
3664; GCN-HSA-NEXT:    s_and_b32 s29, s3, 0xffff
3665; GCN-HSA-NEXT:    s_and_b32 s31, s2, 0xffff
3666; GCN-HSA-NEXT:    s_and_b32 s34, s5, 0xffff
3667; GCN-HSA-NEXT:    s_and_b32 s36, s4, 0xffff
3668; GCN-HSA-NEXT:    s_and_b32 s38, s7, 0xffff
3669; GCN-HSA-NEXT:    s_and_b32 s40, s6, 0xffff
3670; GCN-HSA-NEXT:    s_and_b32 s41, s9, 0xffff
3671; GCN-HSA-NEXT:    s_and_b32 s43, s8, 0xffff
3672; GCN-HSA-NEXT:    s_and_b32 s47, s11, 0xffff
3673; GCN-HSA-NEXT:    s_and_b32 s48, s10, 0xffff
3674; GCN-HSA-NEXT:    s_and_b32 s49, s13, 0xffff
3675; GCN-HSA-NEXT:    s_and_b32 s51, s12, 0xffff
3676; GCN-HSA-NEXT:    s_and_b32 s50, s15, 0xffff
3677; GCN-HSA-NEXT:    s_and_b32 s52, s14, 0xffff
3678; GCN-HSA-NEXT:    s_load_dwordx16 s[0:15], s[18:19], 0x10
3679; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
3680; GCN-HSA-NEXT:    s_lshr_b32 s18, s1, 16
3681; GCN-HSA-NEXT:    s_lshr_b32 s19, s0, 16
3682; GCN-HSA-NEXT:    s_lshr_b32 s53, s3, 16
3683; GCN-HSA-NEXT:    s_lshr_b32 s54, s2, 16
3684; GCN-HSA-NEXT:    s_lshr_b32 s55, s5, 16
3685; GCN-HSA-NEXT:    s_lshr_b32 s56, s4, 16
3686; GCN-HSA-NEXT:    s_lshr_b32 s57, s7, 16
3687; GCN-HSA-NEXT:    s_lshr_b32 s58, s6, 16
3688; GCN-HSA-NEXT:    s_lshr_b32 s59, s9, 16
3689; GCN-HSA-NEXT:    s_lshr_b32 s60, s8, 16
3690; GCN-HSA-NEXT:    s_lshr_b32 s61, s11, 16
3691; GCN-HSA-NEXT:    s_lshr_b32 s62, s10, 16
3692; GCN-HSA-NEXT:    s_lshr_b32 s63, s13, 16
3693; GCN-HSA-NEXT:    s_lshr_b32 s64, s12, 16
3694; GCN-HSA-NEXT:    s_lshr_b32 s65, s15, 16
3695; GCN-HSA-NEXT:    s_lshr_b32 s66, s14, 16
3696; GCN-HSA-NEXT:    s_and_b32 s67, s1, 0xffff
3697; GCN-HSA-NEXT:    s_and_b32 s68, s0, 0xffff
3698; GCN-HSA-NEXT:    s_and_b32 s3, s3, 0xffff
3699; GCN-HSA-NEXT:    s_and_b32 s2, s2, 0xffff
3700; GCN-HSA-NEXT:    s_and_b32 s5, s5, 0xffff
3701; GCN-HSA-NEXT:    s_and_b32 s4, s4, 0xffff
3702; GCN-HSA-NEXT:    s_and_b32 s7, s7, 0xffff
3703; GCN-HSA-NEXT:    s_and_b32 s6, s6, 0xffff
3704; GCN-HSA-NEXT:    s_and_b32 s9, s9, 0xffff
3705; GCN-HSA-NEXT:    s_and_b32 s8, s8, 0xffff
3706; GCN-HSA-NEXT:    s_and_b32 s11, s11, 0xffff
3707; GCN-HSA-NEXT:    s_and_b32 s10, s10, 0xffff
3708; GCN-HSA-NEXT:    s_and_b32 s13, s13, 0xffff
3709; GCN-HSA-NEXT:    s_and_b32 s12, s12, 0xffff
3710; GCN-HSA-NEXT:    s_and_b32 s15, s15, 0xffff
3711; GCN-HSA-NEXT:    s_and_b32 s14, s14, 0xffff
3712; GCN-HSA-NEXT:    s_add_u32 s0, s16, 0xf0
3713; GCN-HSA-NEXT:    s_addc_u32 s1, s17, 0
3714; GCN-HSA-NEXT:    v_mov_b32_e32 v20, s1
3715; GCN-HSA-NEXT:    v_mov_b32_e32 v19, s0
3716; GCN-HSA-NEXT:    s_add_u32 s0, s16, 0xe0
3717; GCN-HSA-NEXT:    s_addc_u32 s1, s17, 0
3718; GCN-HSA-NEXT:    v_mov_b32_e32 v22, s1
3719; GCN-HSA-NEXT:    v_mov_b32_e32 v21, s0
3720; GCN-HSA-NEXT:    s_add_u32 s0, s16, 0xd0
3721; GCN-HSA-NEXT:    s_addc_u32 s1, s17, 0
3722; GCN-HSA-NEXT:    v_mov_b32_e32 v24, s1
3723; GCN-HSA-NEXT:    v_mov_b32_e32 v23, s0
3724; GCN-HSA-NEXT:    s_add_u32 s0, s16, 0xc0
3725; GCN-HSA-NEXT:    s_addc_u32 s1, s17, 0
3726; GCN-HSA-NEXT:    v_mov_b32_e32 v26, s1
3727; GCN-HSA-NEXT:    v_mov_b32_e32 v25, s0
3728; GCN-HSA-NEXT:    s_add_u32 s0, s16, 0xb0
3729; GCN-HSA-NEXT:    s_addc_u32 s1, s17, 0
3730; GCN-HSA-NEXT:    v_mov_b32_e32 v28, s1
3731; GCN-HSA-NEXT:    v_mov_b32_e32 v27, s0
3732; GCN-HSA-NEXT:    s_add_u32 s0, s16, 0xa0
3733; GCN-HSA-NEXT:    v_mov_b32_e32 v8, s10
3734; GCN-HSA-NEXT:    v_mov_b32_e32 v9, s62
3735; GCN-HSA-NEXT:    v_mov_b32_e32 v10, s11
3736; GCN-HSA-NEXT:    v_mov_b32_e32 v11, s61
3737; GCN-HSA-NEXT:    s_addc_u32 s1, s17, 0
3738; GCN-HSA-NEXT:    flat_store_dwordx4 v[23:24], v[8:11]
3739; GCN-HSA-NEXT:    v_mov_b32_e32 v12, s8
3740; GCN-HSA-NEXT:    v_mov_b32_e32 v10, s1
3741; GCN-HSA-NEXT:    v_mov_b32_e32 v9, s0
3742; GCN-HSA-NEXT:    s_add_u32 s0, s16, 0x90
3743; GCN-HSA-NEXT:    v_mov_b32_e32 v13, s60
3744; GCN-HSA-NEXT:    v_mov_b32_e32 v14, s9
3745; GCN-HSA-NEXT:    v_mov_b32_e32 v15, s59
3746; GCN-HSA-NEXT:    s_addc_u32 s1, s17, 0
3747; GCN-HSA-NEXT:    flat_store_dwordx4 v[25:26], v[12:15]
3748; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s14
3749; GCN-HSA-NEXT:    v_mov_b32_e32 v13, s1
3750; GCN-HSA-NEXT:    v_mov_b32_e32 v12, s0
3751; GCN-HSA-NEXT:    s_add_u32 s0, s16, 0x80
3752; GCN-HSA-NEXT:    s_addc_u32 s1, s17, 0
3753; GCN-HSA-NEXT:    v_mov_b32_e32 v15, s1
3754; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s66
3755; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s15
3756; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s65
3757; GCN-HSA-NEXT:    v_mov_b32_e32 v14, s0
3758; GCN-HSA-NEXT:    s_add_u32 s0, s16, 0x70
3759; GCN-HSA-NEXT:    v_mov_b32_e32 v16, s6
3760; GCN-HSA-NEXT:    v_mov_b32_e32 v17, s58
3761; GCN-HSA-NEXT:    v_mov_b32_e32 v18, s7
3762; GCN-HSA-NEXT:    flat_store_dwordx4 v[19:20], v[0:3]
3763; GCN-HSA-NEXT:    v_mov_b32_e32 v19, s57
3764; GCN-HSA-NEXT:    s_addc_u32 s1, s17, 0
3765; GCN-HSA-NEXT:    flat_store_dwordx4 v[27:28], v[16:19]
3766; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s12
3767; GCN-HSA-NEXT:    v_mov_b32_e32 v17, s1
3768; GCN-HSA-NEXT:    v_mov_b32_e32 v16, s0
3769; GCN-HSA-NEXT:    s_add_u32 s0, s16, 0x60
3770; GCN-HSA-NEXT:    s_addc_u32 s1, s17, 0
3771; GCN-HSA-NEXT:    v_mov_b32_e32 v19, s1
3772; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s64
3773; GCN-HSA-NEXT:    v_mov_b32_e32 v6, s13
3774; GCN-HSA-NEXT:    v_mov_b32_e32 v7, s63
3775; GCN-HSA-NEXT:    v_mov_b32_e32 v18, s0
3776; GCN-HSA-NEXT:    s_add_u32 s0, s16, 0x50
3777; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s4
3778; GCN-HSA-NEXT:    v_mov_b32_e32 v20, s2
3779; GCN-HSA-NEXT:    flat_store_dwordx4 v[21:22], v[4:7]
3780; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s56
3781; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s5
3782; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s55
3783; GCN-HSA-NEXT:    v_mov_b32_e32 v21, s54
3784; GCN-HSA-NEXT:    v_mov_b32_e32 v22, s3
3785; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s68
3786; GCN-HSA-NEXT:    v_mov_b32_e32 v23, s53
3787; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s19
3788; GCN-HSA-NEXT:    s_addc_u32 s1, s17, 0
3789; GCN-HSA-NEXT:    v_mov_b32_e32 v6, s67
3790; GCN-HSA-NEXT:    v_mov_b32_e32 v8, s52
3791; GCN-HSA-NEXT:    v_mov_b32_e32 v7, s18
3792; GCN-HSA-NEXT:    flat_store_dwordx4 v[9:10], v[0:3]
3793; GCN-HSA-NEXT:    v_mov_b32_e32 v9, s46
3794; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s51
3795; GCN-HSA-NEXT:    v_mov_b32_e32 v10, s50
3796; GCN-HSA-NEXT:    v_mov_b32_e32 v11, s45
3797; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s44
3798; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s49
3799; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s42
3800; GCN-HSA-NEXT:    flat_store_dwordx4 v[12:13], v[20:23]
3801; GCN-HSA-NEXT:    flat_store_dwordx4 v[14:15], v[4:7]
3802; GCN-HSA-NEXT:    flat_store_dwordx4 v[16:17], v[8:11]
3803; GCN-HSA-NEXT:    flat_store_dwordx4 v[18:19], v[0:3]
3804; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
3805; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
3806; GCN-HSA-NEXT:    s_add_u32 s0, s16, 64
3807; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s48
3808; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s39
3809; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s47
3810; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s37
3811; GCN-HSA-NEXT:    s_addc_u32 s1, s17, 0
3812; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
3813; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
3814; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
3815; GCN-HSA-NEXT:    s_add_u32 s0, s16, 48
3816; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s43
3817; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s35
3818; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s41
3819; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s33
3820; GCN-HSA-NEXT:    s_addc_u32 s1, s17, 0
3821; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
3822; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
3823; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
3824; GCN-HSA-NEXT:    s_add_u32 s0, s16, 32
3825; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s40
3826; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s30
3827; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s38
3828; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s28
3829; GCN-HSA-NEXT:    s_addc_u32 s1, s17, 0
3830; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
3831; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
3832; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
3833; GCN-HSA-NEXT:    s_add_u32 s0, s16, 16
3834; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s36
3835; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s26
3836; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s34
3837; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s24
3838; GCN-HSA-NEXT:    s_addc_u32 s1, s17, 0
3839; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
3840; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
3841; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s31
3842; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s23
3843; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s29
3844; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s22
3845; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
3846; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
3847; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s16
3848; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s27
3849; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s21
3850; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s25
3851; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s20
3852; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s17
3853; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
3854; GCN-HSA-NEXT:    s_endpgm
3855;
3856; GCN-NOHSA-VI-LABEL: constant_zextload_v64i16_to_v64i32:
3857; GCN-NOHSA-VI:       ; %bb.0:
3858; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[36:39], s[4:5], 0x24
3859; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
3860; GCN-NOHSA-VI-NEXT:    s_load_dwordx16 s[16:31], s[38:39], 0x0
3861; GCN-NOHSA-VI-NEXT:    s_load_dwordx16 s[0:15], s[38:39], 0x40
3862; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
3863; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s33, s17, 16
3864; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s34, s16, 16
3865; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s35, s19, 16
3866; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s40, s18, 16
3867; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s41, s21, 16
3868; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s42, s20, 16
3869; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s43, s23, 16
3870; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s44, s22, 16
3871; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s45, s25, 16
3872; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s46, s24, 16
3873; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s47, s27, 16
3874; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s48, s26, 16
3875; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s38, s29, 16
3876; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s39, s28, 16
3877; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s49, s31, 16
3878; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s50, s30, 16
3879; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s51, s1, 16
3880; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s52, s0, 16
3881; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s53, s3, 16
3882; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s54, s2, 16
3883; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s55, s5, 16
3884; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s56, s4, 16
3885; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s57, s7, 16
3886; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s58, s6, 16
3887; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s59, s9, 16
3888; GCN-NOHSA-VI-NEXT:    s_and_b32 s17, s17, 0xffff
3889; GCN-NOHSA-VI-NEXT:    s_and_b32 s16, s16, 0xffff
3890; GCN-NOHSA-VI-NEXT:    s_and_b32 s19, s19, 0xffff
3891; GCN-NOHSA-VI-NEXT:    s_and_b32 s18, s18, 0xffff
3892; GCN-NOHSA-VI-NEXT:    s_and_b32 s21, s21, 0xffff
3893; GCN-NOHSA-VI-NEXT:    s_and_b32 s20, s20, 0xffff
3894; GCN-NOHSA-VI-NEXT:    s_and_b32 s23, s23, 0xffff
3895; GCN-NOHSA-VI-NEXT:    s_and_b32 s22, s22, 0xffff
3896; GCN-NOHSA-VI-NEXT:    s_and_b32 s25, s25, 0xffff
3897; GCN-NOHSA-VI-NEXT:    s_and_b32 s24, s24, 0xffff
3898; GCN-NOHSA-VI-NEXT:    s_and_b32 s27, s27, 0xffff
3899; GCN-NOHSA-VI-NEXT:    s_and_b32 s26, s26, 0xffff
3900; GCN-NOHSA-VI-NEXT:    s_and_b32 s29, s29, 0xffff
3901; GCN-NOHSA-VI-NEXT:    s_and_b32 s28, s28, 0xffff
3902; GCN-NOHSA-VI-NEXT:    s_and_b32 s31, s31, 0xffff
3903; GCN-NOHSA-VI-NEXT:    s_and_b32 s30, s30, 0xffff
3904; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s60, s8, 16
3905; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s61, s11, 16
3906; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s62, s10, 16
3907; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s63, s13, 16
3908; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s64, s12, 16
3909; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s65, s15, 16
3910; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s66, s14, 16
3911; GCN-NOHSA-VI-NEXT:    s_and_b32 s67, s1, 0xffff
3912; GCN-NOHSA-VI-NEXT:    s_and_b32 s68, s0, 0xffff
3913; GCN-NOHSA-VI-NEXT:    s_and_b32 s3, s3, 0xffff
3914; GCN-NOHSA-VI-NEXT:    s_and_b32 s2, s2, 0xffff
3915; GCN-NOHSA-VI-NEXT:    s_and_b32 s5, s5, 0xffff
3916; GCN-NOHSA-VI-NEXT:    s_and_b32 s4, s4, 0xffff
3917; GCN-NOHSA-VI-NEXT:    s_and_b32 s7, s7, 0xffff
3918; GCN-NOHSA-VI-NEXT:    s_and_b32 s6, s6, 0xffff
3919; GCN-NOHSA-VI-NEXT:    s_and_b32 s9, s9, 0xffff
3920; GCN-NOHSA-VI-NEXT:    s_and_b32 s8, s8, 0xffff
3921; GCN-NOHSA-VI-NEXT:    s_and_b32 s11, s11, 0xffff
3922; GCN-NOHSA-VI-NEXT:    s_and_b32 s10, s10, 0xffff
3923; GCN-NOHSA-VI-NEXT:    s_and_b32 s13, s13, 0xffff
3924; GCN-NOHSA-VI-NEXT:    s_and_b32 s12, s12, 0xffff
3925; GCN-NOHSA-VI-NEXT:    s_and_b32 s0, s15, 0xffff
3926; GCN-NOHSA-VI-NEXT:    s_and_b32 s1, s14, 0xffff
3927; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s0
3928; GCN-NOHSA-VI-NEXT:    s_add_u32 s0, s36, 0xf0
3929; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s1
3930; GCN-NOHSA-VI-NEXT:    s_addc_u32 s1, s37, 0
3931; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s1
3932; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s0
3933; GCN-NOHSA-VI-NEXT:    s_add_u32 s0, s36, 0xe0
3934; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s66
3935; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s65
3936; GCN-NOHSA-VI-NEXT:    s_addc_u32 s1, s37, 0
3937; GCN-NOHSA-VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
3938; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s1
3939; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s0
3940; GCN-NOHSA-VI-NEXT:    s_add_u32 s0, s36, 0xd0
3941; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s12
3942; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s64
3943; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s13
3944; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s63
3945; GCN-NOHSA-VI-NEXT:    s_addc_u32 s1, s37, 0
3946; GCN-NOHSA-VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
3947; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s1
3948; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s0
3949; GCN-NOHSA-VI-NEXT:    s_add_u32 s0, s36, 0xc0
3950; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s10
3951; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s62
3952; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s11
3953; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s61
3954; GCN-NOHSA-VI-NEXT:    s_addc_u32 s1, s37, 0
3955; GCN-NOHSA-VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
3956; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s1
3957; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s0
3958; GCN-NOHSA-VI-NEXT:    s_add_u32 s0, s36, 0xb0
3959; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s8
3960; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s60
3961; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s9
3962; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s59
3963; GCN-NOHSA-VI-NEXT:    s_addc_u32 s1, s37, 0
3964; GCN-NOHSA-VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
3965; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s1
3966; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s0
3967; GCN-NOHSA-VI-NEXT:    s_add_u32 s0, s36, 0xa0
3968; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s6
3969; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s58
3970; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s7
3971; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s57
3972; GCN-NOHSA-VI-NEXT:    s_addc_u32 s1, s37, 0
3973; GCN-NOHSA-VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
3974; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s1
3975; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s0
3976; GCN-NOHSA-VI-NEXT:    s_add_u32 s0, s36, 0x90
3977; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s4
3978; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s56
3979; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s5
3980; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s55
3981; GCN-NOHSA-VI-NEXT:    s_addc_u32 s1, s37, 0
3982; GCN-NOHSA-VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
3983; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s1
3984; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s0
3985; GCN-NOHSA-VI-NEXT:    s_add_u32 s0, s36, 0x80
3986; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s2
3987; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s54
3988; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s3
3989; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s53
3990; GCN-NOHSA-VI-NEXT:    s_addc_u32 s1, s37, 0
3991; GCN-NOHSA-VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
3992; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s1
3993; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s0
3994; GCN-NOHSA-VI-NEXT:    s_add_u32 s0, s36, 0x70
3995; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s68
3996; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s52
3997; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s67
3998; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s51
3999; GCN-NOHSA-VI-NEXT:    s_addc_u32 s1, s37, 0
4000; GCN-NOHSA-VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
4001; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s1
4002; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s0
4003; GCN-NOHSA-VI-NEXT:    s_add_u32 s0, s36, 0x60
4004; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s30
4005; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s50
4006; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s31
4007; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s49
4008; GCN-NOHSA-VI-NEXT:    s_addc_u32 s1, s37, 0
4009; GCN-NOHSA-VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
4010; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s1
4011; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s0
4012; GCN-NOHSA-VI-NEXT:    s_add_u32 s0, s36, 0x50
4013; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s28
4014; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s39
4015; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s29
4016; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s38
4017; GCN-NOHSA-VI-NEXT:    s_addc_u32 s1, s37, 0
4018; GCN-NOHSA-VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
4019; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s1
4020; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s0
4021; GCN-NOHSA-VI-NEXT:    s_add_u32 s0, s36, 64
4022; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s26
4023; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s48
4024; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s27
4025; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s47
4026; GCN-NOHSA-VI-NEXT:    s_addc_u32 s1, s37, 0
4027; GCN-NOHSA-VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
4028; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s1
4029; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s0
4030; GCN-NOHSA-VI-NEXT:    s_add_u32 s0, s36, 48
4031; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s24
4032; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s46
4033; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s25
4034; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s45
4035; GCN-NOHSA-VI-NEXT:    s_addc_u32 s1, s37, 0
4036; GCN-NOHSA-VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
4037; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s1
4038; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s0
4039; GCN-NOHSA-VI-NEXT:    s_add_u32 s0, s36, 32
4040; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s22
4041; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s44
4042; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s23
4043; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s43
4044; GCN-NOHSA-VI-NEXT:    s_addc_u32 s1, s37, 0
4045; GCN-NOHSA-VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
4046; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s1
4047; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s0
4048; GCN-NOHSA-VI-NEXT:    s_add_u32 s0, s36, 16
4049; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s20
4050; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s42
4051; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s21
4052; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s41
4053; GCN-NOHSA-VI-NEXT:    s_addc_u32 s1, s37, 0
4054; GCN-NOHSA-VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
4055; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s1
4056; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s18
4057; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s40
4058; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s19
4059; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s35
4060; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s0
4061; GCN-NOHSA-VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
4062; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s36
4063; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s16
4064; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s34
4065; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s17
4066; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s33
4067; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s37
4068; GCN-NOHSA-VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
4069; GCN-NOHSA-VI-NEXT:    s_endpgm
4070;
4071; EG-LABEL: constant_zextload_v64i16_to_v64i32:
4072; EG:       ; %bb.0:
4073; EG-NEXT:    ALU 0, @38, KC0[CB0:0-32], KC1[]
4074; EG-NEXT:    TEX 3 @22
4075; EG-NEXT:    ALU 55, @39, KC0[CB0:0-32], KC1[]
4076; EG-NEXT:    TEX 3 @30
4077; EG-NEXT:    ALU 87, @95, KC0[CB0:0-32], KC1[]
4078; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T65.XYZW, T66.X, 0
4079; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T63.XYZW, T49.X, 0
4080; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T62.XYZW, T64.X, 0
4081; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T60.XYZW, T50.X, 0
4082; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T59.XYZW, T61.X, 0
4083; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T57.XYZW, T51.X, 0
4084; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T56.XYZW, T58.X, 0
4085; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T54.XYZW, T52.X, 0
4086; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T53.XYZW, T55.X, 0
4087; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T37.XYZW, T39.X, 0
4088; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T47.XYZW, T48.X, 0
4089; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T45.XYZW, T40.X, 0
4090; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T44.XYZW, T46.X, 0
4091; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T42.XYZW, T41.X, 0
4092; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T36.XYZW, T43.X, 0
4093; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T35.XYZW, T38.X, 1
4094; EG-NEXT:    CF_END
4095; EG-NEXT:    Fetch clause starting at 22:
4096; EG-NEXT:     VTX_READ_128 T38.XYZW, T37.X, 0, #1
4097; EG-NEXT:     VTX_READ_128 T39.XYZW, T37.X, 48, #1
4098; EG-NEXT:     VTX_READ_128 T40.XYZW, T37.X, 32, #1
4099; EG-NEXT:     VTX_READ_128 T41.XYZW, T37.X, 16, #1
4100; EG-NEXT:    Fetch clause starting at 30:
4101; EG-NEXT:     VTX_READ_128 T49.XYZW, T37.X, 112, #1
4102; EG-NEXT:     VTX_READ_128 T50.XYZW, T37.X, 96, #1
4103; EG-NEXT:     VTX_READ_128 T51.XYZW, T37.X, 80, #1
4104; EG-NEXT:     VTX_READ_128 T52.XYZW, T37.X, 64, #1
4105; EG-NEXT:    ALU clause starting at 38:
4106; EG-NEXT:     MOV * T37.X, KC0[2].Z,
4107; EG-NEXT:    ALU clause starting at 39:
4108; EG-NEXT:     LSHR * T35.W, T38.Y, literal.x,
4109; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
4110; EG-NEXT:     AND_INT * T35.Z, T38.Y, literal.x,
4111; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
4112; EG-NEXT:     LSHR T35.Y, T38.X, literal.x,
4113; EG-NEXT:     LSHR * T36.W, T38.W, literal.x,
4114; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
4115; EG-NEXT:     AND_INT T35.X, T38.X, literal.x,
4116; EG-NEXT:     AND_INT T36.Z, T38.W, literal.x,
4117; EG-NEXT:     LSHR * T38.X, KC0[2].Y, literal.y,
4118; EG-NEXT:    65535(9.183409e-41), 2(2.802597e-45)
4119; EG-NEXT:     LSHR T36.Y, T38.Z, literal.x,
4120; EG-NEXT:     LSHR * T42.W, T41.Y, literal.x,
4121; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
4122; EG-NEXT:     AND_INT T36.X, T38.Z, literal.x,
4123; EG-NEXT:     AND_INT T42.Z, T41.Y, literal.x,
4124; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
4125; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
4126; EG-NEXT:     LSHR T43.X, PV.W, literal.x,
4127; EG-NEXT:     LSHR T42.Y, T41.X, literal.y,
4128; EG-NEXT:     LSHR T44.W, T41.W, literal.y,
4129; EG-NEXT:     AND_INT * T42.X, T41.X, literal.z,
4130; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
4131; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
4132; EG-NEXT:     AND_INT T44.Z, T41.W, literal.x,
4133; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
4134; EG-NEXT:    65535(9.183409e-41), 32(4.484155e-44)
4135; EG-NEXT:     LSHR T41.X, PV.W, literal.x,
4136; EG-NEXT:     LSHR T44.Y, T41.Z, literal.y,
4137; EG-NEXT:     LSHR T45.W, T40.Y, literal.y,
4138; EG-NEXT:     AND_INT * T44.X, T41.Z, literal.z,
4139; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
4140; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
4141; EG-NEXT:     AND_INT T45.Z, T40.Y, literal.x,
4142; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
4143; EG-NEXT:    65535(9.183409e-41), 48(6.726233e-44)
4144; EG-NEXT:     LSHR T46.X, PV.W, literal.x,
4145; EG-NEXT:     LSHR T45.Y, T40.X, literal.y,
4146; EG-NEXT:     LSHR T47.W, T40.W, literal.y,
4147; EG-NEXT:     AND_INT * T45.X, T40.X, literal.z,
4148; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
4149; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
4150; EG-NEXT:     AND_INT T47.Z, T40.W, literal.x,
4151; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
4152; EG-NEXT:    65535(9.183409e-41), 64(8.968310e-44)
4153; EG-NEXT:     LSHR T40.X, PV.W, literal.x,
4154; EG-NEXT:     LSHR T47.Y, T40.Z, literal.y,
4155; EG-NEXT:     AND_INT * T47.X, T40.Z, literal.z,
4156; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
4157; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
4158; EG-NEXT:     ADD_INT T0.W, KC0[2].Y, literal.x,
4159; EG-NEXT:     LSHR * T37.W, T39.Y, literal.y,
4160; EG-NEXT:    80(1.121039e-43), 16(2.242078e-44)
4161; EG-NEXT:     LSHR T48.X, PV.W, literal.x,
4162; EG-NEXT:     AND_INT * T37.Z, T39.Y, literal.y,
4163; EG-NEXT:    2(2.802597e-45), 65535(9.183409e-41)
4164; EG-NEXT:    ALU clause starting at 95:
4165; EG-NEXT:     LSHR T37.Y, T39.X, literal.x,
4166; EG-NEXT:     LSHR * T53.W, T39.W, literal.x,
4167; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
4168; EG-NEXT:     AND_INT T37.X, T39.X, literal.x,
4169; EG-NEXT:     AND_INT T53.Z, T39.W, literal.x,
4170; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
4171; EG-NEXT:    65535(9.183409e-41), 96(1.345247e-43)
4172; EG-NEXT:     LSHR T39.X, PV.W, literal.x,
4173; EG-NEXT:     LSHR T53.Y, T39.Z, literal.y,
4174; EG-NEXT:     LSHR T54.W, T52.Y, literal.y,
4175; EG-NEXT:     AND_INT * T53.X, T39.Z, literal.z,
4176; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
4177; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
4178; EG-NEXT:     AND_INT T54.Z, T52.Y, literal.x,
4179; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
4180; EG-NEXT:    65535(9.183409e-41), 112(1.569454e-43)
4181; EG-NEXT:     LSHR T55.X, PV.W, literal.x,
4182; EG-NEXT:     LSHR T54.Y, T52.X, literal.y,
4183; EG-NEXT:     LSHR T56.W, T52.W, literal.y,
4184; EG-NEXT:     AND_INT * T54.X, T52.X, literal.z,
4185; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
4186; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
4187; EG-NEXT:     AND_INT T56.Z, T52.W, literal.x,
4188; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
4189; EG-NEXT:    65535(9.183409e-41), 128(1.793662e-43)
4190; EG-NEXT:     LSHR T52.X, PV.W, literal.x,
4191; EG-NEXT:     LSHR T56.Y, T52.Z, literal.y,
4192; EG-NEXT:     LSHR T57.W, T51.Y, literal.y,
4193; EG-NEXT:     AND_INT * T56.X, T52.Z, literal.z,
4194; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
4195; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
4196; EG-NEXT:     AND_INT T57.Z, T51.Y, literal.x,
4197; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
4198; EG-NEXT:    65535(9.183409e-41), 144(2.017870e-43)
4199; EG-NEXT:     LSHR T58.X, PV.W, literal.x,
4200; EG-NEXT:     LSHR T57.Y, T51.X, literal.y,
4201; EG-NEXT:     LSHR T59.W, T51.W, literal.y,
4202; EG-NEXT:     AND_INT * T57.X, T51.X, literal.z,
4203; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
4204; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
4205; EG-NEXT:     AND_INT T59.Z, T51.W, literal.x,
4206; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
4207; EG-NEXT:    65535(9.183409e-41), 160(2.242078e-43)
4208; EG-NEXT:     LSHR T51.X, PV.W, literal.x,
4209; EG-NEXT:     LSHR T59.Y, T51.Z, literal.y,
4210; EG-NEXT:     LSHR T60.W, T50.Y, literal.y,
4211; EG-NEXT:     AND_INT * T59.X, T51.Z, literal.z,
4212; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
4213; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
4214; EG-NEXT:     AND_INT T60.Z, T50.Y, literal.x,
4215; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
4216; EG-NEXT:    65535(9.183409e-41), 176(2.466285e-43)
4217; EG-NEXT:     LSHR T61.X, PV.W, literal.x,
4218; EG-NEXT:     LSHR T60.Y, T50.X, literal.y,
4219; EG-NEXT:     LSHR T62.W, T50.W, literal.y,
4220; EG-NEXT:     AND_INT * T60.X, T50.X, literal.z,
4221; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
4222; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
4223; EG-NEXT:     AND_INT T62.Z, T50.W, literal.x,
4224; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
4225; EG-NEXT:    65535(9.183409e-41), 192(2.690493e-43)
4226; EG-NEXT:     LSHR T50.X, PV.W, literal.x,
4227; EG-NEXT:     LSHR T62.Y, T50.Z, literal.y,
4228; EG-NEXT:     LSHR T63.W, T49.Y, literal.y,
4229; EG-NEXT:     AND_INT * T62.X, T50.Z, literal.z,
4230; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
4231; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
4232; EG-NEXT:     AND_INT T63.Z, T49.Y, literal.x,
4233; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
4234; EG-NEXT:    65535(9.183409e-41), 208(2.914701e-43)
4235; EG-NEXT:     LSHR T64.X, PV.W, literal.x,
4236; EG-NEXT:     LSHR T63.Y, T49.X, literal.y,
4237; EG-NEXT:     LSHR T65.W, T49.W, literal.y,
4238; EG-NEXT:     AND_INT * T63.X, T49.X, literal.z,
4239; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
4240; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
4241; EG-NEXT:     AND_INT T65.Z, T49.W, literal.x,
4242; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
4243; EG-NEXT:    65535(9.183409e-41), 224(3.138909e-43)
4244; EG-NEXT:     LSHR T49.X, PV.W, literal.x,
4245; EG-NEXT:     LSHR T65.Y, T49.Z, literal.y,
4246; EG-NEXT:     AND_INT * T65.X, T49.Z, literal.z,
4247; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
4248; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
4249; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.x,
4250; EG-NEXT:    240(3.363116e-43), 0(0.000000e+00)
4251; EG-NEXT:     LSHR * T66.X, PV.W, literal.x,
4252; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
4253;
4254; GFX12-LABEL: constant_zextload_v64i16_to_v64i32:
4255; GFX12:       ; %bb.0:
4256; GFX12-NEXT:    s_load_b128 s[36:39], s[4:5], 0x24
4257; GFX12-NEXT:    s_wait_kmcnt 0x0
4258; GFX12-NEXT:    s_clause 0x1
4259; GFX12-NEXT:    s_load_b512 s[16:31], s[38:39], 0x0
4260; GFX12-NEXT:    s_load_b512 s[0:15], s[38:39], 0x40
4261; GFX12-NEXT:    s_wait_kmcnt 0x0
4262; GFX12-NEXT:    s_lshr_b32 s49, s31, 16
4263; GFX12-NEXT:    s_lshr_b32 s65, s15, 16
4264; GFX12-NEXT:    s_lshr_b32 s66, s14, 16
4265; GFX12-NEXT:    s_and_b32 s14, s14, 0xffff
4266; GFX12-NEXT:    s_and_b32 s15, s15, 0xffff
4267; GFX12-NEXT:    s_lshr_b32 s63, s13, 16
4268; GFX12-NEXT:    s_lshr_b32 s64, s12, 16
4269; GFX12-NEXT:    s_and_b32 s13, s13, 0xffff
4270; GFX12-NEXT:    s_and_b32 s12, s12, 0xffff
4271; GFX12-NEXT:    v_dual_mov_b32 v24, 0 :: v_dual_mov_b32 v1, s66
4272; GFX12-NEXT:    s_lshr_b32 s61, s11, 16
4273; GFX12-NEXT:    s_lshr_b32 s62, s10, 16
4274; GFX12-NEXT:    s_and_b32 s11, s11, 0xffff
4275; GFX12-NEXT:    s_and_b32 s10, s10, 0xffff
4276; GFX12-NEXT:    v_dual_mov_b32 v0, s14 :: v_dual_mov_b32 v3, s65
4277; GFX12-NEXT:    v_dual_mov_b32 v2, s15 :: v_dual_mov_b32 v5, s64
4278; GFX12-NEXT:    s_lshr_b32 s59, s9, 16
4279; GFX12-NEXT:    s_lshr_b32 s60, s8, 16
4280; GFX12-NEXT:    s_and_b32 s9, s9, 0xffff
4281; GFX12-NEXT:    s_and_b32 s8, s8, 0xffff
4282; GFX12-NEXT:    v_dual_mov_b32 v4, s12 :: v_dual_mov_b32 v7, s63
4283; GFX12-NEXT:    v_dual_mov_b32 v6, s13 :: v_dual_mov_b32 v9, s62
4284; GFX12-NEXT:    v_dual_mov_b32 v8, s10 :: v_dual_mov_b32 v11, s61
4285; GFX12-NEXT:    v_dual_mov_b32 v10, s11 :: v_dual_mov_b32 v13, s60
4286; GFX12-NEXT:    s_lshr_b32 s57, s7, 16
4287; GFX12-NEXT:    s_lshr_b32 s58, s6, 16
4288; GFX12-NEXT:    s_and_b32 s7, s7, 0xffff
4289; GFX12-NEXT:    v_dual_mov_b32 v12, s8 :: v_dual_mov_b32 v15, s59
4290; GFX12-NEXT:    v_mov_b32_e32 v14, s9
4291; GFX12-NEXT:    s_and_b32 s6, s6, 0xffff
4292; GFX12-NEXT:    s_lshr_b32 s55, s5, 16
4293; GFX12-NEXT:    s_lshr_b32 s56, s4, 16
4294; GFX12-NEXT:    s_and_b32 s5, s5, 0xffff
4295; GFX12-NEXT:    s_and_b32 s4, s4, 0xffff
4296; GFX12-NEXT:    s_lshr_b32 s53, s3, 16
4297; GFX12-NEXT:    s_lshr_b32 s54, s2, 16
4298; GFX12-NEXT:    s_and_b32 s3, s3, 0xffff
4299; GFX12-NEXT:    s_and_b32 s2, s2, 0xffff
4300; GFX12-NEXT:    s_clause 0x3
4301; GFX12-NEXT:    global_store_b128 v24, v[0:3], s[36:37] offset:240
4302; GFX12-NEXT:    global_store_b128 v24, v[4:7], s[36:37] offset:224
4303; GFX12-NEXT:    global_store_b128 v24, v[8:11], s[36:37] offset:208
4304; GFX12-NEXT:    global_store_b128 v24, v[12:15], s[36:37] offset:192
4305; GFX12-NEXT:    v_dual_mov_b32 v1, s58 :: v_dual_mov_b32 v0, s6
4306; GFX12-NEXT:    v_dual_mov_b32 v3, s57 :: v_dual_mov_b32 v2, s7
4307; GFX12-NEXT:    v_mov_b32_e32 v5, s56
4308; GFX12-NEXT:    s_lshr_b32 s51, s1, 16
4309; GFX12-NEXT:    s_lshr_b32 s52, s0, 16
4310; GFX12-NEXT:    s_and_b32 s1, s1, 0xffff
4311; GFX12-NEXT:    s_and_b32 s0, s0, 0xffff
4312; GFX12-NEXT:    v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v7, s55
4313; GFX12-NEXT:    v_dual_mov_b32 v6, s5 :: v_dual_mov_b32 v9, s54
4314; GFX12-NEXT:    s_lshr_b32 s50, s30, 16
4315; GFX12-NEXT:    s_and_b32 s31, s31, 0xffff
4316; GFX12-NEXT:    s_and_b32 s30, s30, 0xffff
4317; GFX12-NEXT:    v_dual_mov_b32 v8, s2 :: v_dual_mov_b32 v11, s53
4318; GFX12-NEXT:    v_dual_mov_b32 v10, s3 :: v_dual_mov_b32 v13, s52
4319; GFX12-NEXT:    s_lshr_b32 s45, s27, 16
4320; GFX12-NEXT:    s_lshr_b32 s46, s26, 16
4321; GFX12-NEXT:    s_lshr_b32 s47, s29, 16
4322; GFX12-NEXT:    s_lshr_b32 s48, s28, 16
4323; GFX12-NEXT:    s_and_b32 s27, s27, 0xffff
4324; GFX12-NEXT:    s_and_b32 s26, s26, 0xffff
4325; GFX12-NEXT:    s_and_b32 s29, s29, 0xffff
4326; GFX12-NEXT:    s_and_b32 s28, s28, 0xffff
4327; GFX12-NEXT:    v_dual_mov_b32 v12, s0 :: v_dual_mov_b32 v15, s51
4328; GFX12-NEXT:    v_dual_mov_b32 v14, s1 :: v_dual_mov_b32 v17, s50
4329; GFX12-NEXT:    s_lshr_b32 s43, s25, 16
4330; GFX12-NEXT:    s_lshr_b32 s44, s24, 16
4331; GFX12-NEXT:    s_and_b32 s25, s25, 0xffff
4332; GFX12-NEXT:    s_and_b32 s24, s24, 0xffff
4333; GFX12-NEXT:    v_dual_mov_b32 v16, s30 :: v_dual_mov_b32 v19, s49
4334; GFX12-NEXT:    v_dual_mov_b32 v18, s31 :: v_dual_mov_b32 v21, s48
4335; GFX12-NEXT:    s_lshr_b32 s41, s23, 16
4336; GFX12-NEXT:    s_lshr_b32 s42, s22, 16
4337; GFX12-NEXT:    s_and_b32 s23, s23, 0xffff
4338; GFX12-NEXT:    s_and_b32 s22, s22, 0xffff
4339; GFX12-NEXT:    v_dual_mov_b32 v20, s28 :: v_dual_mov_b32 v23, s47
4340; GFX12-NEXT:    v_mov_b32_e32 v22, s29
4341; GFX12-NEXT:    s_clause 0x5
4342; GFX12-NEXT:    global_store_b128 v24, v[0:3], s[36:37] offset:176
4343; GFX12-NEXT:    global_store_b128 v24, v[4:7], s[36:37] offset:160
4344; GFX12-NEXT:    global_store_b128 v24, v[8:11], s[36:37] offset:144
4345; GFX12-NEXT:    global_store_b128 v24, v[12:15], s[36:37] offset:128
4346; GFX12-NEXT:    global_store_b128 v24, v[16:19], s[36:37] offset:112
4347; GFX12-NEXT:    global_store_b128 v24, v[20:23], s[36:37] offset:96
4348; GFX12-NEXT:    v_dual_mov_b32 v1, s46 :: v_dual_mov_b32 v0, s26
4349; GFX12-NEXT:    v_dual_mov_b32 v3, s45 :: v_dual_mov_b32 v2, s27
4350; GFX12-NEXT:    v_mov_b32_e32 v5, s44
4351; GFX12-NEXT:    s_lshr_b32 s39, s21, 16
4352; GFX12-NEXT:    s_lshr_b32 s40, s20, 16
4353; GFX12-NEXT:    s_and_b32 s21, s21, 0xffff
4354; GFX12-NEXT:    s_and_b32 s20, s20, 0xffff
4355; GFX12-NEXT:    v_dual_mov_b32 v4, s24 :: v_dual_mov_b32 v7, s43
4356; GFX12-NEXT:    v_dual_mov_b32 v6, s25 :: v_dual_mov_b32 v9, s42
4357; GFX12-NEXT:    s_lshr_b32 s35, s19, 16
4358; GFX12-NEXT:    s_lshr_b32 s38, s18, 16
4359; GFX12-NEXT:    s_and_b32 s19, s19, 0xffff
4360; GFX12-NEXT:    s_and_b32 s18, s18, 0xffff
4361; GFX12-NEXT:    v_dual_mov_b32 v8, s22 :: v_dual_mov_b32 v11, s41
4362; GFX12-NEXT:    v_dual_mov_b32 v10, s23 :: v_dual_mov_b32 v13, s40
4363; GFX12-NEXT:    s_lshr_b32 s33, s17, 16
4364; GFX12-NEXT:    s_lshr_b32 s34, s16, 16
4365; GFX12-NEXT:    s_and_b32 s17, s17, 0xffff
4366; GFX12-NEXT:    s_and_b32 s16, s16, 0xffff
4367; GFX12-NEXT:    v_dual_mov_b32 v12, s20 :: v_dual_mov_b32 v15, s39
4368; GFX12-NEXT:    v_dual_mov_b32 v14, s21 :: v_dual_mov_b32 v17, s38
4369; GFX12-NEXT:    v_dual_mov_b32 v16, s18 :: v_dual_mov_b32 v19, s35
4370; GFX12-NEXT:    v_dual_mov_b32 v18, s19 :: v_dual_mov_b32 v21, s34
4371; GFX12-NEXT:    v_dual_mov_b32 v20, s16 :: v_dual_mov_b32 v23, s33
4372; GFX12-NEXT:    v_mov_b32_e32 v22, s17
4373; GFX12-NEXT:    s_clause 0x5
4374; GFX12-NEXT:    global_store_b128 v24, v[0:3], s[36:37] offset:80
4375; GFX12-NEXT:    global_store_b128 v24, v[4:7], s[36:37] offset:64
4376; GFX12-NEXT:    global_store_b128 v24, v[8:11], s[36:37] offset:48
4377; GFX12-NEXT:    global_store_b128 v24, v[12:15], s[36:37] offset:32
4378; GFX12-NEXT:    global_store_b128 v24, v[16:19], s[36:37] offset:16
4379; GFX12-NEXT:    global_store_b128 v24, v[20:23], s[36:37]
4380; GFX12-NEXT:    s_endpgm
4381  %load = load <64 x i16>, ptr addrspace(4) %in
4382  %ext = zext <64 x i16> %load to <64 x i32>
4383  store <64 x i32> %ext, ptr addrspace(1) %out
4384  ret void
4385}
4386
4387define amdgpu_kernel void @constant_sextload_v64i16_to_v64i32(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
4388; GCN-NOHSA-SI-LABEL: constant_sextload_v64i16_to_v64i32:
4389; GCN-NOHSA-SI:       ; %bb.0:
4390; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[36:39], s[4:5], 0x9
4391; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
4392; GCN-NOHSA-SI-NEXT:    s_load_dwordx16 s[16:31], s[38:39], 0x0
4393; GCN-NOHSA-SI-NEXT:    s_load_dwordx16 s[0:15], s[38:39], 0x10
4394; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
4395; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s33, s17, 16
4396; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s34, s16, 16
4397; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s17, s17
4398; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s16, s16
4399; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s35, s19, 16
4400; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s38, s18, 16
4401; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s19, s19
4402; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s18, s18
4403; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s39, s21, 16
4404; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s40, s20, 16
4405; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s21, s21
4406; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s20, s20
4407; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s41, s23, 16
4408; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s42, s22, 16
4409; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s23, s23
4410; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s22, s22
4411; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s43, s25, 16
4412; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s44, s24, 16
4413; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s25, s25
4414; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s24, s24
4415; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s45, s27, 16
4416; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s46, s26, 16
4417; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s27, s27
4418; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s26, s26
4419; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s47, s29, 16
4420; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s48, s28, 16
4421; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s29, s29
4422; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s28, s28
4423; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s49, s31, 16
4424; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s50, s30, 16
4425; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s31, s31
4426; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s30, s30
4427; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s51, s1, 16
4428; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s52, s0, 16
4429; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s53, s1
4430; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s54, s0
4431; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s55, s3, 16
4432; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s56, s2, 16
4433; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s57, s3
4434; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s58, s2
4435; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s59, s5, 16
4436; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s60, s4, 16
4437; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s5, s5
4438; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s4, s4
4439; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s61, s6, 16
4440; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s62, s7
4441; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s6, s6
4442; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s63, s9, 16
4443; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s64, s8, 16
4444; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s9, s9
4445; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s8, s8
4446; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s65, s11, 16
4447; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s66, s10, 16
4448; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s11, s11
4449; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s10, s10
4450; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s67, s13, 16
4451; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s68, s12, 16
4452; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s13, s13
4453; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s12, s12
4454; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s69, s15, 16
4455; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s70, s14, 16
4456; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s15, s15
4457; GCN-NOHSA-SI-NEXT:    s_sext_i32_i16 s14, s14
4458; GCN-NOHSA-SI-NEXT:    s_ashr_i32 s7, s7, 16
4459; GCN-NOHSA-SI-NEXT:    s_mov_b32 s0, s36
4460; GCN-NOHSA-SI-NEXT:    s_mov_b32 s1, s37
4461; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
4462; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
4463; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s14
4464; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s70
4465; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s15
4466; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s69
4467; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v4, s12
4468; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v5, s68
4469; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v6, s13
4470; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v7, s67
4471; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v8, s10
4472; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v9, s66
4473; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v10, s11
4474; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v11, s65
4475; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v12, s8
4476; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v13, s64
4477; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v14, s9
4478; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v15, s63
4479; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v16, s6
4480; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v17, s61
4481; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v18, s62
4482; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:240
4483; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
4484; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s4
4485; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v19, s7
4486; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s60
4487; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s5
4488; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s59
4489; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:224
4490; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:208
4491; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:192
4492; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:176
4493; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:160
4494; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
4495; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s58
4496; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s56
4497; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s57
4498; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s55
4499; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:144
4500; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
4501; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s54
4502; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s52
4503; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s53
4504; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s51
4505; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:128
4506; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
4507; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s30
4508; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s50
4509; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s31
4510; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s49
4511; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112
4512; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
4513; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s28
4514; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s48
4515; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s29
4516; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s47
4517; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96
4518; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
4519; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s26
4520; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s46
4521; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s27
4522; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s45
4523; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80
4524; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
4525; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s24
4526; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s44
4527; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s25
4528; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s43
4529; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64
4530; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
4531; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s22
4532; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s42
4533; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s23
4534; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s41
4535; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
4536; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
4537; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s20
4538; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s40
4539; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s21
4540; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s39
4541; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
4542; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
4543; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s18
4544; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s38
4545; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s19
4546; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s35
4547; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
4548; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
4549; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s16
4550; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s34
4551; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s17
4552; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s33
4553; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
4554; GCN-NOHSA-SI-NEXT:    s_endpgm
4555;
4556; GCN-HSA-LABEL: constant_sextload_v64i16_to_v64i32:
4557; GCN-HSA:       ; %bb.0:
4558; GCN-HSA-NEXT:    s_load_dwordx4 s[16:19], s[8:9], 0x0
4559; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
4560; GCN-HSA-NEXT:    s_load_dwordx16 s[0:15], s[18:19], 0x0
4561; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
4562; GCN-HSA-NEXT:    s_ashr_i32 s20, s1, 16
4563; GCN-HSA-NEXT:    s_ashr_i32 s21, s0, 16
4564; GCN-HSA-NEXT:    s_sext_i32_i16 s22, s1
4565; GCN-HSA-NEXT:    s_sext_i32_i16 s23, s0
4566; GCN-HSA-NEXT:    s_ashr_i32 s24, s3, 16
4567; GCN-HSA-NEXT:    s_ashr_i32 s25, s2, 16
4568; GCN-HSA-NEXT:    s_sext_i32_i16 s26, s3
4569; GCN-HSA-NEXT:    s_sext_i32_i16 s27, s2
4570; GCN-HSA-NEXT:    s_ashr_i32 s28, s5, 16
4571; GCN-HSA-NEXT:    s_ashr_i32 s29, s4, 16
4572; GCN-HSA-NEXT:    s_sext_i32_i16 s30, s5
4573; GCN-HSA-NEXT:    s_sext_i32_i16 s31, s4
4574; GCN-HSA-NEXT:    s_ashr_i32 s33, s7, 16
4575; GCN-HSA-NEXT:    s_ashr_i32 s34, s6, 16
4576; GCN-HSA-NEXT:    s_sext_i32_i16 s35, s7
4577; GCN-HSA-NEXT:    s_sext_i32_i16 s36, s6
4578; GCN-HSA-NEXT:    s_ashr_i32 s37, s9, 16
4579; GCN-HSA-NEXT:    s_ashr_i32 s38, s8, 16
4580; GCN-HSA-NEXT:    s_sext_i32_i16 s39, s9
4581; GCN-HSA-NEXT:    s_sext_i32_i16 s40, s8
4582; GCN-HSA-NEXT:    s_ashr_i32 s41, s11, 16
4583; GCN-HSA-NEXT:    s_ashr_i32 s42, s10, 16
4584; GCN-HSA-NEXT:    s_sext_i32_i16 s43, s11
4585; GCN-HSA-NEXT:    s_sext_i32_i16 s44, s10
4586; GCN-HSA-NEXT:    s_ashr_i32 s45, s13, 16
4587; GCN-HSA-NEXT:    s_ashr_i32 s47, s12, 16
4588; GCN-HSA-NEXT:    s_sext_i32_i16 s46, s13
4589; GCN-HSA-NEXT:    s_sext_i32_i16 s49, s12
4590; GCN-HSA-NEXT:    s_ashr_i32 s48, s15, 16
4591; GCN-HSA-NEXT:    s_ashr_i32 s50, s14, 16
4592; GCN-HSA-NEXT:    s_sext_i32_i16 s51, s15
4593; GCN-HSA-NEXT:    s_sext_i32_i16 s52, s14
4594; GCN-HSA-NEXT:    s_load_dwordx16 s[0:15], s[18:19], 0x10
4595; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
4596; GCN-HSA-NEXT:    s_ashr_i32 s18, s1, 16
4597; GCN-HSA-NEXT:    s_ashr_i32 s19, s0, 16
4598; GCN-HSA-NEXT:    s_ashr_i32 s53, s3, 16
4599; GCN-HSA-NEXT:    s_ashr_i32 s54, s2, 16
4600; GCN-HSA-NEXT:    s_ashr_i32 s57, s5, 16
4601; GCN-HSA-NEXT:    s_ashr_i32 s58, s4, 16
4602; GCN-HSA-NEXT:    s_ashr_i32 s59, s7, 16
4603; GCN-HSA-NEXT:    s_ashr_i32 s60, s6, 16
4604; GCN-HSA-NEXT:    s_ashr_i32 s61, s9, 16
4605; GCN-HSA-NEXT:    s_ashr_i32 s62, s8, 16
4606; GCN-HSA-NEXT:    s_ashr_i32 s63, s11, 16
4607; GCN-HSA-NEXT:    s_ashr_i32 s64, s10, 16
4608; GCN-HSA-NEXT:    s_ashr_i32 s65, s13, 16
4609; GCN-HSA-NEXT:    s_ashr_i32 s66, s12, 16
4610; GCN-HSA-NEXT:    s_ashr_i32 s67, s15, 16
4611; GCN-HSA-NEXT:    s_ashr_i32 s68, s14, 16
4612; GCN-HSA-NEXT:    s_sext_i32_i16 s56, s2
4613; GCN-HSA-NEXT:    s_add_u32 s2, s16, 0xf0
4614; GCN-HSA-NEXT:    s_sext_i32_i16 s55, s3
4615; GCN-HSA-NEXT:    s_addc_u32 s3, s17, 0
4616; GCN-HSA-NEXT:    v_mov_b32_e32 v20, s3
4617; GCN-HSA-NEXT:    v_mov_b32_e32 v19, s2
4618; GCN-HSA-NEXT:    s_add_u32 s2, s16, 0xe0
4619; GCN-HSA-NEXT:    s_addc_u32 s3, s17, 0
4620; GCN-HSA-NEXT:    v_mov_b32_e32 v22, s3
4621; GCN-HSA-NEXT:    v_mov_b32_e32 v21, s2
4622; GCN-HSA-NEXT:    s_add_u32 s2, s16, 0xd0
4623; GCN-HSA-NEXT:    s_addc_u32 s3, s17, 0
4624; GCN-HSA-NEXT:    v_mov_b32_e32 v24, s3
4625; GCN-HSA-NEXT:    v_mov_b32_e32 v23, s2
4626; GCN-HSA-NEXT:    s_add_u32 s2, s16, 0xc0
4627; GCN-HSA-NEXT:    s_addc_u32 s3, s17, 0
4628; GCN-HSA-NEXT:    v_mov_b32_e32 v26, s3
4629; GCN-HSA-NEXT:    v_mov_b32_e32 v25, s2
4630; GCN-HSA-NEXT:    s_add_u32 s2, s16, 0xb0
4631; GCN-HSA-NEXT:    s_addc_u32 s3, s17, 0
4632; GCN-HSA-NEXT:    v_mov_b32_e32 v28, s3
4633; GCN-HSA-NEXT:    s_sext_i32_i16 s11, s11
4634; GCN-HSA-NEXT:    s_sext_i32_i16 s10, s10
4635; GCN-HSA-NEXT:    v_mov_b32_e32 v27, s2
4636; GCN-HSA-NEXT:    s_add_u32 s2, s16, 0xa0
4637; GCN-HSA-NEXT:    v_mov_b32_e32 v8, s10
4638; GCN-HSA-NEXT:    v_mov_b32_e32 v9, s64
4639; GCN-HSA-NEXT:    v_mov_b32_e32 v10, s11
4640; GCN-HSA-NEXT:    v_mov_b32_e32 v11, s63
4641; GCN-HSA-NEXT:    s_addc_u32 s3, s17, 0
4642; GCN-HSA-NEXT:    flat_store_dwordx4 v[23:24], v[8:11]
4643; GCN-HSA-NEXT:    s_sext_i32_i16 s9, s9
4644; GCN-HSA-NEXT:    v_mov_b32_e32 v10, s3
4645; GCN-HSA-NEXT:    s_sext_i32_i16 s8, s8
4646; GCN-HSA-NEXT:    v_mov_b32_e32 v9, s2
4647; GCN-HSA-NEXT:    s_add_u32 s2, s16, 0x90
4648; GCN-HSA-NEXT:    v_mov_b32_e32 v12, s8
4649; GCN-HSA-NEXT:    v_mov_b32_e32 v13, s62
4650; GCN-HSA-NEXT:    v_mov_b32_e32 v14, s9
4651; GCN-HSA-NEXT:    v_mov_b32_e32 v15, s61
4652; GCN-HSA-NEXT:    s_addc_u32 s3, s17, 0
4653; GCN-HSA-NEXT:    flat_store_dwordx4 v[25:26], v[12:15]
4654; GCN-HSA-NEXT:    s_sext_i32_i16 s13, s13
4655; GCN-HSA-NEXT:    v_mov_b32_e32 v13, s3
4656; GCN-HSA-NEXT:    s_sext_i32_i16 s12, s12
4657; GCN-HSA-NEXT:    v_mov_b32_e32 v12, s2
4658; GCN-HSA-NEXT:    s_add_u32 s2, s16, 0x80
4659; GCN-HSA-NEXT:    s_sext_i32_i16 s0, s0
4660; GCN-HSA-NEXT:    s_sext_i32_i16 s15, s15
4661; GCN-HSA-NEXT:    s_sext_i32_i16 s14, s14
4662; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s12
4663; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s66
4664; GCN-HSA-NEXT:    v_mov_b32_e32 v6, s13
4665; GCN-HSA-NEXT:    v_mov_b32_e32 v7, s65
4666; GCN-HSA-NEXT:    s_addc_u32 s3, s17, 0
4667; GCN-HSA-NEXT:    s_sext_i32_i16 s1, s1
4668; GCN-HSA-NEXT:    s_sext_i32_i16 s7, s7
4669; GCN-HSA-NEXT:    s_sext_i32_i16 s6, s6
4670; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s14
4671; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s68
4672; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s15
4673; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s67
4674; GCN-HSA-NEXT:    flat_store_dwordx4 v[21:22], v[4:7]
4675; GCN-HSA-NEXT:    v_mov_b32_e32 v16, s6
4676; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
4677; GCN-HSA-NEXT:    s_add_u32 s0, s16, 0x70
4678; GCN-HSA-NEXT:    v_mov_b32_e32 v17, s60
4679; GCN-HSA-NEXT:    v_mov_b32_e32 v18, s7
4680; GCN-HSA-NEXT:    flat_store_dwordx4 v[19:20], v[0:3]
4681; GCN-HSA-NEXT:    v_mov_b32_e32 v19, s59
4682; GCN-HSA-NEXT:    v_mov_b32_e32 v6, s1
4683; GCN-HSA-NEXT:    s_addc_u32 s1, s17, 0
4684; GCN-HSA-NEXT:    flat_store_dwordx4 v[27:28], v[16:19]
4685; GCN-HSA-NEXT:    s_sext_i32_i16 s5, s5
4686; GCN-HSA-NEXT:    v_mov_b32_e32 v17, s1
4687; GCN-HSA-NEXT:    v_mov_b32_e32 v16, s0
4688; GCN-HSA-NEXT:    s_add_u32 s0, s16, 0x60
4689; GCN-HSA-NEXT:    s_addc_u32 s1, s17, 0
4690; GCN-HSA-NEXT:    v_mov_b32_e32 v19, s1
4691; GCN-HSA-NEXT:    s_sext_i32_i16 s4, s4
4692; GCN-HSA-NEXT:    v_mov_b32_e32 v18, s0
4693; GCN-HSA-NEXT:    s_add_u32 s0, s16, 0x50
4694; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s4
4695; GCN-HSA-NEXT:    v_mov_b32_e32 v20, s56
4696; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s58
4697; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s5
4698; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s57
4699; GCN-HSA-NEXT:    v_mov_b32_e32 v21, s54
4700; GCN-HSA-NEXT:    v_mov_b32_e32 v22, s55
4701; GCN-HSA-NEXT:    v_mov_b32_e32 v23, s53
4702; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s19
4703; GCN-HSA-NEXT:    v_mov_b32_e32 v15, s3
4704; GCN-HSA-NEXT:    s_addc_u32 s1, s17, 0
4705; GCN-HSA-NEXT:    v_mov_b32_e32 v14, s2
4706; GCN-HSA-NEXT:    v_mov_b32_e32 v8, s52
4707; GCN-HSA-NEXT:    v_mov_b32_e32 v7, s18
4708; GCN-HSA-NEXT:    flat_store_dwordx4 v[9:10], v[0:3]
4709; GCN-HSA-NEXT:    v_mov_b32_e32 v9, s50
4710; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s49
4711; GCN-HSA-NEXT:    v_mov_b32_e32 v10, s51
4712; GCN-HSA-NEXT:    v_mov_b32_e32 v11, s48
4713; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s47
4714; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s46
4715; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s45
4716; GCN-HSA-NEXT:    flat_store_dwordx4 v[12:13], v[20:23]
4717; GCN-HSA-NEXT:    flat_store_dwordx4 v[14:15], v[4:7]
4718; GCN-HSA-NEXT:    flat_store_dwordx4 v[16:17], v[8:11]
4719; GCN-HSA-NEXT:    flat_store_dwordx4 v[18:19], v[0:3]
4720; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
4721; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
4722; GCN-HSA-NEXT:    s_add_u32 s0, s16, 64
4723; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s44
4724; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s42
4725; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s43
4726; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s41
4727; GCN-HSA-NEXT:    s_addc_u32 s1, s17, 0
4728; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
4729; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
4730; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
4731; GCN-HSA-NEXT:    s_add_u32 s0, s16, 48
4732; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s40
4733; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s38
4734; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s39
4735; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s37
4736; GCN-HSA-NEXT:    s_addc_u32 s1, s17, 0
4737; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
4738; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
4739; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
4740; GCN-HSA-NEXT:    s_add_u32 s0, s16, 32
4741; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s36
4742; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s34
4743; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s35
4744; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s33
4745; GCN-HSA-NEXT:    s_addc_u32 s1, s17, 0
4746; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
4747; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
4748; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
4749; GCN-HSA-NEXT:    s_add_u32 s0, s16, 16
4750; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s31
4751; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s29
4752; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s30
4753; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s28
4754; GCN-HSA-NEXT:    s_addc_u32 s1, s17, 0
4755; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
4756; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
4757; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s27
4758; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s25
4759; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s26
4760; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s24
4761; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
4762; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
4763; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s16
4764; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s23
4765; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s21
4766; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s22
4767; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s20
4768; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s17
4769; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
4770; GCN-HSA-NEXT:    s_endpgm
4771;
4772; GCN-NOHSA-VI-LABEL: constant_sextload_v64i16_to_v64i32:
4773; GCN-NOHSA-VI:       ; %bb.0:
4774; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[36:39], s[4:5], 0x24
4775; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
4776; GCN-NOHSA-VI-NEXT:    s_load_dwordx16 s[16:31], s[38:39], 0x0
4777; GCN-NOHSA-VI-NEXT:    s_load_dwordx16 s[0:15], s[38:39], 0x40
4778; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
4779; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s33, s17, 16
4780; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s34, s16, 16
4781; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s35, s19, 16
4782; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s38, s18, 16
4783; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s39, s21, 16
4784; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s40, s20, 16
4785; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s41, s23, 16
4786; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s42, s22, 16
4787; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s43, s25, 16
4788; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s44, s24, 16
4789; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s45, s27, 16
4790; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s46, s26, 16
4791; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s47, s29, 16
4792; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s48, s28, 16
4793; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s49, s31, 16
4794; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s50, s30, 16
4795; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s51, s1, 16
4796; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s52, s0, 16
4797; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s53, s1
4798; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s54, s0
4799; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s55, s3, 16
4800; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s56, s2, 16
4801; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s57, s5, 16
4802; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s58, s4, 16
4803; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s59, s7, 16
4804; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s60, s6, 16
4805; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s61, s9, 16
4806; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s62, s8, 16
4807; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s63, s11, 16
4808; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s64, s10, 16
4809; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s65, s13, 16
4810; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s66, s12, 16
4811; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s0, s15, 16
4812; GCN-NOHSA-VI-NEXT:    s_ashr_i32 s1, s14, 16
4813; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s0
4814; GCN-NOHSA-VI-NEXT:    s_add_u32 s0, s36, 0xf0
4815; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s1
4816; GCN-NOHSA-VI-NEXT:    s_addc_u32 s1, s37, 0
4817; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s1
4818; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s15, s15
4819; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s14, s14
4820; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s0
4821; GCN-NOHSA-VI-NEXT:    s_add_u32 s0, s36, 0xe0
4822; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s14
4823; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s15
4824; GCN-NOHSA-VI-NEXT:    s_addc_u32 s1, s37, 0
4825; GCN-NOHSA-VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
4826; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s1
4827; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s13, s13
4828; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s12, s12
4829; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s0
4830; GCN-NOHSA-VI-NEXT:    s_add_u32 s0, s36, 0xd0
4831; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s12
4832; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s66
4833; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s13
4834; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s65
4835; GCN-NOHSA-VI-NEXT:    s_addc_u32 s1, s37, 0
4836; GCN-NOHSA-VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
4837; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s1
4838; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s11, s11
4839; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s10, s10
4840; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s0
4841; GCN-NOHSA-VI-NEXT:    s_add_u32 s0, s36, 0xc0
4842; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s10
4843; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s64
4844; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s11
4845; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s63
4846; GCN-NOHSA-VI-NEXT:    s_addc_u32 s1, s37, 0
4847; GCN-NOHSA-VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
4848; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s1
4849; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s9, s9
4850; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s8, s8
4851; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s0
4852; GCN-NOHSA-VI-NEXT:    s_add_u32 s0, s36, 0xb0
4853; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s8
4854; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s62
4855; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s9
4856; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s61
4857; GCN-NOHSA-VI-NEXT:    s_addc_u32 s1, s37, 0
4858; GCN-NOHSA-VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
4859; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s1
4860; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s7, s7
4861; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s6, s6
4862; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s0
4863; GCN-NOHSA-VI-NEXT:    s_add_u32 s0, s36, 0xa0
4864; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s6
4865; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s60
4866; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s7
4867; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s59
4868; GCN-NOHSA-VI-NEXT:    s_addc_u32 s1, s37, 0
4869; GCN-NOHSA-VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
4870; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s1
4871; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s5, s5
4872; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s4, s4
4873; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s0
4874; GCN-NOHSA-VI-NEXT:    s_add_u32 s0, s36, 0x90
4875; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s4
4876; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s58
4877; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s5
4878; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s57
4879; GCN-NOHSA-VI-NEXT:    s_addc_u32 s1, s37, 0
4880; GCN-NOHSA-VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
4881; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s1
4882; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s3, s3
4883; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s2, s2
4884; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s0
4885; GCN-NOHSA-VI-NEXT:    s_add_u32 s0, s36, 0x80
4886; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s2
4887; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s56
4888; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s3
4889; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s55
4890; GCN-NOHSA-VI-NEXT:    s_addc_u32 s1, s37, 0
4891; GCN-NOHSA-VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
4892; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s1
4893; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s0
4894; GCN-NOHSA-VI-NEXT:    s_add_u32 s0, s36, 0x70
4895; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s54
4896; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s52
4897; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s53
4898; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s51
4899; GCN-NOHSA-VI-NEXT:    s_addc_u32 s1, s37, 0
4900; GCN-NOHSA-VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
4901; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s1
4902; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s31, s31
4903; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s30, s30
4904; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s0
4905; GCN-NOHSA-VI-NEXT:    s_add_u32 s0, s36, 0x60
4906; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s30
4907; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s50
4908; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s31
4909; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s49
4910; GCN-NOHSA-VI-NEXT:    s_addc_u32 s1, s37, 0
4911; GCN-NOHSA-VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
4912; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s1
4913; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s29, s29
4914; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s28, s28
4915; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s0
4916; GCN-NOHSA-VI-NEXT:    s_add_u32 s0, s36, 0x50
4917; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s28
4918; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s48
4919; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s29
4920; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s47
4921; GCN-NOHSA-VI-NEXT:    s_addc_u32 s1, s37, 0
4922; GCN-NOHSA-VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
4923; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s1
4924; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s27, s27
4925; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s26, s26
4926; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s0
4927; GCN-NOHSA-VI-NEXT:    s_add_u32 s0, s36, 64
4928; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s26
4929; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s46
4930; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s27
4931; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s45
4932; GCN-NOHSA-VI-NEXT:    s_addc_u32 s1, s37, 0
4933; GCN-NOHSA-VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
4934; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s1
4935; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s25, s25
4936; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s24, s24
4937; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s0
4938; GCN-NOHSA-VI-NEXT:    s_add_u32 s0, s36, 48
4939; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s24
4940; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s44
4941; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s25
4942; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s43
4943; GCN-NOHSA-VI-NEXT:    s_addc_u32 s1, s37, 0
4944; GCN-NOHSA-VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
4945; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s1
4946; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s23, s23
4947; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s22, s22
4948; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s0
4949; GCN-NOHSA-VI-NEXT:    s_add_u32 s0, s36, 32
4950; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s22
4951; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s42
4952; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s23
4953; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s41
4954; GCN-NOHSA-VI-NEXT:    s_addc_u32 s1, s37, 0
4955; GCN-NOHSA-VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
4956; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s1
4957; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s21, s21
4958; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s20, s20
4959; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s0
4960; GCN-NOHSA-VI-NEXT:    s_add_u32 s0, s36, 16
4961; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s20
4962; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s40
4963; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s21
4964; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s39
4965; GCN-NOHSA-VI-NEXT:    s_addc_u32 s1, s37, 0
4966; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s19, s19
4967; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s18, s18
4968; GCN-NOHSA-VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
4969; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s1
4970; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s18
4971; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s38
4972; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s19
4973; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s35
4974; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s0
4975; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s17, s17
4976; GCN-NOHSA-VI-NEXT:    s_sext_i32_i16 s16, s16
4977; GCN-NOHSA-VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
4978; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s36
4979; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s16
4980; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s34
4981; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s17
4982; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s33
4983; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s37
4984; GCN-NOHSA-VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
4985; GCN-NOHSA-VI-NEXT:    s_endpgm
4986;
4987; EG-LABEL: constant_sextload_v64i16_to_v64i32:
4988; EG:       ; %bb.0:
4989; EG-NEXT:    ALU 17, @38, KC0[CB0:0-32], KC1[]
4990; EG-NEXT:    TEX 7 @22
4991; EG-NEXT:    ALU 75, @56, KC0[CB0:0-32], KC1[]
4992; EG-NEXT:    ALU 71, @132, KC0[CB0:0-32], KC1[]
4993; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T66.XYZW, T48.X, 0
4994; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T47.XYZW, T41.X, 0
4995; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T65.XYZW, T56.X, 0
4996; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T46.XYZW, T55.X, 0
4997; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T64.XYZW, T54.X, 0
4998; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T45.XYZW, T53.X, 0
4999; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T63.XYZW, T52.X, 0
5000; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T43.XYZW, T51.X, 0
5001; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T62.XYZW, T50.X, 0
5002; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T42.XYZW, T49.X, 0
5003; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T61.XYZW, T40.X, 0
5004; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T44.XYZW, T39.X, 0
5005; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T60.XYZW, T38.X, 0
5006; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T59.XYZW, T37.X, 0
5007; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T58.XYZW, T36.X, 0
5008; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T57.XYZW, T35.X, 1
5009; EG-NEXT:    CF_END
5010; EG-NEXT:    PAD
5011; EG-NEXT:    Fetch clause starting at 22:
5012; EG-NEXT:     VTX_READ_128 T42.XYZW, T41.X, 16, #1
5013; EG-NEXT:     VTX_READ_128 T43.XYZW, T41.X, 32, #1
5014; EG-NEXT:     VTX_READ_128 T44.XYZW, T41.X, 0, #1
5015; EG-NEXT:     VTX_READ_128 T45.XYZW, T41.X, 48, #1
5016; EG-NEXT:     VTX_READ_128 T46.XYZW, T41.X, 64, #1
5017; EG-NEXT:     VTX_READ_128 T47.XYZW, T41.X, 80, #1
5018; EG-NEXT:     VTX_READ_128 T48.XYZW, T41.X, 96, #1
5019; EG-NEXT:     VTX_READ_128 T41.XYZW, T41.X, 112, #1
5020; EG-NEXT:    ALU clause starting at 38:
5021; EG-NEXT:     LSHR T35.X, KC0[2].Y, literal.x,
5022; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
5023; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
5024; EG-NEXT:     LSHR T36.X, PV.W, literal.x,
5025; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
5026; EG-NEXT:    2(2.802597e-45), 32(4.484155e-44)
5027; EG-NEXT:     LSHR T37.X, PV.W, literal.x,
5028; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
5029; EG-NEXT:    2(2.802597e-45), 48(6.726233e-44)
5030; EG-NEXT:     LSHR T38.X, PV.W, literal.x,
5031; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
5032; EG-NEXT:    2(2.802597e-45), 64(8.968310e-44)
5033; EG-NEXT:     LSHR T39.X, PV.W, literal.x,
5034; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
5035; EG-NEXT:    2(2.802597e-45), 80(1.121039e-43)
5036; EG-NEXT:     LSHR T40.X, PV.W, literal.x,
5037; EG-NEXT:     MOV * T41.X, KC0[2].Z,
5038; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
5039; EG-NEXT:    ALU clause starting at 56:
5040; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.x,
5041; EG-NEXT:    96(1.345247e-43), 0(0.000000e+00)
5042; EG-NEXT:     LSHR T49.X, PV.W, literal.x,
5043; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
5044; EG-NEXT:    2(2.802597e-45), 112(1.569454e-43)
5045; EG-NEXT:     LSHR T50.X, PV.W, literal.x,
5046; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
5047; EG-NEXT:    2(2.802597e-45), 128(1.793662e-43)
5048; EG-NEXT:     LSHR T51.X, PV.W, literal.x,
5049; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
5050; EG-NEXT:    2(2.802597e-45), 144(2.017870e-43)
5051; EG-NEXT:     LSHR T52.X, PV.W, literal.x,
5052; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
5053; EG-NEXT:    2(2.802597e-45), 160(2.242078e-43)
5054; EG-NEXT:     LSHR T53.X, PV.W, literal.x,
5055; EG-NEXT:     LSHR T0.Y, T41.W, literal.y,
5056; EG-NEXT:     LSHR T0.Z, T41.Y, literal.y,
5057; EG-NEXT:     LSHR T0.W, T48.W, literal.y, BS:VEC_120/SCL_212
5058; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.z,
5059; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
5060; EG-NEXT:    176(2.466285e-43), 0(0.000000e+00)
5061; EG-NEXT:     LSHR T54.X, PS, literal.x,
5062; EG-NEXT:     LSHR T1.Y, T48.Y, literal.y,
5063; EG-NEXT:     LSHR T1.Z, T47.W, literal.y,
5064; EG-NEXT:     LSHR T1.W, T47.Y, literal.y, BS:VEC_120/SCL_212
5065; EG-NEXT:     ADD_INT * T2.W, KC0[2].Y, literal.z,
5066; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
5067; EG-NEXT:    192(2.690493e-43), 0(0.000000e+00)
5068; EG-NEXT:     LSHR T55.X, PS, literal.x,
5069; EG-NEXT:     LSHR T2.Y, T46.W, literal.y,
5070; EG-NEXT:     LSHR T2.Z, T46.Y, literal.y,
5071; EG-NEXT:     LSHR T2.W, T45.W, literal.y, BS:VEC_120/SCL_212
5072; EG-NEXT:     ADD_INT * T3.W, KC0[2].Y, literal.z,
5073; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
5074; EG-NEXT:    208(2.914701e-43), 0(0.000000e+00)
5075; EG-NEXT:     LSHR T56.X, PS, literal.x,
5076; EG-NEXT:     LSHR T3.Y, T45.Y, literal.y,
5077; EG-NEXT:     BFE_INT T57.Z, T44.Y, 0.0, literal.y, BS:VEC_120/SCL_212
5078; EG-NEXT:     LSHR T3.W, T43.W, literal.y,
5079; EG-NEXT:     LSHR * T4.W, T43.Y, literal.y,
5080; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
5081; EG-NEXT:     BFE_INT T57.X, T44.X, 0.0, literal.x,
5082; EG-NEXT:     LSHR T4.Y, T42.W, literal.x,
5083; EG-NEXT:     BFE_INT T58.Z, T44.W, 0.0, literal.x, BS:VEC_120/SCL_212
5084; EG-NEXT:     LSHR T5.W, T42.Y, literal.x,
5085; EG-NEXT:     LSHR * T6.W, T44.Y, literal.x,
5086; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
5087; EG-NEXT:     BFE_INT T58.X, T44.Z, 0.0, literal.x,
5088; EG-NEXT:     LSHR T5.Y, T44.W, literal.x,
5089; EG-NEXT:     BFE_INT T59.Z, T42.Y, 0.0, literal.x,
5090; EG-NEXT:     BFE_INT T57.W, PS, 0.0, literal.x,
5091; EG-NEXT:     LSHR * T6.W, T44.X, literal.x,
5092; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
5093; EG-NEXT:     BFE_INT T59.X, T42.X, 0.0, literal.x,
5094; EG-NEXT:     BFE_INT T57.Y, PS, 0.0, literal.x,
5095; EG-NEXT:     BFE_INT T60.Z, T42.W, 0.0, literal.x,
5096; EG-NEXT:     BFE_INT T58.W, PV.Y, 0.0, literal.x,
5097; EG-NEXT:     LSHR * T6.W, T44.Z, literal.x,
5098; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
5099; EG-NEXT:     BFE_INT T60.X, T42.Z, 0.0, literal.x,
5100; EG-NEXT:     BFE_INT T58.Y, PS, 0.0, literal.x,
5101; EG-NEXT:     BFE_INT T44.Z, T43.Y, 0.0, literal.x,
5102; EG-NEXT:     BFE_INT T59.W, T5.W, 0.0, literal.x,
5103; EG-NEXT:     LSHR * T5.W, T42.X, literal.x,
5104; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
5105; EG-NEXT:     BFE_INT T44.X, T43.X, 0.0, literal.x,
5106; EG-NEXT:     BFE_INT T59.Y, PS, 0.0, literal.x,
5107; EG-NEXT:     BFE_INT T61.Z, T43.W, 0.0, literal.x,
5108; EG-NEXT:     BFE_INT T60.W, T4.Y, 0.0, literal.x,
5109; EG-NEXT:     LSHR * T5.W, T42.Z, literal.x,
5110; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
5111; EG-NEXT:     BFE_INT T61.X, T43.Z, 0.0, literal.x,
5112; EG-NEXT:     BFE_INT T60.Y, PS, 0.0, literal.x,
5113; EG-NEXT:     BFE_INT T42.Z, T45.Y, 0.0, literal.x,
5114; EG-NEXT:     BFE_INT * T44.W, T4.W, 0.0, literal.x,
5115; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
5116; EG-NEXT:    ALU clause starting at 132:
5117; EG-NEXT:     LSHR * T4.W, T43.X, literal.x,
5118; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
5119; EG-NEXT:     BFE_INT T42.X, T45.X, 0.0, literal.x,
5120; EG-NEXT:     BFE_INT T44.Y, PV.W, 0.0, literal.x,
5121; EG-NEXT:     BFE_INT T62.Z, T45.W, 0.0, literal.x,
5122; EG-NEXT:     BFE_INT T61.W, T3.W, 0.0, literal.x, BS:VEC_120/SCL_212
5123; EG-NEXT:     LSHR * T3.W, T43.Z, literal.x,
5124; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
5125; EG-NEXT:     BFE_INT T62.X, T45.Z, 0.0, literal.x,
5126; EG-NEXT:     BFE_INT T61.Y, PS, 0.0, literal.x,
5127; EG-NEXT:     BFE_INT T43.Z, T46.Y, 0.0, literal.x,
5128; EG-NEXT:     BFE_INT T42.W, T3.Y, 0.0, literal.x, BS:VEC_120/SCL_212
5129; EG-NEXT:     LSHR * T3.W, T45.X, literal.x,
5130; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
5131; EG-NEXT:     BFE_INT T43.X, T46.X, 0.0, literal.x,
5132; EG-NEXT:     BFE_INT T42.Y, PS, 0.0, literal.x,
5133; EG-NEXT:     BFE_INT T63.Z, T46.W, 0.0, literal.x,
5134; EG-NEXT:     BFE_INT T62.W, T2.W, 0.0, literal.x, BS:VEC_120/SCL_212
5135; EG-NEXT:     LSHR * T2.W, T45.Z, literal.x,
5136; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
5137; EG-NEXT:     BFE_INT T63.X, T46.Z, 0.0, literal.x,
5138; EG-NEXT:     BFE_INT T62.Y, PS, 0.0, literal.x,
5139; EG-NEXT:     BFE_INT T45.Z, T47.Y, 0.0, literal.x,
5140; EG-NEXT:     BFE_INT T43.W, T2.Z, 0.0, literal.x, BS:VEC_120/SCL_212
5141; EG-NEXT:     LSHR * T2.W, T46.X, literal.x,
5142; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
5143; EG-NEXT:     BFE_INT T45.X, T47.X, 0.0, literal.x,
5144; EG-NEXT:     BFE_INT T43.Y, PS, 0.0, literal.x,
5145; EG-NEXT:     BFE_INT T64.Z, T47.W, 0.0, literal.x,
5146; EG-NEXT:     BFE_INT T63.W, T2.Y, 0.0, literal.x,
5147; EG-NEXT:     LSHR * T2.W, T46.Z, literal.x,
5148; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
5149; EG-NEXT:     BFE_INT T64.X, T47.Z, 0.0, literal.x,
5150; EG-NEXT:     BFE_INT T63.Y, PS, 0.0, literal.x,
5151; EG-NEXT:     BFE_INT T46.Z, T48.Y, 0.0, literal.x,
5152; EG-NEXT:     BFE_INT T45.W, T1.W, 0.0, literal.x,
5153; EG-NEXT:     LSHR * T1.W, T47.X, literal.x,
5154; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
5155; EG-NEXT:     BFE_INT T46.X, T48.X, 0.0, literal.x,
5156; EG-NEXT:     BFE_INT T45.Y, PS, 0.0, literal.x,
5157; EG-NEXT:     BFE_INT T65.Z, T48.W, 0.0, literal.x,
5158; EG-NEXT:     BFE_INT T64.W, T1.Z, 0.0, literal.x,
5159; EG-NEXT:     LSHR * T1.W, T47.Z, literal.x,
5160; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
5161; EG-NEXT:     BFE_INT T65.X, T48.Z, 0.0, literal.x,
5162; EG-NEXT:     BFE_INT T64.Y, PS, 0.0, literal.x,
5163; EG-NEXT:     BFE_INT T47.Z, T41.Y, 0.0, literal.x,
5164; EG-NEXT:     BFE_INT T46.W, T1.Y, 0.0, literal.x, BS:VEC_120/SCL_212
5165; EG-NEXT:     LSHR * T1.W, T48.X, literal.x,
5166; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
5167; EG-NEXT:     BFE_INT T47.X, T41.X, 0.0, literal.x,
5168; EG-NEXT:     BFE_INT T46.Y, PS, 0.0, literal.x,
5169; EG-NEXT:     BFE_INT T66.Z, T41.W, 0.0, literal.x,
5170; EG-NEXT:     BFE_INT T65.W, T0.W, 0.0, literal.x, BS:VEC_120/SCL_212
5171; EG-NEXT:     LSHR * T0.W, T48.Z, literal.x,
5172; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
5173; EG-NEXT:     BFE_INT T66.X, T41.Z, 0.0, literal.x,
5174; EG-NEXT:     BFE_INT T65.Y, PS, 0.0, literal.x,
5175; EG-NEXT:     LSHR T1.Z, T41.X, literal.x,
5176; EG-NEXT:     BFE_INT T47.W, T0.Z, 0.0, literal.x, BS:VEC_120/SCL_212
5177; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
5178; EG-NEXT:    16(2.242078e-44), 224(3.138909e-43)
5179; EG-NEXT:     LSHR T41.X, PS, literal.x,
5180; EG-NEXT:     BFE_INT T47.Y, PV.Z, 0.0, literal.y,
5181; EG-NEXT:     LSHR T0.Z, T41.Z, literal.y,
5182; EG-NEXT:     BFE_INT T66.W, T0.Y, 0.0, literal.y,
5183; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.z,
5184; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
5185; EG-NEXT:    240(3.363116e-43), 0(0.000000e+00)
5186; EG-NEXT:     LSHR T48.X, PS, literal.x,
5187; EG-NEXT:     BFE_INT * T66.Y, PV.Z, 0.0, literal.y,
5188; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
5189;
5190; GFX12-LABEL: constant_sextload_v64i16_to_v64i32:
5191; GFX12:       ; %bb.0:
5192; GFX12-NEXT:    s_load_b128 s[36:39], s[4:5], 0x24
5193; GFX12-NEXT:    s_wait_kmcnt 0x0
5194; GFX12-NEXT:    s_clause 0x1
5195; GFX12-NEXT:    s_load_b512 s[0:15], s[38:39], 0x40
5196; GFX12-NEXT:    s_load_b512 s[16:31], s[38:39], 0x0
5197; GFX12-NEXT:    s_wait_kmcnt 0x0
5198; GFX12-NEXT:    s_ashr_i32 s65, s15, 16
5199; GFX12-NEXT:    s_ashr_i32 s66, s14, 16
5200; GFX12-NEXT:    s_sext_i32_i16 s14, s14
5201; GFX12-NEXT:    s_sext_i32_i16 s15, s15
5202; GFX12-NEXT:    s_ashr_i32 s63, s13, 16
5203; GFX12-NEXT:    s_ashr_i32 s64, s12, 16
5204; GFX12-NEXT:    s_sext_i32_i16 s13, s13
5205; GFX12-NEXT:    s_sext_i32_i16 s12, s12
5206; GFX12-NEXT:    v_dual_mov_b32 v24, 0 :: v_dual_mov_b32 v1, s66
5207; GFX12-NEXT:    s_ashr_i32 s61, s11, 16
5208; GFX12-NEXT:    s_ashr_i32 s62, s10, 16
5209; GFX12-NEXT:    s_sext_i32_i16 s11, s11
5210; GFX12-NEXT:    s_sext_i32_i16 s10, s10
5211; GFX12-NEXT:    v_dual_mov_b32 v0, s14 :: v_dual_mov_b32 v3, s65
5212; GFX12-NEXT:    v_dual_mov_b32 v2, s15 :: v_dual_mov_b32 v5, s64
5213; GFX12-NEXT:    s_ashr_i32 s59, s9, 16
5214; GFX12-NEXT:    s_ashr_i32 s60, s8, 16
5215; GFX12-NEXT:    s_sext_i32_i16 s9, s9
5216; GFX12-NEXT:    s_sext_i32_i16 s8, s8
5217; GFX12-NEXT:    v_dual_mov_b32 v4, s12 :: v_dual_mov_b32 v7, s63
5218; GFX12-NEXT:    v_dual_mov_b32 v6, s13 :: v_dual_mov_b32 v9, s62
5219; GFX12-NEXT:    v_dual_mov_b32 v8, s10 :: v_dual_mov_b32 v11, s61
5220; GFX12-NEXT:    v_dual_mov_b32 v10, s11 :: v_dual_mov_b32 v13, s60
5221; GFX12-NEXT:    s_ashr_i32 s57, s7, 16
5222; GFX12-NEXT:    s_ashr_i32 s58, s6, 16
5223; GFX12-NEXT:    s_sext_i32_i16 s7, s7
5224; GFX12-NEXT:    v_dual_mov_b32 v12, s8 :: v_dual_mov_b32 v15, s59
5225; GFX12-NEXT:    v_mov_b32_e32 v14, s9
5226; GFX12-NEXT:    s_sext_i32_i16 s6, s6
5227; GFX12-NEXT:    s_ashr_i32 s55, s5, 16
5228; GFX12-NEXT:    s_ashr_i32 s56, s4, 16
5229; GFX12-NEXT:    s_sext_i32_i16 s5, s5
5230; GFX12-NEXT:    s_sext_i32_i16 s4, s4
5231; GFX12-NEXT:    s_ashr_i32 s53, s3, 16
5232; GFX12-NEXT:    s_ashr_i32 s54, s2, 16
5233; GFX12-NEXT:    s_sext_i32_i16 s3, s3
5234; GFX12-NEXT:    s_sext_i32_i16 s2, s2
5235; GFX12-NEXT:    s_clause 0x3
5236; GFX12-NEXT:    global_store_b128 v24, v[0:3], s[36:37] offset:240
5237; GFX12-NEXT:    global_store_b128 v24, v[4:7], s[36:37] offset:224
5238; GFX12-NEXT:    global_store_b128 v24, v[8:11], s[36:37] offset:208
5239; GFX12-NEXT:    global_store_b128 v24, v[12:15], s[36:37] offset:192
5240; GFX12-NEXT:    v_dual_mov_b32 v1, s58 :: v_dual_mov_b32 v0, s6
5241; GFX12-NEXT:    v_dual_mov_b32 v3, s57 :: v_dual_mov_b32 v2, s7
5242; GFX12-NEXT:    v_mov_b32_e32 v5, s56
5243; GFX12-NEXT:    s_ashr_i32 s51, s1, 16
5244; GFX12-NEXT:    s_ashr_i32 s52, s0, 16
5245; GFX12-NEXT:    s_sext_i32_i16 s1, s1
5246; GFX12-NEXT:    s_sext_i32_i16 s0, s0
5247; GFX12-NEXT:    v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v7, s55
5248; GFX12-NEXT:    v_dual_mov_b32 v6, s5 :: v_dual_mov_b32 v9, s54
5249; GFX12-NEXT:    s_ashr_i32 s49, s31, 16
5250; GFX12-NEXT:    s_ashr_i32 s50, s30, 16
5251; GFX12-NEXT:    s_sext_i32_i16 s31, s31
5252; GFX12-NEXT:    s_sext_i32_i16 s30, s30
5253; GFX12-NEXT:    v_dual_mov_b32 v8, s2 :: v_dual_mov_b32 v11, s53
5254; GFX12-NEXT:    v_dual_mov_b32 v10, s3 :: v_dual_mov_b32 v13, s52
5255; GFX12-NEXT:    s_ashr_i32 s45, s27, 16
5256; GFX12-NEXT:    s_ashr_i32 s46, s26, 16
5257; GFX12-NEXT:    s_sext_i32_i16 s27, s27
5258; GFX12-NEXT:    s_sext_i32_i16 s26, s26
5259; GFX12-NEXT:    s_ashr_i32 s47, s29, 16
5260; GFX12-NEXT:    s_ashr_i32 s48, s28, 16
5261; GFX12-NEXT:    s_sext_i32_i16 s29, s29
5262; GFX12-NEXT:    s_sext_i32_i16 s28, s28
5263; GFX12-NEXT:    v_dual_mov_b32 v12, s0 :: v_dual_mov_b32 v15, s51
5264; GFX12-NEXT:    v_dual_mov_b32 v14, s1 :: v_dual_mov_b32 v17, s50
5265; GFX12-NEXT:    s_ashr_i32 s43, s25, 16
5266; GFX12-NEXT:    s_ashr_i32 s44, s24, 16
5267; GFX12-NEXT:    s_sext_i32_i16 s25, s25
5268; GFX12-NEXT:    s_sext_i32_i16 s24, s24
5269; GFX12-NEXT:    v_dual_mov_b32 v16, s30 :: v_dual_mov_b32 v19, s49
5270; GFX12-NEXT:    v_dual_mov_b32 v18, s31 :: v_dual_mov_b32 v21, s48
5271; GFX12-NEXT:    s_ashr_i32 s41, s23, 16
5272; GFX12-NEXT:    s_ashr_i32 s42, s22, 16
5273; GFX12-NEXT:    s_sext_i32_i16 s23, s23
5274; GFX12-NEXT:    s_sext_i32_i16 s22, s22
5275; GFX12-NEXT:    v_dual_mov_b32 v20, s28 :: v_dual_mov_b32 v23, s47
5276; GFX12-NEXT:    v_mov_b32_e32 v22, s29
5277; GFX12-NEXT:    s_clause 0x5
5278; GFX12-NEXT:    global_store_b128 v24, v[0:3], s[36:37] offset:176
5279; GFX12-NEXT:    global_store_b128 v24, v[4:7], s[36:37] offset:160
5280; GFX12-NEXT:    global_store_b128 v24, v[8:11], s[36:37] offset:144
5281; GFX12-NEXT:    global_store_b128 v24, v[12:15], s[36:37] offset:128
5282; GFX12-NEXT:    global_store_b128 v24, v[16:19], s[36:37] offset:112
5283; GFX12-NEXT:    global_store_b128 v24, v[20:23], s[36:37] offset:96
5284; GFX12-NEXT:    v_dual_mov_b32 v1, s46 :: v_dual_mov_b32 v0, s26
5285; GFX12-NEXT:    v_dual_mov_b32 v3, s45 :: v_dual_mov_b32 v2, s27
5286; GFX12-NEXT:    v_mov_b32_e32 v5, s44
5287; GFX12-NEXT:    s_ashr_i32 s39, s21, 16
5288; GFX12-NEXT:    s_ashr_i32 s40, s20, 16
5289; GFX12-NEXT:    s_sext_i32_i16 s21, s21
5290; GFX12-NEXT:    s_sext_i32_i16 s20, s20
5291; GFX12-NEXT:    v_dual_mov_b32 v4, s24 :: v_dual_mov_b32 v7, s43
5292; GFX12-NEXT:    v_dual_mov_b32 v6, s25 :: v_dual_mov_b32 v9, s42
5293; GFX12-NEXT:    s_ashr_i32 s35, s19, 16
5294; GFX12-NEXT:    s_ashr_i32 s38, s18, 16
5295; GFX12-NEXT:    s_sext_i32_i16 s19, s19
5296; GFX12-NEXT:    s_sext_i32_i16 s18, s18
5297; GFX12-NEXT:    v_dual_mov_b32 v8, s22 :: v_dual_mov_b32 v11, s41
5298; GFX12-NEXT:    v_dual_mov_b32 v10, s23 :: v_dual_mov_b32 v13, s40
5299; GFX12-NEXT:    s_ashr_i32 s33, s17, 16
5300; GFX12-NEXT:    s_ashr_i32 s34, s16, 16
5301; GFX12-NEXT:    s_sext_i32_i16 s17, s17
5302; GFX12-NEXT:    s_sext_i32_i16 s16, s16
5303; GFX12-NEXT:    v_dual_mov_b32 v12, s20 :: v_dual_mov_b32 v15, s39
5304; GFX12-NEXT:    v_dual_mov_b32 v14, s21 :: v_dual_mov_b32 v17, s38
5305; GFX12-NEXT:    v_dual_mov_b32 v16, s18 :: v_dual_mov_b32 v19, s35
5306; GFX12-NEXT:    v_dual_mov_b32 v18, s19 :: v_dual_mov_b32 v21, s34
5307; GFX12-NEXT:    v_dual_mov_b32 v20, s16 :: v_dual_mov_b32 v23, s33
5308; GFX12-NEXT:    v_mov_b32_e32 v22, s17
5309; GFX12-NEXT:    s_clause 0x5
5310; GFX12-NEXT:    global_store_b128 v24, v[0:3], s[36:37] offset:80
5311; GFX12-NEXT:    global_store_b128 v24, v[4:7], s[36:37] offset:64
5312; GFX12-NEXT:    global_store_b128 v24, v[8:11], s[36:37] offset:48
5313; GFX12-NEXT:    global_store_b128 v24, v[12:15], s[36:37] offset:32
5314; GFX12-NEXT:    global_store_b128 v24, v[16:19], s[36:37] offset:16
5315; GFX12-NEXT:    global_store_b128 v24, v[20:23], s[36:37]
5316; GFX12-NEXT:    s_endpgm
5317  %load = load <64 x i16>, ptr addrspace(4) %in
5318  %ext = sext <64 x i16> %load to <64 x i32>
5319  store <64 x i32> %ext, ptr addrspace(1) %out
5320  ret void
5321}
5322
; Scalar zero-extension case: load a single i16 through the addrspace(4)
; pointer %in, zext it to i64, and store the result through the addrspace(1)
; pointer %out.
;
; What the expected output below pins down per check prefix:
;  - GCN-NOHSA-SI / GCN-HSA: an unsigned 16-bit load (buffer_load_ushort /
;    flat_load_ushort) supplies the low dword and the high dword is a plain
;    "v_mov_b32 ..., 0"; no masking instruction is expected.
;  - GCN-NOHSA-VI / GFX12: the unsigned 16-bit load is followed by an explicit
;    "v_and_b32 ..., 0xffff" before the 64-bit store.
;  - EG: VTX_READ_16 for the low word, "MOV T0.Y, 0.0" for the high word.
;
; The check lines are autogenerated (see the NOTE on the first line of this
; file); regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand.
5323define amdgpu_kernel void @constant_zextload_i16_to_i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
5324; GCN-NOHSA-SI-LABEL: constant_zextload_i16_to_i64:
5325; GCN-NOHSA-SI:       ; %bb.0:
5326; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
5327; GCN-NOHSA-SI-NEXT:    s_mov_b32 s7, 0xf000
5328; GCN-NOHSA-SI-NEXT:    s_mov_b32 s6, -1
5329; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, s6
5330; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, s7
5331; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
5332; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s2
5333; GCN-NOHSA-SI-NEXT:    s_mov_b32 s9, s3
5334; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v0, off, s[8:11], 0
5335; GCN-NOHSA-SI-NEXT:    s_mov_b32 s4, s0
5336; GCN-NOHSA-SI-NEXT:    s_mov_b32 s5, s1
5337; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, 0
5338; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
5339; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
5340; GCN-NOHSA-SI-NEXT:    s_endpgm
5341;
5342; GCN-HSA-LABEL: constant_zextload_i16_to_i64:
5343; GCN-HSA:       ; %bb.0:
5344; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
5345; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
5346; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
5347; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
5348; GCN-HSA-NEXT:    flat_load_ushort v0, v[0:1]
5349; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s0
5350; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s1
5351; GCN-HSA-NEXT:    v_mov_b32_e32 v1, 0
5352; GCN-HSA-NEXT:    s_waitcnt vmcnt(0)
5353; GCN-HSA-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
5354; GCN-HSA-NEXT:    s_endpgm
5355;
5356; GCN-NOHSA-VI-LABEL: constant_zextload_i16_to_i64:
5357; GCN-NOHSA-VI:       ; %bb.0:
5358; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
5359; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, 0
5360; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
5361; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s2
5362; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s3
5363; GCN-NOHSA-VI-NEXT:    flat_load_ushort v2, v[0:1]
5364; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s0
5365; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s1
5366; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
5367; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v2, 0xffff, v2
5368; GCN-NOHSA-VI-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
5369; GCN-NOHSA-VI-NEXT:    s_endpgm
5370;
5371; EG-LABEL: constant_zextload_i16_to_i64:
5372; EG:       ; %bb.0:
5373; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
5374; EG-NEXT:    TEX 0 @6
5375; EG-NEXT:    ALU 2, @9, KC0[CB0:0-32], KC1[]
5376; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
5377; EG-NEXT:    CF_END
5378; EG-NEXT:    PAD
5379; EG-NEXT:    Fetch clause starting at 6:
5380; EG-NEXT:     VTX_READ_16 T0.X, T0.X, 0, #1
5381; EG-NEXT:    ALU clause starting at 8:
5382; EG-NEXT:     MOV * T0.X, KC0[2].Z,
5383; EG-NEXT:    ALU clause starting at 9:
5384; EG-NEXT:     MOV * T0.Y, 0.0,
5385; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
5386; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
5387;
5388; GFX12-LABEL: constant_zextload_i16_to_i64:
5389; GFX12:       ; %bb.0:
5390; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
5391; GFX12-NEXT:    v_mov_b32_e32 v1, 0
5392; GFX12-NEXT:    s_wait_kmcnt 0x0
5393; GFX12-NEXT:    global_load_u16 v0, v1, s[2:3]
5394; GFX12-NEXT:    s_wait_loadcnt 0x0
5395; GFX12-NEXT:    v_and_b32_e32 v0, 0xffff, v0
5396; GFX12-NEXT:    global_store_b64 v1, v[0:1], s[0:1]
5397; GFX12-NEXT:    s_endpgm
5398  %a = load i16, ptr addrspace(4) %in
5399  %ext = zext i16 %a to i64
5400  store i64 %ext, ptr addrspace(1) %out
5401  ret void
5402}
5403
; Scalar sign-extension case: load a single i16 through the addrspace(4)
; pointer %in, sext it to i64, and store the result through the addrspace(1)
; pointer %out.
;
; What the expected output below pins down per check prefix:
;  - GCN-NOHSA-SI / GCN-HSA: a sign-extending 16-bit load (buffer_load_sshort /
;    flat_load_sshort) followed by one "v_ashrrev_i32 ..., 31, ..." that
;    produces the high dword.
;  - GCN-NOHSA-VI / GFX12: an unsigned 16-bit load, then "v_bfe_i32 ..., 0, 16"
;    and the same shift by 31 -- presumably the "extra bfe" that the FIXME
;    below is about.
;  - EG: VTX_READ_16, then BFE_INT with literal 16 and ASHR by 31.
;
; The check lines are autogenerated (see the NOTE on the first line of this
; file); regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand.
5404; FIXME: Need to optimize this sequence to avoid extra bfe:
5405;  t28: i32,ch = load<LD2[%in(addrspace=1)], anyext from i16> t12, t27, undef:i64
5406;          t31: i64 = any_extend t28
5407;        t33: i64 = sign_extend_inreg t31, ValueType:ch:i16
5408; TODO: These could be expanded earlier using ASHR 15
5409define amdgpu_kernel void @constant_sextload_i16_to_i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
5410; GCN-NOHSA-SI-LABEL: constant_sextload_i16_to_i64:
5411; GCN-NOHSA-SI:       ; %bb.0:
5412; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
5413; GCN-NOHSA-SI-NEXT:    s_mov_b32 s7, 0xf000
5414; GCN-NOHSA-SI-NEXT:    s_mov_b32 s6, -1
5415; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, s6
5416; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, s7
5417; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
5418; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s2
5419; GCN-NOHSA-SI-NEXT:    s_mov_b32 s9, s3
5420; GCN-NOHSA-SI-NEXT:    buffer_load_sshort v0, off, s[8:11], 0
5421; GCN-NOHSA-SI-NEXT:    s_mov_b32 s4, s0
5422; GCN-NOHSA-SI-NEXT:    s_mov_b32 s5, s1
5423; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
5424; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
5425; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
5426; GCN-NOHSA-SI-NEXT:    s_endpgm
5427;
5428; GCN-HSA-LABEL: constant_sextload_i16_to_i64:
5429; GCN-HSA:       ; %bb.0:
5430; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
5431; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
5432; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
5433; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
5434; GCN-HSA-NEXT:    flat_load_sshort v0, v[0:1]
5435; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s0
5436; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s1
5437; GCN-HSA-NEXT:    s_waitcnt vmcnt(0)
5438; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
5439; GCN-HSA-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
5440; GCN-HSA-NEXT:    s_endpgm
5441;
5442; GCN-NOHSA-VI-LABEL: constant_sextload_i16_to_i64:
5443; GCN-NOHSA-VI:       ; %bb.0:
5444; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
5445; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
5446; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s2
5447; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s3
5448; GCN-NOHSA-VI-NEXT:    flat_load_ushort v2, v[0:1]
5449; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s0
5450; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s1
5451; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
5452; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v2, v2, 0, 16
5453; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
5454; GCN-NOHSA-VI-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
5455; GCN-NOHSA-VI-NEXT:    s_endpgm
5456;
5457; EG-LABEL: constant_sextload_i16_to_i64:
5458; EG:       ; %bb.0:
5459; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
5460; EG-NEXT:    TEX 0 @6
5461; EG-NEXT:    ALU 4, @9, KC0[CB0:0-32], KC1[]
5462; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
5463; EG-NEXT:    CF_END
5464; EG-NEXT:    PAD
5465; EG-NEXT:    Fetch clause starting at 6:
5466; EG-NEXT:     VTX_READ_16 T0.X, T0.X, 0, #1
5467; EG-NEXT:    ALU clause starting at 8:
5468; EG-NEXT:     MOV * T0.X, KC0[2].Z,
5469; EG-NEXT:    ALU clause starting at 9:
5470; EG-NEXT:     BFE_INT T0.X, T0.X, 0.0, literal.x,
5471; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.y,
5472; EG-NEXT:    16(2.242078e-44), 2(2.802597e-45)
5473; EG-NEXT:     ASHR * T0.Y, PV.X, literal.x,
5474; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
5475;
5476; GFX12-LABEL: constant_sextload_i16_to_i64:
5477; GFX12:       ; %bb.0:
5478; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
5479; GFX12-NEXT:    v_mov_b32_e32 v2, 0
5480; GFX12-NEXT:    s_wait_kmcnt 0x0
5481; GFX12-NEXT:    global_load_u16 v0, v2, s[2:3]
5482; GFX12-NEXT:    s_wait_loadcnt 0x0
5483; GFX12-NEXT:    v_bfe_i32 v0, v0, 0, 16
5484; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
5485; GFX12-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
5486; GFX12-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
5487; GFX12-NEXT:    s_endpgm
5488  %a = load i16, ptr addrspace(4) %in
5489  %ext = sext i16 %a to i64
5490  store i64 %ext, ptr addrspace(1) %out
5491  ret void
5492}
5493
; Single-element vector case: load a <1 x i16> through the addrspace(4)
; pointer %in, zext it to <1 x i64>, and store the result through the
; addrspace(1) pointer %out.
;
; Apart from the label, the expected instruction sequence for every prefix is
; the same as for the scalar @constant_zextload_i16_to_i64 test earlier in
; this file: an unsigned 16-bit load, a zero high dword, and (on VI and GFX12
; only) a "v_and_b32 ..., 0xffff" mask.
;
; The check lines are autogenerated (see the NOTE on the first line of this
; file); regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand.
5494define amdgpu_kernel void @constant_zextload_v1i16_to_v1i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
5495; GCN-NOHSA-SI-LABEL: constant_zextload_v1i16_to_v1i64:
5496; GCN-NOHSA-SI:       ; %bb.0:
5497; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
5498; GCN-NOHSA-SI-NEXT:    s_mov_b32 s7, 0xf000
5499; GCN-NOHSA-SI-NEXT:    s_mov_b32 s6, -1
5500; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, s6
5501; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, s7
5502; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
5503; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s2
5504; GCN-NOHSA-SI-NEXT:    s_mov_b32 s9, s3
5505; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v0, off, s[8:11], 0
5506; GCN-NOHSA-SI-NEXT:    s_mov_b32 s4, s0
5507; GCN-NOHSA-SI-NEXT:    s_mov_b32 s5, s1
5508; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, 0
5509; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
5510; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
5511; GCN-NOHSA-SI-NEXT:    s_endpgm
5512;
5513; GCN-HSA-LABEL: constant_zextload_v1i16_to_v1i64:
5514; GCN-HSA:       ; %bb.0:
5515; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
5516; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
5517; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
5518; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
5519; GCN-HSA-NEXT:    flat_load_ushort v0, v[0:1]
5520; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s0
5521; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s1
5522; GCN-HSA-NEXT:    v_mov_b32_e32 v1, 0
5523; GCN-HSA-NEXT:    s_waitcnt vmcnt(0)
5524; GCN-HSA-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
5525; GCN-HSA-NEXT:    s_endpgm
5526;
5527; GCN-NOHSA-VI-LABEL: constant_zextload_v1i16_to_v1i64:
5528; GCN-NOHSA-VI:       ; %bb.0:
5529; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
5530; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, 0
5531; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
5532; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s2
5533; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s3
5534; GCN-NOHSA-VI-NEXT:    flat_load_ushort v2, v[0:1]
5535; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s0
5536; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s1
5537; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
5538; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v2, 0xffff, v2
5539; GCN-NOHSA-VI-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
5540; GCN-NOHSA-VI-NEXT:    s_endpgm
5541;
5542; EG-LABEL: constant_zextload_v1i16_to_v1i64:
5543; EG:       ; %bb.0:
5544; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
5545; EG-NEXT:    TEX 0 @6
5546; EG-NEXT:    ALU 2, @9, KC0[CB0:0-32], KC1[]
5547; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
5548; EG-NEXT:    CF_END
5549; EG-NEXT:    PAD
5550; EG-NEXT:    Fetch clause starting at 6:
5551; EG-NEXT:     VTX_READ_16 T0.X, T0.X, 0, #1
5552; EG-NEXT:    ALU clause starting at 8:
5553; EG-NEXT:     MOV * T0.X, KC0[2].Z,
5554; EG-NEXT:    ALU clause starting at 9:
5555; EG-NEXT:     MOV * T0.Y, 0.0,
5556; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
5557; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
5558;
5559; GFX12-LABEL: constant_zextload_v1i16_to_v1i64:
5560; GFX12:       ; %bb.0:
5561; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
5562; GFX12-NEXT:    v_mov_b32_e32 v1, 0
5563; GFX12-NEXT:    s_wait_kmcnt 0x0
5564; GFX12-NEXT:    global_load_u16 v0, v1, s[2:3]
5565; GFX12-NEXT:    s_wait_loadcnt 0x0
5566; GFX12-NEXT:    v_and_b32_e32 v0, 0xffff, v0
5567; GFX12-NEXT:    global_store_b64 v1, v[0:1], s[0:1]
5568; GFX12-NEXT:    s_endpgm
5569  %load = load <1 x i16>, ptr addrspace(4) %in
5570  %ext = zext <1 x i16> %load to <1 x i64>
5571  store <1 x i64> %ext, ptr addrspace(1) %out
5572  ret void
5573}
5574
; Single-element vector case: load a <1 x i16> through the addrspace(4)
; pointer %in, sext it to <1 x i64>, and store the result through the
; addrspace(1) pointer %out.
;
; Apart from the label, the expected instruction sequence for every prefix is
; the same as for the scalar @constant_sextload_i16_to_i64 test earlier in
; this file: SI and HSA use a sign-extending short load plus one shift by 31,
; while VI and GFX12 load unsigned and add "v_bfe_i32 ..., 0, 16" before the
; shift.
;
; The check lines are autogenerated (see the NOTE on the first line of this
; file); regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand.
5575define amdgpu_kernel void @constant_sextload_v1i16_to_v1i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
5576; GCN-NOHSA-SI-LABEL: constant_sextload_v1i16_to_v1i64:
5577; GCN-NOHSA-SI:       ; %bb.0:
5578; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
5579; GCN-NOHSA-SI-NEXT:    s_mov_b32 s7, 0xf000
5580; GCN-NOHSA-SI-NEXT:    s_mov_b32 s6, -1
5581; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, s6
5582; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, s7
5583; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
5584; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s2
5585; GCN-NOHSA-SI-NEXT:    s_mov_b32 s9, s3
5586; GCN-NOHSA-SI-NEXT:    buffer_load_sshort v0, off, s[8:11], 0
5587; GCN-NOHSA-SI-NEXT:    s_mov_b32 s4, s0
5588; GCN-NOHSA-SI-NEXT:    s_mov_b32 s5, s1
5589; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
5590; GCN-NOHSA-SI-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
5591; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
5592; GCN-NOHSA-SI-NEXT:    s_endpgm
5593;
5594; GCN-HSA-LABEL: constant_sextload_v1i16_to_v1i64:
5595; GCN-HSA:       ; %bb.0:
5596; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
5597; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
5598; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
5599; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
5600; GCN-HSA-NEXT:    flat_load_sshort v0, v[0:1]
5601; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s0
5602; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s1
5603; GCN-HSA-NEXT:    s_waitcnt vmcnt(0)
5604; GCN-HSA-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
5605; GCN-HSA-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
5606; GCN-HSA-NEXT:    s_endpgm
5607;
5608; GCN-NOHSA-VI-LABEL: constant_sextload_v1i16_to_v1i64:
5609; GCN-NOHSA-VI:       ; %bb.0:
5610; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
5611; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
5612; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s2
5613; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s3
5614; GCN-NOHSA-VI-NEXT:    flat_load_ushort v2, v[0:1]
5615; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s0
5616; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s1
5617; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
5618; GCN-NOHSA-VI-NEXT:    v_bfe_i32 v2, v2, 0, 16
5619; GCN-NOHSA-VI-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
5620; GCN-NOHSA-VI-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
5621; GCN-NOHSA-VI-NEXT:    s_endpgm
5622;
5623; EG-LABEL: constant_sextload_v1i16_to_v1i64:
5624; EG:       ; %bb.0:
5625; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
5626; EG-NEXT:    TEX 0 @6
5627; EG-NEXT:    ALU 4, @9, KC0[CB0:0-32], KC1[]
5628; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
5629; EG-NEXT:    CF_END
5630; EG-NEXT:    PAD
5631; EG-NEXT:    Fetch clause starting at 6:
5632; EG-NEXT:     VTX_READ_16 T0.X, T0.X, 0, #1
5633; EG-NEXT:    ALU clause starting at 8:
5634; EG-NEXT:     MOV * T0.X, KC0[2].Z,
5635; EG-NEXT:    ALU clause starting at 9:
5636; EG-NEXT:     BFE_INT T0.X, T0.X, 0.0, literal.x,
5637; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.y,
5638; EG-NEXT:    16(2.242078e-44), 2(2.802597e-45)
5639; EG-NEXT:     ASHR * T0.Y, PV.X, literal.x,
5640; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
5641;
5642; GFX12-LABEL: constant_sextload_v1i16_to_v1i64:
5643; GFX12:       ; %bb.0:
5644; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
5645; GFX12-NEXT:    v_mov_b32_e32 v2, 0
5646; GFX12-NEXT:    s_wait_kmcnt 0x0
5647; GFX12-NEXT:    global_load_u16 v0, v2, s[2:3]
5648; GFX12-NEXT:    s_wait_loadcnt 0x0
5649; GFX12-NEXT:    v_bfe_i32 v0, v0, 0, 16
5650; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
5651; GFX12-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
5652; GFX12-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
5653; GFX12-NEXT:    s_endpgm
5654  %load = load <1 x i16>, ptr addrspace(4) %in
5655  %ext = sext <1 x i16> %load to <1 x i64>
5656  store <1 x i64> %ext, ptr addrspace(1) %out
5657  ret void
5658}
5659
; Two-element vector case: load a <2 x i16> through the addrspace(4) pointer
; %in, zext it to <2 x i64>, and store the result through the addrspace(1)
; pointer %out.
;
; What the expected output below pins down per check prefix:
;  - GCN-NOHSA-SI / GCN-HSA / GCN-NOHSA-VI: both elements arrive in a single
;    scalar "s_load_dword"; element 0 is extracted with "s_and_b32 ..., 0xffff"
;    and element 1 with "s_lshr_b32 ..., 16". The two high dwords share one
;    zero (v_mov_b32 v1, 0 copied into v3) and everything is written with one
;    dwordx4 store.
;  - GFX12: "s_load_b32", with "s_and_b32 s3, 0xffff, s2" for element 0 and
;    "s_pack_hl_b32_b16 s2, s2, 0" for element 1, then one global_store_b128.
;  - EG: VTX_READ_32, then AND_INT with 65535 and LSHR by 16; the Y and W
;    components are set with "MOV ..., 0.0".
;
; The check lines are autogenerated (see the NOTE on the first line of this
; file); regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand.
5660define amdgpu_kernel void @constant_zextload_v2i16_to_v2i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
5661; GCN-NOHSA-SI-LABEL: constant_zextload_v2i16_to_v2i64:
5662; GCN-NOHSA-SI:       ; %bb.0:
5663; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
5664; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
5665; GCN-NOHSA-SI-NEXT:    s_load_dword s2, s[2:3], 0x0
5666; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
5667; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, 0
5668; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
5669; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s4, s2, 16
5670; GCN-NOHSA-SI-NEXT:    s_and_b32 s5, s2, 0xffff
5671; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
5672; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, v1
5673; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s5
5674; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s4
5675; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
5676; GCN-NOHSA-SI-NEXT:    s_endpgm
5677;
5678; GCN-HSA-LABEL: constant_zextload_v2i16_to_v2i64:
5679; GCN-HSA:       ; %bb.0:
5680; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
5681; GCN-HSA-NEXT:    v_mov_b32_e32 v1, 0
5682; GCN-HSA-NEXT:    v_mov_b32_e32 v3, v1
5683; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
5684; GCN-HSA-NEXT:    s_load_dword s2, s[2:3], 0x0
5685; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
5686; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
5687; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
5688; GCN-HSA-NEXT:    s_lshr_b32 s0, s2, 16
5689; GCN-HSA-NEXT:    s_and_b32 s1, s2, 0xffff
5690; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s1
5691; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s0
5692; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
5693; GCN-HSA-NEXT:    s_endpgm
5694;
5695; GCN-NOHSA-VI-LABEL: constant_zextload_v2i16_to_v2i64:
5696; GCN-NOHSA-VI:       ; %bb.0:
5697; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
5698; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, 0
5699; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, v1
5700; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
5701; GCN-NOHSA-VI-NEXT:    s_load_dword s2, s[2:3], 0x0
5702; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s0
5703; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s1
5704; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
5705; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s0, s2, 16
5706; GCN-NOHSA-VI-NEXT:    s_and_b32 s1, s2, 0xffff
5707; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s1
5708; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s0
5709; GCN-NOHSA-VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
5710; GCN-NOHSA-VI-NEXT:    s_endpgm
5711;
5712; EG-LABEL: constant_zextload_v2i16_to_v2i64:
5713; EG:       ; %bb.0:
5714; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
5715; EG-NEXT:    TEX 0 @6
5716; EG-NEXT:    ALU 6, @9, KC0[CB0:0-32], KC1[]
5717; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T4.XYZW, T5.X, 1
5718; EG-NEXT:    CF_END
5719; EG-NEXT:    PAD
5720; EG-NEXT:    Fetch clause starting at 6:
5721; EG-NEXT:     VTX_READ_32 T4.X, T4.X, 0, #1
5722; EG-NEXT:    ALU clause starting at 8:
5723; EG-NEXT:     MOV * T4.X, KC0[2].Z,
5724; EG-NEXT:    ALU clause starting at 9:
5725; EG-NEXT:     LSHR * T4.Z, T4.X, literal.x,
5726; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
5727; EG-NEXT:     AND_INT T4.X, T4.X, literal.x,
5728; EG-NEXT:     MOV T4.Y, 0.0,
5729; EG-NEXT:     MOV T4.W, 0.0,
5730; EG-NEXT:     LSHR * T5.X, KC0[2].Y, literal.y,
5731; EG-NEXT:    65535(9.183409e-41), 2(2.802597e-45)
5732;
5733; GFX12-LABEL: constant_zextload_v2i16_to_v2i64:
5734; GFX12:       ; %bb.0:
5735; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
5736; GFX12-NEXT:    s_wait_kmcnt 0x0
5737; GFX12-NEXT:    s_load_b32 s2, s[2:3], 0x0
5738; GFX12-NEXT:    s_wait_kmcnt 0x0
5739; GFX12-NEXT:    s_and_b32 s3, 0xffff, s2
5740; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
5741; GFX12-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s3
5742; GFX12-NEXT:    s_pack_hl_b32_b16 s2, s2, 0
5743; GFX12-NEXT:    s_wait_alu 0xfffe
5744; GFX12-NEXT:    v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, v1
5745; GFX12-NEXT:    global_store_b128 v1, v[0:3], s[0:1]
5746; GFX12-NEXT:    s_endpgm
5747  %load = load <2 x i16>, ptr addrspace(4) %in
5748  %ext = zext <2 x i16> %load to <2 x i64>
5749  store <2 x i64> %ext, ptr addrspace(1) %out
5750  ret void
5751}
5752
; Two-element vector case: load a <2 x i16> through the addrspace(4) pointer
; %in, sext it to <2 x i64>, and store the result through the addrspace(1)
; pointer %out.
;
; What the expected output below pins down per check prefix:
;  - GCN-NOHSA-SI / GCN-HSA / GCN-NOHSA-VI / GFX12: both elements arrive in a
;    single 32-bit scalar load. Element 1 is first moved down with
;    "s_lshr_b32 ..., 16", and each element is then widened to a 64-bit
;    register pair by its own "s_bfe_i64 ..., 0x100000" (presumably a 16-bit
;    field at bit offset 0 -- confirm against the ISA encoding). The four
;    resulting dwords are written with one 128-bit store.
;  - EG: VTX_READ_32; element 1 comes from ASHR by 16 with its high word from
;    ASHR by 31 of the loaded dword, element 0 from BFE_INT with literal 16
;    with its high word from ASHR by 31 of that result.
;
; The check lines are autogenerated (see the NOTE on the first line of this
; file); regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand.
5753define amdgpu_kernel void @constant_sextload_v2i16_to_v2i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
5754; GCN-NOHSA-SI-LABEL: constant_sextload_v2i16_to_v2i64:
5755; GCN-NOHSA-SI:       ; %bb.0:
5756; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
5757; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
5758; GCN-NOHSA-SI-NEXT:    s_load_dword s2, s[2:3], 0x0
5759; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
5760; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
5761; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s4, s2, 16
5762; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[6:7], s[2:3], 0x100000
5763; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[4:5], s[4:5], 0x100000
5764; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
5765; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s6
5766; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s7
5767; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s4
5768; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s5
5769; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
5770; GCN-NOHSA-SI-NEXT:    s_endpgm
5771;
5772; GCN-HSA-LABEL: constant_sextload_v2i16_to_v2i64:
5773; GCN-HSA:       ; %bb.0:
5774; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
5775; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
5776; GCN-HSA-NEXT:    s_load_dword s2, s[2:3], 0x0
5777; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
5778; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
5779; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
5780; GCN-HSA-NEXT:    s_lshr_b32 s0, s2, 16
5781; GCN-HSA-NEXT:    s_bfe_i64 s[2:3], s[2:3], 0x100000
5782; GCN-HSA-NEXT:    s_bfe_i64 s[0:1], s[0:1], 0x100000
5783; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
5784; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
5785; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s0
5786; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s1
5787; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
5788; GCN-HSA-NEXT:    s_endpgm
5789;
5790; GCN-NOHSA-VI-LABEL: constant_sextload_v2i16_to_v2i64:
5791; GCN-NOHSA-VI:       ; %bb.0:
5792; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
5793; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
5794; GCN-NOHSA-VI-NEXT:    s_load_dword s2, s[2:3], 0x0
5795; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s0
5796; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s1
5797; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
5798; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[0:1], s[2:3], 0x100000
5799; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s2, s2, 16
5800; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[2:3], s[2:3], 0x100000
5801; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s0
5802; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s1
5803; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s2
5804; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s3
5805; GCN-NOHSA-VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
5806; GCN-NOHSA-VI-NEXT:    s_endpgm
5807;
5808; EG-LABEL: constant_sextload_v2i16_to_v2i64:
5809; EG:       ; %bb.0:
5810; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
5811; EG-NEXT:    TEX 0 @6
5812; EG-NEXT:    ALU 8, @9, KC0[CB0:0-32], KC1[]
5813; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T4.XYZW, T5.X, 1
5814; EG-NEXT:    CF_END
5815; EG-NEXT:    PAD
5816; EG-NEXT:    Fetch clause starting at 6:
5817; EG-NEXT:     VTX_READ_32 T4.X, T4.X, 0, #1
5818; EG-NEXT:    ALU clause starting at 8:
5819; EG-NEXT:     MOV * T4.X, KC0[2].Z,
5820; EG-NEXT:    ALU clause starting at 9:
5821; EG-NEXT:     ASHR * T4.W, T4.X, literal.x,
5822; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
5823; EG-NEXT:     ASHR * T4.Z, T4.X, literal.x,
5824; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
5825; EG-NEXT:     BFE_INT T4.X, T4.X, 0.0, literal.x,
5826; EG-NEXT:     LSHR * T5.X, KC0[2].Y, literal.y,
5827; EG-NEXT:    16(2.242078e-44), 2(2.802597e-45)
5828; EG-NEXT:     ASHR * T4.Y, PV.X, literal.x,
5829; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
5830;
5831; GFX12-LABEL: constant_sextload_v2i16_to_v2i64:
5832; GFX12:       ; %bb.0:
5833; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
5834; GFX12-NEXT:    s_wait_kmcnt 0x0
5835; GFX12-NEXT:    s_load_b32 s2, s[2:3], 0x0
5836; GFX12-NEXT:    s_wait_kmcnt 0x0
5837; GFX12-NEXT:    s_lshr_b32 s4, s2, 16
5838; GFX12-NEXT:    s_bfe_i64 s[2:3], s[2:3], 0x100000
5839; GFX12-NEXT:    s_bfe_i64 s[4:5], s[4:5], 0x100000
5840; GFX12-NEXT:    v_dual_mov_b32 v4, 0 :: v_dual_mov_b32 v1, s3
5841; GFX12-NEXT:    v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v3, s5
5842; GFX12-NEXT:    v_mov_b32_e32 v2, s4
5843; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[0:1]
5844; GFX12-NEXT:    s_endpgm
5845  %load = load <2 x i16>, ptr addrspace(4) %in
5846  %ext = sext <2 x i16> %load to <2 x i64>
5847  store <2 x i64> %ext, ptr addrspace(1) %out
5848  ret void
5849}
5850
5851define amdgpu_kernel void @constant_zextload_v4i16_to_v4i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
5852; GCN-NOHSA-SI-LABEL: constant_zextload_v4i16_to_v4i64:
5853; GCN-NOHSA-SI:       ; %bb.0:
5854; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
5855; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
5856; GCN-NOHSA-SI-NEXT:    s_load_dwordx2 s[4:5], s[2:3], 0x0
5857; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
5858; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, 0
5859; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
5860; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, v1
5861; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
5862; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s6, s5, 16
5863; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s7, s4, 16
5864; GCN-NOHSA-SI-NEXT:    s_and_b32 s5, s5, 0xffff
5865; GCN-NOHSA-SI-NEXT:    s_and_b32 s4, s4, 0xffff
5866; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s5
5867; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s6
5868; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
5869; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
5870; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s4
5871; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s7
5872; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
5873; GCN-NOHSA-SI-NEXT:    s_endpgm
5874;
5875; GCN-HSA-LABEL: constant_zextload_v4i16_to_v4i64:
5876; GCN-HSA:       ; %bb.0:
5877; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
5878; GCN-HSA-NEXT:    v_mov_b32_e32 v1, 0
5879; GCN-HSA-NEXT:    v_mov_b32_e32 v3, v1
5880; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
5881; GCN-HSA-NEXT:    s_load_dwordx2 s[2:3], s[2:3], 0x0
5882; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
5883; GCN-HSA-NEXT:    s_lshr_b32 s4, s3, 16
5884; GCN-HSA-NEXT:    s_lshr_b32 s5, s2, 16
5885; GCN-HSA-NEXT:    s_and_b32 s6, s2, 0xffff
5886; GCN-HSA-NEXT:    s_and_b32 s2, s3, 0xffff
5887; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
5888; GCN-HSA-NEXT:    s_add_u32 s2, s0, 16
5889; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
5890; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
5891; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s4
5892; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
5893; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
5894; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
5895; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s6
5896; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s5
5897; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
5898; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
5899; GCN-HSA-NEXT:    s_endpgm
5900;
5901; GCN-NOHSA-VI-LABEL: constant_zextload_v4i16_to_v4i64:
5902; GCN-NOHSA-VI:       ; %bb.0:
5903; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
5904; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, 0
5905; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, v1
5906; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
5907; GCN-NOHSA-VI-NEXT:    s_load_dwordx2 s[2:3], s[2:3], 0x0
5908; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
5909; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s4, s2, 16
5910; GCN-NOHSA-VI-NEXT:    s_and_b32 s5, s2, 0xffff
5911; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s2, s3, 16
5912; GCN-NOHSA-VI-NEXT:    s_and_b32 s3, s3, 0xffff
5913; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s2
5914; GCN-NOHSA-VI-NEXT:    s_add_u32 s2, s0, 16
5915; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s3
5916; GCN-NOHSA-VI-NEXT:    s_addc_u32 s3, s1, 0
5917; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s3
5918; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s2
5919; GCN-NOHSA-VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
5920; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s1
5921; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s5
5922; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s4
5923; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s0
5924; GCN-NOHSA-VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
5925; GCN-NOHSA-VI-NEXT:    s_endpgm
5926;
5927; EG-LABEL: constant_zextload_v4i16_to_v4i64:
5928; EG:       ; %bb.0:
5929; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
5930; EG-NEXT:    TEX 0 @6
5931; EG-NEXT:    ALU 14, @9, KC0[CB0:0-32], KC1[]
5932; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T6.XYZW, T8.X, 0
5933; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T5.XYZW, T7.X, 1
5934; EG-NEXT:    CF_END
5935; EG-NEXT:    Fetch clause starting at 6:
5936; EG-NEXT:     VTX_READ_64 T5.XY, T5.X, 0, #1
5937; EG-NEXT:    ALU clause starting at 8:
5938; EG-NEXT:     MOV * T5.X, KC0[2].Z,
5939; EG-NEXT:    ALU clause starting at 9:
5940; EG-NEXT:     LSHR * T6.Z, T5.Y, literal.x,
5941; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
5942; EG-NEXT:     AND_INT T6.X, T5.Y, literal.x,
5943; EG-NEXT:     MOV T6.Y, 0.0,
5944; EG-NEXT:     LSHR T5.Z, T5.X, literal.y,
5945; EG-NEXT:     AND_INT * T5.X, T5.X, literal.x,
5946; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
5947; EG-NEXT:     MOV T5.Y, 0.0,
5948; EG-NEXT:     MOV T6.W, 0.0,
5949; EG-NEXT:     MOV * T5.W, 0.0,
5950; EG-NEXT:     LSHR T7.X, KC0[2].Y, literal.x,
5951; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
5952; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
5953; EG-NEXT:     LSHR * T8.X, PV.W, literal.x,
5954; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
5955;
5956; GFX12-LABEL: constant_zextload_v4i16_to_v4i64:
5957; GFX12:       ; %bb.0:
5958; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
5959; GFX12-NEXT:    s_wait_kmcnt 0x0
5960; GFX12-NEXT:    s_load_b64 s[2:3], s[2:3], 0x0
5961; GFX12-NEXT:    s_wait_kmcnt 0x0
5962; GFX12-NEXT:    s_and_b32 s4, 0xffff, s2
5963; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
5964; GFX12-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s4
5965; GFX12-NEXT:    s_pack_hl_b32_b16 s2, s2, 0
5966; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
5967; GFX12-NEXT:    v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, v1
5968; GFX12-NEXT:    s_pack_hl_b32_b16 s2, s3, 0
5969; GFX12-NEXT:    s_and_b32 s3, 0xffff, s3
5970; GFX12-NEXT:    global_store_b128 v1, v[0:3], s[0:1]
5971; GFX12-NEXT:    s_wait_alu 0xfffe
5972; GFX12-NEXT:    v_mov_b32_e32 v0, s3
5973; GFX12-NEXT:    v_mov_b32_e32 v2, s2
5974; GFX12-NEXT:    global_store_b128 v1, v[0:3], s[0:1] offset:16
5975; GFX12-NEXT:    s_endpgm
5976  %load = load <4 x i16>, ptr addrspace(4) %in
5977  %ext = zext <4 x i16> %load to <4 x i64>
5978  store <4 x i64> %ext, ptr addrspace(1) %out
5979  ret void
5980}
5981
; Codegen test: load a <4 x i16> vector through the addrspace(4) pointer %in,
; sign-extend every element to i64, and store the resulting <4 x i64> through
; the addrspace(1) pointer %out. Attribute group #0 is defined elsewhere in
; this file.
;
; The check lines inside the function are autogenerated by
; utils/update_llc_test_checks.py, one group per llc invocation listed at the
; top of the file (SI, HSA/kaveri, VI/tonga, r600/redwood and gfx1200).
; Regenerate them with that script instead of editing them by hand.
;
; As the expected output shows, each target fetches the four i16 values with a
; single 64-bit load and writes the result with two four-dword stores, at byte
; offsets 0 and 16 from %out.
5982define amdgpu_kernel void @constant_sextload_v4i16_to_v4i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
5983; GCN-NOHSA-SI-LABEL: constant_sextload_v4i16_to_v4i64:
5984; GCN-NOHSA-SI:       ; %bb.0:
5985; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
5986; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
5987; GCN-NOHSA-SI-NEXT:    s_load_dwordx2 s[4:5], s[2:3], 0x0
5988; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
5989; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
5990; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
5991; GCN-NOHSA-SI-NEXT:    s_mov_b32 s6, s5
5992; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s8, s4, 16
5993; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[10:11], s[4:5], 0x100000
5994; GCN-NOHSA-SI-NEXT:    s_ashr_i64 s[4:5], s[4:5], 48
5995; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[6:7], s[6:7], 0x100000
5996; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[8:9], s[8:9], 0x100000
5997; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s6
5998; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s7
5999; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s4
6000; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s5
6001; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
6002; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
6003; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s10
6004; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s11
6005; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s8
6006; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s9
6007; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
6008; GCN-NOHSA-SI-NEXT:    s_endpgm
6009;
6010; GCN-HSA-LABEL: constant_sextload_v4i16_to_v4i64:
6011; GCN-HSA:       ; %bb.0:
6012; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
6013; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
6014; GCN-HSA-NEXT:    s_load_dwordx2 s[2:3], s[2:3], 0x0
6015; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
6016; GCN-HSA-NEXT:    s_mov_b32 s4, s3
6017; GCN-HSA-NEXT:    s_lshr_b32 s6, s2, 16
6018; GCN-HSA-NEXT:    s_bfe_i64 s[8:9], s[2:3], 0x100000
6019; GCN-HSA-NEXT:    s_ashr_i64 s[2:3], s[2:3], 48
6020; GCN-HSA-NEXT:    s_bfe_i64 s[6:7], s[6:7], 0x100000
6021; GCN-HSA-NEXT:    s_bfe_i64 s[4:5], s[4:5], 0x100000
6022; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s2
6023; GCN-HSA-NEXT:    s_add_u32 s2, s0, 16
6024; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s3
6025; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
6026; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
6027; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s4
6028; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s5
6029; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
6030; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6031; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
6032; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s8
6033; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s9
6034; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s6
6035; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s7
6036; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
6037; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6038; GCN-HSA-NEXT:    s_endpgm
6039;
6040; GCN-NOHSA-VI-LABEL: constant_sextload_v4i16_to_v4i64:
6041; GCN-NOHSA-VI:       ; %bb.0:
6042; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
6043; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
6044; GCN-NOHSA-VI-NEXT:    s_load_dwordx2 s[2:3], s[2:3], 0x0
6045; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
6046; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[4:5], s[2:3], 0x100000
6047; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s2, s2, 16
6048; GCN-NOHSA-VI-NEXT:    s_mov_b32 s6, s3
6049; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s8, s3, 16
6050; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[2:3], s[2:3], 0x100000
6051; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[6:7], s[6:7], 0x100000
6052; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[8:9], s[8:9], 0x100000
6053; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s6
6054; GCN-NOHSA-VI-NEXT:    s_add_u32 s6, s0, 16
6055; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s7
6056; GCN-NOHSA-VI-NEXT:    s_addc_u32 s7, s1, 0
6057; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s6
6058; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s8
6059; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s9
6060; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s7
6061; GCN-NOHSA-VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6062; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s1
6063; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s4
6064; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s5
6065; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s2
6066; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s3
6067; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s0
6068; GCN-NOHSA-VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6069; GCN-NOHSA-VI-NEXT:    s_endpgm
6070;
6071; EG-LABEL: constant_sextload_v4i16_to_v4i64:
6072; EG:       ; %bb.0:
6073; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
6074; EG-NEXT:    TEX 0 @6
6075; EG-NEXT:    ALU 16, @9, KC0[CB0:0-32], KC1[]
6076; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T7.XYZW, T8.X, 0
6077; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T5.XYZW, T6.X, 1
6078; EG-NEXT:    CF_END
6079; EG-NEXT:    Fetch clause starting at 6:
6080; EG-NEXT:     VTX_READ_64 T5.XY, T5.X, 0, #1
6081; EG-NEXT:    ALU clause starting at 8:
6082; EG-NEXT:     MOV * T5.X, KC0[2].Z,
6083; EG-NEXT:    ALU clause starting at 9:
6084; EG-NEXT:     ASHR * T5.W, T5.X, literal.x,
6085; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
6086; EG-NEXT:     LSHR T6.X, KC0[2].Y, literal.x,
6087; EG-NEXT:     ASHR T5.Z, T5.X, literal.y,
6088; EG-NEXT:     ASHR * T7.W, T5.Y, literal.z,
6089; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
6090; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
6091; EG-NEXT:     BFE_INT T5.X, T5.X, 0.0, literal.x,
6092; EG-NEXT:     ASHR * T7.Z, T5.Y, literal.x,
6093; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
6094; EG-NEXT:     BFE_INT T7.X, T5.Y, 0.0, literal.x,
6095; EG-NEXT:     ASHR T5.Y, PV.X, literal.y,
6096; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.x,
6097; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
6098; EG-NEXT:     LSHR T8.X, PV.W, literal.x,
6099; EG-NEXT:     ASHR * T7.Y, PV.X, literal.y,
6100; EG-NEXT:    2(2.802597e-45), 31(4.344025e-44)
6101;
6102; GFX12-LABEL: constant_sextload_v4i16_to_v4i64:
6103; GFX12:       ; %bb.0:
6104; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
6105; GFX12-NEXT:    s_wait_kmcnt 0x0
6106; GFX12-NEXT:    s_load_b64 s[2:3], s[2:3], 0x0
6107; GFX12-NEXT:    s_wait_kmcnt 0x0
6108; GFX12-NEXT:    s_mov_b32 s6, s3
6109; GFX12-NEXT:    s_lshr_b32 s8, s3, 16
6110; GFX12-NEXT:    s_bfe_i64 s[4:5], s[2:3], 0x100000
6111; GFX12-NEXT:    s_lshr_b32 s2, s2, 16
6112; GFX12-NEXT:    s_bfe_i64 s[6:7], s[6:7], 0x100000
6113; GFX12-NEXT:    s_bfe_i64 s[8:9], s[8:9], 0x100000
6114; GFX12-NEXT:    s_bfe_i64 s[2:3], s[2:3], 0x100000
6115; GFX12-NEXT:    v_dual_mov_b32 v8, 0 :: v_dual_mov_b32 v1, s5
6116; GFX12-NEXT:    v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v5, s7
6117; GFX12-NEXT:    v_dual_mov_b32 v4, s6 :: v_dual_mov_b32 v7, s9
6118; GFX12-NEXT:    v_dual_mov_b32 v6, s8 :: v_dual_mov_b32 v3, s3
6119; GFX12-NEXT:    v_mov_b32_e32 v2, s2
6120; GFX12-NEXT:    s_clause 0x1
6121; GFX12-NEXT:    global_store_b128 v8, v[4:7], s[0:1] offset:16
6122; GFX12-NEXT:    global_store_b128 v8, v[0:3], s[0:1]
6123; GFX12-NEXT:    s_endpgm
6124  %load = load <4 x i16>, ptr addrspace(4) %in
6125  %ext = sext <4 x i16> %load to <4 x i64>
6126  store <4 x i64> %ext, ptr addrspace(1) %out
6127  ret void
6128}
6129
; Codegen test: load an <8 x i16> vector through the addrspace(4) pointer %in,
; zero-extend every element to i64, and store the resulting <8 x i64> through
; the addrspace(1) pointer %out. Attribute group #0 is defined elsewhere in
; this file.
;
; The check lines inside the function are autogenerated by
; utils/update_llc_test_checks.py, one group per llc invocation listed at the
; top of the file (SI, HSA/kaveri, VI/tonga, r600/redwood and gfx1200).
; Regenerate them with that script instead of editing them by hand.
;
; As the expected output shows, each target fetches the eight i16 values with
; a single 128-bit load, splits each loaded dword into its low half (mask with
; 0xffff) and high half (shift right by 16, or s_pack_hl_b32_b16 on gfx1200),
; fills the upper dword of every i64 from a zero constant, and writes the
; result with four four-dword stores at byte offsets 0, 16, 32 and 48.
6130define amdgpu_kernel void @constant_zextload_v8i16_to_v8i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
6131; GCN-NOHSA-SI-LABEL: constant_zextload_v8i16_to_v8i64:
6132; GCN-NOHSA-SI:       ; %bb.0:
6133; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
6134; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
6135; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x0
6136; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
6137; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, 0
6138; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
6139; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, v1
6140; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
6141; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s8, s5, 16
6142; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s9, s7, 16
6143; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s10, s6, 16
6144; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s11, s4, 16
6145; GCN-NOHSA-SI-NEXT:    s_and_b32 s4, s4, 0xffff
6146; GCN-NOHSA-SI-NEXT:    s_and_b32 s6, s6, 0xffff
6147; GCN-NOHSA-SI-NEXT:    s_and_b32 s7, s7, 0xffff
6148; GCN-NOHSA-SI-NEXT:    s_and_b32 s5, s5, 0xffff
6149; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s7
6150; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s9
6151; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
6152; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
6153; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s5
6154; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s8
6155; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
6156; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
6157; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s6
6158; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s10
6159; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
6160; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
6161; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s4
6162; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s11
6163; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
6164; GCN-NOHSA-SI-NEXT:    s_endpgm
6165;
6166; GCN-HSA-LABEL: constant_zextload_v8i16_to_v8i64:
6167; GCN-HSA:       ; %bb.0:
6168; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
6169; GCN-HSA-NEXT:    v_mov_b32_e32 v1, 0
6170; GCN-HSA-NEXT:    v_mov_b32_e32 v3, v1
6171; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
6172; GCN-HSA-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x0
6173; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
6174; GCN-HSA-NEXT:    s_lshr_b32 s8, s5, 16
6175; GCN-HSA-NEXT:    s_lshr_b32 s2, s7, 16
6176; GCN-HSA-NEXT:    s_lshr_b32 s9, s6, 16
6177; GCN-HSA-NEXT:    s_lshr_b32 s10, s4, 16
6178; GCN-HSA-NEXT:    s_and_b32 s4, s4, 0xffff
6179; GCN-HSA-NEXT:    s_and_b32 s6, s6, 0xffff
6180; GCN-HSA-NEXT:    s_and_b32 s5, s5, 0xffff
6181; GCN-HSA-NEXT:    s_and_b32 s3, s7, 0xffff
6182; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s2
6183; GCN-HSA-NEXT:    s_add_u32 s2, s0, 48
6184; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s3
6185; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
6186; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
6187; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
6188; GCN-HSA-NEXT:    s_add_u32 s2, s0, 16
6189; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
6190; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6191; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
6192; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
6193; GCN-HSA-NEXT:    s_add_u32 s2, s0, 32
6194; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s5
6195; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s8
6196; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
6197; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6198; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
6199; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s6
6200; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s9
6201; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
6202; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6203; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
6204; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s4
6205; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s10
6206; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
6207; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6208; GCN-HSA-NEXT:    s_endpgm
6209;
6210; GCN-NOHSA-VI-LABEL: constant_zextload_v8i16_to_v8i64:
6211; GCN-NOHSA-VI:       ; %bb.0:
6212; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
6213; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, 0
6214; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, v1
6215; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
6216; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x0
6217; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
6218; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s8, s4, 16
6219; GCN-NOHSA-VI-NEXT:    s_and_b32 s4, s4, 0xffff
6220; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s9, s5, 16
6221; GCN-NOHSA-VI-NEXT:    s_and_b32 s5, s5, 0xffff
6222; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s10, s6, 16
6223; GCN-NOHSA-VI-NEXT:    s_and_b32 s6, s6, 0xffff
6224; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s2, s7, 16
6225; GCN-NOHSA-VI-NEXT:    s_and_b32 s3, s7, 0xffff
6226; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s2
6227; GCN-NOHSA-VI-NEXT:    s_add_u32 s2, s0, 48
6228; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s3
6229; GCN-NOHSA-VI-NEXT:    s_addc_u32 s3, s1, 0
6230; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s3
6231; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s2
6232; GCN-NOHSA-VI-NEXT:    s_add_u32 s2, s0, 32
6233; GCN-NOHSA-VI-NEXT:    s_addc_u32 s3, s1, 0
6234; GCN-NOHSA-VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6235; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s3
6236; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s2
6237; GCN-NOHSA-VI-NEXT:    s_add_u32 s2, s0, 16
6238; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s6
6239; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s10
6240; GCN-NOHSA-VI-NEXT:    s_addc_u32 s3, s1, 0
6241; GCN-NOHSA-VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6242; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s3
6243; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s5
6244; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s9
6245; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s2
6246; GCN-NOHSA-VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6247; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s1
6248; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s4
6249; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s8
6250; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s0
6251; GCN-NOHSA-VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6252; GCN-NOHSA-VI-NEXT:    s_endpgm
6253;
6254; EG-LABEL: constant_zextload_v8i16_to_v8i64:
6255; EG:       ; %bb.0:
6256; EG-NEXT:    ALU 0, @10, KC0[CB0:0-32], KC1[]
6257; EG-NEXT:    TEX 0 @8
6258; EG-NEXT:    ALU 30, @11, KC0[CB0:0-32], KC1[]
6259; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T8.XYZW, T14.X, 0
6260; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T9.XYZW, T13.X, 0
6261; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T10.XYZW, T12.X, 0
6262; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T7.XYZW, T11.X, 1
6263; EG-NEXT:    CF_END
6264; EG-NEXT:    Fetch clause starting at 8:
6265; EG-NEXT:     VTX_READ_128 T7.XYZW, T7.X, 0, #1
6266; EG-NEXT:    ALU clause starting at 10:
6267; EG-NEXT:     MOV * T7.X, KC0[2].Z,
6268; EG-NEXT:    ALU clause starting at 11:
6269; EG-NEXT:     LSHR * T8.Z, T7.W, literal.x,
6270; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
6271; EG-NEXT:     AND_INT T8.X, T7.W, literal.x,
6272; EG-NEXT:     MOV T8.Y, 0.0,
6273; EG-NEXT:     LSHR T9.Z, T7.Z, literal.y,
6274; EG-NEXT:     AND_INT * T9.X, T7.Z, literal.x,
6275; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
6276; EG-NEXT:     MOV T9.Y, 0.0,
6277; EG-NEXT:     LSHR * T10.Z, T7.Y, literal.x,
6278; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
6279; EG-NEXT:     AND_INT T10.X, T7.Y, literal.x,
6280; EG-NEXT:     MOV T10.Y, 0.0,
6281; EG-NEXT:     LSHR T7.Z, T7.X, literal.y,
6282; EG-NEXT:     AND_INT * T7.X, T7.X, literal.x,
6283; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
6284; EG-NEXT:     MOV T7.Y, 0.0,
6285; EG-NEXT:     MOV T8.W, 0.0,
6286; EG-NEXT:     MOV * T9.W, 0.0,
6287; EG-NEXT:     MOV T10.W, 0.0,
6288; EG-NEXT:     MOV * T7.W, 0.0,
6289; EG-NEXT:     LSHR T11.X, KC0[2].Y, literal.x,
6290; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
6291; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
6292; EG-NEXT:     LSHR T12.X, PV.W, literal.x,
6293; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
6294; EG-NEXT:    2(2.802597e-45), 32(4.484155e-44)
6295; EG-NEXT:     LSHR T13.X, PV.W, literal.x,
6296; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
6297; EG-NEXT:    2(2.802597e-45), 48(6.726233e-44)
6298; EG-NEXT:     LSHR * T14.X, PV.W, literal.x,
6299; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
6300;
6301; GFX12-LABEL: constant_zextload_v8i16_to_v8i64:
6302; GFX12:       ; %bb.0:
6303; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
6304; GFX12-NEXT:    s_wait_kmcnt 0x0
6305; GFX12-NEXT:    s_load_b128 s[4:7], s[2:3], 0x0
6306; GFX12-NEXT:    s_wait_kmcnt 0x0
6307; GFX12-NEXT:    s_and_b32 s2, 0xffff, s7
6308; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
6309; GFX12-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2
6310; GFX12-NEXT:    s_pack_hl_b32_b16 s3, s7, 0
6311; GFX12-NEXT:    s_pack_hl_b32_b16 s2, s6, 0
6312; GFX12-NEXT:    s_wait_alu 0xfffe
6313; GFX12-NEXT:    v_dual_mov_b32 v2, s3 :: v_dual_mov_b32 v3, v1
6314; GFX12-NEXT:    s_and_b32 s3, 0xffff, s6
6315; GFX12-NEXT:    global_store_b128 v1, v[0:3], s[0:1] offset:48
6316; GFX12-NEXT:    s_wait_alu 0xfffe
6317; GFX12-NEXT:    v_mov_b32_e32 v0, s3
6318; GFX12-NEXT:    v_mov_b32_e32 v2, s2
6319; GFX12-NEXT:    s_pack_hl_b32_b16 s2, s5, 0
6320; GFX12-NEXT:    s_and_b32 s3, 0xffff, s5
6321; GFX12-NEXT:    global_store_b128 v1, v[0:3], s[0:1] offset:32
6322; GFX12-NEXT:    s_wait_alu 0xfffe
6323; GFX12-NEXT:    v_mov_b32_e32 v0, s3
6324; GFX12-NEXT:    v_mov_b32_e32 v2, s2
6325; GFX12-NEXT:    s_pack_hl_b32_b16 s2, s4, 0
6326; GFX12-NEXT:    s_and_b32 s3, 0xffff, s4
6327; GFX12-NEXT:    global_store_b128 v1, v[0:3], s[0:1] offset:16
6328; GFX12-NEXT:    s_wait_alu 0xfffe
6329; GFX12-NEXT:    v_mov_b32_e32 v0, s3
6330; GFX12-NEXT:    v_mov_b32_e32 v2, s2
6331; GFX12-NEXT:    global_store_b128 v1, v[0:3], s[0:1]
6332; GFX12-NEXT:    s_endpgm
6333  %load = load <8 x i16>, ptr addrspace(4) %in
6334  %ext = zext <8 x i16> %load to <8 x i64>
6335  store <8 x i64> %ext, ptr addrspace(1) %out
6336  ret void
6337}
6338
; Codegen test: load an <8 x i16> vector through the addrspace(4) pointer %in,
; sign-extend every element to i64, and store the resulting <8 x i64> through
; the addrspace(1) pointer %out. Attribute group #0 is defined elsewhere in
; this file.
;
; The check lines inside the function are autogenerated by
; utils/update_llc_test_checks.py, one group per llc invocation listed at the
; top of the file (SI, HSA/kaveri, VI/tonga, r600/redwood and gfx1200).
; Regenerate them with that script instead of editing them by hand.
;
; As the expected output shows, each target fetches the eight i16 values with
; a single 128-bit load and writes the result with four four-dword stores at
; byte offsets 0, 16, 32 and 48 from %out.
6339define amdgpu_kernel void @constant_sextload_v8i16_to_v8i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
6340; GCN-NOHSA-SI-LABEL: constant_sextload_v8i16_to_v8i64:
6341; GCN-NOHSA-SI:       ; %bb.0:
6342; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
6343; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
6344; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x0
6345; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
6346; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
6347; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
6348; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s7
6349; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, s5
6350; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s12, s6, 16
6351; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s14, s4, 16
6352; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[16:17], s[4:5], 0x100000
6353; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[18:19], s[6:7], 0x100000
6354; GCN-NOHSA-SI-NEXT:    s_ashr_i64 s[4:5], s[4:5], 48
6355; GCN-NOHSA-SI-NEXT:    s_ashr_i64 s[6:7], s[6:7], 48
6356; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[10:11], s[10:11], 0x100000
6357; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[8:9], s[8:9], 0x100000
6358; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[14:15], s[14:15], 0x100000
6359; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[12:13], s[12:13], 0x100000
6360; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s8
6361; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s9
6362; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s6
6363; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s7
6364; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
6365; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
6366; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s10
6367; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s11
6368; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s4
6369; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s5
6370; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
6371; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
6372; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s18
6373; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s19
6374; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v4, s16
6375; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v5, s17
6376; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s12
6377; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s13
6378; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
6379; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v6, s14
6380; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v7, s15
6381; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0
6382; GCN-NOHSA-SI-NEXT:    s_endpgm
6383;
6384; GCN-HSA-LABEL: constant_sextload_v8i16_to_v8i64:
6385; GCN-HSA:       ; %bb.0:
6386; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
6387; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
6388; GCN-HSA-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x0
6389; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
6390; GCN-HSA-NEXT:    s_mov_b32 s2, s7
6391; GCN-HSA-NEXT:    s_mov_b32 s8, s5
6392; GCN-HSA-NEXT:    s_lshr_b32 s10, s6, 16
6393; GCN-HSA-NEXT:    s_lshr_b32 s12, s4, 16
6394; GCN-HSA-NEXT:    s_bfe_i64 s[14:15], s[4:5], 0x100000
6395; GCN-HSA-NEXT:    s_bfe_i64 s[16:17], s[6:7], 0x100000
6396; GCN-HSA-NEXT:    s_ashr_i64 s[4:5], s[4:5], 48
6397; GCN-HSA-NEXT:    s_ashr_i64 s[6:7], s[6:7], 48
6398; GCN-HSA-NEXT:    s_bfe_i64 s[12:13], s[12:13], 0x100000
6399; GCN-HSA-NEXT:    s_bfe_i64 s[10:11], s[10:11], 0x100000
6400; GCN-HSA-NEXT:    s_bfe_i64 s[8:9], s[8:9], 0x100000
6401; GCN-HSA-NEXT:    s_bfe_i64 s[2:3], s[2:3], 0x100000
6402; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
6403; GCN-HSA-NEXT:    s_add_u32 s2, s0, 48
6404; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
6405; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
6406; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
6407; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
6408; GCN-HSA-NEXT:    s_add_u32 s2, s0, 16
6409; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s6
6410; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s7
6411; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
6412; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6413; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
6414; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
6415; GCN-HSA-NEXT:    s_add_u32 s2, s0, 32
6416; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s8
6417; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s9
6418; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s4
6419; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s5
6420; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
6421; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6422; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
6423; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s16
6424; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s17
6425; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s10
6426; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s11
6427; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
6428; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6429; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
6430; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s14
6431; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s15
6432; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s12
6433; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s13
6434; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
6435; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6436; GCN-HSA-NEXT:    s_endpgm
6437;
6438; GCN-NOHSA-VI-LABEL: constant_sextload_v8i16_to_v8i64:
6439; GCN-NOHSA-VI:       ; %bb.0:
6440; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
6441; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
6442; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x0
6443; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
6444; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[12:13], s[6:7], 0x100000
6445; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s6, s6, 16
6446; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[14:15], s[6:7], 0x100000
6447; GCN-NOHSA-VI-NEXT:    s_mov_b32 s6, s7
6448; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[2:3], s[4:5], 0x100000
6449; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s4, s4, 16
6450; GCN-NOHSA-VI-NEXT:    s_mov_b32 s8, s5
6451; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s10, s5, 16
6452; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[16:17], s[6:7], 0x100000
6453; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s6, s7, 16
6454; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[4:5], s[4:5], 0x100000
6455; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[8:9], s[8:9], 0x100000
6456; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[10:11], s[10:11], 0x100000
6457; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[6:7], s[6:7], 0x100000
6458; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s6
6459; GCN-NOHSA-VI-NEXT:    s_add_u32 s6, s0, 48
6460; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s7
6461; GCN-NOHSA-VI-NEXT:    s_addc_u32 s7, s1, 0
6462; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s6
6463; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s16
6464; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s17
6465; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s7
6466; GCN-NOHSA-VI-NEXT:    s_add_u32 s6, s0, 32
6467; GCN-NOHSA-VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6468; GCN-NOHSA-VI-NEXT:    s_addc_u32 s7, s1, 0
6469; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s6
6470; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s12
6471; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s13
6472; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s14
6473; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s15
6474; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s7
6475; GCN-NOHSA-VI-NEXT:    s_add_u32 s6, s0, 16
6476; GCN-NOHSA-VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6477; GCN-NOHSA-VI-NEXT:    s_addc_u32 s7, s1, 0
6478; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s6
6479; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s8
6480; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s9
6481; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s10
6482; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s11
6483; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s7
6484; GCN-NOHSA-VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6485; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s1
6486; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s2
6487; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s3
6488; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s4
6489; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s5
6490; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s0
6491; GCN-NOHSA-VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6492; GCN-NOHSA-VI-NEXT:    s_endpgm
6493;
6494; EG-LABEL: constant_sextload_v8i16_to_v8i64:
6495; EG:       ; %bb.0:
6496; EG-NEXT:    ALU 0, @10, KC0[CB0:0-32], KC1[]
6497; EG-NEXT:    TEX 0 @8
6498; EG-NEXT:    ALU 33, @11, KC0[CB0:0-32], KC1[]
6499; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T14.XYZW, T7.X, 0
6500; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T13.XYZW, T11.X, 0
6501; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T12.XYZW, T9.X, 0
6502; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T10.XYZW, T8.X, 1
6503; EG-NEXT:    CF_END
6504; EG-NEXT:    Fetch clause starting at 8:
6505; EG-NEXT:     VTX_READ_128 T7.XYZW, T7.X, 0, #1
6506; EG-NEXT:    ALU clause starting at 10:
6507; EG-NEXT:     MOV * T7.X, KC0[2].Z,
6508; EG-NEXT:    ALU clause starting at 11:
6509; EG-NEXT:     LSHR T8.X, KC0[2].Y, literal.x,
6510; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
6511; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
6512; EG-NEXT:     LSHR T9.X, PV.W, literal.x,
6513; EG-NEXT:     ADD_INT T0.W, KC0[2].Y, literal.y,
6514; EG-NEXT:     ASHR * T10.W, T7.X, literal.z,
6515; EG-NEXT:    2(2.802597e-45), 32(4.484155e-44)
6516; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
6517; EG-NEXT:     LSHR T11.X, PV.W, literal.x,
6518; EG-NEXT:     ASHR T10.Z, T7.X, literal.y,
6519; EG-NEXT:     ASHR * T12.W, T7.Y, literal.z,
6520; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
6521; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
6522; EG-NEXT:     BFE_INT T10.X, T7.X, 0.0, literal.x,
6523; EG-NEXT:     ASHR T12.Z, T7.Y, literal.x,
6524; EG-NEXT:     ASHR * T13.W, T7.Z, literal.y,
6525; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
6526; EG-NEXT:     BFE_INT T12.X, T7.Y, 0.0, literal.x,
6527; EG-NEXT:     ASHR T10.Y, PV.X, literal.y,
6528; EG-NEXT:     ASHR T13.Z, T7.Z, literal.x,
6529; EG-NEXT:     ASHR * T14.W, T7.W, literal.y,
6530; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
6531; EG-NEXT:     BFE_INT T13.X, T7.Z, 0.0, literal.x,
6532; EG-NEXT:     ASHR T12.Y, PV.X, literal.y,
6533; EG-NEXT:     ASHR * T14.Z, T7.W, literal.x,
6534; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
6535; EG-NEXT:     BFE_INT T14.X, T7.W, 0.0, literal.x,
6536; EG-NEXT:     ASHR T13.Y, PV.X, literal.y,
6537; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.z,
6538; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
6539; EG-NEXT:    48(6.726233e-44), 0(0.000000e+00)
6540; EG-NEXT:     LSHR T7.X, PV.W, literal.x,
6541; EG-NEXT:     ASHR * T14.Y, PV.X, literal.y,
6542; EG-NEXT:    2(2.802597e-45), 31(4.344025e-44)
6543;
6544; GFX12-LABEL: constant_sextload_v8i16_to_v8i64:
6545; GFX12:       ; %bb.0:
6546; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
6547; GFX12-NEXT:    s_wait_kmcnt 0x0
6548; GFX12-NEXT:    s_load_b128 s[4:7], s[2:3], 0x0
6549; GFX12-NEXT:    s_wait_kmcnt 0x0
6550; GFX12-NEXT:    s_mov_b32 s14, s7
6551; GFX12-NEXT:    s_lshr_b32 s16, s7, 16
6552; GFX12-NEXT:    s_bfe_i64 s[12:13], s[6:7], 0x100000
6553; GFX12-NEXT:    s_lshr_b32 s6, s6, 16
6554; GFX12-NEXT:    s_bfe_i64 s[2:3], s[4:5], 0x100000
6555; GFX12-NEXT:    s_mov_b32 s8, s5
6556; GFX12-NEXT:    s_lshr_b32 s10, s5, 16
6557; GFX12-NEXT:    s_bfe_i64 s[14:15], s[14:15], 0x100000
6558; GFX12-NEXT:    s_bfe_i64 s[16:17], s[16:17], 0x100000
6559; GFX12-NEXT:    s_lshr_b32 s4, s4, 16
6560; GFX12-NEXT:    s_bfe_i64 s[6:7], s[6:7], 0x100000
6561; GFX12-NEXT:    v_dual_mov_b32 v16, 0 :: v_dual_mov_b32 v1, s13
6562; GFX12-NEXT:    s_bfe_i64 s[8:9], s[8:9], 0x100000
6563; GFX12-NEXT:    s_bfe_i64 s[10:11], s[10:11], 0x100000
6564; GFX12-NEXT:    v_dual_mov_b32 v4, s2 :: v_dual_mov_b32 v9, s15
6565; GFX12-NEXT:    v_dual_mov_b32 v8, s14 :: v_dual_mov_b32 v11, s17
6566; GFX12-NEXT:    v_dual_mov_b32 v10, s16 :: v_dual_mov_b32 v3, s7
6567; GFX12-NEXT:    s_bfe_i64 s[4:5], s[4:5], 0x100000
6568; GFX12-NEXT:    v_dual_mov_b32 v0, s12 :: v_dual_mov_b32 v5, s3
6569; GFX12-NEXT:    v_dual_mov_b32 v2, s6 :: v_dual_mov_b32 v13, s9
6570; GFX12-NEXT:    v_dual_mov_b32 v12, s8 :: v_dual_mov_b32 v15, s11
6571; GFX12-NEXT:    v_dual_mov_b32 v14, s10 :: v_dual_mov_b32 v7, s5
6572; GFX12-NEXT:    v_mov_b32_e32 v6, s4
6573; GFX12-NEXT:    s_clause 0x3
6574; GFX12-NEXT:    global_store_b128 v16, v[8:11], s[0:1] offset:48
6575; GFX12-NEXT:    global_store_b128 v16, v[0:3], s[0:1] offset:32
6576; GFX12-NEXT:    global_store_b128 v16, v[12:15], s[0:1] offset:16
6577; GFX12-NEXT:    global_store_b128 v16, v[4:7], s[0:1]
6578; GFX12-NEXT:    s_endpgm
6579  %load = load <8 x i16>, ptr addrspace(4) %in
6580  %ext = sext <8 x i16> %load to <8 x i64>
6581  store <8 x i64> %ext, ptr addrspace(1) %out
6582  ret void
6583}
6584
; Test: load a <16 x i16> vector from %in (ptr addrspace(4)), zero-extend every
; element to i64, and store the resulting <16 x i64> to %out (ptr addrspace(1)).
; The expected code for each RUN-line prefix (GCN-NOHSA-SI, GCN-HSA,
; GCN-NOHSA-VI, EG, GFX12) follows. In every variant the result is written as
; eight 16-byte stores covering byte offsets 0 through 112 of %out.
; These assertions are autogenerated (see the NOTE at the top of the file), so
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
6585define amdgpu_kernel void @constant_zextload_v16i16_to_v16i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
6586; GCN-NOHSA-SI-LABEL: constant_zextload_v16i16_to_v16i64:
6587; GCN-NOHSA-SI:       ; %bb.0:
6588; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[8:11], s[4:5], 0x9
6589; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
6590; GCN-NOHSA-SI-NEXT:    s_load_dwordx8 s[0:7], s[10:11], 0x0
6591; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, 0xf000
6592; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, 0
6593; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, -1
6594; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, v1
6595; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
6596; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s12, s1, 16
6597; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s13, s3, 16
6598; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s14, s7, 16
6599; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s15, s5, 16
6600; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s16, s4, 16
6601; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s17, s6, 16
6602; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s18, s2, 16
6603; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s19, s0, 16
6604; GCN-NOHSA-SI-NEXT:    s_and_b32 s0, s0, 0xffff
6605; GCN-NOHSA-SI-NEXT:    s_and_b32 s2, s2, 0xffff
6606; GCN-NOHSA-SI-NEXT:    s_and_b32 s6, s6, 0xffff
6607; GCN-NOHSA-SI-NEXT:    s_and_b32 s4, s4, 0xffff
6608; GCN-NOHSA-SI-NEXT:    s_and_b32 s1, s1, 0xffff
6609; GCN-NOHSA-SI-NEXT:    s_and_b32 s3, s3, 0xffff
6610; GCN-NOHSA-SI-NEXT:    s_and_b32 s5, s5, 0xffff
6611; GCN-NOHSA-SI-NEXT:    s_and_b32 s7, s7, 0xffff
6612; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s5
6613; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s15
6614; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:80
6615; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
6616; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s7
6617; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s14
6618; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:112
6619; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
6620; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s3
6621; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s13
6622; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:48
6623; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
6624; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s1
6625; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s12
6626; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:16
6627; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
6628; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s4
6629; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s16
6630; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:64
6631; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
6632; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s6
6633; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s17
6634; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:96
6635; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
6636; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s2
6637; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s18
6638; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:32
6639; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
6640; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s0
6641; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s19
6642; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0
6643; GCN-NOHSA-SI-NEXT:    s_endpgm
6644;
6645; GCN-HSA-LABEL: constant_zextload_v16i16_to_v16i64:
6646; GCN-HSA:       ; %bb.0:
6647; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
6648; GCN-HSA-NEXT:    v_mov_b32_e32 v1, 0
6649; GCN-HSA-NEXT:    v_mov_b32_e32 v3, v1
6650; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
6651; GCN-HSA-NEXT:    s_load_dwordx8 s[4:11], s[2:3], 0x0
6652; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
6653; GCN-HSA-NEXT:    s_lshr_b32 s12, s5, 16
6654; GCN-HSA-NEXT:    s_lshr_b32 s13, s7, 16
6655; GCN-HSA-NEXT:    s_lshr_b32 s14, s11, 16
6656; GCN-HSA-NEXT:    s_lshr_b32 s2, s9, 16
6657; GCN-HSA-NEXT:    s_lshr_b32 s15, s8, 16
6658; GCN-HSA-NEXT:    s_lshr_b32 s16, s10, 16
6659; GCN-HSA-NEXT:    s_lshr_b32 s17, s6, 16
6660; GCN-HSA-NEXT:    s_lshr_b32 s18, s4, 16
6661; GCN-HSA-NEXT:    s_and_b32 s4, s4, 0xffff
6662; GCN-HSA-NEXT:    s_and_b32 s6, s6, 0xffff
6663; GCN-HSA-NEXT:    s_and_b32 s10, s10, 0xffff
6664; GCN-HSA-NEXT:    s_and_b32 s8, s8, 0xffff
6665; GCN-HSA-NEXT:    s_and_b32 s5, s5, 0xffff
6666; GCN-HSA-NEXT:    s_and_b32 s7, s7, 0xffff
6667; GCN-HSA-NEXT:    s_and_b32 s11, s11, 0xffff
6668; GCN-HSA-NEXT:    s_and_b32 s3, s9, 0xffff
6669; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s2
6670; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0x50
6671; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s3
6672; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
6673; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
6674; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
6675; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0x70
6676; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
6677; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6678; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
6679; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
6680; GCN-HSA-NEXT:    s_add_u32 s2, s0, 48
6681; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s11
6682; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s14
6683; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
6684; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6685; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
6686; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
6687; GCN-HSA-NEXT:    s_add_u32 s2, s0, 16
6688; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s7
6689; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s13
6690; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
6691; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6692; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
6693; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
6694; GCN-HSA-NEXT:    s_add_u32 s2, s0, 64
6695; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s5
6696; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s12
6697; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
6698; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6699; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
6700; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
6701; GCN-HSA-NEXT:    s_add_u32 s2, s0, 0x60
6702; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s8
6703; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s15
6704; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
6705; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6706; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
6707; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
6708; GCN-HSA-NEXT:    s_add_u32 s2, s0, 32
6709; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s10
6710; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s16
6711; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
6712; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6713; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
6714; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s6
6715; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s17
6716; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
6717; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6718; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
6719; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s4
6720; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s18
6721; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
6722; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6723; GCN-HSA-NEXT:    s_endpgm
6724;
6725; GCN-NOHSA-VI-LABEL: constant_zextload_v16i16_to_v16i64:
6726; GCN-NOHSA-VI:       ; %bb.0:
6727; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
6728; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, 0
6729; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, v1
6730; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
6731; GCN-NOHSA-VI-NEXT:    s_load_dwordx8 s[4:11], s[2:3], 0x0
6732; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
6733; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s12, s4, 16
6734; GCN-NOHSA-VI-NEXT:    s_and_b32 s4, s4, 0xffff
6735; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s13, s5, 16
6736; GCN-NOHSA-VI-NEXT:    s_and_b32 s5, s5, 0xffff
6737; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s14, s6, 16
6738; GCN-NOHSA-VI-NEXT:    s_and_b32 s6, s6, 0xffff
6739; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s15, s7, 16
6740; GCN-NOHSA-VI-NEXT:    s_and_b32 s7, s7, 0xffff
6741; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s16, s10, 16
6742; GCN-NOHSA-VI-NEXT:    s_and_b32 s10, s10, 0xffff
6743; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s17, s11, 16
6744; GCN-NOHSA-VI-NEXT:    s_and_b32 s11, s11, 0xffff
6745; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s18, s8, 16
6746; GCN-NOHSA-VI-NEXT:    s_and_b32 s8, s8, 0xffff
6747; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s2, s9, 16
6748; GCN-NOHSA-VI-NEXT:    s_and_b32 s3, s9, 0xffff
6749; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s2
6750; GCN-NOHSA-VI-NEXT:    s_add_u32 s2, s0, 0x50
6751; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s3
6752; GCN-NOHSA-VI-NEXT:    s_addc_u32 s3, s1, 0
6753; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s3
6754; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s2
6755; GCN-NOHSA-VI-NEXT:    s_add_u32 s2, s0, 64
6756; GCN-NOHSA-VI-NEXT:    s_addc_u32 s3, s1, 0
6757; GCN-NOHSA-VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6758; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s3
6759; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s2
6760; GCN-NOHSA-VI-NEXT:    s_add_u32 s2, s0, 0x70
6761; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s8
6762; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s18
6763; GCN-NOHSA-VI-NEXT:    s_addc_u32 s3, s1, 0
6764; GCN-NOHSA-VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6765; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s3
6766; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s2
6767; GCN-NOHSA-VI-NEXT:    s_add_u32 s2, s0, 0x60
6768; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s11
6769; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s17
6770; GCN-NOHSA-VI-NEXT:    s_addc_u32 s3, s1, 0
6771; GCN-NOHSA-VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6772; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s3
6773; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s2
6774; GCN-NOHSA-VI-NEXT:    s_add_u32 s2, s0, 48
6775; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s10
6776; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s16
6777; GCN-NOHSA-VI-NEXT:    s_addc_u32 s3, s1, 0
6778; GCN-NOHSA-VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6779; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s3
6780; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s2
6781; GCN-NOHSA-VI-NEXT:    s_add_u32 s2, s0, 32
6782; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s7
6783; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s15
6784; GCN-NOHSA-VI-NEXT:    s_addc_u32 s3, s1, 0
6785; GCN-NOHSA-VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6786; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s3
6787; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s2
6788; GCN-NOHSA-VI-NEXT:    s_add_u32 s2, s0, 16
6789; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s6
6790; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s14
6791; GCN-NOHSA-VI-NEXT:    s_addc_u32 s3, s1, 0
6792; GCN-NOHSA-VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6793; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s3
6794; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s5
6795; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s13
6796; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s2
6797; GCN-NOHSA-VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6798; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s1
6799; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s4
6800; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s12
6801; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s0
6802; GCN-NOHSA-VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
6803; GCN-NOHSA-VI-NEXT:    s_endpgm
6804;
6805; EG-LABEL: constant_zextload_v16i16_to_v16i64:
6806; EG:       ; %bb.0:
6807; EG-NEXT:    ALU 0, @16, KC0[CB0:0-32], KC1[]
6808; EG-NEXT:    TEX 1 @12
6809; EG-NEXT:    ALU 62, @17, KC0[CB0:0-32], KC1[]
6810; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T13.XYZW, T26.X, 0
6811; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T14.XYZW, T25.X, 0
6812; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T15.XYZW, T24.X, 0
6813; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T12.XYZW, T23.X, 0
6814; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T16.XYZW, T22.X, 0
6815; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T17.XYZW, T21.X, 0
6816; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T18.XYZW, T20.X, 0
6817; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T11.XYZW, T19.X, 1
6818; EG-NEXT:    CF_END
6819; EG-NEXT:    Fetch clause starting at 12:
6820; EG-NEXT:     VTX_READ_128 T12.XYZW, T11.X, 16, #1
6821; EG-NEXT:     VTX_READ_128 T11.XYZW, T11.X, 0, #1
6822; EG-NEXT:    ALU clause starting at 16:
6823; EG-NEXT:     MOV * T11.X, KC0[2].Z,
6824; EG-NEXT:    ALU clause starting at 17:
6825; EG-NEXT:     LSHR * T13.Z, T12.W, literal.x,
6826; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
6827; EG-NEXT:     AND_INT T13.X, T12.W, literal.x,
6828; EG-NEXT:     MOV T13.Y, 0.0,
6829; EG-NEXT:     LSHR T14.Z, T12.Z, literal.y,
6830; EG-NEXT:     AND_INT * T14.X, T12.Z, literal.x,
6831; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
6832; EG-NEXT:     MOV T14.Y, 0.0,
6833; EG-NEXT:     LSHR * T15.Z, T12.Y, literal.x,
6834; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
6835; EG-NEXT:     AND_INT T15.X, T12.Y, literal.x,
6836; EG-NEXT:     MOV T15.Y, 0.0,
6837; EG-NEXT:     LSHR T12.Z, T12.X, literal.y,
6838; EG-NEXT:     AND_INT * T12.X, T12.X, literal.x,
6839; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
6840; EG-NEXT:     MOV T12.Y, 0.0,
6841; EG-NEXT:     LSHR * T16.Z, T11.W, literal.x,
6842; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
6843; EG-NEXT:     AND_INT T16.X, T11.W, literal.x,
6844; EG-NEXT:     MOV T16.Y, 0.0,
6845; EG-NEXT:     LSHR T17.Z, T11.Z, literal.y,
6846; EG-NEXT:     AND_INT * T17.X, T11.Z, literal.x,
6847; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
6848; EG-NEXT:     MOV T17.Y, 0.0,
6849; EG-NEXT:     LSHR * T18.Z, T11.Y, literal.x,
6850; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
6851; EG-NEXT:     AND_INT T18.X, T11.Y, literal.x,
6852; EG-NEXT:     MOV T18.Y, 0.0,
6853; EG-NEXT:     LSHR T11.Z, T11.X, literal.y,
6854; EG-NEXT:     AND_INT * T11.X, T11.X, literal.x,
6855; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
6856; EG-NEXT:     MOV T11.Y, 0.0,
6857; EG-NEXT:     MOV T13.W, 0.0,
6858; EG-NEXT:     MOV * T14.W, 0.0,
6859; EG-NEXT:     MOV T15.W, 0.0,
6860; EG-NEXT:     MOV * T12.W, 0.0,
6861; EG-NEXT:     MOV T16.W, 0.0,
6862; EG-NEXT:     MOV * T17.W, 0.0,
6863; EG-NEXT:     MOV T18.W, 0.0,
6864; EG-NEXT:     MOV * T11.W, 0.0,
6865; EG-NEXT:     LSHR T19.X, KC0[2].Y, literal.x,
6866; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
6867; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
6868; EG-NEXT:     LSHR T20.X, PV.W, literal.x,
6869; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
6870; EG-NEXT:    2(2.802597e-45), 32(4.484155e-44)
6871; EG-NEXT:     LSHR T21.X, PV.W, literal.x,
6872; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
6873; EG-NEXT:    2(2.802597e-45), 48(6.726233e-44)
6874; EG-NEXT:     LSHR T22.X, PV.W, literal.x,
6875; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
6876; EG-NEXT:    2(2.802597e-45), 64(8.968310e-44)
6877; EG-NEXT:     LSHR T23.X, PV.W, literal.x,
6878; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
6879; EG-NEXT:    2(2.802597e-45), 80(1.121039e-43)
6880; EG-NEXT:     LSHR T24.X, PV.W, literal.x,
6881; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
6882; EG-NEXT:    2(2.802597e-45), 96(1.345247e-43)
6883; EG-NEXT:     LSHR T25.X, PV.W, literal.x,
6884; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
6885; EG-NEXT:    2(2.802597e-45), 112(1.569454e-43)
6886; EG-NEXT:     LSHR * T26.X, PV.W, literal.x,
6887; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
6888;
6889; GFX12-LABEL: constant_zextload_v16i16_to_v16i64:
6890; GFX12:       ; %bb.0:
6891; GFX12-NEXT:    s_load_b128 s[8:11], s[4:5], 0x24
6892; GFX12-NEXT:    s_wait_kmcnt 0x0
6893; GFX12-NEXT:    s_load_b256 s[0:7], s[10:11], 0x0
6894; GFX12-NEXT:    s_wait_kmcnt 0x0
6895; GFX12-NEXT:    s_and_b32 s10, s5, 0xffff
6896; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
6897; GFX12-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s10
6898; GFX12-NEXT:    s_lshr_b32 s5, s5, 16
6899; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
6900; GFX12-NEXT:    v_dual_mov_b32 v2, s5 :: v_dual_mov_b32 v3, v1
6901; GFX12-NEXT:    s_lshr_b32 s5, s4, 16
6902; GFX12-NEXT:    s_and_b32 s4, s4, 0xffff
6903; GFX12-NEXT:    global_store_b128 v1, v[0:3], s[8:9] offset:80
6904; GFX12-NEXT:    s_wait_alu 0xfffe
6905; GFX12-NEXT:    v_mov_b32_e32 v0, s4
6906; GFX12-NEXT:    v_mov_b32_e32 v2, s5
6907; GFX12-NEXT:    s_lshr_b32 s4, s7, 16
6908; GFX12-NEXT:    s_and_b32 s5, s7, 0xffff
6909; GFX12-NEXT:    global_store_b128 v1, v[0:3], s[8:9] offset:64
6910; GFX12-NEXT:    s_wait_alu 0xfffe
6911; GFX12-NEXT:    v_mov_b32_e32 v0, s5
6912; GFX12-NEXT:    v_mov_b32_e32 v2, s4
6913; GFX12-NEXT:    s_lshr_b32 s4, s6, 16
6914; GFX12-NEXT:    s_and_b32 s5, s6, 0xffff
6915; GFX12-NEXT:    global_store_b128 v1, v[0:3], s[8:9] offset:112
6916; GFX12-NEXT:    s_wait_alu 0xfffe
6917; GFX12-NEXT:    v_mov_b32_e32 v0, s5
6918; GFX12-NEXT:    v_mov_b32_e32 v2, s4
6919; GFX12-NEXT:    s_lshr_b32 s4, s3, 16
6920; GFX12-NEXT:    s_and_b32 s3, s3, 0xffff
6921; GFX12-NEXT:    global_store_b128 v1, v[0:3], s[8:9] offset:96
6922; GFX12-NEXT:    v_mov_b32_e32 v0, s3
6923; GFX12-NEXT:    s_wait_alu 0xfffe
6924; GFX12-NEXT:    v_mov_b32_e32 v2, s4
6925; GFX12-NEXT:    s_lshr_b32 s3, s2, 16
6926; GFX12-NEXT:    s_and_b32 s2, s2, 0xffff
6927; GFX12-NEXT:    global_store_b128 v1, v[0:3], s[8:9] offset:48
6928; GFX12-NEXT:    s_wait_alu 0xfffe
6929; GFX12-NEXT:    v_mov_b32_e32 v0, s2
6930; GFX12-NEXT:    v_mov_b32_e32 v2, s3
6931; GFX12-NEXT:    s_lshr_b32 s2, s1, 16
6932; GFX12-NEXT:    s_and_b32 s1, s1, 0xffff
6933; GFX12-NEXT:    global_store_b128 v1, v[0:3], s[8:9] offset:32
6934; GFX12-NEXT:    v_mov_b32_e32 v0, s1
6935; GFX12-NEXT:    s_wait_alu 0xfffe
6936; GFX12-NEXT:    v_mov_b32_e32 v2, s2
6937; GFX12-NEXT:    s_lshr_b32 s1, s0, 16
6938; GFX12-NEXT:    s_and_b32 s0, s0, 0xffff
6939; GFX12-NEXT:    global_store_b128 v1, v[0:3], s[8:9] offset:16
6940; GFX12-NEXT:    s_wait_alu 0xfffe
6941; GFX12-NEXT:    v_mov_b32_e32 v0, s0
6942; GFX12-NEXT:    v_mov_b32_e32 v2, s1
6943; GFX12-NEXT:    global_store_b128 v1, v[0:3], s[8:9]
6944; GFX12-NEXT:    s_endpgm
; IR under test: one vector load, a zext from i16 to i64 on all 16 lanes, and
; one vector store of the widened value.
6945  %load = load <16 x i16>, ptr addrspace(4) %in
6946  %ext = zext <16 x i16> %load to <16 x i64>
6947  store <16 x i64> %ext, ptr addrspace(1) %out
6948  ret void
6949}
6950
6951define amdgpu_kernel void @constant_sextload_v16i16_to_v16i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
6952; GCN-NOHSA-SI-LABEL: constant_sextload_v16i16_to_v16i64:
6953; GCN-NOHSA-SI:       ; %bb.0:
6954; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
6955; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
6956; GCN-NOHSA-SI-NEXT:    s_load_dwordx8 s[4:11], s[2:3], 0x0
6957; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
6958; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
6959; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
6960; GCN-NOHSA-SI-NEXT:    s_mov_b32 s12, s11
6961; GCN-NOHSA-SI-NEXT:    s_mov_b32 s14, s9
6962; GCN-NOHSA-SI-NEXT:    s_mov_b32 s16, s7
6963; GCN-NOHSA-SI-NEXT:    s_mov_b32 s18, s5
6964; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s20, s10, 16
6965; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s22, s8, 16
6966; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s24, s6, 16
6967; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s26, s4, 16
6968; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[28:29], s[4:5], 0x100000
6969; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[30:31], s[6:7], 0x100000
6970; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[34:35], s[8:9], 0x100000
6971; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[36:37], s[10:11], 0x100000
6972; GCN-NOHSA-SI-NEXT:    s_ashr_i64 s[4:5], s[4:5], 48
6973; GCN-NOHSA-SI-NEXT:    s_ashr_i64 s[6:7], s[6:7], 48
6974; GCN-NOHSA-SI-NEXT:    s_ashr_i64 s[8:9], s[8:9], 48
6975; GCN-NOHSA-SI-NEXT:    s_ashr_i64 s[10:11], s[10:11], 48
6976; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[18:19], s[18:19], 0x100000
6977; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[16:17], s[16:17], 0x100000
6978; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[14:15], s[14:15], 0x100000
6979; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[12:13], s[12:13], 0x100000
6980; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[26:27], s[26:27], 0x100000
6981; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[24:25], s[24:25], 0x100000
6982; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[22:23], s[22:23], 0x100000
6983; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[20:21], s[20:21], 0x100000
6984; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s12
6985; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s13
6986; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s10
6987; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s11
6988; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112
6989; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
6990; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s14
6991; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s15
6992; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s8
6993; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s9
6994; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80
6995; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
6996; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s16
6997; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s17
6998; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s6
6999; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s7
7000; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
7001; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
7002; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s18
7003; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s19
7004; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s4
7005; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s5
7006; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
7007; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
7008; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s36
7009; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s37
7010; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v4, s34
7011; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v5, s35
7012; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v8, s30
7013; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v9, s31
7014; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v12, s28
7015; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v13, s29
7016; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s20
7017; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s21
7018; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96
7019; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v6, s22
7020; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v7, s23
7021; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:64
7022; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v10, s24
7023; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v11, s25
7024; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:32
7025; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v14, s26
7026; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v15, s27
7027; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[12:15], off, s[0:3], 0
7028; GCN-NOHSA-SI-NEXT:    s_endpgm
7029;
7030; GCN-HSA-LABEL: constant_sextload_v16i16_to_v16i64:
7031; GCN-HSA:       ; %bb.0:
7032; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
7033; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
7034; GCN-HSA-NEXT:    s_load_dwordx8 s[12:19], s[2:3], 0x0
7035; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
7036; GCN-HSA-NEXT:    s_mov_b32 s6, s19
7037; GCN-HSA-NEXT:    s_mov_b32 s10, s17
7038; GCN-HSA-NEXT:    s_mov_b32 s20, s15
7039; GCN-HSA-NEXT:    s_mov_b32 s22, s13
7040; GCN-HSA-NEXT:    s_lshr_b32 s24, s18, 16
7041; GCN-HSA-NEXT:    s_lshr_b32 s26, s16, 16
7042; GCN-HSA-NEXT:    s_lshr_b32 s28, s14, 16
7043; GCN-HSA-NEXT:    s_lshr_b32 s30, s12, 16
7044; GCN-HSA-NEXT:    s_bfe_i64 s[34:35], s[18:19], 0x100000
7045; GCN-HSA-NEXT:    s_ashr_i64 s[18:19], s[18:19], 48
7046; GCN-HSA-NEXT:    s_bfe_i64 s[6:7], s[6:7], 0x100000
7047; GCN-HSA-NEXT:    s_bfe_i64 s[2:3], s[12:13], 0x100000
7048; GCN-HSA-NEXT:    s_bfe_i64 s[4:5], s[14:15], 0x100000
7049; GCN-HSA-NEXT:    s_bfe_i64 s[8:9], s[16:17], 0x100000
7050; GCN-HSA-NEXT:    s_ashr_i64 s[12:13], s[12:13], 48
7051; GCN-HSA-NEXT:    s_ashr_i64 s[14:15], s[14:15], 48
7052; GCN-HSA-NEXT:    s_ashr_i64 s[16:17], s[16:17], 48
7053; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s6
7054; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s7
7055; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s18
7056; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s19
7057; GCN-HSA-NEXT:    s_bfe_i64 s[6:7], s[30:31], 0x100000
7058; GCN-HSA-NEXT:    s_bfe_i64 s[18:19], s[28:29], 0x100000
7059; GCN-HSA-NEXT:    s_bfe_i64 s[26:27], s[26:27], 0x100000
7060; GCN-HSA-NEXT:    s_bfe_i64 s[24:25], s[24:25], 0x100000
7061; GCN-HSA-NEXT:    s_bfe_i64 s[22:23], s[22:23], 0x100000
7062; GCN-HSA-NEXT:    s_bfe_i64 s[20:21], s[20:21], 0x100000
7063; GCN-HSA-NEXT:    s_bfe_i64 s[10:11], s[10:11], 0x100000
7064; GCN-HSA-NEXT:    s_add_u32 s28, s0, 0x70
7065; GCN-HSA-NEXT:    s_addc_u32 s29, s1, 0
7066; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s10
7067; GCN-HSA-NEXT:    s_add_u32 s10, s0, 0x50
7068; GCN-HSA-NEXT:    v_mov_b32_e32 v8, s28
7069; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s11
7070; GCN-HSA-NEXT:    s_addc_u32 s11, s1, 0
7071; GCN-HSA-NEXT:    v_mov_b32_e32 v10, s10
7072; GCN-HSA-NEXT:    v_mov_b32_e32 v9, s29
7073; GCN-HSA-NEXT:    v_mov_b32_e32 v11, s11
7074; GCN-HSA-NEXT:    s_add_u32 s10, s0, 48
7075; GCN-HSA-NEXT:    v_mov_b32_e32 v6, s16
7076; GCN-HSA-NEXT:    v_mov_b32_e32 v7, s17
7077; GCN-HSA-NEXT:    flat_store_dwordx4 v[8:9], v[0:3]
7078; GCN-HSA-NEXT:    flat_store_dwordx4 v[10:11], v[4:7]
7079; GCN-HSA-NEXT:    s_addc_u32 s11, s1, 0
7080; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s10
7081; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s20
7082; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s21
7083; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s14
7084; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s15
7085; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s11
7086; GCN-HSA-NEXT:    s_add_u32 s10, s0, 16
7087; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
7088; GCN-HSA-NEXT:    s_addc_u32 s11, s1, 0
7089; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s10
7090; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s22
7091; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s23
7092; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s12
7093; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s13
7094; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s11
7095; GCN-HSA-NEXT:    s_add_u32 s10, s0, 0x60
7096; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
7097; GCN-HSA-NEXT:    s_addc_u32 s11, s1, 0
7098; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s10
7099; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s34
7100; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s35
7101; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s24
7102; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s25
7103; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s11
7104; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
7105; GCN-HSA-NEXT:    s_nop 0
7106; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s8
7107; GCN-HSA-NEXT:    s_add_u32 s8, s0, 64
7108; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s9
7109; GCN-HSA-NEXT:    s_addc_u32 s9, s1, 0
7110; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s8
7111; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s26
7112; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s27
7113; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s9
7114; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
7115; GCN-HSA-NEXT:    s_nop 0
7116; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s4
7117; GCN-HSA-NEXT:    s_add_u32 s4, s0, 32
7118; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s5
7119; GCN-HSA-NEXT:    s_addc_u32 s5, s1, 0
7120; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s4
7121; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s18
7122; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s19
7123; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s5
7124; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
7125; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
7126; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
7127; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
7128; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s6
7129; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s7
7130; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
7131; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
7132; GCN-HSA-NEXT:    s_endpgm
7133;
7134; GCN-NOHSA-VI-LABEL: constant_sextload_v16i16_to_v16i64:
7135; GCN-NOHSA-VI:       ; %bb.0:
7136; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[8:11], s[4:5], 0x24
7137; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
7138; GCN-NOHSA-VI-NEXT:    s_load_dwordx8 s[0:7], s[10:11], 0x0
7139; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
7140; GCN-NOHSA-VI-NEXT:    s_mov_b32 s12, s1
7141; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s18, s1, 16
7142; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[16:17], s[2:3], 0x100000
7143; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s2, s2, 16
7144; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[22:23], s[6:7], 0x100000
7145; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s6, s6, 16
7146; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[28:29], s[4:5], 0x100000
7147; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s4, s4, 16
7148; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[14:15], s[12:13], 0x100000
7149; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[12:13], s[18:19], 0x100000
7150; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[18:19], s[2:3], 0x100000
7151; GCN-NOHSA-VI-NEXT:    s_mov_b32 s2, s3
7152; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[24:25], s[6:7], 0x100000
7153; GCN-NOHSA-VI-NEXT:    s_mov_b32 s6, s7
7154; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[30:31], s[4:5], 0x100000
7155; GCN-NOHSA-VI-NEXT:    s_mov_b32 s4, s5
7156; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[10:11], s[0:1], 0x100000
7157; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s0, s0, 16
7158; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[20:21], s[2:3], 0x100000
7159; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s2, s3, 16
7160; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[26:27], s[6:7], 0x100000
7161; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s6, s7, 16
7162; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[34:35], s[4:5], 0x100000
7163; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s4, s5, 16
7164; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[0:1], s[0:1], 0x100000
7165; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[2:3], s[2:3], 0x100000
7166; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[6:7], s[6:7], 0x100000
7167; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[4:5], s[4:5], 0x100000
7168; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s4
7169; GCN-NOHSA-VI-NEXT:    s_add_u32 s4, s8, 0x50
7170; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s5
7171; GCN-NOHSA-VI-NEXT:    s_addc_u32 s5, s9, 0
7172; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s4
7173; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s34
7174; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s35
7175; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s5
7176; GCN-NOHSA-VI-NEXT:    s_add_u32 s4, s8, 64
7177; GCN-NOHSA-VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
7178; GCN-NOHSA-VI-NEXT:    s_addc_u32 s5, s9, 0
7179; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s4
7180; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s28
7181; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s29
7182; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s30
7183; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s31
7184; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s5
7185; GCN-NOHSA-VI-NEXT:    s_add_u32 s4, s8, 0x70
7186; GCN-NOHSA-VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
7187; GCN-NOHSA-VI-NEXT:    s_addc_u32 s5, s9, 0
7188; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s4
7189; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s26
7190; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s27
7191; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s6
7192; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s7
7193; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s5
7194; GCN-NOHSA-VI-NEXT:    s_add_u32 s4, s8, 0x60
7195; GCN-NOHSA-VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
7196; GCN-NOHSA-VI-NEXT:    s_addc_u32 s5, s9, 0
7197; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s4
7198; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s22
7199; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s23
7200; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s24
7201; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s25
7202; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s5
7203; GCN-NOHSA-VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
7204; GCN-NOHSA-VI-NEXT:    s_nop 0
7205; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s2
7206; GCN-NOHSA-VI-NEXT:    s_add_u32 s2, s8, 48
7207; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s3
7208; GCN-NOHSA-VI-NEXT:    s_addc_u32 s3, s9, 0
7209; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s3
7210; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s2
7211; GCN-NOHSA-VI-NEXT:    s_add_u32 s2, s8, 32
7212; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s20
7213; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s21
7214; GCN-NOHSA-VI-NEXT:    s_addc_u32 s3, s9, 0
7215; GCN-NOHSA-VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
7216; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s3
7217; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s2
7218; GCN-NOHSA-VI-NEXT:    s_add_u32 s2, s8, 16
7219; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s16
7220; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s17
7221; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s18
7222; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s19
7223; GCN-NOHSA-VI-NEXT:    s_addc_u32 s3, s9, 0
7224; GCN-NOHSA-VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
7225; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s3
7226; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s14
7227; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s15
7228; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s12
7229; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s13
7230; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s2
7231; GCN-NOHSA-VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
7232; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s8
7233; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s10
7234; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s11
7235; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s0
7236; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s1
7237; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s9
7238; GCN-NOHSA-VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
7239; GCN-NOHSA-VI-NEXT:    s_endpgm
7240;
7241; EG-LABEL: constant_sextload_v16i16_to_v16i64:
7242; EG:       ; %bb.0:
7243; EG-NEXT:    ALU 0, @16, KC0[CB0:0-32], KC1[]
7244; EG-NEXT:    TEX 1 @12
7245; EG-NEXT:    ALU 65, @17, KC0[CB0:0-32], KC1[]
7246; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T26.XYZW, T12.X, 0
7247; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T25.XYZW, T20.X, 0
7248; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T11.XYZW, T18.X, 0
7249; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T24.XYZW, T17.X, 0
7250; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T23.XYZW, T16.X, 0
7251; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T22.XYZW, T15.X, 0
7252; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T21.XYZW, T14.X, 0
7253; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T19.XYZW, T13.X, 1
7254; EG-NEXT:    CF_END
7255; EG-NEXT:    Fetch clause starting at 12:
7256; EG-NEXT:     VTX_READ_128 T12.XYZW, T11.X, 16, #1
7257; EG-NEXT:     VTX_READ_128 T11.XYZW, T11.X, 0, #1
7258; EG-NEXT:    ALU clause starting at 16:
7259; EG-NEXT:     MOV * T11.X, KC0[2].Z,
7260; EG-NEXT:    ALU clause starting at 17:
7261; EG-NEXT:     LSHR T13.X, KC0[2].Y, literal.x,
7262; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7263; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
7264; EG-NEXT:     LSHR T14.X, PV.W, literal.x,
7265; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7266; EG-NEXT:    2(2.802597e-45), 32(4.484155e-44)
7267; EG-NEXT:     LSHR T15.X, PV.W, literal.x,
7268; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7269; EG-NEXT:    2(2.802597e-45), 48(6.726233e-44)
7270; EG-NEXT:     LSHR T16.X, PV.W, literal.x,
7271; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7272; EG-NEXT:    2(2.802597e-45), 64(8.968310e-44)
7273; EG-NEXT:     LSHR T17.X, PV.W, literal.x,
7274; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7275; EG-NEXT:    2(2.802597e-45), 80(1.121039e-43)
7276; EG-NEXT:     LSHR T18.X, PV.W, literal.x,
7277; EG-NEXT:     ADD_INT T0.W, KC0[2].Y, literal.y,
7278; EG-NEXT:     ASHR * T19.W, T11.X, literal.z,
7279; EG-NEXT:    2(2.802597e-45), 96(1.345247e-43)
7280; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
7281; EG-NEXT:     LSHR T20.X, PV.W, literal.x,
7282; EG-NEXT:     ASHR T19.Z, T11.X, literal.y,
7283; EG-NEXT:     ASHR * T21.W, T11.Y, literal.z,
7284; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
7285; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
7286; EG-NEXT:     BFE_INT T19.X, T11.X, 0.0, literal.x,
7287; EG-NEXT:     ASHR T21.Z, T11.Y, literal.x,
7288; EG-NEXT:     ASHR * T22.W, T11.Z, literal.y,
7289; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
7290; EG-NEXT:     BFE_INT T21.X, T11.Y, 0.0, literal.x,
7291; EG-NEXT:     ASHR T19.Y, PV.X, literal.y,
7292; EG-NEXT:     ASHR T22.Z, T11.Z, literal.x,
7293; EG-NEXT:     ASHR * T23.W, T11.W, literal.y,
7294; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
7295; EG-NEXT:     BFE_INT T22.X, T11.Z, 0.0, literal.x,
7296; EG-NEXT:     ASHR T21.Y, PV.X, literal.y,
7297; EG-NEXT:     ASHR T23.Z, T11.W, literal.x,
7298; EG-NEXT:     ASHR * T24.W, T12.X, literal.y,
7299; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
7300; EG-NEXT:     BFE_INT T23.X, T11.W, 0.0, literal.x,
7301; EG-NEXT:     ASHR T22.Y, PV.X, literal.y,
7302; EG-NEXT:     ASHR T24.Z, T12.X, literal.x,
7303; EG-NEXT:     ASHR * T11.W, T12.Y, literal.y,
7304; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
7305; EG-NEXT:     BFE_INT T24.X, T12.X, 0.0, literal.x,
7306; EG-NEXT:     ASHR T23.Y, PV.X, literal.y,
7307; EG-NEXT:     ASHR T11.Z, T12.Y, literal.x,
7308; EG-NEXT:     ASHR * T25.W, T12.Z, literal.y,
7309; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
7310; EG-NEXT:     BFE_INT T11.X, T12.Y, 0.0, literal.x,
7311; EG-NEXT:     ASHR T24.Y, PV.X, literal.y,
7312; EG-NEXT:     ASHR T25.Z, T12.Z, literal.x,
7313; EG-NEXT:     ASHR * T26.W, T12.W, literal.y,
7314; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
7315; EG-NEXT:     BFE_INT T25.X, T12.Z, 0.0, literal.x,
7316; EG-NEXT:     ASHR T11.Y, PV.X, literal.y,
7317; EG-NEXT:     ASHR * T26.Z, T12.W, literal.x,
7318; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
7319; EG-NEXT:     BFE_INT T26.X, T12.W, 0.0, literal.x,
7320; EG-NEXT:     ASHR T25.Y, PV.X, literal.y,
7321; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.z,
7322; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
7323; EG-NEXT:    112(1.569454e-43), 0(0.000000e+00)
7324; EG-NEXT:     LSHR T12.X, PV.W, literal.x,
7325; EG-NEXT:     ASHR * T26.Y, PV.X, literal.y,
7326; EG-NEXT:    2(2.802597e-45), 31(4.344025e-44)
7327;
7328; GFX12-LABEL: constant_sextload_v16i16_to_v16i64:
7329; GFX12:       ; %bb.0:
7330; GFX12-NEXT:    s_load_b128 s[8:11], s[4:5], 0x24
7331; GFX12-NEXT:    s_wait_kmcnt 0x0
7332; GFX12-NEXT:    s_load_b256 s[0:7], s[10:11], 0x0
7333; GFX12-NEXT:    s_wait_kmcnt 0x0
7334; GFX12-NEXT:    s_mov_b32 s30, s5
7335; GFX12-NEXT:    s_lshr_b32 s34, s5, 16
7336; GFX12-NEXT:    s_bfe_i64 s[28:29], s[4:5], 0x100000
7337; GFX12-NEXT:    s_lshr_b32 s4, s4, 16
7338; GFX12-NEXT:    s_bfe_i64 s[22:23], s[6:7], 0x100000
7339; GFX12-NEXT:    s_mov_b32 s24, s7
7340; GFX12-NEXT:    s_lshr_b32 s26, s7, 16
7341; GFX12-NEXT:    s_bfe_i64 s[30:31], s[30:31], 0x100000
7342; GFX12-NEXT:    s_bfe_i64 s[34:35], s[34:35], 0x100000
7343; GFX12-NEXT:    s_lshr_b32 s6, s6, 16
7344; GFX12-NEXT:    s_bfe_i64 s[4:5], s[4:5], 0x100000
7345; GFX12-NEXT:    v_dual_mov_b32 v24, 0 :: v_dual_mov_b32 v1, s29
7346; GFX12-NEXT:    s_mov_b32 s18, s3
7347; GFX12-NEXT:    s_lshr_b32 s20, s3, 16
7348; GFX12-NEXT:    s_bfe_i64 s[24:25], s[24:25], 0x100000
7349; GFX12-NEXT:    s_bfe_i64 s[26:27], s[26:27], 0x100000
7350; GFX12-NEXT:    v_dual_mov_b32 v4, s22 :: v_dual_mov_b32 v9, s31
7351; GFX12-NEXT:    v_dual_mov_b32 v8, s30 :: v_dual_mov_b32 v11, s35
7352; GFX12-NEXT:    v_dual_mov_b32 v10, s34 :: v_dual_mov_b32 v3, s5
7353; GFX12-NEXT:    s_bfe_i64 s[16:17], s[2:3], 0x100000
7354; GFX12-NEXT:    s_lshr_b32 s2, s2, 16
7355; GFX12-NEXT:    s_bfe_i64 s[6:7], s[6:7], 0x100000
7356; GFX12-NEXT:    v_dual_mov_b32 v0, s28 :: v_dual_mov_b32 v5, s23
7357; GFX12-NEXT:    v_dual_mov_b32 v2, s4 :: v_dual_mov_b32 v13, s25
7358; GFX12-NEXT:    s_mov_b32 s12, s1
7359; GFX12-NEXT:    s_lshr_b32 s14, s1, 16
7360; GFX12-NEXT:    s_bfe_i64 s[18:19], s[18:19], 0x100000
7361; GFX12-NEXT:    s_bfe_i64 s[20:21], s[20:21], 0x100000
7362; GFX12-NEXT:    v_dual_mov_b32 v12, s24 :: v_dual_mov_b32 v15, s27
7363; GFX12-NEXT:    v_dual_mov_b32 v14, s26 :: v_dual_mov_b32 v7, s7
7364; GFX12-NEXT:    s_bfe_i64 s[10:11], s[0:1], 0x100000
7365; GFX12-NEXT:    s_lshr_b32 s0, s0, 16
7366; GFX12-NEXT:    s_bfe_i64 s[2:3], s[2:3], 0x100000
7367; GFX12-NEXT:    v_dual_mov_b32 v6, s6 :: v_dual_mov_b32 v17, s19
7368; GFX12-NEXT:    s_bfe_i64 s[12:13], s[12:13], 0x100000
7369; GFX12-NEXT:    s_bfe_i64 s[14:15], s[14:15], 0x100000
7370; GFX12-NEXT:    v_dual_mov_b32 v16, s18 :: v_dual_mov_b32 v19, s21
7371; GFX12-NEXT:    v_mov_b32_e32 v18, s20
7372; GFX12-NEXT:    s_bfe_i64 s[0:1], s[0:1], 0x100000
7373; GFX12-NEXT:    s_clause 0x1
7374; GFX12-NEXT:    global_store_b128 v24, v[8:11], s[8:9] offset:80
7375; GFX12-NEXT:    global_store_b128 v24, v[0:3], s[8:9] offset:64
7376; GFX12-NEXT:    v_dual_mov_b32 v1, s17 :: v_dual_mov_b32 v0, s16
7377; GFX12-NEXT:    v_dual_mov_b32 v3, s3 :: v_dual_mov_b32 v2, s2
7378; GFX12-NEXT:    v_dual_mov_b32 v9, s13 :: v_dual_mov_b32 v8, s12
7379; GFX12-NEXT:    v_dual_mov_b32 v11, s15 :: v_dual_mov_b32 v10, s14
7380; GFX12-NEXT:    v_dual_mov_b32 v21, s11 :: v_dual_mov_b32 v20, s10
7381; GFX12-NEXT:    v_dual_mov_b32 v23, s1 :: v_dual_mov_b32 v22, s0
7382; GFX12-NEXT:    s_clause 0x5
7383; GFX12-NEXT:    global_store_b128 v24, v[12:15], s[8:9] offset:112
7384; GFX12-NEXT:    global_store_b128 v24, v[4:7], s[8:9] offset:96
7385; GFX12-NEXT:    global_store_b128 v24, v[16:19], s[8:9] offset:48
7386; GFX12-NEXT:    global_store_b128 v24, v[0:3], s[8:9] offset:32
7387; GFX12-NEXT:    global_store_b128 v24, v[8:11], s[8:9] offset:16
7388; GFX12-NEXT:    global_store_b128 v24, v[20:23], s[8:9]
7389; GFX12-NEXT:    s_endpgm
7390  %load = load <16 x i16>, ptr addrspace(4) %in
7391  %ext = sext <16 x i16> %load to <16 x i64>
7392  store <16 x i64> %ext, ptr addrspace(1) %out
7393  ret void
7394}
7395
7396define amdgpu_kernel void @constant_zextload_v32i16_to_v32i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
7397; GCN-NOHSA-SI-LABEL: constant_zextload_v32i16_to_v32i64:
7398; GCN-NOHSA-SI:       ; %bb.0:
7399; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[16:19], s[4:5], 0x9
7400; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
7401; GCN-NOHSA-SI-NEXT:    s_load_dwordx16 s[0:15], s[18:19], 0x0
7402; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
7403; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s18, s1, 16
7404; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s19, s3, 16
7405; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s20, s5, 16
7406; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s21, s7, 16
7407; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s22, s9, 16
7408; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s23, s11, 16
7409; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s24, s13, 16
7410; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s25, s15, 16
7411; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s26, s14, 16
7412; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s27, s12, 16
7413; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s28, s10, 16
7414; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s29, s8, 16
7415; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s30, s6, 16
7416; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s31, s4, 16
7417; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s33, s2, 16
7418; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s34, s0, 16
7419; GCN-NOHSA-SI-NEXT:    s_and_b32 s35, s0, 0xffff
7420; GCN-NOHSA-SI-NEXT:    s_and_b32 s36, s2, 0xffff
7421; GCN-NOHSA-SI-NEXT:    s_and_b32 s4, s4, 0xffff
7422; GCN-NOHSA-SI-NEXT:    s_and_b32 s6, s6, 0xffff
7423; GCN-NOHSA-SI-NEXT:    s_and_b32 s8, s8, 0xffff
7424; GCN-NOHSA-SI-NEXT:    s_and_b32 s10, s10, 0xffff
7425; GCN-NOHSA-SI-NEXT:    s_and_b32 s12, s12, 0xffff
7426; GCN-NOHSA-SI-NEXT:    s_and_b32 s14, s14, 0xffff
7427; GCN-NOHSA-SI-NEXT:    s_and_b32 s37, s1, 0xffff
7428; GCN-NOHSA-SI-NEXT:    s_and_b32 s38, s3, 0xffff
7429; GCN-NOHSA-SI-NEXT:    s_and_b32 s5, s5, 0xffff
7430; GCN-NOHSA-SI-NEXT:    s_and_b32 s7, s7, 0xffff
7431; GCN-NOHSA-SI-NEXT:    s_and_b32 s9, s9, 0xffff
7432; GCN-NOHSA-SI-NEXT:    s_and_b32 s11, s11, 0xffff
7433; GCN-NOHSA-SI-NEXT:    s_and_b32 s13, s13, 0xffff
7434; GCN-NOHSA-SI-NEXT:    s_and_b32 s15, s15, 0xffff
7435; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
7436; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, 0
7437; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
7438; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, v1
7439; GCN-NOHSA-SI-NEXT:    s_mov_b32 s0, s16
7440; GCN-NOHSA-SI-NEXT:    s_mov_b32 s1, s17
7441; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s15
7442; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s25
7443; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:240
7444; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
7445; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s13
7446; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s24
7447; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:208
7448; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
7449; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s11
7450; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s23
7451; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:176
7452; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
7453; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s9
7454; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s22
7455; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:144
7456; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
7457; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s7
7458; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s21
7459; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112
7460; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
7461; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s5
7462; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s20
7463; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80
7464; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
7465; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s38
7466; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s19
7467; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
7468; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
7469; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s37
7470; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s18
7471; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
7472; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
7473; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s14
7474; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s26
7475; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:224
7476; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
7477; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s12
7478; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s27
7479; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:192
7480; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
7481; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s10
7482; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s28
7483; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:160
7484; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
7485; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s8
7486; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s29
7487; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:128
7488; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
7489; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s6
7490; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s30
7491; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96
7492; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
7493; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s4
7494; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s31
7495; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64
7496; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
7497; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s36
7498; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s33
7499; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
7500; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
7501; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s35
7502; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s34
7503; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
7504; GCN-NOHSA-SI-NEXT:    s_endpgm
7505;
7506; GCN-HSA-LABEL: constant_zextload_v32i16_to_v32i64:
7507; GCN-HSA:       ; %bb.0:
7508; GCN-HSA-NEXT:    s_load_dwordx4 s[16:19], s[8:9], 0x0
7509; GCN-HSA-NEXT:    v_mov_b32_e32 v1, 0
7510; GCN-HSA-NEXT:    v_mov_b32_e32 v3, v1
7511; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
7512; GCN-HSA-NEXT:    s_load_dwordx16 s[0:15], s[18:19], 0x0
7513; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
7514; GCN-HSA-NEXT:    s_lshr_b32 s20, s1, 16
7515; GCN-HSA-NEXT:    s_lshr_b32 s21, s3, 16
7516; GCN-HSA-NEXT:    s_lshr_b32 s22, s5, 16
7517; GCN-HSA-NEXT:    s_lshr_b32 s23, s7, 16
7518; GCN-HSA-NEXT:    s_lshr_b32 s24, s9, 16
7519; GCN-HSA-NEXT:    s_lshr_b32 s25, s11, 16
7520; GCN-HSA-NEXT:    s_lshr_b32 s26, s13, 16
7521; GCN-HSA-NEXT:    s_lshr_b32 s27, s15, 16
7522; GCN-HSA-NEXT:    s_lshr_b32 s28, s14, 16
7523; GCN-HSA-NEXT:    s_lshr_b32 s29, s12, 16
7524; GCN-HSA-NEXT:    s_lshr_b32 s30, s10, 16
7525; GCN-HSA-NEXT:    s_lshr_b32 s31, s8, 16
7526; GCN-HSA-NEXT:    s_lshr_b32 s33, s6, 16
7527; GCN-HSA-NEXT:    s_lshr_b32 s34, s4, 16
7528; GCN-HSA-NEXT:    s_lshr_b32 s19, s2, 16
7529; GCN-HSA-NEXT:    s_lshr_b32 s18, s0, 16
7530; GCN-HSA-NEXT:    s_and_b32 s0, s0, 0xffff
7531; GCN-HSA-NEXT:    s_and_b32 s2, s2, 0xffff
7532; GCN-HSA-NEXT:    s_and_b32 s35, s4, 0xffff
7533; GCN-HSA-NEXT:    s_and_b32 s6, s6, 0xffff
7534; GCN-HSA-NEXT:    s_and_b32 s8, s8, 0xffff
7535; GCN-HSA-NEXT:    s_and_b32 s10, s10, 0xffff
7536; GCN-HSA-NEXT:    s_and_b32 s12, s12, 0xffff
7537; GCN-HSA-NEXT:    s_and_b32 s14, s14, 0xffff
7538; GCN-HSA-NEXT:    s_and_b32 s1, s1, 0xffff
7539; GCN-HSA-NEXT:    s_and_b32 s3, s3, 0xffff
7540; GCN-HSA-NEXT:    s_and_b32 s36, s5, 0xffff
7541; GCN-HSA-NEXT:    s_and_b32 s7, s7, 0xffff
7542; GCN-HSA-NEXT:    s_and_b32 s9, s9, 0xffff
7543; GCN-HSA-NEXT:    s_and_b32 s11, s11, 0xffff
7544; GCN-HSA-NEXT:    s_and_b32 s13, s13, 0xffff
7545; GCN-HSA-NEXT:    s_and_b32 s15, s15, 0xffff
7546; GCN-HSA-NEXT:    s_add_u32 s4, s16, 0xf0
7547; GCN-HSA-NEXT:    s_addc_u32 s5, s17, 0
7548; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s4
7549; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s5
7550; GCN-HSA-NEXT:    s_add_u32 s4, s16, 0xd0
7551; GCN-HSA-NEXT:    s_addc_u32 s5, s17, 0
7552; GCN-HSA-NEXT:    v_mov_b32_e32 v7, s5
7553; GCN-HSA-NEXT:    v_mov_b32_e32 v6, s4
7554; GCN-HSA-NEXT:    s_add_u32 s4, s16, 0xb0
7555; GCN-HSA-NEXT:    s_addc_u32 s5, s17, 0
7556; GCN-HSA-NEXT:    v_mov_b32_e32 v9, s5
7557; GCN-HSA-NEXT:    v_mov_b32_e32 v8, s4
7558; GCN-HSA-NEXT:    s_add_u32 s4, s16, 0x90
7559; GCN-HSA-NEXT:    s_addc_u32 s5, s17, 0
7560; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s15
7561; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s27
7562; GCN-HSA-NEXT:    v_mov_b32_e32 v11, s5
7563; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
7564; GCN-HSA-NEXT:    v_mov_b32_e32 v10, s4
7565; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s13
7566; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s26
7567; GCN-HSA-NEXT:    flat_store_dwordx4 v[6:7], v[0:3]
7568; GCN-HSA-NEXT:    s_add_u32 s4, s16, 0x70
7569; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s11
7570; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s25
7571; GCN-HSA-NEXT:    flat_store_dwordx4 v[8:9], v[0:3]
7572; GCN-HSA-NEXT:    s_addc_u32 s5, s17, 0
7573; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s9
7574; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s24
7575; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s4
7576; GCN-HSA-NEXT:    flat_store_dwordx4 v[10:11], v[0:3]
7577; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s5
7578; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s7
7579; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s23
7580; GCN-HSA-NEXT:    s_add_u32 s4, s16, 0x50
7581; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
7582; GCN-HSA-NEXT:    s_addc_u32 s5, s17, 0
7583; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s4
7584; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s36
7585; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s22
7586; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s5
7587; GCN-HSA-NEXT:    s_add_u32 s4, s16, 48
7588; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
7589; GCN-HSA-NEXT:    s_addc_u32 s5, s17, 0
7590; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s4
7591; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s3
7592; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s21
7593; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s5
7594; GCN-HSA-NEXT:    s_add_u32 s4, s16, 16
7595; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
7596; GCN-HSA-NEXT:    s_addc_u32 s5, s17, 0
7597; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s4
7598; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s1
7599; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s20
7600; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s5
7601; GCN-HSA-NEXT:    s_add_u32 s4, s16, 0xe0
7602; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
7603; GCN-HSA-NEXT:    s_addc_u32 s5, s17, 0
7604; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s4
7605; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s14
7606; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s28
7607; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s5
7608; GCN-HSA-NEXT:    s_add_u32 s4, s16, 0xc0
7609; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
7610; GCN-HSA-NEXT:    s_addc_u32 s5, s17, 0
7611; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s4
7612; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s12
7613; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s29
7614; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s5
7615; GCN-HSA-NEXT:    s_add_u32 s4, s16, 0xa0
7616; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
7617; GCN-HSA-NEXT:    s_addc_u32 s5, s17, 0
7618; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s4
7619; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s10
7620; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s30
7621; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s5
7622; GCN-HSA-NEXT:    s_add_u32 s4, s16, 0x80
7623; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
7624; GCN-HSA-NEXT:    s_addc_u32 s5, s17, 0
7625; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s4
7626; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s8
7627; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s31
7628; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s5
7629; GCN-HSA-NEXT:    s_add_u32 s4, s16, 0x60
7630; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
7631; GCN-HSA-NEXT:    s_addc_u32 s5, s17, 0
7632; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s4
7633; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s6
7634; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s33
7635; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s5
7636; GCN-HSA-NEXT:    s_add_u32 s4, s16, 64
7637; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
7638; GCN-HSA-NEXT:    s_addc_u32 s5, s17, 0
7639; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s4
7640; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s35
7641; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s34
7642; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s5
7643; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
7644; GCN-HSA-NEXT:    s_nop 0
7645; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
7646; GCN-HSA-NEXT:    s_add_u32 s2, s16, 32
7647; GCN-HSA-NEXT:    s_addc_u32 s3, s17, 0
7648; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
7649; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s19
7650; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
7651; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
7652; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s16
7653; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s0
7654; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s18
7655; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s17
7656; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
7657; GCN-HSA-NEXT:    s_endpgm
7658;
7659; GCN-NOHSA-VI-LABEL: constant_zextload_v32i16_to_v32i64:
7660; GCN-NOHSA-VI:       ; %bb.0:
7661; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[16:19], s[4:5], 0x24
7662; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, 0
7663; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, v1
7664; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
7665; GCN-NOHSA-VI-NEXT:    s_load_dwordx16 s[0:15], s[18:19], 0x0
7666; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
7667; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s18, s0, 16
7668; GCN-NOHSA-VI-NEXT:    s_and_b32 s0, s0, 0xffff
7669; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s19, s1, 16
7670; GCN-NOHSA-VI-NEXT:    s_and_b32 s1, s1, 0xffff
7671; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s20, s2, 16
7672; GCN-NOHSA-VI-NEXT:    s_and_b32 s2, s2, 0xffff
7673; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s21, s3, 16
7674; GCN-NOHSA-VI-NEXT:    s_and_b32 s3, s3, 0xffff
7675; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s22, s4, 16
7676; GCN-NOHSA-VI-NEXT:    s_and_b32 s23, s4, 0xffff
7677; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s24, s5, 16
7678; GCN-NOHSA-VI-NEXT:    s_and_b32 s25, s5, 0xffff
7679; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s26, s6, 16
7680; GCN-NOHSA-VI-NEXT:    s_and_b32 s6, s6, 0xffff
7681; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s27, s7, 16
7682; GCN-NOHSA-VI-NEXT:    s_and_b32 s7, s7, 0xffff
7683; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s28, s8, 16
7684; GCN-NOHSA-VI-NEXT:    s_and_b32 s8, s8, 0xffff
7685; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s29, s9, 16
7686; GCN-NOHSA-VI-NEXT:    s_and_b32 s9, s9, 0xffff
7687; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s30, s10, 16
7688; GCN-NOHSA-VI-NEXT:    s_and_b32 s10, s10, 0xffff
7689; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s31, s11, 16
7690; GCN-NOHSA-VI-NEXT:    s_and_b32 s11, s11, 0xffff
7691; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s33, s12, 16
7692; GCN-NOHSA-VI-NEXT:    s_and_b32 s12, s12, 0xffff
7693; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s34, s13, 16
7694; GCN-NOHSA-VI-NEXT:    s_and_b32 s13, s13, 0xffff
7695; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s35, s14, 16
7696; GCN-NOHSA-VI-NEXT:    s_and_b32 s14, s14, 0xffff
7697; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s4, s15, 16
7698; GCN-NOHSA-VI-NEXT:    s_and_b32 s5, s15, 0xffff
7699; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s4
7700; GCN-NOHSA-VI-NEXT:    s_add_u32 s4, s16, 0xf0
7701; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s5
7702; GCN-NOHSA-VI-NEXT:    s_addc_u32 s5, s17, 0
7703; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s4
7704; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s5
7705; GCN-NOHSA-VI-NEXT:    s_add_u32 s4, s16, 0xe0
7706; GCN-NOHSA-VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
7707; GCN-NOHSA-VI-NEXT:    s_addc_u32 s5, s17, 0
7708; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s4
7709; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s14
7710; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s35
7711; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s5
7712; GCN-NOHSA-VI-NEXT:    s_add_u32 s4, s16, 0xd0
7713; GCN-NOHSA-VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
7714; GCN-NOHSA-VI-NEXT:    s_addc_u32 s5, s17, 0
7715; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s4
7716; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s13
7717; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s34
7718; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s5
7719; GCN-NOHSA-VI-NEXT:    s_add_u32 s4, s16, 0xc0
7720; GCN-NOHSA-VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
7721; GCN-NOHSA-VI-NEXT:    s_addc_u32 s5, s17, 0
7722; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s4
7723; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s12
7724; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s33
7725; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s5
7726; GCN-NOHSA-VI-NEXT:    s_add_u32 s4, s16, 0xb0
7727; GCN-NOHSA-VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
7728; GCN-NOHSA-VI-NEXT:    s_addc_u32 s5, s17, 0
7729; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s4
7730; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s11
7731; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s31
7732; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s5
7733; GCN-NOHSA-VI-NEXT:    s_add_u32 s4, s16, 0xa0
7734; GCN-NOHSA-VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
7735; GCN-NOHSA-VI-NEXT:    s_addc_u32 s5, s17, 0
7736; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s4
7737; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s10
7738; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s30
7739; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s5
7740; GCN-NOHSA-VI-NEXT:    s_add_u32 s4, s16, 0x90
7741; GCN-NOHSA-VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
7742; GCN-NOHSA-VI-NEXT:    s_addc_u32 s5, s17, 0
7743; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s4
7744; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s9
7745; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s29
7746; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s5
7747; GCN-NOHSA-VI-NEXT:    s_add_u32 s4, s16, 0x80
7748; GCN-NOHSA-VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
7749; GCN-NOHSA-VI-NEXT:    s_addc_u32 s5, s17, 0
7750; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s4
7751; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s8
7752; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s28
7753; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s5
7754; GCN-NOHSA-VI-NEXT:    s_add_u32 s4, s16, 0x70
7755; GCN-NOHSA-VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
7756; GCN-NOHSA-VI-NEXT:    s_addc_u32 s5, s17, 0
7757; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s4
7758; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s7
7759; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s27
7760; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s5
7761; GCN-NOHSA-VI-NEXT:    s_add_u32 s4, s16, 0x60
7762; GCN-NOHSA-VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
7763; GCN-NOHSA-VI-NEXT:    s_addc_u32 s5, s17, 0
7764; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s4
7765; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s6
7766; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s26
7767; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s5
7768; GCN-NOHSA-VI-NEXT:    s_add_u32 s4, s16, 0x50
7769; GCN-NOHSA-VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
7770; GCN-NOHSA-VI-NEXT:    s_addc_u32 s5, s17, 0
7771; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s4
7772; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s25
7773; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s24
7774; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s5
7775; GCN-NOHSA-VI-NEXT:    s_add_u32 s4, s16, 64
7776; GCN-NOHSA-VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
7777; GCN-NOHSA-VI-NEXT:    s_addc_u32 s5, s17, 0
7778; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s4
7779; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s23
7780; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s22
7781; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s5
7782; GCN-NOHSA-VI-NEXT:    s_add_u32 s4, s16, 48
7783; GCN-NOHSA-VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
7784; GCN-NOHSA-VI-NEXT:    s_addc_u32 s5, s17, 0
7785; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s4
7786; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s3
7787; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s21
7788; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s5
7789; GCN-NOHSA-VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
7790; GCN-NOHSA-VI-NEXT:    s_nop 0
7791; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s2
7792; GCN-NOHSA-VI-NEXT:    s_add_u32 s2, s16, 32
7793; GCN-NOHSA-VI-NEXT:    s_addc_u32 s3, s17, 0
7794; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s3
7795; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s2
7796; GCN-NOHSA-VI-NEXT:    s_add_u32 s2, s16, 16
7797; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s20
7798; GCN-NOHSA-VI-NEXT:    s_addc_u32 s3, s17, 0
7799; GCN-NOHSA-VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
7800; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s3
7801; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s1
7802; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s19
7803; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s2
7804; GCN-NOHSA-VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
7805; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s16
7806; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s0
7807; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s18
7808; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s17
7809; GCN-NOHSA-VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
7810; GCN-NOHSA-VI-NEXT:    s_endpgm
7811;
7812; EG-LABEL: constant_zextload_v32i16_to_v32i64:
7813; EG:       ; %bb.0:
7814; EG-NEXT:    ALU 0, @30, KC0[CB0:0-32], KC1[]
7815; EG-NEXT:    TEX 2 @22
7816; EG-NEXT:    ALU 33, @31, KC0[], KC1[]
7817; EG-NEXT:    TEX 0 @28
7818; EG-NEXT:    ALU 92, @65, KC0[CB0:0-32], KC1[]
7819; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T23.XYZW, T50.X, 0
7820; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T24.XYZW, T49.X, 0
7821; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T25.XYZW, T48.X, 0
7822; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T20.XYZW, T47.X, 0
7823; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T26.XYZW, T46.X, 0
7824; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T27.XYZW, T45.X, 0
7825; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T28.XYZW, T44.X, 0
7826; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T22.XYZW, T43.X, 0
7827; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T19.XYZW, T42.X, 0
7828; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T30.XYZW, T41.X, 0
7829; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T31.XYZW, T40.X, 0
7830; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T21.XYZW, T39.X, 0
7831; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T32.XYZW, T38.X, 0
7832; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T33.XYZW, T37.X, 0
7833; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T34.XYZW, T36.X, 0
7834; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T29.XYZW, T35.X, 1
7835; EG-NEXT:    CF_END
7836; EG-NEXT:    Fetch clause starting at 22:
7837; EG-NEXT:     VTX_READ_128 T20.XYZW, T19.X, 48, #1
7838; EG-NEXT:     VTX_READ_128 T21.XYZW, T19.X, 16, #1
7839; EG-NEXT:     VTX_READ_128 T22.XYZW, T19.X, 32, #1
7840; EG-NEXT:    Fetch clause starting at 28:
7841; EG-NEXT:     VTX_READ_128 T29.XYZW, T19.X, 0, #1
7842; EG-NEXT:    ALU clause starting at 30:
7843; EG-NEXT:     MOV * T19.X, KC0[2].Z,
7844; EG-NEXT:    ALU clause starting at 31:
7845; EG-NEXT:     LSHR * T23.Z, T20.W, literal.x,
7846; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
7847; EG-NEXT:     AND_INT T23.X, T20.W, literal.x,
7848; EG-NEXT:     MOV T23.Y, 0.0,
7849; EG-NEXT:     LSHR T24.Z, T20.Z, literal.y,
7850; EG-NEXT:     AND_INT * T24.X, T20.Z, literal.x,
7851; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
7852; EG-NEXT:     MOV T24.Y, 0.0,
7853; EG-NEXT:     LSHR * T25.Z, T20.Y, literal.x,
7854; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
7855; EG-NEXT:     AND_INT T25.X, T20.Y, literal.x,
7856; EG-NEXT:     MOV T25.Y, 0.0,
7857; EG-NEXT:     LSHR T20.Z, T20.X, literal.y,
7858; EG-NEXT:     AND_INT * T20.X, T20.X, literal.x,
7859; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
7860; EG-NEXT:     MOV T20.Y, 0.0,
7861; EG-NEXT:     LSHR * T26.Z, T22.W, literal.x,
7862; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
7863; EG-NEXT:     AND_INT T26.X, T22.W, literal.x,
7864; EG-NEXT:     MOV T26.Y, 0.0,
7865; EG-NEXT:     LSHR T27.Z, T22.Z, literal.y,
7866; EG-NEXT:     AND_INT * T27.X, T22.Z, literal.x,
7867; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
7868; EG-NEXT:     MOV T27.Y, 0.0,
7869; EG-NEXT:     LSHR * T28.Z, T22.Y, literal.x,
7870; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
7871; EG-NEXT:     AND_INT T28.X, T22.Y, literal.x,
7872; EG-NEXT:     MOV T28.Y, 0.0,
7873; EG-NEXT:     LSHR T22.Z, T22.X, literal.y,
7874; EG-NEXT:     AND_INT * T22.X, T22.X, literal.x,
7875; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
7876; EG-NEXT:     MOV T22.Y, 0.0,
7877; EG-NEXT:     LSHR * T19.Z, T21.W, literal.x,
7878; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
7879; EG-NEXT:    ALU clause starting at 65:
7880; EG-NEXT:     AND_INT T19.X, T21.W, literal.x,
7881; EG-NEXT:     MOV T19.Y, 0.0,
7882; EG-NEXT:     LSHR T30.Z, T21.Z, literal.y,
7883; EG-NEXT:     AND_INT * T30.X, T21.Z, literal.x,
7884; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
7885; EG-NEXT:     MOV T30.Y, 0.0,
7886; EG-NEXT:     LSHR * T31.Z, T21.Y, literal.x,
7887; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
7888; EG-NEXT:     AND_INT T31.X, T21.Y, literal.x,
7889; EG-NEXT:     MOV T31.Y, 0.0,
7890; EG-NEXT:     LSHR T21.Z, T21.X, literal.y,
7891; EG-NEXT:     AND_INT * T21.X, T21.X, literal.x,
7892; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
7893; EG-NEXT:     MOV T21.Y, 0.0,
7894; EG-NEXT:     LSHR * T32.Z, T29.W, literal.x,
7895; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
7896; EG-NEXT:     AND_INT T32.X, T29.W, literal.x,
7897; EG-NEXT:     MOV T32.Y, 0.0,
7898; EG-NEXT:     LSHR T33.Z, T29.Z, literal.y,
7899; EG-NEXT:     AND_INT * T33.X, T29.Z, literal.x,
7900; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
7901; EG-NEXT:     MOV T33.Y, 0.0,
7902; EG-NEXT:     LSHR * T34.Z, T29.Y, literal.x,
7903; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
7904; EG-NEXT:     AND_INT T34.X, T29.Y, literal.x,
7905; EG-NEXT:     MOV T34.Y, 0.0,
7906; EG-NEXT:     LSHR T29.Z, T29.X, literal.y,
7907; EG-NEXT:     AND_INT * T29.X, T29.X, literal.x,
7908; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
7909; EG-NEXT:     MOV T29.Y, 0.0,
7910; EG-NEXT:     MOV T23.W, 0.0,
7911; EG-NEXT:     MOV * T24.W, 0.0,
7912; EG-NEXT:     MOV T25.W, 0.0,
7913; EG-NEXT:     MOV * T20.W, 0.0,
7914; EG-NEXT:     MOV T26.W, 0.0,
7915; EG-NEXT:     MOV * T27.W, 0.0,
7916; EG-NEXT:     MOV T28.W, 0.0,
7917; EG-NEXT:     MOV * T22.W, 0.0,
7918; EG-NEXT:     MOV T19.W, 0.0,
7919; EG-NEXT:     MOV * T30.W, 0.0,
7920; EG-NEXT:     MOV T31.W, 0.0,
7921; EG-NEXT:     MOV * T21.W, 0.0,
7922; EG-NEXT:     MOV T32.W, 0.0,
7923; EG-NEXT:     MOV * T33.W, 0.0,
7924; EG-NEXT:     MOV T34.W, 0.0,
7925; EG-NEXT:     MOV * T29.W, 0.0,
7926; EG-NEXT:     LSHR T35.X, KC0[2].Y, literal.x,
7927; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7928; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
7929; EG-NEXT:     LSHR T36.X, PV.W, literal.x,
7930; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7931; EG-NEXT:    2(2.802597e-45), 32(4.484155e-44)
7932; EG-NEXT:     LSHR T37.X, PV.W, literal.x,
7933; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7934; EG-NEXT:    2(2.802597e-45), 48(6.726233e-44)
7935; EG-NEXT:     LSHR T38.X, PV.W, literal.x,
7936; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7937; EG-NEXT:    2(2.802597e-45), 64(8.968310e-44)
7938; EG-NEXT:     LSHR T39.X, PV.W, literal.x,
7939; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7940; EG-NEXT:    2(2.802597e-45), 80(1.121039e-43)
7941; EG-NEXT:     LSHR T40.X, PV.W, literal.x,
7942; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7943; EG-NEXT:    2(2.802597e-45), 96(1.345247e-43)
7944; EG-NEXT:     LSHR T41.X, PV.W, literal.x,
7945; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7946; EG-NEXT:    2(2.802597e-45), 112(1.569454e-43)
7947; EG-NEXT:     LSHR T42.X, PV.W, literal.x,
7948; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7949; EG-NEXT:    2(2.802597e-45), 128(1.793662e-43)
7950; EG-NEXT:     LSHR T43.X, PV.W, literal.x,
7951; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7952; EG-NEXT:    2(2.802597e-45), 144(2.017870e-43)
7953; EG-NEXT:     LSHR T44.X, PV.W, literal.x,
7954; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7955; EG-NEXT:    2(2.802597e-45), 160(2.242078e-43)
7956; EG-NEXT:     LSHR T45.X, PV.W, literal.x,
7957; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7958; EG-NEXT:    2(2.802597e-45), 176(2.466285e-43)
7959; EG-NEXT:     LSHR T46.X, PV.W, literal.x,
7960; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7961; EG-NEXT:    2(2.802597e-45), 192(2.690493e-43)
7962; EG-NEXT:     LSHR T47.X, PV.W, literal.x,
7963; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7964; EG-NEXT:    2(2.802597e-45), 208(2.914701e-43)
7965; EG-NEXT:     LSHR T48.X, PV.W, literal.x,
7966; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7967; EG-NEXT:    2(2.802597e-45), 224(3.138909e-43)
7968; EG-NEXT:     LSHR T49.X, PV.W, literal.x,
7969; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
7970; EG-NEXT:    2(2.802597e-45), 240(3.363116e-43)
7971; EG-NEXT:     LSHR * T50.X, PV.W, literal.x,
7972; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
7973;
7974; GFX12-LABEL: constant_zextload_v32i16_to_v32i64:
7975; GFX12:       ; %bb.0:
7976; GFX12-NEXT:    s_load_b128 s[16:19], s[4:5], 0x24
7977; GFX12-NEXT:    s_wait_kmcnt 0x0
7978; GFX12-NEXT:    s_load_b512 s[0:15], s[18:19], 0x0
7979; GFX12-NEXT:    s_wait_kmcnt 0x0
7980; GFX12-NEXT:    s_and_b32 s18, s15, 0xffff
7981; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
7982; GFX12-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s18
7983; GFX12-NEXT:    s_lshr_b32 s15, s15, 16
7984; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
7985; GFX12-NEXT:    v_dual_mov_b32 v2, s15 :: v_dual_mov_b32 v3, v1
7986; GFX12-NEXT:    s_lshr_b32 s15, s14, 16
7987; GFX12-NEXT:    s_and_b32 s14, s14, 0xffff
7988; GFX12-NEXT:    global_store_b128 v1, v[0:3], s[16:17] offset:240
7989; GFX12-NEXT:    s_wait_alu 0xfffe
7990; GFX12-NEXT:    v_mov_b32_e32 v0, s14
7991; GFX12-NEXT:    v_mov_b32_e32 v2, s15
7992; GFX12-NEXT:    s_lshr_b32 s14, s13, 16
7993; GFX12-NEXT:    s_and_b32 s13, s13, 0xffff
7994; GFX12-NEXT:    global_store_b128 v1, v[0:3], s[16:17] offset:224
7995; GFX12-NEXT:    v_mov_b32_e32 v0, s13
7996; GFX12-NEXT:    s_wait_alu 0xfffe
7997; GFX12-NEXT:    v_mov_b32_e32 v2, s14
7998; GFX12-NEXT:    s_lshr_b32 s13, s12, 16
7999; GFX12-NEXT:    s_and_b32 s12, s12, 0xffff
8000; GFX12-NEXT:    global_store_b128 v1, v[0:3], s[16:17] offset:208
8001; GFX12-NEXT:    s_wait_alu 0xfffe
8002; GFX12-NEXT:    v_mov_b32_e32 v0, s12
8003; GFX12-NEXT:    v_mov_b32_e32 v2, s13
8004; GFX12-NEXT:    s_lshr_b32 s12, s11, 16
8005; GFX12-NEXT:    s_and_b32 s11, s11, 0xffff
8006; GFX12-NEXT:    global_store_b128 v1, v[0:3], s[16:17] offset:192
8007; GFX12-NEXT:    v_mov_b32_e32 v0, s11
8008; GFX12-NEXT:    s_wait_alu 0xfffe
8009; GFX12-NEXT:    v_mov_b32_e32 v2, s12
8010; GFX12-NEXT:    s_lshr_b32 s11, s10, 16
8011; GFX12-NEXT:    s_and_b32 s10, s10, 0xffff
8012; GFX12-NEXT:    global_store_b128 v1, v[0:3], s[16:17] offset:176
8013; GFX12-NEXT:    s_wait_alu 0xfffe
8014; GFX12-NEXT:    v_mov_b32_e32 v0, s10
8015; GFX12-NEXT:    v_mov_b32_e32 v2, s11
8016; GFX12-NEXT:    s_lshr_b32 s10, s9, 16
8017; GFX12-NEXT:    s_and_b32 s9, s9, 0xffff
8018; GFX12-NEXT:    global_store_b128 v1, v[0:3], s[16:17] offset:160
8019; GFX12-NEXT:    v_mov_b32_e32 v0, s9
8020; GFX12-NEXT:    s_wait_alu 0xfffe
8021; GFX12-NEXT:    v_mov_b32_e32 v2, s10
8022; GFX12-NEXT:    s_lshr_b32 s9, s8, 16
8023; GFX12-NEXT:    s_and_b32 s8, s8, 0xffff
8024; GFX12-NEXT:    global_store_b128 v1, v[0:3], s[16:17] offset:144
8025; GFX12-NEXT:    s_wait_alu 0xfffe
8026; GFX12-NEXT:    v_mov_b32_e32 v0, s8
8027; GFX12-NEXT:    v_mov_b32_e32 v2, s9
8028; GFX12-NEXT:    s_lshr_b32 s8, s7, 16
8029; GFX12-NEXT:    s_and_b32 s7, s7, 0xffff
8030; GFX12-NEXT:    global_store_b128 v1, v[0:3], s[16:17] offset:128
8031; GFX12-NEXT:    v_mov_b32_e32 v0, s7
8032; GFX12-NEXT:    s_wait_alu 0xfffe
8033; GFX12-NEXT:    v_mov_b32_e32 v2, s8
8034; GFX12-NEXT:    s_lshr_b32 s7, s6, 16
8035; GFX12-NEXT:    s_and_b32 s6, s6, 0xffff
8036; GFX12-NEXT:    global_store_b128 v1, v[0:3], s[16:17] offset:112
8037; GFX12-NEXT:    s_wait_alu 0xfffe
8038; GFX12-NEXT:    v_mov_b32_e32 v0, s6
8039; GFX12-NEXT:    v_mov_b32_e32 v2, s7
8040; GFX12-NEXT:    s_lshr_b32 s6, s5, 16
8041; GFX12-NEXT:    s_and_b32 s5, s5, 0xffff
8042; GFX12-NEXT:    global_store_b128 v1, v[0:3], s[16:17] offset:96
8043; GFX12-NEXT:    v_mov_b32_e32 v0, s5
8044; GFX12-NEXT:    s_wait_alu 0xfffe
8045; GFX12-NEXT:    v_mov_b32_e32 v2, s6
8046; GFX12-NEXT:    s_lshr_b32 s5, s4, 16
8047; GFX12-NEXT:    s_and_b32 s4, s4, 0xffff
8048; GFX12-NEXT:    global_store_b128 v1, v[0:3], s[16:17] offset:80
8049; GFX12-NEXT:    s_wait_alu 0xfffe
8050; GFX12-NEXT:    v_mov_b32_e32 v0, s4
8051; GFX12-NEXT:    v_mov_b32_e32 v2, s5
8052; GFX12-NEXT:    s_lshr_b32 s4, s3, 16
8053; GFX12-NEXT:    s_and_b32 s3, s3, 0xffff
8054; GFX12-NEXT:    global_store_b128 v1, v[0:3], s[16:17] offset:64
8055; GFX12-NEXT:    v_mov_b32_e32 v0, s3
8056; GFX12-NEXT:    s_wait_alu 0xfffe
8057; GFX12-NEXT:    v_mov_b32_e32 v2, s4
8058; GFX12-NEXT:    s_lshr_b32 s3, s2, 16
8059; GFX12-NEXT:    s_and_b32 s2, s2, 0xffff
8060; GFX12-NEXT:    global_store_b128 v1, v[0:3], s[16:17] offset:48
8061; GFX12-NEXT:    s_wait_alu 0xfffe
8062; GFX12-NEXT:    v_mov_b32_e32 v0, s2
8063; GFX12-NEXT:    v_mov_b32_e32 v2, s3
8064; GFX12-NEXT:    s_lshr_b32 s2, s1, 16
8065; GFX12-NEXT:    s_and_b32 s1, s1, 0xffff
8066; GFX12-NEXT:    global_store_b128 v1, v[0:3], s[16:17] offset:32
8067; GFX12-NEXT:    v_mov_b32_e32 v0, s1
8068; GFX12-NEXT:    s_wait_alu 0xfffe
8069; GFX12-NEXT:    v_mov_b32_e32 v2, s2
8070; GFX12-NEXT:    s_lshr_b32 s1, s0, 16
8071; GFX12-NEXT:    s_and_b32 s0, s0, 0xffff
8072; GFX12-NEXT:    global_store_b128 v1, v[0:3], s[16:17] offset:16
8073; GFX12-NEXT:    s_wait_alu 0xfffe
8074; GFX12-NEXT:    v_mov_b32_e32 v0, s0
8075; GFX12-NEXT:    v_mov_b32_e32 v2, s1
8076; GFX12-NEXT:    global_store_b128 v1, v[0:3], s[16:17]
8077; GFX12-NEXT:    s_endpgm
8078  %load = load <32 x i16>, ptr addrspace(4) %in
8079  %ext = zext <32 x i16> %load to <32 x i64>
8080  store <32 x i64> %ext, ptr addrspace(1) %out
8081  ret void
8082}
8083
8084define amdgpu_kernel void @constant_sextload_v32i16_to_v32i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
8085; GCN-NOHSA-SI-LABEL: constant_sextload_v32i16_to_v32i64:
8086; GCN-NOHSA-SI:       ; %bb.0:
8087; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[16:19], s[4:5], 0x9
8088; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
8089; GCN-NOHSA-SI-NEXT:    s_load_dwordx16 s[0:15], s[18:19], 0x0
8090; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
8091; GCN-NOHSA-SI-NEXT:    s_mov_b32 s18, s15
8092; GCN-NOHSA-SI-NEXT:    s_mov_b32 s20, s13
8093; GCN-NOHSA-SI-NEXT:    s_mov_b32 s50, s11
8094; GCN-NOHSA-SI-NEXT:    s_mov_b32 s52, s9
8095; GCN-NOHSA-SI-NEXT:    s_mov_b32 s56, s7
8096; GCN-NOHSA-SI-NEXT:    s_mov_b32 s54, s5
8097; GCN-NOHSA-SI-NEXT:    s_mov_b32 s42, s3
8098; GCN-NOHSA-SI-NEXT:    s_mov_b32 s44, s1
8099; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s26, s14, 16
8100; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s30, s12, 16
8101; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s34, s10, 16
8102; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s36, s8, 16
8103; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[68:69], s[20:21], 0x100000
8104; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[70:71], s[18:19], 0x100000
8105; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s60, s6, 16
8106; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s62, s4, 16
8107; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s64, s2, 16
8108; GCN-NOHSA-SI-NEXT:    s_lshr_b32 s66, s0, 16
8109; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[18:19], s[0:1], 0x100000
8110; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[20:21], s[2:3], 0x100000
8111; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[22:23], s[4:5], 0x100000
8112; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[24:25], s[6:7], 0x100000
8113; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[28:29], s[8:9], 0x100000
8114; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[38:39], s[10:11], 0x100000
8115; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[40:41], s[12:13], 0x100000
8116; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[46:47], s[14:15], 0x100000
8117; GCN-NOHSA-SI-NEXT:    s_ashr_i64 s[48:49], s[0:1], 48
8118; GCN-NOHSA-SI-NEXT:    s_ashr_i64 s[58:59], s[2:3], 48
8119; GCN-NOHSA-SI-NEXT:    s_ashr_i64 s[6:7], s[6:7], 48
8120; GCN-NOHSA-SI-NEXT:    s_ashr_i64 s[8:9], s[8:9], 48
8121; GCN-NOHSA-SI-NEXT:    s_ashr_i64 s[10:11], s[10:11], 48
8122; GCN-NOHSA-SI-NEXT:    s_ashr_i64 s[2:3], s[12:13], 48
8123; GCN-NOHSA-SI-NEXT:    s_ashr_i64 s[12:13], s[14:15], 48
8124; GCN-NOHSA-SI-NEXT:    s_ashr_i64 s[4:5], s[4:5], 48
8125; GCN-NOHSA-SI-NEXT:    s_mov_b32 s0, s16
8126; GCN-NOHSA-SI-NEXT:    s_mov_b32 s1, s17
8127; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s70
8128; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s71
8129; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s12
8130; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s13
8131; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v4, s68
8132; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v5, s69
8133; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v6, s2
8134; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v7, s3
8135; GCN-NOHSA-SI-NEXT:    s_mov_b32 s3, 0xf000
8136; GCN-NOHSA-SI-NEXT:    s_mov_b32 s2, -1
8137; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[12:13], s[56:57], 0x100000
8138; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[14:15], s[52:53], 0x100000
8139; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[16:17], s[50:51], 0x100000
8140; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[50:51], s[54:55], 0x100000
8141; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[44:45], s[44:45], 0x100000
8142; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[42:43], s[42:43], 0x100000
8143; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v8, s16
8144; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v9, s17
8145; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v10, s10
8146; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v11, s11
8147; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v12, s14
8148; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v13, s15
8149; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v14, s8
8150; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v15, s9
8151; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v16, s12
8152; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v17, s13
8153; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v18, s6
8154; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v19, s7
8155; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v20, s50
8156; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:240
8157; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v21, s51
8158; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v22, s4
8159; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v23, s5
8160; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:208
8161; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[4:5], s[66:67], 0x100000
8162; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[6:7], s[64:65], 0x100000
8163; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[8:9], s[62:63], 0x100000
8164; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[10:11], s[60:61], 0x100000
8165; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[12:13], s[36:37], 0x100000
8166; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[14:15], s[34:35], 0x100000
8167; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[16:17], s[30:31], 0x100000
8168; GCN-NOHSA-SI-NEXT:    s_bfe_i64 s[26:27], s[26:27], 0x100000
8169; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:176
8170; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:144
8171; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:112
8172; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[20:23], off, s[0:3], 0 offset:80
8173; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(5)
8174; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s42
8175; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s43
8176; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s58
8177; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s59
8178; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
8179; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
8180; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s44
8181; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s45
8182; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s48
8183; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s49
8184; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
8185; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
8186; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s46
8187; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s47
8188; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v4, s40
8189; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v5, s41
8190; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v8, s38
8191; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v9, s39
8192; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v12, s28
8193; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v13, s29
8194; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v16, s24
8195; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v17, s25
8196; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v20, s22
8197; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v21, s23
8198; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v24, s20
8199; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v25, s21
8200; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s26
8201; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s27
8202; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:224
8203; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
8204; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v0, s18
8205; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v1, s19
8206; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v6, s16
8207; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v7, s17
8208; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:192
8209; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v10, s14
8210; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v11, s15
8211; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:160
8212; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v14, s12
8213; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v15, s13
8214; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:128
8215; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v18, s10
8216; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v19, s11
8217; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:96
8218; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v22, s8
8219; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v23, s9
8220; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[20:23], off, s[0:3], 0 offset:64
8221; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v26, s6
8222; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v27, s7
8223; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[24:27], off, s[0:3], 0 offset:32
8224; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v2, s4
8225; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v3, s5
8226; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
8227; GCN-NOHSA-SI-NEXT:    s_endpgm
8228;
8229; GCN-HSA-LABEL: constant_sextload_v32i16_to_v32i64:
8230; GCN-HSA:       ; %bb.0:
8231; GCN-HSA-NEXT:    s_load_dwordx4 s[16:19], s[8:9], 0x0
8232; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
8233; GCN-HSA-NEXT:    s_load_dwordx16 s[0:15], s[18:19], 0x0
8234; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
8235; GCN-HSA-NEXT:    s_mov_b32 s40, s15
8236; GCN-HSA-NEXT:    s_mov_b32 s48, s13
8237; GCN-HSA-NEXT:    s_mov_b32 s50, s11
8238; GCN-HSA-NEXT:    s_mov_b32 s52, s9
8239; GCN-HSA-NEXT:    s_mov_b32 s54, s7
8240; GCN-HSA-NEXT:    s_mov_b32 s56, s5
8241; GCN-HSA-NEXT:    s_mov_b32 s44, s3
8242; GCN-HSA-NEXT:    s_mov_b32 s58, s1
8243; GCN-HSA-NEXT:    s_lshr_b32 s60, s14, 16
8244; GCN-HSA-NEXT:    s_lshr_b32 s62, s12, 16
8245; GCN-HSA-NEXT:    s_lshr_b32 s64, s10, 16
8246; GCN-HSA-NEXT:    s_lshr_b32 s66, s8, 16
8247; GCN-HSA-NEXT:    s_lshr_b32 s68, s6, 16
8248; GCN-HSA-NEXT:    s_lshr_b32 s70, s4, 16
8249; GCN-HSA-NEXT:    s_lshr_b32 s72, s2, 16
8250; GCN-HSA-NEXT:    s_lshr_b32 s74, s0, 16
8251; GCN-HSA-NEXT:    s_bfe_i64 s[18:19], s[0:1], 0x100000
8252; GCN-HSA-NEXT:    s_bfe_i64 s[20:21], s[2:3], 0x100000
8253; GCN-HSA-NEXT:    s_ashr_i64 s[36:37], s[0:1], 48
8254; GCN-HSA-NEXT:    s_ashr_i64 s[38:39], s[2:3], 48
8255; GCN-HSA-NEXT:    s_ashr_i64 s[0:1], s[14:15], 48
8256; GCN-HSA-NEXT:    s_bfe_i64 s[2:3], s[40:41], 0x100000
8257; GCN-HSA-NEXT:    s_bfe_i64 s[22:23], s[4:5], 0x100000
8258; GCN-HSA-NEXT:    s_bfe_i64 s[24:25], s[6:7], 0x100000
8259; GCN-HSA-NEXT:    s_bfe_i64 s[26:27], s[8:9], 0x100000
8260; GCN-HSA-NEXT:    s_bfe_i64 s[28:29], s[10:11], 0x100000
8261; GCN-HSA-NEXT:    s_bfe_i64 s[30:31], s[12:13], 0x100000
8262; GCN-HSA-NEXT:    s_bfe_i64 s[34:35], s[14:15], 0x100000
8263; GCN-HSA-NEXT:    s_ashr_i64 s[42:43], s[4:5], 48
8264; GCN-HSA-NEXT:    s_ashr_i64 s[46:47], s[6:7], 48
8265; GCN-HSA-NEXT:    s_ashr_i64 s[76:77], s[8:9], 48
8266; GCN-HSA-NEXT:    s_ashr_i64 s[78:79], s[10:11], 48
8267; GCN-HSA-NEXT:    s_ashr_i64 s[80:81], s[12:13], 48
8268; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
8269; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
8270; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s0
8271; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s1
8272; GCN-HSA-NEXT:    s_bfe_i64 s[0:1], s[74:75], 0x100000
8273; GCN-HSA-NEXT:    s_bfe_i64 s[2:3], s[72:73], 0x100000
8274; GCN-HSA-NEXT:    s_bfe_i64 s[4:5], s[70:71], 0x100000
8275; GCN-HSA-NEXT:    s_bfe_i64 s[6:7], s[68:69], 0x100000
8276; GCN-HSA-NEXT:    s_bfe_i64 s[8:9], s[66:67], 0x100000
8277; GCN-HSA-NEXT:    s_bfe_i64 s[10:11], s[64:65], 0x100000
8278; GCN-HSA-NEXT:    s_bfe_i64 s[12:13], s[62:63], 0x100000
8279; GCN-HSA-NEXT:    s_bfe_i64 s[14:15], s[60:61], 0x100000
8280; GCN-HSA-NEXT:    s_bfe_i64 s[40:41], s[58:59], 0x100000
8281; GCN-HSA-NEXT:    s_bfe_i64 s[44:45], s[44:45], 0x100000
8282; GCN-HSA-NEXT:    s_bfe_i64 s[56:57], s[56:57], 0x100000
8283; GCN-HSA-NEXT:    s_bfe_i64 s[54:55], s[54:55], 0x100000
8284; GCN-HSA-NEXT:    s_bfe_i64 s[52:53], s[52:53], 0x100000
8285; GCN-HSA-NEXT:    s_bfe_i64 s[50:51], s[50:51], 0x100000
8286; GCN-HSA-NEXT:    s_bfe_i64 s[48:49], s[48:49], 0x100000
8287; GCN-HSA-NEXT:    s_add_u32 s58, s16, 0xf0
8288; GCN-HSA-NEXT:    s_addc_u32 s59, s17, 0
8289; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s48
8290; GCN-HSA-NEXT:    s_add_u32 s48, s16, 0xd0
8291; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s49
8292; GCN-HSA-NEXT:    s_addc_u32 s49, s17, 0
8293; GCN-HSA-NEXT:    v_mov_b32_e32 v24, s48
8294; GCN-HSA-NEXT:    v_mov_b32_e32 v25, s49
8295; GCN-HSA-NEXT:    s_add_u32 s48, s16, 0xb0
8296; GCN-HSA-NEXT:    s_addc_u32 s49, s17, 0
8297; GCN-HSA-NEXT:    v_mov_b32_e32 v22, s58
8298; GCN-HSA-NEXT:    v_mov_b32_e32 v18, s46
8299; GCN-HSA-NEXT:    s_add_u32 s46, s16, 0x90
8300; GCN-HSA-NEXT:    v_mov_b32_e32 v23, s59
8301; GCN-HSA-NEXT:    v_mov_b32_e32 v19, s47
8302; GCN-HSA-NEXT:    s_addc_u32 s47, s17, 0
8303; GCN-HSA-NEXT:    flat_store_dwordx4 v[22:23], v[0:3]
8304; GCN-HSA-NEXT:    v_mov_b32_e32 v22, s42
8305; GCN-HSA-NEXT:    s_add_u32 s42, s16, 0x70
8306; GCN-HSA-NEXT:    v_mov_b32_e32 v23, s43
8307; GCN-HSA-NEXT:    s_addc_u32 s43, s17, 0
8308; GCN-HSA-NEXT:    v_mov_b32_e32 v30, s42
8309; GCN-HSA-NEXT:    v_mov_b32_e32 v31, s43
8310; GCN-HSA-NEXT:    s_add_u32 s42, s16, 0x50
8311; GCN-HSA-NEXT:    s_addc_u32 s43, s17, 0
8312; GCN-HSA-NEXT:    v_mov_b32_e32 v6, s80
8313; GCN-HSA-NEXT:    v_mov_b32_e32 v7, s81
8314; GCN-HSA-NEXT:    v_mov_b32_e32 v26, s48
8315; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s38
8316; GCN-HSA-NEXT:    s_add_u32 s38, s16, 48
8317; GCN-HSA-NEXT:    v_mov_b32_e32 v8, s50
8318; GCN-HSA-NEXT:    v_mov_b32_e32 v9, s51
8319; GCN-HSA-NEXT:    v_mov_b32_e32 v10, s78
8320; GCN-HSA-NEXT:    v_mov_b32_e32 v11, s79
8321; GCN-HSA-NEXT:    v_mov_b32_e32 v27, s49
8322; GCN-HSA-NEXT:    flat_store_dwordx4 v[24:25], v[4:7]
8323; GCN-HSA-NEXT:    flat_store_dwordx4 v[26:27], v[8:11]
8324; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s39
8325; GCN-HSA-NEXT:    s_addc_u32 s39, s17, 0
8326; GCN-HSA-NEXT:    v_mov_b32_e32 v24, s38
8327; GCN-HSA-NEXT:    v_mov_b32_e32 v10, s42
8328; GCN-HSA-NEXT:    v_mov_b32_e32 v25, s39
8329; GCN-HSA-NEXT:    s_add_u32 s38, s16, 16
8330; GCN-HSA-NEXT:    v_mov_b32_e32 v20, s56
8331; GCN-HSA-NEXT:    v_mov_b32_e32 v21, s57
8332; GCN-HSA-NEXT:    v_mov_b32_e32 v11, s43
8333; GCN-HSA-NEXT:    s_addc_u32 s39, s17, 0
8334; GCN-HSA-NEXT:    v_mov_b32_e32 v28, s46
8335; GCN-HSA-NEXT:    flat_store_dwordx4 v[10:11], v[20:23]
8336; GCN-HSA-NEXT:    v_mov_b32_e32 v10, s14
8337; GCN-HSA-NEXT:    s_add_u32 s14, s16, 0xe0
8338; GCN-HSA-NEXT:    v_mov_b32_e32 v12, s52
8339; GCN-HSA-NEXT:    v_mov_b32_e32 v13, s53
8340; GCN-HSA-NEXT:    v_mov_b32_e32 v14, s76
8341; GCN-HSA-NEXT:    v_mov_b32_e32 v15, s77
8342; GCN-HSA-NEXT:    v_mov_b32_e32 v29, s47
8343; GCN-HSA-NEXT:    v_mov_b32_e32 v11, s15
8344; GCN-HSA-NEXT:    s_addc_u32 s15, s17, 0
8345; GCN-HSA-NEXT:    flat_store_dwordx4 v[28:29], v[12:15]
8346; GCN-HSA-NEXT:    v_mov_b32_e32 v16, s54
8347; GCN-HSA-NEXT:    v_mov_b32_e32 v14, s12
8348; GCN-HSA-NEXT:    s_add_u32 s12, s16, 0xc0
8349; GCN-HSA-NEXT:    v_mov_b32_e32 v17, s55
8350; GCN-HSA-NEXT:    v_mov_b32_e32 v15, s13
8351; GCN-HSA-NEXT:    s_addc_u32 s13, s17, 0
8352; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s44
8353; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s45
8354; GCN-HSA-NEXT:    v_mov_b32_e32 v26, s38
8355; GCN-HSA-NEXT:    flat_store_dwordx4 v[30:31], v[16:19]
8356; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s40
8357; GCN-HSA-NEXT:    v_mov_b32_e32 v17, s15
8358; GCN-HSA-NEXT:    v_mov_b32_e32 v19, s13
8359; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s41
8360; GCN-HSA-NEXT:    v_mov_b32_e32 v27, s39
8361; GCN-HSA-NEXT:    v_mov_b32_e32 v6, s36
8362; GCN-HSA-NEXT:    v_mov_b32_e32 v7, s37
8363; GCN-HSA-NEXT:    v_mov_b32_e32 v8, s34
8364; GCN-HSA-NEXT:    v_mov_b32_e32 v9, s35
8365; GCN-HSA-NEXT:    v_mov_b32_e32 v12, s30
8366; GCN-HSA-NEXT:    v_mov_b32_e32 v13, s31
8367; GCN-HSA-NEXT:    v_mov_b32_e32 v16, s14
8368; GCN-HSA-NEXT:    v_mov_b32_e32 v18, s12
8369; GCN-HSA-NEXT:    flat_store_dwordx4 v[24:25], v[0:3]
8370; GCN-HSA-NEXT:    flat_store_dwordx4 v[26:27], v[4:7]
8371; GCN-HSA-NEXT:    flat_store_dwordx4 v[16:17], v[8:11]
8372; GCN-HSA-NEXT:    flat_store_dwordx4 v[18:19], v[12:15]
8373; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s10
8374; GCN-HSA-NEXT:    s_add_u32 s10, s16, 0xa0
8375; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s11
8376; GCN-HSA-NEXT:    s_addc_u32 s11, s17, 0
8377; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s10
8378; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s28
8379; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s29
8380; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s11
8381; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
8382; GCN-HSA-NEXT:    s_nop 0
8383; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s8
8384; GCN-HSA-NEXT:    s_add_u32 s8, s16, 0x80
8385; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s9
8386; GCN-HSA-NEXT:    s_addc_u32 s9, s17, 0
8387; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s8
8388; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s26
8389; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s27
8390; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s9
8391; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
8392; GCN-HSA-NEXT:    s_nop 0
8393; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s6
8394; GCN-HSA-NEXT:    s_add_u32 s6, s16, 0x60
8395; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s7
8396; GCN-HSA-NEXT:    s_addc_u32 s7, s17, 0
8397; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s6
8398; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s24
8399; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s25
8400; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s7
8401; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
8402; GCN-HSA-NEXT:    s_nop 0
8403; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s4
8404; GCN-HSA-NEXT:    s_add_u32 s4, s16, 64
8405; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s5
8406; GCN-HSA-NEXT:    s_addc_u32 s5, s17, 0
8407; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s4
8408; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s22
8409; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s23
8410; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s5
8411; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
8412; GCN-HSA-NEXT:    s_nop 0
8413; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s2
8414; GCN-HSA-NEXT:    s_add_u32 s2, s16, 32
8415; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s3
8416; GCN-HSA-NEXT:    s_addc_u32 s3, s17, 0
8417; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
8418; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s20
8419; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s21
8420; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
8421; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
8422; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s16
8423; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s18
8424; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s19
8425; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s0
8426; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s1
8427; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s17
8428; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
8429; GCN-HSA-NEXT:    s_endpgm
8430;
8431; GCN-NOHSA-VI-LABEL: constant_sextload_v32i16_to_v32i64:
8432; GCN-NOHSA-VI:       ; %bb.0:
8433; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[16:19], s[4:5], 0x24
8434; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
8435; GCN-NOHSA-VI-NEXT:    s_load_dwordx16 s[0:15], s[18:19], 0x0
8436; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
8437; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s22, s0, 16
8438; GCN-NOHSA-VI-NEXT:    s_mov_b32 s26, s1
8439; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s28, s1, 16
8440; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s30, s2, 16
8441; GCN-NOHSA-VI-NEXT:    s_mov_b32 s34, s3
8442; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s36, s3, 16
8443; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s40, s4, 16
8444; GCN-NOHSA-VI-NEXT:    s_mov_b32 s42, s5
8445; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s44, s5, 16
8446; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s46, s6, 16
8447; GCN-NOHSA-VI-NEXT:    s_mov_b32 s48, s7
8448; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s50, s7, 16
8449; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s52, s8, 16
8450; GCN-NOHSA-VI-NEXT:    s_mov_b32 s54, s9
8451; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s56, s9, 16
8452; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s58, s10, 16
8453; GCN-NOHSA-VI-NEXT:    s_mov_b32 s60, s11
8454; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s62, s11, 16
8455; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s66, s12, 16
8456; GCN-NOHSA-VI-NEXT:    s_mov_b32 s68, s13
8457; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s70, s13, 16
8458; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s74, s14, 16
8459; GCN-NOHSA-VI-NEXT:    s_mov_b32 s76, s15
8460; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s78, s15, 16
8461; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[18:19], s[0:1], 0x100000
8462; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[0:1], s[2:3], 0x100000
8463; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[2:3], s[4:5], 0x100000
8464; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[20:21], s[6:7], 0x100000
8465; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[24:25], s[8:9], 0x100000
8466; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[38:39], s[10:11], 0x100000
8467; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[64:65], s[12:13], 0x100000
8468; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[72:73], s[14:15], 0x100000
8469; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[4:5], s[22:23], 0x100000
8470; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[8:9], s[26:27], 0x100000
8471; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[6:7], s[28:29], 0x100000
8472; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[10:11], s[30:31], 0x100000
8473; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[14:15], s[34:35], 0x100000
8474; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[12:13], s[36:37], 0x100000
8475; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[22:23], s[40:41], 0x100000
8476; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[28:29], s[42:43], 0x100000
8477; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[26:27], s[44:45], 0x100000
8478; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[30:31], s[46:47], 0x100000
8479; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[36:37], s[48:49], 0x100000
8480; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[34:35], s[50:51], 0x100000
8481; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[40:41], s[52:53], 0x100000
8482; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[44:45], s[54:55], 0x100000
8483; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[42:43], s[56:57], 0x100000
8484; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[46:47], s[58:59], 0x100000
8485; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[48:49], s[60:61], 0x100000
8486; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[50:51], s[62:63], 0x100000
8487; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[52:53], s[66:67], 0x100000
8488; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[54:55], s[68:69], 0x100000
8489; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[56:57], s[70:71], 0x100000
8490; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[58:59], s[74:75], 0x100000
8491; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[60:61], s[76:77], 0x100000
8492; GCN-NOHSA-VI-NEXT:    s_bfe_i64 s[62:63], s[78:79], 0x100000
8493; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s60
8494; GCN-NOHSA-VI-NEXT:    s_add_u32 s60, s16, 0xf0
8495; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s61
8496; GCN-NOHSA-VI-NEXT:    s_addc_u32 s61, s17, 0
8497; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s60
8498; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s62
8499; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s63
8500; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s61
8501; GCN-NOHSA-VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
8502; GCN-NOHSA-VI-NEXT:    s_nop 0
8503; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s58
8504; GCN-NOHSA-VI-NEXT:    s_add_u32 s58, s16, 0xe0
8505; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s59
8506; GCN-NOHSA-VI-NEXT:    s_addc_u32 s59, s17, 0
8507; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s58
8508; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s72
8509; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s73
8510; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s59
8511; GCN-NOHSA-VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
8512; GCN-NOHSA-VI-NEXT:    s_nop 0
8513; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s54
8514; GCN-NOHSA-VI-NEXT:    s_add_u32 s54, s16, 0xd0
8515; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s55
8516; GCN-NOHSA-VI-NEXT:    s_addc_u32 s55, s17, 0
8517; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s54
8518; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s56
8519; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s57
8520; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s55
8521; GCN-NOHSA-VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
8522; GCN-NOHSA-VI-NEXT:    s_nop 0
8523; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s52
8524; GCN-NOHSA-VI-NEXT:    s_add_u32 s52, s16, 0xc0
8525; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s53
8526; GCN-NOHSA-VI-NEXT:    s_addc_u32 s53, s17, 0
8527; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s52
8528; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s64
8529; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s65
8530; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s53
8531; GCN-NOHSA-VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
8532; GCN-NOHSA-VI-NEXT:    s_nop 0
8533; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s48
8534; GCN-NOHSA-VI-NEXT:    s_add_u32 s48, s16, 0xb0
8535; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s49
8536; GCN-NOHSA-VI-NEXT:    s_addc_u32 s49, s17, 0
8537; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s48
8538; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s50
8539; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s51
8540; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s49
8541; GCN-NOHSA-VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
8542; GCN-NOHSA-VI-NEXT:    s_nop 0
8543; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s38
8544; GCN-NOHSA-VI-NEXT:    s_add_u32 s38, s16, 0xa0
8545; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s39
8546; GCN-NOHSA-VI-NEXT:    s_addc_u32 s39, s17, 0
8547; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s38
8548; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s46
8549; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s47
8550; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s39
8551; GCN-NOHSA-VI-NEXT:    s_add_u32 s38, s16, 0x90
8552; GCN-NOHSA-VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
8553; GCN-NOHSA-VI-NEXT:    s_addc_u32 s39, s17, 0
8554; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s38
8555; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s44
8556; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s45
8557; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s42
8558; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s43
8559; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s39
8560; GCN-NOHSA-VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
8561; GCN-NOHSA-VI-NEXT:    s_nop 0
8562; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s24
8563; GCN-NOHSA-VI-NEXT:    s_add_u32 s24, s16, 0x80
8564; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s25
8565; GCN-NOHSA-VI-NEXT:    s_addc_u32 s25, s17, 0
8566; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s24
8567; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s40
8568; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s41
8569; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s25
8570; GCN-NOHSA-VI-NEXT:    s_add_u32 s24, s16, 0x70
8571; GCN-NOHSA-VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
8572; GCN-NOHSA-VI-NEXT:    s_addc_u32 s25, s17, 0
8573; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s24
8574; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s36
8575; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s37
8576; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s34
8577; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s35
8578; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s25
8579; GCN-NOHSA-VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
8580; GCN-NOHSA-VI-NEXT:    s_nop 0
8581; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s20
8582; GCN-NOHSA-VI-NEXT:    s_add_u32 s20, s16, 0x60
8583; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s21
8584; GCN-NOHSA-VI-NEXT:    s_addc_u32 s21, s17, 0
8585; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s20
8586; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s30
8587; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s31
8588; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s21
8589; GCN-NOHSA-VI-NEXT:    s_add_u32 s20, s16, 0x50
8590; GCN-NOHSA-VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
8591; GCN-NOHSA-VI-NEXT:    s_addc_u32 s21, s17, 0
8592; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s20
8593; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s28
8594; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s29
8595; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s26
8596; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s27
8597; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s21
8598; GCN-NOHSA-VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
8599; GCN-NOHSA-VI-NEXT:    s_nop 0
8600; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s2
8601; GCN-NOHSA-VI-NEXT:    s_add_u32 s2, s16, 64
8602; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s3
8603; GCN-NOHSA-VI-NEXT:    s_addc_u32 s3, s17, 0
8604; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s3
8605; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s2
8606; GCN-NOHSA-VI-NEXT:    s_add_u32 s2, s16, 48
8607; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s22
8608; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s23
8609; GCN-NOHSA-VI-NEXT:    s_addc_u32 s3, s17, 0
8610; GCN-NOHSA-VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
8611; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s3
8612; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s14
8613; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s15
8614; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s12
8615; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s13
8616; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s2
8617; GCN-NOHSA-VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
8618; GCN-NOHSA-VI-NEXT:    s_nop 0
8619; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s0
8620; GCN-NOHSA-VI-NEXT:    s_add_u32 s0, s16, 32
8621; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s1
8622; GCN-NOHSA-VI-NEXT:    s_addc_u32 s1, s17, 0
8623; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s1
8624; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s0
8625; GCN-NOHSA-VI-NEXT:    s_add_u32 s0, s16, 16
8626; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s10
8627; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s11
8628; GCN-NOHSA-VI-NEXT:    s_addc_u32 s1, s17, 0
8629; GCN-NOHSA-VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
8630; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s1
8631; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s8
8632; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s9
8633; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s6
8634; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s7
8635; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s0
8636; GCN-NOHSA-VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
8637; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v4, s16
8638; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s18
8639; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, s19
8640; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s4
8641; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, s5
8642; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v5, s17
8643; GCN-NOHSA-VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
8644; GCN-NOHSA-VI-NEXT:    s_endpgm
8645;
8646; EG-LABEL: constant_sextload_v32i16_to_v32i64:
8647; EG:       ; %bb.0:
8648; EG-NEXT:    ALU 0, @30, KC0[CB0:0-32], KC1[]
8649; EG-NEXT:    TEX 0 @22
8650; EG-NEXT:    ALU 55, @31, KC0[CB0:0-32], KC1[]
8651; EG-NEXT:    TEX 2 @24
8652; EG-NEXT:    ALU 74, @87, KC0[CB0:0-32], KC1[]
8653; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T50.XYZW, T38.X, 0
8654; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T49.XYZW, T36.X, 0
8655; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T39.XYZW, T34.X, 0
8656; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T48.XYZW, T33.X, 0
8657; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T47.XYZW, T32.X, 0
8658; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T46.XYZW, T31.X, 0
8659; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T40.XYZW, T30.X, 0
8660; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T45.XYZW, T29.X, 0
8661; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T44.XYZW, T28.X, 0
8662; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T43.XYZW, T27.X, 0
8663; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T20.XYZW, T26.X, 0
8664; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T42.XYZW, T25.X, 0
8665; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T41.XYZW, T24.X, 0
8666; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T19.XYZW, T23.X, 0
8667; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T37.XYZW, T22.X, 0
8668; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T35.XYZW, T21.X, 1
8669; EG-NEXT:    CF_END
8670; EG-NEXT:    Fetch clause starting at 22:
8671; EG-NEXT:     VTX_READ_128 T20.XYZW, T19.X, 0, #1
8672; EG-NEXT:    Fetch clause starting at 24:
8673; EG-NEXT:     VTX_READ_128 T38.XYZW, T19.X, 48, #1
8674; EG-NEXT:     VTX_READ_128 T39.XYZW, T19.X, 32, #1
8675; EG-NEXT:     VTX_READ_128 T40.XYZW, T19.X, 16, #1
8676; EG-NEXT:    ALU clause starting at 30:
8677; EG-NEXT:     MOV * T19.X, KC0[2].Z,
8678; EG-NEXT:    ALU clause starting at 31:
8679; EG-NEXT:     LSHR T21.X, KC0[2].Y, literal.x,
8680; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
8681; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
8682; EG-NEXT:     LSHR T22.X, PV.W, literal.x,
8683; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
8684; EG-NEXT:    2(2.802597e-45), 32(4.484155e-44)
8685; EG-NEXT:     LSHR T23.X, PV.W, literal.x,
8686; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
8687; EG-NEXT:    2(2.802597e-45), 48(6.726233e-44)
8688; EG-NEXT:     LSHR T24.X, PV.W, literal.x,
8689; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
8690; EG-NEXT:    2(2.802597e-45), 64(8.968310e-44)
8691; EG-NEXT:     LSHR T25.X, PV.W, literal.x,
8692; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
8693; EG-NEXT:    2(2.802597e-45), 80(1.121039e-43)
8694; EG-NEXT:     LSHR T26.X, PV.W, literal.x,
8695; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
8696; EG-NEXT:    2(2.802597e-45), 96(1.345247e-43)
8697; EG-NEXT:     LSHR T27.X, PV.W, literal.x,
8698; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
8699; EG-NEXT:    2(2.802597e-45), 112(1.569454e-43)
8700; EG-NEXT:     LSHR T28.X, PV.W, literal.x,
8701; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
8702; EG-NEXT:    2(2.802597e-45), 128(1.793662e-43)
8703; EG-NEXT:     LSHR T29.X, PV.W, literal.x,
8704; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
8705; EG-NEXT:    2(2.802597e-45), 144(2.017870e-43)
8706; EG-NEXT:     LSHR T30.X, PV.W, literal.x,
8707; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
8708; EG-NEXT:    2(2.802597e-45), 160(2.242078e-43)
8709; EG-NEXT:     LSHR T31.X, PV.W, literal.x,
8710; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
8711; EG-NEXT:    2(2.802597e-45), 176(2.466285e-43)
8712; EG-NEXT:     LSHR T32.X, PV.W, literal.x,
8713; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
8714; EG-NEXT:    2(2.802597e-45), 192(2.690493e-43)
8715; EG-NEXT:     LSHR T33.X, PV.W, literal.x,
8716; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.y,
8717; EG-NEXT:    2(2.802597e-45), 208(2.914701e-43)
8718; EG-NEXT:     LSHR T34.X, PV.W, literal.x,
8719; EG-NEXT:     ADD_INT T0.W, KC0[2].Y, literal.y,
8720; EG-NEXT:     ASHR * T35.W, T20.X, literal.z,
8721; EG-NEXT:    2(2.802597e-45), 224(3.138909e-43)
8722; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
8723; EG-NEXT:     LSHR T36.X, PV.W, literal.x,
8724; EG-NEXT:     ASHR T35.Z, T20.X, literal.y,
8725; EG-NEXT:     ASHR * T37.W, T20.Y, literal.z,
8726; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
8727; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
8728; EG-NEXT:     BFE_INT T35.X, T20.X, 0.0, literal.x,
8729; EG-NEXT:     ASHR * T37.Z, T20.Y, literal.x,
8730; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
8731; EG-NEXT:     BFE_INT T37.X, T20.Y, 0.0, literal.x,
8732; EG-NEXT:     ASHR T35.Y, PV.X, literal.y,
8733; EG-NEXT:     ASHR * T19.W, T20.Z, literal.y,
8734; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
8735; EG-NEXT:    ALU clause starting at 87:
8736; EG-NEXT:     ASHR T19.Z, T20.Z, literal.x,
8737; EG-NEXT:     ASHR * T41.W, T20.W, literal.y,
8738; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
8739; EG-NEXT:     BFE_INT T19.X, T20.Z, 0.0, literal.x,
8740; EG-NEXT:     ASHR T37.Y, T37.X, literal.y,
8741; EG-NEXT:     ASHR T41.Z, T20.W, literal.x,
8742; EG-NEXT:     ASHR * T42.W, T40.X, literal.y, BS:VEC_120/SCL_212
8743; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
8744; EG-NEXT:     BFE_INT T41.X, T20.W, 0.0, literal.x,
8745; EG-NEXT:     ASHR T19.Y, PV.X, literal.y,
8746; EG-NEXT:     ASHR T42.Z, T40.X, literal.x,
8747; EG-NEXT:     ASHR * T20.W, T40.Y, literal.y,
8748; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
8749; EG-NEXT:     BFE_INT T42.X, T40.X, 0.0, literal.x,
8750; EG-NEXT:     ASHR T41.Y, PV.X, literal.y,
8751; EG-NEXT:     ASHR T20.Z, T40.Y, literal.x,
8752; EG-NEXT:     ASHR * T43.W, T40.Z, literal.y,
8753; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
8754; EG-NEXT:     BFE_INT T20.X, T40.Y, 0.0, literal.x,
8755; EG-NEXT:     ASHR T42.Y, PV.X, literal.y,
8756; EG-NEXT:     ASHR T43.Z, T40.Z, literal.x,
8757; EG-NEXT:     ASHR * T44.W, T40.W, literal.y,
8758; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
8759; EG-NEXT:     BFE_INT T43.X, T40.Z, 0.0, literal.x,
8760; EG-NEXT:     ASHR T20.Y, PV.X, literal.y,
8761; EG-NEXT:     ASHR T44.Z, T40.W, literal.x,
8762; EG-NEXT:     ASHR * T45.W, T39.X, literal.y,
8763; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
8764; EG-NEXT:     BFE_INT T44.X, T40.W, 0.0, literal.x,
8765; EG-NEXT:     ASHR T43.Y, PV.X, literal.y,
8766; EG-NEXT:     ASHR T45.Z, T39.X, literal.x,
8767; EG-NEXT:     ASHR * T40.W, T39.Y, literal.y,
8768; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
8769; EG-NEXT:     BFE_INT T45.X, T39.X, 0.0, literal.x,
8770; EG-NEXT:     ASHR T44.Y, PV.X, literal.y,
8771; EG-NEXT:     ASHR T40.Z, T39.Y, literal.x,
8772; EG-NEXT:     ASHR * T46.W, T39.Z, literal.y,
8773; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
8774; EG-NEXT:     BFE_INT T40.X, T39.Y, 0.0, literal.x,
8775; EG-NEXT:     ASHR T45.Y, PV.X, literal.y,
8776; EG-NEXT:     ASHR T46.Z, T39.Z, literal.x,
8777; EG-NEXT:     ASHR * T47.W, T39.W, literal.y,
8778; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
8779; EG-NEXT:     BFE_INT T46.X, T39.Z, 0.0, literal.x,
8780; EG-NEXT:     ASHR T40.Y, PV.X, literal.y,
8781; EG-NEXT:     ASHR T47.Z, T39.W, literal.x,
8782; EG-NEXT:     ASHR * T48.W, T38.X, literal.y,
8783; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
8784; EG-NEXT:     BFE_INT T47.X, T39.W, 0.0, literal.x,
8785; EG-NEXT:     ASHR T46.Y, PV.X, literal.y,
8786; EG-NEXT:     ASHR T48.Z, T38.X, literal.x,
8787; EG-NEXT:     ASHR * T39.W, T38.Y, literal.y,
8788; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
8789; EG-NEXT:     BFE_INT T48.X, T38.X, 0.0, literal.x,
8790; EG-NEXT:     ASHR T47.Y, PV.X, literal.y,
8791; EG-NEXT:     ASHR T39.Z, T38.Y, literal.x,
8792; EG-NEXT:     ASHR * T49.W, T38.Z, literal.y,
8793; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
8794; EG-NEXT:     BFE_INT T39.X, T38.Y, 0.0, literal.x,
8795; EG-NEXT:     ASHR T48.Y, PV.X, literal.y,
8796; EG-NEXT:     ASHR T49.Z, T38.Z, literal.x,
8797; EG-NEXT:     ASHR * T50.W, T38.W, literal.y,
8798; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
8799; EG-NEXT:     BFE_INT T49.X, T38.Z, 0.0, literal.x,
8800; EG-NEXT:     ASHR T39.Y, PV.X, literal.y,
8801; EG-NEXT:     ASHR * T50.Z, T38.W, literal.x,
8802; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
8803; EG-NEXT:     BFE_INT T50.X, T38.W, 0.0, literal.x,
8804; EG-NEXT:     ASHR T49.Y, PV.X, literal.y,
8805; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, literal.z,
8806; EG-NEXT:    16(2.242078e-44), 31(4.344025e-44)
8807; EG-NEXT:    240(3.363116e-43), 0(0.000000e+00)
8808; EG-NEXT:     LSHR T38.X, PV.W, literal.x,
8809; EG-NEXT:     ASHR * T50.Y, PV.X, literal.y,
8810; EG-NEXT:    2(2.802597e-45), 31(4.344025e-44)
8811;
8812; GFX12-LABEL: constant_sextload_v32i16_to_v32i64:
8813; GFX12:       ; %bb.0:
8814; GFX12-NEXT:    s_load_b128 s[16:19], s[4:5], 0x24
8815; GFX12-NEXT:    s_wait_kmcnt 0x0
8816; GFX12-NEXT:    s_load_b512 s[0:15], s[18:19], 0x0
8817; GFX12-NEXT:    s_wait_kmcnt 0x0
8818; GFX12-NEXT:    s_lshr_b32 s28, s2, 16
8819; GFX12-NEXT:    s_lshr_b32 s42, s5, 16
8820; GFX12-NEXT:    s_lshr_b32 s52, s8, 16
8821; GFX12-NEXT:    s_mov_b32 s60, s11
8822; GFX12-NEXT:    s_lshr_b32 s22, s0, 16
8823; GFX12-NEXT:    s_mov_b32 s24, s1
8824; GFX12-NEXT:    s_lshr_b32 s26, s1, 16
8825; GFX12-NEXT:    s_mov_b32 s30, s3
8826; GFX12-NEXT:    s_lshr_b32 s36, s3, 16
8827; GFX12-NEXT:    s_lshr_b32 s38, s4, 16
8828; GFX12-NEXT:    s_mov_b32 s40, s5
8829; GFX12-NEXT:    s_lshr_b32 s46, s6, 16
8830; GFX12-NEXT:    s_mov_b32 s48, s7
8831; GFX12-NEXT:    s_lshr_b32 s50, s7, 16
8832; GFX12-NEXT:    s_mov_b32 s54, s9
8833; GFX12-NEXT:    s_lshr_b32 s56, s9, 16
8834; GFX12-NEXT:    s_bfe_i64 s[44:45], s[10:11], 0x100000
8835; GFX12-NEXT:    s_lshr_b32 s58, s10, 16
8836; GFX12-NEXT:    s_lshr_b32 s62, s11, 16
8837; GFX12-NEXT:    s_bfe_i64 s[10:11], s[28:29], 0x100000
8838; GFX12-NEXT:    s_bfe_i64 s[28:29], s[42:43], 0x100000
8839; GFX12-NEXT:    s_bfe_i64 s[42:43], s[52:53], 0x100000
8840; GFX12-NEXT:    s_bfe_i64 s[52:53], s[60:61], 0x100000
8841; GFX12-NEXT:    s_lshr_b32 s60, s14, 16
8842; GFX12-NEXT:    s_bfe_i64 s[64:65], s[14:15], 0x100000
8843; GFX12-NEXT:    s_mov_b32 s14, s15
8844; GFX12-NEXT:    s_lshr_b32 s66, s15, 16
8845; GFX12-NEXT:    s_bfe_i64 s[18:19], s[0:1], 0x100000
8846; GFX12-NEXT:    s_bfe_i64 s[0:1], s[2:3], 0x100000
8847; GFX12-NEXT:    s_bfe_i64 s[2:3], s[4:5], 0x100000
8848; GFX12-NEXT:    s_bfe_i64 s[20:21], s[6:7], 0x100000
8849; GFX12-NEXT:    s_bfe_i64 s[34:35], s[8:9], 0x100000
8850; GFX12-NEXT:    s_bfe_i64 s[4:5], s[22:23], 0x100000
8851; GFX12-NEXT:    s_bfe_i64 s[8:9], s[24:25], 0x100000
8852; GFX12-NEXT:    s_bfe_i64 s[6:7], s[26:27], 0x100000
8853; GFX12-NEXT:    s_bfe_i64 s[24:25], s[30:31], 0x100000
8854; GFX12-NEXT:    s_bfe_i64 s[22:23], s[36:37], 0x100000
8855; GFX12-NEXT:    s_bfe_i64 s[26:27], s[38:39], 0x100000
8856; GFX12-NEXT:    s_bfe_i64 s[30:31], s[40:41], 0x100000
8857; GFX12-NEXT:    s_bfe_i64 s[36:37], s[46:47], 0x100000
8858; GFX12-NEXT:    s_bfe_i64 s[40:41], s[48:49], 0x100000
8859; GFX12-NEXT:    s_bfe_i64 s[38:39], s[50:51], 0x100000
8860; GFX12-NEXT:    s_bfe_i64 s[46:47], s[54:55], 0x100000
8861; GFX12-NEXT:    s_bfe_i64 s[48:49], s[56:57], 0x100000
8862; GFX12-NEXT:    s_bfe_i64 s[50:51], s[58:59], 0x100000
8863; GFX12-NEXT:    s_lshr_b32 s54, s12, 16
8864; GFX12-NEXT:    s_bfe_i64 s[56:57], s[12:13], 0x100000
8865; GFX12-NEXT:    s_mov_b32 s12, s13
8866; GFX12-NEXT:    s_lshr_b32 s58, s13, 16
8867; GFX12-NEXT:    s_bfe_i64 s[14:15], s[14:15], 0x100000
8868; GFX12-NEXT:    s_bfe_i64 s[66:67], s[66:67], 0x100000
8869; GFX12-NEXT:    s_bfe_i64 s[60:61], s[60:61], 0x100000
8870; GFX12-NEXT:    v_dual_mov_b32 v24, 0 :: v_dual_mov_b32 v1, s15
8871; GFX12-NEXT:    s_bfe_i64 s[12:13], s[12:13], 0x100000
8872; GFX12-NEXT:    s_bfe_i64 s[58:59], s[58:59], 0x100000
8873; GFX12-NEXT:    v_dual_mov_b32 v0, s14 :: v_dual_mov_b32 v3, s67
8874; GFX12-NEXT:    v_dual_mov_b32 v2, s66 :: v_dual_mov_b32 v5, s65
8875; GFX12-NEXT:    s_bfe_i64 s[54:55], s[54:55], 0x100000
8876; GFX12-NEXT:    v_dual_mov_b32 v4, s64 :: v_dual_mov_b32 v7, s61
8877; GFX12-NEXT:    v_dual_mov_b32 v6, s60 :: v_dual_mov_b32 v9, s13
8878; GFX12-NEXT:    s_wait_alu 0xfffe
8879; GFX12-NEXT:    v_dual_mov_b32 v8, s12 :: v_dual_mov_b32 v11, s59
8880; GFX12-NEXT:    v_dual_mov_b32 v10, s58 :: v_dual_mov_b32 v13, s57
8881; GFX12-NEXT:    v_dual_mov_b32 v12, s56 :: v_dual_mov_b32 v15, s55
8882; GFX12-NEXT:    v_mov_b32_e32 v14, s54
8883; GFX12-NEXT:    s_bfe_i64 s[12:13], s[62:63], 0x100000
8884; GFX12-NEXT:    s_clause 0x3
8885; GFX12-NEXT:    global_store_b128 v24, v[0:3], s[16:17] offset:240
8886; GFX12-NEXT:    global_store_b128 v24, v[4:7], s[16:17] offset:224
8887; GFX12-NEXT:    global_store_b128 v24, v[8:11], s[16:17] offset:208
8888; GFX12-NEXT:    global_store_b128 v24, v[12:15], s[16:17] offset:192
8889; GFX12-NEXT:    v_dual_mov_b32 v1, s53 :: v_dual_mov_b32 v0, s52
8890; GFX12-NEXT:    s_wait_alu 0xfffe
8891; GFX12-NEXT:    v_dual_mov_b32 v3, s13 :: v_dual_mov_b32 v2, s12
8892; GFX12-NEXT:    v_dual_mov_b32 v5, s45 :: v_dual_mov_b32 v4, s44
8893; GFX12-NEXT:    v_dual_mov_b32 v7, s51 :: v_dual_mov_b32 v6, s50
8894; GFX12-NEXT:    v_dual_mov_b32 v9, s47 :: v_dual_mov_b32 v8, s46
8895; GFX12-NEXT:    v_dual_mov_b32 v11, s49 :: v_dual_mov_b32 v10, s48
8896; GFX12-NEXT:    v_dual_mov_b32 v13, s35 :: v_dual_mov_b32 v12, s34
8897; GFX12-NEXT:    v_dual_mov_b32 v15, s43 :: v_dual_mov_b32 v14, s42
8898; GFX12-NEXT:    v_dual_mov_b32 v17, s41 :: v_dual_mov_b32 v16, s40
8899; GFX12-NEXT:    v_dual_mov_b32 v19, s39 :: v_dual_mov_b32 v18, s38
8900; GFX12-NEXT:    v_dual_mov_b32 v21, s21 :: v_dual_mov_b32 v20, s20
8901; GFX12-NEXT:    v_dual_mov_b32 v23, s37 :: v_dual_mov_b32 v22, s36
8902; GFX12-NEXT:    s_clause 0x5
8903; GFX12-NEXT:    global_store_b128 v24, v[0:3], s[16:17] offset:176
8904; GFX12-NEXT:    global_store_b128 v24, v[4:7], s[16:17] offset:160
8905; GFX12-NEXT:    global_store_b128 v24, v[8:11], s[16:17] offset:144
8906; GFX12-NEXT:    global_store_b128 v24, v[12:15], s[16:17] offset:128
8907; GFX12-NEXT:    global_store_b128 v24, v[16:19], s[16:17] offset:112
8908; GFX12-NEXT:    global_store_b128 v24, v[20:23], s[16:17] offset:96
8909; GFX12-NEXT:    v_dual_mov_b32 v1, s31 :: v_dual_mov_b32 v0, s30
8910; GFX12-NEXT:    v_dual_mov_b32 v3, s29 :: v_dual_mov_b32 v2, s28
8911; GFX12-NEXT:    v_dual_mov_b32 v5, s3 :: v_dual_mov_b32 v4, s2
8912; GFX12-NEXT:    v_dual_mov_b32 v7, s27 :: v_dual_mov_b32 v6, s26
8913; GFX12-NEXT:    v_dual_mov_b32 v9, s25 :: v_dual_mov_b32 v8, s24
8914; GFX12-NEXT:    v_dual_mov_b32 v11, s23 :: v_dual_mov_b32 v10, s22
8915; GFX12-NEXT:    v_dual_mov_b32 v13, s1 :: v_dual_mov_b32 v12, s0
8916; GFX12-NEXT:    v_dual_mov_b32 v15, s11 :: v_dual_mov_b32 v14, s10
8917; GFX12-NEXT:    v_dual_mov_b32 v17, s9 :: v_dual_mov_b32 v16, s8
8918; GFX12-NEXT:    v_dual_mov_b32 v19, s7 :: v_dual_mov_b32 v18, s6
8919; GFX12-NEXT:    v_dual_mov_b32 v21, s19 :: v_dual_mov_b32 v20, s18
8920; GFX12-NEXT:    v_dual_mov_b32 v23, s5 :: v_dual_mov_b32 v22, s4
8921; GFX12-NEXT:    s_clause 0x5
8922; GFX12-NEXT:    global_store_b128 v24, v[0:3], s[16:17] offset:80
8923; GFX12-NEXT:    global_store_b128 v24, v[4:7], s[16:17] offset:64
8924; GFX12-NEXT:    global_store_b128 v24, v[8:11], s[16:17] offset:48
8925; GFX12-NEXT:    global_store_b128 v24, v[12:15], s[16:17] offset:32
8926; GFX12-NEXT:    global_store_b128 v24, v[16:19], s[16:17] offset:16
8927; GFX12-NEXT:    global_store_b128 v24, v[20:23], s[16:17]
8928; GFX12-NEXT:    s_endpgm
8929  %load = load <32 x i16>, ptr addrspace(4) %in
8930  %ext = sext <32 x i16> %load to <32 x i64>
8931  store <32 x i64> %ext, ptr addrspace(1) %out
8932  ret void
8933}
8934
8935; FIXME: The tests below are disabled because they trigger "undefined register" machine verifier errors.
8936
8937; define amdgpu_kernel void @constant_zextload_v64i16_to_v64i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
8938;   %load = load <64 x i16>, ptr addrspace(4) %in
8939;   %ext = zext <64 x i16> %load to <64 x i64>
8940;   store <64 x i64> %ext, ptr addrspace(1) %out
8941;   ret void
8942; }
8943
8944; define amdgpu_kernel void @constant_sextload_v64i16_to_v64i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
8945;   %load = load <64 x i16>, ptr addrspace(4) %in
8946;   %ext = sext <64 x i16> %load to <64 x i64>
8947;   store <64 x i64> %ext, ptr addrspace(1) %out
8948;   ret void
8949; }
8950
8951attributes #0 = { nounwind } ; attribute group #0; referenced by the commented-out v64i16 kernels above
8952