xref: /llvm-project/llvm/test/CodeGen/AMDGPU/GlobalISel/widen-i8-i16-scalar-loads.ll (revision 5d9c717597aef72e4ba27a2b143e9753c513e5c9)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -global-isel -mtriple=amdgcn -mcpu=fiji -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX8 %s
3; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s
4; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX10 %s
5
; Loads an i8 from the constant address space (addrspace 4) with align 4 and
; stores it unchanged as an i8 to addrspace(1).
; Expected output on all three targets: the sub-dword load is widened and
; selected as a 32-bit scalar load (s_load_dword); only the store stays
; byte-sized (flat_store_byte on GFX8, global_store_byte on GFX9 and GFX10).
6define amdgpu_kernel void @constant_load_i8_align4(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
7; GFX8-LABEL: constant_load_i8_align4:
8; GFX8:       ; %bb.0:
9; GFX8-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
10; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
11; GFX8-NEXT:    s_load_dword s2, s[2:3], 0x0
12; GFX8-NEXT:    v_mov_b32_e32 v0, s0
13; GFX8-NEXT:    v_mov_b32_e32 v1, s1
14; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
15; GFX8-NEXT:    v_mov_b32_e32 v2, s2
16; GFX8-NEXT:    flat_store_byte v[0:1], v2
17; GFX8-NEXT:    s_endpgm
18;
19; GFX9-LABEL: constant_load_i8_align4:
20; GFX9:       ; %bb.0:
21; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
22; GFX9-NEXT:    v_mov_b32_e32 v1, 0
23; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
24; GFX9-NEXT:    s_load_dword s2, s[2:3], 0x0
25; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
26; GFX9-NEXT:    v_mov_b32_e32 v0, s2
27; GFX9-NEXT:    global_store_byte v1, v0, s[0:1]
28; GFX9-NEXT:    s_endpgm
29;
30; GFX10-LABEL: constant_load_i8_align4:
31; GFX10:       ; %bb.0:
32; GFX10-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
33; GFX10-NEXT:    v_mov_b32_e32 v1, 0
34; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
35; GFX10-NEXT:    s_load_dword s2, s[2:3], 0x0
36; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
37; GFX10-NEXT:    v_mov_b32_e32 v0, s2
38; GFX10-NEXT:    global_store_byte v1, v0, s[0:1]
39; GFX10-NEXT:    s_endpgm
40  %ld = load i8, ptr addrspace(4) %in, align 4
41  store i8 %ld, ptr addrspace(1) %out, align 4
42  ret void
43}
44
; Loads an i16 from the constant address space (addrspace 4) with align 4 and
; stores it unchanged as an i16 to addrspace(1).
; Expected output on all three targets: the sub-dword load is widened and
; selected as a 32-bit scalar load (s_load_dword); only the store stays
; 16-bit (flat_store_short on GFX8, global_store_short on GFX9 and GFX10).
45define amdgpu_kernel void @constant_load_i16_align4(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
46; GFX8-LABEL: constant_load_i16_align4:
47; GFX8:       ; %bb.0:
48; GFX8-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
49; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
50; GFX8-NEXT:    s_load_dword s2, s[2:3], 0x0
51; GFX8-NEXT:    v_mov_b32_e32 v0, s0
52; GFX8-NEXT:    v_mov_b32_e32 v1, s1
53; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
54; GFX8-NEXT:    v_mov_b32_e32 v2, s2
55; GFX8-NEXT:    flat_store_short v[0:1], v2
56; GFX8-NEXT:    s_endpgm
57;
58; GFX9-LABEL: constant_load_i16_align4:
59; GFX9:       ; %bb.0:
60; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
61; GFX9-NEXT:    v_mov_b32_e32 v1, 0
62; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
63; GFX9-NEXT:    s_load_dword s2, s[2:3], 0x0
64; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
65; GFX9-NEXT:    v_mov_b32_e32 v0, s2
66; GFX9-NEXT:    global_store_short v1, v0, s[0:1]
67; GFX9-NEXT:    s_endpgm
68;
69; GFX10-LABEL: constant_load_i16_align4:
70; GFX10:       ; %bb.0:
71; GFX10-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
72; GFX10-NEXT:    v_mov_b32_e32 v1, 0
73; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
74; GFX10-NEXT:    s_load_dword s2, s[2:3], 0x0
75; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
76; GFX10-NEXT:    v_mov_b32_e32 v0, s2
77; GFX10-NEXT:    global_store_short v1, v0, s[0:1]
78; GFX10-NEXT:    s_endpgm
79  %ld = load i16, ptr addrspace(4) %in, align 4
80  store i16 %ld, ptr addrspace(1) %out, align 4
81  ret void
82}
83
; Loads an i8 with align 4, sign-extends it to i32 and stores the i32.
; Here %in is addrspace(1), not the constant address space.
; Expected output on all three targets: a 32-bit scalar load (s_load_dword)
; followed by s_sext_i32_i8 to perform the extension in a scalar register,
; then a dword store.
84define amdgpu_kernel void @sextload_i8_to_i32_align4(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
85; GFX8-LABEL: sextload_i8_to_i32_align4:
86; GFX8:       ; %bb.0:
87; GFX8-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
88; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
89; GFX8-NEXT:    s_load_dword s2, s[2:3], 0x0
90; GFX8-NEXT:    v_mov_b32_e32 v0, s0
91; GFX8-NEXT:    v_mov_b32_e32 v1, s1
92; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
93; GFX8-NEXT:    s_sext_i32_i8 s2, s2
94; GFX8-NEXT:    v_mov_b32_e32 v2, s2
95; GFX8-NEXT:    flat_store_dword v[0:1], v2
96; GFX8-NEXT:    s_endpgm
97;
98; GFX9-LABEL: sextload_i8_to_i32_align4:
99; GFX9:       ; %bb.0:
100; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
101; GFX9-NEXT:    v_mov_b32_e32 v1, 0
102; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
103; GFX9-NEXT:    s_load_dword s2, s[2:3], 0x0
104; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
105; GFX9-NEXT:    s_sext_i32_i8 s2, s2
106; GFX9-NEXT:    v_mov_b32_e32 v0, s2
107; GFX9-NEXT:    global_store_dword v1, v0, s[0:1]
108; GFX9-NEXT:    s_endpgm
109;
110; GFX10-LABEL: sextload_i8_to_i32_align4:
111; GFX10:       ; %bb.0:
112; GFX10-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
113; GFX10-NEXT:    v_mov_b32_e32 v1, 0
114; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
115; GFX10-NEXT:    s_load_dword s2, s[2:3], 0x0
116; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
117; GFX10-NEXT:    s_sext_i32_i8 s2, s2
118; GFX10-NEXT:    v_mov_b32_e32 v0, s2
119; GFX10-NEXT:    global_store_dword v1, v0, s[0:1]
120; GFX10-NEXT:    s_endpgm
121  %load = load i8, ptr addrspace(1) %in, align 4
122  %sext = sext i8 %load to i32
123  store i32 %sext, ptr addrspace(1) %out, align 4
124  ret void
125}
126
; Loads an i16 with align 4, sign-extends it to i32 and stores the i32.
; Here %in is addrspace(1), not the constant address space.
; Expected output on all three targets: a 32-bit scalar load (s_load_dword)
; followed by s_sext_i32_i16 to perform the extension in a scalar register,
; then a dword store.
127define amdgpu_kernel void @sextload_i16_to_i32_align4(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
128; GFX8-LABEL: sextload_i16_to_i32_align4:
129; GFX8:       ; %bb.0:
130; GFX8-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
131; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
132; GFX8-NEXT:    s_load_dword s2, s[2:3], 0x0
133; GFX8-NEXT:    v_mov_b32_e32 v0, s0
134; GFX8-NEXT:    v_mov_b32_e32 v1, s1
135; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
136; GFX8-NEXT:    s_sext_i32_i16 s2, s2
137; GFX8-NEXT:    v_mov_b32_e32 v2, s2
138; GFX8-NEXT:    flat_store_dword v[0:1], v2
139; GFX8-NEXT:    s_endpgm
140;
141; GFX9-LABEL: sextload_i16_to_i32_align4:
142; GFX9:       ; %bb.0:
143; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
144; GFX9-NEXT:    v_mov_b32_e32 v1, 0
145; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
146; GFX9-NEXT:    s_load_dword s2, s[2:3], 0x0
147; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
148; GFX9-NEXT:    s_sext_i32_i16 s2, s2
149; GFX9-NEXT:    v_mov_b32_e32 v0, s2
150; GFX9-NEXT:    global_store_dword v1, v0, s[0:1]
151; GFX9-NEXT:    s_endpgm
152;
153; GFX10-LABEL: sextload_i16_to_i32_align4:
154; GFX10:       ; %bb.0:
155; GFX10-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
156; GFX10-NEXT:    v_mov_b32_e32 v1, 0
157; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
158; GFX10-NEXT:    s_load_dword s2, s[2:3], 0x0
159; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
160; GFX10-NEXT:    s_sext_i32_i16 s2, s2
161; GFX10-NEXT:    v_mov_b32_e32 v0, s2
162; GFX10-NEXT:    global_store_dword v1, v0, s[0:1]
163; GFX10-NEXT:    s_endpgm
164  %load = load i16, ptr addrspace(1) %in, align 4
165  %sext = sext i16 %load to i32
166  store i32 %sext, ptr addrspace(1) %out, align 4
167  ret void
168}
169
; Loads an i8 with align 4, zero-extends it to i32 and stores the i32.
; Here %in is addrspace(1), not the constant address space.
; Expected output on all three targets: a 32-bit scalar load (s_load_dword)
; followed by s_and_b32 with 0xff to clear the bits above the loaded byte,
; then a dword store.
170define amdgpu_kernel void @zextload_i8_to_i32_align4(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
171; GFX8-LABEL: zextload_i8_to_i32_align4:
172; GFX8:       ; %bb.0:
173; GFX8-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
174; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
175; GFX8-NEXT:    s_load_dword s2, s[2:3], 0x0
176; GFX8-NEXT:    v_mov_b32_e32 v0, s0
177; GFX8-NEXT:    v_mov_b32_e32 v1, s1
178; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
179; GFX8-NEXT:    s_and_b32 s2, s2, 0xff
180; GFX8-NEXT:    v_mov_b32_e32 v2, s2
181; GFX8-NEXT:    flat_store_dword v[0:1], v2
182; GFX8-NEXT:    s_endpgm
183;
184; GFX9-LABEL: zextload_i8_to_i32_align4:
185; GFX9:       ; %bb.0:
186; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
187; GFX9-NEXT:    v_mov_b32_e32 v1, 0
188; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
189; GFX9-NEXT:    s_load_dword s2, s[2:3], 0x0
190; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
191; GFX9-NEXT:    s_and_b32 s2, s2, 0xff
192; GFX9-NEXT:    v_mov_b32_e32 v0, s2
193; GFX9-NEXT:    global_store_dword v1, v0, s[0:1]
194; GFX9-NEXT:    s_endpgm
195;
196; GFX10-LABEL: zextload_i8_to_i32_align4:
197; GFX10:       ; %bb.0:
198; GFX10-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
199; GFX10-NEXT:    v_mov_b32_e32 v1, 0
200; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
201; GFX10-NEXT:    s_load_dword s2, s[2:3], 0x0
202; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
203; GFX10-NEXT:    s_and_b32 s2, s2, 0xff
204; GFX10-NEXT:    v_mov_b32_e32 v0, s2
205; GFX10-NEXT:    global_store_dword v1, v0, s[0:1]
206; GFX10-NEXT:    s_endpgm
207  %load = load i8, ptr addrspace(1) %in, align 4
208  %zext = zext i8 %load to i32
209  store i32 %zext, ptr addrspace(1) %out, align 4
210  ret void
211}
212
; Loads an i16 with align 4, zero-extends it to i32 and stores the i32.
; Here %in is addrspace(1), not the constant address space.
; Expected output on all three targets: a 32-bit scalar load (s_load_dword)
; followed by s_and_b32 with 0xffff to clear the bits above the loaded
; halfword, then a dword store.
213define amdgpu_kernel void @zextload_i16_to_i32_align4(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
214; GFX8-LABEL: zextload_i16_to_i32_align4:
215; GFX8:       ; %bb.0:
216; GFX8-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
217; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
218; GFX8-NEXT:    s_load_dword s2, s[2:3], 0x0
219; GFX8-NEXT:    v_mov_b32_e32 v0, s0
220; GFX8-NEXT:    v_mov_b32_e32 v1, s1
221; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
222; GFX8-NEXT:    s_and_b32 s2, s2, 0xffff
223; GFX8-NEXT:    v_mov_b32_e32 v2, s2
224; GFX8-NEXT:    flat_store_dword v[0:1], v2
225; GFX8-NEXT:    s_endpgm
226;
227; GFX9-LABEL: zextload_i16_to_i32_align4:
228; GFX9:       ; %bb.0:
229; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
230; GFX9-NEXT:    v_mov_b32_e32 v1, 0
231; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
232; GFX9-NEXT:    s_load_dword s2, s[2:3], 0x0
233; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
234; GFX9-NEXT:    s_and_b32 s2, s2, 0xffff
235; GFX9-NEXT:    v_mov_b32_e32 v0, s2
236; GFX9-NEXT:    global_store_dword v1, v0, s[0:1]
237; GFX9-NEXT:    s_endpgm
238;
239; GFX10-LABEL: zextload_i16_to_i32_align4:
240; GFX10:       ; %bb.0:
241; GFX10-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
242; GFX10-NEXT:    v_mov_b32_e32 v1, 0
243; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
244; GFX10-NEXT:    s_load_dword s2, s[2:3], 0x0
245; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
246; GFX10-NEXT:    s_and_b32 s2, s2, 0xffff
247; GFX10-NEXT:    v_mov_b32_e32 v0, s2
248; GFX10-NEXT:    global_store_dword v1, v0, s[0:1]
249; GFX10-NEXT:    s_endpgm
250  %load = load i16, ptr addrspace(1) %in, align 4
251  %zext = zext i16 %load to i32
252  store i32 %zext, ptr addrspace(1) %out, align 4
253  ret void
254}
255
; Loads an i8 with align 2 and stores it unchanged as an i8.
; Expected output: with only 2-byte alignment the load is NOT turned into a
; 32-bit scalar load; it stays a byte-sized vector memory load
; (flat_load_ubyte on GFX8, global_load_ubyte on GFX9 and GFX10).
; NOTE(review): despite the "constant_" prefix in the name, %in is
; addrspace(1) here, not addrspace(4) -- confirm this is intended.
256define amdgpu_kernel void @constant_load_i8_align2(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
257; GFX8-LABEL: constant_load_i8_align2:
258; GFX8:       ; %bb.0:
259; GFX8-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
260; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
261; GFX8-NEXT:    v_mov_b32_e32 v0, s2
262; GFX8-NEXT:    v_mov_b32_e32 v1, s3
263; GFX8-NEXT:    flat_load_ubyte v2, v[0:1]
264; GFX8-NEXT:    v_mov_b32_e32 v0, s0
265; GFX8-NEXT:    v_mov_b32_e32 v1, s1
266; GFX8-NEXT:    s_waitcnt vmcnt(0)
267; GFX8-NEXT:    flat_store_byte v[0:1], v2
268; GFX8-NEXT:    s_endpgm
269;
270; GFX9-LABEL: constant_load_i8_align2:
271; GFX9:       ; %bb.0:
272; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
273; GFX9-NEXT:    v_mov_b32_e32 v0, 0
274; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
275; GFX9-NEXT:    global_load_ubyte v1, v0, s[2:3]
276; GFX9-NEXT:    s_waitcnt vmcnt(0)
277; GFX9-NEXT:    global_store_byte v0, v1, s[0:1]
278; GFX9-NEXT:    s_endpgm
279;
280; GFX10-LABEL: constant_load_i8_align2:
281; GFX10:       ; %bb.0:
282; GFX10-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
283; GFX10-NEXT:    v_mov_b32_e32 v0, 0
284; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
285; GFX10-NEXT:    global_load_ubyte v1, v0, s[2:3]
286; GFX10-NEXT:    s_waitcnt vmcnt(0)
287; GFX10-NEXT:    global_store_byte v0, v1, s[0:1]
288; GFX10-NEXT:    s_endpgm
289  %load = load i8, ptr addrspace(1) %in, align 2
290  store i8 %load, ptr addrspace(1) %out, align 2
291  ret void
292}
293
; Loads an i16 with align 2 and stores it unchanged as an i16.
; Expected output: with only 2-byte alignment the load is NOT turned into a
; 32-bit scalar load; it stays a 16-bit vector memory load
; (flat_load_ushort on GFX8, global_load_ushort on GFX9 and GFX10).
; NOTE(review): despite the "constant_" prefix in the name, %in is
; addrspace(1) here, not addrspace(4) -- confirm this is intended.
294define amdgpu_kernel void @constant_load_i16_align2(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
295; GFX8-LABEL: constant_load_i16_align2:
296; GFX8:       ; %bb.0:
297; GFX8-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
298; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
299; GFX8-NEXT:    v_mov_b32_e32 v0, s2
300; GFX8-NEXT:    v_mov_b32_e32 v1, s3
301; GFX8-NEXT:    flat_load_ushort v2, v[0:1]
302; GFX8-NEXT:    v_mov_b32_e32 v0, s0
303; GFX8-NEXT:    v_mov_b32_e32 v1, s1
304; GFX8-NEXT:    s_waitcnt vmcnt(0)
305; GFX8-NEXT:    flat_store_short v[0:1], v2
306; GFX8-NEXT:    s_endpgm
307;
308; GFX9-LABEL: constant_load_i16_align2:
309; GFX9:       ; %bb.0:
310; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
311; GFX9-NEXT:    v_mov_b32_e32 v0, 0
312; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
313; GFX9-NEXT:    global_load_ushort v1, v0, s[2:3]
314; GFX9-NEXT:    s_waitcnt vmcnt(0)
315; GFX9-NEXT:    global_store_short v0, v1, s[0:1]
316; GFX9-NEXT:    s_endpgm
317;
318; GFX10-LABEL: constant_load_i16_align2:
319; GFX10:       ; %bb.0:
320; GFX10-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
321; GFX10-NEXT:    v_mov_b32_e32 v0, 0
322; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
323; GFX10-NEXT:    global_load_ushort v1, v0, s[2:3]
324; GFX10-NEXT:    s_waitcnt vmcnt(0)
325; GFX10-NEXT:    global_store_short v0, v1, s[0:1]
326; GFX10-NEXT:    s_endpgm
327  %load = load i16, ptr addrspace(1) %in, align 2
328  store i16 %load, ptr addrspace(1) %out, align 2
329  ret void
330}
331
; Loads an i8 with align 2, sign-extends it to i32 and stores the i32 with
; align 2.
; Expected output: the load stays a sign-extending byte load
; (flat_load_sbyte on GFX8, global_load_sbyte on GFX9 and GFX10) rather than
; a 32-bit scalar load. The align-2 i32 store is emitted as two 16-bit stores:
;  - GFX8 stores the low half at s[0:1], shifts the value right by 16
;    (v_lshrrev_b32) and stores that at s[0:1] + 2, with the address computed
;    by s_add_u32 / s_addc_u32.
;  - GFX9 and GFX10 use global_store_short for the low half and
;    global_store_short_d16_hi with offset:2 for the high half.
; NOTE(review): despite the "constant_" prefix in the name, %in is
; addrspace(1) here, not addrspace(4) -- confirm this is intended.
332define amdgpu_kernel void @constant_sextload_i8_align2(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
333; GFX8-LABEL: constant_sextload_i8_align2:
334; GFX8:       ; %bb.0:
335; GFX8-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
336; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
337; GFX8-NEXT:    v_mov_b32_e32 v0, s2
338; GFX8-NEXT:    v_mov_b32_e32 v1, s3
339; GFX8-NEXT:    flat_load_sbyte v2, v[0:1]
340; GFX8-NEXT:    v_mov_b32_e32 v0, s0
341; GFX8-NEXT:    s_add_u32 s2, s0, 2
342; GFX8-NEXT:    v_mov_b32_e32 v1, s1
343; GFX8-NEXT:    s_addc_u32 s3, s1, 0
344; GFX8-NEXT:    s_waitcnt vmcnt(0)
345; GFX8-NEXT:    flat_store_short v[0:1], v2
346; GFX8-NEXT:    v_mov_b32_e32 v0, s2
347; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
348; GFX8-NEXT:    v_mov_b32_e32 v1, s3
349; GFX8-NEXT:    flat_store_short v[0:1], v3
350; GFX8-NEXT:    s_endpgm
351;
352; GFX9-LABEL: constant_sextload_i8_align2:
353; GFX9:       ; %bb.0:
354; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
355; GFX9-NEXT:    v_mov_b32_e32 v0, 0
356; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
357; GFX9-NEXT:    global_load_sbyte v1, v0, s[2:3]
358; GFX9-NEXT:    s_waitcnt vmcnt(0)
359; GFX9-NEXT:    global_store_short v0, v1, s[0:1]
360; GFX9-NEXT:    global_store_short_d16_hi v0, v1, s[0:1] offset:2
361; GFX9-NEXT:    s_endpgm
362;
363; GFX10-LABEL: constant_sextload_i8_align2:
364; GFX10:       ; %bb.0:
365; GFX10-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
366; GFX10-NEXT:    v_mov_b32_e32 v0, 0
367; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
368; GFX10-NEXT:    global_load_sbyte v1, v0, s[2:3]
369; GFX10-NEXT:    s_waitcnt vmcnt(0)
370; GFX10-NEXT:    global_store_short v0, v1, s[0:1]
371; GFX10-NEXT:    global_store_short_d16_hi v0, v1, s[0:1] offset:2
372; GFX10-NEXT:    s_endpgm
373  %load = load i8, ptr addrspace(1) %in, align 2
374  %sextload = sext i8 %load to i32
375  store i32 %sextload, ptr addrspace(1) %out, align 2
376  ret void
377}
378
; Loads an i8 with align 2, zero-extends it to i32 and stores the i32 with
; align 2.
; Expected output: the load stays a zero-extending byte load
; (flat_load_ubyte on GFX8, global_load_ubyte on GFX9 and GFX10) rather than
; a 32-bit scalar load. The align-2 i32 store is emitted as two 16-bit stores;
; the high half is stored from a register that was set to 0:
;  - GFX8 materializes the zero in v5 and stores it at s[0:1] + 2, with the
;    address computed by s_add_u32 / s_addc_u32.
;  - GFX9 and GFX10 reuse v0 (already 0, also used as the address offset
;    register) as the data for the second global_store_short at offset:2.
; NOTE(review): despite the "constant_" prefix in the name, %in is
; addrspace(1) here, not addrspace(4) -- confirm this is intended.
379define amdgpu_kernel void @constant_zextload_i8_align2(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
380; GFX8-LABEL: constant_zextload_i8_align2:
381; GFX8:       ; %bb.0:
382; GFX8-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
383; GFX8-NEXT:    v_mov_b32_e32 v5, 0
384; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
385; GFX8-NEXT:    v_mov_b32_e32 v0, s2
386; GFX8-NEXT:    v_mov_b32_e32 v1, s3
387; GFX8-NEXT:    flat_load_ubyte v4, v[0:1]
388; GFX8-NEXT:    s_add_u32 s2, s0, 2
389; GFX8-NEXT:    v_mov_b32_e32 v0, s0
390; GFX8-NEXT:    v_mov_b32_e32 v1, s1
391; GFX8-NEXT:    s_addc_u32 s3, s1, 0
392; GFX8-NEXT:    v_mov_b32_e32 v2, s2
393; GFX8-NEXT:    v_mov_b32_e32 v3, s3
394; GFX8-NEXT:    s_waitcnt vmcnt(0)
395; GFX8-NEXT:    flat_store_short v[0:1], v4
396; GFX8-NEXT:    flat_store_short v[2:3], v5
397; GFX8-NEXT:    s_endpgm
398;
399; GFX9-LABEL: constant_zextload_i8_align2:
400; GFX9:       ; %bb.0:
401; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
402; GFX9-NEXT:    v_mov_b32_e32 v0, 0
403; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
404; GFX9-NEXT:    global_load_ubyte v1, v0, s[2:3]
405; GFX9-NEXT:    s_waitcnt vmcnt(0)
406; GFX9-NEXT:    global_store_short v0, v1, s[0:1]
407; GFX9-NEXT:    global_store_short v0, v0, s[0:1] offset:2
408; GFX9-NEXT:    s_endpgm
409;
410; GFX10-LABEL: constant_zextload_i8_align2:
411; GFX10:       ; %bb.0:
412; GFX10-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
413; GFX10-NEXT:    v_mov_b32_e32 v0, 0
414; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
415; GFX10-NEXT:    global_load_ubyte v1, v0, s[2:3]
416; GFX10-NEXT:    s_waitcnt vmcnt(0)
417; GFX10-NEXT:    global_store_short v0, v1, s[0:1]
418; GFX10-NEXT:    global_store_short v0, v0, s[0:1] offset:2
419; GFX10-NEXT:    s_endpgm
420  %load = load i8, ptr addrspace(1) %in, align 2
421  %zextload = zext i8 %load to i32
422  store i32 %zextload, ptr addrspace(1) %out, align 2
423  ret void
424}
425
; Attribute group #0 is referenced by every kernel definition visible in this
; file and marks them nounwind.
426attributes #0 = { nounwind }
427