xref: /llvm-project/llvm/test/CodeGen/AMDGPU/GlobalISel/zextload.ll (revision f2c164c8150548d983565c4ddc0fde790f9e2a5b)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s | FileCheck --check-prefix=GFX9 %s
3; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=fiji < %s | FileCheck --check-prefix=GFX8 %s
4; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=tahiti < %s | FileCheck --check-prefix=GFX6 %s
5; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck --check-prefix=GFX10 %s
6; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 < %s | FileCheck --check-prefix=GFX11 %s
7
; Zero-extending load: reads an i1 through an addrspace(1) pointer and
; widens it to i32 with zext.
; For all five targets the checks expect an unsigned byte load followed
; by a v_and_b32 with 1, i.e. only the low bit of the loaded byte is kept.
8define i32 @zextload_global_i1_to_i32(ptr addrspace(1) %ptr) {
9; GFX9-LABEL: zextload_global_i1_to_i32:
10; GFX9:       ; %bb.0:
11; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off
13; GFX9-NEXT:    s_waitcnt vmcnt(0)
14; GFX9-NEXT:    v_and_b32_e32 v0, 1, v0
15; GFX9-NEXT:    s_setpc_b64 s[30:31]
16;
17; GFX8-LABEL: zextload_global_i1_to_i32:
18; GFX8:       ; %bb.0:
19; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
20; GFX8-NEXT:    flat_load_ubyte v0, v[0:1]
21; GFX8-NEXT:    s_waitcnt vmcnt(0)
22; GFX8-NEXT:    v_and_b32_e32 v0, 1, v0
23; GFX8-NEXT:    s_setpc_b64 s[30:31]
24;
25; GFX6-LABEL: zextload_global_i1_to_i32:
26; GFX6:       ; %bb.0:
27; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
28; GFX6-NEXT:    s_mov_b32 s6, 0
29; GFX6-NEXT:    s_mov_b32 s7, 0xf000
30; GFX6-NEXT:    s_mov_b64 s[4:5], 0
31; GFX6-NEXT:    buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64
32; GFX6-NEXT:    s_waitcnt vmcnt(0)
33; GFX6-NEXT:    v_and_b32_e32 v0, 1, v0
34; GFX6-NEXT:    s_setpc_b64 s[30:31]
35;
36; GFX10-LABEL: zextload_global_i1_to_i32:
37; GFX10:       ; %bb.0:
38; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
39; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off
40; GFX10-NEXT:    s_waitcnt vmcnt(0)
41; GFX10-NEXT:    v_and_b32_e32 v0, 1, v0
42; GFX10-NEXT:    s_setpc_b64 s[30:31]
43;
44; GFX11-LABEL: zextload_global_i1_to_i32:
45; GFX11:       ; %bb.0:
46; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
47; GFX11-NEXT:    global_load_u8 v0, v[0:1], off
48; GFX11-NEXT:    s_waitcnt vmcnt(0)
49; GFX11-NEXT:    v_and_b32_e32 v0, 1, v0
50; GFX11-NEXT:    s_setpc_b64 s[30:31]
51  %load = load i1, ptr addrspace(1) %ptr
52  %ext = zext i1 %load to i32
53  ret i32 %ext
54}
55
; Zero-extending load: reads an i8 through an addrspace(1) pointer and
; widens it to i32 with zext.
; For all five targets the checks expect only an unsigned byte load;
; no separate masking or extension instruction follows it.
56define i32 @zextload_global_i8_to_i32(ptr addrspace(1) %ptr) {
57; GFX9-LABEL: zextload_global_i8_to_i32:
58; GFX9:       ; %bb.0:
59; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
60; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off
61; GFX9-NEXT:    s_waitcnt vmcnt(0)
62; GFX9-NEXT:    s_setpc_b64 s[30:31]
63;
64; GFX8-LABEL: zextload_global_i8_to_i32:
65; GFX8:       ; %bb.0:
66; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
67; GFX8-NEXT:    flat_load_ubyte v0, v[0:1]
68; GFX8-NEXT:    s_waitcnt vmcnt(0)
69; GFX8-NEXT:    s_setpc_b64 s[30:31]
70;
71; GFX6-LABEL: zextload_global_i8_to_i32:
72; GFX6:       ; %bb.0:
73; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
74; GFX6-NEXT:    s_mov_b32 s6, 0
75; GFX6-NEXT:    s_mov_b32 s7, 0xf000
76; GFX6-NEXT:    s_mov_b64 s[4:5], 0
77; GFX6-NEXT:    buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64
78; GFX6-NEXT:    s_waitcnt vmcnt(0)
79; GFX6-NEXT:    s_setpc_b64 s[30:31]
80;
81; GFX10-LABEL: zextload_global_i8_to_i32:
82; GFX10:       ; %bb.0:
83; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
84; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off
85; GFX10-NEXT:    s_waitcnt vmcnt(0)
86; GFX10-NEXT:    s_setpc_b64 s[30:31]
87;
88; GFX11-LABEL: zextload_global_i8_to_i32:
89; GFX11:       ; %bb.0:
90; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
91; GFX11-NEXT:    global_load_u8 v0, v[0:1], off
92; GFX11-NEXT:    s_waitcnt vmcnt(0)
93; GFX11-NEXT:    s_setpc_b64 s[30:31]
94  %load = load i8, ptr addrspace(1) %ptr
95  %ext = zext i8 %load to i32
96  ret i32 %ext
97}
98
; Zero-extending load: reads an i16 through an addrspace(1) pointer and
; widens it to i32 with zext.
; For all five targets the checks expect only an unsigned 16-bit load
; (ushort / u16 mnemonic); no separate extension instruction follows it.
99define i32 @zextload_global_i16_to_i32(ptr addrspace(1) %ptr) {
100; GFX9-LABEL: zextload_global_i16_to_i32:
101; GFX9:       ; %bb.0:
102; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
103; GFX9-NEXT:    global_load_ushort v0, v[0:1], off
104; GFX9-NEXT:    s_waitcnt vmcnt(0)
105; GFX9-NEXT:    s_setpc_b64 s[30:31]
106;
107; GFX8-LABEL: zextload_global_i16_to_i32:
108; GFX8:       ; %bb.0:
109; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
110; GFX8-NEXT:    flat_load_ushort v0, v[0:1]
111; GFX8-NEXT:    s_waitcnt vmcnt(0)
112; GFX8-NEXT:    s_setpc_b64 s[30:31]
113;
114; GFX6-LABEL: zextload_global_i16_to_i32:
115; GFX6:       ; %bb.0:
116; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
117; GFX6-NEXT:    s_mov_b32 s6, 0
118; GFX6-NEXT:    s_mov_b32 s7, 0xf000
119; GFX6-NEXT:    s_mov_b64 s[4:5], 0
120; GFX6-NEXT:    buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64
121; GFX6-NEXT:    s_waitcnt vmcnt(0)
122; GFX6-NEXT:    s_setpc_b64 s[30:31]
123;
124; GFX10-LABEL: zextload_global_i16_to_i32:
125; GFX10:       ; %bb.0:
126; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
127; GFX10-NEXT:    global_load_ushort v0, v[0:1], off
128; GFX10-NEXT:    s_waitcnt vmcnt(0)
129; GFX10-NEXT:    s_setpc_b64 s[30:31]
130;
131; GFX11-LABEL: zextload_global_i16_to_i32:
132; GFX11:       ; %bb.0:
133; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
134; GFX11-NEXT:    global_load_u16 v0, v[0:1], off
135; GFX11-NEXT:    s_waitcnt vmcnt(0)
136; GFX11-NEXT:    s_setpc_b64 s[30:31]
137  %load = load i16, ptr addrspace(1) %ptr
138  %ext = zext i16 %load to i64
139  ret i32 %ext
140}
141
; Zero-extending load: reads an i1 through an addrspace(1) pointer and
; widens it to i64 with zext.
; The checks expect an unsigned byte load into v0, a v_and_b32 with 1 on
; v0, and v1 set to 0 (presumably the upper 32 bits of the i64 result --
; the register assignment is not spelled out in this file).
; On the gfx1100 run the zeroing of v1 and the mask of v0 are expected as
; a single v_dual_mov_b32 / v_dual_and_b32 pair after the wait; the other
; targets expect a standalone v_mov_b32 before the wait.
142define i64 @zextload_global_i1_to_i64(ptr addrspace(1) %ptr) {
143; GFX9-LABEL: zextload_global_i1_to_i64:
144; GFX9:       ; %bb.0:
145; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
146; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off
147; GFX9-NEXT:    v_mov_b32_e32 v1, 0
148; GFX9-NEXT:    s_waitcnt vmcnt(0)
149; GFX9-NEXT:    v_and_b32_e32 v0, 1, v0
150; GFX9-NEXT:    s_setpc_b64 s[30:31]
151;
152; GFX8-LABEL: zextload_global_i1_to_i64:
153; GFX8:       ; %bb.0:
154; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
155; GFX8-NEXT:    flat_load_ubyte v0, v[0:1]
156; GFX8-NEXT:    v_mov_b32_e32 v1, 0
157; GFX8-NEXT:    s_waitcnt vmcnt(0)
158; GFX8-NEXT:    v_and_b32_e32 v0, 1, v0
159; GFX8-NEXT:    s_setpc_b64 s[30:31]
160;
161; GFX6-LABEL: zextload_global_i1_to_i64:
162; GFX6:       ; %bb.0:
163; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
164; GFX6-NEXT:    s_mov_b32 s6, 0
165; GFX6-NEXT:    s_mov_b32 s7, 0xf000
166; GFX6-NEXT:    s_mov_b64 s[4:5], 0
167; GFX6-NEXT:    buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64
168; GFX6-NEXT:    v_mov_b32_e32 v1, 0
169; GFX6-NEXT:    s_waitcnt vmcnt(0)
170; GFX6-NEXT:    v_and_b32_e32 v0, 1, v0
171; GFX6-NEXT:    s_setpc_b64 s[30:31]
172;
173; GFX10-LABEL: zextload_global_i1_to_i64:
174; GFX10:       ; %bb.0:
175; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
176; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off
177; GFX10-NEXT:    v_mov_b32_e32 v1, 0
178; GFX10-NEXT:    s_waitcnt vmcnt(0)
179; GFX10-NEXT:    v_and_b32_e32 v0, 1, v0
180; GFX10-NEXT:    s_setpc_b64 s[30:31]
181;
182; GFX11-LABEL: zextload_global_i1_to_i64:
183; GFX11:       ; %bb.0:
184; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
185; GFX11-NEXT:    global_load_u8 v0, v[0:1], off
186; GFX11-NEXT:    s_waitcnt vmcnt(0)
187; GFX11-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 1, v0
188; GFX11-NEXT:    s_setpc_b64 s[30:31]
189  %load = load i1, ptr addrspace(1) %ptr
190  %ext = zext i1 %load to i64
191  ret i64 %ext
192}
193
; Zero-extending load: reads an i8 through an addrspace(1) pointer and
; widens it to i64 with zext.
; For all five targets the checks expect an unsigned byte load into v0
; plus a v_mov_b32 writing 0 to v1 (presumably the upper 32 bits of the
; i64 result); no masking of v0 is expected.
194define i64 @zextload_global_i8_to_i64(ptr addrspace(1) %ptr) {
195; GFX9-LABEL: zextload_global_i8_to_i64:
196; GFX9:       ; %bb.0:
197; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
198; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off
199; GFX9-NEXT:    v_mov_b32_e32 v1, 0
200; GFX9-NEXT:    s_waitcnt vmcnt(0)
201; GFX9-NEXT:    s_setpc_b64 s[30:31]
202;
203; GFX8-LABEL: zextload_global_i8_to_i64:
204; GFX8:       ; %bb.0:
205; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
206; GFX8-NEXT:    flat_load_ubyte v0, v[0:1]
207; GFX8-NEXT:    v_mov_b32_e32 v1, 0
208; GFX8-NEXT:    s_waitcnt vmcnt(0)
209; GFX8-NEXT:    s_setpc_b64 s[30:31]
210;
211; GFX6-LABEL: zextload_global_i8_to_i64:
212; GFX6:       ; %bb.0:
213; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
214; GFX6-NEXT:    s_mov_b32 s6, 0
215; GFX6-NEXT:    s_mov_b32 s7, 0xf000
216; GFX6-NEXT:    s_mov_b64 s[4:5], 0
217; GFX6-NEXT:    buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64
218; GFX6-NEXT:    v_mov_b32_e32 v1, 0
219; GFX6-NEXT:    s_waitcnt vmcnt(0)
220; GFX6-NEXT:    s_setpc_b64 s[30:31]
221;
222; GFX10-LABEL: zextload_global_i8_to_i64:
223; GFX10:       ; %bb.0:
224; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
225; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off
226; GFX10-NEXT:    v_mov_b32_e32 v1, 0
227; GFX10-NEXT:    s_waitcnt vmcnt(0)
228; GFX10-NEXT:    s_setpc_b64 s[30:31]
229;
230; GFX11-LABEL: zextload_global_i8_to_i64:
231; GFX11:       ; %bb.0:
232; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
233; GFX11-NEXT:    global_load_u8 v0, v[0:1], off
234; GFX11-NEXT:    v_mov_b32_e32 v1, 0
235; GFX11-NEXT:    s_waitcnt vmcnt(0)
236; GFX11-NEXT:    s_setpc_b64 s[30:31]
237  %load = load i8, ptr addrspace(1) %ptr
238  %ext = zext i8 %load to i64
239  ret i64 %ext
240}
241
; Zero-extending load: reads an i16 through an addrspace(1) pointer and
; widens it to i64 with zext.
; For all five targets the checks expect an unsigned 16-bit load into v0
; plus a v_mov_b32 writing 0 to v1 (presumably the upper 32 bits of the
; i64 result); no masking of v0 is expected.
242define i64 @zextload_global_i16_to_i64(ptr addrspace(1) %ptr) {
243; GFX9-LABEL: zextload_global_i16_to_i64:
244; GFX9:       ; %bb.0:
245; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
246; GFX9-NEXT:    global_load_ushort v0, v[0:1], off
247; GFX9-NEXT:    v_mov_b32_e32 v1, 0
248; GFX9-NEXT:    s_waitcnt vmcnt(0)
249; GFX9-NEXT:    s_setpc_b64 s[30:31]
250;
251; GFX8-LABEL: zextload_global_i16_to_i64:
252; GFX8:       ; %bb.0:
253; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
254; GFX8-NEXT:    flat_load_ushort v0, v[0:1]
255; GFX8-NEXT:    v_mov_b32_e32 v1, 0
256; GFX8-NEXT:    s_waitcnt vmcnt(0)
257; GFX8-NEXT:    s_setpc_b64 s[30:31]
258;
259; GFX6-LABEL: zextload_global_i16_to_i64:
260; GFX6:       ; %bb.0:
261; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
262; GFX6-NEXT:    s_mov_b32 s6, 0
263; GFX6-NEXT:    s_mov_b32 s7, 0xf000
264; GFX6-NEXT:    s_mov_b64 s[4:5], 0
265; GFX6-NEXT:    buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64
266; GFX6-NEXT:    v_mov_b32_e32 v1, 0
267; GFX6-NEXT:    s_waitcnt vmcnt(0)
268; GFX6-NEXT:    s_setpc_b64 s[30:31]
269;
270; GFX10-LABEL: zextload_global_i16_to_i64:
271; GFX10:       ; %bb.0:
272; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
273; GFX10-NEXT:    global_load_ushort v0, v[0:1], off
274; GFX10-NEXT:    v_mov_b32_e32 v1, 0
275; GFX10-NEXT:    s_waitcnt vmcnt(0)
276; GFX10-NEXT:    s_setpc_b64 s[30:31]
277;
278; GFX11-LABEL: zextload_global_i16_to_i64:
279; GFX11:       ; %bb.0:
280; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
281; GFX11-NEXT:    global_load_u16 v0, v[0:1], off
282; GFX11-NEXT:    v_mov_b32_e32 v1, 0
283; GFX11-NEXT:    s_waitcnt vmcnt(0)
284; GFX11-NEXT:    s_setpc_b64 s[30:31]
285  %load = load i16, ptr addrspace(1) %ptr
286  %ext = zext i16 %load to i64
287  ret i64 %ext
288}
289
; Zero-extending load: reads an i32 through an addrspace(1) pointer and
; widens it to i64 with zext.
; For all five targets the checks expect a 32-bit load (dword / b32
; mnemonic) into v0 plus a v_mov_b32 writing 0 to v1 (presumably the
; upper 32 bits of the i64 result).
290define i64 @zextload_global_i32_to_i64(ptr addrspace(1) %ptr) {
291; GFX9-LABEL: zextload_global_i32_to_i64:
292; GFX9:       ; %bb.0:
293; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
294; GFX9-NEXT:    global_load_dword v0, v[0:1], off
295; GFX9-NEXT:    v_mov_b32_e32 v1, 0
296; GFX9-NEXT:    s_waitcnt vmcnt(0)
297; GFX9-NEXT:    s_setpc_b64 s[30:31]
298;
299; GFX8-LABEL: zextload_global_i32_to_i64:
300; GFX8:       ; %bb.0:
301; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
302; GFX8-NEXT:    flat_load_dword v0, v[0:1]
303; GFX8-NEXT:    v_mov_b32_e32 v1, 0
304; GFX8-NEXT:    s_waitcnt vmcnt(0)
305; GFX8-NEXT:    s_setpc_b64 s[30:31]
306;
307; GFX6-LABEL: zextload_global_i32_to_i64:
308; GFX6:       ; %bb.0:
309; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
310; GFX6-NEXT:    s_mov_b32 s6, 0
311; GFX6-NEXT:    s_mov_b32 s7, 0xf000
312; GFX6-NEXT:    s_mov_b64 s[4:5], 0
313; GFX6-NEXT:    buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
314; GFX6-NEXT:    v_mov_b32_e32 v1, 0
315; GFX6-NEXT:    s_waitcnt vmcnt(0)
316; GFX6-NEXT:    s_setpc_b64 s[30:31]
317;
318; GFX10-LABEL: zextload_global_i32_to_i64:
319; GFX10:       ; %bb.0:
320; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
321; GFX10-NEXT:    global_load_dword v0, v[0:1], off
322; GFX10-NEXT:    v_mov_b32_e32 v1, 0
323; GFX10-NEXT:    s_waitcnt vmcnt(0)
324; GFX10-NEXT:    s_setpc_b64 s[30:31]
325;
326; GFX11-LABEL: zextload_global_i32_to_i64:
327; GFX11:       ; %bb.0:
328; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
329; GFX11-NEXT:    global_load_b32 v0, v[0:1], off
330; GFX11-NEXT:    v_mov_b32_e32 v1, 0
331; GFX11-NEXT:    s_waitcnt vmcnt(0)
332; GFX11-NEXT:    s_setpc_b64 s[30:31]
333  %load = load i32, ptr addrspace(1) %ptr
334  %ext = zext i32 %load to i64
335  ret i64 %ext
336}
337
; Zero-extending load: reads an i32 through an addrspace(1) pointer and
; widens it to i96 with zext.
; The checks expect a 32-bit load into v0 with v1 and v2 both set to 0
; (presumably the upper 64 bits of the i96 result).
; On the gfx1100 run the two zero writes are expected as one
; v_dual_mov_b32 pair; the other targets expect two separate v_mov_b32.
338define i96 @zextload_global_i32_to_i96(ptr addrspace(1) %ptr) {
339; GFX9-LABEL: zextload_global_i32_to_i96:
340; GFX9:       ; %bb.0:
341; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
342; GFX9-NEXT:    global_load_dword v0, v[0:1], off
343; GFX9-NEXT:    v_mov_b32_e32 v1, 0
344; GFX9-NEXT:    v_mov_b32_e32 v2, 0
345; GFX9-NEXT:    s_waitcnt vmcnt(0)
346; GFX9-NEXT:    s_setpc_b64 s[30:31]
347;
348; GFX8-LABEL: zextload_global_i32_to_i96:
349; GFX8:       ; %bb.0:
350; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
351; GFX8-NEXT:    flat_load_dword v0, v[0:1]
352; GFX8-NEXT:    v_mov_b32_e32 v1, 0
353; GFX8-NEXT:    v_mov_b32_e32 v2, 0
354; GFX8-NEXT:    s_waitcnt vmcnt(0)
355; GFX8-NEXT:    s_setpc_b64 s[30:31]
356;
357; GFX6-LABEL: zextload_global_i32_to_i96:
358; GFX6:       ; %bb.0:
359; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
360; GFX6-NEXT:    s_mov_b32 s6, 0
361; GFX6-NEXT:    s_mov_b32 s7, 0xf000
362; GFX6-NEXT:    s_mov_b64 s[4:5], 0
363; GFX6-NEXT:    buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
364; GFX6-NEXT:    v_mov_b32_e32 v1, 0
365; GFX6-NEXT:    v_mov_b32_e32 v2, 0
366; GFX6-NEXT:    s_waitcnt vmcnt(0)
367; GFX6-NEXT:    s_setpc_b64 s[30:31]
368;
369; GFX10-LABEL: zextload_global_i32_to_i96:
370; GFX10:       ; %bb.0:
371; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
372; GFX10-NEXT:    global_load_dword v0, v[0:1], off
373; GFX10-NEXT:    v_mov_b32_e32 v1, 0
374; GFX10-NEXT:    v_mov_b32_e32 v2, 0
375; GFX10-NEXT:    s_waitcnt vmcnt(0)
376; GFX10-NEXT:    s_setpc_b64 s[30:31]
377;
378; GFX11-LABEL: zextload_global_i32_to_i96:
379; GFX11:       ; %bb.0:
380; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
381; GFX11-NEXT:    global_load_b32 v0, v[0:1], off
382; GFX11-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, 0
383; GFX11-NEXT:    s_waitcnt vmcnt(0)
384; GFX11-NEXT:    s_setpc_b64 s[30:31]
385  %load = load i32, ptr addrspace(1) %ptr
386  %ext = zext i32 %load to i96
387  ret i96 %ext
388}
389
; Zero-extending load: reads an i32 through an addrspace(1) pointer and
; widens it to i128 with zext.
; The checks expect a 32-bit load into v0 with v1, v2 and v3 all set to 0
; (presumably the upper 96 bits of the i128 result).
; On the gfx1100 run v1/v2 are expected as one v_dual_mov_b32 pair
; followed by a plain v_mov_b32 for v3; the other targets expect three
; separate v_mov_b32.
390define i128 @zextload_global_i32_to_i128(ptr addrspace(1) %ptr) {
391; GFX9-LABEL: zextload_global_i32_to_i128:
392; GFX9:       ; %bb.0:
393; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
394; GFX9-NEXT:    global_load_dword v0, v[0:1], off
395; GFX9-NEXT:    v_mov_b32_e32 v1, 0
396; GFX9-NEXT:    v_mov_b32_e32 v2, 0
397; GFX9-NEXT:    v_mov_b32_e32 v3, 0
398; GFX9-NEXT:    s_waitcnt vmcnt(0)
399; GFX9-NEXT:    s_setpc_b64 s[30:31]
400;
401; GFX8-LABEL: zextload_global_i32_to_i128:
402; GFX8:       ; %bb.0:
403; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
404; GFX8-NEXT:    flat_load_dword v0, v[0:1]
405; GFX8-NEXT:    v_mov_b32_e32 v1, 0
406; GFX8-NEXT:    v_mov_b32_e32 v2, 0
407; GFX8-NEXT:    v_mov_b32_e32 v3, 0
408; GFX8-NEXT:    s_waitcnt vmcnt(0)
409; GFX8-NEXT:    s_setpc_b64 s[30:31]
410;
411; GFX6-LABEL: zextload_global_i32_to_i128:
412; GFX6:       ; %bb.0:
413; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
414; GFX6-NEXT:    s_mov_b32 s6, 0
415; GFX6-NEXT:    s_mov_b32 s7, 0xf000
416; GFX6-NEXT:    s_mov_b64 s[4:5], 0
417; GFX6-NEXT:    buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
418; GFX6-NEXT:    v_mov_b32_e32 v1, 0
419; GFX6-NEXT:    v_mov_b32_e32 v2, 0
420; GFX6-NEXT:    v_mov_b32_e32 v3, 0
421; GFX6-NEXT:    s_waitcnt vmcnt(0)
422; GFX6-NEXT:    s_setpc_b64 s[30:31]
423;
424; GFX10-LABEL: zextload_global_i32_to_i128:
425; GFX10:       ; %bb.0:
426; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
427; GFX10-NEXT:    global_load_dword v0, v[0:1], off
428; GFX10-NEXT:    v_mov_b32_e32 v1, 0
429; GFX10-NEXT:    v_mov_b32_e32 v2, 0
430; GFX10-NEXT:    v_mov_b32_e32 v3, 0
431; GFX10-NEXT:    s_waitcnt vmcnt(0)
432; GFX10-NEXT:    s_setpc_b64 s[30:31]
433;
434; GFX11-LABEL: zextload_global_i32_to_i128:
435; GFX11:       ; %bb.0:
436; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
437; GFX11-NEXT:    global_load_b32 v0, v[0:1], off
438; GFX11-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, 0
439; GFX11-NEXT:    v_mov_b32_e32 v3, 0
440; GFX11-NEXT:    s_waitcnt vmcnt(0)
441; GFX11-NEXT:    s_setpc_b64 s[30:31]
442  %load = load i32, ptr addrspace(1) %ptr
443  %ext = zext i32 %load to i128
444  ret i128 %ext
445}
446
446