; xref: /llvm-project/llvm/test/CodeGen/AMDGPU/preserve-hi16.ll (revision 9e9907f1cfa424366fba58d9520f9305b537cec9)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc -mtriple=amdgcn -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX8 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX9ALL,GFX900 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX9ALL,GFX906 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11 %s

; Baseline: i16 shl returned as i16. Every target is expected to select a
; single 16-bit left shift (v_lshlrev_b16) with no masking of the result.
define i16 @shl_i16(i16 %x, i16 %y) {
; GFX8-LABEL: shl_i16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_lshlrev_b16_e32 v0, v1, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9ALL-LABEL: shl_i16:
; GFX9ALL:       ; %bb.0:
; GFX9ALL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9ALL-NEXT:    v_lshlrev_b16_e32 v0, v1, v0
; GFX9ALL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: shl_i16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_lshlrev_b16 v0, v1, v0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: shl_i16:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_lshlrev_b16 v0, v1, v0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %res = shl i16 %x, %y
  ret i16 %res
}

; Baseline: i16 logical shift right returned as i16. Every target is expected
; to select a single v_lshrrev_b16 with no masking of the result.
define i16 @lshr_i16(i16 %x, i16 %y) {
; GFX8-LABEL: lshr_i16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_lshrrev_b16_e32 v0, v1, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9ALL-LABEL: lshr_i16:
; GFX9ALL:       ; %bb.0:
; GFX9ALL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9ALL-NEXT:    v_lshrrev_b16_e32 v0, v1, v0
; GFX9ALL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: lshr_i16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_lshrrev_b16 v0, v1, v0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: lshr_i16:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_lshrrev_b16 v0, v1, v0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %res = lshr i16 %x, %y
  ret i16 %res
}

; Baseline: i16 arithmetic shift right returned as i16. Every target is
; expected to select a single v_ashrrev_i16 with no masking of the result.
define i16 @ashr_i16(i16 %x, i16 %y) {
; GFX8-LABEL: ashr_i16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_ashrrev_i16_e32 v0, v1, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9ALL-LABEL: ashr_i16:
; GFX9ALL:       ; %bb.0:
; GFX9ALL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9ALL-NEXT:    v_ashrrev_i16_e32 v0, v1, v0
; GFX9ALL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: ashr_i16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_ashrrev_i16 v0, v1, v0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: ashr_i16:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_ashrrev_i16 v0, v1, v0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %res = ashr i16 %x, %y
  ret i16 %res
}

; Baseline: i16 add returned as i16. The gfx8 and gfx9 checks expect
; v_add_u16, the gfx10 and gfx11 checks expect v_add_nc_u16; no target is
; expected to mask the result.
define i16 @add_u16(i16 %x, i16 %y) {
; GFX8-LABEL: add_u16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_add_u16_e32 v0, v0, v1
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9ALL-LABEL: add_u16:
; GFX9ALL:       ; %bb.0:
; GFX9ALL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9ALL-NEXT:    v_add_u16_e32 v0, v0, v1
; GFX9ALL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: add_u16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_add_nc_u16 v0, v0, v1
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: add_u16:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_add_nc_u16 v0, v0, v1
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %res = add i16 %x, %y
  ret i16 %res
}

; Baseline: i16 sub returned as i16. The gfx8 and gfx9 checks expect
; v_sub_u16, the gfx10 and gfx11 checks expect v_sub_nc_u16; no target is
; expected to mask the result.
define i16 @sub_u16(i16 %x, i16 %y) {
; GFX8-LABEL: sub_u16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_sub_u16_e32 v0, v0, v1
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9ALL-LABEL: sub_u16:
; GFX9ALL:       ; %bb.0:
; GFX9ALL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9ALL-NEXT:    v_sub_u16_e32 v0, v0, v1
; GFX9ALL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: sub_u16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_sub_nc_u16 v0, v0, v1
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: sub_u16:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_sub_nc_u16 v0, v0, v1
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %res = sub i16 %x, %y
  ret i16 %res
}

; Baseline: i16 mul returned as i16. Every target is expected to select a
; single v_mul_lo_u16 with no masking of the result.
define i16 @mul_lo_u16(i16 %x, i16 %y) {
; GFX8-LABEL: mul_lo_u16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_mul_lo_u16_e32 v0, v0, v1
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9ALL-LABEL: mul_lo_u16:
; GFX9ALL:       ; %bb.0:
; GFX9ALL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9ALL-NEXT:    v_mul_lo_u16_e32 v0, v0, v1
; GFX9ALL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: mul_lo_u16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_mul_lo_u16 v0, v0, v1
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: mul_lo_u16:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_mul_lo_u16 v0, v0, v1
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %res = mul i16 %x, %y
  ret i16 %res
}

; Baseline: unsigned i16 minimum written as icmp ule + select and returned as
; i16. Every target is expected to fold the pair into a single v_min_u16.
define i16 @min_u16(i16 %x, i16 %y) {
; GFX8-LABEL: min_u16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_min_u16_e32 v0, v0, v1
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9ALL-LABEL: min_u16:
; GFX9ALL:       ; %bb.0:
; GFX9ALL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9ALL-NEXT:    v_min_u16_e32 v0, v0, v1
; GFX9ALL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: min_u16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_min_u16 v0, v0, v1
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: min_u16:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_min_u16 v0, v0, v1
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %cmp = icmp ule i16 %x, %y
  %res = select i1 %cmp, i16 %x, i16 %y
  ret i16 %res
}

; Baseline: signed i16 minimum written as icmp sle + select and returned as
; i16. Every target is expected to fold the pair into a single v_min_i16.
define i16 @min_i16(i16 %x, i16 %y) {
; GFX8-LABEL: min_i16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_min_i16_e32 v0, v0, v1
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9ALL-LABEL: min_i16:
; GFX9ALL:       ; %bb.0:
; GFX9ALL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9ALL-NEXT:    v_min_i16_e32 v0, v0, v1
; GFX9ALL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: min_i16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_min_i16 v0, v0, v1
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: min_i16:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_min_i16 v0, v0, v1
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %cmp = icmp sle i16 %x, %y
  %res = select i1 %cmp, i16 %x, i16 %y
  ret i16 %res
}

; Baseline: unsigned i16 maximum written as icmp uge + select and returned as
; i16. Every target is expected to fold the pair into a single v_max_u16.
define i16 @max_u16(i16 %x, i16 %y) {
; GFX8-LABEL: max_u16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_max_u16_e32 v0, v0, v1
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9ALL-LABEL: max_u16:
; GFX9ALL:       ; %bb.0:
; GFX9ALL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9ALL-NEXT:    v_max_u16_e32 v0, v0, v1
; GFX9ALL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: max_u16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_max_u16 v0, v0, v1
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: max_u16:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_max_u16 v0, v0, v1
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %cmp = icmp uge i16 %x, %y
  %res = select i1 %cmp, i16 %x, i16 %y
  ret i16 %res
}

; Baseline: signed i16 maximum written as icmp sge + select and returned as
; i16. Every target is expected to fold the pair into a single v_max_i16.
define i16 @max_i16(i16 %x, i16 %y) {
; GFX8-LABEL: max_i16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_max_i16_e32 v0, v0, v1
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9ALL-LABEL: max_i16:
; GFX9ALL:       ; %bb.0:
; GFX9ALL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9ALL-NEXT:    v_max_i16_e32 v0, v0, v1
; GFX9ALL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: max_i16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_max_i16 v0, v0, v1
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: max_i16:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_max_i16 v0, v0, v1
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %cmp = icmp sge i16 %x, %y
  %res = select i1 %cmp, i16 %x, i16 %y
  ret i16 %res
}

; i16 shl whose result is zero-extended to i32. The gfx8 and gfx9 checks
; expect the bare v_lshlrev_b16 (nothing is emitted for the zext); the gfx10
; and gfx11 checks expect an explicit v_and_b32 with 0xffff afterwards.
; NOTE(review): presumably the 16-bit VALU result has a zeroed high half on
; gfx8/gfx9 but a preserved one on gfx10+ -- confirm against the ISA docs.
define i32 @shl_i16_zext_i32(i16 %x, i16 %y) {
; GFX8-LABEL: shl_i16_zext_i32:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_lshlrev_b16_e32 v0, v1, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9ALL-LABEL: shl_i16_zext_i32:
; GFX9ALL:       ; %bb.0:
; GFX9ALL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9ALL-NEXT:    v_lshlrev_b16_e32 v0, v1, v0
; GFX9ALL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: shl_i16_zext_i32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_lshlrev_b16 v0, v1, v0
; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: shl_i16_zext_i32:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_lshlrev_b16 v0, v1, v0
; GFX11-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %res = shl i16 %x, %y
  %zext = zext i16 %res to i32
  ret i32 %zext
}

; i16 lshr whose result is zero-extended to i32. The gfx8 and gfx9 checks
; expect the bare v_lshrrev_b16 (nothing is emitted for the zext); the gfx10
; and gfx11 checks expect an explicit v_and_b32 with 0xffff afterwards.
; NOTE(review): presumably the 16-bit VALU result has a zeroed high half on
; gfx8/gfx9 but a preserved one on gfx10+ -- confirm against the ISA docs.
define i32 @lshr_i16_zext_i32(i16 %x, i16 %y) {
; GFX8-LABEL: lshr_i16_zext_i32:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_lshrrev_b16_e32 v0, v1, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9ALL-LABEL: lshr_i16_zext_i32:
; GFX9ALL:       ; %bb.0:
; GFX9ALL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9ALL-NEXT:    v_lshrrev_b16_e32 v0, v1, v0
; GFX9ALL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: lshr_i16_zext_i32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_lshrrev_b16 v0, v1, v0
; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: lshr_i16_zext_i32:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_lshrrev_b16 v0, v1, v0
; GFX11-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %res = lshr i16 %x, %y
  %zext = zext i16 %res to i32
  ret i32 %zext
}

; i16 ashr whose result is zero-extended to i32. The gfx8 and gfx9 checks
; expect the bare v_ashrrev_i16 (nothing is emitted for the zext); the gfx10
; and gfx11 checks expect an explicit v_and_b32 with 0xffff afterwards.
; NOTE(review): presumably the 16-bit VALU result has a zeroed high half on
; gfx8/gfx9 but a preserved one on gfx10+ -- confirm against the ISA docs.
define i32 @ashr_i16_zext_i32(i16 %x, i16 %y) {
; GFX8-LABEL: ashr_i16_zext_i32:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_ashrrev_i16_e32 v0, v1, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9ALL-LABEL: ashr_i16_zext_i32:
; GFX9ALL:       ; %bb.0:
; GFX9ALL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9ALL-NEXT:    v_ashrrev_i16_e32 v0, v1, v0
; GFX9ALL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: ashr_i16_zext_i32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_ashrrev_i16 v0, v1, v0
; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: ashr_i16_zext_i32:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_ashrrev_i16 v0, v1, v0
; GFX11-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %res = ashr i16 %x, %y
  %zext = zext i16 %res to i32
  ret i32 %zext
}

; i16 add whose result is zero-extended to i32. The gfx8 and gfx9 checks
; expect the bare v_add_u16 (nothing is emitted for the zext); the gfx10 and
; gfx11 checks expect v_add_nc_u16 followed by a v_and_b32 with 0xffff.
; NOTE(review): presumably the 16-bit VALU result has a zeroed high half on
; gfx8/gfx9 but a preserved one on gfx10+ -- confirm against the ISA docs.
define i32 @add_u16_zext_i32(i16 %x, i16 %y) {
; GFX8-LABEL: add_u16_zext_i32:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_add_u16_e32 v0, v0, v1
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9ALL-LABEL: add_u16_zext_i32:
; GFX9ALL:       ; %bb.0:
; GFX9ALL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9ALL-NEXT:    v_add_u16_e32 v0, v0, v1
; GFX9ALL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: add_u16_zext_i32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_add_nc_u16 v0, v0, v1
; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: add_u16_zext_i32:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_add_nc_u16 v0, v0, v1
; GFX11-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %res = add i16 %x, %y
  %zext = zext i16 %res to i32
  ret i32 %zext
}

; i16 sub whose result is zero-extended to i32. The gfx8 and gfx9 checks
; expect the bare v_sub_u16 (nothing is emitted for the zext); the gfx10 and
; gfx11 checks expect v_sub_nc_u16 followed by a v_and_b32 with 0xffff.
; NOTE(review): presumably the 16-bit VALU result has a zeroed high half on
; gfx8/gfx9 but a preserved one on gfx10+ -- confirm against the ISA docs.
define i32 @sub_u16_zext_i32(i16 %x, i16 %y) {
; GFX8-LABEL: sub_u16_zext_i32:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_sub_u16_e32 v0, v0, v1
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9ALL-LABEL: sub_u16_zext_i32:
; GFX9ALL:       ; %bb.0:
; GFX9ALL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9ALL-NEXT:    v_sub_u16_e32 v0, v0, v1
; GFX9ALL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: sub_u16_zext_i32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_sub_nc_u16 v0, v0, v1
; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: sub_u16_zext_i32:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_sub_nc_u16 v0, v0, v1
; GFX11-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %res = sub i16 %x, %y
  %zext = zext i16 %res to i32
  ret i32 %zext
}

; i16 mul whose result is zero-extended to i32. The gfx8 and gfx9 checks
; expect the bare v_mul_lo_u16 (nothing is emitted for the zext); the gfx10
; and gfx11 checks expect an explicit v_and_b32 with 0xffff afterwards.
; NOTE(review): presumably the 16-bit VALU result has a zeroed high half on
; gfx8/gfx9 but a preserved one on gfx10+ -- confirm against the ISA docs.
define i32 @mul_lo_u16_zext_i32(i16 %x, i16 %y) {
; GFX8-LABEL: mul_lo_u16_zext_i32:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_mul_lo_u16_e32 v0, v0, v1
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9ALL-LABEL: mul_lo_u16_zext_i32:
; GFX9ALL:       ; %bb.0:
; GFX9ALL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9ALL-NEXT:    v_mul_lo_u16_e32 v0, v0, v1
; GFX9ALL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: mul_lo_u16_zext_i32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_mul_lo_u16 v0, v0, v1
; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: mul_lo_u16_zext_i32:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_mul_lo_u16 v0, v0, v1
; GFX11-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %res = mul i16 %x, %y
  %zext = zext i16 %res to i32
  ret i32 %zext
}

; Unsigned i16 minimum (icmp ule + select) zero-extended to i32. The gfx8 and
; gfx9 checks expect the bare v_min_u16 (nothing is emitted for the zext); the
; gfx10 and gfx11 checks expect an explicit v_and_b32 with 0xffff afterwards.
; NOTE(review): presumably the 16-bit VALU result has a zeroed high half on
; gfx8/gfx9 but a preserved one on gfx10+ -- confirm against the ISA docs.
define i32 @min_u16_zext_i32(i16 %x, i16 %y) {
; GFX8-LABEL: min_u16_zext_i32:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_min_u16_e32 v0, v0, v1
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9ALL-LABEL: min_u16_zext_i32:
; GFX9ALL:       ; %bb.0:
; GFX9ALL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9ALL-NEXT:    v_min_u16_e32 v0, v0, v1
; GFX9ALL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: min_u16_zext_i32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_min_u16 v0, v0, v1
; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: min_u16_zext_i32:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_min_u16 v0, v0, v1
; GFX11-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %cmp = icmp ule i16 %x, %y
  %res = select i1 %cmp, i16 %x, i16 %y
  %zext = zext i16 %res to i32
  ret i32 %zext
}

; Signed i16 minimum (icmp sle + select) zero-extended to i32. The gfx8 and
; gfx9 checks expect the bare v_min_i16 (nothing is emitted for the zext); the
; gfx10 and gfx11 checks expect an explicit v_and_b32 with 0xffff afterwards.
; NOTE(review): presumably the 16-bit VALU result has a zeroed high half on
; gfx8/gfx9 but a preserved one on gfx10+ -- confirm against the ISA docs.
define i32 @min_i16_zext_i32(i16 %x, i16 %y) {
; GFX8-LABEL: min_i16_zext_i32:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_min_i16_e32 v0, v0, v1
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9ALL-LABEL: min_i16_zext_i32:
; GFX9ALL:       ; %bb.0:
; GFX9ALL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9ALL-NEXT:    v_min_i16_e32 v0, v0, v1
; GFX9ALL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: min_i16_zext_i32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_min_i16 v0, v0, v1
; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: min_i16_zext_i32:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_min_i16 v0, v0, v1
; GFX11-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %cmp = icmp sle i16 %x, %y
  %res = select i1 %cmp, i16 %x, i16 %y
  %zext = zext i16 %res to i32
  ret i32 %zext
}

; Unsigned i16 maximum (icmp uge + select) zero-extended to i32. The gfx8 and
; gfx9 checks expect the bare v_max_u16 (nothing is emitted for the zext); the
; gfx10 and gfx11 checks expect an explicit v_and_b32 with 0xffff afterwards.
; NOTE(review): presumably the 16-bit VALU result has a zeroed high half on
; gfx8/gfx9 but a preserved one on gfx10+ -- confirm against the ISA docs.
define i32 @max_u16_zext_i32(i16 %x, i16 %y) {
; GFX8-LABEL: max_u16_zext_i32:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_max_u16_e32 v0, v0, v1
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9ALL-LABEL: max_u16_zext_i32:
; GFX9ALL:       ; %bb.0:
; GFX9ALL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9ALL-NEXT:    v_max_u16_e32 v0, v0, v1
; GFX9ALL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: max_u16_zext_i32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_max_u16 v0, v0, v1
; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: max_u16_zext_i32:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_max_u16 v0, v0, v1
; GFX11-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %cmp = icmp uge i16 %x, %y
  %res = select i1 %cmp, i16 %x, i16 %y
  %zext = zext i16 %res to i32
  ret i32 %zext
}

; Signed i16 maximum (icmp sge + select) zero-extended to i32. The gfx8 and
; gfx9 checks expect the bare v_max_i16 (nothing is emitted for the zext); the
; gfx10 and gfx11 checks expect an explicit v_and_b32 with 0xffff afterwards.
; NOTE(review): presumably the 16-bit VALU result has a zeroed high half on
; gfx8/gfx9 but a preserved one on gfx10+ -- confirm against the ISA docs.
define i32 @max_i16_zext_i32(i16 %x, i16 %y) {
; GFX8-LABEL: max_i16_zext_i32:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_max_i16_e32 v0, v0, v1
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9ALL-LABEL: max_i16_zext_i32:
; GFX9ALL:       ; %bb.0:
; GFX9ALL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9ALL-NEXT:    v_max_i16_e32 v0, v0, v1
; GFX9ALL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: max_i16_zext_i32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_max_i16 v0, v0, v1
; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: max_i16_zext_i32:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_max_i16 v0, v0, v1
; GFX11-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %cmp = icmp sge i16 %x, %y
  %res = select i1 %cmp, i16 %x, i16 %y
  %zext = zext i16 %res to i32
  ret i32 %zext
}

; Half-precision fadd whose bits are reinterpreted as i16 and zero-extended to
; i32. The gfx8 and gfx9 checks expect the bare v_add_f16 (nothing is emitted
; for the zext); the gfx10 and gfx11 checks expect a v_and_b32 with 0xffff.
; NOTE(review): presumably the f16 VALU result has a zeroed high half on
; gfx8/gfx9 but a preserved one on gfx10+ -- confirm against the ISA docs.
define i32 @zext_fadd_f16(half %x, half %y) {
; GFX8-LABEL: zext_fadd_f16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_add_f16_e32 v0, v0, v1
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9ALL-LABEL: zext_fadd_f16:
; GFX9ALL:       ; %bb.0:
; GFX9ALL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9ALL-NEXT:    v_add_f16_e32 v0, v0, v1
; GFX9ALL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: zext_fadd_f16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_add_f16_e32 v0, v0, v1
; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: zext_fadd_f16:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_add_f16_e32 v0, v0, v1
; GFX11-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %add = fadd half %x, %y
  %cast = bitcast half %add to i16
  %zext = zext i16 %cast to i32
  ret i32 %zext
}

; llvm.fma.f16 whose bits are reinterpreted as i16 and zero-extended to i32.
; Only the gfx8 checks expect the bare v_fma_f16; the gfx9 checks already
; expect a v_and_b32 with 0xffff after v_fma_f16, and the gfx10 and gfx11
; checks expect v_fmac_f16 accumulating into v2 followed by the same mask.
; NOTE(review): presumably the gfx9 form of v_fma_f16 no longer zeroes the
; high half of its destination -- confirm against the ISA docs.
define i32 @zext_fma_f16(half %x, half %y, half %z) {
; GFX8-LABEL: zext_fma_f16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_fma_f16 v0, v0, v1, v2
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9ALL-LABEL: zext_fma_f16:
; GFX9ALL:       ; %bb.0:
; GFX9ALL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9ALL-NEXT:    v_fma_f16 v0, v0, v1, v2
; GFX9ALL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX9ALL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: zext_fma_f16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_fmac_f16_e32 v2, v0, v1
; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v2
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: zext_fma_f16:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_fmac_f16_e32 v2, v0, v1
; GFX11-NEXT:    v_and_b32_e32 v0, 0xffff, v2
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %fma = call half @llvm.fma.f16(half %x, half %y, half %z)
  %cast = bitcast half %fma to i16
  %zext = zext i16 %cast to i32
  ret i32 %zext
}

; llvm.amdgcn.div.fixup.f16 whose bits are reinterpreted as i16 and
; zero-extended to i32. Only the gfx8 checks expect the bare v_div_fixup_f16;
; the gfx9, gfx10 and gfx11 checks all expect a v_and_b32 with 0xffff after it.
; NOTE(review): presumably the gfx9+ form of v_div_fixup_f16 does not zero the
; high half of its destination -- confirm against the ISA docs.
define i32 @zext_div_fixup_f16(half %x, half %y, half %z) {
; GFX8-LABEL: zext_div_fixup_f16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_div_fixup_f16 v0, v0, v1, v2
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9ALL-LABEL: zext_div_fixup_f16:
; GFX9ALL:       ; %bb.0:
; GFX9ALL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9ALL-NEXT:    v_div_fixup_f16 v0, v0, v1, v2
; GFX9ALL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX9ALL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: zext_div_fixup_f16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_div_fixup_f16 v0, v0, v1, v2
; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: zext_div_fixup_f16:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_div_fixup_f16 v0, v0, v1, v2
; GFX11-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %div.fixup = call half @llvm.amdgcn.div.fixup.f16(half %x, half %y, half %z)
  %cast = bitcast half %div.fixup to i16
  %zext = zext i16 %cast to i32
  ret i32 %zext
}

; We technically could eliminate the and on gfx9 here but we don't try
; to inspect the source of the fptrunc. We're only worried about cases
; that lower to v_fma_mix* instructions.
; NOTE(review): the gfx8 and gfx9 checks below contain no v_and_b32 after
; v_cvt_f16_f32 (only the gfx10 and gfx11 checks do), so the remark above
; may be out of date -- confirm before relying on it.
define i32 @zext_fptrunc_f16(float %x) {
; GFX8-LABEL: zext_fptrunc_f16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9ALL-LABEL: zext_fptrunc_f16:
; GFX9ALL:       ; %bb.0:
; GFX9ALL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9ALL-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX9ALL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: zext_fptrunc_f16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: zext_fptrunc_f16:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX11-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %fptrunc = fptrunc float %x to half
  %cast = bitcast half %fptrunc to i16
  %zext = zext i16 %cast to i32
  ret i32 %zext
}

; f32 fma truncated to half, reinterpreted as i16 and zero-extended to i32.
; The gfx8 and gfx900 checks expect a separate v_fma_f32 + v_cvt_f16_f32 pair
; with no mask. The gfx906, gfx10 and gfx11 checks expect the pair to be fused
; into v_fma_mixlo_f16, followed by an explicit v_and_b32 with 0xffff.
; NOTE(review): presumably v_fma_mixlo_f16 leaves the high half of its
; destination untouched, which is why the mask is required -- confirm against
; the ISA docs.
define i32 @zext_fptrunc_fma_f16(float %x, float %y, float %z) {
; GFX8-LABEL: zext_fptrunc_fma_f16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_fma_f32 v0, v0, v1, v2
; GFX8-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX900-LABEL: zext_fptrunc_fma_f16:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    v_fma_f32 v0, v0, v1, v2
; GFX900-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX906-LABEL: zext_fptrunc_fma_f16:
; GFX906:       ; %bb.0:
; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-NEXT:    v_fma_mixlo_f16 v0, v0, v1, v2
; GFX906-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX906-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: zext_fptrunc_fma_f16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_fma_mixlo_f16 v0, v0, v1, v2
; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: zext_fptrunc_fma_f16:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_fma_mixlo_f16 v0, v0, v1, v2
; GFX11-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %fma = call float @llvm.fma.f32(float %x, float %y, float %z)
  %fptrunc = fptrunc float %fma to half
  %cast = bitcast half %fptrunc to i16
  %zext = zext i16 %cast to i32
  ret i32 %zext
}

; Intrinsics called by zext_div_fixup_f16, zext_fma_f16 and
; zext_fptrunc_fma_f16 respectively.
declare half @llvm.amdgcn.div.fixup.f16(half, half, half)
declare half @llvm.fma.f16(half, half, half)
declare float @llvm.fma.f32(float, float, float)
