xref: /llvm-project/llvm/test/CodeGen/AMDGPU/mad.u16.ll (revision d1139b32d251c1e258abeb6556d5fff045d7ae12)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2; RUN: llc -mtriple=amdgcn -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX8 %s
3; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s
4; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX10 %s
5; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefixes=GFX11,GFX11-TRUE16 %s
6; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefixes=GFX11,GFX11-FAKE16 %s
7
8; FIXME: GFX9 should be producing v_mad_u16 instead of v_mad_legacy_u16.
9
; Kernel test for a 16-bit multiply-add fed from memory. Each work-item reads
; one i16 from %a, %b and %c at index workitem.id.x, computes a * b + c in
; i16, and stores the result through %r.
; The assertions expect the mul/add pair to be emitted as one v_mad_u16 on the
; GFX8, GFX10 and GFX11 runs, and as v_mad_legacy_u16 on the GFX9 run.
10define amdgpu_kernel void @mad_u16(
11; GFX8-LABEL: mad_u16:
12; GFX8:       ; %bb.0: ; %entry
13; GFX8-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x24
14; GFX8-NEXT:    v_lshlrev_b32_e32 v4, 1, v0
15; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
16; GFX8-NEXT:    v_mov_b32_e32 v1, s3
17; GFX8-NEXT:    v_add_u32_e32 v0, vcc, s2, v4
18; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
19; GFX8-NEXT:    v_mov_b32_e32 v3, s5
20; GFX8-NEXT:    v_add_u32_e32 v2, vcc, s4, v4
21; GFX8-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
22; GFX8-NEXT:    v_mov_b32_e32 v5, s7
23; GFX8-NEXT:    v_add_u32_e32 v4, vcc, s6, v4
24; GFX8-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
25; GFX8-NEXT:    flat_load_ushort v6, v[0:1] glc
26; GFX8-NEXT:    s_waitcnt vmcnt(0)
27; GFX8-NEXT:    flat_load_ushort v2, v[2:3] glc
28; GFX8-NEXT:    s_waitcnt vmcnt(0)
29; GFX8-NEXT:    flat_load_ushort v3, v[4:5] glc
30; GFX8-NEXT:    s_waitcnt vmcnt(0)
31; GFX8-NEXT:    v_mov_b32_e32 v0, s0
32; GFX8-NEXT:    v_mov_b32_e32 v1, s1
33; GFX8-NEXT:    v_mad_u16 v2, v6, v2, v3
34; GFX8-NEXT:    flat_store_short v[0:1], v2
35; GFX8-NEXT:    s_endpgm
36;
37; GFX9-LABEL: mad_u16:
38; GFX9:       ; %bb.0: ; %entry
39; GFX9-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
40; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
41; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
42; GFX9-NEXT:    global_load_ushort v1, v0, s[10:11] glc
43; GFX9-NEXT:    s_waitcnt vmcnt(0)
44; GFX9-NEXT:    global_load_ushort v2, v0, s[12:13] glc
45; GFX9-NEXT:    s_waitcnt vmcnt(0)
46; GFX9-NEXT:    global_load_ushort v3, v0, s[14:15] glc
47; GFX9-NEXT:    s_waitcnt vmcnt(0)
48; GFX9-NEXT:    v_mov_b32_e32 v0, 0
49; GFX9-NEXT:    v_mad_legacy_u16 v1, v1, v2, v3
50; GFX9-NEXT:    global_store_short v0, v1, s[8:9]
51; GFX9-NEXT:    s_endpgm
52;
53; GFX10-LABEL: mad_u16:
54; GFX10:       ; %bb.0: ; %entry
55; GFX10-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
56; GFX10-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
57; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
58; GFX10-NEXT:    global_load_ushort v1, v0, s[10:11] glc dlc
59; GFX10-NEXT:    s_waitcnt vmcnt(0)
60; GFX10-NEXT:    global_load_ushort v2, v0, s[12:13] glc dlc
61; GFX10-NEXT:    s_waitcnt vmcnt(0)
62; GFX10-NEXT:    global_load_ushort v3, v0, s[14:15] glc dlc
63; GFX10-NEXT:    s_waitcnt vmcnt(0)
64; GFX10-NEXT:    v_mov_b32_e32 v0, 0
65; GFX10-NEXT:    v_mad_u16 v1, v1, v2, v3
66; GFX10-NEXT:    global_store_short v0, v1, s[8:9]
67; GFX10-NEXT:    s_endpgm
68;
69; GFX11-TRUE16-LABEL: mad_u16:
70; GFX11-TRUE16:       ; %bb.0: ; %entry
71; GFX11-TRUE16-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
72; GFX11-TRUE16-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
73; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
74; GFX11-TRUE16-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
75; GFX11-TRUE16-NEXT:    s_waitcnt lgkmcnt(0)
76; GFX11-TRUE16-NEXT:    global_load_u16 v1, v0, s[2:3] glc dlc
77; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0)
78; GFX11-TRUE16-NEXT:    global_load_u16 v2, v0, s[4:5] glc dlc
79; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0)
80; GFX11-TRUE16-NEXT:    global_load_u16 v3, v0, s[6:7] glc dlc
81; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0)
82; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v0.l, v1.l
83; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v0.h, v2.l
84; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v1.l, v3.l
85; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
86; GFX11-TRUE16-NEXT:    v_mad_u16 v0.l, v0.l, v0.h, v1.l
87; GFX11-TRUE16-NEXT:    v_mov_b32_e32 v1, 0
88; GFX11-TRUE16-NEXT:    global_store_b16 v1, v0, s[0:1]
89; GFX11-TRUE16-NEXT:    s_endpgm
90;
91; GFX11-FAKE16-LABEL: mad_u16:
92; GFX11-FAKE16:       ; %bb.0: ; %entry
93; GFX11-FAKE16-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
94; GFX11-FAKE16-NEXT:    v_dual_mov_b32 v3, 0 :: v_dual_and_b32 v0, 0x3ff, v0
95; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
96; GFX11-FAKE16-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
97; GFX11-FAKE16-NEXT:    s_waitcnt lgkmcnt(0)
98; GFX11-FAKE16-NEXT:    global_load_u16 v1, v0, s[2:3] glc dlc
99; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0)
100; GFX11-FAKE16-NEXT:    global_load_u16 v2, v0, s[4:5] glc dlc
101; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0)
102; GFX11-FAKE16-NEXT:    global_load_u16 v0, v0, s[6:7] glc dlc
103; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0)
104; GFX11-FAKE16-NEXT:    v_mad_u16 v0, v1, v2, v0
105; GFX11-FAKE16-NEXT:    global_store_b16 v3, v0, s[0:1]
106; GFX11-FAKE16-NEXT:    s_endpgm
107    ptr addrspace(1) %r,
108    ptr addrspace(1) %a,
109    ptr addrspace(1) %b,
110    ptr addrspace(1) %c) {
111entry:
; Per-work-item element addresses: the same %tid index is applied to %a, %b, %c.
112  %tid = call i32 @llvm.amdgcn.workitem.id.x()
113  %a.gep = getelementptr inbounds i16, ptr addrspace(1) %a, i32 %tid
114  %b.gep = getelementptr inbounds i16, ptr addrspace(1) %b, i32 %tid
115  %c.gep = getelementptr inbounds i16, ptr addrspace(1) %c, i32 %tid
116
; The loads are volatile; the expected output keeps three separate glc loads,
; each followed by its own s_waitcnt vmcnt(0).
117  %a.val = load volatile i16, ptr addrspace(1) %a.gep
118  %b.val = load volatile i16, ptr addrspace(1) %b.gep
119  %c.val = load volatile i16, ptr addrspace(1) %c.gep
120
; The mul + add pair below is the pattern under test.
121  %m.val = mul i16 %a.val, %b.val
122  %r.val = add i16 %m.val, %c.val
123
; The result is stored through %r directly (not indexed by %tid).
124  store i16 %r.val, ptr addrspace(1) %r
125  ret void
126}
127
; Register-only variant: a function taking three i16 arguments and returning
; arg0 * arg1 + arg2 as i16. Every run is expected to emit a single 16-bit
; multiply-add (v_mad_legacy_u16 on the GFX9 run, v_mad_u16 elsewhere); the
; GFX11-TRUE16 run additionally expects two v_mov_b16 copies before it.
128define i16 @v_mad_u16(i16 %arg0, i16 %arg1, i16 %arg2) {
129; GFX8-LABEL: v_mad_u16:
130; GFX8:       ; %bb.0:
131; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
132; GFX8-NEXT:    v_mad_u16 v0, v0, v1, v2
133; GFX8-NEXT:    s_setpc_b64 s[30:31]
134;
135; GFX9-LABEL: v_mad_u16:
136; GFX9:       ; %bb.0:
137; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
138; GFX9-NEXT:    v_mad_legacy_u16 v0, v0, v1, v2
139; GFX9-NEXT:    s_setpc_b64 s[30:31]
140;
141; GFX10-LABEL: v_mad_u16:
142; GFX10:       ; %bb.0:
143; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
144; GFX10-NEXT:    v_mad_u16 v0, v0, v1, v2
145; GFX10-NEXT:    s_setpc_b64 s[30:31]
146;
147; GFX11-TRUE16-LABEL: v_mad_u16:
148; GFX11-TRUE16:       ; %bb.0:
149; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
150; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v0.h, v1.l
151; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v1.l, v2.l
152; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
153; GFX11-TRUE16-NEXT:    v_mad_u16 v0.l, v0.l, v0.h, v1.l
154; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
155;
156; GFX11-FAKE16-LABEL: v_mad_u16:
157; GFX11-FAKE16:       ; %bb.0:
158; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
159; GFX11-FAKE16-NEXT:    v_mad_u16 v0, v0, v1, v2
160; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
161  %mul = mul i16 %arg0, %arg1
162  %add = add i16 %mul, %arg2
163  ret i16 %add
164}
165
; Same multiply-add as @v_mad_u16, but the i16 result is zero-extended to i32
; before being returned. The GFX8 and GFX9 assertions contain no masking
; instruction after the mad, while the GFX10 and GFX11 assertions expect an
; explicit v_and_b32 with 0xffff.
; NOTE(review): presumably this reflects whether the 16-bit mad clears the
; upper half of its destination on each target - confirm against the ISA docs.
166define i32 @v_mad_u16_zext(i16 %arg0, i16 %arg1, i16 %arg2) {
167; GFX8-LABEL: v_mad_u16_zext:
168; GFX8:       ; %bb.0:
169; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
170; GFX8-NEXT:    v_mad_u16 v0, v0, v1, v2
171; GFX8-NEXT:    s_setpc_b64 s[30:31]
172;
173; GFX9-LABEL: v_mad_u16_zext:
174; GFX9:       ; %bb.0:
175; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
176; GFX9-NEXT:    v_mad_legacy_u16 v0, v0, v1, v2
177; GFX9-NEXT:    s_setpc_b64 s[30:31]
178;
179; GFX10-LABEL: v_mad_u16_zext:
180; GFX10:       ; %bb.0:
181; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
182; GFX10-NEXT:    v_mad_u16 v0, v0, v1, v2
183; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
184; GFX10-NEXT:    s_setpc_b64 s[30:31]
185;
186; GFX11-TRUE16-LABEL: v_mad_u16_zext:
187; GFX11-TRUE16:       ; %bb.0:
188; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
189; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v0.h, v1.l
190; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v1.l, v2.l
191; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
192; GFX11-TRUE16-NEXT:    v_mad_u16 v0.l, v0.l, v0.h, v1.l
193; GFX11-TRUE16-NEXT:    v_and_b32_e32 v0, 0xffff, v0
194; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
195;
196; GFX11-FAKE16-LABEL: v_mad_u16_zext:
197; GFX11-FAKE16:       ; %bb.0:
198; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
199; GFX11-FAKE16-NEXT:    v_mad_u16 v0, v0, v1, v2
200; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
201; GFX11-FAKE16-NEXT:    v_and_b32_e32 v0, 0xffff, v0
202; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
203  %mul = mul i16 %arg0, %arg1
204  %add = add i16 %mul, %arg2
205  %zext = zext i16 %add to i32
206  ret i32 %zext
207}
208
; Same multiply-add as @v_mad_u16, with the i16 result zero-extended to i64.
; Every run expects the high result register v1 to be set to 0. As in
; @v_mad_u16_zext, only the GFX10 and GFX11 assertions also expect the low
; register to be masked with 0xffff (fused into one v_dual instruction with
; the v1 move on GFX11).
209define i64 @v_mad_u16_zext64(i16 %arg0, i16 %arg1, i16 %arg2) {
210; GFX8-LABEL: v_mad_u16_zext64:
211; GFX8:       ; %bb.0:
212; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
213; GFX8-NEXT:    v_mad_u16 v0, v0, v1, v2
214; GFX8-NEXT:    v_mov_b32_e32 v1, 0
215; GFX8-NEXT:    s_setpc_b64 s[30:31]
216;
217; GFX9-LABEL: v_mad_u16_zext64:
218; GFX9:       ; %bb.0:
219; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
220; GFX9-NEXT:    v_mad_legacy_u16 v0, v0, v1, v2
221; GFX9-NEXT:    v_mov_b32_e32 v1, 0
222; GFX9-NEXT:    s_setpc_b64 s[30:31]
223;
224; GFX10-LABEL: v_mad_u16_zext64:
225; GFX10:       ; %bb.0:
226; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
227; GFX10-NEXT:    v_mad_u16 v0, v0, v1, v2
228; GFX10-NEXT:    v_mov_b32_e32 v1, 0
229; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
230; GFX10-NEXT:    s_setpc_b64 s[30:31]
231;
232; GFX11-TRUE16-LABEL: v_mad_u16_zext64:
233; GFX11-TRUE16:       ; %bb.0:
234; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
235; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v0.h, v1.l
236; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v1.l, v2.l
237; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
238; GFX11-TRUE16-NEXT:    v_mad_u16 v0.l, v0.l, v0.h, v1.l
239; GFX11-TRUE16-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0xffff, v0
240; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
241;
242; GFX11-FAKE16-LABEL: v_mad_u16_zext64:
243; GFX11-FAKE16:       ; %bb.0:
244; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
245; GFX11-FAKE16-NEXT:    v_mad_u16 v0, v0, v1, v2
246; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
247; GFX11-FAKE16-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0xffff, v0
248; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
249  %mul = mul i16 %arg0, %arg1
250  %add = add i16 %mul, %arg2
251  %zext = zext i16 %add to i64
252  ret i64 %zext
253}
254
; Scalar variant: an amdgpu_ps function whose three i16 arguments are marked
; inreg. The IR is the same mul + add as @v_mad_u16, but every run expects
; s_mul_i32 followed by s_add_i32 instead of a vector mad instruction. The
; i16 return value is not masked or extended in the expected output.
255define amdgpu_ps i16 @s_mad_u16(i16 inreg %arg0, i16 inreg %arg1, i16 inreg %arg2) {
256; GFX8-LABEL: s_mad_u16:
257; GFX8:       ; %bb.0:
258; GFX8-NEXT:    s_mul_i32 s0, s0, s1
259; GFX8-NEXT:    s_add_i32 s0, s0, s2
260; GFX8-NEXT:    ; return to shader part epilog
261;
262; GFX9-LABEL: s_mad_u16:
263; GFX9:       ; %bb.0:
264; GFX9-NEXT:    s_mul_i32 s0, s0, s1
265; GFX9-NEXT:    s_add_i32 s0, s0, s2
266; GFX9-NEXT:    ; return to shader part epilog
267;
268; GFX10-LABEL: s_mad_u16:
269; GFX10:       ; %bb.0:
270; GFX10-NEXT:    s_mul_i32 s0, s0, s1
271; GFX10-NEXT:    s_add_i32 s0, s0, s2
272; GFX10-NEXT:    ; return to shader part epilog
273;
274; GFX11-LABEL: s_mad_u16:
275; GFX11:       ; %bb.0:
276; GFX11-NEXT:    s_mul_i32 s0, s0, s1
277; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
278; GFX11-NEXT:    s_add_i32 s0, s0, s2
279; GFX11-NEXT:    ; return to shader part epilog
280  %mul = mul i16 %arg0, %arg1
281  %add = add i16 %mul, %arg2
282  ret i16 %add
283}
284
; Scalar multiply-add as in @s_mad_u16, with the i16 result zero-extended to
; i32. Every run expects the 32-bit s_mul_i32 / s_add_i32 pair followed by
; s_and_b32 with 0xffff to implement the zero-extension.
285define amdgpu_ps i32 @s_mad_u16_zext(i16 inreg %arg0, i16 inreg %arg1, i16 inreg %arg2) {
286; GFX8-LABEL: s_mad_u16_zext:
287; GFX8:       ; %bb.0:
288; GFX8-NEXT:    s_mul_i32 s0, s0, s1
289; GFX8-NEXT:    s_add_i32 s0, s0, s2
290; GFX8-NEXT:    s_and_b32 s0, s0, 0xffff
291; GFX8-NEXT:    ; return to shader part epilog
292;
293; GFX9-LABEL: s_mad_u16_zext:
294; GFX9:       ; %bb.0:
295; GFX9-NEXT:    s_mul_i32 s0, s0, s1
296; GFX9-NEXT:    s_add_i32 s0, s0, s2
297; GFX9-NEXT:    s_and_b32 s0, s0, 0xffff
298; GFX9-NEXT:    ; return to shader part epilog
299;
300; GFX10-LABEL: s_mad_u16_zext:
301; GFX10:       ; %bb.0:
302; GFX10-NEXT:    s_mul_i32 s0, s0, s1
303; GFX10-NEXT:    s_add_i32 s0, s0, s2
304; GFX10-NEXT:    s_and_b32 s0, s0, 0xffff
305; GFX10-NEXT:    ; return to shader part epilog
306;
307; GFX11-LABEL: s_mad_u16_zext:
308; GFX11:       ; %bb.0:
309; GFX11-NEXT:    s_mul_i32 s0, s0, s1
310; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
311; GFX11-NEXT:    s_add_i32 s0, s0, s2
312; GFX11-NEXT:    s_and_b32 s0, s0, 0xffff
313; GFX11-NEXT:    ; return to shader part epilog
314  %mul = mul i16 %arg0, %arg1
315  %add = add i16 %mul, %arg2
316  %zext = zext i16 %add to i32
317  ret i32 %zext
318}
319
; Scalar multiply-add as in @s_mad_u16, with the i16 result zero-extended to
; i64. Every run expects s_mul_i32 / s_add_i32, an s_and_b32 with 0xffff on
; the low half (s0), and s_mov_b32 s1, 0 for the high half. Only the position
; of the s_mov_b32 differs between runs (last on GFX8/GFX9, right after the
; multiply on GFX10/GFX11).
320define amdgpu_ps i64 @s_mad_u16_zext64(i16 inreg %arg0, i16 inreg %arg1, i16 inreg %arg2) {
321; GFX8-LABEL: s_mad_u16_zext64:
322; GFX8:       ; %bb.0:
323; GFX8-NEXT:    s_mul_i32 s0, s0, s1
324; GFX8-NEXT:    s_add_i32 s0, s0, s2
325; GFX8-NEXT:    s_and_b32 s0, s0, 0xffff
326; GFX8-NEXT:    s_mov_b32 s1, 0
327; GFX8-NEXT:    ; return to shader part epilog
328;
329; GFX9-LABEL: s_mad_u16_zext64:
330; GFX9:       ; %bb.0:
331; GFX9-NEXT:    s_mul_i32 s0, s0, s1
332; GFX9-NEXT:    s_add_i32 s0, s0, s2
333; GFX9-NEXT:    s_and_b32 s0, s0, 0xffff
334; GFX9-NEXT:    s_mov_b32 s1, 0
335; GFX9-NEXT:    ; return to shader part epilog
336;
337; GFX10-LABEL: s_mad_u16_zext64:
338; GFX10:       ; %bb.0:
339; GFX10-NEXT:    s_mul_i32 s0, s0, s1
340; GFX10-NEXT:    s_mov_b32 s1, 0
341; GFX10-NEXT:    s_add_i32 s0, s0, s2
342; GFX10-NEXT:    s_and_b32 s0, s0, 0xffff
343; GFX10-NEXT:    ; return to shader part epilog
344;
345; GFX11-LABEL: s_mad_u16_zext64:
346; GFX11:       ; %bb.0:
347; GFX11-NEXT:    s_mul_i32 s0, s0, s1
348; GFX11-NEXT:    s_mov_b32 s1, 0
349; GFX11-NEXT:    s_add_i32 s0, s0, s2
350; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
351; GFX11-NEXT:    s_and_b32 s0, s0, 0xffff
352; GFX11-NEXT:    ; return to shader part epilog
353  %mul = mul i16 %arg0, %arg1
354  %add = add i16 %mul, %arg2
355  %zext = zext i16 %add to i64
356  ret i64 %zext
357}
358
; Scalar multiply-add as in @s_mad_u16, with the i16 result sign-extended to
; i32. Every run expects s_mul_i32 / s_add_i32 followed by s_sext_i32_i16 to
; implement the sign-extension.
359define amdgpu_ps i32 @s_mad_u16_sext(i16 inreg %arg0, i16 inreg %arg1, i16 inreg %arg2) {
360; GFX8-LABEL: s_mad_u16_sext:
361; GFX8:       ; %bb.0:
362; GFX8-NEXT:    s_mul_i32 s0, s0, s1
363; GFX8-NEXT:    s_add_i32 s0, s0, s2
364; GFX8-NEXT:    s_sext_i32_i16 s0, s0
365; GFX8-NEXT:    ; return to shader part epilog
366;
367; GFX9-LABEL: s_mad_u16_sext:
368; GFX9:       ; %bb.0:
369; GFX9-NEXT:    s_mul_i32 s0, s0, s1
370; GFX9-NEXT:    s_add_i32 s0, s0, s2
371; GFX9-NEXT:    s_sext_i32_i16 s0, s0
372; GFX9-NEXT:    ; return to shader part epilog
373;
374; GFX10-LABEL: s_mad_u16_sext:
375; GFX10:       ; %bb.0:
376; GFX10-NEXT:    s_mul_i32 s0, s0, s1
377; GFX10-NEXT:    s_add_i32 s0, s0, s2
378; GFX10-NEXT:    s_sext_i32_i16 s0, s0
379; GFX10-NEXT:    ; return to shader part epilog
380;
381; GFX11-LABEL: s_mad_u16_sext:
382; GFX11:       ; %bb.0:
383; GFX11-NEXT:    s_mul_i32 s0, s0, s1
384; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
385; GFX11-NEXT:    s_add_i32 s0, s0, s2
386; GFX11-NEXT:    s_sext_i32_i16 s0, s0
387; GFX11-NEXT:    ; return to shader part epilog
388  %mul = mul i16 %arg0, %arg1
389  %add = add i16 %mul, %arg2
390  %sext = sext i16 %add to i32
391  ret i32 %sext
392}
393
394declare i32 @llvm.amdgcn.workitem.id.x()
395;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
396; GCN: {{.*}}
397