xref: /llvm-project/llvm/test/CodeGen/AMDGPU/strict_fsub.f16.ll (revision f2c164c8150548d983565c4ddc0fde790f9e2a5b)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-SDAG %s
3; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-GISEL %s
4
5; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GFX8,GFX8-SDAG %s
6; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GFX8,GFX8-GISEL %s
7
8; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10-SDAG %s
9; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10-GISEL %s
10
11; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10PLUS-SDAG %s
12; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10PLUS-GISEL %s
13
14; FIXME: Promotion of f16 operations is not handled for subtargets without f16 instructions.
15
; Scalar half subtraction through the constrained fsub intrinsic, using
; round.tonearest rounding and fpexcept.strict exception behavior.
; Both shared check groups below expect a single v_sub_f16_e32 of v0 and v1
; followed by the return.
16define half @v_constained_fsub_f16_fpexcept_strict(half %x, half %y) #0 {
17; GCN-LABEL: v_constained_fsub_f16_fpexcept_strict:
18; GCN:       ; %bb.0:
19; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
20; GCN-NEXT:    v_sub_f16_e32 v0, v0, v1
21; GCN-NEXT:    s_setpc_b64 s[30:31]
22;
23; GFX10PLUS-LABEL: v_constained_fsub_f16_fpexcept_strict:
24; GFX10PLUS:       ; %bb.0:
25; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
26; GFX10PLUS-NEXT:    v_sub_f16_e32 v0, v0, v1
27; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
28  %val = call half @llvm.experimental.constrained.fsub.f16(half %x, half %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
29  ret half %val
30}
31
; Scalar half subtraction through the constrained fsub intrinsic, using
; round.tonearest rounding and fpexcept.ignore exception behavior.
; The expected code is a single v_sub_f16_e32 of v0 and v1 in both shared
; check groups.
32define half @v_constained_fsub_f16_fpexcept_ignore(half %x, half %y) #0 {
33; GCN-LABEL: v_constained_fsub_f16_fpexcept_ignore:
34; GCN:       ; %bb.0:
35; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
36; GCN-NEXT:    v_sub_f16_e32 v0, v0, v1
37; GCN-NEXT:    s_setpc_b64 s[30:31]
38;
39; GFX10PLUS-LABEL: v_constained_fsub_f16_fpexcept_ignore:
40; GFX10PLUS:       ; %bb.0:
41; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
42; GFX10PLUS-NEXT:    v_sub_f16_e32 v0, v0, v1
43; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
44  %val = call half @llvm.experimental.constrained.fsub.f16(half %x, half %y, metadata !"round.tonearest", metadata !"fpexcept.ignore")
45  ret half %val
46}
47
; Scalar half subtraction through the constrained fsub intrinsic, using
; round.tonearest rounding and fpexcept.maytrap exception behavior.
; The expected code is a single v_sub_f16_e32 of v0 and v1 in both shared
; check groups.
48define half @v_constained_fsub_f16_fpexcept_maytrap(half %x, half %y) #0 {
49; GCN-LABEL: v_constained_fsub_f16_fpexcept_maytrap:
50; GCN:       ; %bb.0:
51; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
52; GCN-NEXT:    v_sub_f16_e32 v0, v0, v1
53; GCN-NEXT:    s_setpc_b64 s[30:31]
54;
55; GFX10PLUS-LABEL: v_constained_fsub_f16_fpexcept_maytrap:
56; GFX10PLUS:       ; %bb.0:
57; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
58; GFX10PLUS-NEXT:    v_sub_f16_e32 v0, v0, v1
59; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
60  %val = call half @llvm.experimental.constrained.fsub.f16(half %x, half %y, metadata !"round.tonearest", metadata !"fpexcept.maytrap")
61  ret half %val
62}
63
; <2 x half> subtraction through the constrained fsub intrinsic, using
; round.tonearest rounding and fpexcept.strict exception behavior.
; Expected lowering, per the checks below:
;  - SelectionDAG runs subtract the low and high halves separately and repack
;    the two results (v_perm_b32, or v_or_b32 on the fiji run).
;  - GlobalISel runs emit one v_pk_add_f16 with neg_lo/neg_hi applied to the
;    second operand, except the fiji run, which XORs 0x80008000 into the second
;    operand and then issues two f16 adds.
64define <2 x half> @v_constained_fsub_v2f16_fpexcept_strict(<2 x half> %x, <2 x half> %y) #0 {
65; GFX9-SDAG-LABEL: v_constained_fsub_v2f16_fpexcept_strict:
66; GFX9-SDAG:       ; %bb.0:
67; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
68; GFX9-SDAG-NEXT:    v_sub_f16_sdwa v2, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
69; GFX9-SDAG-NEXT:    v_sub_f16_e32 v0, v0, v1
70; GFX9-SDAG-NEXT:    s_mov_b32 s4, 0x5040100
71; GFX9-SDAG-NEXT:    v_perm_b32 v0, v2, v0, s4
72; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
73;
74; GFX9-GISEL-LABEL: v_constained_fsub_v2f16_fpexcept_strict:
75; GFX9-GISEL:       ; %bb.0:
76; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
77; GFX9-GISEL-NEXT:    v_pk_add_f16 v0, v0, v1 neg_lo:[0,1] neg_hi:[0,1]
78; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
79;
80; GFX8-SDAG-LABEL: v_constained_fsub_v2f16_fpexcept_strict:
81; GFX8-SDAG:       ; %bb.0:
82; GFX8-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
83; GFX8-SDAG-NEXT:    v_sub_f16_sdwa v2, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
84; GFX8-SDAG-NEXT:    v_sub_f16_e32 v0, v0, v1
85; GFX8-SDAG-NEXT:    v_or_b32_e32 v0, v0, v2
86; GFX8-SDAG-NEXT:    s_setpc_b64 s[30:31]
87;
88; GFX8-GISEL-LABEL: v_constained_fsub_v2f16_fpexcept_strict:
89; GFX8-GISEL:       ; %bb.0:
90; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
91; GFX8-GISEL-NEXT:    v_xor_b32_e32 v1, 0x80008000, v1
92; GFX8-GISEL-NEXT:    v_add_f16_e32 v2, v0, v1
93; GFX8-GISEL-NEXT:    v_add_f16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
94; GFX8-GISEL-NEXT:    v_or_b32_e32 v0, v2, v0
95; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
96;
97; GFX10-SDAG-LABEL: v_constained_fsub_v2f16_fpexcept_strict:
98; GFX10-SDAG:       ; %bb.0:
99; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
100; GFX10-SDAG-NEXT:    v_sub_f16_sdwa v2, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
101; GFX10-SDAG-NEXT:    v_sub_f16_e32 v0, v0, v1
102; GFX10-SDAG-NEXT:    v_perm_b32 v0, v2, v0, 0x5040100
103; GFX10-SDAG-NEXT:    s_setpc_b64 s[30:31]
104;
105; GFX10-GISEL-LABEL: v_constained_fsub_v2f16_fpexcept_strict:
106; GFX10-GISEL:       ; %bb.0:
107; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
108; GFX10-GISEL-NEXT:    v_pk_add_f16 v0, v0, v1 neg_lo:[0,1] neg_hi:[0,1]
109; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
110;
111; GFX10PLUS-SDAG-LABEL: v_constained_fsub_v2f16_fpexcept_strict:
112; GFX10PLUS-SDAG:       ; %bb.0:
113; GFX10PLUS-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
114; GFX10PLUS-SDAG-NEXT:    v_lshrrev_b32_e32 v2, 16, v1
115; GFX10PLUS-SDAG-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
116; GFX10PLUS-SDAG-NEXT:    v_sub_f16_e32 v0, v0, v1
117; GFX10PLUS-SDAG-NEXT:    v_sub_f16_e32 v2, v3, v2
118; GFX10PLUS-SDAG-NEXT:    v_perm_b32 v0, v2, v0, 0x5040100
119; GFX10PLUS-SDAG-NEXT:    s_setpc_b64 s[30:31]
120;
121; GFX10PLUS-GISEL-LABEL: v_constained_fsub_v2f16_fpexcept_strict:
122; GFX10PLUS-GISEL:       ; %bb.0:
123; GFX10PLUS-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
124; GFX10PLUS-GISEL-NEXT:    v_pk_add_f16 v0, v0, v1 neg_lo:[0,1] neg_hi:[0,1]
125; GFX10PLUS-GISEL-NEXT:    s_setpc_b64 s[30:31]
126  %val = call <2 x half> @llvm.experimental.constrained.fsub.v2f16(<2 x half> %x, <2 x half> %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
127  ret <2 x half> %val
128}
129
; <2 x half> subtraction through the constrained fsub intrinsic, using
; round.tonearest rounding and fpexcept.ignore exception behavior.
; Expected lowering, per the checks below:
;  - SelectionDAG runs subtract the low and high halves separately and repack
;    the two results (v_perm_b32, or v_or_b32 on the fiji run).
;  - GlobalISel runs emit one v_pk_add_f16 with neg_lo/neg_hi applied to the
;    second operand, except the fiji run, which XORs 0x80008000 into the second
;    operand and then issues two f16 adds.
130define <2 x half> @v_constained_fsub_v2f16_fpexcept_ignore(<2 x half> %x, <2 x half> %y) #0 {
131; GFX9-SDAG-LABEL: v_constained_fsub_v2f16_fpexcept_ignore:
132; GFX9-SDAG:       ; %bb.0:
133; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
134; GFX9-SDAG-NEXT:    v_sub_f16_sdwa v2, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
135; GFX9-SDAG-NEXT:    v_sub_f16_e32 v0, v0, v1
136; GFX9-SDAG-NEXT:    s_mov_b32 s4, 0x5040100
137; GFX9-SDAG-NEXT:    v_perm_b32 v0, v2, v0, s4
138; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
139;
140; GFX9-GISEL-LABEL: v_constained_fsub_v2f16_fpexcept_ignore:
141; GFX9-GISEL:       ; %bb.0:
142; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
143; GFX9-GISEL-NEXT:    v_pk_add_f16 v0, v0, v1 neg_lo:[0,1] neg_hi:[0,1]
144; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
145;
146; GFX8-SDAG-LABEL: v_constained_fsub_v2f16_fpexcept_ignore:
147; GFX8-SDAG:       ; %bb.0:
148; GFX8-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
149; GFX8-SDAG-NEXT:    v_sub_f16_sdwa v2, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
150; GFX8-SDAG-NEXT:    v_sub_f16_e32 v0, v0, v1
151; GFX8-SDAG-NEXT:    v_or_b32_e32 v0, v0, v2
152; GFX8-SDAG-NEXT:    s_setpc_b64 s[30:31]
153;
154; GFX8-GISEL-LABEL: v_constained_fsub_v2f16_fpexcept_ignore:
155; GFX8-GISEL:       ; %bb.0:
156; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
157; GFX8-GISEL-NEXT:    v_xor_b32_e32 v1, 0x80008000, v1
158; GFX8-GISEL-NEXT:    v_add_f16_e32 v2, v0, v1
159; GFX8-GISEL-NEXT:    v_add_f16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
160; GFX8-GISEL-NEXT:    v_or_b32_e32 v0, v2, v0
161; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
162;
163; GFX10-SDAG-LABEL: v_constained_fsub_v2f16_fpexcept_ignore:
164; GFX10-SDAG:       ; %bb.0:
165; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
166; GFX10-SDAG-NEXT:    v_sub_f16_sdwa v2, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
167; GFX10-SDAG-NEXT:    v_sub_f16_e32 v0, v0, v1
168; GFX10-SDAG-NEXT:    v_perm_b32 v0, v2, v0, 0x5040100
169; GFX10-SDAG-NEXT:    s_setpc_b64 s[30:31]
170;
171; GFX10-GISEL-LABEL: v_constained_fsub_v2f16_fpexcept_ignore:
172; GFX10-GISEL:       ; %bb.0:
173; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
174; GFX10-GISEL-NEXT:    v_pk_add_f16 v0, v0, v1 neg_lo:[0,1] neg_hi:[0,1]
175; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
176;
177; GFX10PLUS-SDAG-LABEL: v_constained_fsub_v2f16_fpexcept_ignore:
178; GFX10PLUS-SDAG:       ; %bb.0:
179; GFX10PLUS-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
180; GFX10PLUS-SDAG-NEXT:    v_lshrrev_b32_e32 v2, 16, v1
181; GFX10PLUS-SDAG-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
182; GFX10PLUS-SDAG-NEXT:    v_sub_f16_e32 v0, v0, v1
183; GFX10PLUS-SDAG-NEXT:    v_sub_f16_e32 v2, v3, v2
184; GFX10PLUS-SDAG-NEXT:    v_perm_b32 v0, v2, v0, 0x5040100
185; GFX10PLUS-SDAG-NEXT:    s_setpc_b64 s[30:31]
186;
187; GFX10PLUS-GISEL-LABEL: v_constained_fsub_v2f16_fpexcept_ignore:
188; GFX10PLUS-GISEL:       ; %bb.0:
189; GFX10PLUS-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
190; GFX10PLUS-GISEL-NEXT:    v_pk_add_f16 v0, v0, v1 neg_lo:[0,1] neg_hi:[0,1]
191; GFX10PLUS-GISEL-NEXT:    s_setpc_b64 s[30:31]
192  %val = call <2 x half> @llvm.experimental.constrained.fsub.v2f16(<2 x half> %x, <2 x half> %y, metadata !"round.tonearest", metadata !"fpexcept.ignore")
193  ret <2 x half> %val
194}
195
; <2 x half> subtraction through the constrained fsub intrinsic, using
; round.tonearest rounding and fpexcept.maytrap exception behavior.
; Expected lowering, per the checks below:
;  - SelectionDAG runs subtract the low and high halves separately and repack
;    the two results (v_perm_b32, or v_or_b32 on the fiji run).
;  - GlobalISel runs emit one v_pk_add_f16 with neg_lo/neg_hi applied to the
;    second operand, except the fiji run, which XORs 0x80008000 into the second
;    operand and then issues two f16 adds.
196define <2 x half> @v_constained_fsub_v2f16_fpexcept_maytrap(<2 x half> %x, <2 x half> %y) #0 {
197; GFX9-SDAG-LABEL: v_constained_fsub_v2f16_fpexcept_maytrap:
198; GFX9-SDAG:       ; %bb.0:
199; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
200; GFX9-SDAG-NEXT:    v_sub_f16_sdwa v2, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
201; GFX9-SDAG-NEXT:    v_sub_f16_e32 v0, v0, v1
202; GFX9-SDAG-NEXT:    s_mov_b32 s4, 0x5040100
203; GFX9-SDAG-NEXT:    v_perm_b32 v0, v2, v0, s4
204; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
205;
206; GFX9-GISEL-LABEL: v_constained_fsub_v2f16_fpexcept_maytrap:
207; GFX9-GISEL:       ; %bb.0:
208; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
209; GFX9-GISEL-NEXT:    v_pk_add_f16 v0, v0, v1 neg_lo:[0,1] neg_hi:[0,1]
210; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
211;
212; GFX8-SDAG-LABEL: v_constained_fsub_v2f16_fpexcept_maytrap:
213; GFX8-SDAG:       ; %bb.0:
214; GFX8-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
215; GFX8-SDAG-NEXT:    v_sub_f16_sdwa v2, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
216; GFX8-SDAG-NEXT:    v_sub_f16_e32 v0, v0, v1
217; GFX8-SDAG-NEXT:    v_or_b32_e32 v0, v0, v2
218; GFX8-SDAG-NEXT:    s_setpc_b64 s[30:31]
219;
220; GFX8-GISEL-LABEL: v_constained_fsub_v2f16_fpexcept_maytrap:
221; GFX8-GISEL:       ; %bb.0:
222; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
223; GFX8-GISEL-NEXT:    v_xor_b32_e32 v1, 0x80008000, v1
224; GFX8-GISEL-NEXT:    v_add_f16_e32 v2, v0, v1
225; GFX8-GISEL-NEXT:    v_add_f16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
226; GFX8-GISEL-NEXT:    v_or_b32_e32 v0, v2, v0
227; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
228;
229; GFX10-SDAG-LABEL: v_constained_fsub_v2f16_fpexcept_maytrap:
230; GFX10-SDAG:       ; %bb.0:
231; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
232; GFX10-SDAG-NEXT:    v_sub_f16_sdwa v2, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
233; GFX10-SDAG-NEXT:    v_sub_f16_e32 v0, v0, v1
234; GFX10-SDAG-NEXT:    v_perm_b32 v0, v2, v0, 0x5040100
235; GFX10-SDAG-NEXT:    s_setpc_b64 s[30:31]
236;
237; GFX10-GISEL-LABEL: v_constained_fsub_v2f16_fpexcept_maytrap:
238; GFX10-GISEL:       ; %bb.0:
239; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
240; GFX10-GISEL-NEXT:    v_pk_add_f16 v0, v0, v1 neg_lo:[0,1] neg_hi:[0,1]
241; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
242;
243; GFX10PLUS-SDAG-LABEL: v_constained_fsub_v2f16_fpexcept_maytrap:
244; GFX10PLUS-SDAG:       ; %bb.0:
245; GFX10PLUS-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
246; GFX10PLUS-SDAG-NEXT:    v_lshrrev_b32_e32 v2, 16, v1
247; GFX10PLUS-SDAG-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
248; GFX10PLUS-SDAG-NEXT:    v_sub_f16_e32 v0, v0, v1
249; GFX10PLUS-SDAG-NEXT:    v_sub_f16_e32 v2, v3, v2
250; GFX10PLUS-SDAG-NEXT:    v_perm_b32 v0, v2, v0, 0x5040100
251; GFX10PLUS-SDAG-NEXT:    s_setpc_b64 s[30:31]
252;
253; GFX10PLUS-GISEL-LABEL: v_constained_fsub_v2f16_fpexcept_maytrap:
254; GFX10PLUS-GISEL:       ; %bb.0:
255; GFX10PLUS-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
256; GFX10PLUS-GISEL-NEXT:    v_pk_add_f16 v0, v0, v1 neg_lo:[0,1] neg_hi:[0,1]
257; GFX10PLUS-GISEL-NEXT:    s_setpc_b64 s[30:31]
258  %val = call <2 x half> @llvm.experimental.constrained.fsub.v2f16(<2 x half> %x, <2 x half> %y, metadata !"round.tonearest", metadata !"fpexcept.maytrap")
259  ret <2 x half> %val
260}
261
; <3 x half> subtraction through the constrained fsub intrinsic, using
; round.tonearest rounding and fpexcept.strict exception behavior.
; Every run below is expected to emit three separate f16 subtracts: the low
; and high halves of v0/v2, plus v1/v3. The v0 halves are then repacked
; (v_perm_b32, v_or_b32 or v_lshl_or_b32 depending on the run); no packed
; v_pk_add_f16 is expected for this width.
262define <3 x half> @v_constained_fsub_v3f16_fpexcept_strict(<3 x half> %x, <3 x half> %y) #0 {
263; GFX9-SDAG-LABEL: v_constained_fsub_v3f16_fpexcept_strict:
264; GFX9-SDAG:       ; %bb.0:
265; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
266; GFX9-SDAG-NEXT:    v_sub_f16_sdwa v4, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
267; GFX9-SDAG-NEXT:    v_sub_f16_e32 v0, v0, v2
268; GFX9-SDAG-NEXT:    s_mov_b32 s4, 0x5040100
269; GFX9-SDAG-NEXT:    v_perm_b32 v0, v4, v0, s4
270; GFX9-SDAG-NEXT:    v_sub_f16_e32 v1, v1, v3
271; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
272;
273; GFX9-GISEL-LABEL: v_constained_fsub_v3f16_fpexcept_strict:
274; GFX9-GISEL:       ; %bb.0:
275; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
276; GFX9-GISEL-NEXT:    v_sub_f16_e32 v4, v0, v2
277; GFX9-GISEL-NEXT:    v_sub_f16_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
278; GFX9-GISEL-NEXT:    v_sub_f16_e32 v1, v1, v3
279; GFX9-GISEL-NEXT:    v_lshl_or_b32 v0, v0, 16, v4
280; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
281;
282; GFX8-SDAG-LABEL: v_constained_fsub_v3f16_fpexcept_strict:
283; GFX8-SDAG:       ; %bb.0:
284; GFX8-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
285; GFX8-SDAG-NEXT:    v_sub_f16_sdwa v4, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
286; GFX8-SDAG-NEXT:    v_sub_f16_e32 v0, v0, v2
287; GFX8-SDAG-NEXT:    v_or_b32_e32 v0, v0, v4
288; GFX8-SDAG-NEXT:    v_sub_f16_e32 v1, v1, v3
289; GFX8-SDAG-NEXT:    s_setpc_b64 s[30:31]
290;
291; GFX8-GISEL-LABEL: v_constained_fsub_v3f16_fpexcept_strict:
292; GFX8-GISEL:       ; %bb.0:
293; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
294; GFX8-GISEL-NEXT:    v_sub_f16_e32 v4, v0, v2
295; GFX8-GISEL-NEXT:    v_sub_f16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
296; GFX8-GISEL-NEXT:    v_sub_f16_e32 v1, v1, v3
297; GFX8-GISEL-NEXT:    v_or_b32_e32 v0, v4, v0
298; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
299;
300; GFX10-SDAG-LABEL: v_constained_fsub_v3f16_fpexcept_strict:
301; GFX10-SDAG:       ; %bb.0:
302; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
303; GFX10-SDAG-NEXT:    v_sub_f16_sdwa v4, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
304; GFX10-SDAG-NEXT:    v_sub_f16_e32 v0, v0, v2
305; GFX10-SDAG-NEXT:    v_sub_f16_e32 v1, v1, v3
306; GFX10-SDAG-NEXT:    v_perm_b32 v0, v4, v0, 0x5040100
307; GFX10-SDAG-NEXT:    s_setpc_b64 s[30:31]
308;
309; GFX10-GISEL-LABEL: v_constained_fsub_v3f16_fpexcept_strict:
310; GFX10-GISEL:       ; %bb.0:
311; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
312; GFX10-GISEL-NEXT:    v_sub_f16_e32 v4, v0, v2
313; GFX10-GISEL-NEXT:    v_sub_f16_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
314; GFX10-GISEL-NEXT:    v_sub_f16_e32 v1, v1, v3
315; GFX10-GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v4
316; GFX10-GISEL-NEXT:    v_lshl_or_b32 v0, v0, 16, v2
317; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
318;
319; GFX10PLUS-SDAG-LABEL: v_constained_fsub_v3f16_fpexcept_strict:
320; GFX10PLUS-SDAG:       ; %bb.0:
321; GFX10PLUS-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
322; GFX10PLUS-SDAG-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
323; GFX10PLUS-SDAG-NEXT:    v_lshrrev_b32_e32 v5, 16, v0
324; GFX10PLUS-SDAG-NEXT:    v_sub_f16_e32 v0, v0, v2
325; GFX10PLUS-SDAG-NEXT:    v_sub_f16_e32 v1, v1, v3
326; GFX10PLUS-SDAG-NEXT:    v_sub_f16_e32 v2, v5, v4
327; GFX10PLUS-SDAG-NEXT:    v_perm_b32 v0, v2, v0, 0x5040100
328; GFX10PLUS-SDAG-NEXT:    s_setpc_b64 s[30:31]
329;
330; GFX10PLUS-GISEL-LABEL: v_constained_fsub_v3f16_fpexcept_strict:
331; GFX10PLUS-GISEL:       ; %bb.0:
332; GFX10PLUS-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
333; GFX10PLUS-GISEL-NEXT:    v_lshrrev_b32_e32 v4, 16, v0
334; GFX10PLUS-GISEL-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
335; GFX10PLUS-GISEL-NEXT:    v_sub_f16_e32 v0, v0, v2
336; GFX10PLUS-GISEL-NEXT:    v_sub_f16_e32 v1, v1, v3
337; GFX10PLUS-GISEL-NEXT:    v_sub_f16_e32 v2, v4, v5
338; GFX10PLUS-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
339; GFX10PLUS-GISEL-NEXT:    v_lshl_or_b32 v0, v2, 16, v0
340; GFX10PLUS-GISEL-NEXT:    s_setpc_b64 s[30:31]
341  %val = call <3 x half> @llvm.experimental.constrained.fsub.v3f16(<3 x half> %x, <3 x half> %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
342  ret <3 x half> %val
343}
344
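345; FIXME: The <4 x half> case is scalarized into four separate f16 subtracts on every run, even where the <2 x half> tests select v_pk_add_f16.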
; <4 x half> subtraction through the constrained fsub intrinsic, using
; round.tonearest rounding and fpexcept.strict exception behavior.
; Every run below is expected to emit four separate f16 subtracts (low and
; high halves of v0/v2 and of v1/v3) and then repack each result register
; with v_perm_b32, v_or_b32 or v_lshl_or_b32; no packed v_pk_add_f16 is
; expected for this width.
346define <4 x half> @v_constained_fsub_v4f16_fpexcept_strict(<4 x half> %x, <4 x half> %y) #0 {
347; GFX9-SDAG-LABEL: v_constained_fsub_v4f16_fpexcept_strict:
348; GFX9-SDAG:       ; %bb.0:
349; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
350; GFX9-SDAG-NEXT:    v_sub_f16_sdwa v4, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
351; GFX9-SDAG-NEXT:    v_sub_f16_sdwa v5, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
352; GFX9-SDAG-NEXT:    v_sub_f16_e32 v1, v1, v3
353; GFX9-SDAG-NEXT:    v_sub_f16_e32 v0, v0, v2
354; GFX9-SDAG-NEXT:    s_mov_b32 s4, 0x5040100
355; GFX9-SDAG-NEXT:    v_perm_b32 v0, v5, v0, s4
356; GFX9-SDAG-NEXT:    v_perm_b32 v1, v4, v1, s4
357; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
358;
359; GFX9-GISEL-LABEL: v_constained_fsub_v4f16_fpexcept_strict:
360; GFX9-GISEL:       ; %bb.0:
361; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
362; GFX9-GISEL-NEXT:    v_sub_f16_e32 v4, v0, v2
363; GFX9-GISEL-NEXT:    v_sub_f16_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
364; GFX9-GISEL-NEXT:    v_sub_f16_e32 v2, v1, v3
365; GFX9-GISEL-NEXT:    v_sub_f16_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
366; GFX9-GISEL-NEXT:    v_lshl_or_b32 v0, v0, 16, v4
367; GFX9-GISEL-NEXT:    v_lshl_or_b32 v1, v1, 16, v2
368; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
369;
370; GFX8-SDAG-LABEL: v_constained_fsub_v4f16_fpexcept_strict:
371; GFX8-SDAG:       ; %bb.0:
372; GFX8-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
373; GFX8-SDAG-NEXT:    v_sub_f16_sdwa v4, v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
374; GFX8-SDAG-NEXT:    v_sub_f16_sdwa v5, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
375; GFX8-SDAG-NEXT:    v_sub_f16_e32 v1, v1, v3
376; GFX8-SDAG-NEXT:    v_sub_f16_e32 v0, v0, v2
377; GFX8-SDAG-NEXT:    v_or_b32_e32 v0, v0, v5
378; GFX8-SDAG-NEXT:    v_or_b32_e32 v1, v1, v4
379; GFX8-SDAG-NEXT:    s_setpc_b64 s[30:31]
380;
381; GFX8-GISEL-LABEL: v_constained_fsub_v4f16_fpexcept_strict:
382; GFX8-GISEL:       ; %bb.0:
383; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
384; GFX8-GISEL-NEXT:    v_sub_f16_e32 v4, v0, v2
385; GFX8-GISEL-NEXT:    v_sub_f16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
386; GFX8-GISEL-NEXT:    v_sub_f16_e32 v2, v1, v3
387; GFX8-GISEL-NEXT:    v_sub_f16_sdwa v1, v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
388; GFX8-GISEL-NEXT:    v_or_b32_e32 v0, v4, v0
389; GFX8-GISEL-NEXT:    v_or_b32_e32 v1, v2, v1
390; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
391;
392; GFX10-SDAG-LABEL: v_constained_fsub_v4f16_fpexcept_strict:
393; GFX10-SDAG:       ; %bb.0:
394; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
395; GFX10-SDAG-NEXT:    v_sub_f16_sdwa v4, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
396; GFX10-SDAG-NEXT:    v_sub_f16_sdwa v5, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
397; GFX10-SDAG-NEXT:    v_sub_f16_e32 v0, v0, v2
398; GFX10-SDAG-NEXT:    v_sub_f16_e32 v1, v1, v3
399; GFX10-SDAG-NEXT:    v_perm_b32 v0, v5, v0, 0x5040100
400; GFX10-SDAG-NEXT:    v_perm_b32 v1, v4, v1, 0x5040100
401; GFX10-SDAG-NEXT:    s_setpc_b64 s[30:31]
402;
403; GFX10-GISEL-LABEL: v_constained_fsub_v4f16_fpexcept_strict:
404; GFX10-GISEL:       ; %bb.0:
405; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
406; GFX10-GISEL-NEXT:    v_sub_f16_e32 v4, v0, v2
407; GFX10-GISEL-NEXT:    v_sub_f16_e32 v5, v1, v3
408; GFX10-GISEL-NEXT:    v_sub_f16_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
409; GFX10-GISEL-NEXT:    v_sub_f16_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
410; GFX10-GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v4
411; GFX10-GISEL-NEXT:    v_and_b32_e32 v3, 0xffff, v5
412; GFX10-GISEL-NEXT:    v_lshl_or_b32 v0, v0, 16, v2
413; GFX10-GISEL-NEXT:    v_lshl_or_b32 v1, v1, 16, v3
414; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
415;
416; GFX10PLUS-SDAG-LABEL: v_constained_fsub_v4f16_fpexcept_strict:
417; GFX10PLUS-SDAG:       ; %bb.0:
418; GFX10PLUS-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
419; GFX10PLUS-SDAG-NEXT:    v_lshrrev_b32_e32 v4, 16, v3
420; GFX10PLUS-SDAG-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
421; GFX10PLUS-SDAG-NEXT:    v_lshrrev_b32_e32 v6, 16, v0
422; GFX10PLUS-SDAG-NEXT:    v_lshrrev_b32_e32 v7, 16, v1
423; GFX10PLUS-SDAG-NEXT:    v_sub_f16_e32 v1, v1, v3
424; GFX10PLUS-SDAG-NEXT:    v_sub_f16_e32 v0, v0, v2
425; GFX10PLUS-SDAG-NEXT:    v_sub_f16_e32 v2, v6, v5
426; GFX10PLUS-SDAG-NEXT:    v_sub_f16_e32 v3, v7, v4
427; GFX10PLUS-SDAG-NEXT:    v_perm_b32 v0, v2, v0, 0x5040100
428; GFX10PLUS-SDAG-NEXT:    v_perm_b32 v1, v3, v1, 0x5040100
429; GFX10PLUS-SDAG-NEXT:    s_setpc_b64 s[30:31]
430;
431; GFX10PLUS-GISEL-LABEL: v_constained_fsub_v4f16_fpexcept_strict:
432; GFX10PLUS-GISEL:       ; %bb.0:
433; GFX10PLUS-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
434; GFX10PLUS-GISEL-NEXT:    v_lshrrev_b32_e32 v4, 16, v0
435; GFX10PLUS-GISEL-NEXT:    v_lshrrev_b32_e32 v5, 16, v1
436; GFX10PLUS-GISEL-NEXT:    v_lshrrev_b32_e32 v6, 16, v2
437; GFX10PLUS-GISEL-NEXT:    v_lshrrev_b32_e32 v7, 16, v3
438; GFX10PLUS-GISEL-NEXT:    v_sub_f16_e32 v0, v0, v2
439; GFX10PLUS-GISEL-NEXT:    v_sub_f16_e32 v1, v1, v3
440; GFX10PLUS-GISEL-NEXT:    v_sub_f16_e32 v2, v4, v6
441; GFX10PLUS-GISEL-NEXT:    v_sub_f16_e32 v3, v5, v7
442; GFX10PLUS-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
443; GFX10PLUS-GISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v1
444; GFX10PLUS-GISEL-NEXT:    v_lshl_or_b32 v0, v2, 16, v0
445; GFX10PLUS-GISEL-NEXT:    v_lshl_or_b32 v1, v3, 16, v1
446; GFX10PLUS-GISEL-NEXT:    s_setpc_b64 s[30:31]
447  %val = call <4 x half> @llvm.experimental.constrained.fsub.v4f16(<4 x half> %x, <4 x half> %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
448  ret <4 x half> %val
449}
450
; amdgpu_ps variant of the scalar half test: both operands are marked inreg
; and the checks read them from s2 and s3. Uses round.tonearest rounding and
; fpexcept.strict exception behavior.
; The first check group copies s3 into v0 before a v_sub_f16_e32; the second
; group expects a single v_sub_f16_e64 that takes both SGPRs directly.
451define amdgpu_ps half @s_constained_fsub_f16_fpexcept_strict(half inreg %x, half inreg %y) #0 {
452; GCN-LABEL: s_constained_fsub_f16_fpexcept_strict:
453; GCN:       ; %bb.0:
454; GCN-NEXT:    v_mov_b32_e32 v0, s3
455; GCN-NEXT:    v_sub_f16_e32 v0, s2, v0
456; GCN-NEXT:    ; return to shader part epilog
457;
458; GFX10PLUS-LABEL: s_constained_fsub_f16_fpexcept_strict:
459; GFX10PLUS:       ; %bb.0:
460; GFX10PLUS-NEXT:    v_sub_f16_e64 v0, s2, s3
461; GFX10PLUS-NEXT:    ; return to shader part epilog
462  %val = call half @llvm.experimental.constrained.fsub.f16(half %x, half %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
463  ret half %val
464}
465
; amdgpu_ps variant of the <2 x half> test: both operands are marked inreg and
; the checks read them from s2 and s3. Uses round.tonearest rounding and
; fpexcept.strict exception behavior.
; Expected lowering, per the checks below:
;  - SelectionDAG runs extract the high halves with s_lshr_b32, perform two
;    f16 subtracts and repack the results.
;  - GlobalISel runs XOR 0x80008000 into s3 and then add: one v_pk_add_f16,
;    or two f16 adds plus v_or_b32 on the fiji run.
466define amdgpu_ps <2 x half> @s_constained_fsub_v2f16_fpexcept_strict(<2 x half> inreg %x, <2 x half> inreg %y) #0 {
467; GFX9-SDAG-LABEL: s_constained_fsub_v2f16_fpexcept_strict:
468; GFX9-SDAG:       ; %bb.0:
469; GFX9-SDAG-NEXT:    s_lshr_b32 s0, s3, 16
470; GFX9-SDAG-NEXT:    s_lshr_b32 s1, s2, 16
471; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, s0
472; GFX9-SDAG-NEXT:    v_mov_b32_e32 v1, s3
473; GFX9-SDAG-NEXT:    v_sub_f16_e32 v0, s1, v0
474; GFX9-SDAG-NEXT:    v_sub_f16_e32 v1, s2, v1
475; GFX9-SDAG-NEXT:    v_lshl_or_b32 v0, v0, 16, v1
476; GFX9-SDAG-NEXT:    ; return to shader part epilog
477;
478; GFX9-GISEL-LABEL: s_constained_fsub_v2f16_fpexcept_strict:
479; GFX9-GISEL:       ; %bb.0:
480; GFX9-GISEL-NEXT:    s_xor_b32 s0, s3, 0x80008000
481; GFX9-GISEL-NEXT:    v_mov_b32_e32 v0, s0
482; GFX9-GISEL-NEXT:    v_pk_add_f16 v0, s2, v0
483; GFX9-GISEL-NEXT:    ; return to shader part epilog
484;
485; GFX8-SDAG-LABEL: s_constained_fsub_v2f16_fpexcept_strict:
486; GFX8-SDAG:       ; %bb.0:
487; GFX8-SDAG-NEXT:    s_lshr_b32 s0, s3, 16
488; GFX8-SDAG-NEXT:    s_lshr_b32 s1, s2, 16
489; GFX8-SDAG-NEXT:    v_mov_b32_e32 v0, s0
490; GFX8-SDAG-NEXT:    v_mov_b32_e32 v1, s1
491; GFX8-SDAG-NEXT:    v_sub_f16_sdwa v0, v1, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
492; GFX8-SDAG-NEXT:    v_mov_b32_e32 v1, s3
493; GFX8-SDAG-NEXT:    v_sub_f16_e32 v1, s2, v1
494; GFX8-SDAG-NEXT:    v_or_b32_e32 v0, v1, v0
495; GFX8-SDAG-NEXT:    ; return to shader part epilog
496;
497; GFX8-GISEL-LABEL: s_constained_fsub_v2f16_fpexcept_strict:
498; GFX8-GISEL:       ; %bb.0:
499; GFX8-GISEL-NEXT:    s_xor_b32 s0, s3, 0x80008000
500; GFX8-GISEL-NEXT:    s_lshr_b32 s1, s2, 16
501; GFX8-GISEL-NEXT:    s_lshr_b32 s3, s0, 16
502; GFX8-GISEL-NEXT:    v_mov_b32_e32 v0, s0
503; GFX8-GISEL-NEXT:    v_mov_b32_e32 v1, s3
504; GFX8-GISEL-NEXT:    v_mov_b32_e32 v2, s1
505; GFX8-GISEL-NEXT:    v_add_f16_e32 v0, s2, v0
506; GFX8-GISEL-NEXT:    v_add_f16_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
507; GFX8-GISEL-NEXT:    v_or_b32_e32 v0, v0, v1
508; GFX8-GISEL-NEXT:    ; return to shader part epilog
509;
510; GFX10-SDAG-LABEL: s_constained_fsub_v2f16_fpexcept_strict:
511; GFX10-SDAG:       ; %bb.0:
512; GFX10-SDAG-NEXT:    v_sub_f16_e64 v0, s2, s3
513; GFX10-SDAG-NEXT:    s_lshr_b32 s0, s3, 16
514; GFX10-SDAG-NEXT:    s_lshr_b32 s1, s2, 16
515; GFX10-SDAG-NEXT:    v_sub_f16_e64 v1, s1, s0
516; GFX10-SDAG-NEXT:    v_and_b32_e32 v0, 0xffff, v0
517; GFX10-SDAG-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
518; GFX10-SDAG-NEXT:    ; return to shader part epilog
519;
520; GFX10-GISEL-LABEL: s_constained_fsub_v2f16_fpexcept_strict:
521; GFX10-GISEL:       ; %bb.0:
522; GFX10-GISEL-NEXT:    s_xor_b32 s0, s3, 0x80008000
523; GFX10-GISEL-NEXT:    v_pk_add_f16 v0, s2, s0
524; GFX10-GISEL-NEXT:    ; return to shader part epilog
525;
526; GFX10PLUS-SDAG-LABEL: s_constained_fsub_v2f16_fpexcept_strict:
527; GFX10PLUS-SDAG:       ; %bb.0:
528; GFX10PLUS-SDAG-NEXT:    v_sub_f16_e64 v0, s2, s3
529; GFX10PLUS-SDAG-NEXT:    s_lshr_b32 s0, s3, 16
530; GFX10PLUS-SDAG-NEXT:    s_lshr_b32 s1, s2, 16
531; GFX10PLUS-SDAG-NEXT:    v_sub_f16_e64 v1, s1, s0
532; GFX10PLUS-SDAG-NEXT:    v_and_b32_e32 v0, 0xffff, v0
533; GFX10PLUS-SDAG-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
534; GFX10PLUS-SDAG-NEXT:    ; return to shader part epilog
535;
536; GFX10PLUS-GISEL-LABEL: s_constained_fsub_v2f16_fpexcept_strict:
537; GFX10PLUS-GISEL:       ; %bb.0:
538; GFX10PLUS-GISEL-NEXT:    s_xor_b32 s0, s3, 0x80008000
539; GFX10PLUS-GISEL-NEXT:    v_pk_add_f16 v0, s2, s0
540; GFX10PLUS-GISEL-NEXT:    ; return to shader part epilog
541  %val = call <2 x half> @llvm.experimental.constrained.fsub.v2f16(<2 x half> %x, <2 x half> %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
542  ret <2 x half> %val
543}
544
; Declarations of the constrained fsub intrinsic, one per element count
; exercised by the tests above (scalar half, then 2, 3 and 4 half elements).
; Each takes the two operands followed by the rounding-mode and
; exception-behavior metadata arguments.
545declare half @llvm.experimental.constrained.fsub.f16(half, half, metadata, metadata) #1
546declare <2 x half> @llvm.experimental.constrained.fsub.v2f16(<2 x half>, <2 x half>, metadata, metadata) #1
547declare <3 x half> @llvm.experimental.constrained.fsub.v3f16(<3 x half>, <3 x half>, metadata, metadata) #1
548declare <4 x half> @llvm.experimental.constrained.fsub.v4f16(<4 x half>, <4 x half>, metadata, metadata) #1
549
; Attribute group #0 (strictfp) is attached to every test function defined
; above; group #1 is attached to the intrinsic declarations.
550attributes #0 = { strictfp }
551attributes #1 = { inaccessiblememonly nounwind willreturn }
552;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
553; GFX8: {{.*}}
554; GFX9: {{.*}}
555