xref: /llvm-project/llvm/test/CodeGen/AMDGPU/strict_fsub.f32.ll (revision 76c22b18eafd2156568d72e9df2ff7bd3457888a)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefix=GCN %s
3; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefix=GCN %s
4
5; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s
6; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s
7
8; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s
9; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s
10
; Scalar f32 constrained fsub with round.tonearest rounding and fpexcept.strict
; exception behavior. Both check groups (gfx900 and gfx10+) expect the call to
; become a single "v_sub_f32_e32 v0, v0, v1" followed by the return.
11define float @v_constained_fsub_f32_fpexcept_strict(float %x, float %y) #0 {
12; GCN-LABEL: v_constained_fsub_f32_fpexcept_strict:
13; GCN:       ; %bb.0:
14; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15; GCN-NEXT:    v_sub_f32_e32 v0, v0, v1
16; GCN-NEXT:    s_setpc_b64 s[30:31]
17;
18; GFX10PLUS-LABEL: v_constained_fsub_f32_fpexcept_strict:
19; GFX10PLUS:       ; %bb.0:
20; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
21; GFX10PLUS-NEXT:    v_sub_f32_e32 v0, v0, v1
22; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
23  %val = call float @llvm.experimental.constrained.fsub.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
24  ret float %val
25}
26
; Scalar f32 constrained fsub with round.tonearest rounding and fpexcept.ignore
; exception behavior. The expected code is the same single
; "v_sub_f32_e32 v0, v0, v1" that the fpexcept.strict variant checks for.
27define float @v_constained_fsub_f32_fpexcept_ignore(float %x, float %y) #0 {
28; GCN-LABEL: v_constained_fsub_f32_fpexcept_ignore:
29; GCN:       ; %bb.0:
30; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
31; GCN-NEXT:    v_sub_f32_e32 v0, v0, v1
32; GCN-NEXT:    s_setpc_b64 s[30:31]
33;
34; GFX10PLUS-LABEL: v_constained_fsub_f32_fpexcept_ignore:
35; GFX10PLUS:       ; %bb.0:
36; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
37; GFX10PLUS-NEXT:    v_sub_f32_e32 v0, v0, v1
38; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
39  %val = call float @llvm.experimental.constrained.fsub.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.ignore")
40  ret float %val
41}
42
; Scalar f32 constrained fsub with round.tonearest rounding and
; fpexcept.maytrap exception behavior. The expected code is again a single
; "v_sub_f32_e32 v0, v0, v1" for both check groups.
43define float @v_constained_fsub_f32_fpexcept_maytrap(float %x, float %y) #0 {
44; GCN-LABEL: v_constained_fsub_f32_fpexcept_maytrap:
45; GCN:       ; %bb.0:
46; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
47; GCN-NEXT:    v_sub_f32_e32 v0, v0, v1
48; GCN-NEXT:    s_setpc_b64 s[30:31]
49;
50; GFX10PLUS-LABEL: v_constained_fsub_f32_fpexcept_maytrap:
51; GFX10PLUS:       ; %bb.0:
52; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
53; GFX10PLUS-NEXT:    v_sub_f32_e32 v0, v0, v1
54; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
55  %val = call float @llvm.experimental.constrained.fsub.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.maytrap")
56  ret float %val
57}
58
; <2 x float> constrained fsub with round.tonearest rounding and
; fpexcept.strict exception behavior. The gfx900 and gfx1010 checks expect one
; v_sub_f32_e32 per element (v0-v2, v1-v3); the gfx1100 checks expect both
; elements to be covered by one v_dual_sub_f32 :: v_dual_sub_f32 pair.
59define <2 x float> @v_constained_fsub_v2f32_fpexcept_strict(<2 x float> %x, <2 x float> %y) #0 {
60; GCN-LABEL: v_constained_fsub_v2f32_fpexcept_strict:
61; GCN:       ; %bb.0:
62; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
63; GCN-NEXT:    v_sub_f32_e32 v0, v0, v2
64; GCN-NEXT:    v_sub_f32_e32 v1, v1, v3
65; GCN-NEXT:    s_setpc_b64 s[30:31]
66;
67; GFX10-LABEL: v_constained_fsub_v2f32_fpexcept_strict:
68; GFX10:       ; %bb.0:
69; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
70; GFX10-NEXT:    v_sub_f32_e32 v0, v0, v2
71; GFX10-NEXT:    v_sub_f32_e32 v1, v1, v3
72; GFX10-NEXT:    s_setpc_b64 s[30:31]
73;
74; GFX11-LABEL: v_constained_fsub_v2f32_fpexcept_strict:
75; GFX11:       ; %bb.0:
76; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
77; GFX11-NEXT:    v_dual_sub_f32 v0, v0, v2 :: v_dual_sub_f32 v1, v1, v3
78; GFX11-NEXT:    s_setpc_b64 s[30:31]
79  %val = call <2 x float> @llvm.experimental.constrained.fsub.v2f32(<2 x float> %x, <2 x float> %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
80  ret <2 x float> %val
81}
82
; <2 x float> constrained fsub with round.tonearest rounding and
; fpexcept.ignore exception behavior. Expected output matches the
; fpexcept.strict v2f32 case: two v_sub_f32_e32 on gfx900/gfx1010 and one
; v_dual_sub_f32 pair on gfx1100.
83define <2 x float> @v_constained_fsub_v2f32_fpexcept_ignore(<2 x float> %x, <2 x float> %y) #0 {
84; GCN-LABEL: v_constained_fsub_v2f32_fpexcept_ignore:
85; GCN:       ; %bb.0:
86; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
87; GCN-NEXT:    v_sub_f32_e32 v0, v0, v2
88; GCN-NEXT:    v_sub_f32_e32 v1, v1, v3
89; GCN-NEXT:    s_setpc_b64 s[30:31]
90;
91; GFX10-LABEL: v_constained_fsub_v2f32_fpexcept_ignore:
92; GFX10:       ; %bb.0:
93; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
94; GFX10-NEXT:    v_sub_f32_e32 v0, v0, v2
95; GFX10-NEXT:    v_sub_f32_e32 v1, v1, v3
96; GFX10-NEXT:    s_setpc_b64 s[30:31]
97;
98; GFX11-LABEL: v_constained_fsub_v2f32_fpexcept_ignore:
99; GFX11:       ; %bb.0:
100; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
101; GFX11-NEXT:    v_dual_sub_f32 v0, v0, v2 :: v_dual_sub_f32 v1, v1, v3
102; GFX11-NEXT:    s_setpc_b64 s[30:31]
103  %val = call <2 x float> @llvm.experimental.constrained.fsub.v2f32(<2 x float> %x, <2 x float> %y, metadata !"round.tonearest", metadata !"fpexcept.ignore")
104  ret <2 x float> %val
105}
106
; <2 x float> constrained fsub with round.tonearest rounding and
; fpexcept.maytrap exception behavior. Expected output matches the other v2f32
; cases: two v_sub_f32_e32 on gfx900/gfx1010 and one v_dual_sub_f32 pair on
; gfx1100.
107define <2 x float> @v_constained_fsub_v2f32_fpexcept_maytrap(<2 x float> %x, <2 x float> %y) #0 {
108; GCN-LABEL: v_constained_fsub_v2f32_fpexcept_maytrap:
109; GCN:       ; %bb.0:
110; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
111; GCN-NEXT:    v_sub_f32_e32 v0, v0, v2
112; GCN-NEXT:    v_sub_f32_e32 v1, v1, v3
113; GCN-NEXT:    s_setpc_b64 s[30:31]
114;
115; GFX10-LABEL: v_constained_fsub_v2f32_fpexcept_maytrap:
116; GFX10:       ; %bb.0:
117; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
118; GFX10-NEXT:    v_sub_f32_e32 v0, v0, v2
119; GFX10-NEXT:    v_sub_f32_e32 v1, v1, v3
120; GFX10-NEXT:    s_setpc_b64 s[30:31]
121;
122; GFX11-LABEL: v_constained_fsub_v2f32_fpexcept_maytrap:
123; GFX11:       ; %bb.0:
124; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
125; GFX11-NEXT:    v_dual_sub_f32 v0, v0, v2 :: v_dual_sub_f32 v1, v1, v3
126; GFX11-NEXT:    s_setpc_b64 s[30:31]
127  %val = call <2 x float> @llvm.experimental.constrained.fsub.v2f32(<2 x float> %x, <2 x float> %y, metadata !"round.tonearest", metadata !"fpexcept.maytrap")
128  ret <2 x float> %val
129}
130
; <3 x float> constrained fsub with round.tonearest rounding and
; fpexcept.strict exception behavior. The gfx900 and gfx1010 checks expect
; three v_sub_f32_e32 (v0-v3, v1-v4, v2-v5); the gfx1100 checks expect the
; first two elements in one v_dual_sub_f32 pair and the third element in a
; separate v_sub_f32_e32.
131define <3 x float> @v_constained_fsub_v3f32_fpexcept_strict(<3 x float> %x, <3 x float> %y) #0 {
132; GCN-LABEL: v_constained_fsub_v3f32_fpexcept_strict:
133; GCN:       ; %bb.0:
134; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
135; GCN-NEXT:    v_sub_f32_e32 v0, v0, v3
136; GCN-NEXT:    v_sub_f32_e32 v1, v1, v4
137; GCN-NEXT:    v_sub_f32_e32 v2, v2, v5
138; GCN-NEXT:    s_setpc_b64 s[30:31]
139;
140; GFX10-LABEL: v_constained_fsub_v3f32_fpexcept_strict:
141; GFX10:       ; %bb.0:
142; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
143; GFX10-NEXT:    v_sub_f32_e32 v0, v0, v3
144; GFX10-NEXT:    v_sub_f32_e32 v1, v1, v4
145; GFX10-NEXT:    v_sub_f32_e32 v2, v2, v5
146; GFX10-NEXT:    s_setpc_b64 s[30:31]
147;
148; GFX11-LABEL: v_constained_fsub_v3f32_fpexcept_strict:
149; GFX11:       ; %bb.0:
150; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
151; GFX11-NEXT:    v_dual_sub_f32 v0, v0, v3 :: v_dual_sub_f32 v1, v1, v4
152; GFX11-NEXT:    v_sub_f32_e32 v2, v2, v5
153; GFX11-NEXT:    s_setpc_b64 s[30:31]
154  %val = call <3 x float> @llvm.experimental.constrained.fsub.v3f32(<3 x float> %x, <3 x float> %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
155  ret <3 x float> %val
156}
157
; Scalar f32 constrained fsub (fpexcept.strict) in an amdgpu_ps function whose
; operands are both marked inreg. The gfx900 checks expect s3 to be copied into
; v0 before a v_sub_f32_e32 that reads s2 directly; the gfx10+ checks expect a
; single v_sub_f32_e64 that reads both s2 and s3.
; NOTE(review): the extra copy on gfx900 is presumably due to a limit on SGPR
; operands per VALU instruction on that target - confirm against the ISA docs.
158define amdgpu_ps float @s_constained_fsub_f32_fpexcept_strict(float inreg %x, float inreg %y) #0 {
159; GCN-LABEL: s_constained_fsub_f32_fpexcept_strict:
160; GCN:       ; %bb.0:
161; GCN-NEXT:    v_mov_b32_e32 v0, s3
162; GCN-NEXT:    v_sub_f32_e32 v0, s2, v0
163; GCN-NEXT:    ; return to shader part epilog
164;
165; GFX10PLUS-LABEL: s_constained_fsub_f32_fpexcept_strict:
166; GFX10PLUS:       ; %bb.0:
167; GFX10PLUS-NEXT:    v_sub_f32_e64 v0, s2, s3
168; GFX10PLUS-NEXT:    ; return to shader part epilog
169  %val = call float @llvm.experimental.constrained.fsub.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
170  ret float %val
171}
172
; Constrained fsub (fpexcept.strict) whose left operand is llvm.fabs of %x.
; Both check groups expect no separate fabs instruction: the absolute value
; appears as the |v0| source modifier on a single v_sub_f32_e64.
173define float @v_constained_fsub_f32_fpexcept_strict_fabs_lhs(float %x, float %y) #0 {
174; GCN-LABEL: v_constained_fsub_f32_fpexcept_strict_fabs_lhs:
175; GCN:       ; %bb.0:
176; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
177; GCN-NEXT:    v_sub_f32_e64 v0, |v0|, v1
178; GCN-NEXT:    s_setpc_b64 s[30:31]
179;
180; GFX10PLUS-LABEL: v_constained_fsub_f32_fpexcept_strict_fabs_lhs:
181; GFX10PLUS:       ; %bb.0:
182; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
183; GFX10PLUS-NEXT:    v_sub_f32_e64 v0, |v0|, v1
184; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
185  %fabs.x = call float @llvm.fabs.f32(float %x) #0
186  %val = call float @llvm.experimental.constrained.fsub.f32(float %fabs.x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
187  ret float %val
188}
189
; Constrained fsub (fpexcept.strict) whose right operand is llvm.fabs of %y.
; Both check groups expect no separate fabs instruction: the absolute value
; appears as the |v1| source modifier on a single v_sub_f32_e64.
190define float @v_constained_fsub_f32_fpexcept_strict_fabs_rhs(float %x, float %y) #0 {
191; GCN-LABEL: v_constained_fsub_f32_fpexcept_strict_fabs_rhs:
192; GCN:       ; %bb.0:
193; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
194; GCN-NEXT:    v_sub_f32_e64 v0, v0, |v1|
195; GCN-NEXT:    s_setpc_b64 s[30:31]
196;
197; GFX10PLUS-LABEL: v_constained_fsub_f32_fpexcept_strict_fabs_rhs:
198; GFX10PLUS:       ; %bb.0:
199; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
200; GFX10PLUS-NEXT:    v_sub_f32_e64 v0, v0, |v1|
201; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
202  %fabs.y = call float @llvm.fabs.f32(float %y) #0
203  %val = call float @llvm.experimental.constrained.fsub.f32(float %x, float %fabs.y, metadata !"round.tonearest", metadata !"fpexcept.strict")
204  ret float %val
205}
206
; Constrained fsub (fpexcept.strict) whose left operand is fneg(fabs(%x)).
; Both check groups expect the negate and absolute value to appear together as
; the -|v0| source modifier on a single v_sub_f32_e64, with no extra
; instructions.
207define float @v_constained_fsub_f32_fpexcept_strict_fneg_fabs_lhs(float %x, float %y) #0 {
208; GCN-LABEL: v_constained_fsub_f32_fpexcept_strict_fneg_fabs_lhs:
209; GCN:       ; %bb.0:
210; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
211; GCN-NEXT:    v_sub_f32_e64 v0, -|v0|, v1
212; GCN-NEXT:    s_setpc_b64 s[30:31]
213;
214; GFX10PLUS-LABEL: v_constained_fsub_f32_fpexcept_strict_fneg_fabs_lhs:
215; GFX10PLUS:       ; %bb.0:
216; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
217; GFX10PLUS-NEXT:    v_sub_f32_e64 v0, -|v0|, v1
218; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
219  %fabs.x = call float @llvm.fabs.f32(float %x) #0
220  %neg.fabs.x = fneg float %fabs.x
221  %val = call float @llvm.experimental.constrained.fsub.f32(float %neg.fabs.x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
222  ret float %val
223}
224
; Declarations of the intrinsics called above: llvm.fabs.f32 and the
; constrained fsub intrinsic at f32, v2f32 and v3f32. The two trailing metadata
; parameters receive the rounding-mode and exception-behavior strings at each
; call site.
225declare float @llvm.fabs.f32(float)
226declare float @llvm.experimental.constrained.fsub.f32(float, float, metadata, metadata)
227declare <2 x float> @llvm.experimental.constrained.fsub.v2f32(<2 x float>, <2 x float>, metadata, metadata)
228declare <3 x float> @llvm.experimental.constrained.fsub.v3f32(<3 x float>, <3 x float>, metadata, metadata)
229
; Attribute group #0 (strictfp), referenced by the function definitions and
; by the llvm.fabs call sites in this file.
230attributes #0 = { strictfp }
231