xref: /llvm-project/llvm/test/CodeGen/AMDGPU/strict_fmul.f32.ll (revision 76c22b18eafd2156568d72e9df2ff7bd3457888a)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s
; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s
; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s
; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s

; Scalar f32 constrained fmul (round.tonearest, fpexcept.strict) on VGPR
; arguments. The expected output is one v_mul_f32_e32 of v0 and v1 under
; both check prefixes.
define float @v_constained_fmul_f32_fpexcept_strict(float %x, float %y) #0 {
; GCN-LABEL: v_constained_fmul_f32_fpexcept_strict:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_mul_f32_e32 v0, v0, v1
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: v_constained_fmul_f32_fpexcept_strict:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT:    v_mul_f32_e32 v0, v0, v1
; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
  %val = call float @llvm.experimental.constrained.fmul.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret float %val
}

; Scalar f32 constrained fmul (round.tonearest, fpexcept.ignore) on VGPR
; arguments. The expected output is one v_mul_f32_e32 of v0 and v1 under
; both check prefixes.
define float @v_constained_fmul_f32_fpexcept_ignore(float %x, float %y) #0 {
; GCN-LABEL: v_constained_fmul_f32_fpexcept_ignore:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_mul_f32_e32 v0, v0, v1
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: v_constained_fmul_f32_fpexcept_ignore:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT:    v_mul_f32_e32 v0, v0, v1
; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
  %val = call float @llvm.experimental.constrained.fmul.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.ignore")
  ret float %val
}

; Scalar f32 constrained fmul (round.tonearest, fpexcept.maytrap) on VGPR
; arguments. The expected output is one v_mul_f32_e32 of v0 and v1 under
; both check prefixes.
define float @v_constained_fmul_f32_fpexcept_maytrap(float %x, float %y) #0 {
; GCN-LABEL: v_constained_fmul_f32_fpexcept_maytrap:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_mul_f32_e32 v0, v0, v1
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: v_constained_fmul_f32_fpexcept_maytrap:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT:    v_mul_f32_e32 v0, v0, v1
; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
  %val = call float @llvm.experimental.constrained.fmul.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.maytrap")
  ret float %val
}

; <2 x float> constrained fmul (round.tonearest, fpexcept.strict). The
; gfx900 and gfx1010 runs expect one v_mul_f32_e32 per lane; the gfx1100
; runs expect both lanes in a single v_dual_mul_f32 instruction.
define <2 x float> @v_constained_fmul_v2f32_fpexcept_strict(<2 x float> %x, <2 x float> %y) #0 {
; GCN-LABEL: v_constained_fmul_v2f32_fpexcept_strict:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_mul_f32_e32 v0, v0, v2
; GCN-NEXT:    v_mul_f32_e32 v1, v1, v3
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fmul_v2f32_fpexcept_strict:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_mul_f32_e32 v0, v0, v2
; GFX10-NEXT:    v_mul_f32_e32 v1, v1, v3
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_constained_fmul_v2f32_fpexcept_strict:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_dual_mul_f32 v0, v0, v2 :: v_dual_mul_f32 v1, v1, v3
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %val = call <2 x float> @llvm.experimental.constrained.fmul.v2f32(<2 x float> %x, <2 x float> %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret <2 x float> %val
}

; <2 x float> constrained fmul (round.tonearest, fpexcept.ignore). The
; gfx900 and gfx1010 runs expect one v_mul_f32_e32 per lane; the gfx1100
; runs expect both lanes in a single v_dual_mul_f32 instruction.
define <2 x float> @v_constained_fmul_v2f32_fpexcept_ignore(<2 x float> %x, <2 x float> %y) #0 {
; GCN-LABEL: v_constained_fmul_v2f32_fpexcept_ignore:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_mul_f32_e32 v0, v0, v2
; GCN-NEXT:    v_mul_f32_e32 v1, v1, v3
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fmul_v2f32_fpexcept_ignore:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_mul_f32_e32 v0, v0, v2
; GFX10-NEXT:    v_mul_f32_e32 v1, v1, v3
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_constained_fmul_v2f32_fpexcept_ignore:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_dual_mul_f32 v0, v0, v2 :: v_dual_mul_f32 v1, v1, v3
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %val = call <2 x float> @llvm.experimental.constrained.fmul.v2f32(<2 x float> %x, <2 x float> %y, metadata !"round.tonearest", metadata !"fpexcept.ignore")
  ret <2 x float> %val
}

; <2 x float> constrained fmul (round.tonearest, fpexcept.maytrap). The
; gfx900 and gfx1010 runs expect one v_mul_f32_e32 per lane; the gfx1100
; runs expect both lanes in a single v_dual_mul_f32 instruction.
define <2 x float> @v_constained_fmul_v2f32_fpexcept_maytrap(<2 x float> %x, <2 x float> %y) #0 {
; GCN-LABEL: v_constained_fmul_v2f32_fpexcept_maytrap:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_mul_f32_e32 v0, v0, v2
; GCN-NEXT:    v_mul_f32_e32 v1, v1, v3
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fmul_v2f32_fpexcept_maytrap:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_mul_f32_e32 v0, v0, v2
; GFX10-NEXT:    v_mul_f32_e32 v1, v1, v3
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_constained_fmul_v2f32_fpexcept_maytrap:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_dual_mul_f32 v0, v0, v2 :: v_dual_mul_f32 v1, v1, v3
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %val = call <2 x float> @llvm.experimental.constrained.fmul.v2f32(<2 x float> %x, <2 x float> %y, metadata !"round.tonearest", metadata !"fpexcept.maytrap")
  ret <2 x float> %val
}

; <3 x float> constrained fmul (round.tonearest, fpexcept.strict). The
; gfx900 and gfx1010 runs expect three v_mul_f32_e32; the gfx1100 runs
; expect lanes 0 and 1 in one v_dual_mul_f32 followed by a plain
; v_mul_f32_e32 for lane 2.
define <3 x float> @v_constained_fmul_v3f32_fpexcept_strict(<3 x float> %x, <3 x float> %y) #0 {
; GCN-LABEL: v_constained_fmul_v3f32_fpexcept_strict:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_mul_f32_e32 v0, v0, v3
; GCN-NEXT:    v_mul_f32_e32 v1, v1, v4
; GCN-NEXT:    v_mul_f32_e32 v2, v2, v5
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fmul_v3f32_fpexcept_strict:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_mul_f32_e32 v0, v0, v3
; GFX10-NEXT:    v_mul_f32_e32 v1, v1, v4
; GFX10-NEXT:    v_mul_f32_e32 v2, v2, v5
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_constained_fmul_v3f32_fpexcept_strict:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_dual_mul_f32 v0, v0, v3 :: v_dual_mul_f32 v1, v1, v4
; GFX11-NEXT:    v_mul_f32_e32 v2, v2, v5
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %val = call <3 x float> @llvm.experimental.constrained.fmul.v3f32(<3 x float> %x, <3 x float> %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret <3 x float> %val
}

; amdgpu_ps variant with both f32 operands passed inreg (they arrive in s2
; and s3). The gfx900 runs expect s3 to be copied into v0 before the
; multiply; the gfx1010 and gfx1100 runs expect a single v_mul_f32_e64
; reading both SGPRs directly.
define amdgpu_ps float @s_constained_fmul_f32_fpexcept_strict(float inreg %x, float inreg %y) #0 {
; GCN-LABEL: s_constained_fmul_f32_fpexcept_strict:
; GCN:       ; %bb.0:
; GCN-NEXT:    v_mov_b32_e32 v0, s3
; GCN-NEXT:    v_mul_f32_e32 v0, s2, v0
; GCN-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: s_constained_fmul_f32_fpexcept_strict:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    v_mul_f32_e64 v0, s2, s3
; GFX10PLUS-NEXT:    ; return to shader part epilog
  %val = call float @llvm.experimental.constrained.fmul.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret float %val
}

; Strict constrained fmul whose left operand is fabs(%x). The expected
; output folds the fabs into the multiply as the |v0| source modifier of
; v_mul_f32_e64; no separate instruction is expected for it.
define float @v_constained_fmul_f32_fpexcept_strict_fabs_lhs(float %x, float %y) #0 {
; GCN-LABEL: v_constained_fmul_f32_fpexcept_strict_fabs_lhs:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_mul_f32_e64 v0, |v0|, v1
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: v_constained_fmul_f32_fpexcept_strict_fabs_lhs:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT:    v_mul_f32_e64 v0, |v0|, v1
; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
  %fabs.x = call float @llvm.fabs.f32(float %x) #0
  %val = call float @llvm.experimental.constrained.fmul.f32(float %fabs.x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret float %val
}

; Strict constrained fmul whose right operand is fabs(%y). The expected
; output folds the fabs into the multiply as the |v1| source modifier of
; v_mul_f32_e64; no separate instruction is expected for it.
define float @v_constained_fmul_f32_fpexcept_strict_fabs_rhs(float %x, float %y) #0 {
; GCN-LABEL: v_constained_fmul_f32_fpexcept_strict_fabs_rhs:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_mul_f32_e64 v0, v0, |v1|
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: v_constained_fmul_f32_fpexcept_strict_fabs_rhs:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT:    v_mul_f32_e64 v0, v0, |v1|
; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
  %fabs.y = call float @llvm.fabs.f32(float %y) #0
  %val = call float @llvm.experimental.constrained.fmul.f32(float %x, float %fabs.y, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret float %val
}

; Strict constrained fmul whose left operand is fneg(fabs(%x)). The
; expected output folds both operations into the multiply as the combined
; -|v0| source modifier of v_mul_f32_e64.
define float @v_constained_fmul_f32_fpexcept_strict_fneg_fabs_lhs(float %x, float %y) #0 {
; GCN-LABEL: v_constained_fmul_f32_fpexcept_strict_fneg_fabs_lhs:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_mul_f32_e64 v0, -|v0|, v1
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: v_constained_fmul_f32_fpexcept_strict_fneg_fabs_lhs:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT:    v_mul_f32_e64 v0, -|v0|, v1
; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
  %fabs.x = call float @llvm.fabs.f32(float %x) #0
  %neg.fabs.x = fneg float %fabs.x
  %val = call float @llvm.experimental.constrained.fmul.f32(float %neg.fabs.x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret float %val
}

; Intrinsics called by the test functions in this file. The constrained
; fmul variants take two value operands followed by the rounding-mode and
; exception-behavior metadata operands.
declare float @llvm.fabs.f32(float)
declare float @llvm.experimental.constrained.fmul.f32(float, float, metadata, metadata)
declare <2 x float> @llvm.experimental.constrained.fmul.v2f32(<2 x float>, <2 x float>, metadata, metadata)
declare <3 x float> @llvm.experimental.constrained.fmul.v3f32(<3 x float>, <3 x float>, metadata, metadata)

; Attribute group #0 marks every test function, and the llvm.fabs call
; sites, as strictfp.
attributes #0 = { strictfp }