; xref: /llvm-project/llvm/test/CodeGen/AMDGPU/strict_fma.f32.ll (revision 76c22b18eafd2156568d72e9df2ff7bd3457888a)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefix=GFX10 %s
5
; Scalar f32 fused multiply-add issued through the constrained intrinsic with
; round.tonearest rounding and fpexcept.strict exception behavior. The checks
; for every tested target expect a single v_fma_f32 before the return.
define float @v_constained_fma_f32_fpexcept_strict(float %x, float %y, float %z) #0 {
; GCN-LABEL: v_constained_fma_f32_fpexcept_strict:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_fma_f32 v0, v0, v1, v2
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fma_f32_fpexcept_strict:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_fma_f32 v0, v0, v1, v2
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %fma = call float @llvm.experimental.constrained.fma.f32(float %x, float %y, float %z, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret float %fma
}
21
; <2 x float> form of the strict fma test. The checks expect the vector
; operation to come out as two v_fma_f32 instructions, one per element.
define <2 x float> @v_constained_fma_v2f32_fpexcept_strict(<2 x float> %x, <2 x float> %y, <2 x float> %z) #0 {
; GCN-LABEL: v_constained_fma_v2f32_fpexcept_strict:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_fma_f32 v0, v0, v2, v4
; GCN-NEXT:    v_fma_f32 v1, v1, v3, v5
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fma_v2f32_fpexcept_strict:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_fma_f32 v0, v0, v2, v4
; GFX10-NEXT:    v_fma_f32 v1, v1, v3, v5
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %fma = call <2 x float> @llvm.experimental.constrained.fma.v2f32(<2 x float> %x, <2 x float> %y, <2 x float> %z, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret <2 x float> %fma
}
39
; <3 x float> form of the strict fma test. The checks expect three
; v_fma_f32 instructions, one per element, with no extra instructions for
; the odd element count.
define <3 x float> @v_constained_fma_v3f32_fpexcept_strict(<3 x float> %x, <3 x float> %y, <3 x float> %z) #0 {
; GCN-LABEL: v_constained_fma_v3f32_fpexcept_strict:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_fma_f32 v0, v0, v3, v6
; GCN-NEXT:    v_fma_f32 v1, v1, v4, v7
; GCN-NEXT:    v_fma_f32 v2, v2, v5, v8
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fma_v3f32_fpexcept_strict:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_fma_f32 v0, v0, v3, v6
; GFX10-NEXT:    v_fma_f32 v1, v1, v4, v7
; GFX10-NEXT:    v_fma_f32 v2, v2, v5, v8
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %fma = call <3 x float> @llvm.experimental.constrained.fma.v3f32(<3 x float> %x, <3 x float> %y, <3 x float> %z, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret <3 x float> %fma
}
59
; <4 x float> form of the strict fma test. The checks expect four
; v_fma_f32 instructions, one per element.
define <4 x float> @v_constained_fma_v4f32_fpexcept_strict(<4 x float> %x, <4 x float> %y, <4 x float> %z) #0 {
; GCN-LABEL: v_constained_fma_v4f32_fpexcept_strict:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_fma_f32 v0, v0, v4, v8
; GCN-NEXT:    v_fma_f32 v1, v1, v5, v9
; GCN-NEXT:    v_fma_f32 v2, v2, v6, v10
; GCN-NEXT:    v_fma_f32 v3, v3, v7, v11
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fma_v4f32_fpexcept_strict:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_fma_f32 v0, v0, v4, v8
; GFX10-NEXT:    v_fma_f32 v1, v1, v5, v9
; GFX10-NEXT:    v_fma_f32 v2, v2, v6, v10
; GFX10-NEXT:    v_fma_f32 v3, v3, v7, v11
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %fma = call <4 x float> @llvm.experimental.constrained.fma.v4f32(<4 x float> %x, <4 x float> %y, <4 x float> %z, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret <4 x float> %fma
}
81
; The addend is negated with a plain fneg before it reaches the strict fma.
; The checks expect that negation to show up as a source modifier (-v2) on
; the single v_fma_f32 rather than as a separate instruction.
define float @v_constained_fma_f32_fpexcept_strict_fneg(float %x, float %y, float %z) #0 {
; GCN-LABEL: v_constained_fma_f32_fpexcept_strict_fneg:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_fma_f32 v0, v0, v1, -v2
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fma_f32_fpexcept_strict_fneg:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_fma_f32 v0, v0, v1, -v2
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %z.neg = fneg float %z
  %fma = call float @llvm.experimental.constrained.fma.f32(float %x, float %y, float %z.neg, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret float %fma
}
98
; Both multiplicands are negated with fneg before the strict fma. The checks
; expect the two negations to be kept as source modifiers (-v0, -v1) on the
; single v_fma_f32.
define float @v_constained_fma_f32_fpexcept_strict_fneg_fneg(float %x, float %y, float %z) #0 {
; GCN-LABEL: v_constained_fma_f32_fpexcept_strict_fneg_fneg:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_fma_f32 v0, -v0, -v1, v2
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fma_f32_fpexcept_strict_fneg_fneg:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_fma_f32 v0, -v0, -v1, v2
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %x.neg = fneg float %x
  %y.neg = fneg float %y
  %fma = call float @llvm.experimental.constrained.fma.f32(float %x.neg, float %y.neg, float %z, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret float %fma
}
116
; Both multiplicands pass through llvm.fabs.f32 before the strict fma. The
; checks expect the absolute values to appear as source modifiers (|v0|,
; |v1|) on the single v_fma_f32.
; The intermediate values are named %abs.* because they hold fabs results,
; not negations.
define float @v_constained_fma_f32_fpexcept_strict_fabs_fabs(float %x, float %y, float %z) #0 {
; GCN-LABEL: v_constained_fma_f32_fpexcept_strict_fabs_fabs:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_fma_f32 v0, |v0|, |v1|, v2
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fma_f32_fpexcept_strict_fabs_fabs:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_fma_f32 v0, |v0|, |v1|, v2
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %abs.x = call float @llvm.fabs.f32(float %x) #0
  %abs.y = call float @llvm.fabs.f32(float %y) #0
  %val = call float @llvm.experimental.constrained.fma.f32(float %abs.x, float %abs.y, float %z, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret float %val
}
134
; <2 x float> version of the double-negation test: both multiplicand vectors
; are negated with fneg before the strict fma. The checks expect two
; v_fma_f32 instructions, each carrying the negations as source modifiers.
define <2 x float> @v_constained_fma_v2f32_fpexcept_strict_fneg_fneg(<2 x float> %x, <2 x float> %y, <2 x float> %z) #0 {
; GCN-LABEL: v_constained_fma_v2f32_fpexcept_strict_fneg_fneg:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_fma_f32 v0, -v0, -v2, v4
; GCN-NEXT:    v_fma_f32 v1, -v1, -v3, v5
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fma_v2f32_fpexcept_strict_fneg_fneg:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_fma_f32 v0, -v0, -v2, v4
; GFX10-NEXT:    v_fma_f32 v1, -v1, -v3, v5
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %x.neg = fneg <2 x float> %x
  %y.neg = fneg <2 x float> %y
  %fma = call <2 x float> @llvm.experimental.constrained.fma.v2f32(<2 x float> %x.neg, <2 x float> %y.neg, <2 x float> %z, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret <2 x float> %fma
}
154
; Constrained fma intrinsics exercised by the functions in this file, one
; overload per tested type. The two trailing metadata operands carry the
; rounding mode and the exception behavior.
declare float @llvm.experimental.constrained.fma.f32(float, float, float, metadata, metadata)
declare <2 x float> @llvm.experimental.constrained.fma.v2f32(<2 x float>, <2 x float>, <2 x float>, metadata, metadata)
declare <3 x float> @llvm.experimental.constrained.fma.v3f32(<3 x float>, <3 x float>, <3 x float>, metadata, metadata)
declare <4 x float> @llvm.experimental.constrained.fma.v4f32(<4 x float>, <4 x float>, <4 x float>, metadata, metadata)

; Ordinary (non-constrained) fabs, used by the fabs_fabs test.
declare float @llvm.fabs.f32(float)

; Attribute group referenced as #0 by the definitions and fabs call sites in
; this file.
attributes #0 = { strictfp }
162