xref: /llvm-project/llvm/test/CodeGen/AMDGPU/strict_fma.f64.ll (revision 76c22b18eafd2156568d72e9df2ff7bd3457888a)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefix=GCN %s
3; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
4; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefix=GFX10 %s
5
; Scalar f64 constrained FMA with round.tonearest / fpexcept.strict.
; Both check prefixes expect exactly one v_fma_f64 on the three incoming
; 64-bit register pairs, followed directly by the return.
; The check lines below are autogenerated (see the NOTE at the top of the
; file); regenerate them with utils/update_llc_test_checks.py rather than
; editing by hand.
6define double @v_constained_fma_f64_fpexcept_strict(double %x, double %y, double %z) #0 {
7; GCN-LABEL: v_constained_fma_f64_fpexcept_strict:
8; GCN:       ; %bb.0:
9; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10; GCN-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
11; GCN-NEXT:    s_setpc_b64 s[30:31]
12;
13; GFX10-LABEL: v_constained_fma_f64_fpexcept_strict:
14; GFX10:       ; %bb.0:
15; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
16; GFX10-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
17; GFX10-NEXT:    s_setpc_b64 s[30:31]
18  %val = call double @llvm.experimental.constrained.fma.f64(double %x, double %y, double %z, metadata !"round.tonearest", metadata !"fpexcept.strict")
19  ret double %val
20}
21
; <2 x double> constrained FMA with round.tonearest / fpexcept.strict.
; The checks expect the vector operation to be emitted as two v_fma_f64
; instructions, one per element, with no other instructions between the
; initial s_waitcnt and the return.
22define <2 x double> @v_constained_fma_v2f64_fpexcept_strict(<2 x double> %x, <2 x double> %y, <2 x double> %z) #0 {
23; GCN-LABEL: v_constained_fma_v2f64_fpexcept_strict:
24; GCN:       ; %bb.0:
25; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
26; GCN-NEXT:    v_fma_f64 v[0:1], v[0:1], v[4:5], v[8:9]
27; GCN-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[10:11]
28; GCN-NEXT:    s_setpc_b64 s[30:31]
29;
30; GFX10-LABEL: v_constained_fma_v2f64_fpexcept_strict:
31; GFX10:       ; %bb.0:
32; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
33; GFX10-NEXT:    v_fma_f64 v[0:1], v[0:1], v[4:5], v[8:9]
34; GFX10-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[10:11]
35; GFX10-NEXT:    s_setpc_b64 s[30:31]
36  %val = call <2 x double> @llvm.experimental.constrained.fma.v2f64(<2 x double> %x, <2 x double> %y, <2 x double> %z, metadata !"round.tonearest", metadata !"fpexcept.strict")
37  ret <2 x double> %val
38}
39
; <3 x double> constrained FMA with round.tonearest / fpexcept.strict.
; The checks expect exactly three v_fma_f64 instructions (one per element)
; for this odd-sized vector, with no extra instructions around them.
40define <3 x double> @v_constained_fma_v3f64_fpexcept_strict(<3 x double> %x, <3 x double> %y, <3 x double> %z) #0 {
41; GCN-LABEL: v_constained_fma_v3f64_fpexcept_strict:
42; GCN:       ; %bb.0:
43; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
44; GCN-NEXT:    v_fma_f64 v[0:1], v[0:1], v[6:7], v[12:13]
45; GCN-NEXT:    v_fma_f64 v[2:3], v[2:3], v[8:9], v[14:15]
46; GCN-NEXT:    v_fma_f64 v[4:5], v[4:5], v[10:11], v[16:17]
47; GCN-NEXT:    s_setpc_b64 s[30:31]
48;
49; GFX10-LABEL: v_constained_fma_v3f64_fpexcept_strict:
50; GFX10:       ; %bb.0:
51; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
52; GFX10-NEXT:    v_fma_f64 v[0:1], v[0:1], v[6:7], v[12:13]
53; GFX10-NEXT:    v_fma_f64 v[2:3], v[2:3], v[8:9], v[14:15]
54; GFX10-NEXT:    v_fma_f64 v[4:5], v[4:5], v[10:11], v[16:17]
55; GFX10-NEXT:    s_setpc_b64 s[30:31]
56  %val = call <3 x double> @llvm.experimental.constrained.fma.v3f64(<3 x double> %x, <3 x double> %y, <3 x double> %z, metadata !"round.tonearest", metadata !"fpexcept.strict")
57  ret <3 x double> %val
58}
59
; <4 x double> constrained FMA with round.tonearest / fpexcept.strict.
; The checks expect four v_fma_f64 instructions, one per element, and
; nothing else between the initial s_waitcnt and the return.
60define <4 x double> @v_constained_fma_v4f64_fpexcept_strict(<4 x double> %x, <4 x double> %y, <4 x double> %z) #0 {
61; GCN-LABEL: v_constained_fma_v4f64_fpexcept_strict:
62; GCN:       ; %bb.0:
63; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
64; GCN-NEXT:    v_fma_f64 v[0:1], v[0:1], v[8:9], v[16:17]
65; GCN-NEXT:    v_fma_f64 v[2:3], v[2:3], v[10:11], v[18:19]
66; GCN-NEXT:    v_fma_f64 v[4:5], v[4:5], v[12:13], v[20:21]
67; GCN-NEXT:    v_fma_f64 v[6:7], v[6:7], v[14:15], v[22:23]
68; GCN-NEXT:    s_setpc_b64 s[30:31]
69;
70; GFX10-LABEL: v_constained_fma_v4f64_fpexcept_strict:
71; GFX10:       ; %bb.0:
72; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
73; GFX10-NEXT:    v_fma_f64 v[0:1], v[0:1], v[8:9], v[16:17]
74; GFX10-NEXT:    v_fma_f64 v[2:3], v[2:3], v[10:11], v[18:19]
75; GFX10-NEXT:    v_fma_f64 v[4:5], v[4:5], v[12:13], v[20:21]
76; GFX10-NEXT:    v_fma_f64 v[6:7], v[6:7], v[14:15], v[22:23]
77; GFX10-NEXT:    s_setpc_b64 s[30:31]
78  %val = call <4 x double> @llvm.experimental.constrained.fma.v4f64(<4 x double> %x, <4 x double> %y, <4 x double> %z, metadata !"round.tonearest", metadata !"fpexcept.strict")
79  ret <4 x double> %val
80}
81
; Scalar f64 constrained FMA whose addend is an fneg of %z.
; The checks expect the negation to appear as a source modifier on the
; third operand (-v[4:5]) of a single v_fma_f64, with no separate
; instruction for the fneg.
82define double @v_constained_fma_f64_fpexcept_strict_fneg(double %x, double %y, double %z) #0 {
83; GCN-LABEL: v_constained_fma_f64_fpexcept_strict_fneg:
84; GCN:       ; %bb.0:
85; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
86; GCN-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], -v[4:5]
87; GCN-NEXT:    s_setpc_b64 s[30:31]
88;
89; GFX10-LABEL: v_constained_fma_f64_fpexcept_strict_fneg:
90; GFX10:       ; %bb.0:
91; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
92; GFX10-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], -v[4:5]
93; GFX10-NEXT:    s_setpc_b64 s[30:31]
94  %neg.z = fneg double %z
95  %val = call double @llvm.experimental.constrained.fma.f64(double %x, double %y, double %neg.z, metadata !"round.tonearest", metadata !"fpexcept.strict")
96  ret double %val
97}
98
; Scalar f64 constrained FMA whose two multiplicands are fneg'd.
; The checks expect both negations to be kept as source modifiers on the
; first two operands (-v[0:1], -v[2:3]) of a single v_fma_f64; the
; expected output neither cancels the two negations nor emits separate
; negate instructions.
99define double @v_constained_fma_f64_fpexcept_strict_fneg_fneg(double %x, double %y, double %z) #0 {
100; GCN-LABEL: v_constained_fma_f64_fpexcept_strict_fneg_fneg:
101; GCN:       ; %bb.0:
102; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
103; GCN-NEXT:    v_fma_f64 v[0:1], -v[0:1], -v[2:3], v[4:5]
104; GCN-NEXT:    s_setpc_b64 s[30:31]
105;
106; GFX10-LABEL: v_constained_fma_f64_fpexcept_strict_fneg_fneg:
107; GFX10:       ; %bb.0:
108; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
109; GFX10-NEXT:    v_fma_f64 v[0:1], -v[0:1], -v[2:3], v[4:5]
110; GFX10-NEXT:    s_setpc_b64 s[30:31]
111  %neg.x = fneg double %x
112  %neg.y = fneg double %y
113  %val = call double @llvm.experimental.constrained.fma.f64(double %neg.x, double %neg.y, double %z, metadata !"round.tonearest", metadata !"fpexcept.strict")
114  ret double %val
115}
116
; Scalar f64 constrained FMA whose two multiplicands are llvm.fabs results.
; The checks expect both absolute values to appear as source modifiers on
; the first two operands (|v[0:1]|, |v[2:3]|) of a single v_fma_f64, with
; no separate instructions for the fabs calls.
; The IR value names are %abs.x / %abs.y to match the operation; value
; names do not appear in the emitted assembly, so the autogenerated check
; lines are unchanged.
117define double @v_constained_fma_f64_fpexcept_strict_fabs_fabs(double %x, double %y, double %z) #0 {
118; GCN-LABEL: v_constained_fma_f64_fpexcept_strict_fabs_fabs:
119; GCN:       ; %bb.0:
120; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
121; GCN-NEXT:    v_fma_f64 v[0:1], |v[0:1]|, |v[2:3]|, v[4:5]
122; GCN-NEXT:    s_setpc_b64 s[30:31]
123;
124; GFX10-LABEL: v_constained_fma_f64_fpexcept_strict_fabs_fabs:
125; GFX10:       ; %bb.0:
126; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
127; GFX10-NEXT:    v_fma_f64 v[0:1], |v[0:1]|, |v[2:3]|, v[4:5]
128; GFX10-NEXT:    s_setpc_b64 s[30:31]
129  %abs.x = call double @llvm.fabs.f64(double %x) #0
130  %abs.y = call double @llvm.fabs.f64(double %y) #0
131  %val = call double @llvm.experimental.constrained.fma.f64(double %abs.x, double %abs.y, double %z, metadata !"round.tonearest", metadata !"fpexcept.strict")
132  ret double %val
133}
134
; <2 x double> constrained FMA whose two multiplicands are vector fneg's.
; The checks expect two v_fma_f64 instructions, one per element, each
; carrying the negations as source modifiers on its first two operands,
; with no separate negate instructions.
135define <2 x double> @v_constained_fma_v2f64_fpexcept_strict_fneg_fneg(<2 x double> %x, <2 x double> %y, <2 x double> %z) #0 {
136; GCN-LABEL: v_constained_fma_v2f64_fpexcept_strict_fneg_fneg:
137; GCN:       ; %bb.0:
138; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
139; GCN-NEXT:    v_fma_f64 v[0:1], -v[0:1], -v[4:5], v[8:9]
140; GCN-NEXT:    v_fma_f64 v[2:3], -v[2:3], -v[6:7], v[10:11]
141; GCN-NEXT:    s_setpc_b64 s[30:31]
142;
143; GFX10-LABEL: v_constained_fma_v2f64_fpexcept_strict_fneg_fneg:
144; GFX10:       ; %bb.0:
145; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
146; GFX10-NEXT:    v_fma_f64 v[0:1], -v[0:1], -v[4:5], v[8:9]
147; GFX10-NEXT:    v_fma_f64 v[2:3], -v[2:3], -v[6:7], v[10:11]
148; GFX10-NEXT:    s_setpc_b64 s[30:31]
149  %neg.x = fneg <2 x double> %x
150  %neg.y = fneg <2 x double> %y
151  %val = call <2 x double> @llvm.experimental.constrained.fma.v2f64(<2 x double> %neg.x, <2 x double> %neg.y, <2 x double> %z, metadata !"round.tonearest", metadata !"fpexcept.strict")
152  ret <2 x double> %val
153}
154
; Intrinsic declarations used by the test functions above: llvm.fabs.f64
; and the constrained FMA at scalar, <2 x>, <3 x> and <4 x> double widths.
; Each constrained FMA takes its three value operands followed by two
; metadata operands; every call in this file passes !"round.tonearest"
; and !"fpexcept.strict" for them.
155declare double @llvm.fabs.f64(double)
156declare double @llvm.experimental.constrained.fma.f64(double, double, double, metadata, metadata)
157declare <2 x double> @llvm.experimental.constrained.fma.v2f64(<2 x double>, <2 x double>, <2 x double>, metadata, metadata)
158declare <3 x double> @llvm.experimental.constrained.fma.v3f64(<3 x double>, <3 x double>, <3 x double>, metadata, metadata)
159declare <4 x double> @llvm.experimental.constrained.fma.v4f64(<4 x double>, <4 x double>, <4 x double>, metadata, metadata)
160
; Attribute group #0 is attached to every function definition in this file
; and to the llvm.fabs call sites.
161attributes #0 = { strictfp }
162