; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,SIGFX11 %s
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,SIGFX11 %s
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx90a -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,GFX90A %s
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,SIGFX11 %s

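; Tests f64 FMA instruction selection. The SI, tonga, and gfx1100 runs
; (SIGFX11) are expected to select the VOP3 v_fma_f64; gfx90a (GFX90A) can
; also select the VOP2 v_fmac_f64_e32 form, which reuses the addend register
; as the accumulator/destination.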
declare double @llvm.fma.f64(double, double, double) nounwind readnone
declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
declare <4 x double> @llvm.fma.v4f64(<4 x double>, <4 x double>, <4 x double>) nounwind readnone
declare double @llvm.fabs.f64(double) nounwind readnone

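; Basic scalar case: a single f64 fma on loaded values.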
; FUNC-LABEL: {{^}}fma_f64:
; SIGFX11: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
; GFX90A: v_fmac_f64_e32 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @fma_f64(ptr addrspace(1) %out, ptr addrspace(1) %in1,
                     ptr addrspace(1) %in2, ptr addrspace(1) %in3) {
   %r0 = load double, ptr addrspace(1) %in1
   %r1 = load double, ptr addrspace(1) %in2
   %r2 = load double, ptr addrspace(1) %in3
   %r3 = tail call double @llvm.fma.f64(double %r0, double %r1, double %r2)
   store double %r3, ptr addrspace(1) %out
   ret void
}

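; <2 x double> is not a legal register type, so the vector fma is expected to
; be split into two scalar f64 FMAs (hence the two CHECK lines per prefix).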
; FUNC-LABEL: {{^}}fma_v2f64:
; SIGFX11: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
; SIGFX11: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
; GFX90A: v_fmac_f64_e32 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
; GFX90A: v_fmac_f64_e32 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @fma_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %in1,
                       ptr addrspace(1) %in2, ptr addrspace(1) %in3) {
   %r0 = load <2 x double>, ptr addrspace(1) %in1
   %r1 = load <2 x double>, ptr addrspace(1) %in2
   %r2 = load <2 x double>, ptr addrspace(1) %in3
   %r3 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %r0, <2 x double> %r1, <2 x double> %r2)
   store <2 x double> %r3, ptr addrspace(1) %out
   ret void
}

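; Likewise, the <4 x double> fma is expected to be split into four scalar
; f64 FMAs.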
; FUNC-LABEL: {{^}}fma_v4f64:
; SIGFX11: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
; SIGFX11: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
; SIGFX11: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
; SIGFX11: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
; GFX90A: v_fmac_f64_e32 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
; GFX90A: v_fmac_f64_e32 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
; GFX90A: v_fmac_f64_e32 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
; GFX90A: v_fmac_f64_e32 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @fma_v4f64(ptr addrspace(1) %out, ptr addrspace(1) %in1,
                       ptr addrspace(1) %in2, ptr addrspace(1) %in3) {
   %r0 = load <4 x double>, ptr addrspace(1) %in1
   %r1 = load <4 x double>, ptr addrspace(1) %in2
   %r2 = load <4 x double>, ptr addrspace(1) %in3
   %r3 = tail call <4 x double> @llvm.fma.v4f64(<4 x double> %r0, <4 x double> %r1, <4 x double> %r2)
   store <4 x double> %r3, ptr addrspace(1) %out
   ret void
}

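; The abs_src* tests check that fabs on each operand folds into the VOP3
; source-modifier bits (matched as |v[...]|) rather than being emitted as a
; separate instruction.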
; FUNC-LABEL: {{^}}fma_f64_abs_src0:
; GCN: v_fma_f64 {{v\[[0-9]+:[0-9]+\], \|v\[[0-9]+:[0-9]+\]\|, v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @fma_f64_abs_src0(ptr addrspace(1) %out, ptr addrspace(1) %in1,
                     ptr addrspace(1) %in2, ptr addrspace(1) %in3) {
   %r0 = load double, ptr addrspace(1) %in1
   %r1 = load double, ptr addrspace(1) %in2
   %r2 = load double, ptr addrspace(1) %in3
   %fabs = call double @llvm.fabs.f64(double %r0)
   %r3 = tail call double @llvm.fma.f64(double %fabs, double %r1, double %r2)
   store double %r3, ptr addrspace(1) %out
   ret void
}

; FUNC-LABEL: {{^}}fma_f64_abs_src1:
; GCN: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], \|v\[[0-9]+:[0-9]+\]\|, v\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @fma_f64_abs_src1(ptr addrspace(1) %out, ptr addrspace(1) %in1,
                     ptr addrspace(1) %in2, ptr addrspace(1) %in3) {
   %r0 = load double, ptr addrspace(1) %in1
   %r1 = load double, ptr addrspace(1) %in2
   %r2 = load double, ptr addrspace(1) %in3
   %fabs = call double @llvm.fabs.f64(double %r1)
   %r3 = tail call double @llvm.fma.f64(double %r0, double %fabs, double %r2)
   store double %r3, ptr addrspace(1) %out
   ret void
}

; FUNC-LABEL: {{^}}fma_f64_abs_src2:
; GCN: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], \|v\[[0-9]+:[0-9]+\]\|}}
define amdgpu_kernel void @fma_f64_abs_src2(ptr addrspace(1) %out, ptr addrspace(1) %in1,
                     ptr addrspace(1) %in2, ptr addrspace(1) %in3) {
   %r0 = load double, ptr addrspace(1) %in1
   %r1 = load double, ptr addrspace(1) %in2
   %r2 = load double, ptr addrspace(1) %in3
   %fabs = call double @llvm.fabs.f64(double %r2)
   %r3 = tail call double @llvm.fma.f64(double %r0, double %r1, double %fabs)
   store double %r3, ptr addrspace(1) %out
   ret void
}

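; The neg_src* tests check that fneg, written as an fsub from -0.0, folds
; into the neg source modifier (matched as -v[...]).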
; FUNC-LABEL: {{^}}fma_f64_neg_src0:
; GCN: v_fma_f64 {{v\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @fma_f64_neg_src0(ptr addrspace(1) %out, ptr addrspace(1) %in1,
                     ptr addrspace(1) %in2, ptr addrspace(1) %in3) {
   %r0 = load double, ptr addrspace(1) %in1
   %r1 = load double, ptr addrspace(1) %in2
   %r2 = load double, ptr addrspace(1) %in3
   %fsub = fsub double -0.000000e+00, %r0
   %r3 = tail call double @llvm.fma.f64(double %fsub, double %r1, double %r2)
   store double %r3, ptr addrspace(1) %out
   ret void
}

; FUNC-LABEL: {{^}}fma_f64_neg_src1:
; GCN: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @fma_f64_neg_src1(ptr addrspace(1) %out, ptr addrspace(1) %in1,
                     ptr addrspace(1) %in2, ptr addrspace(1) %in3) {
   %r0 = load double, ptr addrspace(1) %in1
   %r1 = load double, ptr addrspace(1) %in2
   %r2 = load double, ptr addrspace(1) %in3
   %fsub = fsub double -0.000000e+00, %r1
   %r3 = tail call double @llvm.fma.f64(double %r0, double %fsub, double %r2)
   store double %r3, ptr addrspace(1) %out
   ret void
}

; FUNC-LABEL: {{^}}fma_f64_neg_src2:
; GCN: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @fma_f64_neg_src2(ptr addrspace(1) %out, ptr addrspace(1) %in1,
                     ptr addrspace(1) %in2, ptr addrspace(1) %in3) {
   %r0 = load double, ptr addrspace(1) %in1
   %r1 = load double, ptr addrspace(1) %in2
   %r2 = load double, ptr addrspace(1) %in3
   %fsub = fsub double -0.000000e+00, %r2
   %r3 = tail call double @llvm.fma.f64(double %r0, double %r1, double %fsub)
   store double %r3, ptr addrspace(1) %out
   ret void
}

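; The abs_neg_src* tests combine both modifiers: fneg(fabs(x)) is expected
; to fold to the -|v[...]| modifier pair on a single v_fma_f64.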
; FUNC-LABEL: {{^}}fma_f64_abs_neg_src0:
; GCN: v_fma_f64 {{v\[[0-9]+:[0-9]+\], -\|v\[[0-9]+:[0-9]+\]\|, v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @fma_f64_abs_neg_src0(ptr addrspace(1) %out, ptr addrspace(1) %in1,
                     ptr addrspace(1) %in2, ptr addrspace(1) %in3) {
   %r0 = load double, ptr addrspace(1) %in1
   %r1 = load double, ptr addrspace(1) %in2
   %r2 = load double, ptr addrspace(1) %in3
   %fabs = call double @llvm.fabs.f64(double %r0)
   %fsub = fsub double -0.000000e+00, %fabs
   %r3 = tail call double @llvm.fma.f64(double %fsub, double %r1, double %r2)
   store double %r3, ptr addrspace(1) %out
   ret void
}

; FUNC-LABEL: {{^}}fma_f64_abs_neg_src1:
; GCN: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], -\|v\[[0-9]+:[0-9]+\]\|, v\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @fma_f64_abs_neg_src1(ptr addrspace(1) %out, ptr addrspace(1) %in1,
                     ptr addrspace(1) %in2, ptr addrspace(1) %in3) {
   %r0 = load double, ptr addrspace(1) %in1
   %r1 = load double, ptr addrspace(1) %in2
   %r2 = load double, ptr addrspace(1) %in3
   %fabs = call double @llvm.fabs.f64(double %r1)
   %fsub = fsub double -0.000000e+00, %fabs
   %r3 = tail call double @llvm.fma.f64(double %r0, double %fsub, double %r2)
   store double %r3, ptr addrspace(1) %out
   ret void
}

; FUNC-LABEL: {{^}}fma_f64_abs_neg_src2:
; GCN: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], -\|v\[[0-9]+:[0-9]+\]\|}}
define amdgpu_kernel void @fma_f64_abs_neg_src2(ptr addrspace(1) %out, ptr addrspace(1) %in1,
                     ptr addrspace(1) %in2, ptr addrspace(1) %in3) {
   %r0 = load double, ptr addrspace(1) %in1
   %r1 = load double, ptr addrspace(1) %in2
   %r2 = load double, ptr addrspace(1) %in3
   %fabs = call double @llvm.fabs.f64(double %r2)
   %fsub = fsub double -0.000000e+00, %fabs
   %r3 = tail call double @llvm.fma.f64(double %r0, double %r1, double %fsub)
   store double %r3, ptr addrspace(1) %out
   ret void
}

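; The lit_src* tests use the inline constant 2.0 as one operand. For src0 and
; src1 the multiply commutes, so gfx90a can still select v_fmac_f64 with the
; constant as a multiplicand; a constant addend (src2) cannot use the fmac
; form, since fmac's addend is the destination register, so all targets are
; expected to select v_fma_f64 there.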
; FUNC-LABEL: {{^}}fma_f64_lit_src0:
; SIGFX11: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], 2.0, v\[[0-9]+:[0-9]+\]}}
; GFX90A: v_fmac_f64_e32 {{v\[[0-9]+:[0-9]+\], 2.0, v\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @fma_f64_lit_src0(ptr addrspace(1) %out,
                     ptr addrspace(1) %in2, ptr addrspace(1) %in3) {
   %r1 = load double, ptr addrspace(1) %in2
   %r2 = load double, ptr addrspace(1) %in3
   %r3 = tail call double @llvm.fma.f64(double +2.0, double %r1, double %r2)
   store double %r3, ptr addrspace(1) %out
   ret void
}

; FUNC-LABEL: {{^}}fma_f64_lit_src1:
; SIGFX11: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], 2.0, v\[[0-9]+:[0-9]+\]}}
; GFX90A: v_fmac_f64_e32 {{v\[[0-9]+:[0-9]+\], 2.0, v\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @fma_f64_lit_src1(ptr addrspace(1) %out, ptr addrspace(1) %in1,
                     ptr addrspace(1) %in3) {
   %r0 = load double, ptr addrspace(1) %in1
   %r2 = load double, ptr addrspace(1) %in3
   %r3 = tail call double @llvm.fma.f64(double %r0, double +2.0, double %r2)
   store double %r3, ptr addrspace(1) %out
   ret void
}

; FUNC-LABEL: {{^}}fma_f64_lit_src2:
; GCN: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], 2.0}}
define amdgpu_kernel void @fma_f64_lit_src2(ptr addrspace(1) %out, ptr addrspace(1) %in1,
                     ptr addrspace(1) %in2) {
   %r0 = load double, ptr addrspace(1) %in1
   %r1 = load double, ptr addrspace(1) %in2
   %r3 = tail call double @llvm.fma.f64(double %r0, double %r1, double +2.0)
   store double %r3, ptr addrspace(1) %out
   ret void
}