// RUN: %clang_cc1 -O1 -triple arm64-apple-ios7 -target-feature +neon -ffreestanding -S -o - -emit-llvm %s | FileCheck %s
// Test ARM64 SIMD fused multiply add intrinsics

#include <arm_neon.h>
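// NB: the vfma intrinsics follow the usual NEON definition, roughly
//   vfma_f32(a1, a2, a3)[i]  ==  a1[i] + a2[i] * a3[i]
// and Clang lowers them onto llvm.fma, which computes x*y + z, so the
// accumulator ends up as the last fma operand. A scalar sketch of one lane
// (illustrative only, using fmaf from <math.h>):
//   r = fmaf(a2[i], a3[i], a1[i]);   // == a2[i]*a3[i] + a1[i]
// This is why the CHECK patterns below expect the operands in (a2, a3, a1) order.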
float32x2_t test_vfma_f32(float32x2_t a1, float32x2_t a2, float32x2_t a3) {
  // CHECK: test_vfma_f32
  return vfma_f32(a1, a2, a3);
  // CHECK: llvm.fma.v2f32({{.*a2, .*a3, .*a1}})
  // CHECK-NEXT: ret
}

float32x4_t test_vfmaq_f32(float32x4_t a1, float32x4_t a2, float32x4_t a3) {
  // CHECK: test_vfmaq_f32
  return vfmaq_f32(a1, a2, a3);
  // CHECK: llvm.fma.v4f32({{.*a2, .*a3, .*a1}})
  // CHECK-NEXT: ret
}

float64x2_t test_vfmaq_f64(float64x2_t a1, float64x2_t a2, float64x2_t a3) {
  // CHECK: test_vfmaq_f64
  return vfmaq_f64(a1, a2, a3);
  // CHECK: llvm.fma.v2f64({{.*a2, .*a3, .*a1}})
  // CHECK-NEXT: ret
}
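// NB: for the _lane_ variants below, the selected lane of a3 is broadcast to
// a full vector (typically via a shufflevector) before the fused multiply-add,
// so the checks only pin down the operands we care about. A rough scalar model
// (illustrative only) of vfma_lane_f32 with lane n would be:
//   r[i] = a2[i] * a3[n] + a1[i];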
float32x2_t test_vfma_lane_f32(float32x2_t a1, float32x2_t a2, float32x2_t a3) {
  // CHECK: test_vfma_lane_f32
  return vfma_lane_f32(a1, a2, a3, 1);
  // NB: the test below is deliberately loose, so that we don't depend too much
  // upon the exact IR used to select lane 1 (usually a shufflevector)
  // CHECK: llvm.fma.v2f32(<2 x float> %a2, <2 x float> {{.*}}, <2 x float> %a1)
  // CHECK-NEXT: ret
}

float32x4_t test_vfmaq_lane_f32(float32x4_t a1, float32x4_t a2, float32x2_t a3) {
  // CHECK: test_vfmaq_lane_f32
  return vfmaq_lane_f32(a1, a2, a3, 1);
  // NB: the test below is deliberately loose, so that we don't depend too much
  // upon the exact IR used to select lane 1 (usually a shufflevector)
  // CHECK: llvm.fma.v4f32(<4 x float> %a2, <4 x float> {{.*}}, <4 x float> %a1)
  // CHECK-NEXT: ret
}

float64x2_t test_vfmaq_lane_f64(float64x2_t a1, float64x2_t a2, float64x1_t a3) {
  // CHECK: test_vfmaq_lane_f64
  return vfmaq_lane_f64(a1, a2, a3, 0);
  // NB: the test below is deliberately loose, so that we don't depend too much
  // upon the exact IR used to select lane 0 (usually a shufflevector)
  // CHECK: llvm.fma.v2f64(<2 x double> %a2, <2 x double> {{.*}}, <2 x double> %a1)
  // CHECK-NEXT: ret
}
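// NB: the _n_ variants below take a scalar a3 and splat it into every lane
// (typically via a chain of insertelements) before the fma, so the checks
// only look for the right llvm.fma overload. A rough scalar model
// (illustrative only) of vfma_n_f32 would be:
//   r[i] = a2[i] * a3 + a1[i];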
float32x2_t test_vfma_n_f32(float32x2_t a1, float32x2_t a2, float32_t a3) {
  // CHECK: test_vfma_n_f32
  return vfma_n_f32(a1, a2, a3);
  // NB: the test below is deliberately loose, so that we don't depend too much
  // upon the exact IR used to select lane 0 (usually two insertelements)
  // CHECK: llvm.fma.v2f32
  // CHECK-NEXT: ret
}

float32x4_t test_vfmaq_n_f32(float32x4_t a1, float32x4_t a2, float32_t a3) {
  // CHECK: test_vfmaq_n_f32
  return vfmaq_n_f32(a1, a2, a3);
  // NB: the test below is deliberately loose, so that we don't depend too much
  // upon the exact IR used to select lane 0 (usually four insertelements)
  // CHECK: llvm.fma.v4f32
  // CHECK-NEXT: ret
}

float64x2_t test_vfmaq_n_f64(float64x2_t a1, float64x2_t a2, float64_t a3) {
  // CHECK: test_vfmaq_n_f64
  return vfmaq_n_f64(a1, a2, a3);
  // NB: the test below is deliberately loose, so that we don't depend too much
  // upon the exact IR used to select lane 0 (usually two insertelements)
  // CHECK: llvm.fma.v2f64
  // CHECK-NEXT: ret
}
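// NB: the vfms variants below compute a1 - a2 * a3. There is no separate
// fused multiply-subtract intrinsic in the IR here; instead one multiplicand
// is negated (the fsub captured as [[NEG]], typically a subtraction from a
// -0.0 splat) and the result is fed to llvm.fma, i.e. roughly:
//   r[i] = a3[i] * (-a2[i]) + a1[i];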
float32x2_t test_vfms_f32(float32x2_t a1, float32x2_t a2, float32x2_t a3) {
  // CHECK: test_vfms_f32
  return vfms_f32(a1, a2, a3);
  // CHECK: [[NEG:%.*]] = fsub <2 x float> {{.*}}, %a2
  // CHECK: llvm.fma.v2f32(<2 x float> %a3, <2 x float> [[NEG]], <2 x float> %a1)
  // CHECK-NEXT: ret
}

float32x4_t test_vfmsq_f32(float32x4_t a1, float32x4_t a2, float32x4_t a3) {
  // CHECK: test_vfmsq_f32
  return vfmsq_f32(a1, a2, a3);
  // CHECK: [[NEG:%.*]] = fsub <4 x float> {{.*}}, %a2
  // CHECK: llvm.fma.v4f32(<4 x float> %a3, <4 x float> [[NEG]], <4 x float> %a1)
  // CHECK-NEXT: ret
}

float64x2_t test_vfmsq_f64(float64x2_t a1, float64x2_t a2, float64x2_t a3) {
  // CHECK: test_vfmsq_f64
  return vfmsq_f64(a1, a2, a3);
  // CHECK: [[NEG:%.*]] = fsub <2 x double> {{.*}}, %a2
  // CHECK: llvm.fma.v2f64(<2 x double> %a3, <2 x double> [[NEG]], <2 x double> %a1)
  // CHECK-NEXT: ret
}
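// NB: the vfms _lane_ variants below combine the two lowerings above: a3 is
// negated first (captured as [[NEG]]), the chosen lane of that negated value
// is broadcast (captured as [[LANE]], usually a shufflevector), and the result
// is passed to llvm.fma, i.e. roughly:
//   r[i] = a2[i] * (-a3[n]) + a1[i];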
float32x2_t test_vfms_lane_f32(float32x2_t a1, float32x2_t a2, float32x2_t a3) {
  // CHECK: test_vfms_lane_f32
  return vfms_lane_f32(a1, a2, a3, 1);
  // NB: the test below is deliberately loose, so that we don't depend too much
  // upon the exact IR used to select lane 1 (usually a shufflevector)
  // CHECK: [[NEG:%.*]] = fsub <2 x float> {{.*}}, %a3
  // CHECK: [[LANE:%.*]] = shufflevector <2 x float> [[NEG]]
  // CHECK: llvm.fma.v2f32(<2 x float> {{.*}}, <2 x float> [[LANE]], <2 x float> %a1)
  // CHECK-NEXT: ret
}

float32x4_t test_vfmsq_lane_f32(float32x4_t a1, float32x4_t a2, float32x2_t a3) {
  // CHECK: test_vfmsq_lane_f32
  return vfmsq_lane_f32(a1, a2, a3, 1);
  // NB: the test below is deliberately loose, so that we don't depend too much
  // upon the exact IR used to select lane 1 (usually a shufflevector)
  // CHECK: [[NEG:%.*]] = fsub <2 x float> {{.*}}, %a3
  // CHECK: [[LANE:%.*]] = shufflevector <2 x float> [[NEG]]
  // CHECK: llvm.fma.v4f32(<4 x float> {{.*}}, <4 x float> [[LANE]], <4 x float> %a1)
  // CHECK-NEXT: ret
}

float64x2_t test_vfmsq_lane_f64(float64x2_t a1, float64x2_t a2, float64x1_t a3) {
  // CHECK: test_vfmsq_lane_f64
  return vfmsq_lane_f64(a1, a2, a3, 0);
  // NB: the test below is deliberately loose, so that we don't depend too much
  // upon the exact IR used to select lane 0 (usually a shufflevector)
  // CHECK: [[NEG:%.*]] = fsub <1 x double> {{.*}}, %a3
  // CHECK: [[LANE:%.*]] = shufflevector <1 x double> [[NEG]]
  // CHECK: llvm.fma.v2f64(<2 x double> {{.*}}, <2 x double> [[LANE]], <2 x double> %a1)
  // CHECK-NEXT: ret
}