/*===---- velintrin_approx.h - VEL intrinsics helper for VE ----------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */
#ifndef __VEL_INTRIN_APPROX_H__
#define __VEL_INTRIN_APPROX_H__

/* Approximate element-wise single-precision division v0 / v1.
 * Seeds with VRCPS and applies FMA-based Newton-Raphson corrections. */
static inline __vr _vel_approx_vfdivs_vvvl(__vr v0, __vr v1, int l) {
  const float one = 1.0f;
  __vr r0, r1, q, e;
  r0 = _vel_vrcps_vvl(v1, l);                  /* seed: r0 ~= 1/v1 */
  e = _vel_vfnmsbs_vsvvl(one, v1, r0, l);      /* e = 1 - v1*r0 */
  r1 = _vel_vfmads_vvvvl(r0, r0, e, l);        /* refined reciprocal */
  q = _vel_vfmuls_vvvl(v0, r1, l);             /* first quotient estimate */
  e = _vel_vfnmsbs_vvvvl(v0, q, v1, l);        /* residual: v0 - q*v1 */
  q = _vel_vfmads_vvvvl(q, r0, e, l);          /* correct with raw seed */
  e = _vel_vfnmsbs_vvvvl(v0, q, v1, l);        /* residual again */
  q = _vel_vfmads_vvvvl(q, r1, e, l);          /* final correction */
  return q;
}

/* Approximate packed (2 x float per 64-bit element) division v0 / v1.
 * Same reciprocal-seed + Newton-Raphson scheme as the scalar-float variant,
 * using the packed-vector (pv) instruction forms. */
static inline __vr _vel_approx_pvfdiv_vvvl(__vr v0, __vr v1, int l) {
  const float one = 1.0f;
  __vr r0, r1, q, e;
  r0 = _vel_pvrcp_vvl(v1, l);                  /* seed: r0 ~= 1/v1 */
  e = _vel_pvfnmsb_vsvvl(one, v1, r0, l);      /* e = 1 - v1*r0 */
  r1 = _vel_pvfmad_vvvvl(r0, r0, e, l);        /* refined reciprocal */
  q = _vel_pvfmul_vvvl(v0, r1, l);             /* first quotient estimate */
  e = _vel_pvfnmsb_vvvvl(v0, q, v1, l);        /* residual: v0 - q*v1 */
  q = _vel_pvfmad_vvvvl(q, r0, e, l);          /* correct with raw seed */
  e = _vel_pvfnmsb_vvvvl(v0, q, v1, l);        /* residual again */
  q = _vel_pvfmad_vvvvl(q, r1, e, l);          /* final correction */
  return q;
}

/* Approximate single-precision division of scalar s0 by each element of v0. */
static inline __vr _vel_approx_vfdivs_vsvl(float s0, __vr v0, int l) {
  const float one = 1.0f;
  __vr r0, r1, q, e;
  r0 = _vel_vrcps_vvl(v0, l);                  /* seed: r0 ~= 1/v0 */
  e = _vel_vfnmsbs_vsvvl(one, v0, r0, l);      /* e = 1 - v0*r0 */
  r1 = _vel_vfmads_vvvvl(r0, r0, e, l);        /* refined reciprocal */
  q = _vel_vfmuls_vsvl(s0, r1, l);             /* first quotient estimate */
  e = _vel_vfnmsbs_vsvvl(s0, q, v0, l);        /* residual: s0 - q*v0 */
  q = _vel_vfmads_vvvvl(q, r0, e, l);          /* correct with raw seed */
  e = _vel_vfnmsbs_vsvvl(s0, q, v0, l);        /* residual again */
  q = _vel_vfmads_vvvvl(q, r1, e, l);          /* final correction */
  return q;
}

/* Approximate single-precision division of each element of v0 by scalar s0.
 * The reciprocal is computed once on the host; one FMA correction step
 * recovers accuracy lost in the scalar 1/s0. */
static inline __vr _vel_approx_vfdivs_vvsl(__vr v0, float s0, int l) {
  const float rcp = 1.0f / s0;
  __vr q, e;
  q = _vel_vfmuls_vsvl(rcp, v0, l);            /* q ~= v0/s0 */
  e = _vel_vfnmsbs_vvsvl(v0, s0, q, l);        /* residual: v0 - s0*q */
  q = _vel_vfmads_vvsvl(q, rcp, e, l);         /* q += rcp*e */
  return q;
}

/* Approximate double-precision division of scalar s0 by each element of v0.
 * Doubles need three Newton-Raphson refinements of the VRCPD seed; the
 * one-ulp bump (VADDUL) between refinements matches the upstream scheme. */
static inline __vr _vel_approx_vfdivd_vsvl(double s0, __vr v0, int l) {
  const double one = 1.0;
  __vr r, q, e;
  r = _vel_vrcpd_vvl(v0, l);                   /* seed: r ~= 1/v0 */
  e = _vel_vfnmsbd_vsvvl(one, v0, r, l);       /* e = 1 - v0*r */
  r = _vel_vfmadd_vvvvl(r, r, e, l);           /* 1st refinement */
  e = _vel_vfnmsbd_vsvvl(one, v0, r, l);
  r = _vel_vfmadd_vvvvl(r, r, e, l);           /* 2nd refinement */
  r = _vel_vaddul_vsvl(1, r, l);               /* bump reciprocal by one ulp */
  e = _vel_vfnmsbd_vsvvl(one, v0, r, l);
  r = _vel_vfmadd_vvvvl(r, r, e, l);           /* 3rd refinement */
  q = _vel_vfmuld_vsvl(s0, r, l);              /* quotient estimate */
  e = _vel_vfnmsbd_vsvvl(s0, q, v0, l);        /* residual: s0 - q*v0 */
  return _vel_vfmadd_vvvvl(q, r, e, l);        /* final correction */
}

/* Approximate double-precision element-wise square root of v0.
 * Seeds with VRSQRTDNEX (reciprocal sqrt) and applies two Goldschmidt-style
 * half-error FMA corrections. */
static inline __vr _vel_approx_vfsqrtd_vvl(__vr v0, int l) {
  const double one = 1.0;
  const double half = 0.5;
  __vr rsq, y, e;
  rsq = _vel_vrsqrtdnex_vvl(v0, l);            /* rsq ~= 1/sqrt(v0) */
  y = _vel_vfmuld_vvvl(v0, rsq, l);            /* y ~= sqrt(v0) */
  e = _vel_vfnmsbd_vsvvl(one, y, rsq, l);      /* e = 1 - y*rsq */
  e = _vel_vfmuld_vsvl(half, e, l);            /* halve the error term */
  rsq = _vel_vfmadd_vvvvl(rsq, rsq, e, l);     /* refine reciprocal sqrt */
  y = _vel_vfmuld_vvvl(v0, rsq, l);            /* improved sqrt estimate */
  e = _vel_vfnmsbd_vsvvl(one, y, rsq, l);
  e = _vel_vfmuld_vsvl(half, e, l);
  return _vel_vfmadd_vvvvl(y, y, e, l);        /* final correction */
}

/* Approximate single-precision element-wise square root of v0.
 * Widens to double, runs the same rsqrt-seeded refinement as the
 * double-precision variant, then narrows back to float. */
static inline __vr _vel_approx_vfsqrts_vvl(__vr v0, int l) {
  const float one = 1.0f;
  const float half = 0.5f;
  __vr rsq, y, e;
  v0 = _vel_vcvtds_vvl(v0, l);                 /* widen float -> double */
  rsq = _vel_vrsqrtdnex_vvl(v0, l);            /* rsq ~= 1/sqrt(v0) */
  y = _vel_vfmuld_vvvl(v0, rsq, l);            /* y ~= sqrt(v0) */
  e = _vel_vfnmsbd_vsvvl(one, y, rsq, l);      /* e = 1 - y*rsq */
  e = _vel_vfmuld_vsvl(half, e, l);            /* halve the error term */
  rsq = _vel_vfmadd_vvvvl(rsq, rsq, e, l);     /* refine reciprocal sqrt */
  y = _vel_vfmuld_vvvl(v0, rsq, l);            /* improved sqrt estimate */
  e = _vel_vfnmsbd_vsvvl(one, y, rsq, l);
  e = _vel_vfmuld_vsvl(half, e, l);
  y = _vel_vfmadd_vvvvl(y, y, e, l);           /* final correction */
  return _vel_vcvtsd_vvl(y, l);                /* narrow double -> float */
}

#endif /* __VEL_INTRIN_APPROX_H__ */