1*d30dc8cbSJohn Marino /* mpc_fma -- Fused multiply-add of three complex numbers
2*d30dc8cbSJohn Marino
3*d30dc8cbSJohn Marino Copyright (C) 2011, 2012 INRIA
4*d30dc8cbSJohn Marino
5*d30dc8cbSJohn Marino This file is part of GNU MPC.
6*d30dc8cbSJohn Marino
7*d30dc8cbSJohn Marino GNU MPC is free software; you can redistribute it and/or modify it under
8*d30dc8cbSJohn Marino the terms of the GNU Lesser General Public License as published by the
9*d30dc8cbSJohn Marino Free Software Foundation; either version 3 of the License, or (at your
10*d30dc8cbSJohn Marino option) any later version.
11*d30dc8cbSJohn Marino
12*d30dc8cbSJohn Marino GNU MPC is distributed in the hope that it will be useful, but WITHOUT ANY
13*d30dc8cbSJohn Marino WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14*d30dc8cbSJohn Marino FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
15*d30dc8cbSJohn Marino more details.
16*d30dc8cbSJohn Marino
17*d30dc8cbSJohn Marino You should have received a copy of the GNU Lesser General Public License
18*d30dc8cbSJohn Marino along with this program. If not, see http://www.gnu.org/licenses/ .
19*d30dc8cbSJohn Marino */
20*d30dc8cbSJohn Marino
21*d30dc8cbSJohn Marino #include "mpc-impl.h"
22*d30dc8cbSJohn Marino
23*d30dc8cbSJohn Marino /* return a bound on the precision needed to add or subtract x and y exactly */
24*d30dc8cbSJohn Marino static mpfr_prec_t
bound_prec_addsub(mpfr_srcptr x,mpfr_srcptr y)25*d30dc8cbSJohn Marino bound_prec_addsub (mpfr_srcptr x, mpfr_srcptr y)
26*d30dc8cbSJohn Marino {
27*d30dc8cbSJohn Marino if (!mpfr_regular_p (x))
28*d30dc8cbSJohn Marino return mpfr_get_prec (y);
29*d30dc8cbSJohn Marino else if (!mpfr_regular_p (y))
30*d30dc8cbSJohn Marino return mpfr_get_prec (x);
31*d30dc8cbSJohn Marino else /* neither x nor y are NaN, Inf or zero */
32*d30dc8cbSJohn Marino {
33*d30dc8cbSJohn Marino mpfr_exp_t ex = mpfr_get_exp (x);
34*d30dc8cbSJohn Marino mpfr_exp_t ey = mpfr_get_exp (y);
35*d30dc8cbSJohn Marino mpfr_exp_t ulpx = ex - mpfr_get_prec (x);
36*d30dc8cbSJohn Marino mpfr_exp_t ulpy = ey - mpfr_get_prec (y);
37*d30dc8cbSJohn Marino return ((ex >= ey) ? ex : ey) + 1 - ((ulpx <= ulpy) ? ulpx : ulpy);
38*d30dc8cbSJohn Marino }
39*d30dc8cbSJohn Marino }
40*d30dc8cbSJohn Marino
41*d30dc8cbSJohn Marino /* r <- a*b+c */
42*d30dc8cbSJohn Marino int
mpc_fma_naive(mpc_ptr r,mpc_srcptr a,mpc_srcptr b,mpc_srcptr c,mpc_rnd_t rnd)43*d30dc8cbSJohn Marino mpc_fma_naive (mpc_ptr r, mpc_srcptr a, mpc_srcptr b, mpc_srcptr c, mpc_rnd_t rnd)
44*d30dc8cbSJohn Marino {
45*d30dc8cbSJohn Marino mpfr_t rea_reb, rea_imb, ima_reb, ima_imb, tmp;
46*d30dc8cbSJohn Marino mpfr_prec_t pre12, pre13, pre23, pim12, pim13, pim23;
47*d30dc8cbSJohn Marino int inex_re, inex_im;
48*d30dc8cbSJohn Marino
49*d30dc8cbSJohn Marino mpfr_init2 (rea_reb, mpfr_get_prec (mpc_realref(a)) + mpfr_get_prec (mpc_realref(b)));
50*d30dc8cbSJohn Marino mpfr_init2 (rea_imb, mpfr_get_prec (mpc_realref(a)) + mpfr_get_prec (mpc_imagref(b)));
51*d30dc8cbSJohn Marino mpfr_init2 (ima_reb, mpfr_get_prec (mpc_imagref(a)) + mpfr_get_prec (mpc_realref(b)));
52*d30dc8cbSJohn Marino mpfr_init2 (ima_imb, mpfr_get_prec (mpc_imagref(a)) + mpfr_get_prec (mpc_imagref(b)));
53*d30dc8cbSJohn Marino
54*d30dc8cbSJohn Marino mpfr_mul (rea_reb, mpc_realref(a), mpc_realref(b), GMP_RNDZ); /* exact */
55*d30dc8cbSJohn Marino mpfr_mul (rea_imb, mpc_realref(a), mpc_imagref(b), GMP_RNDZ); /* exact */
56*d30dc8cbSJohn Marino mpfr_mul (ima_reb, mpc_imagref(a), mpc_realref(b), GMP_RNDZ); /* exact */
57*d30dc8cbSJohn Marino mpfr_mul (ima_imb, mpc_imagref(a), mpc_imagref(b), GMP_RNDZ); /* exact */
58*d30dc8cbSJohn Marino
59*d30dc8cbSJohn Marino /* Re(r) <- rea_reb - ima_imb + Re(c) */
60*d30dc8cbSJohn Marino
61*d30dc8cbSJohn Marino pre12 = bound_prec_addsub (rea_reb, ima_imb); /* bound on exact precision for
62*d30dc8cbSJohn Marino rea_reb - ima_imb */
63*d30dc8cbSJohn Marino pre13 = bound_prec_addsub (rea_reb, mpc_realref(c));
64*d30dc8cbSJohn Marino /* bound for rea_reb + Re(c) */
65*d30dc8cbSJohn Marino pre23 = bound_prec_addsub (ima_imb, mpc_realref(c));
66*d30dc8cbSJohn Marino /* bound for ima_imb - Re(c) */
67*d30dc8cbSJohn Marino if (pre12 <= pre13 && pre12 <= pre23) /* (rea_reb - ima_imb) + Re(c) */
68*d30dc8cbSJohn Marino {
69*d30dc8cbSJohn Marino mpfr_init2 (tmp, pre12);
70*d30dc8cbSJohn Marino mpfr_sub (tmp, rea_reb, ima_imb, GMP_RNDZ); /* exact */
71*d30dc8cbSJohn Marino inex_re = mpfr_add (mpc_realref(r), tmp, mpc_realref(c), MPC_RND_RE(rnd));
72*d30dc8cbSJohn Marino /* the only possible bad overlap is between r and c, but since we are
73*d30dc8cbSJohn Marino only touching the real part of both, it is ok */
74*d30dc8cbSJohn Marino }
75*d30dc8cbSJohn Marino else if (pre13 <= pre23) /* (rea_reb + Re(c)) - ima_imb */
76*d30dc8cbSJohn Marino {
77*d30dc8cbSJohn Marino mpfr_init2 (tmp, pre13);
78*d30dc8cbSJohn Marino mpfr_add (tmp, rea_reb, mpc_realref(c), GMP_RNDZ); /* exact */
79*d30dc8cbSJohn Marino inex_re = mpfr_sub (mpc_realref(r), tmp, ima_imb, MPC_RND_RE(rnd));
80*d30dc8cbSJohn Marino /* the only possible bad overlap is between r and c, but since we are
81*d30dc8cbSJohn Marino only touching the real part of both, it is ok */
82*d30dc8cbSJohn Marino }
83*d30dc8cbSJohn Marino else /* rea_reb + (Re(c) - ima_imb) */
84*d30dc8cbSJohn Marino {
85*d30dc8cbSJohn Marino mpfr_init2 (tmp, pre23);
86*d30dc8cbSJohn Marino mpfr_sub (tmp, mpc_realref(c), ima_imb, GMP_RNDZ); /* exact */
87*d30dc8cbSJohn Marino inex_re = mpfr_add (mpc_realref(r), tmp, rea_reb, MPC_RND_RE(rnd));
88*d30dc8cbSJohn Marino /* the only possible bad overlap is between r and c, but since we are
89*d30dc8cbSJohn Marino only touching the real part of both, it is ok */
90*d30dc8cbSJohn Marino }
91*d30dc8cbSJohn Marino
92*d30dc8cbSJohn Marino /* Im(r) <- rea_imb + ima_reb + Im(c) */
93*d30dc8cbSJohn Marino pim12 = bound_prec_addsub (rea_imb, ima_reb); /* bound on exact precision for
94*d30dc8cbSJohn Marino rea_imb + ima_reb */
95*d30dc8cbSJohn Marino pim13 = bound_prec_addsub (rea_imb, mpc_imagref(c));
96*d30dc8cbSJohn Marino /* bound for rea_imb + Im(c) */
97*d30dc8cbSJohn Marino pim23 = bound_prec_addsub (ima_reb, mpc_imagref(c));
98*d30dc8cbSJohn Marino /* bound for ima_reb + Im(c) */
99*d30dc8cbSJohn Marino if (pim12 <= pim13 && pim12 <= pim23) /* (rea_imb + ima_reb) + Im(c) */
100*d30dc8cbSJohn Marino {
101*d30dc8cbSJohn Marino mpfr_set_prec (tmp, pim12);
102*d30dc8cbSJohn Marino mpfr_add (tmp, rea_imb, ima_reb, GMP_RNDZ); /* exact */
103*d30dc8cbSJohn Marino inex_im = mpfr_add (mpc_imagref(r), tmp, mpc_imagref(c), MPC_RND_IM(rnd));
104*d30dc8cbSJohn Marino /* the only possible bad overlap is between r and c, but since we are
105*d30dc8cbSJohn Marino only touching the imaginary part of both, it is ok */
106*d30dc8cbSJohn Marino }
107*d30dc8cbSJohn Marino else if (pim13 <= pim23) /* (rea_imb + Im(c)) + ima_reb */
108*d30dc8cbSJohn Marino {
109*d30dc8cbSJohn Marino mpfr_set_prec (tmp, pim13);
110*d30dc8cbSJohn Marino mpfr_add (tmp, rea_imb, mpc_imagref(c), GMP_RNDZ); /* exact */
111*d30dc8cbSJohn Marino inex_im = mpfr_add (mpc_imagref(r), tmp, ima_reb, MPC_RND_IM(rnd));
112*d30dc8cbSJohn Marino /* the only possible bad overlap is between r and c, but since we are
113*d30dc8cbSJohn Marino only touching the imaginary part of both, it is ok */
114*d30dc8cbSJohn Marino }
115*d30dc8cbSJohn Marino else /* rea_imb + (Im(c) + ima_reb) */
116*d30dc8cbSJohn Marino {
117*d30dc8cbSJohn Marino mpfr_set_prec (tmp, pre23);
118*d30dc8cbSJohn Marino mpfr_add (tmp, mpc_imagref(c), ima_reb, GMP_RNDZ); /* exact */
119*d30dc8cbSJohn Marino inex_im = mpfr_add (mpc_imagref(r), tmp, rea_imb, MPC_RND_IM(rnd));
120*d30dc8cbSJohn Marino /* the only possible bad overlap is between r and c, but since we are
121*d30dc8cbSJohn Marino only touching the imaginary part of both, it is ok */
122*d30dc8cbSJohn Marino }
123*d30dc8cbSJohn Marino
124*d30dc8cbSJohn Marino mpfr_clear (rea_reb);
125*d30dc8cbSJohn Marino mpfr_clear (rea_imb);
126*d30dc8cbSJohn Marino mpfr_clear (ima_reb);
127*d30dc8cbSJohn Marino mpfr_clear (ima_imb);
128*d30dc8cbSJohn Marino mpfr_clear (tmp);
129*d30dc8cbSJohn Marino
130*d30dc8cbSJohn Marino return MPC_INEX(inex_re, inex_im);
131*d30dc8cbSJohn Marino }
132*d30dc8cbSJohn Marino
133*d30dc8cbSJohn Marino /* The algorithm is as follows:
134*d30dc8cbSJohn Marino - in a first pass, we use the target precision + some extra bits
135*d30dc8cbSJohn Marino - if it fails, we add the number of cancelled bits when adding
136*d30dc8cbSJohn Marino Re(a*b) and Re(c) [similarly for the imaginary part]
137*d30dc8cbSJohn Marino - it is fails again, we call the mpc_fma_naive function, which also
138*d30dc8cbSJohn Marino deals with the special cases */
139*d30dc8cbSJohn Marino int
mpc_fma(mpc_ptr r,mpc_srcptr a,mpc_srcptr b,mpc_srcptr c,mpc_rnd_t rnd)140*d30dc8cbSJohn Marino mpc_fma (mpc_ptr r, mpc_srcptr a, mpc_srcptr b, mpc_srcptr c, mpc_rnd_t rnd)
141*d30dc8cbSJohn Marino {
142*d30dc8cbSJohn Marino mpc_t ab;
143*d30dc8cbSJohn Marino mpfr_prec_t pre, pim, wpre, wpim;
144*d30dc8cbSJohn Marino mpfr_exp_t diffre, diffim;
145*d30dc8cbSJohn Marino int i, inex = 0, okre = 0, okim = 0;
146*d30dc8cbSJohn Marino
147*d30dc8cbSJohn Marino if (mpc_fin_p (a) == 0 || mpc_fin_p (b) == 0 || mpc_fin_p (c) == 0)
148*d30dc8cbSJohn Marino return mpc_fma_naive (r, a, b, c, rnd);
149*d30dc8cbSJohn Marino
150*d30dc8cbSJohn Marino pre = mpfr_get_prec (mpc_realref(r));
151*d30dc8cbSJohn Marino pim = mpfr_get_prec (mpc_imagref(r));
152*d30dc8cbSJohn Marino wpre = pre + mpc_ceil_log2 (pre) + 10;
153*d30dc8cbSJohn Marino wpim = pim + mpc_ceil_log2 (pim) + 10;
154*d30dc8cbSJohn Marino mpc_init3 (ab, wpre, wpim);
155*d30dc8cbSJohn Marino for (i = 0; i < 2; ++i)
156*d30dc8cbSJohn Marino {
157*d30dc8cbSJohn Marino mpc_mul (ab, a, b, MPC_RNDZZ);
158*d30dc8cbSJohn Marino if (mpfr_zero_p (mpc_realref(ab)) || mpfr_zero_p (mpc_imagref(ab)))
159*d30dc8cbSJohn Marino break;
160*d30dc8cbSJohn Marino diffre = mpfr_get_exp (mpc_realref(ab));
161*d30dc8cbSJohn Marino diffim = mpfr_get_exp (mpc_imagref(ab));
162*d30dc8cbSJohn Marino mpc_add (ab, ab, c, MPC_RNDZZ);
163*d30dc8cbSJohn Marino if (mpfr_zero_p (mpc_realref(ab)) || mpfr_zero_p (mpc_imagref(ab)))
164*d30dc8cbSJohn Marino break;
165*d30dc8cbSJohn Marino diffre -= mpfr_get_exp (mpc_realref(ab));
166*d30dc8cbSJohn Marino diffim -= mpfr_get_exp (mpc_imagref(ab));
167*d30dc8cbSJohn Marino diffre = (diffre > 0 ? diffre + 1 : 1);
168*d30dc8cbSJohn Marino diffim = (diffim > 0 ? diffim + 1 : 1);
169*d30dc8cbSJohn Marino okre = diffre > (mpfr_exp_t) wpre ? 0 : mpfr_can_round (mpc_realref(ab),
170*d30dc8cbSJohn Marino wpre - diffre, GMP_RNDN, GMP_RNDZ,
171*d30dc8cbSJohn Marino pre + (MPC_RND_RE (rnd) == GMP_RNDN));
172*d30dc8cbSJohn Marino okim = diffim > (mpfr_exp_t) wpim ? 0 : mpfr_can_round (mpc_imagref(ab),
173*d30dc8cbSJohn Marino wpim - diffim, GMP_RNDN, GMP_RNDZ,
174*d30dc8cbSJohn Marino pim + (MPC_RND_IM (rnd) == GMP_RNDN));
175*d30dc8cbSJohn Marino if (okre && okim)
176*d30dc8cbSJohn Marino {
177*d30dc8cbSJohn Marino inex = mpc_set (r, ab, rnd);
178*d30dc8cbSJohn Marino break;
179*d30dc8cbSJohn Marino }
180*d30dc8cbSJohn Marino if (i == 1)
181*d30dc8cbSJohn Marino break;
182*d30dc8cbSJohn Marino if (okre == 0 && diffre > 1)
183*d30dc8cbSJohn Marino wpre += diffre;
184*d30dc8cbSJohn Marino if (okim == 0 && diffim > 1)
185*d30dc8cbSJohn Marino wpim += diffim;
186*d30dc8cbSJohn Marino mpfr_set_prec (mpc_realref(ab), wpre);
187*d30dc8cbSJohn Marino mpfr_set_prec (mpc_imagref(ab), wpim);
188*d30dc8cbSJohn Marino }
189*d30dc8cbSJohn Marino mpc_clear (ab);
190*d30dc8cbSJohn Marino return okre && okim ? inex : mpc_fma_naive (r, a, b, c, rnd);
191*d30dc8cbSJohn Marino }
192