xref: /netbsd-src/external/lgpl3/mpfr/dist/src/sin_cos.c (revision ec6772edaf0cdcb5f52a48f4aca5e33a8fb8ecfd)
1 /* mpfr_sin_cos -- sine and cosine of a floating-point number
2 
3 Copyright 2002-2023 Free Software Foundation, Inc.
4 Contributed by the AriC and Caramba projects, INRIA.
5 
6 This file is part of the GNU MPFR Library.
7 
8 The GNU MPFR Library is free software; you can redistribute it and/or modify
9 it under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or (at your
11 option) any later version.
12 
13 The GNU MPFR Library is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
16 License for more details.
17 
18 You should have received a copy of the GNU Lesser General Public License
19 along with the GNU MPFR Library; see the file COPYING.LESSER.  If not, see
20 https://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
21 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
22 
23 #define MPFR_NEED_LONGLONG_H
24 #include "mpfr-impl.h"
25 
26 /* (y, z) <- (sin(x), cos(x)), return value is 0 iff both results are exact
27    ie, iff x = 0 */
28 int
mpfr_sin_cos(mpfr_ptr y,mpfr_ptr z,mpfr_srcptr x,mpfr_rnd_t rnd_mode)29 mpfr_sin_cos (mpfr_ptr y, mpfr_ptr z, mpfr_srcptr x, mpfr_rnd_t rnd_mode)
30 {
31   mpfr_prec_t prec, m;
32   int neg, reduce;
33   mpfr_t c, xr;
34   mpfr_srcptr xx;
35   mpfr_exp_t err, expx;
36   int inexy, inexz;
37   MPFR_ZIV_DECL (loop);
38   MPFR_SAVE_EXPO_DECL (expo);
39 
40   MPFR_ASSERTN (y != z);
41 
42   if (MPFR_UNLIKELY (MPFR_IS_SINGULAR (x)))
43     {
44       if (MPFR_IS_NAN(x) || MPFR_IS_INF(x))
45         {
46           MPFR_SET_NAN (y);
47           MPFR_SET_NAN (z);
48           MPFR_RET_NAN;
49         }
50       else /* x is zero */
51         {
52           MPFR_ASSERTD (MPFR_IS_ZERO (x));
53           MPFR_SET_ZERO (y);
54           MPFR_SET_SAME_SIGN (y, x);
55           /* y = 0, thus exact, but z is inexact in case of underflow
56              or overflow */
57           inexy = 0; /* y is exact */
58           inexz = mpfr_set_ui (z, 1, rnd_mode);
59           return INEX(inexy,inexz);
60         }
61     }
62 
63   MPFR_LOG_FUNC
64     (("x[%Pd]=%.*Rg rnd=%d", mpfr_get_prec (x), mpfr_log_prec, x, rnd_mode),
65      ("sin[%Pd]=%.*Rg cos[%Pd]=%.*Rg", mpfr_get_prec(y), mpfr_log_prec, y,
66       mpfr_get_prec (z), mpfr_log_prec, z));
67 
68   MPFR_SAVE_EXPO_MARK (expo);
69 
70   prec = MAX (MPFR_PREC (y), MPFR_PREC (z));
71   m = prec + MPFR_INT_CEIL_LOG2 (prec) + 13;
72   expx = MPFR_GET_EXP (x);
73 
74   /* When x is close to 0, say 2^(-k), then there is a cancellation of about
75      2k bits in 1-cos(x)^2. FIXME: in that case, it would be more efficient
76      to compute sin(x) directly. VL: This is partly done by using
77      MPFR_FAST_COMPUTE_IF_SMALL_INPUT from the mpfr_sin and mpfr_cos
78      functions. Moreover, any overflow on m is avoided. */
79   if (expx < 0)
80     {
81       /* Warning: in case y = x, and the first call to
82          MPFR_FAST_COMPUTE_IF_SMALL_INPUT succeeds but the second fails,
83          we will have clobbered the original value of x.
84          The workaround is to first compute z = cos(x) in that case, since
85          y and z are different. */
86       if (y != x)
87         /* y and x differ, thus we can safely try to compute y first */
88         {
89           MPFR_FAST_COMPUTE_IF_SMALL_INPUT (
90             y, x, -2 * expx, 2, 0, rnd_mode,
91             { inexy = _inexact;
92               goto small_input; });
93           if (0)
94             {
95             small_input:
96               /* we can go here only if we can round sin(x) */
97               MPFR_FAST_COMPUTE_IF_SMALL_INPUT (
98                 z, __gmpfr_one, -2 * expx, 1, 0, rnd_mode,
99                 { inexz = _inexact;
100                   MPFR_SAVE_EXPO_UPDATE_FLAGS (expo, __gmpfr_flags);
101                   goto end; });
102             }
103 
104           /* if we go here, one of the two MPFR_FAST_COMPUTE_IF_SMALL_INPUT
105              calls failed */
106         }
107       else /* y and x are the same variable: try to compute z first, which
108               necessarily differs */
109         {
110           MPFR_FAST_COMPUTE_IF_SMALL_INPUT (
111             z, __gmpfr_one, -2 * expx, 1, 0, rnd_mode,
112             { inexz = _inexact;
113               goto small_input2; });
114           if (0)
115             {
116             small_input2:
117               /* we can go here only if we can round cos(x) */
118               MPFR_FAST_COMPUTE_IF_SMALL_INPUT (
119                 y, x, -2 * expx, 2, 0, rnd_mode,
120                 { inexy = _inexact;
121                   MPFR_SAVE_EXPO_UPDATE_FLAGS (expo, __gmpfr_flags);
122                   goto end; });
123             }
124         }
125       m += 2 * (-expx);
126     }
127 
128   if (prec >= MPFR_SINCOS_THRESHOLD)
129     {
130       MPFR_SAVE_EXPO_FREE (expo);
131       return mpfr_sincos_fast (y, z, x, rnd_mode);
132     }
133 
134   mpfr_init2 (c, m);
135   mpfr_init2 (xr, m);
136 
137   MPFR_ZIV_INIT (loop, m);
138   for (;;)
139     {
140       /* the following is copied from sin.c */
141       if (expx >= 2) /* reduce the argument */
142         {
143           reduce = 1;
144           MPFR_ASSERTN (expx + m - 1 <= MPFR_PREC_MAX);
145           mpfr_set_prec (c, expx + m - 1);
146           mpfr_set_prec (xr, m);
147           mpfr_const_pi (c, MPFR_RNDN);
148           mpfr_mul_2ui (c, c, 1, MPFR_RNDN);
149           mpfr_remainder (xr, x, c, MPFR_RNDN);
150           mpfr_div_2ui (c, c, 1, MPFR_RNDN);
151           if (MPFR_IS_POS (xr))
152             mpfr_sub (c, c, xr, MPFR_RNDZ);
153           else
154             mpfr_add (c, c, xr, MPFR_RNDZ);
155           if (MPFR_IS_ZERO(xr)
156               || MPFR_EXP(xr) < (mpfr_exp_t) 3 - (mpfr_exp_t) m
157               || MPFR_EXP(c) < (mpfr_exp_t) 3 - (mpfr_exp_t) m)
158             goto next_step;
159           xx = xr;
160         }
161       else /* the input argument is already reduced */
162         {
163           reduce = 0;
164           xx = x;
165         }
166 
167       neg = MPFR_IS_NEG (xx); /* gives sign of sin(x) */
168       mpfr_set_prec (c, m);
169       mpfr_cos (c, xx, MPFR_RNDZ);
170       /* If no argument reduction was performed, the error is at most ulp(c),
171          otherwise it is at most ulp(c) + 2^(2-m). Since |c| < 1, we have
172          ulp(c) <= 2^(-m), thus the error is bounded by 2^(3-m) in that later
173          case. */
174       if (reduce == 0)
175         err = m;
176       else
177         err = MPFR_GET_EXP (c) + (mpfr_exp_t) (m - 3);
178       if (!MPFR_CAN_ROUND (c, err, MPFR_PREC (z), rnd_mode))
179         goto next_step;
180 
181       /* We can't set z now, because in case z = x, and the MPFR_CAN_ROUND()
182          call below fails, we will have clobbered the input.
183          Note: m below is the precision of c; see above. */
184       mpfr_set_prec (xr, m);
185       mpfr_swap (xr, c); /* save the approximation of the cosine in xr */
186       mpfr_sqr (c, xr, MPFR_RNDU); /* the absolute error is bounded by
187                                       2^(5-m) if reduce=1, and by 2^(2-m)
188                                       otherwise */
189       mpfr_ui_sub (c, 1, c, MPFR_RNDN); /* error bounded by 2^(6-m) if reduce
190                                            is 1, and 2^(3-m) otherwise */
191       mpfr_sqrt (c, c, MPFR_RNDN); /* the absolute error is bounded by
192                                       2^(6-m-Exp(c)) if reduce=1, and
193                                       2^(3-m-Exp(c)) otherwise */
194       err = 3 + 3 * reduce - MPFR_GET_EXP (c);
195       if (neg)
196         MPFR_CHANGE_SIGN (c);
197 
198       /* the absolute error on c is at most 2^(err-m), which we must put
199          in the form 2^(EXP(c)-err). */
200       err = MPFR_GET_EXP (c) + (mpfr_exp_t) m - err;
201       if (MPFR_CAN_ROUND (c, err, MPFR_PREC (y), rnd_mode))
202         break;
203       /* check for huge cancellation */
204       if (err < (mpfr_exp_t) MPFR_PREC (y))
205         m += MPFR_PREC (y) - err;
206       /* Check if near 1 */
207       if (MPFR_GET_EXP (c) == 1
208           && MPFR_MANT (c)[MPFR_LIMB_SIZE (c)-1] == MPFR_LIMB_HIGHBIT)
209         m += m;
210 
211     next_step:
212       MPFR_ZIV_NEXT (loop, m);
213       mpfr_set_prec (c, m);
214     }
215   MPFR_ZIV_FREE (loop);
216 
217   inexy = mpfr_set (y, c, rnd_mode);
218   inexz = mpfr_set (z, xr, rnd_mode);
219 
220   mpfr_clear (c);
221   mpfr_clear (xr);
222 
223  end:
224   MPFR_SAVE_EXPO_FREE (expo);
225   /* FIXME: add a test for bug before revision 7355 */
226   inexy = mpfr_check_range (y, inexy, rnd_mode);
227   inexz = mpfr_check_range (z, inexz, rnd_mode);
228   MPFR_RET (INEX(inexy,inexz));
229 }
230 
231 /*************** asymptotically fast implementation below ********************/
232 
233 /* truncate Q from R to at most prec bits.
234    Return the number of truncated bits.
235  */
236 static mpfr_prec_t
reduce(mpz_t Q,mpz_srcptr R,mpfr_prec_t prec)237 reduce (mpz_t Q, mpz_srcptr R, mpfr_prec_t prec)
238 {
239   mpfr_prec_t l;
240 
241   MPFR_MPZ_SIZEINBASE2(l, R);
242   l = (l > prec) ? l - prec : 0;
243   mpz_fdiv_q_2exp (Q, R, l);
244   return l;
245 }
246 
247 /* truncate S and C so that the smaller has prec bits.
248    Return the number of truncated bits.
249  */
250 static unsigned long
reduce2(mpz_t S,mpz_t C,mpfr_prec_t prec)251 reduce2 (mpz_t S, mpz_t C, mpfr_prec_t prec)
252 {
253   unsigned long ls;
254   unsigned long lc;
255   unsigned long l;
256 
257   MPFR_MPZ_SIZEINBASE2(ls, S);
258   MPFR_MPZ_SIZEINBASE2(lc, C);
259 
260   l = (ls < lc) ? ls : lc; /* smaller length */
261   l = (l > prec) ? l - prec : 0;
262   mpz_fdiv_q_2exp (S, S, l);
263   mpz_fdiv_q_2exp (C, C, l);
264   return l;
265 }
266 
267 /* return in S0/Q0 a rational approximation of sin(X) with absolute error
268                      bounded by 9*2^(-prec), where 0 <= X=p/2^r <= 1/2,
269    and in    C0/Q0 a rational approximation of cos(X), with relative error
270                      bounded by 9*2^(-prec) (and also absolute error, since
271                      |cos(X)| <= 1).
272    We have sin(X)/X = sum((-1)^i*(p/2^r)^i/(2i+1)!, i=0..infinity).
273    We use the following binary splitting formula:
274    P(a,b) = (-p)^(b-a)
275    Q(a,b) = (2a)*(2a+1)*2^r if a+1=b [except Q(0,1)=1], Q(a,c)*Q(c,b) otherwise
276    T(a,b) = 1 if a+1=b, Q(c,b)*T(a,c)+P(a,c)*T(c,b) otherwise.
277 
278    Since we use P(a,b) for b-a=2^k only, we compute only p^(2^k).
279    We do not store the factor 2^r in Q().
280 
281    Then sin(X)/X ~ T(0,i)/Q(0,i) for i so that (p/2^r)^i/i! is small enough.
282 
283    Return l such that Q0 has to be multiplied by 2^l.
284 
285    Assumes prec >= 10.
286 */
287 
288 #define KMAX 64
289 static unsigned long
sin_bs_aux(mpz_t Q0,mpz_t S0,mpz_t C0,mpz_srcptr p,mpfr_prec_t r,mpfr_prec_t prec)290 sin_bs_aux (mpz_t Q0, mpz_t S0, mpz_t C0, mpz_srcptr p, mpfr_prec_t r,
291             mpfr_prec_t prec)
292 {
293   mpz_t T[KMAX], Q[KMAX], ptoj[KMAX], pp;
294   mpfr_prec_t log2_nb_terms[KMAX], mult[KMAX];
295   mpfr_prec_t accu[KMAX], size_ptoj[KMAX];
296   mpfr_prec_t prec_i_have, h, r0 = r, pp_s, p_s;
297   unsigned long i, j, m;
298   int alloc, k, l;
299 
300   if (MPFR_UNLIKELY(mpz_cmp_ui (p, 0) == 0)) /* sin(x)/x -> 1 */
301     {
302       mpz_set_ui (Q0, 1);
303       mpz_set_ui (S0, 1);
304       mpz_set_ui (C0, 1);
305       return 0;
306     }
307 
308   /* check that X=p/2^r <= 1/2 */
309   MPFR_ASSERTN(mpz_sizeinbase (p, 2) - (mpfr_exp_t) r <= -1);
310 
311   mpz_init (pp);
312 
313   /* normalize p (non-zero here) */
314   h = mpz_scan1 (p, 0);
315   mpz_fdiv_q_2exp (pp, p, h); /* p = pp * 2^h */
316   mpz_mul (pp, pp, pp);
317   r = 2 * (r - h);            /* x^2 = (p/2^r0)^2 = pp / 2^r */
318 
319   /* now p is odd */
320   alloc = 2;
321   mpz_init_set_ui (T[0], 6);
322   mpz_init_set_ui (Q[0], 6);
323   mpz_init_set (ptoj[0], pp); /* ptoj[i] = pp^(2^i) */
324   mpz_init (T[1]);
325   mpz_init (Q[1]);
326   mpz_init (ptoj[1]);
327   mpz_mul (ptoj[1], pp, pp);  /* ptoj[1] = pp^2 */
328   MPFR_MPZ_SIZEINBASE2(size_ptoj[1], ptoj[1]);
329 
330   mpz_mul_2exp (T[0], T[0], r);
331   mpz_sub (T[0], T[0], pp);      /* 6*2^r - pp = 6*2^r*(1 - x^2/6) */
332   log2_nb_terms[0] = 1;
333 
334   /* already take into account the factor x=p/2^r in sin(x) = x * (...) */
335   MPFR_MPZ_SIZEINBASE2(pp_s, pp);
336   MPFR_MPZ_SIZEINBASE2(p_s, p);
337   mult[0] = r - pp_s + r0 - p_s;
338   /* we have x^3 < 1/2^mult[0] */
339 
340   for (i = 2, k = 0, prec_i_have = mult[0]; prec_i_have < prec; i += 2)
341     {
342       /* i is even here */
343       /* invariant: Q[0]*Q[1]*...*Q[k] equals (2i-1)!,
344          we have already summed terms of index < i
345          in S[0]/Q[0], ..., S[k]/Q[k] */
346       k ++;
347       if (k + 1 >= alloc) /* necessarily k + 1 = alloc */
348         {
349           MPFR_ASSERTD (k + 1 == alloc);
350           alloc ++;
351           MPFR_ASSERTN (k + 1 < KMAX);
352           mpz_init (T[k+1]);
353           mpz_init (Q[k+1]);
354           mpz_init (ptoj[k+1]);
355           mpz_mul (ptoj[k+1], ptoj[k], ptoj[k]); /* pp^(2^(k+1)) */
356           MPFR_MPZ_SIZEINBASE2(size_ptoj[k+1], ptoj[k+1]);
357         }
358       /* for i even, we have Q[k] = (2*i)*(2*i+1), T[k] = 1,
359          then                Q[k+1] = (2*i+2)*(2*i+3), T[k+1] = 1,
360          which reduces to T[k] = (2*i+2)*(2*i+3)*2^r-pp,
361          Q[k] = (2*i)*(2*i+1)*(2*i+2)*(2*i+3). */
362       MPFR_ASSERTN (k < KMAX);
363       log2_nb_terms[k] = 1;
364       mpz_set_ui (Q[k], 2 * i + 2);
365       mpz_mul_ui (Q[k], Q[k], 2 * i + 3);
366       mpz_mul_2exp (T[k], Q[k], r);
367       mpz_sub (T[k], T[k], pp);
368       mpz_mul_ui (Q[k], Q[k], 2 * i);
369       mpz_mul_ui (Q[k], Q[k], 2 * i + 1);
370       /* the next term of the series is divided by Q[k] and multiplied
371          by pp^2/2^(2r), thus the mult. factor < 1/2^mult[k] */
372       MPFR_MPZ_SIZEINBASE2(mult[k], Q[k]);
373       mult[k] += 2 * r - size_ptoj[1] - 1;
374       /* the absolute contribution of the next term is 1/2^accu[k] */
375       accu[k] = (k == 0) ? mult[k] : mult[k] + accu[k-1];
376       prec_i_have = accu[k]; /* the current term is < 1/2^accu[k] */
377       j = (i + 2) / 2;
378       l = 1;
379       while ((j & 1) == 0) /* combine and reduce */
380         {
381           MPFR_ASSERTN (k >= 1);
382           mpz_mul (T[k], T[k], ptoj[l]);
383           mpz_mul (T[k-1], T[k-1], Q[k]);
384           mpz_mul_2exp (T[k-1], T[k-1], r << l);
385           mpz_add (T[k-1], T[k-1], T[k]);
386           mpz_mul (Q[k-1], Q[k-1], Q[k]);
387           log2_nb_terms[k-1] ++; /* number of terms in S[k-1]
388                                     is a power of 2 by construction */
389           MPFR_MPZ_SIZEINBASE2(prec_i_have, Q[k]);
390           mult[k-1] += prec_i_have + (r << l) - size_ptoj[l] - 1;
391           accu[k-1] = (k == 1) ? mult[k-1] : mult[k-1] + accu[k-2];
392           prec_i_have = accu[k-1];
393           l ++;
394           j >>= 1;
395           k --;
396         }
397     }
398 
399   /* accumulate all products in T[0] and Q[0]. Warning: contrary to above,
400      here we do not have log2_nb_terms[k-1] = log2_nb_terms[k]+1. */
401   h = 0; /* number of accumulated terms in the right part T[k]/Q[k] */
402   while (k > 0)
403     {
404       mpz_mul (T[k], T[k], ptoj[log2_nb_terms[k-1]]);
405       mpz_mul (T[k-1], T[k-1], Q[k]);
406       h += (mpfr_prec_t) 1 << log2_nb_terms[k];
407       mpz_mul_2exp (T[k-1], T[k-1], r * h);
408       mpz_add (T[k-1], T[k-1], T[k]);
409       mpz_mul (Q[k-1], Q[k-1], Q[k]);
410       k--;
411     }
412 
413   m = r0 + r * (i - 1); /* implicit multiplier 2^r for Q0 */
414   /* at this point T[0]/(2^m*Q[0]) is an approximation of sin(x) where the 1st
415      neglected term has contribution < 1/2^prec, thus since the series has
416      alternate signs, the error is < 1/2^prec */
417 
418   /* we truncate Q0 to prec bits: the relative error is at most 2^(1-prec),
419      which means that Q0 = Q[0] * (1+theta) with |theta| <= 2^(1-prec)
420      [up to a power of two] */
421   m += reduce (Q0, Q[0], prec);
422   m -= reduce (T[0], T[0], prec);
423   /* multiply by x = p/2^m */
424   mpz_mul (S0, T[0], p);
425   m -= reduce (S0, S0, prec); /* S0 = T[0] * (1 + theta)^2 up to power of 2 */
426   /* sin(X) ~ S0/Q0*(1 + theta)^3 + err with |theta| <= 2^(1-prec) and
427               |err| <= 2^(-prec), thus since |S0/Q0| <= 1:
428      |sin(X) - S0/Q0| <= 4*|theta*S0/Q0| + |err| <= 9*2^(-prec) */
429 
430   mpz_clear (pp);
431   for (k = 0; k < alloc; k ++)
432     {
433       mpz_clear (T[k]);
434       mpz_clear (Q[k]);
435       mpz_clear (ptoj[k]);
436     }
437 
438   /* compute cos(X) from sin(X): sqrt(1-(S/Q)^2) = sqrt(Q^2-S^2)/Q
439      = sqrt(Q0^2*2^(2m)-S0^2)/Q0.
440      Write S/Q = sin(X) + eps with |eps| <= 9*2^(-prec),
441      then sqrt(Q^2-S^2) = sqrt(Q^2-Q^2*(sin(X)+eps)^2)
442                         = sqrt(Q^2*cos(X)^2-Q^2*(2*sin(X)*eps+eps^2))
443                         = sqrt(Q^2*cos(X)^2-Q^2*eps1) with |eps1|<=9*2^(-prec)
444                           [using X<=1/2 and eps<=9*2^(-prec) and prec>=10]
445 
446                         Since we truncate the square root, we get:
447                           sqrt(Q^2*cos(X)^2-Q^2*eps1)+eps2 with |eps2|<1
448                         = Q*sqrt(cos(X)^2-eps1)+eps2
449                         = Q*cos(X)*(1+eps3)+eps2 with |eps3| <= 6*2^(-prec)
450                         = Q*cos(X)*(1+eps3+eps2/(Q*cos(X)))
451                         = Q*cos(X)*(1+eps4) with |eps4| <= 9*2^(-prec)
452                           since |Q| >= 2^(prec-1) */
453   /* we assume that Q0*2^m >= 2^(prec-1) */
454   MPFR_ASSERTN(m + mpz_sizeinbase (Q0, 2) >= prec);
455   mpz_mul (C0, Q0, Q0);
456   mpz_mul_2exp (C0, C0, 2 * m);
457   mpz_submul (C0, S0, S0);
458   mpz_sqrt (C0, C0);
459 
460   return m;
461 }
462 
463 /* Put in s and c approximations of sin(x) and cos(x) respectively.
464    Assumes 0 < x < Pi/4 and PREC(s) = PREC(c) >= 10.
465    Return err such that the relative error is bounded by 2^err ulps.
466 */
467 static int
sincos_aux(mpfr_ptr s,mpfr_ptr c,mpfr_srcptr x,mpfr_rnd_t rnd_mode)468 sincos_aux (mpfr_ptr s, mpfr_ptr c, mpfr_srcptr x, mpfr_rnd_t rnd_mode)
469 {
470   mpfr_prec_t prec_s, sh;
471   mpz_t Q, S, C, Q2, S2, C2, y;
472   mpfr_t x2;
473   unsigned long l, l2, j, err;
474 
475   MPFR_ASSERTD(MPFR_PREC(s) == MPFR_PREC(c));
476 
477   prec_s = MPFR_PREC(s);
478 
479   mpfr_init2 (x2, MPFR_PREC(x));
480   mpz_init (Q);
481   mpz_init (S);
482   mpz_init (C);
483   mpz_init (Q2);
484   mpz_init (S2);
485   mpz_init (C2);
486   mpz_init (y);
487 
488   mpfr_set (x2, x, MPFR_RNDN); /* exact */
489   mpz_set_ui (Q, 1);
490   l = 0;
491   mpz_set_ui (S, 0); /* sin(0) = S/(2^l*Q), exact */
492   mpz_set_ui (C, 1); /* cos(0) = C/(2^l*Q), exact */
493 
494   /* Invariant: x = X + x2/2^(sh-1), where the part X was already treated,
495      S/(2^l*Q) ~ sin(X), C/(2^l*Q) ~ cos(X), and x2/2^(sh-1) < Pi/4.
496      'sh-1' is the number of already shifted bits in x2.
497   */
498 
499   for (sh = 1, j = 0; mpfr_cmp_ui (x2, 0) != 0 && sh <= prec_s; sh <<= 1, j++)
500     {
501       if (sh > prec_s / 2) /* sin(x) = x + O(x^3), cos(x) = 1 + O(x^2) */
502         {
503           l2 = -mpfr_get_z_2exp (S2, x2); /* S2/2^l2 = x2 */
504           l2 += sh - 1;
505           mpz_set_ui (Q2, 1);
506           mpz_set_ui (C2, 1);
507           mpz_mul_2exp (C2, C2, l2);
508           mpfr_set_ui (x2, 0, MPFR_RNDN);
509         }
510       else
511         {
512           /* y <- trunc(x2 * 2^sh) = trunc(x * 2^(2*sh-1)) */
513           mpfr_mul_2ui (x2, x2, sh, MPFR_RNDN); /* exact */
514           mpfr_get_z (y, x2, MPFR_RNDZ); /* round toward zero: now
515                                            0 <= x2 < 2^sh, thus
516                                            0 <= x2/2^(sh-1) < 2^(1-sh) */
517           if (mpz_cmp_ui (y, 0) == 0)
518             continue;
519           mpfr_sub_z (x2, x2, y, MPFR_RNDN); /* should be exact */
520           l2 = sin_bs_aux (Q2, S2, C2, y, 2 * sh - 1, prec_s);
521           /* we now have |S2/Q2/2^l2 - sin(X)| <= 9*2^(prec_s)
522              and |C2/Q2/2^l2 - cos(X)| <= 6*2^(prec_s), with X=y/2^(2sh-1) */
523         }
524       if (sh == 1) /* S=0, C=1 */
525         {
526           l = l2;
527           mpz_swap (Q, Q2);
528           mpz_swap (S, S2);
529           mpz_swap (C, C2);
530         }
531       else
532         {
533           /* s <- s*c2+c*s2, c <- c*c2-s*s2, using Karatsuba:
534              a = s+c, b = s2+c2, t = a*b, d = s*s2, e = c*c2,
535              s <- t - d - e, c <- e - d */
536           mpz_add (y, S, C); /* a */
537           mpz_mul (C, C, C2); /* e */
538           mpz_add (C2, C2, S2); /* b */
539           mpz_mul (S2, S, S2); /* d */
540           mpz_mul (y, y, C2); /* a*b */
541           mpz_sub (S, y, S2); /* t - d */
542           mpz_sub (S, S, C); /* t - d - e */
543           mpz_sub (C, C, S2); /* e - d */
544           mpz_mul (Q, Q, Q2);
545           /* after j loops, the error is <= (11j-2)*2^(prec_s) */
546           l += l2;
547           /* reduce Q to prec_s bits */
548           l += reduce (Q, Q, prec_s);
549           /* reduce S,C to prec_s bits, error <= 11*j*2^(prec_s) */
550           l -= reduce2 (S, C, prec_s);
551         }
552     }
553 
554   j = 11 * j;
555   for (err = 0; j > 1; j = (j + 1) / 2, err ++);
556 
557   mpfr_set_z (s, S, MPFR_RNDN);
558   mpfr_div_z (s, s, Q, MPFR_RNDN);
559   mpfr_div_2ui (s, s, l, MPFR_RNDN);
560 
561   mpfr_set_z (c, C, MPFR_RNDN);
562   mpfr_div_z (c, c, Q, MPFR_RNDN);
563   mpfr_div_2ui (c, c, l, MPFR_RNDN);
564 
565   mpz_clear (Q);
566   mpz_clear (S);
567   mpz_clear (C);
568   mpz_clear (Q2);
569   mpz_clear (S2);
570   mpz_clear (C2);
571   mpz_clear (y);
572   mpfr_clear (x2);
573   return err;
574 }
575 
576 /* Assumes x is neither NaN, +/-Inf, nor +/- 0.
577    One of s and c might be NULL, in which case the corresponding value is
578    not computed.
579    Assumes s differs from c.
580  */
581 int
mpfr_sincos_fast(mpfr_ptr s,mpfr_ptr c,mpfr_srcptr x,mpfr_rnd_t rnd)582 mpfr_sincos_fast (mpfr_ptr s, mpfr_ptr c, mpfr_srcptr x, mpfr_rnd_t rnd)
583 {
584   int inexs, inexc;
585   mpfr_t x_red, ts, tc;
586   mpfr_prec_t w;
587   mpfr_exp_t err, errs, errc;
588   MPFR_GROUP_DECL (group);
589   MPFR_ZIV_DECL (loop);
590 
591   MPFR_ASSERTN(s != c);
592   if (s == NULL)
593     w = MPFR_PREC(c);
594   else if (c == NULL)
595     w = MPFR_PREC(s);
596   else
597     w = MPFR_PREC(s) >= MPFR_PREC(c) ? MPFR_PREC(s) : MPFR_PREC(c);
598   w += MPFR_INT_CEIL_LOG2(w) + 9; /* ensures w >= 10 (needed by sincos_aux) */
599 
600   MPFR_GROUP_INIT_2(group, w, ts, tc);
601 
602   MPFR_ZIV_INIT (loop, w);
603   for (;;)
604     {
605       /* if 0 < x <= Pi/4, we can call sincos_aux directly */
606       if (MPFR_IS_POS(x) && mpfr_cmp_ui_2exp (x, 1686629713, -31) <= 0)
607         {
608           err = sincos_aux (ts, tc, x, MPFR_RNDN);
609         }
610       /* if -Pi/4 <= x < 0, use sin(-x)=-sin(x) */
611       else if (MPFR_IS_NEG(x) && mpfr_cmp_si_2exp (x, -1686629713, -31) >= 0)
612         {
613           MPFR_ALIAS(x_red, x, MPFR_SIGN_POS, MPFR_GET_EXP(x));
614           err = sincos_aux (ts, tc, x_red, MPFR_RNDN);
615           MPFR_CHANGE_SIGN(ts);
616         }
617       else /* argument reduction is needed */
618         {
619           long q;
620           mpfr_t pi;
621           int neg = 0;
622 
623           mpfr_init2 (x_red, w);
624           mpfr_init2 (pi, (MPFR_EXP(x) > 0) ? w + MPFR_EXP(x) : w);
625           mpfr_const_pi (pi, MPFR_RNDN);
626           mpfr_div_2ui (pi, pi, 1, MPFR_RNDN); /* Pi/2 */
627           mpfr_remquo (x_red, &q, x, pi, MPFR_RNDN);
628           /* x = q * (Pi/2 + eps1) + x_red + eps2,
629              where |eps1| <= 1/2*ulp(Pi/2) = 2^(-w-MAX(0,EXP(x))),
630              and eps2 <= 1/2*ulp(x_red) <= 1/2*ulp(Pi/2) = 2^(-w)
631              Since |q| <= x/(Pi/2) <= |x|, we have
632              q*|eps1| <= 2^(-w), thus
633              |x - q * Pi/2 - x_red| <= 2^(1-w) */
634           /* now -Pi/4 <= x_red <= Pi/4: if x_red < 0, consider -x_red */
635           if (MPFR_IS_NEG(x_red))
636             {
637               mpfr_neg (x_red, x_red, MPFR_RNDN);
638               neg = 1;
639             }
640           err = sincos_aux (ts, tc, x_red, MPFR_RNDN);
641           err ++; /* to take into account the argument reduction */
642           if (neg) /* sin(-x) = -sin(x), cos(-x) = cos(x) */
643             mpfr_neg (ts, ts, MPFR_RNDN);
644           if (q & 2) /* sin(x+Pi) = -sin(x), cos(x+Pi) = -cos(x) */
645             {
646               mpfr_neg (ts, ts, MPFR_RNDN);
647               mpfr_neg (tc, tc, MPFR_RNDN);
648             }
649           if (q & 1) /* sin(x+Pi/2) = cos(x), cos(x+Pi/2) = -sin(x) */
650             {
651               mpfr_neg (ts, ts, MPFR_RNDN);
652               mpfr_swap (ts, tc);
653             }
654           mpfr_clear (x_red);
655           mpfr_clear (pi);
656         }
657       /* adjust errors with respect to absolute values */
658       errs = err - MPFR_EXP(ts);
659       errc = err - MPFR_EXP(tc);
660       if ((s == NULL || MPFR_CAN_ROUND (ts, w - errs, MPFR_PREC(s), rnd)) &&
661           (c == NULL || MPFR_CAN_ROUND (tc, w - errc, MPFR_PREC(c), rnd)))
662         break;
663       MPFR_ZIV_NEXT (loop, w);
664       MPFR_GROUP_REPREC_2(group, w, ts, tc);
665     }
666   MPFR_ZIV_FREE (loop);
667 
668   inexs = (s == NULL) ? 0 : mpfr_set (s, ts, rnd);
669   inexc = (c == NULL) ? 0 : mpfr_set (c, tc, rnd);
670 
671   MPFR_GROUP_CLEAR (group);
672   return INEX(inexs,inexc);
673 }
674