1 /* mpfr_sin_cos -- sine and cosine of a floating-point number
2
3 Copyright 2002-2023 Free Software Foundation, Inc.
4 Contributed by the AriC and Caramba projects, INRIA.
5
6 This file is part of the GNU MPFR Library.
7
8 The GNU MPFR Library is free software; you can redistribute it and/or modify
9 it under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or (at your
11 option) any later version.
12
13 The GNU MPFR Library is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
16 License for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 along with the GNU MPFR Library; see the file COPYING.LESSER. If not, see
20 https://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
21 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
22
23 #define MPFR_NEED_LONGLONG_H
24 #include "mpfr-impl.h"
25
26 /* (y, z) <- (sin(x), cos(x)), return value is 0 iff both results are exact
27 ie, iff x = 0 */
28 int
mpfr_sin_cos(mpfr_ptr y,mpfr_ptr z,mpfr_srcptr x,mpfr_rnd_t rnd_mode)29 mpfr_sin_cos (mpfr_ptr y, mpfr_ptr z, mpfr_srcptr x, mpfr_rnd_t rnd_mode)
30 {
31 mpfr_prec_t prec, m;
32 int neg, reduce;
33 mpfr_t c, xr;
34 mpfr_srcptr xx;
35 mpfr_exp_t err, expx;
36 int inexy, inexz;
37 MPFR_ZIV_DECL (loop);
38 MPFR_SAVE_EXPO_DECL (expo);
39
40 MPFR_ASSERTN (y != z);
41
42 if (MPFR_UNLIKELY (MPFR_IS_SINGULAR (x)))
43 {
44 if (MPFR_IS_NAN(x) || MPFR_IS_INF(x))
45 {
46 MPFR_SET_NAN (y);
47 MPFR_SET_NAN (z);
48 MPFR_RET_NAN;
49 }
50 else /* x is zero */
51 {
52 MPFR_ASSERTD (MPFR_IS_ZERO (x));
53 MPFR_SET_ZERO (y);
54 MPFR_SET_SAME_SIGN (y, x);
55 /* y = 0, thus exact, but z is inexact in case of underflow
56 or overflow */
57 inexy = 0; /* y is exact */
58 inexz = mpfr_set_ui (z, 1, rnd_mode);
59 return INEX(inexy,inexz);
60 }
61 }
62
63 MPFR_LOG_FUNC
64 (("x[%Pd]=%.*Rg rnd=%d", mpfr_get_prec (x), mpfr_log_prec, x, rnd_mode),
65 ("sin[%Pd]=%.*Rg cos[%Pd]=%.*Rg", mpfr_get_prec(y), mpfr_log_prec, y,
66 mpfr_get_prec (z), mpfr_log_prec, z));
67
68 MPFR_SAVE_EXPO_MARK (expo);
69
70 prec = MAX (MPFR_PREC (y), MPFR_PREC (z));
71 m = prec + MPFR_INT_CEIL_LOG2 (prec) + 13;
72 expx = MPFR_GET_EXP (x);
73
74 /* When x is close to 0, say 2^(-k), then there is a cancellation of about
75 2k bits in 1-cos(x)^2. FIXME: in that case, it would be more efficient
76 to compute sin(x) directly. VL: This is partly done by using
77 MPFR_FAST_COMPUTE_IF_SMALL_INPUT from the mpfr_sin and mpfr_cos
78 functions. Moreover, any overflow on m is avoided. */
79 if (expx < 0)
80 {
81 /* Warning: in case y = x, and the first call to
82 MPFR_FAST_COMPUTE_IF_SMALL_INPUT succeeds but the second fails,
83 we will have clobbered the original value of x.
84 The workaround is to first compute z = cos(x) in that case, since
85 y and z are different. */
86 if (y != x)
87 /* y and x differ, thus we can safely try to compute y first */
88 {
89 MPFR_FAST_COMPUTE_IF_SMALL_INPUT (
90 y, x, -2 * expx, 2, 0, rnd_mode,
91 { inexy = _inexact;
92 goto small_input; });
93 if (0)
94 {
95 small_input:
96 /* we can go here only if we can round sin(x) */
97 MPFR_FAST_COMPUTE_IF_SMALL_INPUT (
98 z, __gmpfr_one, -2 * expx, 1, 0, rnd_mode,
99 { inexz = _inexact;
100 MPFR_SAVE_EXPO_UPDATE_FLAGS (expo, __gmpfr_flags);
101 goto end; });
102 }
103
104 /* if we go here, one of the two MPFR_FAST_COMPUTE_IF_SMALL_INPUT
105 calls failed */
106 }
107 else /* y and x are the same variable: try to compute z first, which
108 necessarily differs */
109 {
110 MPFR_FAST_COMPUTE_IF_SMALL_INPUT (
111 z, __gmpfr_one, -2 * expx, 1, 0, rnd_mode,
112 { inexz = _inexact;
113 goto small_input2; });
114 if (0)
115 {
116 small_input2:
117 /* we can go here only if we can round cos(x) */
118 MPFR_FAST_COMPUTE_IF_SMALL_INPUT (
119 y, x, -2 * expx, 2, 0, rnd_mode,
120 { inexy = _inexact;
121 MPFR_SAVE_EXPO_UPDATE_FLAGS (expo, __gmpfr_flags);
122 goto end; });
123 }
124 }
125 m += 2 * (-expx);
126 }
127
128 if (prec >= MPFR_SINCOS_THRESHOLD)
129 {
130 MPFR_SAVE_EXPO_FREE (expo);
131 return mpfr_sincos_fast (y, z, x, rnd_mode);
132 }
133
134 mpfr_init2 (c, m);
135 mpfr_init2 (xr, m);
136
137 MPFR_ZIV_INIT (loop, m);
138 for (;;)
139 {
140 /* the following is copied from sin.c */
141 if (expx >= 2) /* reduce the argument */
142 {
143 reduce = 1;
144 MPFR_ASSERTN (expx + m - 1 <= MPFR_PREC_MAX);
145 mpfr_set_prec (c, expx + m - 1);
146 mpfr_set_prec (xr, m);
147 mpfr_const_pi (c, MPFR_RNDN);
148 mpfr_mul_2ui (c, c, 1, MPFR_RNDN);
149 mpfr_remainder (xr, x, c, MPFR_RNDN);
150 mpfr_div_2ui (c, c, 1, MPFR_RNDN);
151 if (MPFR_IS_POS (xr))
152 mpfr_sub (c, c, xr, MPFR_RNDZ);
153 else
154 mpfr_add (c, c, xr, MPFR_RNDZ);
155 if (MPFR_IS_ZERO(xr)
156 || MPFR_EXP(xr) < (mpfr_exp_t) 3 - (mpfr_exp_t) m
157 || MPFR_EXP(c) < (mpfr_exp_t) 3 - (mpfr_exp_t) m)
158 goto next_step;
159 xx = xr;
160 }
161 else /* the input argument is already reduced */
162 {
163 reduce = 0;
164 xx = x;
165 }
166
167 neg = MPFR_IS_NEG (xx); /* gives sign of sin(x) */
168 mpfr_set_prec (c, m);
169 mpfr_cos (c, xx, MPFR_RNDZ);
170 /* If no argument reduction was performed, the error is at most ulp(c),
171 otherwise it is at most ulp(c) + 2^(2-m). Since |c| < 1, we have
172 ulp(c) <= 2^(-m), thus the error is bounded by 2^(3-m) in that later
173 case. */
174 if (reduce == 0)
175 err = m;
176 else
177 err = MPFR_GET_EXP (c) + (mpfr_exp_t) (m - 3);
178 if (!MPFR_CAN_ROUND (c, err, MPFR_PREC (z), rnd_mode))
179 goto next_step;
180
181 /* We can't set z now, because in case z = x, and the MPFR_CAN_ROUND()
182 call below fails, we will have clobbered the input.
183 Note: m below is the precision of c; see above. */
184 mpfr_set_prec (xr, m);
185 mpfr_swap (xr, c); /* save the approximation of the cosine in xr */
186 mpfr_sqr (c, xr, MPFR_RNDU); /* the absolute error is bounded by
187 2^(5-m) if reduce=1, and by 2^(2-m)
188 otherwise */
189 mpfr_ui_sub (c, 1, c, MPFR_RNDN); /* error bounded by 2^(6-m) if reduce
190 is 1, and 2^(3-m) otherwise */
191 mpfr_sqrt (c, c, MPFR_RNDN); /* the absolute error is bounded by
192 2^(6-m-Exp(c)) if reduce=1, and
193 2^(3-m-Exp(c)) otherwise */
194 err = 3 + 3 * reduce - MPFR_GET_EXP (c);
195 if (neg)
196 MPFR_CHANGE_SIGN (c);
197
198 /* the absolute error on c is at most 2^(err-m), which we must put
199 in the form 2^(EXP(c)-err). */
200 err = MPFR_GET_EXP (c) + (mpfr_exp_t) m - err;
201 if (MPFR_CAN_ROUND (c, err, MPFR_PREC (y), rnd_mode))
202 break;
203 /* check for huge cancellation */
204 if (err < (mpfr_exp_t) MPFR_PREC (y))
205 m += MPFR_PREC (y) - err;
206 /* Check if near 1 */
207 if (MPFR_GET_EXP (c) == 1
208 && MPFR_MANT (c)[MPFR_LIMB_SIZE (c)-1] == MPFR_LIMB_HIGHBIT)
209 m += m;
210
211 next_step:
212 MPFR_ZIV_NEXT (loop, m);
213 mpfr_set_prec (c, m);
214 }
215 MPFR_ZIV_FREE (loop);
216
217 inexy = mpfr_set (y, c, rnd_mode);
218 inexz = mpfr_set (z, xr, rnd_mode);
219
220 mpfr_clear (c);
221 mpfr_clear (xr);
222
223 end:
224 MPFR_SAVE_EXPO_FREE (expo);
225 /* FIXME: add a test for bug before revision 7355 */
226 inexy = mpfr_check_range (y, inexy, rnd_mode);
227 inexz = mpfr_check_range (z, inexz, rnd_mode);
228 MPFR_RET (INEX(inexy,inexz));
229 }
230
231 /*************** asymptotically fast implementation below ********************/
232
233 /* truncate Q from R to at most prec bits.
234 Return the number of truncated bits.
235 */
236 static mpfr_prec_t
reduce(mpz_t Q,mpz_srcptr R,mpfr_prec_t prec)237 reduce (mpz_t Q, mpz_srcptr R, mpfr_prec_t prec)
238 {
239 mpfr_prec_t l;
240
241 MPFR_MPZ_SIZEINBASE2(l, R);
242 l = (l > prec) ? l - prec : 0;
243 mpz_fdiv_q_2exp (Q, R, l);
244 return l;
245 }
246
247 /* truncate S and C so that the smaller has prec bits.
248 Return the number of truncated bits.
249 */
250 static unsigned long
reduce2(mpz_t S,mpz_t C,mpfr_prec_t prec)251 reduce2 (mpz_t S, mpz_t C, mpfr_prec_t prec)
252 {
253 unsigned long ls;
254 unsigned long lc;
255 unsigned long l;
256
257 MPFR_MPZ_SIZEINBASE2(ls, S);
258 MPFR_MPZ_SIZEINBASE2(lc, C);
259
260 l = (ls < lc) ? ls : lc; /* smaller length */
261 l = (l > prec) ? l - prec : 0;
262 mpz_fdiv_q_2exp (S, S, l);
263 mpz_fdiv_q_2exp (C, C, l);
264 return l;
265 }
266
267 /* return in S0/Q0 a rational approximation of sin(X) with absolute error
268 bounded by 9*2^(-prec), where 0 <= X=p/2^r <= 1/2,
269 and in C0/Q0 a rational approximation of cos(X), with relative error
270 bounded by 9*2^(-prec) (and also absolute error, since
271 |cos(X)| <= 1).
272 We have sin(X)/X = sum((-1)^i*(p/2^r)^i/(2i+1)!, i=0..infinity).
273 We use the following binary splitting formula:
274 P(a,b) = (-p)^(b-a)
275 Q(a,b) = (2a)*(2a+1)*2^r if a+1=b [except Q(0,1)=1], Q(a,c)*Q(c,b) otherwise
276 T(a,b) = 1 if a+1=b, Q(c,b)*T(a,c)+P(a,c)*T(c,b) otherwise.
277
278 Since we use P(a,b) for b-a=2^k only, we compute only p^(2^k).
279 We do not store the factor 2^r in Q().
280
281 Then sin(X)/X ~ T(0,i)/Q(0,i) for i so that (p/2^r)^i/i! is small enough.
282
283 Return l such that Q0 has to be multiplied by 2^l.
284
285 Assumes prec >= 10.
286 */
287
288 #define KMAX 64
289 static unsigned long
sin_bs_aux(mpz_t Q0,mpz_t S0,mpz_t C0,mpz_srcptr p,mpfr_prec_t r,mpfr_prec_t prec)290 sin_bs_aux (mpz_t Q0, mpz_t S0, mpz_t C0, mpz_srcptr p, mpfr_prec_t r,
291 mpfr_prec_t prec)
292 {
293 mpz_t T[KMAX], Q[KMAX], ptoj[KMAX], pp;
294 mpfr_prec_t log2_nb_terms[KMAX], mult[KMAX];
295 mpfr_prec_t accu[KMAX], size_ptoj[KMAX];
296 mpfr_prec_t prec_i_have, h, r0 = r, pp_s, p_s;
297 unsigned long i, j, m;
298 int alloc, k, l;
299
300 if (MPFR_UNLIKELY(mpz_cmp_ui (p, 0) == 0)) /* sin(x)/x -> 1 */
301 {
302 mpz_set_ui (Q0, 1);
303 mpz_set_ui (S0, 1);
304 mpz_set_ui (C0, 1);
305 return 0;
306 }
307
308 /* check that X=p/2^r <= 1/2 */
309 MPFR_ASSERTN(mpz_sizeinbase (p, 2) - (mpfr_exp_t) r <= -1);
310
311 mpz_init (pp);
312
313 /* normalize p (non-zero here) */
314 h = mpz_scan1 (p, 0);
315 mpz_fdiv_q_2exp (pp, p, h); /* p = pp * 2^h */
316 mpz_mul (pp, pp, pp);
317 r = 2 * (r - h); /* x^2 = (p/2^r0)^2 = pp / 2^r */
318
319 /* now p is odd */
320 alloc = 2;
321 mpz_init_set_ui (T[0], 6);
322 mpz_init_set_ui (Q[0], 6);
323 mpz_init_set (ptoj[0], pp); /* ptoj[i] = pp^(2^i) */
324 mpz_init (T[1]);
325 mpz_init (Q[1]);
326 mpz_init (ptoj[1]);
327 mpz_mul (ptoj[1], pp, pp); /* ptoj[1] = pp^2 */
328 MPFR_MPZ_SIZEINBASE2(size_ptoj[1], ptoj[1]);
329
330 mpz_mul_2exp (T[0], T[0], r);
331 mpz_sub (T[0], T[0], pp); /* 6*2^r - pp = 6*2^r*(1 - x^2/6) */
332 log2_nb_terms[0] = 1;
333
334 /* already take into account the factor x=p/2^r in sin(x) = x * (...) */
335 MPFR_MPZ_SIZEINBASE2(pp_s, pp);
336 MPFR_MPZ_SIZEINBASE2(p_s, p);
337 mult[0] = r - pp_s + r0 - p_s;
338 /* we have x^3 < 1/2^mult[0] */
339
340 for (i = 2, k = 0, prec_i_have = mult[0]; prec_i_have < prec; i += 2)
341 {
342 /* i is even here */
343 /* invariant: Q[0]*Q[1]*...*Q[k] equals (2i-1)!,
344 we have already summed terms of index < i
345 in S[0]/Q[0], ..., S[k]/Q[k] */
346 k ++;
347 if (k + 1 >= alloc) /* necessarily k + 1 = alloc */
348 {
349 MPFR_ASSERTD (k + 1 == alloc);
350 alloc ++;
351 MPFR_ASSERTN (k + 1 < KMAX);
352 mpz_init (T[k+1]);
353 mpz_init (Q[k+1]);
354 mpz_init (ptoj[k+1]);
355 mpz_mul (ptoj[k+1], ptoj[k], ptoj[k]); /* pp^(2^(k+1)) */
356 MPFR_MPZ_SIZEINBASE2(size_ptoj[k+1], ptoj[k+1]);
357 }
358 /* for i even, we have Q[k] = (2*i)*(2*i+1), T[k] = 1,
359 then Q[k+1] = (2*i+2)*(2*i+3), T[k+1] = 1,
360 which reduces to T[k] = (2*i+2)*(2*i+3)*2^r-pp,
361 Q[k] = (2*i)*(2*i+1)*(2*i+2)*(2*i+3). */
362 MPFR_ASSERTN (k < KMAX);
363 log2_nb_terms[k] = 1;
364 mpz_set_ui (Q[k], 2 * i + 2);
365 mpz_mul_ui (Q[k], Q[k], 2 * i + 3);
366 mpz_mul_2exp (T[k], Q[k], r);
367 mpz_sub (T[k], T[k], pp);
368 mpz_mul_ui (Q[k], Q[k], 2 * i);
369 mpz_mul_ui (Q[k], Q[k], 2 * i + 1);
370 /* the next term of the series is divided by Q[k] and multiplied
371 by pp^2/2^(2r), thus the mult. factor < 1/2^mult[k] */
372 MPFR_MPZ_SIZEINBASE2(mult[k], Q[k]);
373 mult[k] += 2 * r - size_ptoj[1] - 1;
374 /* the absolute contribution of the next term is 1/2^accu[k] */
375 accu[k] = (k == 0) ? mult[k] : mult[k] + accu[k-1];
376 prec_i_have = accu[k]; /* the current term is < 1/2^accu[k] */
377 j = (i + 2) / 2;
378 l = 1;
379 while ((j & 1) == 0) /* combine and reduce */
380 {
381 MPFR_ASSERTN (k >= 1);
382 mpz_mul (T[k], T[k], ptoj[l]);
383 mpz_mul (T[k-1], T[k-1], Q[k]);
384 mpz_mul_2exp (T[k-1], T[k-1], r << l);
385 mpz_add (T[k-1], T[k-1], T[k]);
386 mpz_mul (Q[k-1], Q[k-1], Q[k]);
387 log2_nb_terms[k-1] ++; /* number of terms in S[k-1]
388 is a power of 2 by construction */
389 MPFR_MPZ_SIZEINBASE2(prec_i_have, Q[k]);
390 mult[k-1] += prec_i_have + (r << l) - size_ptoj[l] - 1;
391 accu[k-1] = (k == 1) ? mult[k-1] : mult[k-1] + accu[k-2];
392 prec_i_have = accu[k-1];
393 l ++;
394 j >>= 1;
395 k --;
396 }
397 }
398
399 /* accumulate all products in T[0] and Q[0]. Warning: contrary to above,
400 here we do not have log2_nb_terms[k-1] = log2_nb_terms[k]+1. */
401 h = 0; /* number of accumulated terms in the right part T[k]/Q[k] */
402 while (k > 0)
403 {
404 mpz_mul (T[k], T[k], ptoj[log2_nb_terms[k-1]]);
405 mpz_mul (T[k-1], T[k-1], Q[k]);
406 h += (mpfr_prec_t) 1 << log2_nb_terms[k];
407 mpz_mul_2exp (T[k-1], T[k-1], r * h);
408 mpz_add (T[k-1], T[k-1], T[k]);
409 mpz_mul (Q[k-1], Q[k-1], Q[k]);
410 k--;
411 }
412
413 m = r0 + r * (i - 1); /* implicit multiplier 2^r for Q0 */
414 /* at this point T[0]/(2^m*Q[0]) is an approximation of sin(x) where the 1st
415 neglected term has contribution < 1/2^prec, thus since the series has
416 alternate signs, the error is < 1/2^prec */
417
418 /* we truncate Q0 to prec bits: the relative error is at most 2^(1-prec),
419 which means that Q0 = Q[0] * (1+theta) with |theta| <= 2^(1-prec)
420 [up to a power of two] */
421 m += reduce (Q0, Q[0], prec);
422 m -= reduce (T[0], T[0], prec);
423 /* multiply by x = p/2^m */
424 mpz_mul (S0, T[0], p);
425 m -= reduce (S0, S0, prec); /* S0 = T[0] * (1 + theta)^2 up to power of 2 */
426 /* sin(X) ~ S0/Q0*(1 + theta)^3 + err with |theta| <= 2^(1-prec) and
427 |err| <= 2^(-prec), thus since |S0/Q0| <= 1:
428 |sin(X) - S0/Q0| <= 4*|theta*S0/Q0| + |err| <= 9*2^(-prec) */
429
430 mpz_clear (pp);
431 for (k = 0; k < alloc; k ++)
432 {
433 mpz_clear (T[k]);
434 mpz_clear (Q[k]);
435 mpz_clear (ptoj[k]);
436 }
437
438 /* compute cos(X) from sin(X): sqrt(1-(S/Q)^2) = sqrt(Q^2-S^2)/Q
439 = sqrt(Q0^2*2^(2m)-S0^2)/Q0.
440 Write S/Q = sin(X) + eps with |eps| <= 9*2^(-prec),
441 then sqrt(Q^2-S^2) = sqrt(Q^2-Q^2*(sin(X)+eps)^2)
442 = sqrt(Q^2*cos(X)^2-Q^2*(2*sin(X)*eps+eps^2))
443 = sqrt(Q^2*cos(X)^2-Q^2*eps1) with |eps1|<=9*2^(-prec)
444 [using X<=1/2 and eps<=9*2^(-prec) and prec>=10]
445
446 Since we truncate the square root, we get:
447 sqrt(Q^2*cos(X)^2-Q^2*eps1)+eps2 with |eps2|<1
448 = Q*sqrt(cos(X)^2-eps1)+eps2
449 = Q*cos(X)*(1+eps3)+eps2 with |eps3| <= 6*2^(-prec)
450 = Q*cos(X)*(1+eps3+eps2/(Q*cos(X)))
451 = Q*cos(X)*(1+eps4) with |eps4| <= 9*2^(-prec)
452 since |Q| >= 2^(prec-1) */
453 /* we assume that Q0*2^m >= 2^(prec-1) */
454 MPFR_ASSERTN(m + mpz_sizeinbase (Q0, 2) >= prec);
455 mpz_mul (C0, Q0, Q0);
456 mpz_mul_2exp (C0, C0, 2 * m);
457 mpz_submul (C0, S0, S0);
458 mpz_sqrt (C0, C0);
459
460 return m;
461 }
462
463 /* Put in s and c approximations of sin(x) and cos(x) respectively.
464 Assumes 0 < x < Pi/4 and PREC(s) = PREC(c) >= 10.
465 Return err such that the relative error is bounded by 2^err ulps.
466 */
467 static int
sincos_aux(mpfr_ptr s,mpfr_ptr c,mpfr_srcptr x,mpfr_rnd_t rnd_mode)468 sincos_aux (mpfr_ptr s, mpfr_ptr c, mpfr_srcptr x, mpfr_rnd_t rnd_mode)
469 {
470 mpfr_prec_t prec_s, sh;
471 mpz_t Q, S, C, Q2, S2, C2, y;
472 mpfr_t x2;
473 unsigned long l, l2, j, err;
474
475 MPFR_ASSERTD(MPFR_PREC(s) == MPFR_PREC(c));
476
477 prec_s = MPFR_PREC(s);
478
479 mpfr_init2 (x2, MPFR_PREC(x));
480 mpz_init (Q);
481 mpz_init (S);
482 mpz_init (C);
483 mpz_init (Q2);
484 mpz_init (S2);
485 mpz_init (C2);
486 mpz_init (y);
487
488 mpfr_set (x2, x, MPFR_RNDN); /* exact */
489 mpz_set_ui (Q, 1);
490 l = 0;
491 mpz_set_ui (S, 0); /* sin(0) = S/(2^l*Q), exact */
492 mpz_set_ui (C, 1); /* cos(0) = C/(2^l*Q), exact */
493
494 /* Invariant: x = X + x2/2^(sh-1), where the part X was already treated,
495 S/(2^l*Q) ~ sin(X), C/(2^l*Q) ~ cos(X), and x2/2^(sh-1) < Pi/4.
496 'sh-1' is the number of already shifted bits in x2.
497 */
498
499 for (sh = 1, j = 0; mpfr_cmp_ui (x2, 0) != 0 && sh <= prec_s; sh <<= 1, j++)
500 {
501 if (sh > prec_s / 2) /* sin(x) = x + O(x^3), cos(x) = 1 + O(x^2) */
502 {
503 l2 = -mpfr_get_z_2exp (S2, x2); /* S2/2^l2 = x2 */
504 l2 += sh - 1;
505 mpz_set_ui (Q2, 1);
506 mpz_set_ui (C2, 1);
507 mpz_mul_2exp (C2, C2, l2);
508 mpfr_set_ui (x2, 0, MPFR_RNDN);
509 }
510 else
511 {
512 /* y <- trunc(x2 * 2^sh) = trunc(x * 2^(2*sh-1)) */
513 mpfr_mul_2ui (x2, x2, sh, MPFR_RNDN); /* exact */
514 mpfr_get_z (y, x2, MPFR_RNDZ); /* round toward zero: now
515 0 <= x2 < 2^sh, thus
516 0 <= x2/2^(sh-1) < 2^(1-sh) */
517 if (mpz_cmp_ui (y, 0) == 0)
518 continue;
519 mpfr_sub_z (x2, x2, y, MPFR_RNDN); /* should be exact */
520 l2 = sin_bs_aux (Q2, S2, C2, y, 2 * sh - 1, prec_s);
521 /* we now have |S2/Q2/2^l2 - sin(X)| <= 9*2^(prec_s)
522 and |C2/Q2/2^l2 - cos(X)| <= 6*2^(prec_s), with X=y/2^(2sh-1) */
523 }
524 if (sh == 1) /* S=0, C=1 */
525 {
526 l = l2;
527 mpz_swap (Q, Q2);
528 mpz_swap (S, S2);
529 mpz_swap (C, C2);
530 }
531 else
532 {
533 /* s <- s*c2+c*s2, c <- c*c2-s*s2, using Karatsuba:
534 a = s+c, b = s2+c2, t = a*b, d = s*s2, e = c*c2,
535 s <- t - d - e, c <- e - d */
536 mpz_add (y, S, C); /* a */
537 mpz_mul (C, C, C2); /* e */
538 mpz_add (C2, C2, S2); /* b */
539 mpz_mul (S2, S, S2); /* d */
540 mpz_mul (y, y, C2); /* a*b */
541 mpz_sub (S, y, S2); /* t - d */
542 mpz_sub (S, S, C); /* t - d - e */
543 mpz_sub (C, C, S2); /* e - d */
544 mpz_mul (Q, Q, Q2);
545 /* after j loops, the error is <= (11j-2)*2^(prec_s) */
546 l += l2;
547 /* reduce Q to prec_s bits */
548 l += reduce (Q, Q, prec_s);
549 /* reduce S,C to prec_s bits, error <= 11*j*2^(prec_s) */
550 l -= reduce2 (S, C, prec_s);
551 }
552 }
553
554 j = 11 * j;
555 for (err = 0; j > 1; j = (j + 1) / 2, err ++);
556
557 mpfr_set_z (s, S, MPFR_RNDN);
558 mpfr_div_z (s, s, Q, MPFR_RNDN);
559 mpfr_div_2ui (s, s, l, MPFR_RNDN);
560
561 mpfr_set_z (c, C, MPFR_RNDN);
562 mpfr_div_z (c, c, Q, MPFR_RNDN);
563 mpfr_div_2ui (c, c, l, MPFR_RNDN);
564
565 mpz_clear (Q);
566 mpz_clear (S);
567 mpz_clear (C);
568 mpz_clear (Q2);
569 mpz_clear (S2);
570 mpz_clear (C2);
571 mpz_clear (y);
572 mpfr_clear (x2);
573 return err;
574 }
575
576 /* Assumes x is neither NaN, +/-Inf, nor +/- 0.
577 One of s and c might be NULL, in which case the corresponding value is
578 not computed.
579 Assumes s differs from c.
580 */
581 int
mpfr_sincos_fast(mpfr_ptr s,mpfr_ptr c,mpfr_srcptr x,mpfr_rnd_t rnd)582 mpfr_sincos_fast (mpfr_ptr s, mpfr_ptr c, mpfr_srcptr x, mpfr_rnd_t rnd)
583 {
584 int inexs, inexc;
585 mpfr_t x_red, ts, tc;
586 mpfr_prec_t w;
587 mpfr_exp_t err, errs, errc;
588 MPFR_GROUP_DECL (group);
589 MPFR_ZIV_DECL (loop);
590
591 MPFR_ASSERTN(s != c);
592 if (s == NULL)
593 w = MPFR_PREC(c);
594 else if (c == NULL)
595 w = MPFR_PREC(s);
596 else
597 w = MPFR_PREC(s) >= MPFR_PREC(c) ? MPFR_PREC(s) : MPFR_PREC(c);
598 w += MPFR_INT_CEIL_LOG2(w) + 9; /* ensures w >= 10 (needed by sincos_aux) */
599
600 MPFR_GROUP_INIT_2(group, w, ts, tc);
601
602 MPFR_ZIV_INIT (loop, w);
603 for (;;)
604 {
605 /* if 0 < x <= Pi/4, we can call sincos_aux directly */
606 if (MPFR_IS_POS(x) && mpfr_cmp_ui_2exp (x, 1686629713, -31) <= 0)
607 {
608 err = sincos_aux (ts, tc, x, MPFR_RNDN);
609 }
610 /* if -Pi/4 <= x < 0, use sin(-x)=-sin(x) */
611 else if (MPFR_IS_NEG(x) && mpfr_cmp_si_2exp (x, -1686629713, -31) >= 0)
612 {
613 MPFR_ALIAS(x_red, x, MPFR_SIGN_POS, MPFR_GET_EXP(x));
614 err = sincos_aux (ts, tc, x_red, MPFR_RNDN);
615 MPFR_CHANGE_SIGN(ts);
616 }
617 else /* argument reduction is needed */
618 {
619 long q;
620 mpfr_t pi;
621 int neg = 0;
622
623 mpfr_init2 (x_red, w);
624 mpfr_init2 (pi, (MPFR_EXP(x) > 0) ? w + MPFR_EXP(x) : w);
625 mpfr_const_pi (pi, MPFR_RNDN);
626 mpfr_div_2ui (pi, pi, 1, MPFR_RNDN); /* Pi/2 */
627 mpfr_remquo (x_red, &q, x, pi, MPFR_RNDN);
628 /* x = q * (Pi/2 + eps1) + x_red + eps2,
629 where |eps1| <= 1/2*ulp(Pi/2) = 2^(-w-MAX(0,EXP(x))),
630 and eps2 <= 1/2*ulp(x_red) <= 1/2*ulp(Pi/2) = 2^(-w)
631 Since |q| <= x/(Pi/2) <= |x|, we have
632 q*|eps1| <= 2^(-w), thus
633 |x - q * Pi/2 - x_red| <= 2^(1-w) */
634 /* now -Pi/4 <= x_red <= Pi/4: if x_red < 0, consider -x_red */
635 if (MPFR_IS_NEG(x_red))
636 {
637 mpfr_neg (x_red, x_red, MPFR_RNDN);
638 neg = 1;
639 }
640 err = sincos_aux (ts, tc, x_red, MPFR_RNDN);
641 err ++; /* to take into account the argument reduction */
642 if (neg) /* sin(-x) = -sin(x), cos(-x) = cos(x) */
643 mpfr_neg (ts, ts, MPFR_RNDN);
644 if (q & 2) /* sin(x+Pi) = -sin(x), cos(x+Pi) = -cos(x) */
645 {
646 mpfr_neg (ts, ts, MPFR_RNDN);
647 mpfr_neg (tc, tc, MPFR_RNDN);
648 }
649 if (q & 1) /* sin(x+Pi/2) = cos(x), cos(x+Pi/2) = -sin(x) */
650 {
651 mpfr_neg (ts, ts, MPFR_RNDN);
652 mpfr_swap (ts, tc);
653 }
654 mpfr_clear (x_red);
655 mpfr_clear (pi);
656 }
657 /* adjust errors with respect to absolute values */
658 errs = err - MPFR_EXP(ts);
659 errc = err - MPFR_EXP(tc);
660 if ((s == NULL || MPFR_CAN_ROUND (ts, w - errs, MPFR_PREC(s), rnd)) &&
661 (c == NULL || MPFR_CAN_ROUND (tc, w - errc, MPFR_PREC(c), rnd)))
662 break;
663 MPFR_ZIV_NEXT (loop, w);
664 MPFR_GROUP_REPREC_2(group, w, ts, tc);
665 }
666 MPFR_ZIV_FREE (loop);
667
668 inexs = (s == NULL) ? 0 : mpfr_set (s, ts, rnd);
669 inexc = (c == NULL) ? 0 : mpfr_set (c, tc, rnd);
670
671 MPFR_GROUP_CLEAR (group);
672 return INEX(inexs,inexc);
673 }
674