xref: /netbsd-src/external/lgpl3/mpfr/dist/src/cosu.c (revision ec6772edaf0cdcb5f52a48f4aca5e33a8fb8ecfd)
1 /* mpfr_cosu  -- cosu(x) = cos(2*pi*x/u)
2    mpfr_cospi -- cospi(x) = cos(pi*x)
3 
4 Copyright 2020-2023 Free Software Foundation, Inc.
5 Contributed by the AriC and Caramba projects, INRIA.
6 
7 This file is part of the GNU MPFR Library.
8 
9 The GNU MPFR Library is free software; you can redistribute it and/or modify
10 it under the terms of the GNU Lesser General Public License as published by
11 the Free Software Foundation; either version 3 of the License, or (at your
12 option) any later version.
13 
14 The GNU MPFR Library is distributed in the hope that it will be useful, but
15 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
17 License for more details.
18 
19 You should have received a copy of the GNU Lesser General Public License
20 along with the GNU MPFR Library; see the file COPYING.LESSER.  If not, see
21 https://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
22 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
23 
24 #define MPFR_NEED_LONGLONG_H
25 #include "mpfr-impl.h"
26 
27 /* put in y the correctly rounded value of cos(2*pi*x/u) */
28 int
mpfr_cosu(mpfr_ptr y,mpfr_srcptr x,unsigned long u,mpfr_rnd_t rnd_mode)29 mpfr_cosu (mpfr_ptr y, mpfr_srcptr x, unsigned long u, mpfr_rnd_t rnd_mode)
30 {
31   mpfr_srcptr xp;
32   mpfr_prec_t precy, prec;
33   mpfr_exp_t expx, expt, err, log2u, erra, errb;
34   mpfr_t t, xr;
35   int inexact = 0, nloops = 0, underflow = 0;
36   MPFR_ZIV_DECL (loop);
37   MPFR_SAVE_EXPO_DECL (expo);
38 
39   MPFR_LOG_FUNC (
40     ("x[%Pd]=%.*Rg u=%lu rnd=%d", mpfr_get_prec (x), mpfr_log_prec, x, u,
41      rnd_mode),
42     ("y[%Pd]=%.*Rg inexact=%d", mpfr_get_prec (y), mpfr_log_prec, y,
43      inexact));
44 
45   if (u == 0 || MPFR_UNLIKELY (MPFR_IS_SINGULAR (x)))
46     {
47       /* for u=0, return NaN */
48       if (u == 0 || MPFR_IS_NAN (x) || MPFR_IS_INF (x))
49         {
50           MPFR_SET_NAN (y);
51           MPFR_RET_NAN;
52         }
53       else /* x is zero: cos(0) = 1 */
54         {
55           MPFR_ASSERTD (MPFR_IS_ZERO (x));
56           return mpfr_set_ui (y, 1, rnd_mode);
57         }
58     }
59 
60   MPFR_SAVE_EXPO_MARK (expo);
61 
62   /* Range reduction. We do not need to reduce the argument if it is
63      already reduced (|x| < u).
64      Note that the case |x| = u is better in the "else" branch as it
65      will give xr = 0. */
66   if (mpfr_cmpabs_ui (x, u) < 0)
67     {
68       xp = x;
69     }
70   else
71     {
72       mpfr_exp_t p = MPFR_GET_PREC (x) - MPFR_GET_EXP (x);
73       int inex;
74 
75       /* Let's compute xr = x mod u, with signbit(xr) = signbit(x), though
76          this doesn't matter.
77          The precision of xr is chosen to ensure that x mod u is exactly
78          representable in xr, e.g., the maximum size of u + the length of
79          the fractional part of x. Note that since |x| >= u in this branch,
80          the additional memory amount will not be more than the one of x.
81       */
82       mpfr_init2 (xr, sizeof (unsigned long) * CHAR_BIT + (p < 0 ? 0 : p));
83       MPFR_DBGRES (inex = mpfr_fmod_ui (xr, x, u, MPFR_RNDN));  /* exact */
84       MPFR_ASSERTD (inex == 0);
85       if (MPFR_IS_ZERO (xr))
86         {
87           mpfr_clear (xr);
88           MPFR_SAVE_EXPO_FREE (expo);
89           return mpfr_set_ui (y, 1, rnd_mode);
90         }
91       xp = xr;
92     }
93 
94 #define CLEAR_XR                       \
95   do                                   \
96     if (xp != x)                       \
97       {                                \
98         MPFR_ASSERTD (xp == xr);       \
99         mpfr_clear (xr);               \
100       }                                \
101   while (0)
102 
103   /* now |xp/u| < 1 */
104 
105   /* for x small, we have |cos(2*pi*x/u)-1| < 1/2*(2*pi*x/u)^2 < 2^5*(x/u)^2 */
106   expx = MPFR_GET_EXP (xp);
107   log2u = u == 1 ? 0 : MPFR_INT_CEIL_LOG2 (u) - 1;
108   /* u >= 2^log2u thus 1/u <= 2^(-log2u) */
109   erra = -2 * expx;
110   errb = 5 - 2 * log2u;
111   /* The 3rd argument (err1) of MPFR_SMALL_INPUT_AFTER_SAVE_EXPO should be
112      erra - errb, but it may overflow. The negative overflow is avoided by
113      the test erra > errb: if erra - errb <= 0, the macro is no-op.
114      Saturate to MPFR_EXP_MAX in case of positive overflow, as the error
115      test in MPFR_SMALL_INPUT_AFTER_SAVE_EXPO will always be true for
116      any value >= MPFR_PREC_MAX + 1, and this includes MPFR_EXP_MAX (from
117      the definition of MPFR_PREC_MAX and mpfr_exp_t >= mpfr_prec_t). */
118   if (erra > errb)
119     {
120       mpfr_exp_t err1 = errb >= 0 || erra < MPFR_EXP_MAX + errb ?
121         erra - errb : MPFR_EXP_MAX;
122       MPFR_SMALL_INPUT_AFTER_SAVE_EXPO (y, __gmpfr_one, err1, 0, 0,
123                                         rnd_mode, expo, CLEAR_XR);
124     }
125 
126   precy = MPFR_GET_PREC (y);
127   /* For x large, since argument reduction is expensive, we want to avoid
128      any failure in Ziv's strategy, thus we take into account expx too. */
129   prec = precy + MAX(expx,MPFR_INT_CEIL_LOG2 (precy)) + 8;
130   MPFR_ASSERTD(prec >= 2);
131   mpfr_init2 (t, prec);
132   MPFR_ZIV_INIT (loop, prec);
133   for (;;)
134     {
135       nloops ++;
136       /* In the error analysis below, xp stands for x.
137          We first compute an approximation t of 2*pi*x/u, then call cos(t).
138          If t = 2*pi*x/u + s, then |cos(t) - cos(2*pi*x/u)| <= |s|. */
139       mpfr_set_prec (t, prec);
140       mpfr_const_pi (t, MPFR_RNDN); /* t = pi * (1 + theta1) where
141                                        |theta1| <= 2^-prec */
142       mpfr_mul_2ui (t, t, 1, MPFR_RNDN); /* t = 2*pi * (1 + theta1) */
143       mpfr_mul (t, t, xp, MPFR_RNDN);    /* t = 2*pi*x * (1 + theta2)^2 where
144                                             |theta2| <= 2^-prec */
145       mpfr_div_ui (t, t, u, MPFR_RNDN);  /* t = 2*pi*x/u * (1 + theta3)^3 where
146                                             |theta3| <= 2^-prec */
147       /* if t is zero here, it means the division by u underflowd */
148       if (MPFR_UNLIKELY (MPFR_IS_ZERO (t)))
149         {
150           mpfr_set_ui (y, 1, MPFR_RNDZ);
151           if (MPFR_IS_LIKE_RNDZ(rnd_mode,0))
152             {
153               inexact = -1;
154               mpfr_nextbelow (y);
155             }
156           else
157             inexact = 1;
158           goto end;
159         }
160       /* since prec >= 2, |(1 + theta3)^3 - 1| <= 4*theta3 <= 2^(2-prec) */
161       expt = MPFR_GET_EXP (t);
162       /* we have |s| <= 2^(expt + 2 - prec) */
163       mpfr_cos (t, t, MPFR_RNDN);
164       err = expt + 2 - prec;
165       expt = MPFR_GET_EXP (t); /* new exponent of t */
166       /* the total error is at most 2^err + ulp(t)/2 = 2^err + 2^(expt-prec-1)
167          thus if err <= expt-prec-1, it is bounded by 2^(expt-prec),
168          otherwise it is bounded by 2^(err+1). */
169       err = (err <= expt - prec - 1) ? expt - prec : err + 1;
170       /* normalize err for mpfr_can_round */
171       err = expt - err;
172       if (MPFR_CAN_ROUND (t, err, precy, rnd_mode))
173         break;
174       /* Check exact cases only after the first level of Ziv' strategy, to
175          avoid slowing down the average case. Exact cases are:
176          (a) 2*pi*x/u is a multiple of pi/2, i.e., x/u is a multiple of 1/4
177          (b) 2*pi*x/u is {pi/3,2pi/3,4pi/3,5pi/3} mod 2pi */
178       if (nloops == 1)
179         {
180           /* detect case (a) */
181           inexact = mpfr_div_ui (t, xp, u, MPFR_RNDZ);
182           mpfr_mul_2ui (t, t, 2, MPFR_RNDZ);
183           if (inexact == 0 && mpfr_integer_p (t))
184             {
185               if (mpfr_odd_p (t))
186                 /* t is odd: we have kpi+pi/2, thus cosu = 0,
187                    for the sign, we always return +0, following IEEE 754-2019:
188                    cosPi(n + 1/2) is +0 for any integer n when n + 1/2 is
189                    representable. */
190                 mpfr_set_zero (y, +1);
191               else /* t is even: case kpi */
192                 {
193                   mpfr_div_2ui (t, t, 1, MPFR_RNDZ);
194                   if (!mpfr_odd_p (t))
195                     /* case 2kpi: cosu = 1 */
196                     mpfr_set_ui (y, 1, MPFR_RNDZ);
197                   else
198                     mpfr_set_si (y, -1, MPFR_RNDZ);
199                 }
200               goto end;
201             }
202           /* detect case (b): this can only occur if u is divisible by 3 */
203           if ((u % 3) == 0)
204             {
205               inexact = mpfr_div_ui (t, xp, u / 3, MPFR_RNDZ);
206               /* t should be in {1/2,2/2,4/2,5/2} */
207               mpfr_mul_2ui (t, t, 1, MPFR_RNDZ);
208               /* t should be {1,2,4,5} mod 6:
209                  t = 1 mod 6: case pi/3: return 1/2
210                  t = 2 mod 6: case 2pi/3: return -1/2
211                  t = 4 mod 6: case 4pi/3: return -1/2
212                  t = 5 mod 6: case 5pi/3: return 1/2 */
213               if (inexact == 0 && mpfr_integer_p (t))
214                 {
215                   mpz_t z;
216                   unsigned long mod6;
217                   mpz_init (z);
218                   inexact = mpfr_get_z (z, t, MPFR_RNDZ);
219                   MPFR_ASSERTN(inexact == 0);
220                   mod6 = mpz_fdiv_ui (z, 6);
221                   mpz_clear (z);
222                   if (mod6 == 1 || mod6 == 5)
223                     {
224                       mpfr_set_ui_2exp (y, 1, -1, MPFR_RNDZ);
225                       goto end;
226                     }
227                   else /* we cannot have mod6 = 0 or 3 since those
228                           case belong to (a) */
229                     {
230                       MPFR_ASSERTD(mod6 == 2 || mod6 == 4);
231                       mpfr_set_si_2exp (y, -1, -1, MPFR_RNDZ);
232                       goto end;
233                     }
234                 }
235             }
236         }
237       MPFR_ZIV_NEXT (loop, prec);
238     }
239   MPFR_ZIV_FREE (loop);
240 
241   inexact = mpfr_set (y, t, rnd_mode);
242 
243  end:
244   mpfr_clear (t);
245   CLEAR_XR;
246   MPFR_SAVE_EXPO_FREE (expo);
247   return underflow ? inexact : mpfr_check_range (y, inexact, rnd_mode);
248 }
249 
250 int
mpfr_cospi(mpfr_ptr y,mpfr_srcptr x,mpfr_rnd_t rnd_mode)251 mpfr_cospi (mpfr_ptr y, mpfr_srcptr x, mpfr_rnd_t rnd_mode)
252 {
253   return mpfr_cosu (y, x, 2, rnd_mode);
254 }
255