14a238c70SJohn Marino /* mpfr_ai -- Airy function Ai
24a238c70SJohn Marino
3*ab6d115fSJohn Marino Copyright 2010, 2011, 2012, 2013 Free Software Foundation, Inc.
4*ab6d115fSJohn Marino Contributed by the AriC and Caramel projects, INRIA.
54a238c70SJohn Marino
64a238c70SJohn Marino This file is part of the GNU MPFR Library.
74a238c70SJohn Marino
84a238c70SJohn Marino The GNU MPFR Library is free software; you can redistribute it and/or modify
94a238c70SJohn Marino it under the terms of the GNU Lesser General Public License as published by
104a238c70SJohn Marino the Free Software Foundation; either version 3 of the License, or (at your
114a238c70SJohn Marino option) any later version.
124a238c70SJohn Marino
134a238c70SJohn Marino The GNU MPFR Library is distributed in the hope that it will be useful, but
144a238c70SJohn Marino WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
154a238c70SJohn Marino or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
164a238c70SJohn Marino License for more details.
174a238c70SJohn Marino
184a238c70SJohn Marino You should have received a copy of the GNU Lesser General Public License
194a238c70SJohn Marino along with the GNU MPFR Library; see the file COPYING.LESSER. If not, see
204a238c70SJohn Marino http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
214a238c70SJohn Marino 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
224a238c70SJohn Marino
234a238c70SJohn Marino #define MPFR_NEED_LONGLONG_H
244a238c70SJohn Marino #include "mpfr-impl.h"
254a238c70SJohn Marino
264a238c70SJohn Marino /* Reminder and notations:
274a238c70SJohn Marino -----------------------
284a238c70SJohn Marino
294a238c70SJohn Marino Ai is the solution of:
304a238c70SJohn Marino / y'' - x*y = 0
314a238c70SJohn Marino { Ai(0) = 1/ ( 9^(1/3)*Gamma(2/3) )
324a238c70SJohn Marino \ Ai'(0) = -1/ ( 3^(1/3)*Gamma(1/3) )
334a238c70SJohn Marino
344a238c70SJohn Marino Series development:
354a238c70SJohn Marino Ai(x) = sum (a_i*x^i)
364a238c70SJohn Marino = sum (t_i)
374a238c70SJohn Marino
384a238c70SJohn Marino Recurrences:
394a238c70SJohn Marino a_(i+3) = a_i / ((i+2)*(i+3))
404a238c70SJohn Marino t_(i+3) = t_i * x^3 / ((i+2)*(i+3))
414a238c70SJohn Marino
424a238c70SJohn Marino Values:
434a238c70SJohn Marino a_0 = Ai(0) ~ 0.355
444a238c70SJohn Marino a_1 = Ai'(0) ~ -0.259
454a238c70SJohn Marino */
464a238c70SJohn Marino
474a238c70SJohn Marino
484a238c70SJohn Marino /* Airy function Ai evaluated by the most naive algorithm */
494a238c70SJohn Marino static int
mpfr_ai1(mpfr_ptr y,mpfr_srcptr x,mpfr_rnd_t rnd)504a238c70SJohn Marino mpfr_ai1 (mpfr_ptr y, mpfr_srcptr x, mpfr_rnd_t rnd)
514a238c70SJohn Marino {
524a238c70SJohn Marino MPFR_ZIV_DECL (loop);
534a238c70SJohn Marino MPFR_SAVE_EXPO_DECL (expo);
544a238c70SJohn Marino mpfr_prec_t wprec; /* working precision */
554a238c70SJohn Marino mpfr_prec_t prec; /* target precision */
564a238c70SJohn Marino mpfr_prec_t err; /* used to estimate the evaluation error */
574a238c70SJohn Marino mpfr_prec_t correct_bits; /* estimates the number of correct bits*/
584a238c70SJohn Marino unsigned long int k;
594a238c70SJohn Marino unsigned long int cond; /* condition number of the series */
604a238c70SJohn Marino unsigned long int assumed_exponent; /* used as a lowerbound of |EXP(Ai(x))| */
614a238c70SJohn Marino int r;
624a238c70SJohn Marino mpfr_t s; /* used to store the partial sum */
634a238c70SJohn Marino mpfr_t ti, tip1; /* used to store successive values of t_i */
644a238c70SJohn Marino mpfr_t x3; /* used to store x^3 */
654a238c70SJohn Marino mpfr_t tmp_sp, tmp2_sp; /* small precision variables */
664a238c70SJohn Marino unsigned long int x3u; /* used to store ceil(x^3) */
674a238c70SJohn Marino mpfr_t temp1, temp2;
684a238c70SJohn Marino int test1, test2;
694a238c70SJohn Marino
704a238c70SJohn Marino /* Logging */
714a238c70SJohn Marino MPFR_LOG_FUNC (
724a238c70SJohn Marino ("x[%Pu]=%.*Rg rnd=%d", mpfr_get_prec (x), mpfr_log_prec, x, rnd),
734a238c70SJohn Marino ("y[%Pu]=%.*Rg", mpfr_get_prec (y), mpfr_log_prec, y) );
744a238c70SJohn Marino
754a238c70SJohn Marino /* Special cases */
764a238c70SJohn Marino if (MPFR_UNLIKELY (MPFR_IS_SINGULAR (x)))
774a238c70SJohn Marino {
784a238c70SJohn Marino if (MPFR_IS_NAN (x))
794a238c70SJohn Marino {
804a238c70SJohn Marino MPFR_SET_NAN (y);
814a238c70SJohn Marino MPFR_RET_NAN;
824a238c70SJohn Marino }
834a238c70SJohn Marino else if (MPFR_IS_INF (x))
844a238c70SJohn Marino return mpfr_set_ui (y, 0, rnd);
854a238c70SJohn Marino }
864a238c70SJohn Marino
874a238c70SJohn Marino
884a238c70SJohn Marino /* Save current exponents range */
894a238c70SJohn Marino MPFR_SAVE_EXPO_MARK (expo);
904a238c70SJohn Marino
914a238c70SJohn Marino if (MPFR_UNLIKELY (MPFR_IS_ZERO (x)))
924a238c70SJohn Marino {
934a238c70SJohn Marino mpfr_t y1, y2;
944a238c70SJohn Marino prec = MPFR_PREC (y) + 3;
954a238c70SJohn Marino mpfr_init2 (y1, prec);
964a238c70SJohn Marino mpfr_init2 (y2, prec);
974a238c70SJohn Marino MPFR_ZIV_INIT (loop, prec);
984a238c70SJohn Marino
994a238c70SJohn Marino /* ZIV loop */
1004a238c70SJohn Marino for (;;)
1014a238c70SJohn Marino {
1024a238c70SJohn Marino mpfr_gamma_one_and_two_third (y1, y2, prec); /* y2 = Gamma(2/3)(1 + delta1), |delta1| <= 2^{1-prec}. */
1034a238c70SJohn Marino
1044a238c70SJohn Marino r = mpfr_set_ui (y1, 9, MPFR_RNDN);
1054a238c70SJohn Marino MPFR_ASSERTD (r == 0);
1064a238c70SJohn Marino mpfr_cbrt (y1, y1, MPFR_RNDN); /* y1 = cbrt(9)(1 + delta2), |delta2| <= 2^{-prec}. */
1074a238c70SJohn Marino mpfr_mul (y1, y1, y2, MPFR_RNDN);
1084a238c70SJohn Marino mpfr_ui_div (y1, 1, y1, MPFR_RNDN);
1094a238c70SJohn Marino if (MPFR_LIKELY (MPFR_CAN_ROUND (y1, prec - 3, MPFR_PREC (y), rnd)))
1104a238c70SJohn Marino break;
1114a238c70SJohn Marino MPFR_ZIV_NEXT (loop, prec);
1124a238c70SJohn Marino }
1134a238c70SJohn Marino r = mpfr_set (y, y1, rnd);
1144a238c70SJohn Marino MPFR_ZIV_FREE (loop);
1154a238c70SJohn Marino MPFR_SAVE_EXPO_FREE (expo);
1164a238c70SJohn Marino mpfr_clear (y1);
1174a238c70SJohn Marino mpfr_clear (y2);
1184a238c70SJohn Marino return mpfr_check_range (y, r, rnd);
1194a238c70SJohn Marino }
1204a238c70SJohn Marino
1214a238c70SJohn Marino /* FIXME: underflow for large values of |x| ? */
1224a238c70SJohn Marino
1234a238c70SJohn Marino
1244a238c70SJohn Marino /* Set initial precision */
1254a238c70SJohn Marino /* If we compute sum(i=0, N-1, t_i), the relative error is bounded by */
1264a238c70SJohn Marino /* 2*(4N)*2^(1-wprec)*C(|x|)/Ai(x) */
1274a238c70SJohn Marino /* where C(|x|) = 1 if 0<=x<=1 */
1284a238c70SJohn Marino /* and C(|x|) = (1/2)*x^(-1/4)*exp(2/3 x^(3/2)) if x >= 1 */
1294a238c70SJohn Marino
1304a238c70SJohn Marino /* A priori, we do not know N, so we estimate it to ~ prec */
1314a238c70SJohn Marino /* If 0<=x<=1, we estimate Ai(x) ~ 1/8 */
1324a238c70SJohn Marino /* if 1<=x, we estimate Ai(x) ~ (1/4)*x^(-1/4)*exp(-2/3 * x^(3/2)) */
1334a238c70SJohn Marino /* if x<=0, ????? */
1344a238c70SJohn Marino
1354a238c70SJohn Marino /* We begin with 11 guard bits */
1364a238c70SJohn Marino prec = MPFR_PREC (y)+11;
1374a238c70SJohn Marino MPFR_ZIV_INIT (loop, prec);
1384a238c70SJohn Marino
1394a238c70SJohn Marino /* The working precision is heuristically chosen in order to obtain */
1404a238c70SJohn Marino /* approximately prec correct bits in the sum. To sum up: the sum */
1414a238c70SJohn Marino /* is stopped when the *exact* sum gives ~ prec correct bit. And */
1424a238c70SJohn Marino /* when it is stopped, the accuracy of the computed sum, with respect*/
1434a238c70SJohn Marino /* to the exact one should be ~prec bits. */
1444a238c70SJohn Marino mpfr_init2 (tmp_sp, MPFR_SMALL_PRECISION);
1454a238c70SJohn Marino mpfr_init2 (tmp2_sp, MPFR_SMALL_PRECISION);
1464a238c70SJohn Marino mpfr_abs (tmp_sp, x, MPFR_RNDU);
1474a238c70SJohn Marino mpfr_pow_ui (tmp_sp, tmp_sp, 3, MPFR_RNDU);
1484a238c70SJohn Marino mpfr_sqrt (tmp_sp, tmp_sp, MPFR_RNDU); /* tmp_sp ~ x^3/2 */
1494a238c70SJohn Marino
1504a238c70SJohn Marino /* 0.96179669392597567 >~ 2/3 * log2(e). See algorithms.tex */
1514a238c70SJohn Marino mpfr_set_str (tmp2_sp, "0.96179669392597567", 10, MPFR_RNDU);
1524a238c70SJohn Marino mpfr_mul (tmp2_sp, tmp_sp, tmp2_sp, MPFR_RNDU);
1534a238c70SJohn Marino
1544a238c70SJohn Marino /* cond represents the number of lost bits in the evaluation of the sum */
1554a238c70SJohn Marino if ( (MPFR_IS_ZERO (x)) || (MPFR_GET_EXP (x) <= 0) )
1564a238c70SJohn Marino cond = 0;
1574a238c70SJohn Marino else
1584a238c70SJohn Marino cond = mpfr_get_ui (tmp2_sp, MPFR_RNDU) - (MPFR_GET_EXP (x)-1)/4 - 1;
1594a238c70SJohn Marino
1604a238c70SJohn Marino /* The variable assumed_exponent is used to store the maximal assumed */
1614a238c70SJohn Marino /* exponent of Ai(x). More precisely, we assume that |Ai(x)| will be */
1624a238c70SJohn Marino /* greater than 2^{-assumed_exponent}. */
1634a238c70SJohn Marino if (MPFR_IS_ZERO (x))
1644a238c70SJohn Marino assumed_exponent = 2;
1654a238c70SJohn Marino else
1664a238c70SJohn Marino {
1674a238c70SJohn Marino if (MPFR_IS_POS (x))
1684a238c70SJohn Marino {
1694a238c70SJohn Marino if (MPFR_GET_EXP (x) <= 0)
1704a238c70SJohn Marino assumed_exponent = 3;
1714a238c70SJohn Marino else
1724a238c70SJohn Marino assumed_exponent = (2 + (MPFR_GET_EXP (x)/4 + 1)
1734a238c70SJohn Marino + mpfr_get_ui (tmp2_sp, MPFR_RNDU));
1744a238c70SJohn Marino }
1754a238c70SJohn Marino /* We do not know Ai (x) yet */
1764a238c70SJohn Marino /* We cover the case when EXP (Ai (x))>=-10 */
1774a238c70SJohn Marino else
1784a238c70SJohn Marino assumed_exponent = 10;
1794a238c70SJohn Marino }
1804a238c70SJohn Marino
1814a238c70SJohn Marino wprec = prec + MPFR_INT_CEIL_LOG2 (prec) + 5 + cond + assumed_exponent;
1824a238c70SJohn Marino
1834a238c70SJohn Marino mpfr_init (ti);
1844a238c70SJohn Marino mpfr_init (tip1);
1854a238c70SJohn Marino mpfr_init (temp1);
1864a238c70SJohn Marino mpfr_init (temp2);
1874a238c70SJohn Marino mpfr_init (x3);
1884a238c70SJohn Marino mpfr_init (s);
1894a238c70SJohn Marino
1904a238c70SJohn Marino /* ZIV loop */
1914a238c70SJohn Marino for (;;)
1924a238c70SJohn Marino {
1934a238c70SJohn Marino MPFR_LOG_MSG (("Working precision: %Pu\n", wprec));
1944a238c70SJohn Marino mpfr_set_prec (ti, wprec);
1954a238c70SJohn Marino mpfr_set_prec (tip1, wprec);
1964a238c70SJohn Marino mpfr_set_prec (x3, wprec);
1974a238c70SJohn Marino mpfr_set_prec (s, wprec);
1984a238c70SJohn Marino
1994a238c70SJohn Marino mpfr_sqr (x3, x, MPFR_RNDU);
2004a238c70SJohn Marino mpfr_mul (x3, x3, x, (MPFR_IS_POS (x)?MPFR_RNDU:MPFR_RNDD)); /* x3=x^3 */
2014a238c70SJohn Marino if (MPFR_IS_NEG (x))
2024a238c70SJohn Marino MPFR_CHANGE_SIGN (x3);
2034a238c70SJohn Marino x3u = mpfr_get_ui (x3, MPFR_RNDU); /* x3u >= ceil(x^3) */
2044a238c70SJohn Marino if (MPFR_IS_NEG (x))
2054a238c70SJohn Marino MPFR_CHANGE_SIGN (x3);
2064a238c70SJohn Marino
2074a238c70SJohn Marino mpfr_gamma_one_and_two_third (temp1, temp2, wprec);
2084a238c70SJohn Marino mpfr_set_ui (ti, 9, MPFR_RNDN);
2094a238c70SJohn Marino mpfr_cbrt (ti, ti, MPFR_RNDN);
2104a238c70SJohn Marino mpfr_mul (ti, ti, temp2, MPFR_RNDN);
2114a238c70SJohn Marino mpfr_ui_div (ti, 1, ti , MPFR_RNDN); /* ti = 1/( Gamma (2/3)*9^(1/3) ) */
2124a238c70SJohn Marino
2134a238c70SJohn Marino mpfr_set_ui (tip1, 3, MPFR_RNDN);
2144a238c70SJohn Marino mpfr_cbrt (tip1, tip1, MPFR_RNDN);
2154a238c70SJohn Marino mpfr_mul (tip1, tip1, temp1, MPFR_RNDN);
2164a238c70SJohn Marino mpfr_neg (tip1, tip1, MPFR_RNDN);
2174a238c70SJohn Marino mpfr_div (tip1, x, tip1, MPFR_RNDN); /* tip1 = -x/(Gamma (1/3)*3^(1/3)) */
2184a238c70SJohn Marino
2194a238c70SJohn Marino mpfr_add (s, ti, tip1, MPFR_RNDN);
2204a238c70SJohn Marino
2214a238c70SJohn Marino
2224a238c70SJohn Marino /* Evaluation of the series */
2234a238c70SJohn Marino k = 2;
2244a238c70SJohn Marino for (;;)
2254a238c70SJohn Marino {
2264a238c70SJohn Marino mpfr_mul (ti, ti, x3, MPFR_RNDN);
2274a238c70SJohn Marino mpfr_mul (tip1, tip1, x3, MPFR_RNDN);
2284a238c70SJohn Marino
2294a238c70SJohn Marino mpfr_div_ui2 (ti, ti, k, (k+1), MPFR_RNDN);
2304a238c70SJohn Marino mpfr_div_ui2 (tip1, tip1, (k+1), (k+2), MPFR_RNDN);
2314a238c70SJohn Marino
2324a238c70SJohn Marino k += 3;
2334a238c70SJohn Marino mpfr_add (s, s, ti, MPFR_RNDN);
2344a238c70SJohn Marino mpfr_add (s, s, tip1, MPFR_RNDN);
2354a238c70SJohn Marino
2364a238c70SJohn Marino /* FIXME: if s==0 */
2374a238c70SJohn Marino test1 = MPFR_IS_ZERO (ti)
2384a238c70SJohn Marino || (MPFR_GET_EXP (ti) + (mpfr_exp_t)prec + 3 <= MPFR_GET_EXP (s));
2394a238c70SJohn Marino test2 = MPFR_IS_ZERO (tip1)
2404a238c70SJohn Marino || (MPFR_GET_EXP (tip1) + (mpfr_exp_t)prec + 3 <= MPFR_GET_EXP (s));
2414a238c70SJohn Marino
2424a238c70SJohn Marino if ( test1 && test2 && (x3u <= k*(k+1)/2) )
2434a238c70SJohn Marino break; /* FIXME: if k*(k+1) overflows */
2444a238c70SJohn Marino }
2454a238c70SJohn Marino
2464a238c70SJohn Marino MPFR_LOG_MSG (("Truncation rank: %lu\n", k));
2474a238c70SJohn Marino
2484a238c70SJohn Marino err = 4 + MPFR_INT_CEIL_LOG2 (k) + cond - MPFR_GET_EXP (s);
2494a238c70SJohn Marino
2504a238c70SJohn Marino /* err is the number of bits lost due to the evaluation error */
2514a238c70SJohn Marino /* wprec-(prec+1): number of bits lost due to the approximation error */
2524a238c70SJohn Marino MPFR_LOG_MSG (("Roundoff error: %Pu\n", err));
2534a238c70SJohn Marino MPFR_LOG_MSG (("Approxim error: %Pu\n", wprec-prec-1));
2544a238c70SJohn Marino
2554a238c70SJohn Marino if (wprec < err+1)
2564a238c70SJohn Marino correct_bits=0;
2574a238c70SJohn Marino else
2584a238c70SJohn Marino {
2594a238c70SJohn Marino if (wprec < err+prec+1)
2604a238c70SJohn Marino correct_bits = wprec - err - 1;
2614a238c70SJohn Marino else
2624a238c70SJohn Marino correct_bits = prec;
2634a238c70SJohn Marino }
2644a238c70SJohn Marino
2654a238c70SJohn Marino if (MPFR_LIKELY (MPFR_CAN_ROUND (s, correct_bits, MPFR_PREC (y), rnd)))
2664a238c70SJohn Marino break;
2674a238c70SJohn Marino
2684a238c70SJohn Marino if (correct_bits == 0)
2694a238c70SJohn Marino {
2704a238c70SJohn Marino assumed_exponent *= 2;
2714a238c70SJohn Marino MPFR_LOG_MSG (("Not a single bit correct (assumed_exponent=%lu)\n",
2724a238c70SJohn Marino assumed_exponent));
2734a238c70SJohn Marino wprec = prec + 5 + MPFR_INT_CEIL_LOG2 (k) + cond + assumed_exponent;
2744a238c70SJohn Marino }
2754a238c70SJohn Marino else
2764a238c70SJohn Marino {
2774a238c70SJohn Marino if (correct_bits < prec)
2784a238c70SJohn Marino { /* The precision was badly chosen */
2794a238c70SJohn Marino MPFR_LOG_MSG (("Bad assumption on the exponent of Ai(x)", 0));
2804a238c70SJohn Marino MPFR_LOG_MSG ((" (E=%ld)\n", (long) MPFR_GET_EXP (s)));
2814a238c70SJohn Marino wprec = prec + err + 1;
2824a238c70SJohn Marino }
2834a238c70SJohn Marino else
2844a238c70SJohn Marino { /* We are really in a bad case of the TMD */
2854a238c70SJohn Marino MPFR_ZIV_NEXT (loop, prec);
2864a238c70SJohn Marino
2874a238c70SJohn Marino /* We update wprec */
2884a238c70SJohn Marino /* We assume that K will not be multiplied by more than 4 */
2894a238c70SJohn Marino wprec = prec + (MPFR_INT_CEIL_LOG2 (k)+2) + 5 + cond
2904a238c70SJohn Marino - MPFR_GET_EXP (s);
2914a238c70SJohn Marino }
2924a238c70SJohn Marino }
2934a238c70SJohn Marino
2944a238c70SJohn Marino } /* End of ZIV loop */
2954a238c70SJohn Marino
2964a238c70SJohn Marino MPFR_ZIV_FREE (loop);
2974a238c70SJohn Marino
2984a238c70SJohn Marino r = mpfr_set (y, s, rnd);
2994a238c70SJohn Marino
3004a238c70SJohn Marino mpfr_clear (ti);
3014a238c70SJohn Marino mpfr_clear (tip1);
3024a238c70SJohn Marino mpfr_clear (temp1);
3034a238c70SJohn Marino mpfr_clear (temp2);
3044a238c70SJohn Marino mpfr_clear (x3);
3054a238c70SJohn Marino mpfr_clear (s);
3064a238c70SJohn Marino mpfr_clear (tmp_sp);
3074a238c70SJohn Marino mpfr_clear (tmp2_sp);
3084a238c70SJohn Marino
3094a238c70SJohn Marino MPFR_SAVE_EXPO_FREE (expo);
3104a238c70SJohn Marino return mpfr_check_range (y, r, rnd);
3114a238c70SJohn Marino }
3124a238c70SJohn Marino
3134a238c70SJohn Marino
3144a238c70SJohn Marino /* Airy function Ai evaluated by Smith algorithm */
3154a238c70SJohn Marino static int
mpfr_ai2(mpfr_ptr y,mpfr_srcptr x,mpfr_rnd_t rnd)3164a238c70SJohn Marino mpfr_ai2 (mpfr_ptr y, mpfr_srcptr x, mpfr_rnd_t rnd)
3174a238c70SJohn Marino {
3184a238c70SJohn Marino MPFR_ZIV_DECL (loop);
3194a238c70SJohn Marino MPFR_SAVE_EXPO_DECL (expo);
3204a238c70SJohn Marino mpfr_prec_t wprec; /* working precision */
3214a238c70SJohn Marino mpfr_prec_t prec; /* target precision */
3224a238c70SJohn Marino mpfr_prec_t err; /* used to estimate the evaluation error */
3234a238c70SJohn Marino mpfr_prec_t correctBits; /* estimates the number of correct bits*/
3244a238c70SJohn Marino unsigned long int i, j, L, t;
3254a238c70SJohn Marino unsigned long int cond; /* condition number of the series */
3264a238c70SJohn Marino unsigned long int assumed_exponent; /* used as a lowerbound of |EXP(Ai(x))| */
3274a238c70SJohn Marino int r; /* returned ternary value */
3284a238c70SJohn Marino mpfr_t s; /* used to store the partial sum */
3294a238c70SJohn Marino mpfr_t u0, u1;
3304a238c70SJohn Marino mpfr_t *z; /* used to store the (x^3j) */
3314a238c70SJohn Marino mpfr_t result;
3324a238c70SJohn Marino mpfr_t tmp_sp, tmp2_sp; /* small precision variables */
3334a238c70SJohn Marino unsigned long int x3u; /* used to store ceil (x^3) */
3344a238c70SJohn Marino mpfr_t temp1, temp2;
3354a238c70SJohn Marino int test0, test1;
3364a238c70SJohn Marino
3374a238c70SJohn Marino /* Logging */
3384a238c70SJohn Marino MPFR_LOG_FUNC (
3394a238c70SJohn Marino ("x[%Pu]=%.*Rg rnd=%d", mpfr_get_prec (x), mpfr_log_prec, x, rnd),
3404a238c70SJohn Marino ("y[%Pu]=%.*Rg", mpfr_get_prec (y), mpfr_log_prec, y));
3414a238c70SJohn Marino
3424a238c70SJohn Marino /* Special cases */
3434a238c70SJohn Marino if (MPFR_UNLIKELY (MPFR_IS_SINGULAR (x)))
3444a238c70SJohn Marino {
3454a238c70SJohn Marino if (MPFR_IS_NAN (x))
3464a238c70SJohn Marino {
3474a238c70SJohn Marino MPFR_SET_NAN (y);
3484a238c70SJohn Marino MPFR_RET_NAN;
3494a238c70SJohn Marino }
3504a238c70SJohn Marino else if (MPFR_IS_INF (x))
3514a238c70SJohn Marino return mpfr_set_ui (y, 0, rnd);
3524a238c70SJohn Marino }
3534a238c70SJohn Marino
3544a238c70SJohn Marino /* Save current exponents range */
3554a238c70SJohn Marino MPFR_SAVE_EXPO_MARK (expo);
3564a238c70SJohn Marino
3574a238c70SJohn Marino /* FIXME: underflow for large values of |x| */
3584a238c70SJohn Marino
3594a238c70SJohn Marino
3604a238c70SJohn Marino /* Set initial precision */
3614a238c70SJohn Marino /* See the analysis for the naive evaluation */
3624a238c70SJohn Marino
3634a238c70SJohn Marino /* We begin with 11 guard bits */
3644a238c70SJohn Marino prec = MPFR_PREC (y) + 11;
3654a238c70SJohn Marino MPFR_ZIV_INIT (loop, prec);
3664a238c70SJohn Marino
3674a238c70SJohn Marino mpfr_init2 (tmp_sp, MPFR_SMALL_PRECISION);
3684a238c70SJohn Marino mpfr_init2 (tmp2_sp, MPFR_SMALL_PRECISION);
3694a238c70SJohn Marino mpfr_abs (tmp_sp, x, MPFR_RNDU);
3704a238c70SJohn Marino mpfr_pow_ui (tmp_sp, tmp_sp, 3, MPFR_RNDU);
3714a238c70SJohn Marino mpfr_sqrt (tmp_sp, tmp_sp, MPFR_RNDU); /* tmp_sp ~ x^3/2 */
3724a238c70SJohn Marino
3734a238c70SJohn Marino /* 0.96179669392597567 >~ 2/3 * log2(e). See algorithms.tex */
3744a238c70SJohn Marino mpfr_set_str (tmp2_sp, "0.96179669392597567", 10, MPFR_RNDU);
3754a238c70SJohn Marino mpfr_mul (tmp2_sp, tmp_sp, tmp2_sp, MPFR_RNDU);
3764a238c70SJohn Marino
3774a238c70SJohn Marino /* cond represents the number of lost bits in the evaluation of the sum */
3784a238c70SJohn Marino if ( (MPFR_IS_ZERO (x)) || (MPFR_GET_EXP (x) <= 0) )
3794a238c70SJohn Marino cond = 0;
3804a238c70SJohn Marino else
3814a238c70SJohn Marino cond = mpfr_get_ui (tmp2_sp, MPFR_RNDU) - (MPFR_GET_EXP (x) - 1)/4 - 1;
3824a238c70SJohn Marino
3834a238c70SJohn Marino /* This variable is used to store the maximal assumed exponent of */
3844a238c70SJohn Marino /* Ai (x). More precisely, we assume that |Ai (x)| will be greater than */
3854a238c70SJohn Marino /* 2^{-assumedExp}. */
3864a238c70SJohn Marino if (MPFR_IS_ZERO (x))
3874a238c70SJohn Marino assumed_exponent = 2;
3884a238c70SJohn Marino else
3894a238c70SJohn Marino {
3904a238c70SJohn Marino if (MPFR_IS_POS (x))
3914a238c70SJohn Marino {
3924a238c70SJohn Marino if (MPFR_GET_EXP (x) <= 0)
3934a238c70SJohn Marino assumed_exponent = 3;
3944a238c70SJohn Marino else
3954a238c70SJohn Marino assumed_exponent = (2 + (MPFR_GET_EXP (x)/4 + 1)
3964a238c70SJohn Marino + mpfr_get_ui (tmp2_sp, MPFR_RNDU));
3974a238c70SJohn Marino }
3984a238c70SJohn Marino /* We do not know Ai (x) yet */
3994a238c70SJohn Marino /* We cover the case when EXP (Ai (x))>=-10 */
4004a238c70SJohn Marino else
4014a238c70SJohn Marino assumed_exponent = 10;
4024a238c70SJohn Marino }
4034a238c70SJohn Marino
4044a238c70SJohn Marino wprec = prec + MPFR_INT_CEIL_LOG2 (prec) + 6 + cond + assumed_exponent;
4054a238c70SJohn Marino
4064a238c70SJohn Marino /* We assume that the truncation rank will be ~ prec */
4074a238c70SJohn Marino L = __gmpfr_isqrt (prec);
4084a238c70SJohn Marino MPFR_LOG_MSG (("size of blocks L = %lu\n", L));
4094a238c70SJohn Marino
4104a238c70SJohn Marino z = (mpfr_t *) (*__gmp_allocate_func) ( (L + 1) * sizeof (mpfr_t) );
4114a238c70SJohn Marino MPFR_ASSERTN (z != NULL);
4124a238c70SJohn Marino for (j=0; j<=L; j++)
4134a238c70SJohn Marino mpfr_init (z[j]);
4144a238c70SJohn Marino
4154a238c70SJohn Marino mpfr_init (s);
4164a238c70SJohn Marino mpfr_init (u0); mpfr_init (u1);
4174a238c70SJohn Marino mpfr_init (result);
4184a238c70SJohn Marino mpfr_init (temp1);
4194a238c70SJohn Marino mpfr_init (temp2);
4204a238c70SJohn Marino
4214a238c70SJohn Marino /* ZIV loop */
4224a238c70SJohn Marino for (;;)
4234a238c70SJohn Marino {
4244a238c70SJohn Marino MPFR_LOG_MSG (("working precision: %Pu\n", wprec));
4254a238c70SJohn Marino
4264a238c70SJohn Marino for (j=0; j<=L; j++)
4274a238c70SJohn Marino mpfr_set_prec (z[j], wprec);
4284a238c70SJohn Marino mpfr_set_prec (s, wprec);
4294a238c70SJohn Marino mpfr_set_prec (u0, wprec); mpfr_set_prec (u1, wprec);
4304a238c70SJohn Marino mpfr_set_prec (result, wprec);
4314a238c70SJohn Marino
4324a238c70SJohn Marino mpfr_set_ui (u0, 1, MPFR_RNDN);
4334a238c70SJohn Marino mpfr_set (u1, x, MPFR_RNDN);
4344a238c70SJohn Marino
4354a238c70SJohn Marino mpfr_set_ui (z[0], 1, MPFR_RNDU);
4364a238c70SJohn Marino mpfr_sqr (z[1], u1, MPFR_RNDU);
4374a238c70SJohn Marino mpfr_mul (z[1], z[1], x, (MPFR_IS_POS (x) ? MPFR_RNDU : MPFR_RNDD) );
4384a238c70SJohn Marino
4394a238c70SJohn Marino if (MPFR_IS_NEG (x))
4404a238c70SJohn Marino MPFR_CHANGE_SIGN (z[1]);
4414a238c70SJohn Marino x3u = mpfr_get_ui (z[1], MPFR_RNDU); /* x3u >= ceil (x^3) */
4424a238c70SJohn Marino if (MPFR_IS_NEG (x))
4434a238c70SJohn Marino MPFR_CHANGE_SIGN (z[1]);
4444a238c70SJohn Marino
4454a238c70SJohn Marino for (j=2; j<=L ;j++)
4464a238c70SJohn Marino {
4474a238c70SJohn Marino if (j%2 == 0)
4484a238c70SJohn Marino mpfr_sqr (z[j], z[j/2], MPFR_RNDN);
4494a238c70SJohn Marino else
4504a238c70SJohn Marino mpfr_mul (z[j], z[j-1], z[1], MPFR_RNDN);
4514a238c70SJohn Marino }
4524a238c70SJohn Marino
4534a238c70SJohn Marino mpfr_gamma_one_and_two_third (temp1, temp2, wprec);
4544a238c70SJohn Marino mpfr_set_ui (u0, 9, MPFR_RNDN);
4554a238c70SJohn Marino mpfr_cbrt (u0, u0, MPFR_RNDN);
4564a238c70SJohn Marino mpfr_mul (u0, u0, temp2, MPFR_RNDN);
4574a238c70SJohn Marino mpfr_ui_div (u0, 1, u0 , MPFR_RNDN); /* u0 = 1/( Gamma (2/3)*9^(1/3) ) */
4584a238c70SJohn Marino
4594a238c70SJohn Marino mpfr_set_ui (u1, 3, MPFR_RNDN);
4604a238c70SJohn Marino mpfr_cbrt (u1, u1, MPFR_RNDN);
4614a238c70SJohn Marino mpfr_mul (u1, u1, temp1, MPFR_RNDN);
4624a238c70SJohn Marino mpfr_neg (u1, u1, MPFR_RNDN);
4634a238c70SJohn Marino mpfr_div (u1, x, u1, MPFR_RNDN); /* u1 = -x/(Gamma (1/3)*3^(1/3)) */
4644a238c70SJohn Marino
4654a238c70SJohn Marino mpfr_set_ui (result, 0, MPFR_RNDN);
4664a238c70SJohn Marino t = 0;
4674a238c70SJohn Marino
4684a238c70SJohn Marino /* Evaluation of the series by Smith' method */
4694a238c70SJohn Marino for (i=0; ; i++)
4704a238c70SJohn Marino {
4714a238c70SJohn Marino t += 3 * L;
4724a238c70SJohn Marino
4734a238c70SJohn Marino /* k = 0 */
4744a238c70SJohn Marino t -= 3;
4754a238c70SJohn Marino mpfr_set (s, z[L-1], MPFR_RNDN);
4764a238c70SJohn Marino for (j=L-2; ; j--)
4774a238c70SJohn Marino {
4784a238c70SJohn Marino t -= 3;
4794a238c70SJohn Marino mpfr_div_ui2 (s, s, (t+2), (t+3), MPFR_RNDN);
4804a238c70SJohn Marino mpfr_add (s, s, z[j], MPFR_RNDN);
4814a238c70SJohn Marino if (j==0)
4824a238c70SJohn Marino break;
4834a238c70SJohn Marino }
4844a238c70SJohn Marino mpfr_mul (s, s, u0, MPFR_RNDN);
4854a238c70SJohn Marino mpfr_add (result, result, s, MPFR_RNDN);
4864a238c70SJohn Marino
4874a238c70SJohn Marino mpfr_mul (u0, u0, z[L], MPFR_RNDN);
4884a238c70SJohn Marino for (j=0; j<=L-1; j++)
4894a238c70SJohn Marino {
4904a238c70SJohn Marino mpfr_div_ui2 (u0, u0, (t + 2), (t + 3), MPFR_RNDN);
4914a238c70SJohn Marino t += 3;
4924a238c70SJohn Marino }
4934a238c70SJohn Marino
4944a238c70SJohn Marino t++;
4954a238c70SJohn Marino
4964a238c70SJohn Marino /* k = 1 */
4974a238c70SJohn Marino t -= 3;
4984a238c70SJohn Marino mpfr_set (s, z[L-1], MPFR_RNDN);
4994a238c70SJohn Marino for (j=L-2; ; j--)
5004a238c70SJohn Marino {
5014a238c70SJohn Marino t -= 3;
5024a238c70SJohn Marino mpfr_div_ui2 (s, s, (t + 2), (t + 3), MPFR_RNDN);
5034a238c70SJohn Marino mpfr_add (s, s, z[j], MPFR_RNDN);
5044a238c70SJohn Marino if (j==0)
5054a238c70SJohn Marino break;
5064a238c70SJohn Marino }
5074a238c70SJohn Marino mpfr_mul (s, s, u1, MPFR_RNDN);
5084a238c70SJohn Marino mpfr_add (result, result, s, MPFR_RNDN);
5094a238c70SJohn Marino
5104a238c70SJohn Marino mpfr_mul (u1, u1, z[L], MPFR_RNDN);
5114a238c70SJohn Marino for (j=0; j<=L-1; j++)
5124a238c70SJohn Marino {
5134a238c70SJohn Marino mpfr_div_ui2 (u1, u1, (t + 2), (t + 3), MPFR_RNDN);
5144a238c70SJohn Marino t += 3;
5154a238c70SJohn Marino }
5164a238c70SJohn Marino
5174a238c70SJohn Marino t++;
5184a238c70SJohn Marino
5194a238c70SJohn Marino /* k = 2 */
5204a238c70SJohn Marino t++;
5214a238c70SJohn Marino
5224a238c70SJohn Marino /* End of the loop over k */
5234a238c70SJohn Marino t -= 3;
5244a238c70SJohn Marino
5254a238c70SJohn Marino test0 = MPFR_IS_ZERO (u0) ||
5264a238c70SJohn Marino MPFR_GET_EXP (u0) + (mpfr_exp_t)prec + 4 <= MPFR_GET_EXP (result);
5274a238c70SJohn Marino test1 = MPFR_IS_ZERO (u1) ||
5284a238c70SJohn Marino MPFR_GET_EXP (u1) + (mpfr_exp_t)prec + 4 <= MPFR_GET_EXP (result);
5294a238c70SJohn Marino
5304a238c70SJohn Marino if ( test0 && test1 && (x3u <= (t + 2) * (t + 3) / 2) )
5314a238c70SJohn Marino break;
5324a238c70SJohn Marino }
5334a238c70SJohn Marino
5344a238c70SJohn Marino MPFR_LOG_MSG (("Truncation rank: %lu\n", t));
5354a238c70SJohn Marino
5364a238c70SJohn Marino err = (5 + MPFR_INT_CEIL_LOG2 (L+1) + MPFR_INT_CEIL_LOG2 (i+1)
5374a238c70SJohn Marino + cond - MPFR_GET_EXP (result));
5384a238c70SJohn Marino
5394a238c70SJohn Marino /* err is the number of bits lost due to the evaluation error */
5404a238c70SJohn Marino /* wprec-(prec+1): number of bits lost due to the approximation error */
5414a238c70SJohn Marino MPFR_LOG_MSG (("Roundoff error: %Pu\n", err));
5424a238c70SJohn Marino MPFR_LOG_MSG (("Approxim error: %Pu\n", wprec - prec - 1));
5434a238c70SJohn Marino
5444a238c70SJohn Marino if (wprec < err+1)
5454a238c70SJohn Marino correctBits = 0;
5464a238c70SJohn Marino else
5474a238c70SJohn Marino {
5484a238c70SJohn Marino if (wprec < err+prec+1)
5494a238c70SJohn Marino correctBits = wprec - err - 1;
5504a238c70SJohn Marino else
5514a238c70SJohn Marino correctBits = prec;
5524a238c70SJohn Marino }
5534a238c70SJohn Marino
5544a238c70SJohn Marino if (MPFR_LIKELY (MPFR_CAN_ROUND (result, correctBits,
5554a238c70SJohn Marino MPFR_PREC (y), rnd)))
5564a238c70SJohn Marino break;
5574a238c70SJohn Marino
5584a238c70SJohn Marino for (j=0; j<=L; j++)
5594a238c70SJohn Marino mpfr_clear (z[j]);
5604a238c70SJohn Marino (*__gmp_free_func) (z, (L + 1) * sizeof (mpfr_t));
5614a238c70SJohn Marino L = __gmpfr_isqrt (t);
5624a238c70SJohn Marino MPFR_LOG_MSG (("size of blocks L = %lu\n", L));
5634a238c70SJohn Marino z = (mpfr_t *) (*__gmp_allocate_func) ( (L + 1) * sizeof (mpfr_t));
5644a238c70SJohn Marino MPFR_ASSERTN (z != NULL);
5654a238c70SJohn Marino for (j=0; j<=L; j++)
5664a238c70SJohn Marino mpfr_init (z[j]);
5674a238c70SJohn Marino
5684a238c70SJohn Marino if (correctBits == 0)
5694a238c70SJohn Marino {
5704a238c70SJohn Marino assumed_exponent *= 2;
5714a238c70SJohn Marino MPFR_LOG_MSG (("Not a single bit correct (assumed_exponent=%lu)\n",
5724a238c70SJohn Marino assumed_exponent));
5734a238c70SJohn Marino wprec = prec + 6 + MPFR_INT_CEIL_LOG2 (t) + cond + assumed_exponent;
5744a238c70SJohn Marino }
5754a238c70SJohn Marino else
5764a238c70SJohn Marino {
5774a238c70SJohn Marino if (correctBits < prec)
5784a238c70SJohn Marino { /* The precision was badly chosen */
5794a238c70SJohn Marino MPFR_LOG_MSG (("Bad assumption on the exponent of Ai (x)", 0));
5804a238c70SJohn Marino MPFR_LOG_MSG ((" (E=%ld)\n", (long) (MPFR_GET_EXP (result))));
5814a238c70SJohn Marino wprec = prec + err + 1;
5824a238c70SJohn Marino }
5834a238c70SJohn Marino else
5844a238c70SJohn Marino { /* We are really in a bad case of the TMD */
5854a238c70SJohn Marino MPFR_ZIV_NEXT (loop, prec);
5864a238c70SJohn Marino
5874a238c70SJohn Marino /* We update wprec */
5884a238c70SJohn Marino /* We assume that t will not be multiplied by more than 4 */
5894a238c70SJohn Marino wprec = (prec + (MPFR_INT_CEIL_LOG2 (t) + 2) + 6 + cond
5904a238c70SJohn Marino - MPFR_GET_EXP (result));
5914a238c70SJohn Marino }
5924a238c70SJohn Marino }
5934a238c70SJohn Marino } /* End of ZIV loop */
5944a238c70SJohn Marino
5954a238c70SJohn Marino MPFR_ZIV_FREE (loop);
5964a238c70SJohn Marino MPFR_SAVE_EXPO_FREE (expo);
5974a238c70SJohn Marino
5984a238c70SJohn Marino r = mpfr_set (y, result, rnd);
5994a238c70SJohn Marino
6004a238c70SJohn Marino mpfr_clear (tmp_sp);
6014a238c70SJohn Marino mpfr_clear (tmp2_sp);
6024a238c70SJohn Marino for (j=0; j<=L; j++)
6034a238c70SJohn Marino mpfr_clear (z[j]);
6044a238c70SJohn Marino (*__gmp_free_func) (z, (L + 1) * sizeof (mpfr_t));
6054a238c70SJohn Marino
6064a238c70SJohn Marino mpfr_clear (s);
6074a238c70SJohn Marino mpfr_clear (u0); mpfr_clear (u1);
6084a238c70SJohn Marino mpfr_clear (result);
6094a238c70SJohn Marino mpfr_clear (temp1);
6104a238c70SJohn Marino mpfr_clear (temp2);
6114a238c70SJohn Marino
6124a238c70SJohn Marino return r;
6134a238c70SJohn Marino }
6144a238c70SJohn Marino
6154a238c70SJohn Marino /* We consider that the boundary between the area where the naive method
6164a238c70SJohn Marino should preferably be used and the area where Smith' method should preferably
6174a238c70SJohn Marino be used has the following form:
6184a238c70SJohn Marino it is a triangle defined by two lines (one for the negative values of x, and
6194a238c70SJohn Marino one for the positive values of x) crossing at x=0.
6204a238c70SJohn Marino
6214a238c70SJohn Marino More precisely,
6224a238c70SJohn Marino
6234a238c70SJohn Marino * If x<0 and MPFR_AI_THRESHOLD1*x + MPFR_AI_THRESHOLD2*prec > MPFR_AI_SCALE,
6244a238c70SJohn Marino use Smith' algorithm;
6254a238c70SJohn Marino * If x>0 and MPFR_AI_THRESHOLD3*x + MPFR_AI_THRESHOLD2*prec > MPFR_AI_SCALE,
6264a238c70SJohn Marino use Smith' algorithm;
6274a238c70SJohn Marino * otherwise, use the naive method.
6284a238c70SJohn Marino */
6294a238c70SJohn Marino
6304a238c70SJohn Marino #define MPFR_AI_SCALE 1048576
6314a238c70SJohn Marino
6324a238c70SJohn Marino int
mpfr_ai(mpfr_ptr y,mpfr_srcptr x,mpfr_rnd_t rnd)6334a238c70SJohn Marino mpfr_ai (mpfr_ptr y, mpfr_srcptr x, mpfr_rnd_t rnd)
6344a238c70SJohn Marino {
6354a238c70SJohn Marino mpfr_t temp1, temp2;
6364a238c70SJohn Marino int use_ai2;
6374a238c70SJohn Marino MPFR_SAVE_EXPO_DECL (expo);
6384a238c70SJohn Marino
6394a238c70SJohn Marino /* The exponent range must be large enough for the computation of temp1. */
6404a238c70SJohn Marino MPFR_SAVE_EXPO_MARK (expo);
6414a238c70SJohn Marino
6424a238c70SJohn Marino mpfr_init2 (temp1, MPFR_SMALL_PRECISION);
6434a238c70SJohn Marino mpfr_init2 (temp2, MPFR_SMALL_PRECISION);
6444a238c70SJohn Marino
6454a238c70SJohn Marino mpfr_set (temp1, x, MPFR_RNDN);
6464a238c70SJohn Marino mpfr_set_si (temp2, MPFR_AI_THRESHOLD2, MPFR_RNDN);
6474a238c70SJohn Marino mpfr_mul_ui (temp2, temp2, MPFR_PREC (y) > ULONG_MAX ?
6484a238c70SJohn Marino ULONG_MAX : (unsigned long) MPFR_PREC (y), MPFR_RNDN);
6494a238c70SJohn Marino
6504a238c70SJohn Marino if (MPFR_IS_NEG (x))
6514a238c70SJohn Marino mpfr_mul_si (temp1, temp1, MPFR_AI_THRESHOLD1, MPFR_RNDN);
6524a238c70SJohn Marino else
6534a238c70SJohn Marino mpfr_mul_si (temp1, temp1, MPFR_AI_THRESHOLD3, MPFR_RNDN);
6544a238c70SJohn Marino
6554a238c70SJohn Marino mpfr_add (temp1, temp1, temp2, MPFR_RNDN);
6564a238c70SJohn Marino mpfr_clear (temp2);
6574a238c70SJohn Marino
6584a238c70SJohn Marino use_ai2 = mpfr_cmp_si (temp1, MPFR_AI_SCALE) > 0;
6594a238c70SJohn Marino mpfr_clear (temp1);
6604a238c70SJohn Marino
6614a238c70SJohn Marino MPFR_SAVE_EXPO_FREE (expo); /* Ignore all previous exceptions. */
6624a238c70SJohn Marino
6634a238c70SJohn Marino return use_ai2 ? mpfr_ai2 (y, x, rnd) : mpfr_ai1 (y, x, rnd);
6644a238c70SJohn Marino }
665