xref: /dflybsd-src/contrib/mpfr/src/sum.c (revision 2786097444a0124b5d33763854de247e230c6629)
14a238c70SJohn Marino /* Sum -- efficiently sum a list of floating-point numbers
24a238c70SJohn Marino 
3*ab6d115fSJohn Marino Copyright 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc.
4*ab6d115fSJohn Marino Contributed by the AriC and Caramel projects, INRIA.
54a238c70SJohn Marino 
64a238c70SJohn Marino This file is part of the GNU MPFR Library.
74a238c70SJohn Marino 
84a238c70SJohn Marino The GNU MPFR Library is free software; you can redistribute it and/or modify
94a238c70SJohn Marino it under the terms of the GNU Lesser General Public License as published by
104a238c70SJohn Marino the Free Software Foundation; either version 3 of the License, or (at your
114a238c70SJohn Marino option) any later version.
124a238c70SJohn Marino 
134a238c70SJohn Marino The GNU MPFR Library is distributed in the hope that it will be useful, but
144a238c70SJohn Marino WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
154a238c70SJohn Marino or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
164a238c70SJohn Marino License for more details.
174a238c70SJohn Marino 
184a238c70SJohn Marino You should have received a copy of the GNU Lesser General Public License
194a238c70SJohn Marino along with the GNU MPFR Library; see the file COPYING.LESSER.  If not, see
204a238c70SJohn Marino http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
214a238c70SJohn Marino 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
224a238c70SJohn Marino 
234a238c70SJohn Marino /* Reference: James Demmel and Yozo Hida, Fast and accurate floating-point
244a238c70SJohn Marino    summation with application to computational geometry, Numerical Algorithms,
254a238c70SJohn Marino    volume 37, number 1-4, pages 101--112, 2004. */
264a238c70SJohn Marino 
274a238c70SJohn Marino #define MPFR_NEED_LONGLONG_H
284a238c70SJohn Marino #include "mpfr-impl.h"
294a238c70SJohn Marino 
304a238c70SJohn Marino /* I would really like to use "mpfr_srcptr const []" but the norm is buggy:
314a238c70SJohn Marino    it doesn't automaticaly cast a "mpfr_ptr []" to "mpfr_srcptr const []"
324a238c70SJohn Marino    if necessary. So the choice are:
334a238c70SJohn Marino      mpfr_s **                : ok
344a238c70SJohn Marino      mpfr_s *const*           : ok
354a238c70SJohn Marino      mpfr_s **const           : ok
364a238c70SJohn Marino      mpfr_s *const*const      : ok
374a238c70SJohn Marino      const mpfr_s *const*     : no
384a238c70SJohn Marino      const mpfr_s **const     : no
394a238c70SJohn Marino      const mpfr_s *const*const: no
404a238c70SJohn Marino    VL: this is not a bug, but a feature. See the reason here:
414a238c70SJohn Marino      http://c-faq.com/ansi/constmismatch.html
424a238c70SJohn Marino */
434a238c70SJohn Marino static void heap_sort (mpfr_srcptr *const, unsigned long, mpfr_srcptr *);
444a238c70SJohn Marino static void count_sort (mpfr_srcptr *const, unsigned long, mpfr_srcptr *,
454a238c70SJohn Marino                         mpfr_exp_t, mpfr_uexp_t);
464a238c70SJohn Marino 
474a238c70SJohn Marino /* Either sort the tab in perm and returns 0
484a238c70SJohn Marino    Or returns 1 for +INF, -1 for -INF and 2 for NAN */
494a238c70SJohn Marino int
mpfr_sum_sort(mpfr_srcptr * const tab,unsigned long n,mpfr_srcptr * perm)504a238c70SJohn Marino mpfr_sum_sort (mpfr_srcptr *const tab, unsigned long n, mpfr_srcptr *perm)
514a238c70SJohn Marino {
524a238c70SJohn Marino   mpfr_exp_t min, max;
534a238c70SJohn Marino   mpfr_uexp_t exp_num;
544a238c70SJohn Marino   unsigned long i;
554a238c70SJohn Marino   int sign_inf;
564a238c70SJohn Marino 
574a238c70SJohn Marino   sign_inf = 0;
584a238c70SJohn Marino   min = MPFR_EMIN_MAX;
594a238c70SJohn Marino   max = MPFR_EMAX_MIN;
604a238c70SJohn Marino   for (i = 0; i < n; i++)
614a238c70SJohn Marino     {
624a238c70SJohn Marino       if (MPFR_UNLIKELY (MPFR_IS_SINGULAR (tab[i])))
634a238c70SJohn Marino         {
644a238c70SJohn Marino           if (MPFR_IS_NAN (tab[i]))
654a238c70SJohn Marino             return 2; /* Return NAN code */
664a238c70SJohn Marino           else if (MPFR_IS_INF (tab[i]))
674a238c70SJohn Marino             {
684a238c70SJohn Marino               if (sign_inf == 0) /* No previous INF */
694a238c70SJohn Marino                 sign_inf = MPFR_SIGN (tab[i]);
704a238c70SJohn Marino               else if (sign_inf != MPFR_SIGN (tab[i]))
714a238c70SJohn Marino                 return 2; /* Return NAN */
724a238c70SJohn Marino             }
734a238c70SJohn Marino         }
744a238c70SJohn Marino       else
754a238c70SJohn Marino         {
764a238c70SJohn Marino           MPFR_ASSERTD (MPFR_IS_PURE_FP (tab[i]));
774a238c70SJohn Marino           if (MPFR_GET_EXP (tab[i]) < min)
784a238c70SJohn Marino             min = MPFR_GET_EXP(tab[i]);
794a238c70SJohn Marino           if (MPFR_GET_EXP (tab[i]) > max)
804a238c70SJohn Marino             max = MPFR_GET_EXP(tab[i]);
814a238c70SJohn Marino         }
824a238c70SJohn Marino     }
834a238c70SJohn Marino   if (MPFR_UNLIKELY (sign_inf != 0))
844a238c70SJohn Marino     return sign_inf;
854a238c70SJohn Marino 
864a238c70SJohn Marino   exp_num = max - min + 1;
874a238c70SJohn Marino   /* FIXME : better test */
884a238c70SJohn Marino   if (exp_num > n * MPFR_INT_CEIL_LOG2 (n))
894a238c70SJohn Marino     heap_sort (tab, n, perm);
904a238c70SJohn Marino   else
914a238c70SJohn Marino     count_sort (tab, n, perm, min, exp_num);
924a238c70SJohn Marino   return 0;
934a238c70SJohn Marino }
944a238c70SJohn Marino 
954a238c70SJohn Marino #define GET_EXP1(x) (MPFR_IS_ZERO (x) ? min : MPFR_GET_EXP (x))
964a238c70SJohn Marino /* Performs a count sort of the entries */
974a238c70SJohn Marino static void
count_sort(mpfr_srcptr * const tab,unsigned long n,mpfr_srcptr * perm,mpfr_exp_t min,mpfr_uexp_t exp_num)984a238c70SJohn Marino count_sort (mpfr_srcptr *const tab, unsigned long n,
994a238c70SJohn Marino             mpfr_srcptr *perm, mpfr_exp_t min, mpfr_uexp_t exp_num)
1004a238c70SJohn Marino {
1014a238c70SJohn Marino   unsigned long *account;
1024a238c70SJohn Marino   unsigned long target_rank, i;
1034a238c70SJohn Marino   MPFR_TMP_DECL(marker);
1044a238c70SJohn Marino 
1054a238c70SJohn Marino   /* Reserve a place for potential 0 (with EXP min-1)
1064a238c70SJohn Marino      If there is no zero, we only lose one unused entry */
1074a238c70SJohn Marino   min--;
1084a238c70SJohn Marino   exp_num++;
1094a238c70SJohn Marino 
1104a238c70SJohn Marino   /* Performs a counting sort of the entries */
1114a238c70SJohn Marino   MPFR_TMP_MARK (marker);
1124a238c70SJohn Marino   account = (unsigned long *) MPFR_TMP_ALLOC (exp_num * sizeof *account);
1134a238c70SJohn Marino   for (i = 0; i < exp_num; i++)
1144a238c70SJohn Marino     account[i] = 0;
1154a238c70SJohn Marino   for (i = 0; i < n; i++)
1164a238c70SJohn Marino     account[GET_EXP1 (tab[i]) - min]++;
1174a238c70SJohn Marino   for (i = exp_num - 1; i >= 1; i--)
1184a238c70SJohn Marino     account[i - 1] += account[i];
1194a238c70SJohn Marino   for (i = 0; i < n; i++)
1204a238c70SJohn Marino     {
1214a238c70SJohn Marino       target_rank = --account[GET_EXP1 (tab[i]) - min];
1224a238c70SJohn Marino       perm[target_rank] = tab[i];
1234a238c70SJohn Marino     }
1244a238c70SJohn Marino   MPFR_TMP_FREE (marker);
1254a238c70SJohn Marino }
1264a238c70SJohn Marino 
1274a238c70SJohn Marino 
1284a238c70SJohn Marino #define GET_EXP2(x) (MPFR_IS_ZERO (x) ? MPFR_EMIN_MIN : MPFR_GET_EXP (x))
1294a238c70SJohn Marino 
1304a238c70SJohn Marino /* Performs a heap sort of the entries */
1314a238c70SJohn Marino static void
heap_sort(mpfr_srcptr * const tab,unsigned long n,mpfr_srcptr * perm)1324a238c70SJohn Marino heap_sort (mpfr_srcptr *const tab, unsigned long n, mpfr_srcptr *perm)
1334a238c70SJohn Marino {
1344a238c70SJohn Marino   unsigned long dernier_traite;
1354a238c70SJohn Marino   unsigned long i, pere;
1364a238c70SJohn Marino   mpfr_srcptr tmp;
1374a238c70SJohn Marino   unsigned long fils_gauche, fils_droit, fils_indigne;
1384a238c70SJohn Marino   /* Reminder of a heap structure :
1394a238c70SJohn Marino      node(i) has for left son node(2i +1) and right son node(2i)
1404a238c70SJohn Marino      and father(node(i)) = node((i - 1) / 2)
1414a238c70SJohn Marino   */
1424a238c70SJohn Marino 
1434a238c70SJohn Marino   /* initialize the permutation to identity */
1444a238c70SJohn Marino   for (i = 0; i < n; i++)
1454a238c70SJohn Marino     perm[i] = tab[i];
1464a238c70SJohn Marino 
1474a238c70SJohn Marino   /* insertion phase */
1484a238c70SJohn Marino   for (dernier_traite = 1; dernier_traite < n; dernier_traite++)
1494a238c70SJohn Marino     {
1504a238c70SJohn Marino       i = dernier_traite;
1514a238c70SJohn Marino       while (i > 0)
1524a238c70SJohn Marino         {
1534a238c70SJohn Marino           pere = (i - 1) / 2;
1544a238c70SJohn Marino           if (GET_EXP2 (perm[pere]) > GET_EXP2 (perm[i]))
1554a238c70SJohn Marino             {
1564a238c70SJohn Marino               tmp = perm[pere];
1574a238c70SJohn Marino               perm[pere] = perm[i];
1584a238c70SJohn Marino               perm[i] = tmp;
1594a238c70SJohn Marino               i = pere;
1604a238c70SJohn Marino             }
1614a238c70SJohn Marino           else
1624a238c70SJohn Marino             break;
1634a238c70SJohn Marino         }
1644a238c70SJohn Marino     }
1654a238c70SJohn Marino 
1664a238c70SJohn Marino   /* extraction phase */
1674a238c70SJohn Marino   for (dernier_traite = n - 1; dernier_traite > 0; dernier_traite--)
1684a238c70SJohn Marino     {
1694a238c70SJohn Marino       tmp = perm[0];
1704a238c70SJohn Marino       perm[0] = perm[dernier_traite];
1714a238c70SJohn Marino       perm[dernier_traite] = tmp;
1724a238c70SJohn Marino 
1734a238c70SJohn Marino       i = 0;
1744a238c70SJohn Marino       while (1)
1754a238c70SJohn Marino         {
1764a238c70SJohn Marino           fils_gauche = 2 * i + 1;
1774a238c70SJohn Marino           fils_droit = fils_gauche + 1;
1784a238c70SJohn Marino           if (fils_gauche < dernier_traite)
1794a238c70SJohn Marino             {
1804a238c70SJohn Marino               if (fils_droit < dernier_traite)
1814a238c70SJohn Marino                 {
1824a238c70SJohn Marino                   if (GET_EXP2(perm[fils_droit]) < GET_EXP2(perm[fils_gauche]))
1834a238c70SJohn Marino                     fils_indigne = fils_droit;
1844a238c70SJohn Marino                   else
1854a238c70SJohn Marino                     fils_indigne = fils_gauche;
1864a238c70SJohn Marino 
1874a238c70SJohn Marino                   if (GET_EXP2 (perm[i]) > GET_EXP2 (perm[fils_indigne]))
1884a238c70SJohn Marino                     {
1894a238c70SJohn Marino                       tmp = perm[i];
1904a238c70SJohn Marino                       perm[i] = perm[fils_indigne];
1914a238c70SJohn Marino                       perm[fils_indigne] = tmp;
1924a238c70SJohn Marino                       i = fils_indigne;
1934a238c70SJohn Marino                     }
1944a238c70SJohn Marino                   else
1954a238c70SJohn Marino                     break;
1964a238c70SJohn Marino                 }
1974a238c70SJohn Marino               else /* on a un fils gauche, pas de fils droit */
1984a238c70SJohn Marino                 {
1994a238c70SJohn Marino                   if (GET_EXP2 (perm[i]) > GET_EXP2 (perm[fils_gauche]))
2004a238c70SJohn Marino                     {
2014a238c70SJohn Marino                       tmp = perm[i];
2024a238c70SJohn Marino                       perm[i] = perm[fils_gauche];
2034a238c70SJohn Marino                       perm[fils_gauche] = tmp;
2044a238c70SJohn Marino                     }
2054a238c70SJohn Marino                   break;
2064a238c70SJohn Marino                 }
2074a238c70SJohn Marino             }
2084a238c70SJohn Marino           else /* on n'a pas de fils */
2094a238c70SJohn Marino             break;
2104a238c70SJohn Marino         }
2114a238c70SJohn Marino     }
2124a238c70SJohn Marino }
2134a238c70SJohn Marino 
2144a238c70SJohn Marino 
2154a238c70SJohn Marino /* Sum a list of float with order given by permutation perm,
2164a238c70SJohn Marino  * intermediate size set to F.
2174a238c70SJohn Marino  * Internal use function.
2184a238c70SJohn Marino  */
2194a238c70SJohn Marino static int
sum_once(mpfr_ptr ret,mpfr_srcptr * const tab,unsigned long n,mpfr_prec_t F)2204a238c70SJohn Marino sum_once (mpfr_ptr ret, mpfr_srcptr *const tab, unsigned long n, mpfr_prec_t F)
2214a238c70SJohn Marino {
2224a238c70SJohn Marino   mpfr_t sum;
2234a238c70SJohn Marino   unsigned long i;
2244a238c70SJohn Marino   int error_trap;
2254a238c70SJohn Marino 
2264a238c70SJohn Marino   MPFR_ASSERTD (n >= 2);
2274a238c70SJohn Marino 
2284a238c70SJohn Marino   mpfr_init2 (sum, F);
2294a238c70SJohn Marino   error_trap = mpfr_set (sum, tab[0], MPFR_RNDN);
2304a238c70SJohn Marino   for (i = 1; i < n - 1; i++)
2314a238c70SJohn Marino     {
2324a238c70SJohn Marino       MPFR_ASSERTD (!MPFR_IS_NAN (sum) && !MPFR_IS_INF (sum));
2334a238c70SJohn Marino       error_trap |= mpfr_add (sum, sum, tab[i], MPFR_RNDN);
2344a238c70SJohn Marino     }
2354a238c70SJohn Marino   error_trap |= mpfr_add (ret, sum, tab[n - 1], MPFR_RNDN);
2364a238c70SJohn Marino   mpfr_clear (sum);
2374a238c70SJohn Marino   return error_trap;
2384a238c70SJohn Marino }
2394a238c70SJohn Marino 
2404a238c70SJohn Marino /* Sum a list of floating-point numbers.
2414a238c70SJohn Marino  */
2424a238c70SJohn Marino 
2434a238c70SJohn Marino int
mpfr_sum(mpfr_ptr ret,mpfr_ptr * const tab_p,unsigned long n,mpfr_rnd_t rnd)2444a238c70SJohn Marino mpfr_sum (mpfr_ptr ret, mpfr_ptr *const tab_p, unsigned long n, mpfr_rnd_t rnd)
2454a238c70SJohn Marino {
2464a238c70SJohn Marino   mpfr_t cur_sum;
2474a238c70SJohn Marino   mpfr_prec_t prec;
2484a238c70SJohn Marino   mpfr_srcptr *perm, *const tab = (mpfr_srcptr *) tab_p;
2494a238c70SJohn Marino   int k, error_trap;
2504a238c70SJohn Marino   MPFR_ZIV_DECL (loop);
2514a238c70SJohn Marino   MPFR_SAVE_EXPO_DECL (expo);
2524a238c70SJohn Marino   MPFR_TMP_DECL (marker);
2534a238c70SJohn Marino 
2544a238c70SJohn Marino   if (MPFR_UNLIKELY (n <= 1))
2554a238c70SJohn Marino     {
2564a238c70SJohn Marino       if (n < 1)
2574a238c70SJohn Marino         {
2584a238c70SJohn Marino           MPFR_SET_ZERO (ret);
2594a238c70SJohn Marino           MPFR_SET_POS (ret);
2604a238c70SJohn Marino           return 0;
2614a238c70SJohn Marino         }
2624a238c70SJohn Marino       else
2634a238c70SJohn Marino         return mpfr_set (ret, tab[0], rnd);
2644a238c70SJohn Marino     }
2654a238c70SJohn Marino 
2664a238c70SJohn Marino   /* Sort and treat special cases */
2674a238c70SJohn Marino   MPFR_TMP_MARK (marker);
2684a238c70SJohn Marino   perm = (mpfr_srcptr *) MPFR_TMP_ALLOC (n * sizeof *perm);
2694a238c70SJohn Marino   error_trap = mpfr_sum_sort (tab, n, perm);
2704a238c70SJohn Marino   /* Check if there was a NAN or a INF */
2714a238c70SJohn Marino   if (MPFR_UNLIKELY (error_trap != 0))
2724a238c70SJohn Marino     {
2734a238c70SJohn Marino       MPFR_TMP_FREE (marker);
2744a238c70SJohn Marino       if (error_trap == 2)
2754a238c70SJohn Marino         {
2764a238c70SJohn Marino           MPFR_SET_NAN (ret);
2774a238c70SJohn Marino           MPFR_RET_NAN;
2784a238c70SJohn Marino         }
2794a238c70SJohn Marino       MPFR_SET_INF (ret);
2804a238c70SJohn Marino       MPFR_SET_SIGN (ret, error_trap);
2814a238c70SJohn Marino       MPFR_RET (0);
2824a238c70SJohn Marino     }
2834a238c70SJohn Marino 
2844a238c70SJohn Marino   /* Initial precision */
2854a238c70SJohn Marino   prec = MAX (MPFR_PREC (tab[0]), MPFR_PREC (ret));
2864a238c70SJohn Marino   k = MPFR_INT_CEIL_LOG2 (n) + 1;
2874a238c70SJohn Marino   prec += k + 2;
2884a238c70SJohn Marino   mpfr_init2 (cur_sum, prec);
2894a238c70SJohn Marino 
2904a238c70SJohn Marino   /* Ziv Loop */
2914a238c70SJohn Marino   MPFR_SAVE_EXPO_MARK (expo);
2924a238c70SJohn Marino   MPFR_ZIV_INIT (loop, prec);
2934a238c70SJohn Marino   for (;;)
2944a238c70SJohn Marino     {
2954a238c70SJohn Marino       error_trap = sum_once (cur_sum, perm, n, prec + k);
2964a238c70SJohn Marino       if (MPFR_LIKELY (error_trap == 0 ||
2974a238c70SJohn Marino                        (!MPFR_IS_ZERO (cur_sum) &&
2984a238c70SJohn Marino                         mpfr_can_round (cur_sum,
2994a238c70SJohn Marino                                         MPFR_GET_EXP (cur_sum) - prec + 2,
3004a238c70SJohn Marino                                         MPFR_RNDN, rnd, MPFR_PREC (ret)))))
3014a238c70SJohn Marino         break;
3024a238c70SJohn Marino       MPFR_ZIV_NEXT (loop, prec);
3034a238c70SJohn Marino       mpfr_set_prec (cur_sum, prec);
3044a238c70SJohn Marino     }
3054a238c70SJohn Marino   MPFR_ZIV_FREE (loop);
3064a238c70SJohn Marino   MPFR_TMP_FREE (marker);
3074a238c70SJohn Marino 
3084a238c70SJohn Marino   error_trap |= mpfr_set (ret, cur_sum, rnd);
3094a238c70SJohn Marino   mpfr_clear (cur_sum);
3104a238c70SJohn Marino 
3114a238c70SJohn Marino   MPFR_SAVE_EXPO_FREE (expo);
3124a238c70SJohn Marino   error_trap |= mpfr_check_range (ret, 0, rnd);
3134a238c70SJohn Marino   return error_trap; /* It doesn't return the ternary value */
3144a238c70SJohn Marino }
3154a238c70SJohn Marino 
3164a238c70SJohn Marino /* __END__ */
317