xref: /netbsd-src/external/lgpl3/mpfr/dist/src/sum.c (revision 16dce51364ebe8aeafbae46bc5aa167b8115bc45)
1 /* Sum -- efficiently sum a list of floating-point numbers
2 
3 Copyright 2004-2016 Free Software Foundation, Inc.
4 Contributed by the AriC and Caramba projects, INRIA.
5 
6 This file is part of the GNU MPFR Library.
7 
8 The GNU MPFR Library is free software; you can redistribute it and/or modify
9 it under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or (at your
11 option) any later version.
12 
13 The GNU MPFR Library is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
16 License for more details.
17 
18 You should have received a copy of the GNU Lesser General Public License
19 along with the GNU MPFR Library; see the file COPYING.LESSER.  If not, see
20 http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
21 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
22 
23 /* Reference: James Demmel and Yozo Hida, Fast and accurate floating-point
24    summation with application to computational geometry, Numerical Algorithms,
25    volume 37, number 1-4, pages 101--112, 2004. */
26 
27 #define MPFR_NEED_LONGLONG_H
28 #include "mpfr-impl.h"
29 
30 /* I would really like to use "mpfr_srcptr const []" but the norm is buggy:
31    it doesn't automaticaly cast a "mpfr_ptr []" to "mpfr_srcptr const []"
32    if necessary. So the choice are:
33      mpfr_s **                : ok
34      mpfr_s *const*           : ok
35      mpfr_s **const           : ok
36      mpfr_s *const*const      : ok
37      const mpfr_s *const*     : no
38      const mpfr_s **const     : no
39      const mpfr_s *const*const: no
40    VL: this is not a bug, but a feature. See the reason here:
41      http://c-faq.com/ansi/constmismatch.html
42 */
43 static void heap_sort (mpfr_srcptr *const, unsigned long, mpfr_srcptr *);
44 static void count_sort (mpfr_srcptr *const, unsigned long, mpfr_srcptr *,
45                         mpfr_exp_t, mpfr_uexp_t);
46 
47 /* Either sort the tab in perm and returns 0
48    Or returns 1 for +INF, -1 for -INF and 2 for NAN.
49    Also set *maxprec to the maximal precision of tab[0..n-1] and of the
50    initial value of *maxprec.
51 */
52 int
53 mpfr_sum_sort (mpfr_srcptr *const tab, unsigned long n, mpfr_srcptr *perm,
54                mpfr_prec_t *maxprec)
55 {
56   mpfr_exp_t min, max;
57   mpfr_uexp_t exp_num;
58   unsigned long i;
59   int sign_inf;
60 
61   sign_inf = 0;
62   min = MPFR_EMIN_MAX;
63   max = MPFR_EMAX_MIN;
64   for (i = 0; i < n; i++)
65     {
66       if (MPFR_UNLIKELY (MPFR_IS_SINGULAR (tab[i])))
67         {
68           if (MPFR_IS_NAN (tab[i]))
69             return 2; /* Return NAN code */
70           else if (MPFR_IS_INF (tab[i]))
71             {
72               if (sign_inf == 0) /* No previous INF */
73                 sign_inf = MPFR_SIGN (tab[i]);
74               else if (sign_inf != MPFR_SIGN (tab[i]))
75                 return 2; /* Return NAN */
76             }
77         }
78       else
79         {
80           MPFR_ASSERTD (MPFR_IS_PURE_FP (tab[i]));
81           if (MPFR_GET_EXP (tab[i]) < min)
82             min = MPFR_GET_EXP(tab[i]);
83           if (MPFR_GET_EXP (tab[i]) > max)
84             max = MPFR_GET_EXP(tab[i]);
85         }
86       if (MPFR_PREC (tab[i]) > *maxprec)
87         *maxprec = MPFR_PREC (tab[i]);
88     }
89   if (MPFR_UNLIKELY (sign_inf != 0))
90     return sign_inf;
91 
92   exp_num = max - min + 1;
93   /* FIXME : better test */
94   if (exp_num > n * MPFR_INT_CEIL_LOG2 (n))
95     heap_sort (tab, n, perm);
96   else
97     count_sort (tab, n, perm, min, exp_num);
98   return 0;
99 }
100 
101 #define GET_EXP1(x) (MPFR_IS_ZERO (x) ? min : MPFR_GET_EXP (x))
102 /* Performs a count sort of the entries */
103 static void
104 count_sort (mpfr_srcptr *const tab, unsigned long n,
105             mpfr_srcptr *perm, mpfr_exp_t min, mpfr_uexp_t exp_num)
106 {
107   unsigned long *account;
108   unsigned long target_rank, i;
109   MPFR_TMP_DECL(marker);
110 
111   /* Reserve a place for potential 0 (with EXP min-1)
112      If there is no zero, we only lose one unused entry */
113   min--;
114   exp_num++;
115 
116   /* Performs a counting sort of the entries */
117   MPFR_TMP_MARK (marker);
118   account = (unsigned long *) MPFR_TMP_ALLOC (exp_num * sizeof *account);
119   for (i = 0; i < exp_num; i++)
120     account[i] = 0;
121   for (i = 0; i < n; i++)
122     account[GET_EXP1 (tab[i]) - min]++;
123   for (i = exp_num - 1; i >= 1; i--)
124     account[i - 1] += account[i];
125   for (i = 0; i < n; i++)
126     {
127       target_rank = --account[GET_EXP1 (tab[i]) - min];
128       perm[target_rank] = tab[i];
129     }
130   MPFR_TMP_FREE (marker);
131 }
132 
133 
134 #define GET_EXP2(x) (MPFR_IS_ZERO (x) ? MPFR_EMIN_MIN : MPFR_GET_EXP (x))
135 
136 /* Performs a heap sort of the entries */
137 static void
138 heap_sort (mpfr_srcptr *const tab, unsigned long n, mpfr_srcptr *perm)
139 {
140   unsigned long dernier_traite;
141   unsigned long i, pere;
142   mpfr_srcptr tmp;
143   unsigned long fils_gauche, fils_droit, fils_indigne;
144   /* Reminder of a heap structure :
145      node(i) has for left son node(2i +1) and right son node(2i)
146      and father(node(i)) = node((i - 1) / 2)
147   */
148 
149   /* initialize the permutation to identity */
150   for (i = 0; i < n; i++)
151     perm[i] = tab[i];
152 
153   /* insertion phase */
154   for (dernier_traite = 1; dernier_traite < n; dernier_traite++)
155     {
156       i = dernier_traite;
157       while (i > 0)
158         {
159           pere = (i - 1) / 2;
160           if (GET_EXP2 (perm[pere]) > GET_EXP2 (perm[i]))
161             {
162               tmp = perm[pere];
163               perm[pere] = perm[i];
164               perm[i] = tmp;
165               i = pere;
166             }
167           else
168             break;
169         }
170     }
171 
172   /* extraction phase */
173   for (dernier_traite = n - 1; dernier_traite > 0; dernier_traite--)
174     {
175       tmp = perm[0];
176       perm[0] = perm[dernier_traite];
177       perm[dernier_traite] = tmp;
178 
179       i = 0;
180       while (1)
181         {
182           fils_gauche = 2 * i + 1;
183           fils_droit = fils_gauche + 1;
184           if (fils_gauche < dernier_traite)
185             {
186               if (fils_droit < dernier_traite)
187                 {
188                   if (GET_EXP2(perm[fils_droit]) < GET_EXP2(perm[fils_gauche]))
189                     fils_indigne = fils_droit;
190                   else
191                     fils_indigne = fils_gauche;
192 
193                   if (GET_EXP2 (perm[i]) > GET_EXP2 (perm[fils_indigne]))
194                     {
195                       tmp = perm[i];
196                       perm[i] = perm[fils_indigne];
197                       perm[fils_indigne] = tmp;
198                       i = fils_indigne;
199                     }
200                   else
201                     break;
202                 }
203               else /* on a un fils gauche, pas de fils droit */
204                 {
205                   if (GET_EXP2 (perm[i]) > GET_EXP2 (perm[fils_gauche]))
206                     {
207                       tmp = perm[i];
208                       perm[i] = perm[fils_gauche];
209                       perm[fils_gauche] = tmp;
210                     }
211                   break;
212                 }
213             }
214           else /* on n'a pas de fils */
215             break;
216         }
217     }
218 }
219 
220 
221 /* Sum a list of float with order given by permutation perm,
222  * intermediate size set to F. Return non-zero if at least one of
223  * the operations is inexact (thus 0 implies that the sum is exact).
224  * Internal use function.
225  */
226 static int
227 sum_once (mpfr_ptr ret, mpfr_srcptr *const tab, unsigned long n, mpfr_prec_t F)
228 {
229   mpfr_t sum;
230   unsigned long i;
231   int error_trap;
232 
233   MPFR_ASSERTD (n >= 2);
234 
235   mpfr_init2 (sum, F);
236   error_trap = mpfr_set (sum, tab[0], MPFR_RNDN);
237   for (i = 1; i < n - 1; i++)
238     {
239       MPFR_ASSERTD (!MPFR_IS_NAN (sum) && !MPFR_IS_INF (sum));
240       if (mpfr_add (sum, sum, tab[i], MPFR_RNDN))
241         error_trap = 1;
242     }
243   if (mpfr_add (ret, sum, tab[n - 1], MPFR_RNDN))
244     error_trap = 1;
245   mpfr_clear (sum);
246   return error_trap;
247 }
248 
249 /* Sum a list of floating-point numbers.
250  * If the return value is 0, then the sum is exact.
251  * Otherwise the return value gives no information.
252  */
253 int
254 mpfr_sum (mpfr_ptr ret, mpfr_ptr *const tab_p, unsigned long n, mpfr_rnd_t rnd)
255 {
256   mpfr_t cur_sum;
257   mpfr_prec_t prec;
258   mpfr_srcptr *perm, *const tab = (mpfr_srcptr *) tab_p;
259   int k, error_trap;
260   MPFR_ZIV_DECL (loop);
261   MPFR_SAVE_EXPO_DECL (expo);
262   MPFR_TMP_DECL (marker);
263 
264   if (MPFR_UNLIKELY (n <= 1))
265     {
266       if (n < 1)
267         {
268           MPFR_SET_ZERO (ret);
269           MPFR_SET_POS (ret);
270           return 0;
271         }
272       else
273         return mpfr_set (ret, tab[0], rnd);
274     }
275 
276   /* Sort and treat special cases */
277   MPFR_TMP_MARK (marker);
278   perm = (mpfr_srcptr *) MPFR_TMP_ALLOC (n * sizeof *perm);
279   prec = MPFR_PREC (ret);
280   error_trap = mpfr_sum_sort (tab, n, perm, &prec);
281   /* Check if there was a NAN or a INF */
282   if (MPFR_UNLIKELY (error_trap != 0))
283     {
284       MPFR_TMP_FREE (marker);
285       if (error_trap == 2)
286         {
287           MPFR_SET_NAN (ret);
288           MPFR_RET_NAN;
289         }
290       MPFR_SET_INF (ret);
291       MPFR_SET_SIGN (ret, error_trap);
292       MPFR_RET (0);
293     }
294 
295   /* Initial precision is max(prec(ret),prec(tab[0]),...,prec(tab[n-1])) */
296   k = MPFR_INT_CEIL_LOG2 (n) + 1;
297   prec += k + 2;
298   mpfr_init2 (cur_sum, prec);
299 
300   /* Ziv Loop */
301   MPFR_SAVE_EXPO_MARK (expo);
302   MPFR_ZIV_INIT (loop, prec);
303   for (;;)
304     {
305       error_trap = sum_once (cur_sum, perm, n, prec + k);
306       if (MPFR_LIKELY (error_trap == 0 ||
307                        (!MPFR_IS_ZERO (cur_sum) &&
308                         mpfr_can_round (cur_sum, prec - 2,
309                                         MPFR_RNDN, rnd, MPFR_PREC (ret)))))
310         break;
311       MPFR_ZIV_NEXT (loop, prec);
312       mpfr_set_prec (cur_sum, prec);
313     }
314   MPFR_ZIV_FREE (loop);
315   MPFR_TMP_FREE (marker);
316 
317   if (mpfr_set (ret, cur_sum, rnd))
318     error_trap = 1;
319   mpfr_clear (cur_sum);
320 
321   MPFR_SAVE_EXPO_FREE (expo);
322   if (mpfr_check_range (ret, 0, rnd))
323     error_trap = 1;
324   return error_trap; /* It doesn't return the ternary value */
325 }
326 
327 /* __END__ */
328