xref: /netbsd-src/external/lgpl3/mpfr/dist/src/sum.c (revision ec6772edaf0cdcb5f52a48f4aca5e33a8fb8ecfd)
1 /* Sum -- efficiently sum a list of floating-point numbers
2 
3 Copyright 2014-2023 Free Software Foundation, Inc.
4 Contributed by the AriC and Caramba projects, INRIA.
5 
6 This file is part of the GNU MPFR Library.
7 
8 The GNU MPFR Library is free software; you can redistribute it and/or modify
9 it under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or (at your
11 option) any later version.
12 
13 The GNU MPFR Library is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
16 License for more details.
17 
18 You should have received a copy of the GNU Lesser General Public License
19 along with the GNU MPFR Library; see the file COPYING.LESSER.  If not, see
20 https://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
21 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
22 
23 #define MPFR_NEED_LONGLONG_H
24 #include "mpfr-impl.h"
25 
26 /* Note: In the prototypes, one uses
27  *
28  *   const mpfr_ptr *x      i.e.:  __mpfr_struct *const *x
29  *
30  * instead of
31  *
32  *   const mpfr_srcptr *x   i.e.:  const __mpfr_struct *const *x
33  *
34  * because here one has a double indirection and the type matching rules
35  * from the C standard in such a case are stricter and they would yield
36  * annoying errors for the user in practice. See:
37  *
38  *   Why can't I pass a char ** to a function which expects a const char **?
39  *
40  * in the comp.lang.c FAQ:
41  *
42  *   https://c-faq.com/ansi/constmismatch.html
43  */
44 
45 /* See the doc/sum.txt file for the algorithm and a part of its proof
46 (this will later go into algorithms.tex).
47 
48 TODO [VL, after a discussion with James Demmel]: Compared to
49   James Demmel and Yozo Hida, Fast and accurate floating-point summation
50   with application to computational geometry, Numerical Algorithms,
51   volume 37, number 1-4, pages 101--112, 2004.
52 sorting is not necessary here. It is not done because in the most common
53 cases (where big cancellations are rare), it would take time and be
54 useless. However, the lack of sorting increases the worst case complexity.
55 For instance, consider many inputs that cancel one another (two by two).
56 One would need n/2 iterations, where each iteration reads the exponent
57 of each input, therefore n*n/2 read operations. Using a worst-case sort
58 in O(n log n) could give a O(n log n) worst-case complexity. As we don't
59 want to slow down the most common cases, this could be done at the 3rd
60 iteration. But are there practical applications which would be used as
61 tests?
62 
63 Note: see the following paper and its references:
64   http://www.acsel-lab.com/arithmetic/arith21/papers/p54.pdf
65   (J. Demmel and H. D. Nguyen, Fast Reproducible Floating-Point Summation)
66 VL: This is very different:
67           In MPFR             In the paper & references
68     arbitrary precision            fixed precision
69      correct rounding        just reproducible rounding
70     integer operations        floating-point operations
71         sequential             parallel (& sequential)
72 */
73 
74 #ifdef MPFR_COV_CHECK
75 int __gmpfr_cov_sum_tmd[MPFR_RND_MAX][2][2][3][2][2] = { 0 };
76 #endif
77 
78 /* Update minexp (V) after detecting a potential integer overflow in
79    extreme cases (only a 32-bit ABI may be concerned in practice).
80    Instead of an assertion failure below, we could
81    1. check that the ulp of each regular input has an exponent >= MPFR_EXP_MIN
82       (with an assertion failure if this is not the case);
83    2. set minexp to MPFR_EXP_MIN and shift the accumulator accordingly
84       (the sum will then be exact).
85    However, such cases, which involve huge precisions, will probably
86    never occur in practice (at least with a 64-bit ABI) and are not
87    easily testable due to these huge precisions. Moreover, switching
88    to a 64-bit ABI would be a better solution for such computations.
89    So, let's leave this unimplemented. */
90 #define SAFE_SUB(V,E,SH)                        \
91   do                                            \
92     {                                           \
93       mpfr_prec_t sh = (SH);                    \
94       MPFR_ASSERTN ((E) >= MPFR_EXP_MIN + sh);  \
95       V = (E) - sh;                             \
96     }                                           \
97   while (0)
98 
99 /* Function sum_raw
100  * ================
101  *
102  * Accumulate a new [minexp,maxexp[ block into (wp,ws). If e and err denote
103  * the exponents of the computed result and of the error bound respectively,
104  * while e - err is less than some given bound (due to cancellation), shift
105  * the accumulator and reiterate.
106  *
107  * Inputs:
108  *   wp: pointer to the accumulator (least significant limb first).
109  *   ws: size of the accumulator (in limbs).
110  *   wq: precision of the accumulator (ws * GMP_NUMB_BITS).
111  *   x: array of the input numbers.
112  *   n: size of this array (number of inputs, regular or not).
113  *   minexp: exponent of the least significant bit of the first block.
114  *   maxexp: exponent of the first block (exponent of its MSB + 1).
115  *   tp: pointer to a temporary area (pre-allocated).
116  *   ts: size of this temporary area.
117  *   logn: ceil(log2(rn)), where rn is the number of regular inputs.
118  *   prec: lower bound for e - err (as described above).
119  *   ep: pointer to mpfr_exp_t (see below), or a null pointer.
120  *   minexpp: pointer to mpfr_exp_t (see below), or a null pointer.
121  *   maxexpp: pointer to mpfr_exp_t (see below), or a null pointer.
122  *
123  * Preconditions:
124  *   prec >= 1
125  *   wq >= logn + prec + 2
126  *
127  * This function returns 0 if the accumulator is 0 (which implies that
128  * the exact sum for this sum_raw invocation is 0), otherwise the number
129  * of cancelled bits (>= 1), defined as the number of identical bits on
130  * the most significant part of the accumulator. In the latter case, it
131  * also returns the following data in variables passed by reference, if
132  * the pointers are not NULL:
133  * - in ep: the exponent e of the computed result;
134  * - in minexpp: the last value of minexp;
135  * - in maxexpp: the new value of maxexp (for the next iteration after
136  *   the first invocation of sum_raw in the main code).
137  *
138  * Notes:
139  * - minexp is also the exponent of the least significant bit of the
140  *   accumulator;
141  * - the temporary area must be large enough to hold a shifted input
142  *   block, and the value of ts is used only when the full assertions
143  *   are checked (i.e. with the --enable-assert configure option), to
144  *   check that a buffer overflow doesn't occur;
145  * - contrary to the returned value of minexp (the value in the last
146  *   iteration), the returned value of maxexp is the one for the next
147  *   iteration (= maxexp2 of the last iteration).
148  */
149 static mpfr_prec_t
sum_raw(mp_limb_t * wp,mp_size_t ws,mpfr_prec_t wq,const mpfr_ptr * x,unsigned long n,mpfr_exp_t minexp,mpfr_exp_t maxexp,mp_limb_t * tp,mp_size_t ts,int logn,mpfr_prec_t prec,mpfr_exp_t * ep,mpfr_exp_t * minexpp,mpfr_exp_t * maxexpp)150 sum_raw (mp_limb_t *wp, mp_size_t ws, mpfr_prec_t wq, const mpfr_ptr *x,
151          unsigned long n, mpfr_exp_t minexp, mpfr_exp_t maxexp,
152          mp_limb_t *tp, mp_size_t ts, int logn, mpfr_prec_t prec,
153          mpfr_exp_t *ep, mpfr_exp_t *minexpp, mpfr_exp_t *maxexpp)
154 {
155   MPFR_LOG_FUNC
156     (("ws=%Pd ts=%Pd prec=%Pd", (mpfr_prec_t) ws, (mpfr_prec_t) ts, prec),
157      ("", 0));
158 
159   /* The C code below requires prec >= 0 due to the use of unsigned
160      integer arithmetic on it. Actually the computation makes sense
161      only with prec >= 1 (otherwise one can't even know the sign of
162      the result), hence the following assertion. */
163   MPFR_ASSERTD (prec >= 1);
164 
165   /* Consistency check. */
166   MPFR_ASSERTD (wq == (mpfr_prec_t) ws * GMP_NUMB_BITS);
167 
168   /* The following precondition together with prec >= 1 will imply:
169      minexp - shiftq < maxexp2, as required by the algorithm. */
170   MPFR_ASSERTD (wq >= logn + prec + 2);
171 
172   while (1)
173     {
174       mpfr_exp_t maxexp2 = MPFR_EXP_MIN;
175       unsigned long i;
176 
177       MPFR_LOG_MSG (("sum_raw loop: "
178                      "maxexp=%" MPFR_EXP_FSPEC "d "
179                      "minexp=%" MPFR_EXP_FSPEC "d\n",
180                      (mpfr_eexp_t) maxexp, (mpfr_eexp_t) minexp));
181 
182       MPFR_ASSERTD (maxexp > minexp);
183 
184       for (i = 0; i < n; i++)
185         if (! MPFR_IS_SINGULAR (x[i]))  /* Step 1 (see sum_raw in sum.txt) */
186           {
187             mp_limb_t *dp, *vp;
188             mp_size_t ds, vs, vds;
189             mpfr_exp_t xe, vd;
190             mpfr_prec_t xq;
191             int tr;
192 
193             xe = MPFR_GET_EXP (x[i]);
194             xq = MPFR_GET_PREC (x[i]);
195 
196             vp = MPFR_MANT (x[i]);
197             vs = MPFR_PREC2LIMBS (xq);
198             vd = xe - vs * GMP_NUMB_BITS - minexp;
199             /* vd is the exponent of the least significant represented bit of
200                x[i] (including the trailing bits, whose value is 0) minus the
201                exponent of the least significant bit of the accumulator. To
202                make the code simpler, we won't try to filter out the trailing
203                bits of x[i]. */
204 
205             /* Steps 2, 3, 4 (see sum_raw in sum.txt) */
206 
207             if (vd < 0)
208               {
209                 /* This covers the following cases:
210                  *     [-+- accumulator ---]
211                  *   [---|----- x[i] ------|--]
212                  *       |   [----- x[i] --|--]
213                  *       |                 |[----- x[i] -----]
214                  *       |                 |    [----- x[i] -----]
215                  *     maxexp           minexp
216                  */
217 
218                 /* Step 2 for subcase vd < 0 */
219 
220                 if (xe <= minexp)
221                   {
222                     /* x[i] is entirely after the LSB of the accumulator,
223                        so that it will be ignored at this iteration. */
224                     if (xe > maxexp2)
225                       {
226                         maxexp2 = xe;
227                         /* And since the exponent of x[i] is valid... */
228                         MPFR_ASSERTD (maxexp2 >= MPFR_EMIN_MIN);
229                       }
230                     continue;
231                   }
232 
233                 /* Step 3 for subcase vd < 0 */
234 
235                 /* If some significant bits of x[i] are after the LSB of the
236                    accumulator, then maxexp2 will necessarily be minexp. */
237                 if (MPFR_LIKELY (xe - xq < minexp))
238                   maxexp2 = minexp;
239 
240                 /* Step 4 for subcase vd < 0 */
241 
242                 /* We need to ignore the least |vd| significant bits of x[i].
243                    First, let's ignore the least vds = |vd| / GMP_NUMB_BITS
244                    limbs. */
245                 vd = - vd;
246                 vds = vd / GMP_NUMB_BITS;
247                 vs -= vds;
248                 MPFR_ASSERTD (vs > 0);  /* see xe <= minexp test above */
249                 vp += vds;
250                 vd -= vds * GMP_NUMB_BITS;
251                 MPFR_ASSERTD (vd >= 0 && vd < GMP_NUMB_BITS);
252 
253                 if (xe > maxexp)
254                   {
255                     vs -= (xe - maxexp) / GMP_NUMB_BITS;
256                     MPFR_ASSERTD (vs > 0);
257                     tr = (xe - maxexp) % GMP_NUMB_BITS;
258                   }
259                 else
260                   tr = 0;
261 
262                 if (vd != 0)
263                   {
264                     MPFR_ASSERTD (vs <= ts);
265                     mpn_rshift (tp, vp, vs, vd);
266                     vp = tp;
267                     tr += vd;
268                     if (tr >= GMP_NUMB_BITS)
269                       {
270                         vs--;
271                         tr -= GMP_NUMB_BITS;
272                       }
273                     MPFR_ASSERTD (vs >= 1);
274                     MPFR_ASSERTD (tr >= 0 && tr < GMP_NUMB_BITS);
275                     if (tr != 0)
276                       {
277                         tp[vs-1] &= MPFR_LIMB_MASK (GMP_NUMB_BITS - tr);
278                         tr = 0;
279                       }
280                     /* Truncation has now been taken into account. */
281                     MPFR_ASSERTD (tr == 0);
282                   }
283 
284                 dp = wp;
285                 ds = ws;
286               }
287             else  /* vd >= 0 */
288               {
289                 /* This covers the following cases:
290                  *               [-+- accumulator ---]
291                  *   [- x[i] -]    |                 |
292                  *             [---|-- x[i] ------]  |
293                  *          [------|-- x[i] ---------]
294                  *                 |   [- x[i] -]    |
295                  *               maxexp           minexp
296                  */
297 
298                 /* Steps 2 and 3 for subcase vd >= 0 */
299 
300                 MPFR_ASSERTD (xe - xq >= minexp);  /* see definition of vd */
301 
302                 /* Step 4 for subcase vd >= 0 */
303 
304                 /* We need to ignore the least vd significant bits
305                    of the accumulator. First, let's ignore the least
306                    vds = vd / GMP_NUMB_BITS limbs. -> (dp,ds) */
307                 vds = vd / GMP_NUMB_BITS;
308                 ds = ws - vds;
309                 if (ds <= 0)
310                   continue;
311                 dp = wp + vds;
312                 vd -= vds * GMP_NUMB_BITS;
313                 MPFR_ASSERTD (vd >= 0 && vd < GMP_NUMB_BITS);
314 
315                 /* The low part of x[i] (to be determined) will have to be
316                    shifted vd bits to the left if vd != 0. */
317 
318                 if (xe > maxexp)
319                   {
320                     vs -= (xe - maxexp) / GMP_NUMB_BITS;
321                     if (vs <= 0)
322                       continue;
323                     tr = (xe - maxexp) % GMP_NUMB_BITS;
324                   }
325                 else
326                   tr = 0;
327 
328                 MPFR_ASSERTD (tr >= 0 && tr < GMP_NUMB_BITS && vs > 0);
329 
330                 /* We need to consider the least significant vs limbs of x[i]
331                    except the most significant tr bits. */
332 
333                 if (vd != 0)
334                   {
335                     mp_limb_t carry;
336 
337                     MPFR_ASSERTD (vs <= ts);
338                     carry = mpn_lshift (tp, vp, vs, vd);
339                     tr -= vd;
340                     if (tr < 0)
341                       {
342                         tr += GMP_NUMB_BITS;
343                         MPFR_ASSERTD (vs + 1 <= ts);
344                         tp[vs++] = carry;
345                       }
346                     MPFR_ASSERTD (tr >= 0 && tr < GMP_NUMB_BITS);
347                     vp = tp;
348                   }
349               }  /* vd >= 0 */
350 
351             MPFR_ASSERTD (vs > 0 && vs <= ds);
352 
353             /* We can't truncate the most significant limb of the input
354                (in case it hasn't been shifted to the temporary area).
355                So, let's ignore it now. It will be taken into account
356                via carry propagation after the addition. */
357             if (tr != 0)
358               vs--;
359 
360             /* Step 5 (see sum_raw in sum.txt) */
361 
362             if (MPFR_IS_POS (x[i]))
363               {
364                 mp_limb_t carry;
365 
366                 carry = vs > 0 ? mpn_add_n (dp, dp, vp, vs) : 0;
367                 MPFR_ASSERTD (carry <= 1);
368                 if (tr != 0)
369                   carry += vp[vs] & MPFR_LIMB_MASK (GMP_NUMB_BITS - tr);
370                 if (ds > vs)
371                   mpn_add_1 (dp + vs, dp + vs, ds - vs, carry);
372               }
373             else
374               {
375                 mp_limb_t borrow;
376 
377                 borrow = vs > 0 ? mpn_sub_n (dp, dp, vp, vs) : 0;
378                 MPFR_ASSERTD (borrow <= 1);
379                 if (tr != 0)
380                   borrow += vp[vs] & MPFR_LIMB_MASK (GMP_NUMB_BITS - tr);
381                 if (ds > vs)
382                   mpn_sub_1 (dp + vs, dp + vs, ds - vs, borrow);
383               }
384           }
385 
386       {
387         mpfr_prec_t cancel;  /* number of cancelled bits */
388         mp_size_t wi;        /* index in the accumulator */
389         mp_limb_t a, b;
390         int cnt;
391 
392         cancel = 0;
393         wi = ws - 1;
394         MPFR_ASSERTD (wi >= 0);
395         a = wp[wi] >> (GMP_NUMB_BITS - 1) ? MPFR_LIMB_MAX : MPFR_LIMB_ZERO;
396 
397         while (wi >= 0)
398           if ((b = wp[wi]) == a)
399             {
400               cancel += GMP_NUMB_BITS;
401               wi--;
402             }
403           else
404             {
405               b ^= a;
406               MPFR_ASSERTD (b != 0);
407               count_leading_zeros (cnt, b);
408               cancel += cnt;
409               break;
410             }
411 
412         if (wi >= 0 || a != MPFR_LIMB_ZERO)  /* accumulator != 0 */
413           {
414             mpfr_exp_t e;        /* exponent of the computed result */
415             mpfr_exp_t err;      /* exponent of the error bound */
416 
417             MPFR_LOG_MSG (("accumulator %s 0, cancel=%Pd\n",
418                            a != MPFR_LIMB_ZERO ? "<" : ">", cancel));
419 
420             MPFR_ASSERTD (cancel > 0);
421             e = minexp + wq - cancel;
422             MPFR_ASSERTD (e >= minexp);
423             err = maxexp2 + logn;  /* OK even if maxexp2 == MPFR_EXP_MIN */
424 
425             /* The absolute value of the truncated sum is in the binade
426                [2^(e-1),2^e] (closed on both ends due to two's complement).
427                The error is strictly less than 2^err (and is 0 if
428                maxexp2 == MPFR_EXP_MIN). */
429 
430             /* This basically tests whether err <= e - prec without
431                potential integer overflow (since prec >= 0)...
432                Note that the maxexp2 == MPFR_EXP_MIN test is there just for
433                the potential corner case e - prec < MPFR_EXP_MIN + logn.
434                Such corner cases, involving specific huge-precision numbers,
435                are probably not supported in many places in MPFR, but this
436                test doesn't hurt... */
437             if (maxexp2 == MPFR_EXP_MIN ||
438                 (err <= e && SAFE_DIFF (mpfr_uexp_t, e, err) >= prec))
439               {
440                 MPFR_LOG_MSG (("(err=%" MPFR_EXP_FSPEC "d) <= (e=%"
441                                MPFR_EXP_FSPEC "d) - (prec=%Pd)\n",
442                                (mpfr_eexp_t) err, (mpfr_eexp_t) e, prec));
443                 /* To avoid tests or copies, we consider the only two cases
444                    that will occur in sum_aux. */
445                 MPFR_ASSERTD ((ep != NULL &&
446                                minexpp != NULL &&
447                                maxexpp != NULL) ||
448                               (ep == NULL &&
449                                minexpp == NULL &&
450                                maxexpp == NULL));
451                 if (ep != NULL)
452                   {
453                     *ep = e;
454                     *minexpp = minexp;
455                     *maxexpp = maxexp2;
456                   }
457                 MPFR_LOG_MSG (("return with minexp=%" MPFR_EXP_FSPEC
458                                "d maxexp2=%" MPFR_EXP_FSPEC "d%s\n",
459                                (mpfr_eexp_t) minexp, (mpfr_eexp_t) maxexp2,
460                                maxexp2 == MPFR_EXP_MIN ?
461                                " (MPFR_EXP_MIN)" : ""));
462                 return cancel;
463               }
464             else
465               {
466                 mpfr_exp_t diffexp;
467                 mpfr_prec_t shiftq;
468                 mpfr_size_t shifts;
469                 int shiftc;
470 
471                 MPFR_LOG_MSG (("e=%" MPFR_EXP_FSPEC "d err=%" MPFR_EXP_FSPEC
472                                "d maxexp2=%" MPFR_EXP_FSPEC "d%s\n",
473                                (mpfr_eexp_t) e, (mpfr_eexp_t) err,
474                                (mpfr_eexp_t) maxexp2,
475                                maxexp2 == MPFR_EXP_MIN ?
476                                " (MPFR_EXP_MIN)" : ""));
477 
478                 diffexp = err - e;
479                 if (diffexp < 0)
480                   diffexp = 0;
481                 /* diffexp = max(0, err - e) */
482 
483                 MPFR_LOG_MSG (("diffexp=%" MPFR_EXP_FSPEC "d\n",
484                                 (mpfr_eexp_t) diffexp));
485 
486                 MPFR_ASSERTD (diffexp < cancel - 2);
487                 shiftq = cancel - 2 - (mpfr_prec_t) diffexp;
488                 /* equivalent to: minexp + wq - 2 - max(e,err) */
489                 MPFR_ASSERTD (shiftq > 0);
490                 shifts = shiftq / GMP_NUMB_BITS;
491                 shiftc = shiftq % GMP_NUMB_BITS;
492                 MPFR_LOG_MSG (("shiftq = %Pd = %Pd * GMP_NUMB_BITS + %d\n",
493                                shiftq, (mpfr_prec_t) shifts, shiftc));
494                 if (MPFR_LIKELY (shiftc != 0))
495                   mpn_lshift (wp + shifts, wp, ws - shifts, shiftc);
496                 else
497                   mpn_copyd (wp + shifts, wp, ws - shifts);
498                 MPN_ZERO (wp, shifts);
499                 /* Compute minexp = minexp - shiftq safely. */
500                 SAFE_SUB (minexp, minexp, shiftq);
501                 MPFR_ASSERTD (minexp < maxexp2);
502               }
503           }
504         else if (maxexp2 == MPFR_EXP_MIN)
505           {
506             MPFR_LOG_MSG (("accumulator = 0, maxexp2 = MPFR_EXP_MIN\n", 0));
507             return 0;
508           }
509         else
510           {
511             MPFR_LOG_MSG (("accumulator = 0, reiterate\n", 0));
512             /* Compute minexp = maxexp2 - (wq - (logn + 1)) safely. */
513             SAFE_SUB (minexp, maxexp2, wq - (logn + 1));
514             /* Note: the logn + 1 corresponds to cq in the main code. */
515           }
516       }
517 
518       maxexp = maxexp2;
519     }
520 }
521 
522 /**********************************************************************/
523 
524 /* Generic case: all the inputs are finite numbers,
525    with at least 3 regular numbers. */
526 static int
sum_aux(mpfr_ptr sum,const mpfr_ptr * x,unsigned long n,mpfr_rnd_t rnd,mpfr_exp_t maxexp,unsigned long rn)527 sum_aux (mpfr_ptr sum, const mpfr_ptr *x, unsigned long n, mpfr_rnd_t rnd,
528          mpfr_exp_t maxexp, unsigned long rn)
529 {
530   mp_limb_t *sump;
531   mp_limb_t *tp;  /* pointer to a temporary area */
532   mp_limb_t *wp;  /* pointer to the accumulator */
533   mp_size_t ts;   /* size of the temporary area, in limbs */
534   mp_size_t ws;   /* size of the accumulator, in limbs */
535   mp_size_t zs;   /* size of the TMD accumulator, in limbs */
536   mpfr_prec_t wq; /* size of the accumulator, in bits */
537   int logn;       /* ceil(log2(rn)) */
538   int cq;
539   mpfr_prec_t sq;
540   int inex;
541   MPFR_TMP_DECL (marker);
542 
543   MPFR_LOG_FUNC
544     (("n=%lu rnd=%d maxexp=%" MPFR_EXP_FSPEC "d rn=%lu",
545       n, rnd, (mpfr_eexp_t) maxexp, rn),
546      ("sum[%Pd]=%.*Rg", mpfr_get_prec (sum), mpfr_log_prec, sum));
547 
548   MPFR_ASSERTD (rn >= 3 && rn <= n);
549 
550   /* In practice, no integer overflow on the exponent. */
551   MPFR_STAT_STATIC_ASSERT (MPFR_EXP_MAX - MPFR_EMAX_MAX >=
552                            sizeof (unsigned long) * CHAR_BIT);
553 
554   /* Set up some variables and the accumulator. */
555 
556   sump = MPFR_MANT (sum);
557 
558   /* rn is the number of regular inputs (the singular ones will be
559      ignored). Compute logn = ceil(log2(rn)). */
560   logn = MPFR_INT_CEIL_LOG2 (rn);
561   MPFR_ASSERTD (logn >= 2);
562 
563   MPFR_LOG_MSG (("logn=%d maxexp=%" MPFR_EXP_FSPEC "d\n",
564                  logn, (mpfr_eexp_t) maxexp));
565 
566   sq = MPFR_GET_PREC (sum);
567   cq = logn + 1;
568 
569   /* First determine the size of the accumulator.
570    * cq + sq + logn + 2 >= logn + sq + 5, which will be used later.
571    * The assertion wq - cq - sq >= 4 is another way to check that.
572    */
573   ws = MPFR_PREC2LIMBS (cq + sq + logn + 2);
574   wq = (mpfr_prec_t) ws * GMP_NUMB_BITS;
575   MPFR_ASSERTD (wq - cq - sq >= 4);
576 
577   /* TODO: timings, comparing with a larger zs. */
578   zs = MPFR_PREC2LIMBS (wq - sq);
579 
580   MPFR_LOG_MSG (("cq=%d sq=%Pd logn=%d wq=%Pd\n", cq, sq, logn, wq));
581 
582   /* An input block will have up to wq - cq bits, and its shifted value
583      (to be correctly aligned) may take GMP_NUMB_BITS - 1 additional bits. */
584   ts = MPFR_PREC2LIMBS (wq - cq + GMP_NUMB_BITS - 1);
585 
586   MPFR_TMP_MARK (marker);
587 
588   /* Note: If the TMD does not occur, which should be the case for most
589      sums, allocating zs limbs is not necessary. However, we choose to
590      do this now (thus in all cases) because zs is very small, so that
591      the difference on the memory footprint will not be noticeable.
592      More precisely, zs is at most 2 in practice with the current code;
593      we may want to increase it in order to avoid performance issues in
594      some unlikely corner cases, but even in this case, it will remain
595      small.
596      One will have:
597        [------ ts ------][------ ws ------][- zs -]
598      The following would probably be better:
599        [------ ts ------]  [------ ws ------]
600                    [- zs -]
601      i.e. where the TMD accumulator (partially or completely) takes
602      some unneeded part of the temporary area in order to improve
603      data locality. But
604        * in low precision, data locality is regarded as ensured even
605          with the actual choice;
606        * in high precision, data locality for TMD resolution may not
607          be that important.
608   */
609   tp = MPFR_TMP_LIMBS_ALLOC (ts + ws + zs);
610   wp = tp + ts;
611 
612   MPN_ZERO (wp, ws);  /* zero the accumulator */
613 
614   {
615     mpfr_exp_t minexp;   /* exponent of the LSB of the block for sum_raw */
616     mpfr_prec_t cancel;  /* number of cancelled bits */
617     mpfr_exp_t e;        /* temporary exponent of the result */
618     mpfr_exp_t u;        /* temporary exponent of the ulp (quantum) */
619     mp_limb_t lbit;      /* last bit (useful if even rounding) */
620     mp_limb_t rbit;      /* rounding bit (corrected in halfway case) */
621     int corr;            /* correction term (from -1 to 2) */
622     int sd, sh;          /* shift counts */
623     mp_size_t sn;        /* size of the output number */
624     int tmd;             /* 0: the TMD does not occur
625                             1: the TMD occurs on a machine number
626                             2: the TMD occurs on a midpoint */
627     int neg;             /* 0 if positive sum, 1 if negative */
628     int sgn;             /* +1 if positive sum, -1 if negative */
629 
630     MPFR_LOG_MSG (("Compute an approximation with sum_raw...\n", 0));
631 
632     /* Compute minexp = maxexp - (wq - cq) safely. */
633     SAFE_SUB (minexp, maxexp, wq - cq);
634     MPFR_ASSERTD (wq >= logn + sq + 5);
635     cancel = sum_raw (wp, ws, wq, x, n, minexp, maxexp, tp, ts,
636                       logn, sq + 3, &e, &minexp, &maxexp);
637 
638     if (MPFR_UNLIKELY (cancel == 0))
639       {
640         /* The exact sum is zero. Since not all inputs are 0, the sum
641          * is +0 except in MPFR_RNDD, as specified according to the
642          * IEEE 754 rules for the addition of two numbers.
643          */
644         MPFR_SET_SIGN (sum, (rnd != MPFR_RNDD ?
645                              MPFR_SIGN_POS : MPFR_SIGN_NEG));
646         MPFR_SET_ZERO (sum);
647         MPFR_TMP_FREE (marker);
648         MPFR_RET (0);
649       }
650 
651     /* The absolute value of the truncated sum is in the binade
652        [2^(e-1),2^e] (closed on both ends due to two's complement).
653        The error is strictly less than 2^(maxexp + logn) (and is 0
654        if maxexp == MPFR_EXP_MIN). */
655 
656     u = e - sq;  /* e being the exponent, u is the ulp of the target */
657 
658     /* neg = 1 if negative, 0 if positive. */
659     neg = wp[ws-1] >> (GMP_NUMB_BITS - 1);
660     MPFR_ASSERTD (neg == 0 || neg == 1);
661 
662     sgn = neg ? -1 : 1;
663     MPFR_ASSERTN (sgn == (neg ? MPFR_SIGN_NEG : MPFR_SIGN_POS));
664 
665     MPFR_LOG_MSG (("neg=%d sgn=%d cancel=%Pd"
666                    " e=%" MPFR_EXP_FSPEC "d"
667                    " u=%" MPFR_EXP_FSPEC "d"
668                    " maxexp=%" MPFR_EXP_FSPEC "d%s\n",
669                    neg, sgn, cancel, (mpfr_eexp_t) e, (mpfr_eexp_t) u,
670                    (mpfr_eexp_t) maxexp,
671                    maxexp == MPFR_EXP_MIN ? " (MPFR_EXP_MIN)" : ""));
672 
673     if (rnd == MPFR_RNDF)
674       {
675         /* Rounding the approximate value to nearest (ties don't matter) is
676            sufficient. We need to get the rounding bit; the code is similar
677            to a part from the generic code (here, corr = rbit). */
678         if (MPFR_LIKELY (u > minexp))
679           {
680             mpfr_prec_t tq;
681             mp_size_t wi;
682             int td;
683 
684             tq = u - minexp;
685             MPFR_ASSERTD (tq > 0); /* number of trailing bits */
686             MPFR_LOG_MSG (("tq=%Pd\n", tq));
687 
688             wi = tq / GMP_NUMB_BITS;
689             td = tq % GMP_NUMB_BITS;
690             corr = td >= 1 ? ((wp[wi] >> (td - 1)) & MPFR_LIMB_ONE) :
691               (MPFR_ASSERTD (wi >= 1), wp[wi-1] >> (GMP_NUMB_BITS - 1));
692           }
693         else
694           corr = 0;
695         inex = 0;  /* not meaningful, but needs to have a value */
696       }
697     else  /* rnd != MPFR_RNDF */
698       {
699         if (MPFR_LIKELY (u > minexp))
700           {
701             mpfr_prec_t tq;
702             mp_size_t wi;
703             int td;
704 
705             tq = u - minexp;
706             MPFR_ASSERTD (tq > 0); /* number of trailing bits */
707             MPFR_LOG_MSG (("tq=%Pd\n", tq));
708 
709             wi = tq / GMP_NUMB_BITS;
710 
711             /* Determine the rounding bit, which is represented. */
712             td = tq % GMP_NUMB_BITS;
713             lbit = (wp[wi] >> td) & MPFR_LIMB_ONE;
714             rbit = td >= 1 ? ((wp[wi] >> (td - 1)) & MPFR_LIMB_ONE) :
715               (MPFR_ASSERTD (wi >= 1), wp[wi-1] >> (GMP_NUMB_BITS - 1));
716             MPFR_ASSERTD (rbit == 0 || rbit == 1);
717             MPFR_LOG_MSG (("rbit=%d\n", (int) rbit));
718 
719             if (maxexp == MPFR_EXP_MIN)
720               {
721                 /* The sum in the accumulator is exact. Determine inex:
722                    inex = 0 if the final sum is exact, else 1, i.e.
723                    inex = rounding bit || sticky bit. In round to nearest,
724                    also determine the rounding direction: obtained from
725                    the rounding bit possibly except in halfway cases.
726                    Halfway cases are rounded toward -inf iff the last bit
727                    of the truncated significand in two's complement is 0
728                    (in precision > 1, because the parity after rounding is
729                    the same in two's complement and sign + magnitude; in
730                    precision 1, one checks that the rule works for both
731                    positive (lbit == 1) and negative (lbit == 0) numbers,
732                    rounding halfway cases away from zero). */
733                 if (MPFR_LIKELY (rbit == 0 || (rnd == MPFR_RNDN && lbit == 0)))
734                   {
735                     /* We need to determine the sticky bit, either to set inex
736                        (if the rounding bit is 0) or to possibly "correct" rbit
737                        (round to nearest, halfway case rounded downward) from
738                        which the rounding direction will be determined. */
739                     MPFR_LOG_MSG (("Determine the sticky bit...\n", 0));
740 
741                     inex = td >= 2 ? (wp[wi] & MPFR_LIMB_MASK (td - 1)) != 0
742                       : td == 0 ?
743                       (MPFR_ASSERTD (wi >= 1),
744                        (wp[--wi] & MPFR_LIMB_MASK (GMP_NUMB_BITS - 1)) != 0)
745                       : 0;
746 
747                     if (!inex)
748                       {
749                         while (!inex && wi > 0)
750                           inex = wp[--wi] != 0;
751                         if (!inex && rbit != 0)
752                           {
753                             /* sticky bit = 0, rounding bit = 1,
754                                i.e. halfway case, which will be
755                                rounded downward (see earlier if). */
756                             MPFR_ASSERTD (rnd == MPFR_RNDN);
757                             inex = 1;
758                             rbit = 0;  /* even rounding downward */
759                             MPFR_LOG_MSG (("Halfway case rounded downward;"
760                                            " set inex=1 rbit=0\n", 0));
761                           }
762                       }
763                   }
764                 else
765                   inex = 1;
766                 tmd = 0;  /* We can round correctly -> no TMD. */
767               }
768             else  /* maxexp > MPFR_EXP_MIN */
769               {
770                 mpfr_exp_t d;
771                 mp_limb_t limb, mask;
772                 int nbits;
773 
774                 /* Since maxexp was set to either the exponent of a x[i] or
775                    to minexp... */
776                 MPFR_ASSERTD (maxexp >= MPFR_EMIN_MIN || maxexp == minexp);
777 
778                 inex = 1;  /* We do not know whether the sum is exact. */
779 
780                 MPFR_ASSERTD (u <= MPFR_EMAX_MAX && u <= minexp + wq);
781                 d = u - (maxexp + logn);  /* representable */
782                 MPFR_ASSERTD (d >= 3);  /* due to prec = sq + 3 in sum_raw */
783 
784                 /* Let's see whether the TMD occurs by looking at the d bits
785                    following the ulp bit, or the d-1 bits after the rounding
786                    bit. */
787 
788                 /* First chunk after the rounding bit... It starts at:
789                    (wi,td-2) if td >= 2,
790                    (wi-1,td-2+GMP_NUMB_BITS) if td < 2. */
791                 if (td == 0)
792                   {
793                     MPFR_ASSERTD (wi >= 1);
794                     limb = wp[--wi];
795                     mask = MPFR_LIMB_MASK (GMP_NUMB_BITS - 1);
796                     nbits = GMP_NUMB_BITS;
797                   }
798                 else if (td == 1)
799                   {
800                     limb = wi >= 1 ? wp[--wi] : MPFR_LIMB_ZERO;
801                     mask = MPFR_LIMB_MAX;
802                     nbits = GMP_NUMB_BITS + 1;
803                   }
804                 else  /* td >= 2 */
805                   {
806                     MPFR_ASSERTD (td >= 2);
807                     limb = wp[wi];
808                     mask = MPFR_LIMB_MASK (td - 1);
809                     nbits = td;
810                   }
811 
812                 /* nbits: number of bits of the first chunk + 1
813                    (the +1 is for the rounding bit). */
814 
815                 if (nbits > d)
816                   {
817                     /* Some low significant bits must be ignored. */
818                     limb >>= nbits - d;
819                     mask >>= nbits - d;
820                     d = 0;
821                   }
822                 else
823                   {
824                     d -= nbits;
825                     MPFR_ASSERTD (d >= 0);
826                   }
827 
828                 limb &= mask;
829                 tmd =
830                   limb == MPFR_LIMB_ZERO ?
831                     (rbit == 0 ? 1 : rnd == MPFR_RNDN ? 2 : 0) :
832                   limb == mask ?
833                     (limb = MPFR_LIMB_MAX,
834                      rbit != 0 ? 1 : rnd == MPFR_RNDN ? 2 : 0) : 0;
835 
836                 while (tmd != 0 && d != 0)
837                   {
838                     mp_limb_t limb2;
839 
840                     MPFR_ASSERTD (d > 0);
841                     if (wi == 0)
842                       {
843                         /* The non-represented bits are 0's. */
844                         if (limb != MPFR_LIMB_ZERO)
845                           tmd = 0;
846                         break;
847                       }
848                     MPFR_ASSERTD (wi > 0);
849                     limb2 = wp[--wi];
850                     if (d < GMP_NUMB_BITS)
851                       {
852                         int c = GMP_NUMB_BITS - d;
853                         MPFR_ASSERTD (c > 0 && c < GMP_NUMB_BITS);
854                         if ((limb2 >> c) != (limb >> c))
855                           tmd = 0;
856                         break;
857                       }
858                     if (limb2 != limb)
859                       tmd = 0;
860                     d -= GMP_NUMB_BITS;
861                   }
862               }
863           }
864         else  /* u <= minexp */
865           {
866             /* The exact value of the accumulator will be copied.
867              * The TMD occurs if and only if there are bits still
868              * not taken into account, and if it occurs, this is
869              * necessarily on a machine number (-> tmd = 1).
870              */
871             lbit = u == minexp ? wp[0] & MPFR_LIMB_ONE : 0;
872             rbit = 0;
873             inex = tmd = maxexp != MPFR_EXP_MIN;
874           }
875 
876         MPFR_ASSERTD (rbit == 0 || rbit == 1);
877 
878         MPFR_LOG_MSG (("tmd=%d lbit=%d rbit=%d inex=%d neg=%d\n",
879                        tmd, (int) lbit, (int) rbit, inex, neg));
880 
881         /* Here, if the final sum is known to be exact, inex = 0, otherwise
882          * inex = 1. We have a truncated significand, a trailing term t such
883          * that 0 <= t < 1 ulp, and an error on the trailing term bounded by
884          * t' in absolute value. Thus the error e on the truncated significand
885          * satisfies -t' <= e < 1 ulp + t'. Thus one has 4 correction cases
886          * denoted by a corr value between -1 and 2 depending on e, neg, rbit,
887          * and the rounding mode:
888          *   -1: equivalent to nextbelow;
889          *    0: the truncated significand is not corrected;
890          *    1: add 1 ulp;
891          *    2: add 1 ulp, then nextabove.
892          * The nextbelow and nextabove are used here since there may be a
893          * change of the binade.
894          */
895 
896         if (tmd == 0)  /* no TMD */
897           {
898             switch (rnd)
899               {
900               case MPFR_RNDD:
901                 corr = 0;
902                 break;
903               case MPFR_RNDU:
904                 corr = inex;
905                 break;
906               case MPFR_RNDZ:
907                 corr = inex && neg;
908                 break;
909               case MPFR_RNDA:
910                 corr = inex && !neg;
911                 break;
912               default:
913                 MPFR_ASSERTN (rnd == MPFR_RNDN);
914                 /* Note: for halfway cases (maxexp == MPFR_EXP_MIN) that are
915                    rounded downward, rbit has been changed to 0 so that corr
916                    is set correctly. */
917                 corr = rbit;
918               }
919             MPFR_ASSERTD (corr == 0 || corr == 1);
920             if (inex &&
921                 corr == 0)  /* two's complement significand decreased */
922               inex = -1;
923           }
924         else  /* tmd */
925           {
926             mpfr_exp_t minexp2;
927             mpfr_prec_t cancel2;
928             mpfr_exp_t err;  /* exponent of the error bound */
929             mp_size_t zz;    /* nb of limbs to zero in the TMD accumulator */
930             mp_limb_t *zp;   /* pointer to the TMD accumulator */
931             mpfr_prec_t zq;  /* size of the TMD accumulator, in bits */
932             int sst;         /* sign of the secondary term */
933 
934             /* TMD case. Here we use a new variable minexp2, with the same
935                meaning as minexp, as we want to keep the minexp value for
936                the copy to the destination. */
937 
938             MPFR_ASSERTD (maxexp > MPFR_EXP_MIN);
939             MPFR_ASSERTD (tmd == 1 || tmd == 2);
940 
941             /* TMD accumulator */
942             zp = wp + ws;
943             zq = (mpfr_prec_t) zs * GMP_NUMB_BITS;
944 
945             err = maxexp + logn;
946 
947             MPFR_LOG_MSG (("TMD with"
948                            " maxexp=%" MPFR_EXP_FSPEC "d"
949                            " err=%" MPFR_EXP_FSPEC "d"
950                            " zs=%Pd"
951                            " zq=%Pd\n",
952                            (mpfr_eexp_t) maxexp, (mpfr_eexp_t) err,
953                            (mpfr_prec_t) zs, zq));
954 
955             /* The d-1 bits from u-2 to u-d (= err) are identical. */
956 
957             if (err >= minexp)
958               {
959                 mpfr_prec_t tq;
960                 mp_size_t wi;
961                 int td;
962 
963                 /* Let's keep the last 2 over the d-1 identical bits and the
964                    following bits, i.e. the bits from err+1 to minexp. */
965                 tq = err - minexp + 2;  /* tq = number of such bits */
966                 MPFR_LOG_MSG (("[TMD] tq=%Pd\n", tq));
967                 MPFR_ASSERTD (tq >= 2);
968 
969                 wi = tq / GMP_NUMB_BITS;
970                 td = tq % GMP_NUMB_BITS;
971 
972                 /* Note: The "else" (td == 0) branch below can be executed
973                    only if tq >= GMP_NUMB_BITS, which is possible only when
974                    logn is large enough. Indeed, if tq > logn + some constant,
975                    this means that the TMD did not occur.
976                    TODO: Find an upper bound on tq, and add a corresponding
977                    MPFR_ASSERTD assertion / hint. On some platforms, this
978                    branch might be dead code, and such information would
979                    allow the compiler to remove it.
980                    It seems that this branch is never tested (r12754). */
981 
982                 if (td != 0)
983                   {
984                     wi++;  /* number of words with represented bits */
985                     td = GMP_NUMB_BITS - td;
986                     zz = zs - wi;
987                     MPFR_ASSERTD (zz >= 0 && zz < zs);
988                     mpn_lshift (zp + zz, wp, wi, td);
989                   }
990                 else
991                   {
992                     MPFR_ASSERTD (wi > 0);
993                     zz = zs - wi;
994                     MPFR_ASSERTD (zz >= 0 && zz < zs);
995                     if (zz > 0)
996                       MPN_COPY (zp + zz, wp, wi);
997                   }
998 
999                 /* Compute minexp2 = minexp - (zs * GMP_NUMB_BITS + td)
1000                    safely. */
1001                 SAFE_SUB (minexp2, minexp, zz * GMP_NUMB_BITS + td);
1002                 MPFR_ASSERTD (minexp2 == err + 2 - zq);
1003               }
1004             else  /* err < minexp */
1005               {
1006                 /* At least one of the identical bits is not represented,
1007                    meaning that it is 0 and all these bits are 0's. Thus
1008                    the accumulator will be 0. The new minexp is determined
1009                    from maxexp, with cq bits reserved to avoid an overflow
1010                    (as in the early steps). */
1011                 MPFR_LOG_MSG (("[TMD] err < minexp\n", 0));
1012                 zz = zs;
1013 
1014                 /* Compute minexp2 = maxexp - (zq - cq) safely. */
1015                 SAFE_SUB (minexp2, maxexp, zq - cq);
1016                 MPFR_ASSERTD (minexp2 == err + 1 - zq);
1017               }
1018 
1019             MPN_ZERO (zp, zz);
1020 
1021             /* We need to determine the sign sst of the secondary term.
1022                In sum_raw, since the truncated sum corresponding to this
1023                secondary term will be in [2^(e-1),2^e] and the error
1024                strictly less than 2^err, we can stop the iterations when
1025                e - err >= 1 (this bound is the 11th argument of sum_raw). */
1026             cancel2 = sum_raw (zp, zs, zq, x, n, minexp2, maxexp, tp, ts,
1027                                logn, 1, NULL, NULL, NULL);
1028 
1029             if (cancel2 != 0)
1030               sst = MPFR_LIMB_MSB (zp[zs-1]) == 0 ? 1 : -1;
1031             else if (tmd == 1)
1032               sst = 0;
1033             else
1034               {
1035                 /* For halfway cases, let's virtually eliminate them
1036                    by setting a sst equivalent to a non-halfway case,
1037                    which depends on the last bit of the pre-rounded
1038                    result. */
1039                 MPFR_ASSERTD (rnd == MPFR_RNDN && tmd == 2);
1040                 sst = lbit != 0 ? 1 : -1;
1041               }
1042 
1043             MPFR_LOG_MSG (("[TMD] tmd=%d rbit=%d sst=%d\n",
1044                            tmd, (int) rbit, sst));
1045 
1046             /* Do not consider the corrected sst for MPFR_COV_SET */
1047             MPFR_COV_SET (sum_tmd[(int) rnd][tmd-1][rbit]
1048                           [cancel2 == 0 ? 1 : sst+1][neg][sq > MPFR_PREC_MIN]);
1049 
1050             inex =
1051               MPFR_IS_LIKE_RNDD (rnd, sgn) ? (sst ? -1 : 0) :
1052               MPFR_IS_LIKE_RNDU (rnd, sgn) ? (sst ?  1 : 0) :
1053               (MPFR_ASSERTD (rnd == MPFR_RNDN),
1054                tmd == 1 ? - sst : sst);
1055 
1056             if (tmd == 2 && sst == (rbit != 0 ? -1 : 1))
1057               corr = 1 - (int) rbit;
1058             else if (MPFR_IS_LIKE_RNDD (rnd, sgn) && sst == -1)
1059               corr = (int) rbit - 1;
1060             else if (MPFR_IS_LIKE_RNDU (rnd, sgn) && sst == +1)
1061               corr = (int) rbit + 1;
1062             else
1063               corr = (int) rbit;
1064           }  /* tmd */
1065       }  /* rnd != MPFR_RNDF */
1066 
1067     MPFR_LOG_MSG (("neg=%d corr=%d inex=%d\n", neg, corr, inex));
1068 
1069     /* Sign handling (-> absolute value and sign), together with
1070        rounding. The most common cases are corr = 0 and corr = 1
1071        as this is necessarily the case when the TMD did not occur. */
1072 
1073     MPFR_ASSERTD (corr >= -1 && corr <= 2);
1074 
1075     MPFR_SIGN (sum) = sgn;
1076 
1077     /* Let's copy/shift the bits [max(u,minexp),e) to the
1078        most significant part of the destination, and zero
1079        the least significant part (there can be one only if
1080        u < minexp). The trailing bits of the destination may
1081        contain garbage at this point. */
1082 
1083     sn = MPFR_PREC2LIMBS (sq);
1084     sd = (mpfr_prec_t) sn * GMP_NUMB_BITS - sq;
1085     sh = cancel % GMP_NUMB_BITS;
1086 
1087     MPFR_ASSERTD (sd >= 0 && sd < GMP_NUMB_BITS);
1088 
1089     if (MPFR_LIKELY (u > minexp))
1090       {
1091         mp_size_t wi;
1092 
1093         /* Recompute the initial value of wi. */
1094         wi = (u - minexp) / GMP_NUMB_BITS;
1095         if (MPFR_LIKELY (sh != 0))
1096           {
1097             mp_size_t fi;
1098 
1099             fi = (e - minexp) / GMP_NUMB_BITS - (sn - 1);
1100             MPFR_ASSERTD (fi == wi || fi == wi + 1);
1101             mpn_lshift (sump, wp + fi, sn, sh);
1102             if (fi != wi)
1103               sump[0] |= wp[wi] >> (GMP_NUMB_BITS - sh);
1104           }
1105         else
1106           {
1107             MPFR_ASSERTD ((mpfr_prec_t) (ws - (wi + sn)) * GMP_NUMB_BITS
1108                           == cancel);
1109             MPN_COPY (sump, wp + wi, sn);
1110           }
1111       }
1112     else  /* u <= minexp */
1113       {
1114         mp_size_t en;
1115 
1116         en = (e - minexp + (GMP_NUMB_BITS - 1)) / GMP_NUMB_BITS;
1117         if (MPFR_LIKELY (sh != 0))
1118           mpn_lshift (sump + sn - en, wp, en, sh);
1119         else if (MPFR_UNLIKELY (en > 0))
1120           MPN_COPY (sump + sn - en, wp, en);
1121         if (sn > en)
1122           MPN_ZERO (sump, sn - en);
1123       }
1124 
1125     /* Let's take the complement if the result is negative, and at
1126        the same time, do the rounding and zero the trailing bits.
1127        As this is valid only for precisions >= 2, there is special
1128        code for precision 1 first. */
1129 
1130     if (MPFR_UNLIKELY (sq == 1))  /* precision 1 */
1131       {
1132         sump[0] = MPFR_LIMB_HIGHBIT;
1133         e += neg ? 1 - corr : corr;
1134       }
1135     else if (neg)  /* negative result with sq > 1 */
1136       {
1137         MPFR_ASSERTD (MPFR_LIMB_MSB (sump[sn-1]) == 0);
1138 
1139         /* abs(x + corr) = - (x + corr) = com(x) + (1 - corr) */
1140 
1141         /* We want to avoid separate mpn_com (or mpn_neg) and mpn_add_1
1142            (or mpn_sub_1) operations, as they could yield two loops in
1143            some particular cases involving a long sequence of 0's in
1144            the low significant bits (except the least significant bit,
1145            which doesn't matter). */
1146 
1147         if (corr <= 1)
1148           {
1149             mp_limb_t corr2;
1150 
1151             /* Here we can just do the correction operation on the
1152                least significant limb, then do either a mpn_com or
1153                a mpn_neg on the remaining limbs, depending on the
1154                carry (BTW, mpn_neg is just a mpn_com with an initial
1155                carry propagation: after some point, mpn_neg does a
1156                complement). */
1157 
1158             corr2 = (mp_limb_t) (1 - corr) << sd;
1159             /* Note: If corr = -1, this can overflow to corr2 = 0.
1160                This case is taken into account below. */
1161 
1162             sump[0] = (~ (sump[0] | MPFR_LIMB_MASK (sd))) + corr2;
1163 
1164             if (sump[0] < corr2 || (corr2 == 0 && corr < 0))
1165               {
1166                 if (sn == 1 || ! mpn_neg (sump + 1, sump + 1, sn - 1))
1167                   {
1168                     /* Note: The | is important when sump[sn-1] is not 0
1169                        (this can occur with sn = 1 and corr = -1). TODO:
1170                        Add something to make sure that this is tested. */
1171                     sump[sn-1] |= MPFR_LIMB_HIGHBIT;
1172                     e++;
1173                   }
1174               }
1175             else if (sn > 1)
1176               mpn_com (sump + 1, sump + 1, sn - 1);
1177           }
1178         else  /* corr == 2 */
1179           {
1180             mp_limb_t corr2, c;
1181             mp_size_t i = 1;
1182 
1183             /* We want to compute com(x) - 1, but GMP doesn't have an
1184                operation for that. The fact is that a sequence of low
1185                significant bits 1 is invariant. Starting at the first
1186                low significant bit 0, we can do the complement with
1187                mpn_com. */
1188 
1189             corr2 = MPFR_LIMB_ONE << sd;
1190             c = ~ (sump[0] | MPFR_LIMB_MASK (sd));
1191             sump[0] = c - corr2;
1192 
1193             if (c == 0)
1194               {
1195                 while (MPFR_ASSERTD (i < sn), sump[i] == MPFR_LIMB_MAX)
1196                   i++;
1197                 sump[i] = (~ sump[i]) - 1;
1198                 i++;
1199               }
1200 
1201             if (i < sn)
1202               mpn_com (sump + i, sump + i, sn - i);
1203             else if (MPFR_UNLIKELY (MPFR_LIMB_MSB (sump[sn-1]) == 0))
1204               {
1205                 /* Happens on 01111...111, whose complement is
1206                    10000...000, and com(x) - 1 is 01111...111. */
1207                 sump[sn-1] |= MPFR_LIMB_HIGHBIT;
1208                 e--;
1209               }
1210           }
1211       }
1212     else  /* positive result with sq > 1 */
1213       {
1214         MPFR_ASSERTD (MPFR_LIMB_MSB (sump[sn-1]) != 0);
1215         sump[0] &= ~ MPFR_LIMB_MASK (sd);
1216 
1217         if (corr > 0)
1218           {
1219             mp_limb_t corr2, carry_out;
1220 
1221             corr2 = (mp_limb_t) corr << sd;
1222             /* If corr == 2 && sd == GMP_NUMB_BITS - 1, this overflows
1223                to corr2 = 0. This case is taken into account below. */
1224 
1225             carry_out = corr2 != 0 ? mpn_add_1 (sump, sump, sn, corr2) :
1226               (MPFR_ASSERTD (sn > 1),
1227                mpn_add_1 (sump + 1, sump + 1, sn - 1, MPFR_LIMB_ONE));
1228 
1229             MPFR_ASSERTD (sump[sn-1] >> (GMP_NUMB_BITS - 1) == !carry_out);
1230 
1231             if (MPFR_UNLIKELY (carry_out))
1232               {
1233                 /* Note: The | is important when sump[sn-1] is not 0
1234                    (this can occur with sn = 1 and corr = 2). TODO:
1235                    Add something to make sure that this is tested. */
1236                 sump[sn-1] |= MPFR_LIMB_HIGHBIT;
1237                 e++;
1238               }
1239           }
1240 
1241         if (corr < 0)
1242           {
1243             mpn_sub_1 (sump, sump, sn, MPFR_LIMB_ONE << sd);
1244 
1245             if (MPFR_UNLIKELY (MPFR_LIMB_MSB (sump[sn-1]) == 0))
1246               {
1247                 sump[sn-1] |= MPFR_LIMB_HIGHBIT;
1248                 e--;
1249               }
1250           }
1251       }
1252 
1253     MPFR_ASSERTD (MPFR_LIMB_MSB (sump[sn-1]) != 0);
1254     MPFR_LOG_MSG (("Set exponent e=%" MPFR_EXP_FSPEC "d\n", (mpfr_eexp_t) e));
1255     /* e may be outside the current exponent range, but this will be checked
1256        with mpfr_check_range below. */
1257     MPFR_EXP (sum) = e;
1258   }  /* main block */
1259 
1260   MPFR_TMP_FREE (marker);
1261   return mpfr_check_range (sum, inex, rnd);
1262 }
1263 
1264 /**********************************************************************/
1265 
1266 int
mpfr_sum(mpfr_ptr sum,const mpfr_ptr * x,unsigned long n,mpfr_rnd_t rnd)1267 mpfr_sum (mpfr_ptr sum, const mpfr_ptr *x, unsigned long n, mpfr_rnd_t rnd)
1268 {
1269   MPFR_LOG_FUNC
1270     (("n=%lu rnd=%d", n, rnd),
1271      ("sum[%Pd]=%.*Rg", mpfr_get_prec (sum), mpfr_log_prec, sum));
1272 
1273   if (MPFR_UNLIKELY (n <= 2))
1274     {
1275       if (n == 0)
1276         {
1277           MPFR_SET_ZERO (sum);
1278           MPFR_SET_POS (sum);
1279           MPFR_RET (0);
1280         }
1281       else if (n == 1)
1282         return mpfr_set (sum, x[0], rnd);
1283       else
1284         return mpfr_add (sum, x[0], x[1], rnd);
1285     }
1286   else
1287     {
1288       mpfr_exp_t maxexp = MPFR_EXP_MIN;  /* max(Empty) */
1289       unsigned long i;
1290       unsigned long rn = 0;  /* will be the number of regular inputs */
1291       /* sign of infinities and zeros (0: currently unknown) */
1292       int sign_inf = 0, sign_zero = 0;
1293 
1294       MPFR_LOG_MSG (("Check for special inputs (n = %lu >= 3)\n", n));
1295 
1296       for (i = 0; i < n; i++)
1297         {
1298           if (MPFR_UNLIKELY (MPFR_IS_SINGULAR (x[i])))
1299             {
1300               if (MPFR_IS_NAN (x[i]))
1301                 {
1302                   /* The current value x[i] is NaN. Then the sum is NaN. */
1303                 nan:
1304                   MPFR_SET_NAN (sum);
1305                   MPFR_RET_NAN;
1306                 }
1307               else if (MPFR_IS_INF (x[i]))
1308                 {
1309                   /* The current value x[i] is an infinity.
1310                      There are two cases:
1311                      1. This is the first infinity value (sign_inf == 0).
1312                         Then set sign_inf to its sign, and go on.
1313                      2. All the infinities found until now have the same
1314                         sign sign_inf. If this new infinity has a different
1315                         sign, then return NaN immediately, else go on. */
1316                   if (sign_inf == 0)
1317                     sign_inf = MPFR_SIGN (x[i]);
1318                   else if (MPFR_SIGN (x[i]) != sign_inf)
1319                     goto nan;
1320                 }
1321               else if (MPFR_UNLIKELY (rn == 0))
1322                 {
1323                   /* The current value x[i] is a zero. The code below matters
1324                      only when all values found until now are zeros, otherwise
1325                      it is harmless (the test rn == 0 above is just a minor
1326                      optimization).
1327                      Here we track the sign of the zero result when all inputs
1328                      are zeros: if all zeros have the same sign, the result
1329                      will have this sign, otherwise (i.e. if there is at least
1330                      a zero of each sign), the sign of the zero result depends
1331                      only on the rounding mode (note that this choice is
1332                      sticky when new zeros are considered). */
1333                   MPFR_ASSERTD (MPFR_IS_ZERO (x[i]));
1334                   if (sign_zero == 0)
1335                     sign_zero = MPFR_SIGN (x[i]);
1336                   else if (MPFR_SIGN (x[i]) != sign_zero)
1337                     sign_zero = rnd == MPFR_RNDD ? -1 : 1;
1338                 }
1339             }
1340           else
1341             {
1342               /* The current value x[i] is a regular number. */
1343               mpfr_exp_t e = MPFR_GET_EXP (x[i]);
1344               if (e > maxexp)
1345                 maxexp = e;  /* maximum exponent found until now */
1346               rn++;  /* current number of regular inputs */
1347             }
1348         }
1349 
1350       MPFR_LOG_MSG (("rn=%lu sign_inf=%d sign_zero=%d\n",
1351                      rn, sign_inf, sign_zero));
1352 
1353       /* At this point the result cannot be NaN (this case has already
1354          been filtered out). */
1355 
1356       if (MPFR_UNLIKELY (sign_inf != 0))
1357         {
1358           /* At least one infinity, and all of them have the same sign
1359              sign_inf. The sum is the infinity of this sign. */
1360           MPFR_SET_INF (sum);
1361           MPFR_SET_SIGN (sum, sign_inf);
1362           MPFR_RET (0);
1363         }
1364 
1365       /* At this point, all the inputs are finite numbers. */
1366 
1367       if (MPFR_UNLIKELY (rn == 0))
1368         {
1369           /* All the numbers were zeros (and there is at least one).
1370              The sum is zero with sign sign_zero. */
1371           MPFR_ASSERTD (sign_zero != 0);
1372           MPFR_SET_ZERO (sum);
1373           MPFR_SET_SIGN (sum, sign_zero);
1374           MPFR_RET (0);
1375         }
1376 
1377       /* Optimize the case where there are only two regular numbers. */
1378       if (MPFR_UNLIKELY (rn <= 2))
1379         {
1380           unsigned long h = ULONG_MAX;
1381 
1382           for (i = 0; i < n; i++)
1383             if (! MPFR_IS_SINGULAR (x[i]))
1384               {
1385                 if (rn == 1)
1386                   return mpfr_set (sum, x[i], rnd);
1387                 if (h != ULONG_MAX)
1388                   return mpfr_add (sum, x[h], x[i], rnd);
1389                 h = i;
1390               }
1391           MPFR_RET_NEVER_GO_HERE();
1392         }
1393 
1394       return sum_aux (sum, x, n, rnd, maxexp, rn);
1395     }
1396 }
1397