xref: /netbsd-src/external/lgpl3/mpfr/dist/src/sum.c (revision a04395531661c5e8d314125d5ae77d4cbedd5d73)
1 /* Sum -- efficiently sum a list of floating-point numbers
2 
3 Copyright 2014-2020 Free Software Foundation, Inc.
4 Contributed by the AriC and Caramba projects, INRIA.
5 
6 This file is part of the GNU MPFR Library.
7 
8 The GNU MPFR Library is free software; you can redistribute it and/or modify
9 it under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or (at your
11 option) any later version.
12 
13 The GNU MPFR Library is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
16 License for more details.
17 
18 You should have received a copy of the GNU Lesser General Public License
19 along with the GNU MPFR Library; see the file COPYING.LESSER.  If not, see
20 https://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
21 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
22 
23 #define MPFR_NEED_LONGLONG_H
24 #include "mpfr-impl.h"
25 
26 /* Note: In the prototypes, one uses
27  *
28  *   const mpfr_ptr *x      i.e.:  __mpfr_struct *const *x
29  *
30  * instead of
31  *
32  *   const mpfr_srcptr *x   i.e.:  const __mpfr_struct *const *x
33  *
34  * because here one has a double indirection and the type matching rules
35  * from the C standard in such a case are stricter and they would yield
36  * annoying errors for the user in practice. See:
37  *
38  *   Why can't I pass a char ** to a function which expects a const char **?
39  *
40  * in the comp.lang.c FAQ:
41  *
42  *   http://c-faq.com/ansi/constmismatch.html
43  */
44 
45 /* See the doc/sum.txt file for the algorithm and a part of its proof
46 (this will later go into algorithms.tex).
47 
48 TODO [VL, after a discussion with James Demmel]: Compared to
49   James Demmel and Yozo Hida, Fast and accurate floating-point summation
50   with application to computational geometry, Numerical Algorithms,
51   volume 37, number 1-4, pages 101--112, 2004.
52 sorting is not necessary here. It is not done because in the most common
53 cases (where big cancellations are rare), it would take time and be
54 useless. However the lack of sorting increases the worst case complexity.
55 For instance, consider many inputs that cancel one another (two by two).
56 One would need n/2 iterations, where each iteration reads the exponent
57 of each input, therefore n*n/2 read operations. Using a worst-case sort
58 in O(n log n) could give a O(n log n) worst-case complexity. As we don't
59 want to slow down the most common cases, this could be done at the 3rd
60 iteration. But are there practical applications which would be used as
61 tests?
62 
63 Note: see the following paper and its references:
64 http://www.eecs.berkeley.edu/~hdnguyen/public/papers/ARITH21_Fast_Sum.pdf
65 VL: This is very different:
66           In MPFR             In the paper & references
67     arbitrary precision            fixed precision
68      correct rounding        just reproducible rounding
69     integer operations        floating-point operations
70         sequential             parallel (& sequential)
71 */
72 
73 #ifdef MPFR_COV_CHECK
74 int __gmpfr_cov_sum_tmd[MPFR_RND_MAX][2][2][3][2][2] = { 0 };
75 #endif
76 
77 /* Update minexp (V) after detecting a potential integer overflow in
78    extreme cases (only a 32-bit ABI may be concerned in practice).
79    Instead of an assertion failure below, we could
80    1. check that the ulp of each regular input has an exponent >= MPFR_EXP_MIN
81       (with an assertion failure if this is not the case);
82    2. set minexp to MPFR_EXP_MIN and shift the accumulator accordingly
83       (the sum will then be exact).
84    However, such cases, which involve huge precisions, will probably
85    never occur in practice (at least with a 64-bit ABI) and are not
86    easily testable due to these huge precisions. Moreover, switching
87    to a 64-bit ABI would be a better solution for such computations.
88    So, let's leave this unimplemented. */
89 #define SAFE_SUB(V,E,SH)                        \
90   do                                            \
91     {                                           \
92       mpfr_prec_t sh = (SH);                    \
93       MPFR_ASSERTN ((E) >= MPFR_EXP_MIN + sh);  \
94       V = (E) - sh;                             \
95     }                                           \
96   while (0)
97 
98 /* Function sum_raw
99  * ================
100  *
101  * Accumulate a new [minexp,maxexp[ block into (wp,ws). If e and err denote
102  * the exponents of the computed result and of the error bound respectively,
103  * while e - err is less than some given bound (due to cancellation), shift
104  * the accumulator and reiterate.
105  *
106  * Inputs:
107  *   wp: pointer to the accumulator (least significant limb first).
108  *   ws: size of the accumulator (in limbs).
109  *   wq: precision of the accumulator (ws * GMP_NUMB_BITS).
110  *   x: array of the input numbers.
111  *   n: size of this array (number of inputs, regular or not).
112  *   minexp: exponent of the least significant bit of the first block.
113  *   maxexp: exponent of the first block (exponent of its MSB + 1).
114  *   tp: pointer to a temporary area (pre-allocated).
115  *   ts: size of this temporary area.
116  *   logn: ceil(log2(rn)), where rn is the number of regular inputs.
117  *   prec: lower bound for e - err (as described above).
118  *   ep: pointer to mpfr_exp_t (see below), or a null pointer.
119  *   minexpp: pointer to mpfr_exp_t (see below), or a null pointer.
120  *   maxexpp: pointer to mpfr_exp_t (see below), or a null pointer.
121  *
122  * Preconditions:
123  *   prec >= 1
124  *   wq >= logn + prec + 2
125  *
126  * This function returns 0 if the accumulator is 0 (which implies that
127  * the exact sum for this sum_raw invocation is 0), otherwise the number
128  * of cancelled bits (>= 1), defined as the number of identical bits on
129  * the most significant part of the accumulator. In the latter case, it
130  * also returns the following data in variables passed by reference, if
131  * the pointers are not NULL:
132  * - in ep: the exponent e of the computed result;
133  * - in minexpp: the last value of minexp;
134  * - in maxexpp: the new value of maxexp (for the next iteration after
135  *   the first invocation of sum_raw in the main code).
136  *
137  * Notes:
138  * - minexp is also the exponent of the least significant bit of the
139  *   accumulator;
140  * - the temporary area must be large enough to hold a shifted input
141  *   block, and the value of ts is used only when the full assertions
142  *   are checked (i.e. with the --enable-assert configure option), to
143  *   check that a buffer overflow doesn't occur;
144  * - contrary to the returned value of minexp (the value in the last
145  *   iteration), the returned value of maxexp is the one for the next
146  *   iteration (= maxexp2 of the last iteration).
147  */
148 static mpfr_prec_t
149 sum_raw (mp_limb_t *wp, mp_size_t ws, mpfr_prec_t wq, const mpfr_ptr *x,
150          unsigned long n, mpfr_exp_t minexp, mpfr_exp_t maxexp,
151          mp_limb_t *tp, mp_size_t ts, int logn, mpfr_prec_t prec,
152          mpfr_exp_t *ep, mpfr_exp_t *minexpp, mpfr_exp_t *maxexpp)
153 {
154   MPFR_LOG_FUNC
155     (("ws=%Pd ts=%Pd prec=%Pd", (mpfr_prec_t) ws, (mpfr_prec_t) ts, prec),
156      ("", 0));
157 
158   /* The C code below requires prec >= 0 due to the use of unsigned
159      integer arithmetic on it. Actually the computation makes sense
160      only with prec >= 1 (otherwise one can't even know the sign of
161      the result), hence the following assertion. */
162   MPFR_ASSERTD (prec >= 1);
163 
164   /* Consistency check. */
165   MPFR_ASSERTD (wq == (mpfr_prec_t) ws * GMP_NUMB_BITS);
166 
167   /* The following precondition together with prec >= 1 will imply:
168      minexp - shiftq < maxexp2, as required by the algorithm. */
169   MPFR_ASSERTD (wq >= logn + prec + 2);
170 
171   while (1)
172     {
173       mpfr_exp_t maxexp2 = MPFR_EXP_MIN;
174       unsigned long i;
175 
176       MPFR_LOG_MSG (("sum_raw loop: "
177                      "maxexp=%" MPFR_EXP_FSPEC "d "
178                      "minexp=%" MPFR_EXP_FSPEC "d\n",
179                      (mpfr_eexp_t) maxexp, (mpfr_eexp_t) minexp));
180 
181       MPFR_ASSERTD (maxexp > minexp);
182 
183       for (i = 0; i < n; i++)
184         if (! MPFR_IS_SINGULAR (x[i]))  /* Step 1 (see sum_raw in sum.txt) */
185           {
186             mp_limb_t *dp, *vp;
187             mp_size_t ds, vs, vds;
188             mpfr_exp_t xe, vd;
189             mpfr_prec_t xq;
190             int tr;
191 
192             xe = MPFR_GET_EXP (x[i]);
193             xq = MPFR_GET_PREC (x[i]);
194 
195             vp = MPFR_MANT (x[i]);
196             vs = MPFR_PREC2LIMBS (xq);
197             vd = xe - vs * GMP_NUMB_BITS - minexp;
198             /* vd is the exponent of the least significant represented bit of
199                x[i] (including the trailing bits, whose value is 0) minus the
200                exponent of the least significant bit of the accumulator. To
201                make the code simpler, we won't try to filter out the trailing
202                bits of x[i]. */
203 
204             /* Steps 2, 3, 4 (see sum_raw in sum.txt) */
205 
206             if (vd < 0)
207               {
208                 /* This covers the following cases:
209                  *     [-+- accumulator ---]
210                  *   [---|----- x[i] ------|--]
211                  *       |   [----- x[i] --|--]
212                  *       |                 |[----- x[i] -----]
213                  *       |                 |    [----- x[i] -----]
214                  *     maxexp           minexp
215                  */
216 
217                 /* Step 2 for subcase vd < 0 */
218 
219                 if (xe <= minexp)
220                   {
221                     /* x[i] is entirely after the LSB of the accumulator,
222                        so that it will be ignored at this iteration. */
223                     if (xe > maxexp2)
224                       {
225                         maxexp2 = xe;
226                         /* And since the exponent of x[i] is valid... */
227                         MPFR_ASSERTD (maxexp2 >= MPFR_EMIN_MIN);
228                       }
229                     continue;
230                   }
231 
232                 /* Step 3 for subcase vd < 0 */
233 
234                 /* If some significant bits of x[i] are after the LSB of the
235                    accumulator, then maxexp2 will necessarily be minexp. */
236                 if (MPFR_LIKELY (xe - xq < minexp))
237                   maxexp2 = minexp;
238 
239                 /* Step 4 for subcase vd < 0 */
240 
241                 /* We need to ignore the least |vd| significant bits of x[i].
242                    First, let's ignore the least vds = |vd| / GMP_NUMB_BITS
243                    limbs. */
244                 vd = - vd;
245                 vds = vd / GMP_NUMB_BITS;
246                 vs -= vds;
247                 MPFR_ASSERTD (vs > 0);  /* see xe <= minexp test above */
248                 vp += vds;
249                 vd -= vds * GMP_NUMB_BITS;
250                 MPFR_ASSERTD (vd >= 0 && vd < GMP_NUMB_BITS);
251 
252                 if (xe > maxexp)
253                   {
254                     vs -= (xe - maxexp) / GMP_NUMB_BITS;
255                     MPFR_ASSERTD (vs > 0);
256                     tr = (xe - maxexp) % GMP_NUMB_BITS;
257                   }
258                 else
259                   tr = 0;
260 
261                 if (vd != 0)
262                   {
263                     MPFR_ASSERTD (vs <= ts);
264                     mpn_rshift (tp, vp, vs, vd);
265                     vp = tp;
266                     tr += vd;
267                     if (tr >= GMP_NUMB_BITS)
268                       {
269                         vs--;
270                         tr -= GMP_NUMB_BITS;
271                       }
272                     MPFR_ASSERTD (vs >= 1);
273                     MPFR_ASSERTD (tr >= 0 && tr < GMP_NUMB_BITS);
274                     if (tr != 0)
275                       {
276                         tp[vs-1] &= MPFR_LIMB_MASK (GMP_NUMB_BITS - tr);
277                         tr = 0;
278                       }
279                     /* Truncation has now been taken into account. */
280                     MPFR_ASSERTD (tr == 0);
281                   }
282 
283                 dp = wp;
284                 ds = ws;
285               }
286             else  /* vd >= 0 */
287               {
288                 /* This covers the following cases:
289                  *               [-+- accumulator ---]
290                  *   [- x[i] -]    |                 |
291                  *             [---|-- x[i] ------]  |
292                  *          [------|-- x[i] ---------]
293                  *                 |   [- x[i] -]    |
294                  *               maxexp           minexp
295                  */
296 
297                 /* Steps 2 and 3 for subcase vd >= 0 */
298 
299                 MPFR_ASSERTD (xe - xq >= minexp);  /* see definition of vd */
300 
301                 /* Step 4 for subcase vd >= 0 */
302 
303                 /* We need to ignore the least vd significant bits
304                    of the accumulator. First, let's ignore the least
305                    vds = vd / GMP_NUMB_BITS limbs. -> (dp,ds) */
306                 vds = vd / GMP_NUMB_BITS;
307                 ds = ws - vds;
308                 if (ds <= 0)
309                   continue;
310                 dp = wp + vds;
311                 vd -= vds * GMP_NUMB_BITS;
312                 MPFR_ASSERTD (vd >= 0 && vd < GMP_NUMB_BITS);
313 
314                 /* The low part of x[i] (to be determined) will have to be
315                    shifted vd bits to the left if vd != 0. */
316 
317                 if (xe > maxexp)
318                   {
319                     vs -= (xe - maxexp) / GMP_NUMB_BITS;
320                     if (vs <= 0)
321                       continue;
322                     tr = (xe - maxexp) % GMP_NUMB_BITS;
323                   }
324                 else
325                   tr = 0;
326 
327                 MPFR_ASSERTD (tr >= 0 && tr < GMP_NUMB_BITS && vs > 0);
328 
329                 /* We need to consider the least significant vs limbs of x[i]
330                    except the most significant tr bits. */
331 
332                 if (vd != 0)
333                   {
334                     mp_limb_t carry;
335 
336                     MPFR_ASSERTD (vs <= ts);
337                     carry = mpn_lshift (tp, vp, vs, vd);
338                     tr -= vd;
339                     if (tr < 0)
340                       {
341                         tr += GMP_NUMB_BITS;
342                         MPFR_ASSERTD (vs + 1 <= ts);
343                         tp[vs++] = carry;
344                       }
345                     MPFR_ASSERTD (tr >= 0 && tr < GMP_NUMB_BITS);
346                     vp = tp;
347                   }
348               }  /* vd >= 0 */
349 
350             MPFR_ASSERTD (vs > 0 && vs <= ds);
351 
352             /* We can't truncate the most significant limb of the input
353                (in case it hasn't been shifted to the temporary area).
354                So, let's ignore it now. It will be taken into account
355                via carry propagation after the addition. */
356             if (tr != 0)
357               vs--;
358 
359             /* Step 5 (see sum_raw in sum.txt) */
360 
361             if (MPFR_IS_POS (x[i]))
362               {
363                 mp_limb_t carry;
364 
365                 carry = vs > 0 ? mpn_add_n (dp, dp, vp, vs) : 0;
366                 MPFR_ASSERTD (carry <= 1);
367                 if (tr != 0)
368                   carry += vp[vs] & MPFR_LIMB_MASK (GMP_NUMB_BITS - tr);
369                 if (ds > vs)
370                   mpn_add_1 (dp + vs, dp + vs, ds - vs, carry);
371               }
372             else
373               {
374                 mp_limb_t borrow;
375 
376                 borrow = vs > 0 ? mpn_sub_n (dp, dp, vp, vs) : 0;
377                 MPFR_ASSERTD (borrow <= 1);
378                 if (tr != 0)
379                   borrow += vp[vs] & MPFR_LIMB_MASK (GMP_NUMB_BITS - tr);
380                 if (ds > vs)
381                   mpn_sub_1 (dp + vs, dp + vs, ds - vs, borrow);
382               }
383           }
384 
385       {
386         mpfr_prec_t cancel;  /* number of cancelled bits */
387         mp_size_t wi;        /* index in the accumulator */
388         mp_limb_t a, b;
389         int cnt;
390 
391         cancel = 0;
392         wi = ws - 1;
393         MPFR_ASSERTD (wi >= 0);
394         a = wp[wi] >> (GMP_NUMB_BITS - 1) ? MPFR_LIMB_MAX : MPFR_LIMB_ZERO;
395 
396         while (wi >= 0)
397           if ((b = wp[wi]) == a)
398             {
399               cancel += GMP_NUMB_BITS;
400               wi--;
401             }
402           else
403             {
404               b ^= a;
405               MPFR_ASSERTD (b != 0);
406               count_leading_zeros (cnt, b);
407               cancel += cnt;
408               break;
409             }
410 
411         if (wi >= 0 || a != MPFR_LIMB_ZERO)  /* accumulator != 0 */
412           {
413             mpfr_exp_t e;        /* exponent of the computed result */
414             mpfr_exp_t err;      /* exponent of the error bound */
415 
416             MPFR_LOG_MSG (("accumulator %s 0, cancel=%Pd\n",
417                            a != MPFR_LIMB_ZERO ? "<" : ">", cancel));
418 
419             MPFR_ASSERTD (cancel > 0);
420             e = minexp + wq - cancel;
421             MPFR_ASSERTD (e >= minexp);
422             err = maxexp2 + logn;  /* OK even if maxexp2 == MPFR_EXP_MIN */
423 
424             /* The absolute value of the truncated sum is in the binade
425                [2^(e-1),2^e] (closed on both ends due to two's complement).
426                The error is strictly less than 2^err (and is 0 if
427                maxexp2 == MPFR_EXP_MIN). */
428 
429             /* This basically tests whether err <= e - prec without
430                potential integer overflow (since prec >= 0)...
431                Note that the maxexp2 == MPFR_EXP_MIN test is there just for
432                the potential corner case e - prec < MPFR_EXP_MIN + logn.
433                Such corner cases, involving specific huge-precision numbers,
434                are probably not supported in many places in MPFR, but this
435                test doesn't hurt... */
436             if (maxexp2 == MPFR_EXP_MIN ||
437                 (err <= e && SAFE_DIFF (mpfr_uexp_t, e, err) >= prec))
438               {
439                 MPFR_LOG_MSG (("(err=%" MPFR_EXP_FSPEC "d) <= (e=%"
440                                MPFR_EXP_FSPEC "d) - (prec=%Pd)\n",
441                                (mpfr_eexp_t) err, (mpfr_eexp_t) e, prec));
442                 /* To avoid tests or copies, we consider the only two cases
443                    that will occur in sum_aux. */
444                 MPFR_ASSERTD ((ep != NULL &&
445                                minexpp != NULL &&
446                                maxexpp != NULL) ||
447                               (ep == NULL &&
448                                minexpp == NULL &&
449                                maxexpp == NULL));
450                 if (ep != NULL)
451                   {
452                     *ep = e;
453                     *minexpp = minexp;
454                     *maxexpp = maxexp2;
455                   }
456                 MPFR_LOG_MSG (("return with minexp=%" MPFR_EXP_FSPEC
457                                "d maxexp2=%" MPFR_EXP_FSPEC "d%s\n",
458                                (mpfr_eexp_t) minexp, (mpfr_eexp_t) maxexp2,
459                                maxexp2 == MPFR_EXP_MIN ?
460                                " (MPFR_EXP_MIN)" : ""));
461                 return cancel;
462               }
463             else
464               {
465                 mpfr_exp_t diffexp;
466                 mpfr_prec_t shiftq;
467                 mpfr_size_t shifts;
468                 int shiftc;
469 
470                 MPFR_LOG_MSG (("e=%" MPFR_EXP_FSPEC "d err=%" MPFR_EXP_FSPEC
471                                "d maxexp2=%" MPFR_EXP_FSPEC "d%s\n",
472                                (mpfr_eexp_t) e, (mpfr_eexp_t) err,
473                                (mpfr_eexp_t) maxexp2,
474                                maxexp2 == MPFR_EXP_MIN ?
475                                " (MPFR_EXP_MIN)" : ""));
476 
477                 diffexp = err - e;
478                 if (diffexp < 0)
479                   diffexp = 0;
480                 /* diffexp = max(0, err - e) */
481 
482                 MPFR_LOG_MSG (("diffexp=%" MPFR_EXP_FSPEC "d\n",
483                                 (mpfr_eexp_t) diffexp));
484 
485                 MPFR_ASSERTD (diffexp < cancel - 2);
486                 shiftq = cancel - 2 - (mpfr_prec_t) diffexp;
487                 /* equivalent to: minexp + wq - 2 - max(e,err) */
488                 MPFR_ASSERTD (shiftq > 0);
489                 shifts = shiftq / GMP_NUMB_BITS;
490                 shiftc = shiftq % GMP_NUMB_BITS;
491                 MPFR_LOG_MSG (("shiftq = %Pd = %Pd * GMP_NUMB_BITS + %d\n",
492                                shiftq, (mpfr_prec_t) shifts, shiftc));
493                 if (MPFR_LIKELY (shiftc != 0))
494                   mpn_lshift (wp + shifts, wp, ws - shifts, shiftc);
495                 else
496                   mpn_copyd (wp + shifts, wp, ws - shifts);
497                 MPN_ZERO (wp, shifts);
498                 /* Compute minexp = minexp - shiftq safely. */
499                 SAFE_SUB (minexp, minexp, shiftq);
500                 MPFR_ASSERTD (minexp < maxexp2);
501               }
502           }
503         else if (maxexp2 == MPFR_EXP_MIN)
504           {
505             MPFR_LOG_MSG (("accumulator = 0, maxexp2 = MPFR_EXP_MIN\n", 0));
506             return 0;
507           }
508         else
509           {
510             MPFR_LOG_MSG (("accumulator = 0, reiterate\n", 0));
511             /* Compute minexp = maxexp2 - (wq - (logn + 1)) safely. */
512             SAFE_SUB (minexp, maxexp2, wq - (logn + 1));
513             /* Note: the logn + 1 corresponds to cq in the main code. */
514           }
515       }
516 
517       maxexp = maxexp2;
518     }
519 }
520 
521 /**********************************************************************/
522 
523 /* Generic case: all the inputs are finite numbers,
524    with at least 3 regular numbers. */
525 static int
526 sum_aux (mpfr_ptr sum, const mpfr_ptr *x, unsigned long n, mpfr_rnd_t rnd,
527          mpfr_exp_t maxexp, unsigned long rn)
528 {
529   mp_limb_t *sump;
530   mp_limb_t *tp;  /* pointer to a temporary area */
531   mp_limb_t *wp;  /* pointer to the accumulator */
532   mp_size_t ts;   /* size of the temporary area, in limbs */
533   mp_size_t ws;   /* size of the accumulator, in limbs */
534   mp_size_t zs;   /* size of the TMD accumulator, in limbs */
535   mpfr_prec_t wq; /* size of the accumulator, in bits */
536   int logn;       /* ceil(log2(rn)) */
537   int cq;
538   mpfr_prec_t sq;
539   int inex;
540   MPFR_TMP_DECL (marker);
541 
542   MPFR_LOG_FUNC
543     (("n=%lu rnd=%d maxexp=%" MPFR_EXP_FSPEC "d rn=%lu",
544       n, rnd, (mpfr_eexp_t) maxexp, rn),
545      ("sum[%Pu]=%.*Rg", mpfr_get_prec (sum), mpfr_log_prec, sum));
546 
547   MPFR_ASSERTD (rn >= 3 && rn <= n);
548 
549   /* In practice, no integer overflow on the exponent. */
550   MPFR_STAT_STATIC_ASSERT (MPFR_EXP_MAX - MPFR_EMAX_MAX >=
551                            sizeof (unsigned long) * CHAR_BIT);
552 
553   /* Set up some variables and the accumulator. */
554 
555   sump = MPFR_MANT (sum);
556 
557   /* rn is the number of regular inputs (the singular ones will be
558      ignored). Compute logn = ceil(log2(rn)). */
559   logn = MPFR_INT_CEIL_LOG2 (rn);
560   MPFR_ASSERTD (logn >= 2);
561 
562   MPFR_LOG_MSG (("logn=%d maxexp=%" MPFR_EXP_FSPEC "d\n",
563                  logn, (mpfr_eexp_t) maxexp));
564 
565   sq = MPFR_GET_PREC (sum);
566   cq = logn + 1;
567 
568   /* First determine the size of the accumulator.
569    * cq + sq + logn + 2 >= logn + sq + 5, which will be used later.
570    * The assertion wq - cq - sq >= 4 is another way to check that.
571    */
572   ws = MPFR_PREC2LIMBS (cq + sq + logn + 2);
573   wq = (mpfr_prec_t) ws * GMP_NUMB_BITS;
574   MPFR_ASSERTD (wq - cq - sq >= 4);
575 
576   /* TODO: timings, comparing with a larger zs. */
577   zs = MPFR_PREC2LIMBS (wq - sq);
578 
579   MPFR_LOG_MSG (("cq=%d sq=%Pd logn=%d wq=%Pd\n", cq, sq, logn, wq));
580 
581   /* An input block will have up to wq - cq bits, and its shifted value
582      (to be correctly aligned) may take GMP_NUMB_BITS - 1 additional bits. */
583   ts = MPFR_PREC2LIMBS (wq - cq + GMP_NUMB_BITS - 1);
584 
585   MPFR_TMP_MARK (marker);
586 
587   /* Note: If the TMD does not occur, which should be the case for most
588      sums, allocating zs limbs is not necessary. However, we choose to
589      do this now (thus in all cases) because zs is very small, so that
590      the difference on the memory footprint will not be noticeable.
591      More precisely, zs is at most 2 in practice with the current code;
592      we may want to increase it in order to avoid performance issues in
593      some unlikely corner cases, but even in this case, it will remain
594      small.
595      One will have:
596        [------ ts ------][------ ws ------][- zs -]
597      The following would probably be better:
598        [------ ts ------]  [------ ws ------]
599                    [- zs -]
600      i.e. where the TMD accumulator (partially or completely) takes
601      some unneeded part of the temporary area in order to improve
602      data locality. But
603        * in low precision, data locality is regarded as ensured even
604          with the actual choice;
605        * in high precision, data locality for TMD resolution may not
606          be that important.
607   */
608   tp = MPFR_TMP_LIMBS_ALLOC (ts + ws + zs);
609   wp = tp + ts;
610 
611   MPN_ZERO (wp, ws);  /* zero the accumulator */
612 
613   {
614     mpfr_exp_t minexp;   /* exponent of the LSB of the block for sum_raw */
615     mpfr_prec_t cancel;  /* number of cancelled bits */
616     mpfr_exp_t e;        /* temporary exponent of the result */
617     mpfr_exp_t u;        /* temporary exponent of the ulp (quantum) */
618     mp_limb_t lbit;      /* last bit (useful if even rounding) */
619     mp_limb_t rbit;      /* rounding bit (corrected in halfway case) */
620     int corr;            /* correction term (from -1 to 2) */
621     int sd, sh;          /* shift counts */
622     mp_size_t sn;        /* size of the output number */
623     int tmd;             /* 0: the TMD does not occur
624                             1: the TMD occurs on a machine number
625                             2: the TMD occurs on a midpoint */
626     int neg;             /* 0 if positive sum, 1 if negative */
627     int sgn;             /* +1 if positive sum, -1 if negative */
628 
629     MPFR_LOG_MSG (("Compute an approximation with sum_raw...\n", 0));
630 
631     /* Compute minexp = maxexp - (wq - cq) safely. */
632     SAFE_SUB (minexp, maxexp, wq - cq);
633     MPFR_ASSERTD (wq >= logn + sq + 5);
634     cancel = sum_raw (wp, ws, wq, x, n, minexp, maxexp, tp, ts,
635                       logn, sq + 3, &e, &minexp, &maxexp);
636 
637     if (MPFR_UNLIKELY (cancel == 0))
638       {
639         /* The exact sum is zero. Since not all inputs are 0, the sum
640          * is +0 except in MPFR_RNDD, as specified according to the
641          * IEEE 754 rules for the addition of two numbers.
642          */
643         MPFR_SET_SIGN (sum, (rnd != MPFR_RNDD ?
644                              MPFR_SIGN_POS : MPFR_SIGN_NEG));
645         MPFR_SET_ZERO (sum);
646         MPFR_TMP_FREE (marker);
647         MPFR_RET (0);
648       }
649 
650     /* The absolute value of the truncated sum is in the binade
651        [2^(e-1),2^e] (closed on both ends due to two's complement).
652        The error is strictly less than 2^(maxexp + logn) (and is 0
653        if maxexp == MPFR_EXP_MIN). */
654 
655     u = e - sq;  /* e being the exponent, u is the ulp of the target */
656 
657     /* neg = 1 if negative, 0 if positive. */
658     neg = wp[ws-1] >> (GMP_NUMB_BITS - 1);
659     MPFR_ASSERTD (neg == 0 || neg == 1);
660 
661     sgn = neg ? -1 : 1;
662     MPFR_ASSERTN (sgn == (neg ? MPFR_SIGN_NEG : MPFR_SIGN_POS));
663 
664     MPFR_LOG_MSG (("neg=%d sgn=%d cancel=%Pd"
665                    " e=%" MPFR_EXP_FSPEC "d"
666                    " u=%" MPFR_EXP_FSPEC "d"
667                    " maxexp=%" MPFR_EXP_FSPEC "d%s\n",
668                    neg, sgn, cancel, (mpfr_eexp_t) e, (mpfr_eexp_t) u,
669                    (mpfr_eexp_t) maxexp,
670                    maxexp == MPFR_EXP_MIN ? " (MPFR_EXP_MIN)" : ""));
671 
672     if (rnd == MPFR_RNDF)
673       {
674         /* Rounding the approximate value to nearest (ties don't matter) is
675            sufficient. We need to get the rounding bit; the code is similar
676            to a part from the generic code (here, corr = rbit). */
677         if (MPFR_LIKELY (u > minexp))
678           {
679             mpfr_prec_t tq;
680             mp_size_t wi;
681             int td;
682 
683             tq = u - minexp;
684             MPFR_ASSERTD (tq > 0); /* number of trailing bits */
685             MPFR_LOG_MSG (("tq=%Pd\n", tq));
686 
687             wi = tq / GMP_NUMB_BITS;
688             td = tq % GMP_NUMB_BITS;
689             corr = td >= 1 ? ((wp[wi] >> (td - 1)) & MPFR_LIMB_ONE) :
690               (MPFR_ASSERTD (wi >= 1), wp[wi-1] >> (GMP_NUMB_BITS - 1));
691           }
692         else
693           corr = 0;
694         inex = 0;  /* not meaningful, but needs to have a value */
695       }
696     else  /* rnd != MPFR_RNDF */
697       {
698         if (MPFR_LIKELY (u > minexp))
699           {
700             mpfr_prec_t tq;
701             mp_size_t wi;
702             int td;
703 
704             tq = u - minexp;
705             MPFR_ASSERTD (tq > 0); /* number of trailing bits */
706             MPFR_LOG_MSG (("tq=%Pd\n", tq));
707 
708             wi = tq / GMP_NUMB_BITS;
709 
710             /* Determine the rounding bit, which is represented. */
711             td = tq % GMP_NUMB_BITS;
712             lbit = (wp[wi] >> td) & MPFR_LIMB_ONE;
713             rbit = td >= 1 ? ((wp[wi] >> (td - 1)) & MPFR_LIMB_ONE) :
714               (MPFR_ASSERTD (wi >= 1), wp[wi-1] >> (GMP_NUMB_BITS - 1));
715             MPFR_ASSERTD (rbit == 0 || rbit == 1);
716             MPFR_LOG_MSG (("rbit=%d\n", (int) rbit));
717 
718             if (maxexp == MPFR_EXP_MIN)
719               {
720                 /* The sum in the accumulator is exact. Determine inex:
721                    inex = 0 if the final sum is exact, else 1, i.e.
722                    inex = rounding bit || sticky bit. In round to nearest,
723                    also determine the rounding direction: obtained from
724                    the rounding bit possibly except in halfway cases.
725                    Halfway cases are rounded toward -inf iff the last bit
726                    of the truncated significand in two's complement is 0
727                    (in precision > 1, because the parity after rounding is
728                    the same in two's complement and sign + magnitude; in
729                    precision 1, one checks that the rule works for both
730                    positive (lbit == 1) and negative (lbit == 0) numbers,
731                    rounding halfway cases away from zero). */
732                 if (MPFR_LIKELY (rbit == 0 || (rnd == MPFR_RNDN && lbit == 0)))
733                   {
734                     /* We need to determine the sticky bit, either to set inex
735                        (if the rounding bit is 0) or to possibly "correct" rbit
736                        (round to nearest, halfway case rounded downward) from
737                        which the rounding direction will be determined. */
738                     MPFR_LOG_MSG (("Determine the sticky bit...\n", 0));
739 
740                     inex = td >= 2 ? (wp[wi] & MPFR_LIMB_MASK (td - 1)) != 0
741                       : td == 0 ?
742                       (MPFR_ASSERTD (wi >= 1),
743                        (wp[--wi] & MPFR_LIMB_MASK (GMP_NUMB_BITS - 1)) != 0)
744                       : 0;
745 
746                     if (!inex)
747                       {
748                         while (!inex && wi > 0)
749                           inex = wp[--wi] != 0;
750                         if (!inex && rbit != 0)
751                           {
752                             /* sticky bit = 0, rounding bit = 1,
753                                i.e. halfway case, which will be
754                                rounded downward (see earlier if). */
755                             MPFR_ASSERTD (rnd == MPFR_RNDN);
756                             inex = 1;
757                             rbit = 0;  /* even rounding downward */
758                             MPFR_LOG_MSG (("Halfway case rounded downward;"
759                                            " set inex=1 rbit=0\n", 0));
760                           }
761                       }
762                   }
763                 else
764                   inex = 1;
765                 tmd = 0;  /* We can round correctly -> no TMD. */
766               }
767             else  /* maxexp > MPFR_EXP_MIN */
768               {
769                 mpfr_exp_t d;
770                 mp_limb_t limb, mask;
771                 int nbits;
772 
773                 /* Since maxexp was set to either the exponent of a x[i] or
774                    to minexp... */
775                 MPFR_ASSERTD (maxexp >= MPFR_EMIN_MIN || maxexp == minexp);
776 
777                 inex = 1;  /* We do not know whether the sum is exact. */
778 
779                 MPFR_ASSERTD (u <= MPFR_EMAX_MAX && u <= minexp + wq);
780                 d = u - (maxexp + logn);  /* representable */
781                 MPFR_ASSERTD (d >= 3);  /* due to prec = sq + 3 in sum_raw */
782 
783                 /* Let's see whether the TMD occurs by looking at the d bits
784                    following the ulp bit, or the d-1 bits after the rounding
785                    bit. */
786 
787                 /* First chunk after the rounding bit... It starts at:
788                    (wi,td-2) if td >= 2,
789                    (wi-1,td-2+GMP_NUMB_BITS) if td < 2. */
790                 if (td == 0)
791                   {
792                     MPFR_ASSERTD (wi >= 1);
793                     limb = wp[--wi];
794                     mask = MPFR_LIMB_MASK (GMP_NUMB_BITS - 1);
795                     nbits = GMP_NUMB_BITS;
796                   }
797                 else if (td == 1)
798                   {
799                     limb = wi >= 1 ? wp[--wi] : MPFR_LIMB_ZERO;
800                     mask = MPFR_LIMB_MAX;
801                     nbits = GMP_NUMB_BITS + 1;
802                   }
803                 else  /* td >= 2 */
804                   {
805                     MPFR_ASSERTD (td >= 2);
806                     limb = wp[wi];
807                     mask = MPFR_LIMB_MASK (td - 1);
808                     nbits = td;
809                   }
810 
811                 /* nbits: number of bits of the first chunk + 1
812                    (the +1 is for the rounding bit). */
813 
814                 if (nbits > d)
815                   {
816                     /* Some low significant bits must be ignored. */
817                     limb >>= nbits - d;
818                     mask >>= nbits - d;
819                     d = 0;
820                   }
821                 else
822                   {
823                     d -= nbits;
824                     MPFR_ASSERTD (d >= 0);
825                   }
826 
827                 limb &= mask;
828                 tmd =
829                   limb == MPFR_LIMB_ZERO ?
830                     (rbit == 0 ? 1 : rnd == MPFR_RNDN ? 2 : 0) :
831                   limb == mask ?
832                     (limb = MPFR_LIMB_MAX,
833                      rbit != 0 ? 1 : rnd == MPFR_RNDN ? 2 : 0) : 0;
834 
835                 while (tmd != 0 && d != 0)
836                   {
837                     mp_limb_t limb2;
838 
839                     MPFR_ASSERTD (d > 0);
840                     if (wi == 0)
841                       {
842                         /* The non-represented bits are 0's. */
843                         if (limb != MPFR_LIMB_ZERO)
844                           tmd = 0;
845                         break;
846                       }
847                     MPFR_ASSERTD (wi > 0);
848                     limb2 = wp[--wi];
849                     if (d < GMP_NUMB_BITS)
850                       {
851                         int c = GMP_NUMB_BITS - d;
852                         MPFR_ASSERTD (c > 0 && c < GMP_NUMB_BITS);
853                         if ((limb2 >> c) != (limb >> c))
854                           tmd = 0;
855                         break;
856                       }
857                     if (limb2 != limb)
858                       tmd = 0;
859                     d -= GMP_NUMB_BITS;
860                   }
861               }
862           }
863         else  /* u <= minexp */
864           {
865             /* The exact value of the accumulator will be copied.
866              * The TMD occurs if and only if there are bits still
867              * not taken into account, and if it occurs, this is
868              * necessarily on a machine number (-> tmd = 1).
869              */
870             lbit = u == minexp ? wp[0] & MPFR_LIMB_ONE : 0;
871             rbit = 0;
872             inex = tmd = maxexp != MPFR_EXP_MIN;
873           }
874 
875         MPFR_ASSERTD (rbit == 0 || rbit == 1);
876 
877         MPFR_LOG_MSG (("tmd=%d lbit=%d rbit=%d inex=%d neg=%d\n",
878                        tmd, (int) lbit, (int) rbit, inex, neg));
879 
880         /* Here, if the final sum is known to be exact, inex = 0, otherwise
881          * inex = 1. We have a truncated significand, a trailing term t such
882          * that 0 <= t < 1 ulp, and an error on the trailing term bounded by
883          * t' in absolute value. Thus the error e on the truncated significand
884          * satisfies -t' <= e < 1 ulp + t'. Thus one has 4 correction cases
885          * denoted by a corr value between -1 and 2 depending on e, neg, rbit,
886          * and the rounding mode:
887          *   -1: equivalent to nextbelow;
888          *    0: the truncated significand is not corrected;
889          *    1: add 1 ulp;
890          *    2: add 1 ulp, then nextabove.
891          * The nextbelow and nextabove are used here since there may be a
892          * change of the binade.
893          */
894 
895         if (tmd == 0)  /* no TMD */
896           {
897             switch (rnd)
898               {
899               case MPFR_RNDD:
900                 corr = 0;
901                 break;
902               case MPFR_RNDU:
903                 corr = inex;
904                 break;
905               case MPFR_RNDZ:
906                 corr = inex && neg;
907                 break;
908               case MPFR_RNDA:
909                 corr = inex && !neg;
910                 break;
911               default:
912                 MPFR_ASSERTN (rnd == MPFR_RNDN);
913                 /* Note: for halfway cases (maxexp == MPFR_EXP_MIN) that are
914                    rounded downward, rbit has been changed to 0 so that corr
915                    is set correctly. */
916                 corr = rbit;
917               }
918             MPFR_ASSERTD (corr == 0 || corr == 1);
919             if (inex &&
920                 corr == 0)  /* two's complement significand decreased */
921               inex = -1;
922           }
923         else  /* tmd */
924           {
925             mpfr_exp_t minexp2;
926             mpfr_prec_t cancel2;
927             mpfr_exp_t err;  /* exponent of the error bound */
928             mp_size_t zz;    /* nb of limbs to zero in the TMD accumulator */
929             mp_limb_t *zp;   /* pointer to the TMD accumulator */
930             mpfr_prec_t zq;  /* size of the TMD accumulator, in bits */
931             int sst;         /* sign of the secondary term */
932 
933             /* TMD case. Here we use a new variable minexp2, with the same
934                meaning as minexp, as we want to keep the minexp value for
935                the copy to the destination. */
936 
937             MPFR_ASSERTD (maxexp > MPFR_EXP_MIN);
938             MPFR_ASSERTD (tmd == 1 || tmd == 2);
939 
940             /* TMD accumulator */
941             zp = wp + ws;
942             zq = (mpfr_prec_t) zs * GMP_NUMB_BITS;
943 
944             err = maxexp + logn;
945 
946             MPFR_LOG_MSG (("TMD with"
947                            " maxexp=%" MPFR_EXP_FSPEC "d"
948                            " err=%" MPFR_EXP_FSPEC "d"
949                            " zs=%Pd"
950                            " zq=%Pd\n",
951                            (mpfr_eexp_t) maxexp, (mpfr_eexp_t) err,
952                            (mpfr_prec_t) zs, zq));
953 
954             /* The d-1 bits from u-2 to u-d (= err) are identical. */
955 
956             if (err >= minexp)
957               {
958                 mpfr_prec_t tq;
959                 mp_size_t wi;
960                 int td;
961 
962                 /* Let's keep the last 2 over the d-1 identical bits and the
963                    following bits, i.e. the bits from err+1 to minexp. */
964                 tq = err - minexp + 2;  /* tq = number of such bits */
965                 MPFR_LOG_MSG (("[TMD] tq=%Pd\n", tq));
966                 MPFR_ASSERTD (tq >= 2);
967 
968                 wi = tq / GMP_NUMB_BITS;
969                 td = tq % GMP_NUMB_BITS;
970 
971                 /* Note: The "else" (td == 0) branch below can be executed
972                    only if tq >= GMP_NUMB_BITS, which is possible only when
973                    logn is large enough. Indeed, if tq > logn + some constant,
974                    this means that the TMD did not occur.
975                    TODO: Find an upper bound on tq, and add a corresponding
976                    MPFR_ASSERTD assertion / hint. On some platforms, this
977                    branch might be dead code, and such information would
978                    allow the compiler to remove it.
979                    It seems that this branch is never tested (r12754). */
980 
981                 if (td != 0)
982                   {
983                     wi++;  /* number of words with represented bits */
984                     td = GMP_NUMB_BITS - td;
985                     zz = zs - wi;
986                     MPFR_ASSERTD (zz >= 0 && zz < zs);
987                     mpn_lshift (zp + zz, wp, wi, td);
988                   }
989                 else
990                   {
991                     MPFR_ASSERTD (wi > 0);
992                     zz = zs - wi;
993                     MPFR_ASSERTD (zz >= 0 && zz < zs);
994                     if (zz > 0)
995                       MPN_COPY (zp + zz, wp, wi);
996                   }
997 
998                 /* Compute minexp2 = minexp - (zs * GMP_NUMB_BITS + td)
999                    safely. */
1000                 SAFE_SUB (minexp2, minexp, zz * GMP_NUMB_BITS + td);
1001                 MPFR_ASSERTD (minexp2 == err + 2 - zq);
1002               }
1003             else  /* err < minexp */
1004               {
1005                 /* At least one of the identical bits is not represented,
1006                    meaning that it is 0 and all these bits are 0's. Thus
1007                    the accumulator will be 0. The new minexp is determined
1008                    from maxexp, with cq bits reserved to avoid an overflow
1009                    (as in the early steps). */
1010                 MPFR_LOG_MSG (("[TMD] err < minexp\n", 0));
1011                 zz = zs;
1012 
1013                 /* Compute minexp2 = maxexp - (zq - cq) safely. */
1014                 SAFE_SUB (minexp2, maxexp, zq - cq);
1015                 MPFR_ASSERTD (minexp2 == err + 1 - zq);
1016               }
1017 
1018             MPN_ZERO (zp, zz);
1019 
1020             /* We need to determine the sign sst of the secondary term.
1021                In sum_raw, since the truncated sum corresponding to this
1022                secondary term will be in [2^(e-1),2^e] and the error
1023                strictly less than 2^err, we can stop the iterations when
1024                e - err >= 1 (this bound is the 11th argument of sum_raw). */
1025             cancel2 = sum_raw (zp, zs, zq, x, n, minexp2, maxexp, tp, ts,
1026                                logn, 1, NULL, NULL, NULL);
1027 
1028             if (cancel2 != 0)
1029               sst = MPFR_LIMB_MSB (zp[zs-1]) == 0 ? 1 : -1;
1030             else if (tmd == 1)
1031               sst = 0;
1032             else
1033               {
1034                 /* For halfway cases, let's virtually eliminate them
1035                    by setting a sst equivalent to a non-halfway case,
1036                    which depends on the last bit of the pre-rounded
1037                    result. */
1038                 MPFR_ASSERTD (rnd == MPFR_RNDN && tmd == 2);
1039                 sst = lbit != 0 ? 1 : -1;
1040               }
1041 
1042             MPFR_LOG_MSG (("[TMD] tmd=%d rbit=%d sst=%d\n",
1043                            tmd, (int) rbit, sst));
1044 
1045             /* Do not consider the corrected sst for MPFR_COV_SET */
1046             MPFR_COV_SET (sum_tmd[(int) rnd][tmd-1][rbit]
1047                           [cancel2 == 0 ? 1 : sst+1][neg][sq > MPFR_PREC_MIN]);
1048 
1049             inex =
1050               MPFR_IS_LIKE_RNDD (rnd, sgn) ? (sst ? -1 : 0) :
1051               MPFR_IS_LIKE_RNDU (rnd, sgn) ? (sst ?  1 : 0) :
1052               (MPFR_ASSERTD (rnd == MPFR_RNDN),
1053                tmd == 1 ? - sst : sst);
1054 
1055             if (tmd == 2 && sst == (rbit != 0 ? -1 : 1))
1056               corr = 1 - (int) rbit;
1057             else if (MPFR_IS_LIKE_RNDD (rnd, sgn) && sst == -1)
1058               corr = (int) rbit - 1;
1059             else if (MPFR_IS_LIKE_RNDU (rnd, sgn) && sst == +1)
1060               corr = (int) rbit + 1;
1061             else
1062               corr = (int) rbit;
1063           }  /* tmd */
1064       }  /* rnd != MPFR_RNDF */
1065 
1066     MPFR_LOG_MSG (("neg=%d corr=%d inex=%d\n", neg, corr, inex));
1067 
1068     /* Sign handling (-> absolute value and sign), together with
1069        rounding. The most common cases are corr = 0 and corr = 1
1070        as this is necessarily the case when the TMD did not occur. */
1071 
1072     MPFR_ASSERTD (corr >= -1 && corr <= 2);
1073 
1074     MPFR_SIGN (sum) = sgn;
1075 
1076     /* Let's copy/shift the bits [max(u,minexp),e) to the
1077        most significant part of the destination, and zero
1078        the least significant part (there can be one only if
1079        u < minexp). The trailing bits of the destination may
1080        contain garbage at this point. */
1081 
1082     sn = MPFR_PREC2LIMBS (sq);
1083     sd = (mpfr_prec_t) sn * GMP_NUMB_BITS - sq;
1084     sh = cancel % GMP_NUMB_BITS;
1085 
1086     MPFR_ASSERTD (sd >= 0 && sd < GMP_NUMB_BITS);
1087 
1088     if (MPFR_LIKELY (u > minexp))
1089       {
1090         mp_size_t wi;
1091 
1092         /* Recompute the initial value of wi. */
1093         wi = (u - minexp) / GMP_NUMB_BITS;
1094         if (MPFR_LIKELY (sh != 0))
1095           {
1096             mp_size_t fi;
1097 
1098             fi = (e - minexp) / GMP_NUMB_BITS - (sn - 1);
1099             MPFR_ASSERTD (fi == wi || fi == wi + 1);
1100             mpn_lshift (sump, wp + fi, sn, sh);
1101             if (fi != wi)
1102               sump[0] |= wp[wi] >> (GMP_NUMB_BITS - sh);
1103           }
1104         else
1105           {
1106             MPFR_ASSERTD ((mpfr_prec_t) (ws - (wi + sn)) * GMP_NUMB_BITS
1107                           == cancel);
1108             MPN_COPY (sump, wp + wi, sn);
1109           }
1110       }
1111     else  /* u <= minexp */
1112       {
1113         mp_size_t en;
1114 
1115         en = (e - minexp + (GMP_NUMB_BITS - 1)) / GMP_NUMB_BITS;
1116         if (MPFR_LIKELY (sh != 0))
1117           mpn_lshift (sump + sn - en, wp, en, sh);
1118         else if (MPFR_UNLIKELY (en > 0))
1119           MPN_COPY (sump + sn - en, wp, en);
1120         if (sn > en)
1121           MPN_ZERO (sump, sn - en);
1122       }
1123 
1124     /* Let's take the complement if the result is negative, and at
1125        the same time, do the rounding and zero the trailing bits.
1126        As this is valid only for precisions >= 2, there is special
1127        code for precision 1 first. */
1128 
1129     if (MPFR_UNLIKELY (sq == 1))  /* precision 1 */
1130       {
1131         sump[0] = MPFR_LIMB_HIGHBIT;
1132         e += neg ? 1 - corr : corr;
1133       }
1134     else if (neg)  /* negative result with sq > 1 */
1135       {
1136         MPFR_ASSERTD (MPFR_LIMB_MSB (sump[sn-1]) == 0);
1137 
1138         /* abs(x + corr) = - (x + corr) = com(x) + (1 - corr) */
1139 
1140         /* We want to avoid separate mpn_com (or mpn_neg) and mpn_add_1
1141            (or mpn_sub_1) operations, as they could yield two loops in
1142            some particular cases involving a long sequence of 0's in
1143            the low significant bits (except the least significant bit,
1144            which doesn't matter). */
1145 
1146         if (corr <= 1)
1147           {
1148             mp_limb_t corr2;
1149 
1150             /* Here we can just do the correction operation on the
1151                least significant limb, then do either a mpn_com or
1152                a mpn_neg on the remaining limbs, depending on the
1153                carry (BTW, mpn_neg is just a mpn_com with an initial
1154                carry propagation: after some point, mpn_neg does a
1155                complement). */
1156 
1157             corr2 = (mp_limb_t) (1 - corr) << sd;
1158             /* Note: If corr = -1, this can overflow to corr2 = 0.
1159                This case is taken into account below. */
1160 
1161             sump[0] = (~ (sump[0] | MPFR_LIMB_MASK (sd))) + corr2;
1162 
1163             if (sump[0] < corr2 || (corr2 == 0 && corr < 0))
1164               {
1165                 if (sn == 1 || ! mpn_neg (sump + 1, sump + 1, sn - 1))
1166                   {
1167                     /* Note: The | is important when sump[sn-1] is not 0
1168                        (this can occur with sn = 1 and corr = -1). TODO:
1169                        Add something to make sure that this is tested. */
1170                     sump[sn-1] |= MPFR_LIMB_HIGHBIT;
1171                     e++;
1172                   }
1173               }
1174             else if (sn > 1)
1175               mpn_com (sump + 1, sump + 1, sn - 1);
1176           }
1177         else  /* corr == 2 */
1178           {
1179             mp_limb_t corr2, c;
1180             mp_size_t i = 1;
1181 
1182             /* We want to compute com(x) - 1, but GMP doesn't have an
1183                operation for that. The fact is that a sequence of low
1184                significant bits 1 is invariant. Starting at the first
1185                low significant bit 0, we can do the complement with
1186                mpn_com. */
1187 
1188             corr2 = MPFR_LIMB_ONE << sd;
1189             c = ~ (sump[0] | MPFR_LIMB_MASK (sd));
1190             sump[0] = c - corr2;
1191 
1192             if (c == 0)
1193               {
1194                 while (MPFR_ASSERTD (i < sn), sump[i] == MPFR_LIMB_MAX)
1195                   i++;
1196                 sump[i] = (~ sump[i]) - 1;
1197                 i++;
1198               }
1199 
1200             if (i < sn)
1201               mpn_com (sump + i, sump + i, sn - i);
1202             else if (MPFR_UNLIKELY (MPFR_LIMB_MSB (sump[sn-1]) == 0))
1203               {
1204                 /* Happens on 01111...111, whose complement is
1205                    10000...000, and com(x) - 1 is 01111...111. */
1206                 sump[sn-1] |= MPFR_LIMB_HIGHBIT;
1207                 e--;
1208               }
1209           }
1210       }
1211     else  /* positive result with sq > 1 */
1212       {
1213         MPFR_ASSERTD (MPFR_LIMB_MSB (sump[sn-1]) != 0);
1214         sump[0] &= ~ MPFR_LIMB_MASK (sd);
1215 
1216         if (corr > 0)
1217           {
1218             mp_limb_t corr2, carry_out;
1219 
1220             corr2 = (mp_limb_t) corr << sd;
1221             /* If corr == 2 && sd == GMP_NUMB_BITS - 1, this overflows
1222                to corr2 = 0. This case is taken into account below. */
1223 
1224             carry_out = corr2 != 0 ? mpn_add_1 (sump, sump, sn, corr2) :
1225               (MPFR_ASSERTD (sn > 1),
1226                mpn_add_1 (sump + 1, sump + 1, sn - 1, MPFR_LIMB_ONE));
1227 
1228             MPFR_ASSERTD (sump[sn-1] >> (GMP_NUMB_BITS - 1) == !carry_out);
1229 
1230             if (MPFR_UNLIKELY (carry_out))
1231               {
1232                 /* Note: The | is important when sump[sn-1] is not 0
1233                    (this can occur with sn = 1 and corr = 2). TODO:
1234                    Add something to make sure that this is tested. */
1235                 sump[sn-1] |= MPFR_LIMB_HIGHBIT;
1236                 e++;
1237               }
1238           }
1239 
1240         if (corr < 0)
1241           {
1242             mpn_sub_1 (sump, sump, sn, MPFR_LIMB_ONE << sd);
1243 
1244             if (MPFR_UNLIKELY (MPFR_LIMB_MSB (sump[sn-1]) == 0))
1245               {
1246                 sump[sn-1] |= MPFR_LIMB_HIGHBIT;
1247                 e--;
1248               }
1249           }
1250       }
1251 
1252     MPFR_ASSERTD (MPFR_LIMB_MSB (sump[sn-1]) != 0);
1253     MPFR_LOG_MSG (("Set exponent e=%" MPFR_EXP_FSPEC "d\n", (mpfr_eexp_t) e));
1254     /* e may be outside the current exponent range, but this will be checked
1255        with mpfr_check_range below. */
1256     MPFR_EXP (sum) = e;
1257   }  /* main block */
1258 
1259   MPFR_TMP_FREE (marker);
1260   return mpfr_check_range (sum, inex, rnd);
1261 }
1262 
1263 /**********************************************************************/
1264 
1265 int
1266 mpfr_sum (mpfr_ptr sum, const mpfr_ptr *x, unsigned long n, mpfr_rnd_t rnd)
1267 {
1268   MPFR_LOG_FUNC
1269     (("n=%lu rnd=%d", n, rnd),
1270      ("sum[%Pu]=%.*Rg", mpfr_get_prec (sum), mpfr_log_prec, sum));
1271 
1272   if (MPFR_UNLIKELY (n <= 2))
1273     {
1274       if (n == 0)
1275         {
1276           MPFR_SET_ZERO (sum);
1277           MPFR_SET_POS (sum);
1278           MPFR_RET (0);
1279         }
1280       else if (n == 1)
1281         return mpfr_set (sum, x[0], rnd);
1282       else
1283         return mpfr_add (sum, x[0], x[1], rnd);
1284     }
1285   else
1286     {
1287       mpfr_exp_t maxexp = MPFR_EXP_MIN;  /* max(Empty) */
1288       unsigned long i;
1289       unsigned long rn = 0;  /* will be the number of regular inputs */
1290       /* sign of infinities and zeros (0: currently unknown) */
1291       int sign_inf = 0, sign_zero = 0;
1292 
1293       MPFR_LOG_MSG (("Check for special inputs (n = %lu >= 3)\n", n));
1294 
1295       for (i = 0; i < n; i++)
1296         {
1297           if (MPFR_UNLIKELY (MPFR_IS_SINGULAR (x[i])))
1298             {
1299               if (MPFR_IS_NAN (x[i]))
1300                 {
1301                   /* The current value x[i] is NaN. Then the sum is NaN. */
1302                 nan:
1303                   MPFR_SET_NAN (sum);
1304                   MPFR_RET_NAN;
1305                 }
1306               else if (MPFR_IS_INF (x[i]))
1307                 {
1308                   /* The current value x[i] is an infinity.
1309                      There are two cases:
1310                      1. This is the first infinity value (sign_inf == 0).
1311                         Then set sign_inf to its sign, and go on.
1312                      2. All the infinities found until now have the same
1313                         sign sign_inf. If this new infinity has a different
1314                         sign, then return NaN immediately, else go on. */
1315                   if (sign_inf == 0)
1316                     sign_inf = MPFR_SIGN (x[i]);
1317                   else if (MPFR_SIGN (x[i]) != sign_inf)
1318                     goto nan;
1319                 }
1320               else if (MPFR_UNLIKELY (rn == 0))
1321                 {
1322                   /* The current value x[i] is a zero. The code below matters
1323                      only when all values found until now are zeros, otherwise
1324                      it is harmless (the test rn == 0 above is just a minor
1325                      optimization).
1326                      Here we track the sign of the zero result when all inputs
1327                      are zeros: if all zeros have the same sign, the result
1328                      will have this sign, otherwise (i.e. if there is at least
1329                      a zero of each sign), the sign of the zero result depends
1330                      only on the rounding mode (note that this choice is
1331                      sticky when new zeros are considered). */
1332                   MPFR_ASSERTD (MPFR_IS_ZERO (x[i]));
1333                   if (sign_zero == 0)
1334                     sign_zero = MPFR_SIGN (x[i]);
1335                   else if (MPFR_SIGN (x[i]) != sign_zero)
1336                     sign_zero = rnd == MPFR_RNDD ? -1 : 1;
1337                 }
1338             }
1339           else
1340             {
1341               /* The current value x[i] is a regular number. */
1342               mpfr_exp_t e = MPFR_GET_EXP (x[i]);
1343               if (e > maxexp)
1344                 maxexp = e;  /* maximum exponent found until now */
1345               rn++;  /* current number of regular inputs */
1346             }
1347         }
1348 
1349       MPFR_LOG_MSG (("rn=%lu sign_inf=%d sign_zero=%d\n",
1350                      rn, sign_inf, sign_zero));
1351 
1352       /* At this point the result cannot be NaN (this case has already
1353          been filtered out). */
1354 
1355       if (MPFR_UNLIKELY (sign_inf != 0))
1356         {
1357           /* At least one infinity, and all of them have the same sign
1358              sign_inf. The sum is the infinity of this sign. */
1359           MPFR_SET_INF (sum);
1360           MPFR_SET_SIGN (sum, sign_inf);
1361           MPFR_RET (0);
1362         }
1363 
1364       /* At this point, all the inputs are finite numbers. */
1365 
1366       if (MPFR_UNLIKELY (rn == 0))
1367         {
1368           /* All the numbers were zeros (and there is at least one).
1369              The sum is zero with sign sign_zero. */
1370           MPFR_ASSERTD (sign_zero != 0);
1371           MPFR_SET_ZERO (sum);
1372           MPFR_SET_SIGN (sum, sign_zero);
1373           MPFR_RET (0);
1374         }
1375 
1376       /* Optimize the case where there are only two regular numbers. */
1377       if (MPFR_UNLIKELY (rn <= 2))
1378         {
1379           unsigned long h = ULONG_MAX;
1380 
1381           for (i = 0; i < n; i++)
1382             if (! MPFR_IS_SINGULAR (x[i]))
1383               {
1384                 if (rn == 1)
1385                   return mpfr_set (sum, x[i], rnd);
1386                 if (h != ULONG_MAX)
1387                   return mpfr_add (sum, x[h], x[i], rnd);
1388                 h = i;
1389               }
1390           MPFR_RET_NEVER_GO_HERE();
1391         }
1392 
1393       return sum_aux (sum, x, n, rnd, maxexp, rn);
1394     }
1395 }
1396