xref: /netbsd-src/external/lgpl3/mpfr/dist/src/mulders.c (revision bdc22b2e01993381dcefeff2bc9b56ca75a4235c)
1 /* Mulders' MulHigh function (short product)
2 
3 Copyright 2005-2016 Free Software Foundation, Inc.
4 Contributed by the AriC and Caramba projects, INRIA.
5 
6 This file is part of the GNU MPFR Library.
7 
8 The GNU MPFR Library is free software; you can redistribute it and/or modify
9 it under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or (at your
11 option) any later version.
12 
13 The GNU MPFR Library is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
16 License for more details.
17 
18 You should have received a copy of the GNU Lesser General Public License
19 along with the GNU MPFR Library; see the file COPYING.LESSER.  If not, see
20 http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
21 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
22 
23 /* References:
24    [1] Short Division of Long Integers, David Harvey and Paul Zimmermann,
25        Proceedings of the 20th Symposium on Computer Arithmetic (ARITH-20),
26        July 25-27, 2011, pages 7-14.
27 */
28 
29 #define MPFR_NEED_LONGLONG_H
30 #include "mpfr-impl.h"
31 
32 #ifndef MUL_FFT_THRESHOLD
33 #define MUL_FFT_THRESHOLD 8448
34 #endif
35 
36 /* Don't use MPFR_MULHIGH_SIZE since it is handled by tuneup */
37 #ifdef MPFR_MULHIGH_TAB_SIZE
38 static short mulhigh_ktab[MPFR_MULHIGH_TAB_SIZE];
39 #else
40 static short mulhigh_ktab[] = {MPFR_MULHIGH_TAB};
41 #define MPFR_MULHIGH_TAB_SIZE \
42   ((mp_size_t) (sizeof(mulhigh_ktab) / sizeof(mulhigh_ktab[0])))
43 #endif
44 
45 /* Put in  rp[n..2n-1] an approximation of the n high limbs
46    of {up, n} * {vp, n}. The error is less than n ulps of rp[n] (and the
47    approximation is always less or equal to the truncated full product).
48    Assume 2n limbs are allocated at rp.
49 
50    Implements Algorithm ShortMulNaive from [1].
51 */
52 static void
53 mpfr_mulhigh_n_basecase (mpfr_limb_ptr rp, mpfr_limb_srcptr up,
54                          mpfr_limb_srcptr vp, mp_size_t n)
55 {
56   mp_size_t i;
57 
58   rp += n - 1;
59   umul_ppmm (rp[1], rp[0], up[n-1], vp[0]); /* we neglect up[0..n-2]*vp[0],
60                                                which is less than B^n */
61   for (i = 1 ; i < n ; i++)
62     /* here, we neglect up[0..n-i-2] * vp[i], which is less than B^n too */
63     rp[i + 1] = mpn_addmul_1 (rp, up + (n - i - 1), i + 1, vp[i]);
64   /* in total, we neglect less than n*B^n, i.e., n ulps of rp[n]. */
65 }
66 
67 /* Put in  rp[0..n] the n+1 low limbs of {up, n} * {vp, n}.
68    Assume 2n limbs are allocated at rp. */
69 static void
70 mpfr_mullow_n_basecase (mpfr_limb_ptr rp, mpfr_limb_srcptr up,
71                         mpfr_limb_srcptr vp, mp_size_t n)
72 {
73   mp_size_t i;
74 
75   rp[n] = mpn_mul_1 (rp, up, n, vp[0]);
76   for (i = 1 ; i < n ; i++)
77     mpn_addmul_1 (rp + i, up, n - i + 1, vp[i]);
78 }
79 
80 /* Put in  rp[n..2n-1] an approximation of the n high limbs
81    of {np, n} * {mp, n}. The error is less than n ulps of rp[n] (and the
82    approximation is always less or equal to the truncated full product).
83 
84    Implements Algorithm ShortMul from [1].
85 */
86 void
87 mpfr_mulhigh_n (mpfr_limb_ptr rp, mpfr_limb_srcptr np, mpfr_limb_srcptr mp,
88                 mp_size_t n)
89 {
90   mp_size_t k;
91 
92   MPFR_ASSERTN (MPFR_MULHIGH_TAB_SIZE >= 8); /* so that 3*(n/4) > n/2 */
93   k = MPFR_LIKELY (n < MPFR_MULHIGH_TAB_SIZE) ? mulhigh_ktab[n] : 3*(n/4);
94   /* Algorithm ShortMul from [1] requires k >= (n+3)/2, which translates
95      into k >= (n+4)/2 in the C language. */
96   MPFR_ASSERTD (k == -1 || k == 0 || (k >= (n+4)/2 && k < n));
97   if (k < 0)
98     mpn_mul_basecase (rp, np, n, mp, n); /* result is exact, no error */
99   else if (k == 0)
100     mpfr_mulhigh_n_basecase (rp, np, mp, n); /* basecase error < n ulps */
101   else if (n > MUL_FFT_THRESHOLD)
102     mpn_mul_n (rp, np, mp, n); /* result is exact, no error */
103   else
104     {
105       mp_size_t l = n - k;
106       mp_limb_t cy;
107 
108       mpn_mul_n (rp + 2 * l, np + l, mp + l, k); /* fills rp[2l..2n-1] */
109       mpfr_mulhigh_n (rp, np + k, mp, l);        /* fills rp[l-1..2l-1] */
110       cy = mpn_add_n (rp + n - 1, rp + n - 1, rp + l - 1, l + 1);
111       mpfr_mulhigh_n (rp, np, mp + k, l);        /* fills rp[l-1..2l-1] */
112       cy += mpn_add_n (rp + n - 1, rp + n - 1, rp + l - 1, l + 1);
113       mpn_add_1 (rp + n + l, rp + n + l, k, cy); /* propagate carry */
114     }
115 }
116 
117 /* Put in  rp[0..n] the n+1 low limbs of {np, n} * {mp, n}.
118    Assume 2n limbs are allocated at rp. */
119 void
120 mpfr_mullow_n (mpfr_limb_ptr rp, mpfr_limb_srcptr np, mpfr_limb_srcptr mp,
121                mp_size_t n)
122 {
123   mp_size_t k;
124 
125   MPFR_ASSERTN (MPFR_MULHIGH_TAB_SIZE >= 8); /* so that 3*(n/4) > n/2 */
126   k = MPFR_LIKELY (n < MPFR_MULHIGH_TAB_SIZE) ? mulhigh_ktab[n] : 3*(n/4);
127   MPFR_ASSERTD (k == -1 || k == 0 || (2 * k >= n && k < n));
128   if (k < 0)
129     mpn_mul_basecase (rp, np, n, mp, n);
130   else if (k == 0)
131     mpfr_mullow_n_basecase (rp, np, mp, n);
132   else if (n > MUL_FFT_THRESHOLD)
133     mpn_mul_n (rp, np, mp, n);
134   else
135     {
136       mp_size_t l = n - k;
137 
138       mpn_mul_n (rp, np, mp, k);                      /* fills rp[0..2k] */
139       mpfr_mullow_n (rp + n, np + k, mp, l);          /* fills rp[n..n+2l] */
140       mpn_add_n (rp + k, rp + k, rp + n, l + 1);
141       mpfr_mullow_n (rp + n, np, mp + k, l);          /* fills rp[n..n+2l] */
142       mpn_add_n (rp + k, rp + k, rp + n, l + 1);
143     }
144 }
145 
146 #ifdef MPFR_SQRHIGH_TAB_SIZE
147 static short sqrhigh_ktab[MPFR_SQRHIGH_TAB_SIZE];
148 #else
149 static short sqrhigh_ktab[] = {MPFR_SQRHIGH_TAB};
150 #define MPFR_SQRHIGH_TAB_SIZE (sizeof(sqrhigh_ktab) / sizeof(sqrhigh_ktab[0]))
151 #endif
152 
153 /* Put in  rp[n..2n-1] an approximation of the n high limbs
154    of {np, n}^2. The error is less than n ulps of rp[n]. */
155 void
156 mpfr_sqrhigh_n (mpfr_limb_ptr rp, mpfr_limb_srcptr np, mp_size_t n)
157 {
158   mp_size_t k;
159 
160   MPFR_ASSERTN (MPFR_SQRHIGH_TAB_SIZE > 2); /* ensures k < n */
161   k = MPFR_LIKELY (n < MPFR_SQRHIGH_TAB_SIZE) ? sqrhigh_ktab[n]
162     : (n+4)/2; /* ensures that k >= (n+3)/2 */
163   MPFR_ASSERTD (k == -1 || k == 0 || (k >= (n+4)/2 && k < n));
164   if (k < 0)
165     /* we can't use mpn_sqr_basecase here, since it requires
166        n <= SQR_KARATSUBA_THRESHOLD, where SQR_KARATSUBA_THRESHOLD
167        is not exported by GMP */
168     mpn_sqr_n (rp, np, n);
169   else if (k == 0)
170     mpfr_mulhigh_n_basecase (rp, np, np, n);
171   else
172     {
173       mp_size_t l = n - k;
174       mp_limb_t cy;
175 
176       mpn_sqr_n (rp + 2 * l, np + l, k);          /* fills rp[2l..2n-1] */
177       mpfr_mulhigh_n (rp, np, np + k, l);         /* fills rp[l-1..2l-1] */
178       /* {rp+n-1,l+1} += 2 * {rp+l-1,l+1} */
179       cy = mpn_lshift (rp + l - 1, rp + l - 1, l + 1, 1);
180       cy += mpn_add_n (rp + n - 1, rp + n - 1, rp + l - 1, l + 1);
181       mpn_add_1 (rp + n + l, rp + n + l, k, cy); /* propagate carry */
182     }
183 }
184 
185 #ifdef MPFR_DIVHIGH_TAB_SIZE
186 static short divhigh_ktab[MPFR_DIVHIGH_TAB_SIZE];
187 #else
188 static short divhigh_ktab[] = {MPFR_DIVHIGH_TAB};
189 #define MPFR_DIVHIGH_TAB_SIZE (sizeof(divhigh_ktab) / sizeof(divhigh_ktab[0]))
190 #endif
191 
192 #ifndef __GMPFR_GMP_H__
193 #define mpfr_pi1_t gmp_pi1_t /* with a GMP build */
194 #endif
195 
196 #if !(defined(WANT_GMP_INTERNALS) && defined(HAVE___GMPN_SBPI1_DIVAPPR_Q))
197 /* Put in Q={qp, n} an approximation of N={np, 2*n} divided by D={dp, n},
198    with the most significant limb of the quotient as return value (0 or 1).
199    Assumes the most significant bit of D is set. Clobbers N.
200 
201    The approximate quotient Q satisfies - 2(n-1) < N/D - Q <= 4.
202 */
203 static mp_limb_t
204 mpfr_divhigh_n_basecase (mpfr_limb_ptr qp, mpfr_limb_ptr np,
205                          mpfr_limb_srcptr dp, mp_size_t n)
206 {
207   mp_limb_t qh, d1, d0, dinv, q2, q1, q0;
208   mpfr_pi1_t dinv2;
209 
210   np += n;
211 
212   if ((qh = (mpn_cmp (np, dp, n) >= 0)))
213     mpn_sub_n (np, np, dp, n);
214 
215   /* now {np, n} is less than D={dp, n}, which implies np[n-1] <= dp[n-1] */
216 
217   d1 = dp[n - 1];
218 
219   if (n == 1)
220     {
221       invert_limb (dinv, d1);
222       umul_ppmm (q1, q0, np[0], dinv);
223       qp[0] = np[0] + q1;
224       return qh;
225     }
226 
227   /* now n >= 2 */
228   d0 = dp[n - 2];
229   invert_pi1 (dinv2, d1, d0);
230   /* dinv2.inv32 = floor ((B^3 - 1) / (d0 + d1 B)) - B */
231   while (n > 1)
232     {
233       /* Invariant: it remains to reduce n limbs from N (in addition to the
234          initial low n limbs).
235          Since n >= 2 here, necessarily we had n >= 2 initially, which means
236          that in addition to the limb np[n-1] to reduce, we have at least 2
237          extra limbs, thus accessing np[n-3] is valid. */
238 
239       /* Warning: we can have np[n-1]>d1 or (np[n-1]=d1 and np[n-2]>=d0) here,
240          since we truncate the divisor at each step, but since {np,n} < D
241          originally, the largest possible partial quotient is B-1. */
242       if (MPFR_UNLIKELY(np[n-1] > d1 || (np[n-1] == d1 && np[n-2] >= d0)))
243         q2 = ~ (mp_limb_t) 0;
244       else
245         udiv_qr_3by2 (q2, q1, q0, np[n - 1], np[n - 2], np[n - 3],
246                       d1, d0, dinv2.inv32);
247       /* since q2 = floor((np[n-1]*B^2+np[n-2]*B+np[n-3])/(d1*B+d0)),
248          we have q2 <= (np[n-1]*B^2+np[n-2]*B+np[n-3])/(d1*B+d0),
249          thus np[n-1]*B^2+np[n-2]*B+np[n-3] >= q2*(d1*B+d0)
250          and {np-1, n} >= q2*D - q2*B^(n-2) >= q2*D - B^(n-1)
251          thus {np-1, n} - (q2-1)*D >= D - B^(n-1) >= 0
252          which proves that at most one correction is needed */
253       q0 = mpn_submul_1 (np - 1, dp, n, q2);
254       if (MPFR_UNLIKELY(q0 > np[n - 1]))
255         {
256           mpn_add_n (np - 1, np - 1, dp, n);
257           q2 --;
258         }
259       qp[--n] = q2;
260       dp ++;
261     }
262 
263   /* we have B+dinv2 = floor((B^3-1)/(d1*B+d0)) < B^2/d1
264      q1 = floor(np[0]*(B+dinv2)/B) <= floor(np[0]*B/d1)
265         <= floor((np[0]*B+np[1])/d1)
266      thus q1 is not larger than the true quotient.
267      q1 > np[0]*(B+dinv2)/B - 1 > np[0]*(B^3-1)/(d1*B+d0)/B - 2
268      For d1*B+d0 <> B^2/2, we have B+dinv2 = floor(B^3/(d1*B+d0))
269      thus q1 > np[0]*B^2/(d1*B+d0) - 2, i.e.,
270      (d1*B+d0)*q1 > np[0]*B^2 - 2*(d1*B+d0)
271      d1*B*q1 > np[0]*B^2 - 2*d1*B - 2*d0 - d0*q1 >= np[0]*B^2 - 2*d1*B - B^2
272      thus q1 > np[0]*B/d1 - 2 - B/d1 > np[0]*B/d1 - 4.
273 
274      For d1*B+d0 = B^2/2, dinv2 = B-1 thus q1 > np[0]*(2B-1)/B - 1 >
275      np[0]*B/d1 - 2.
276 
277      In all cases, if q = floor((np[0]*B+np[1])/d1), we have:
278      q - 4 <= q1 <= q
279   */
280   umul_ppmm (q1, q0, np[0], dinv2.inv32);
281   qp[0] = np[0] + q1;
282 
283   return qh;
284 }
285 #endif
286 
287 /* Put in {qp, n} an approximation of N={np, 2*n} divided by D={dp, n},
288    with the most significant limb of the quotient as return value (0 or 1).
289    Assumes the most significant bit of D is set. Clobbers N.
290 
291    This implements the ShortDiv algorithm from reference [1].
292 */
293 #if 1
294 mp_limb_t
295 mpfr_divhigh_n (mpfr_limb_ptr qp, mpfr_limb_ptr np, mpfr_limb_ptr dp,
296                 mp_size_t n)
297 {
298   mp_size_t k, l;
299   mp_limb_t qh, cy;
300   mpfr_limb_ptr tp;
301   MPFR_TMP_DECL(marker);
302 
303   MPFR_ASSERTN (MPFR_MULHIGH_TAB_SIZE >= 15); /* so that 2*(n/3) >= (n+4)/2 */
304   k = MPFR_LIKELY (n < MPFR_DIVHIGH_TAB_SIZE) ? divhigh_ktab[n] : 2*(n/3);
305 
306   if (k == 0)
307 #if defined(WANT_GMP_INTERNALS) && defined(HAVE___GMPN_SBPI1_DIVAPPR_Q)
308   {
309     mpfr_pi1_t dinv2;
310     invert_pi1 (dinv2, dp[n - 1], dp[n - 2]);
311     return __gmpn_sbpi1_divappr_q (qp, np, n + n, dp, n, dinv2.inv32);
312   }
313 #else /* use our own code for base-case short division */
314     return mpfr_divhigh_n_basecase (qp, np, dp, n);
315 #endif
316   else if (k == n)
317     /* for k=n, we use a division with remainder (mpn_divrem),
318      which computes the exact quotient */
319     return mpn_divrem (qp, 0, np, 2 * n, dp, n);
320 
321   MPFR_ASSERTD ((n+4)/2 <= k && k < n); /* bounds from [1] */
322   MPFR_TMP_MARK (marker);
323   l = n - k;
324   /* first divide the most significant 2k limbs from N by the most significant
325      k limbs of D */
326   qh = mpn_divrem (qp + l, 0, np + 2 * l, 2 * k, dp + l, k); /* exact */
327 
328   /* it remains {np,2l+k} = {np,n+l} as remainder */
329 
330   /* now we have to subtract high(Q1)*D0 where Q1=qh*B^k+{qp+l,k} and
331      D0={dp,l} */
332   tp = MPFR_TMP_LIMBS_ALLOC (2 * l);
333   mpfr_mulhigh_n (tp, qp + k, dp, l);
334   /* we are only interested in the upper l limbs from {tp,2l} */
335   cy = mpn_sub_n (np + n, np + n, tp + l, l);
336   if (qh)
337     cy += mpn_sub_n (np + n, np + n, dp, l);
338   while (cy > 0) /* Q1 was too large: subtract 1 to Q1 and add D to np+l */
339     {
340       qh -= mpn_sub_1 (qp + l, qp + l, k, MPFR_LIMB_ONE);
341       cy -= mpn_add_n (np + l, np + l, dp, n);
342     }
343 
344   /* now it remains {np,n+l} to divide by D */
345   cy = mpfr_divhigh_n (qp, np + k, dp + k, l);
346   qh += mpn_add_1 (qp + l, qp + l, k, cy);
347   MPFR_TMP_FREE(marker);
348 
349   return qh;
350 }
351 #else /* below is the FoldDiv(K) algorithm from [1] */
352 mp_limb_t
353 mpfr_divhigh_n (mpfr_limb_ptr qp, mpfr_limb_ptr np, mpfr_limb_ptr dp,
354                 mp_size_t n)
355 {
356   mp_size_t k, r;
357   mpfr_limb_ptr ip, tp, up;
358   mp_limb_t qh = 0, cy, cc;
359   int count;
360   MPFR_TMP_DECL(marker);
361 
362 #define K 3
363   if (n < K)
364     return mpn_divrem (qp, 0, np, 2 * n, dp, n);
365 
366   k = (n - 1) / K + 1; /* ceil(n/K) */
367 
368   MPFR_TMP_MARK (marker);
369   ip = MPFR_TMP_LIMBS_ALLOC (k + 1);
370   tp = MPFR_TMP_LIMBS_ALLOC (n + k);
371   up = MPFR_TMP_LIMBS_ALLOC (2 * (k + 1));
372   mpn_invert (ip, dp + n - (k + 1), k + 1, NULL); /* takes about 13% for n=1000 */
373   /* {ip, k+1} = floor((B^(2k+2)-1)/D - B^(k+1) where D = {dp+n-(k+1),k+1} */
374   for (r = n, cc = 0UL; r > 0;)
375     {
376       /* cc is the carry at np[n+r] */
377       MPFR_ASSERTD(cc <= 1);
378       /* FIXME: why can we have cc as large as say 8? */
379       count = 0;
380       while (cc > 0)
381         {
382           count ++;
383           MPFR_ASSERTD(count <= 1);
384           /* subtract {dp+n-r,r} from {np+n,r} */
385           cc -= mpn_sub_n (np + n, np + n, dp + n - r, r);
386           /* add 1 at qp[r] */
387           qh += mpn_add_1 (qp + r, qp + r, n - r, 1UL);
388         }
389       /* it remains r limbs to reduce, i.e., the remainder is {np, n+r} */
390       if (r < k)
391         {
392           ip += k - r;
393           k = r;
394         }
395       /* now r >= k */
396       /* qp + r - 2 * k -> up */
397       mpfr_mulhigh_n (up, np + n + r - (k + 1), ip, k + 1);
398       /* take into account the term B^k in the inverse: B^k * {np+n+r-k, k} */
399       cy = mpn_add_n (qp + r - k, up + k + 2, np + n + r - k, k);
400       /* since we need only r limbs of tp (below), it suffices to consider
401          r high limbs of dp */
402       if (r > k)
403         {
404 #if 0
405           mpn_mul (tp, dp + n - r, r, qp + r - k, k);
406 #else  /* use a short product for the low k x k limbs */
407           /* we know the upper k limbs of the r-limb product cancel with the
408              remainder, thus we only need to compute the low r-k limbs */
409           if (r - k >= k)
410             mpn_mul (tp + k, dp + n - r + k, r - k, qp + r - k, k);
411           else /* r-k < k */
412             {
413 /* #define LOW */
414 #ifndef LOW
415               mpn_mul (tp + k, qp + r - k, k, dp + n - r + k, r - k);
416 #else
417               mpfr_mullow_n_basecase (tp + k, qp + r - k, dp + n - r + k, r - k);
418               /* take into account qp[2r-2k] * dp[n - r + k] */
419               tp[r] += qp[2*r-2*k] * dp[n - r + k];
420 #endif
421               /* tp[k..r] is filled */
422             }
423 #if 0
424           mpfr_mulhigh_n (up, dp + n - r, qp + r - k, k);
425 #else /* compute one more limb. FIXME: we could add one limb of dp in the
426          above, to save one mpn_addmul_1 call */
427           mpfr_mulhigh_n (up, dp + n - r, qp + r - k, k - 1); /* {up,2k-2} */
428           /* add {qp + r - k, k - 1} * dp[n-r+k-1] */
429           up[2*k-2] = mpn_addmul_1 (up + k - 1, qp + r - k, k-1, dp[n-r+k-1]);
430           /* add {dp+n-r, k} * qp[r-1] */
431           up[2*k-1] = mpn_addmul_1 (up + k - 1, dp + n - r, k, qp[r-1]);
432 #endif
433 #ifndef LOW
434           cc = mpn_add_n (tp + k, tp + k, up + k, k);
435           mpn_add_1 (tp + 2 * k, tp + 2 * k, r - k, cc);
436 #else
437           /* update tp[k..r] */
438           if (r - k + 1 <= k)
439             mpn_add_n (tp + k, tp + k, up + k, r - k + 1);
440           else /* r - k >= k */
441             {
442               cc = mpn_add_n (tp + k, tp + k, up + k, k);
443               mpn_add_1 (tp + 2 * k, tp + 2 * k, r - 2 * k + 1, cc);
444             }
445 #endif
446 #endif
447         }
448       else /* last step: since we only want the quotient, no need to update,
449               just propagate the carry cy */
450         {
451           MPFR_ASSERTD(r < n);
452           if (cy > 0)
453             qh += mpn_add_1 (qp + r, qp + r, n - r, cy);
454           break;
455         }
456       /* subtract {tp, n+k} from {np+r-k, n+k}; however we only want to
457          update {np+n, n} */
458       /* we should have tp[r] = np[n+r-k] up to 1 */
459       MPFR_ASSERTD(tp[r] == np[n + r - k] || tp[r] + 1 == np[n + r - k]);
460 #ifndef LOW
461       cc = mpn_sub_n (np + n - 1, np + n - 1, tp + k - 1, r + 1); /* borrow at np[n+r] */
462 #else
463       cc = mpn_sub_n (np + n - 1, np + n - 1, tp + k - 1, r - k + 2);
464 #endif
465       /* if cy = 1, subtract {dp, n} from {np+r, n}, thus
466          {dp+n-r,r} from {np+n,r} */
467       if (cy)
468         {
469           if (r < n)
470             cc += mpn_sub_n (np + n - 1, np + n - 1, dp + n - r - 1, r + 1);
471           else
472             cc += mpn_sub_n (np + n, np + n, dp + n - r, r);
473           /* propagate cy */
474           if (r == n)
475             qh = cy;
476           else
477             qh += mpn_add_1 (qp + r, qp + r, n - r, cy);
478         }
479       /* cc is the borrow at np[n+r] */
480       count = 0;
481       while (cc > 0) /* quotient was too large */
482         {
483           count++;
484           MPFR_ASSERTD (count <= 1);
485           cy = mpn_add_n (np + n, np + n, dp + n - (r - k), r - k);
486           cc -= mpn_add_1 (np + n + r - k, np + n + r - k, k, cy);
487           qh -= mpn_sub_1 (qp + r - k, qp + r - k, n - (r - k), 1UL);
488         }
489       r -= k;
490       cc = np[n + r];
491     }
492   MPFR_TMP_FREE(marker);
493 
494   return qh;
495 }
496 #endif
497