xref: /dflybsd-src/contrib/gmp/mpn/generic/invertappr.c (revision 86d7f5d305c6adaa56ff4582ece9859d73106103)
/* mpn_invertappr and helper functions.  Compute I such that
   floor((B^{2n}-1)/U) - 1 <= I + B^n <= floor((B^{2n}-1)/U).
386d7f5d3SJohn Marino 
486d7f5d3SJohn Marino    Contributed to the GNU project by Marco Bodrato.
586d7f5d3SJohn Marino 
686d7f5d3SJohn Marino    The algorithm used here was inspired by ApproximateReciprocal from "Modern
786d7f5d3SJohn Marino    Computer Arithmetic", by Richard P. Brent and Paul Zimmermann.  Special
886d7f5d3SJohn Marino    thanks to Paul Zimmermann for his very valuable suggestions on all the
986d7f5d3SJohn Marino    theoretical aspects during the work on this code.
1086d7f5d3SJohn Marino 
1186d7f5d3SJohn Marino    THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
1286d7f5d3SJohn Marino    SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
1386d7f5d3SJohn Marino    GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
1486d7f5d3SJohn Marino 
1586d7f5d3SJohn Marino Copyright (C) 2007, 2009, 2010 Free Software Foundation, Inc.
1686d7f5d3SJohn Marino 
1786d7f5d3SJohn Marino This file is part of the GNU MP Library.
1886d7f5d3SJohn Marino 
1986d7f5d3SJohn Marino The GNU MP Library is free software; you can redistribute it and/or modify
2086d7f5d3SJohn Marino it under the terms of the GNU Lesser General Public License as published by
2186d7f5d3SJohn Marino the Free Software Foundation; either version 3 of the License, or (at your
2286d7f5d3SJohn Marino option) any later version.
2386d7f5d3SJohn Marino 
2486d7f5d3SJohn Marino The GNU MP Library is distributed in the hope that it will be useful, but
2586d7f5d3SJohn Marino WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
2686d7f5d3SJohn Marino or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
2786d7f5d3SJohn Marino License for more details.
2886d7f5d3SJohn Marino 
2986d7f5d3SJohn Marino You should have received a copy of the GNU Lesser General Public License
3086d7f5d3SJohn Marino along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
3186d7f5d3SJohn Marino 
3286d7f5d3SJohn Marino /* FIXME: Remove NULL and TMP_*, as soon as all the callers properly
3386d7f5d3SJohn Marino    allocate and pass the scratch to the function. */
3486d7f5d3SJohn Marino #include <stdlib.h>		/* for NULL */
3586d7f5d3SJohn Marino 
3686d7f5d3SJohn Marino #include "gmp.h"
3786d7f5d3SJohn Marino #include "gmp-impl.h"
3886d7f5d3SJohn Marino #include "longlong.h"
3986d7f5d3SJohn Marino 
/* FIXME: The iterative version splits the operand in two slightly unbalanced
   parts, so the use of log_2 (or counting the bits) underestimates the
   maximum number of iterations.  */
4386d7f5d3SJohn Marino 
/* LOG2C(n): number of powers of two 2^k, 0 <= k <= 15, that are <= n.
   For 1 <= n < 2^16 this is the bit length of n (floor(log2(n)) + 1); it is
   0 for n <= 0 and saturates at 16 for n >= 2^15.

   This is intended for constant THRESHOLDs only, where the compiler
   can completely fold the result.  The argument is evaluated 16 times, so
   it must not have side effects.  */
#define LOG2C(n) \
 (((n) >=    0x1) + ((n) >=    0x2) + ((n) >=    0x4) + ((n) >=    0x8) + \
  ((n) >=   0x10) + ((n) >=   0x20) + ((n) >=   0x40) + ((n) >=   0x80) + \
  ((n) >=  0x100) + ((n) >=  0x200) + ((n) >=  0x400) + ((n) >=  0x800) + \
  ((n) >= 0x1000) + ((n) >= 0x2000) + ((n) >= 0x4000) + ((n) >= 0x8000))
5186d7f5d3SJohn Marino 
/* NPOWS: capacity of the sizes[] array used by mpn_ni_invertappr to record
   the precision of each Newton step.
   MAYBE_dcpi1_divappr: whether mpn_bc_invertappr may have to fall back on
   mpn_dcpi1_divappr_q instead of always using mpn_sbpi1_divappr_q.

   In a tune build no constant thresholds are assumed: NPOWS is not reduced
   by LOG2C (INV_NEWTON_THRESHOLD) and the divide-and-conquer path is always
   kept available.  */
#if TUNE_PROGRAM_BUILD
#define NPOWS \
 ((sizeof(mp_size_t) > 6 ? 48 : 8*sizeof(mp_size_t)))
#define MAYBE_dcpi1_divappr   1
#else
#define NPOWS \
 ((sizeof(mp_size_t) > 6 ? 48 : 8*sizeof(mp_size_t)) - LOG2C (INV_NEWTON_THRESHOLD))
#define MAYBE_dcpi1_divappr \
  (INV_NEWTON_THRESHOLD < DC_DIVAPPR_Q_THRESHOLD)
/* When both the Newton and the appr thresholds lie above the mulmod_bnm1
   threshold, force the latter to 0 for this file.  */
#if (INV_NEWTON_THRESHOLD > INV_MULMOD_BNM1_THRESHOLD) && \
    (INV_APPR_THRESHOLD > INV_MULMOD_BNM1_THRESHOLD)
#undef  INV_MULMOD_BNM1_THRESHOLD
#define INV_MULMOD_BNM1_THRESHOLD 0 /* always when Newton */
#endif
#endif
6786d7f5d3SJohn Marino 
/* All three functions mpn{,_bc,_ni}_invertappr (ip, dp, n, scratch) take
   the strictly normalised value {dp,n} (i.e., most significant bit must be set)
   as an input, and compute {ip,n}: the approximate reciprocal of {dp,n}.

   Let e = mpn*_invertappr (ip, dp, n, scratch) be the returned value; the
   following conditions are satisfied by the output:
     0 <= e <= 1;
     {dp,n}*(B^n+{ip,n}) < B^{2n} <= {dp,n}*(B^n+{ip,n}+1+e) .
   I.e. e=0 means that the result {ip,n} equals the one given by mpn_invert.
        e=1 means that the result _may_ be one less than expected.

   The _bc version returns e=1 most of the time.
   The _ni version should return e=0 most of the time; only about 1% of
   possible random input should give e=1.

   When the strict result is needed, i.e., e=0 in the relation above:
     {dp,n}*(B^n+{ip,n}) < B^{2n} <= {dp,n}*(B^n+{ip,n}+1) ;
   the function mpn_invert (ip, dp, n, scratch) should be used instead.  */
8686d7f5d3SJohn Marino 
8786d7f5d3SJohn Marino /* Maximum scratch needed by this branch (at tp): 3*n + 2 */
8886d7f5d3SJohn Marino static mp_limb_t
mpn_bc_invertappr(mp_ptr ip,mp_srcptr dp,mp_size_t n,mp_ptr tp)8986d7f5d3SJohn Marino mpn_bc_invertappr (mp_ptr ip, mp_srcptr dp, mp_size_t n, mp_ptr tp)
9086d7f5d3SJohn Marino {
9186d7f5d3SJohn Marino   mp_ptr xp;
9286d7f5d3SJohn Marino 
9386d7f5d3SJohn Marino   ASSERT (n > 0);
9486d7f5d3SJohn Marino   ASSERT (dp[n-1] & GMP_NUMB_HIGHBIT);
9586d7f5d3SJohn Marino   ASSERT (! MPN_OVERLAP_P (ip, n, dp, n));
9686d7f5d3SJohn Marino   ASSERT (! MPN_OVERLAP_P (ip, n, tp, mpn_invertappr_itch(n)));
9786d7f5d3SJohn Marino   ASSERT (! MPN_OVERLAP_P (dp, n, tp, mpn_invertappr_itch(n)));
9886d7f5d3SJohn Marino 
9986d7f5d3SJohn Marino   /* Compute a base value of r limbs. */
10086d7f5d3SJohn Marino   if (n == 1)
10186d7f5d3SJohn Marino     invert_limb (*ip, *dp);
10286d7f5d3SJohn Marino   else {
10386d7f5d3SJohn Marino     mp_size_t i;
10486d7f5d3SJohn Marino     xp = tp + n + 2;				/* 2 * n limbs */
10586d7f5d3SJohn Marino 
10686d7f5d3SJohn Marino     for (i = n - 1; i >= 0; i--)
10786d7f5d3SJohn Marino       xp[i] = GMP_NUMB_MAX;
10886d7f5d3SJohn Marino     mpn_com (xp + n, dp, n);
10986d7f5d3SJohn Marino 
11086d7f5d3SJohn Marino     /* Now xp contains B^2n - {dp,n}*B^n - 1 */
11186d7f5d3SJohn Marino 
11286d7f5d3SJohn Marino     /* FIXME: if mpn_*pi1_divappr_q handles n==2, use it! */
11386d7f5d3SJohn Marino     if (n == 2) {
11486d7f5d3SJohn Marino       mpn_divrem_2 (ip, 0, xp, 4, dp);
11586d7f5d3SJohn Marino     } else {
11686d7f5d3SJohn Marino       gmp_pi1_t inv;
11786d7f5d3SJohn Marino       invert_pi1 (inv, dp[n-1], dp[n-2]);
11886d7f5d3SJohn Marino       if (! MAYBE_dcpi1_divappr
11986d7f5d3SJohn Marino 	  || BELOW_THRESHOLD (n, DC_DIVAPPR_Q_THRESHOLD))
12086d7f5d3SJohn Marino 	mpn_sbpi1_divappr_q (ip, xp, 2 * n, dp, n, inv.inv32);
12186d7f5d3SJohn Marino       else
12286d7f5d3SJohn Marino 	mpn_dcpi1_divappr_q (ip, xp, 2 * n, dp, n, &inv);
12386d7f5d3SJohn Marino       MPN_DECR_U(ip, n, 1);
12486d7f5d3SJohn Marino       return 1;
12586d7f5d3SJohn Marino     }
12686d7f5d3SJohn Marino   }
12786d7f5d3SJohn Marino   return 0;
12886d7f5d3SJohn Marino }
12986d7f5d3SJohn Marino 
13086d7f5d3SJohn Marino /* mpn_ni_invertappr: computes the approximate reciprocal using Newton's
13186d7f5d3SJohn Marino    iterations (at least one).
13286d7f5d3SJohn Marino 
13386d7f5d3SJohn Marino    Inspired by Algorithm "ApproximateReciprocal", published in "Modern Computer
13486d7f5d3SJohn Marino    Arithmetic" by Richard P. Brent and Paul Zimmermann, algorithm 3.5, page 121
13586d7f5d3SJohn Marino    in version 0.4 of the book.
13686d7f5d3SJohn Marino 
13786d7f5d3SJohn Marino    Some adaptations were introduced, to allow product mod B^m-1 and return the
13886d7f5d3SJohn Marino    value e.
13986d7f5d3SJohn Marino 
14086d7f5d3SJohn Marino    USE_MUL_N = 1 (default) introduces a correction in such a way that "the
14186d7f5d3SJohn Marino    value of B^{n+h}-T computed at step 8 cannot exceed B^n-1" (the book reads
14286d7f5d3SJohn Marino    "2B^n-1").  This correction should not require to modify the proof.
14386d7f5d3SJohn Marino 
14486d7f5d3SJohn Marino    We use a wrapped product modulo B^m-1.  NOTE: is there any normalisation
14586d7f5d3SJohn Marino    problem for the [0] class?  It shouldn't: we compute 2*|A*X_h - B^{n+h}| <
14686d7f5d3SJohn Marino    B^m-1.  We may get [0] if and only if we get AX_h = B^{n+h}.  This can
14786d7f5d3SJohn Marino    happen only if A=B^{n}/2, but this implies X_h = B^{h}*2-1 i.e., AX_h =
14886d7f5d3SJohn Marino    B^{n+h} - A, then we get into the "negative" branch, where X_h is not
14986d7f5d3SJohn Marino    incremented (because A < B^n).
15086d7f5d3SJohn Marino 
15186d7f5d3SJohn Marino    FIXME: the scratch for mulmod_bnm1 does not currently fit in the scratch, it
15286d7f5d3SJohn Marino    is allocated apart.  */
15386d7f5d3SJohn Marino 
15486d7f5d3SJohn Marino #define USE_MUL_N 1
15586d7f5d3SJohn Marino 
15686d7f5d3SJohn Marino mp_limb_t
mpn_ni_invertappr(mp_ptr ip,mp_srcptr dp,mp_size_t n,mp_ptr scratch)15786d7f5d3SJohn Marino mpn_ni_invertappr (mp_ptr ip, mp_srcptr dp, mp_size_t n, mp_ptr scratch)
15886d7f5d3SJohn Marino {
15986d7f5d3SJohn Marino   mp_limb_t cy;
16086d7f5d3SJohn Marino   mp_ptr xp;
16186d7f5d3SJohn Marino   mp_size_t rn, mn;
16286d7f5d3SJohn Marino   mp_size_t sizes[NPOWS], *sizp;
16386d7f5d3SJohn Marino   mp_ptr tp;
16486d7f5d3SJohn Marino   TMP_DECL;
16586d7f5d3SJohn Marino #define rp scratch
16686d7f5d3SJohn Marino 
16786d7f5d3SJohn Marino   ASSERT (n > 2);
16886d7f5d3SJohn Marino   ASSERT (dp[n-1] & GMP_NUMB_HIGHBIT);
16986d7f5d3SJohn Marino   ASSERT (! MPN_OVERLAP_P (ip, n, dp, n));
17086d7f5d3SJohn Marino   ASSERT (! MPN_OVERLAP_P (ip, n, scratch, mpn_invertappr_itch(n)));
17186d7f5d3SJohn Marino   ASSERT (! MPN_OVERLAP_P (dp, n, scratch, mpn_invertappr_itch(n)));
17286d7f5d3SJohn Marino 
17386d7f5d3SJohn Marino   /* Compute the computation precisions from highest to lowest, leaving the
17486d7f5d3SJohn Marino      base case size in 'rn'.  */
17586d7f5d3SJohn Marino   sizp = sizes;
17686d7f5d3SJohn Marino   rn = n;
17786d7f5d3SJohn Marino   do {
17886d7f5d3SJohn Marino     *sizp = rn;
17986d7f5d3SJohn Marino     rn = ((rn) >> 1) + 1;
18086d7f5d3SJohn Marino     sizp ++;
18186d7f5d3SJohn Marino   } while (ABOVE_THRESHOLD (rn, INV_NEWTON_THRESHOLD));
18286d7f5d3SJohn Marino 
18386d7f5d3SJohn Marino   /* We search the inverse of 0.{dp,n}, we compute it as 1.{ip,n} */
18486d7f5d3SJohn Marino   dp += n;
18586d7f5d3SJohn Marino   ip += n;
18686d7f5d3SJohn Marino 
18786d7f5d3SJohn Marino   /* Compute a base value of rn limbs. */
18886d7f5d3SJohn Marino   mpn_bc_invertappr (ip - rn, dp - rn, rn, scratch);
18986d7f5d3SJohn Marino 
19086d7f5d3SJohn Marino   TMP_MARK;
19186d7f5d3SJohn Marino 
19286d7f5d3SJohn Marino   if (ABOVE_THRESHOLD (n, INV_MULMOD_BNM1_THRESHOLD))
19386d7f5d3SJohn Marino     {
19486d7f5d3SJohn Marino       mn = mpn_mulmod_bnm1_next_size (n + 1);
19586d7f5d3SJohn Marino       tp = TMP_ALLOC_LIMBS (mpn_mulmod_bnm1_itch (mn, n, (n >> 1) + 1));
19686d7f5d3SJohn Marino     }
19786d7f5d3SJohn Marino   /* Use Newton's iterations to get the desired precision.*/
19886d7f5d3SJohn Marino 
19986d7f5d3SJohn Marino   /* define rp scratch; 2rn + 1 limbs <= 2(n>>1 + 1) + 1 <= n + 3  limbs */
20086d7f5d3SJohn Marino   /* Maximum scratch needed by this branch <= 3*n + 2 */
20186d7f5d3SJohn Marino   xp = scratch + n + 3;				/*  n + rn limbs */
20286d7f5d3SJohn Marino   while (1) {
20386d7f5d3SJohn Marino     mp_limb_t method;
20486d7f5d3SJohn Marino 
20586d7f5d3SJohn Marino     n = *--sizp;
20686d7f5d3SJohn Marino     /*
20786d7f5d3SJohn Marino       v    n  v
20886d7f5d3SJohn Marino       +----+--+
20986d7f5d3SJohn Marino       ^ rn ^
21086d7f5d3SJohn Marino     */
21186d7f5d3SJohn Marino 
21286d7f5d3SJohn Marino     /* Compute i_jd . */
21386d7f5d3SJohn Marino     if (BELOW_THRESHOLD (n, INV_MULMOD_BNM1_THRESHOLD)
21486d7f5d3SJohn Marino 	|| ((mn = mpn_mulmod_bnm1_next_size (n + 1)) > (n + rn))) {
21586d7f5d3SJohn Marino       /* FIXME: We do only need {xp,n+1}*/
21686d7f5d3SJohn Marino       mpn_mul (xp, dp - n, n, ip - rn, rn);
21786d7f5d3SJohn Marino       mpn_add_n (xp + rn, xp + rn, dp - n, n - rn + 1);
21886d7f5d3SJohn Marino       method = 1; /* Remember we used (truncated) product */
21986d7f5d3SJohn Marino       /* We computed cy.{xp,rn+n} <- 1.{ip,rn} * 0.{dp,n} */
22086d7f5d3SJohn Marino     } else { /* Use B^n-1 wraparound */
22186d7f5d3SJohn Marino       mpn_mulmod_bnm1 (xp, mn, dp - n, n, ip - rn, rn, tp);
22286d7f5d3SJohn Marino       /* We computed {xp,mn} <- {ip,rn} * {dp,n} mod (B^mn-1) */
22386d7f5d3SJohn Marino       /* We know that 2*|ip*dp + dp*B^rn - B^{rn+n}| < B^mn-1 */
22486d7f5d3SJohn Marino       /* Add dp*B^rn mod (B^mn-1) */
22586d7f5d3SJohn Marino       ASSERT (n >= mn - rn);
22686d7f5d3SJohn Marino       xp[mn] = 1 + mpn_add_n (xp + rn, xp + rn, dp - n, mn - rn);
22786d7f5d3SJohn Marino       cy = mpn_add_n (xp, xp, dp - (n - (mn - rn)), n - (mn - rn));
22886d7f5d3SJohn Marino       MPN_INCR_U (xp + n - (mn - rn), mn + 1 - n + (mn - rn), cy);
22986d7f5d3SJohn Marino       ASSERT (n + rn >=  mn);
23086d7f5d3SJohn Marino       /* Subtract B^{rn+n} */
23186d7f5d3SJohn Marino       MPN_DECR_U (xp + rn + n - mn, 2*mn + 1 - rn - n, 1);
23286d7f5d3SJohn Marino       if (xp[mn])
23386d7f5d3SJohn Marino 	MPN_INCR_U (xp, mn, xp[mn] - 1);
23486d7f5d3SJohn Marino       else
23586d7f5d3SJohn Marino 	MPN_DECR_U (xp, mn, 1);
23686d7f5d3SJohn Marino       method = 0; /* Remember we are working Mod B^m-1 */
23786d7f5d3SJohn Marino     }
23886d7f5d3SJohn Marino 
23986d7f5d3SJohn Marino     if (xp[n] < 2) { /* "positive" residue class */
24086d7f5d3SJohn Marino       cy = 1;
24186d7f5d3SJohn Marino       while (xp[n] || mpn_cmp (xp, dp - n, n)>0) {
24286d7f5d3SJohn Marino 	xp[n] -= mpn_sub_n (xp, xp, dp - n, n);
24386d7f5d3SJohn Marino 	cy ++;
24486d7f5d3SJohn Marino       }
24586d7f5d3SJohn Marino       MPN_DECR_U(ip - rn, rn, cy);
24686d7f5d3SJohn Marino       ASSERT (cy <= 4); /* at most 3 cycles for the while above */
24786d7f5d3SJohn Marino       ASSERT_NOCARRY (mpn_sub_n (xp, dp - n, xp, n));
24886d7f5d3SJohn Marino       ASSERT (xp[n] == 0);
24986d7f5d3SJohn Marino     } else { /* "negative" residue class */
25086d7f5d3SJohn Marino       mpn_com (xp, xp, n + 1);
25186d7f5d3SJohn Marino       MPN_INCR_U(xp, n + 1, method);
25286d7f5d3SJohn Marino       ASSERT (xp[n] <= 1);
25386d7f5d3SJohn Marino #if USE_MUL_N
25486d7f5d3SJohn Marino       if (xp[n]) {
25586d7f5d3SJohn Marino 	MPN_INCR_U(ip - rn, rn, 1);
25686d7f5d3SJohn Marino 	ASSERT_CARRY (mpn_sub_n (xp, xp, dp - n, n));
25786d7f5d3SJohn Marino       }
25886d7f5d3SJohn Marino #endif
25986d7f5d3SJohn Marino     }
26086d7f5d3SJohn Marino 
26186d7f5d3SJohn Marino     /* Compute x_ju_j. FIXME:We need {rp+rn,rn}, mulhi? */
26286d7f5d3SJohn Marino #if USE_MUL_N
26386d7f5d3SJohn Marino     mpn_mul_n (rp, xp + n - rn, ip - rn, rn);
26486d7f5d3SJohn Marino #else
26586d7f5d3SJohn Marino     rp[2*rn] = 0;
26686d7f5d3SJohn Marino     mpn_mul (rp, xp + n - rn, rn + xp[n], ip - rn, rn);
26786d7f5d3SJohn Marino #endif
26886d7f5d3SJohn Marino     /* We need _only_ the carry from the next addition  */
26986d7f5d3SJohn Marino     /* Anyway 2rn-n <= 2... we don't need to optimise.  */
27086d7f5d3SJohn Marino     cy = mpn_add_n (rp + rn, rp + rn, xp + n - rn, 2*rn - n);
27186d7f5d3SJohn Marino     cy = mpn_add_nc (ip - n, rp + 3*rn - n, xp + rn, n - rn, cy);
27286d7f5d3SJohn Marino     MPN_INCR_U (ip - rn, rn, cy + (1-USE_MUL_N)*(rp[2*rn] + xp[n]));
27386d7f5d3SJohn Marino     if (sizp == sizes) { /* Get out of the cycle */
27486d7f5d3SJohn Marino       /* Check for possible carry propagation from below. */
27586d7f5d3SJohn Marino       cy = rp[3*rn - n - 1] > GMP_NUMB_MAX - 7; /* Be conservative. */
27686d7f5d3SJohn Marino /*    cy = mpn_add_1 (rp + rn, rp + rn, 2*rn - n, 4); */
27786d7f5d3SJohn Marino       break;
27886d7f5d3SJohn Marino     }
27986d7f5d3SJohn Marino     rn = n;
28086d7f5d3SJohn Marino   }
28186d7f5d3SJohn Marino   TMP_FREE;
28286d7f5d3SJohn Marino 
28386d7f5d3SJohn Marino   return cy;
28486d7f5d3SJohn Marino #undef rp
28586d7f5d3SJohn Marino }
28686d7f5d3SJohn Marino 
28786d7f5d3SJohn Marino mp_limb_t
mpn_invertappr(mp_ptr ip,mp_srcptr dp,mp_size_t n,mp_ptr scratch)28886d7f5d3SJohn Marino mpn_invertappr (mp_ptr ip, mp_srcptr dp, mp_size_t n, mp_ptr scratch)
28986d7f5d3SJohn Marino {
29086d7f5d3SJohn Marino   mp_limb_t res;
29186d7f5d3SJohn Marino   TMP_DECL;
29286d7f5d3SJohn Marino 
29386d7f5d3SJohn Marino   TMP_MARK;
29486d7f5d3SJohn Marino 
29586d7f5d3SJohn Marino   if (scratch == NULL)
29686d7f5d3SJohn Marino     scratch = TMP_ALLOC_LIMBS (mpn_invertappr_itch (n));
29786d7f5d3SJohn Marino 
29886d7f5d3SJohn Marino   ASSERT (n > 0);
29986d7f5d3SJohn Marino   ASSERT (dp[n-1] & GMP_NUMB_HIGHBIT);
30086d7f5d3SJohn Marino   ASSERT (! MPN_OVERLAP_P (ip, n, dp, n));
30186d7f5d3SJohn Marino   ASSERT (! MPN_OVERLAP_P (ip, n, scratch, mpn_invertappr_itch(n)));
30286d7f5d3SJohn Marino   ASSERT (! MPN_OVERLAP_P (dp, n, scratch, mpn_invertappr_itch(n)));
30386d7f5d3SJohn Marino 
30486d7f5d3SJohn Marino   if (BELOW_THRESHOLD (n, INV_NEWTON_THRESHOLD))
30586d7f5d3SJohn Marino     res = mpn_bc_invertappr (ip, dp, n, scratch);
30686d7f5d3SJohn Marino   else
30786d7f5d3SJohn Marino     res = mpn_ni_invertappr (ip, dp, n, scratch);
30886d7f5d3SJohn Marino 
30986d7f5d3SJohn Marino   TMP_FREE;
31086d7f5d3SJohn Marino   return res;
31186d7f5d3SJohn Marino }
312