xref: /dflybsd-src/contrib/gmp/mpn/generic/binvert.c (revision 86d7f5d305c6adaa56ff4582ece9859d73106103)
/* Compute {up,n}^(-1) mod B^n.

   Contributed to the GNU project by Torbjorn Granlund.

   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.

Copyright (C) 2004, 2005, 2006, 2007, 2009 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 3 of the License, or (at your
option) any later version.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
2586d7f5d3SJohn Marino 
2686d7f5d3SJohn Marino #include "gmp.h"
2786d7f5d3SJohn Marino #include "gmp-impl.h"
2886d7f5d3SJohn Marino 
2986d7f5d3SJohn Marino 
/*
  Newton iteration for the inverse r of u; the second line is the first
  with the product expanded:

  r[k+1] = r[k] - r[k] * (u*r[k] - 1)
  r[k+1] = r[k] + r[k] - r[k]*(u*r[k])
*/
3486d7f5d3SJohn Marino 
/* Count how many of the powers of two 2^0 .. 2^15 are <= n.  For
   1 <= n <= 0xffff this is the bit length of n, i.e. floor(log2(n)) + 1;
   it is 0 for n <= 0 and saturates at 16 for n >= 0x8000.

   This is intended for constant THRESHOLDs only, where the compiler can
   completely fold the result.  Note that the argument is evaluated 16
   times, so it must not have side effects.  */
#define LOG2C(n) \
 (((n) >=    0x1) + ((n) >=    0x2) + ((n) >=    0x4) + ((n) >=    0x8) + \
  ((n) >=   0x10) + ((n) >=   0x20) + ((n) >=   0x40) + ((n) >=   0x80) + \
  ((n) >=  0x100) + ((n) >=  0x200) + ((n) >=  0x400) + ((n) >=  0x800) + \
  ((n) >= 0x1000) + ((n) >= 0x2000) + ((n) >= 0x4000) + ((n) >= 0x8000))
4286d7f5d3SJohn Marino 
/* Capacity of the sizes[] array in mpn_binvert, i.e. an upper bound on the
   number of precision levels recorded there.  Each level halves the size
   (rounding up), so the bound starts from the bit width of mp_size_t,
   capped at 48.  In a normal build, LOG2C (BINV_NEWTON_THRESHOLD) entries
   are subtracted, since sizes at or below the threshold are handled by the
   base case and never recorded.  In a tune program build no subtraction is
   done; presumably the threshold is not a compile-time constant there.  */
#if TUNE_PROGRAM_BUILD
#define NPOWS \
 ((sizeof(mp_size_t) > 6 ? 48 : 8*sizeof(mp_size_t)))
#else
#define NPOWS \
 ((sizeof(mp_size_t) > 6 ? 48 : 8*sizeof(mp_size_t)) - LOG2C (BINV_NEWTON_THRESHOLD))
#endif
5086d7f5d3SJohn Marino 
5186d7f5d3SJohn Marino mp_size_t
mpn_binvert_itch(mp_size_t n)5286d7f5d3SJohn Marino mpn_binvert_itch (mp_size_t n)
5386d7f5d3SJohn Marino {
5486d7f5d3SJohn Marino   mp_size_t itch_local = mpn_mulmod_bnm1_next_size (n);
5586d7f5d3SJohn Marino   mp_size_t itch_out = mpn_mulmod_bnm1_itch (itch_local, n, (n + 1) >> 1);
5686d7f5d3SJohn Marino   return itch_local + itch_out;
5786d7f5d3SJohn Marino }
5886d7f5d3SJohn Marino 
5986d7f5d3SJohn Marino void
mpn_binvert(mp_ptr rp,mp_srcptr up,mp_size_t n,mp_ptr scratch)6086d7f5d3SJohn Marino mpn_binvert (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_ptr scratch)
6186d7f5d3SJohn Marino {
6286d7f5d3SJohn Marino   mp_ptr xp;
6386d7f5d3SJohn Marino   mp_size_t rn, newrn;
6486d7f5d3SJohn Marino   mp_size_t sizes[NPOWS], *sizp;
6586d7f5d3SJohn Marino   mp_limb_t di;
6686d7f5d3SJohn Marino 
6786d7f5d3SJohn Marino   /* Compute the computation precisions from highest to lowest, leaving the
6886d7f5d3SJohn Marino      base case size in 'rn'.  */
6986d7f5d3SJohn Marino   sizp = sizes;
7086d7f5d3SJohn Marino   for (rn = n; ABOVE_THRESHOLD (rn, BINV_NEWTON_THRESHOLD); rn = (rn + 1) >> 1)
7186d7f5d3SJohn Marino     *sizp++ = rn;
7286d7f5d3SJohn Marino 
7386d7f5d3SJohn Marino   xp = scratch;
7486d7f5d3SJohn Marino 
7586d7f5d3SJohn Marino   /* Compute a base value of rn limbs.  */
7686d7f5d3SJohn Marino   MPN_ZERO (xp, rn);
7786d7f5d3SJohn Marino   xp[0] = 1;
7886d7f5d3SJohn Marino   binvert_limb (di, up[0]);
7986d7f5d3SJohn Marino   if (BELOW_THRESHOLD (rn, DC_BDIV_Q_THRESHOLD))
8086d7f5d3SJohn Marino     mpn_sbpi1_bdiv_q (rp, xp, rn, up, rn, -di);
8186d7f5d3SJohn Marino   else
8286d7f5d3SJohn Marino     mpn_dcpi1_bdiv_q (rp, xp, rn, up, rn, -di);
8386d7f5d3SJohn Marino 
8486d7f5d3SJohn Marino   /* Use Newton iterations to get the desired precision.  */
8586d7f5d3SJohn Marino   for (; rn < n; rn = newrn)
8686d7f5d3SJohn Marino     {
8786d7f5d3SJohn Marino       mp_size_t m;
8886d7f5d3SJohn Marino       newrn = *--sizp;
8986d7f5d3SJohn Marino 
9086d7f5d3SJohn Marino       /* X <- UR. */
9186d7f5d3SJohn Marino       m = mpn_mulmod_bnm1_next_size (newrn);
9286d7f5d3SJohn Marino       mpn_mulmod_bnm1 (xp, m, up, newrn, rp, rn, xp + m);
9386d7f5d3SJohn Marino       mpn_sub_1 (xp + m, xp, rn - (m - newrn), 1);
9486d7f5d3SJohn Marino 
9586d7f5d3SJohn Marino       /* R = R(X/B^rn) */
9686d7f5d3SJohn Marino       mpn_mullo_n (rp + rn, rp, xp + rn, newrn - rn);
9786d7f5d3SJohn Marino       mpn_neg (rp + rn, rp + rn, newrn - rn);
9886d7f5d3SJohn Marino     }
9986d7f5d3SJohn Marino }
100