xref: /dflybsd-src/contrib/gmp/mpn/generic/dcpi1_bdiv_qr.c (revision 86d7f5d305c6adaa56ff4582ece9859d73106103)
/* mpn_dcpi1_bdiv_qr -- divide-and-conquer Hensel division with precomputed
   inverse, returning quotient and remainder.

   Contributed to the GNU project by Niels Möller and Torbjorn Granlund.

   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.

Copyright 2006, 2007, 2009, 2010 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 3 of the License, or (at your
option) any later version.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
2686d7f5d3SJohn Marino 
2786d7f5d3SJohn Marino #include "gmp.h"
2886d7f5d3SJohn Marino #include "gmp-impl.h"
2986d7f5d3SJohn Marino 
3086d7f5d3SJohn Marino 
/* Computes Hensel binary division of {np, 2*n} by {dp, n}.

   Output, writing N for the number stored at np and d for the number stored
   at dp (to keep them apart from the limb count n), and qn for the quotient
   size in limbs (here qn = n):

      q = N * d^{-1} mod 2^{qn * GMP_NUMB_BITS},

      r = (N - q * d) * 2^{-qn * GMP_NUMB_BITS}

   Stores q at qp. Stores the n least significant limbs of r at the high half
   of np, and returns the borrow from the subtraction N - q*d.

   d must be odd. dinv is (-d)^-1 mod 2^GMP_NUMB_BITS. */
4386d7f5d3SJohn Marino 
4486d7f5d3SJohn Marino mp_size_t
mpn_dcpi1_bdiv_qr_n_itch(mp_size_t n)4586d7f5d3SJohn Marino mpn_dcpi1_bdiv_qr_n_itch (mp_size_t n)
4686d7f5d3SJohn Marino {
4786d7f5d3SJohn Marino   return n;
4886d7f5d3SJohn Marino }
4986d7f5d3SJohn Marino 
5086d7f5d3SJohn Marino mp_limb_t
mpn_dcpi1_bdiv_qr_n(mp_ptr qp,mp_ptr np,mp_srcptr dp,mp_size_t n,mp_limb_t dinv,mp_ptr tp)5186d7f5d3SJohn Marino mpn_dcpi1_bdiv_qr_n (mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t n,
5286d7f5d3SJohn Marino 		     mp_limb_t dinv, mp_ptr tp)
5386d7f5d3SJohn Marino {
5486d7f5d3SJohn Marino   mp_size_t lo, hi;
5586d7f5d3SJohn Marino   mp_limb_t cy;
5686d7f5d3SJohn Marino   mp_limb_t rh;
5786d7f5d3SJohn Marino 
5886d7f5d3SJohn Marino   lo = n >> 1;			/* floor(n/2) */
5986d7f5d3SJohn Marino   hi = n - lo;			/* ceil(n/2) */
6086d7f5d3SJohn Marino 
6186d7f5d3SJohn Marino   if (BELOW_THRESHOLD (lo, DC_BDIV_QR_THRESHOLD))
6286d7f5d3SJohn Marino     cy = mpn_sbpi1_bdiv_qr (qp, np, 2 * lo, dp, lo, dinv);
6386d7f5d3SJohn Marino   else
6486d7f5d3SJohn Marino     cy = mpn_dcpi1_bdiv_qr_n (qp, np, dp, lo, dinv, tp);
6586d7f5d3SJohn Marino 
6686d7f5d3SJohn Marino   mpn_mul (tp, dp + lo, hi, qp, lo);
6786d7f5d3SJohn Marino 
6886d7f5d3SJohn Marino   mpn_incr_u (tp + lo, cy);
6986d7f5d3SJohn Marino   rh = mpn_sub (np + lo, np + lo, n + hi, tp, n);
7086d7f5d3SJohn Marino 
7186d7f5d3SJohn Marino   if (BELOW_THRESHOLD (hi, DC_BDIV_QR_THRESHOLD))
7286d7f5d3SJohn Marino     cy = mpn_sbpi1_bdiv_qr (qp + lo, np + lo, 2 * hi, dp, hi, dinv);
7386d7f5d3SJohn Marino   else
7486d7f5d3SJohn Marino     cy = mpn_dcpi1_bdiv_qr_n (qp + lo, np + lo, dp, hi, dinv, tp);
7586d7f5d3SJohn Marino 
7686d7f5d3SJohn Marino   mpn_mul (tp, qp + lo, hi, dp + hi, lo);
7786d7f5d3SJohn Marino 
7886d7f5d3SJohn Marino   mpn_incr_u (tp + hi, cy);
7986d7f5d3SJohn Marino   rh += mpn_sub_n (np + n, np + n, tp, n);
8086d7f5d3SJohn Marino 
8186d7f5d3SJohn Marino   return rh;
8286d7f5d3SJohn Marino }
8386d7f5d3SJohn Marino 
8486d7f5d3SJohn Marino mp_limb_t
mpn_dcpi1_bdiv_qr(mp_ptr qp,mp_ptr np,mp_size_t nn,mp_srcptr dp,mp_size_t dn,mp_limb_t dinv)8586d7f5d3SJohn Marino mpn_dcpi1_bdiv_qr (mp_ptr qp, mp_ptr np, mp_size_t nn,
8686d7f5d3SJohn Marino 		   mp_srcptr dp, mp_size_t dn, mp_limb_t dinv)
8786d7f5d3SJohn Marino {
8886d7f5d3SJohn Marino   mp_size_t qn;
8986d7f5d3SJohn Marino   mp_limb_t rr, cy;
9086d7f5d3SJohn Marino   mp_ptr tp;
9186d7f5d3SJohn Marino   TMP_DECL;
9286d7f5d3SJohn Marino 
9386d7f5d3SJohn Marino   TMP_MARK;
9486d7f5d3SJohn Marino 
9586d7f5d3SJohn Marino   ASSERT (dn >= 2);		/* to adhere to mpn_sbpi1_div_qr's limits */
9686d7f5d3SJohn Marino   ASSERT (nn - dn >= 1);	/* to adhere to mpn_sbpi1_div_qr's limits */
9786d7f5d3SJohn Marino   ASSERT (dp[0] & 1);
9886d7f5d3SJohn Marino 
9986d7f5d3SJohn Marino   tp = TMP_SALLOC_LIMBS (dn);
10086d7f5d3SJohn Marino 
10186d7f5d3SJohn Marino   qn = nn - dn;
10286d7f5d3SJohn Marino 
10386d7f5d3SJohn Marino   if (qn > dn)
10486d7f5d3SJohn Marino     {
10586d7f5d3SJohn Marino       /* Reduce qn mod dn without division, optimizing small operations.  */
10686d7f5d3SJohn Marino       do
10786d7f5d3SJohn Marino 	qn -= dn;
10886d7f5d3SJohn Marino       while (qn > dn);
10986d7f5d3SJohn Marino 
11086d7f5d3SJohn Marino       /* Perform the typically smaller block first.  */
11186d7f5d3SJohn Marino       if (BELOW_THRESHOLD (qn, DC_BDIV_QR_THRESHOLD))
11286d7f5d3SJohn Marino 	cy = mpn_sbpi1_bdiv_qr (qp, np, 2 * qn, dp, qn, dinv);
11386d7f5d3SJohn Marino       else
11486d7f5d3SJohn Marino 	cy = mpn_dcpi1_bdiv_qr_n (qp, np, dp, qn, dinv, tp);
11586d7f5d3SJohn Marino 
11686d7f5d3SJohn Marino       rr = 0;
11786d7f5d3SJohn Marino       if (qn != dn)
11886d7f5d3SJohn Marino 	{
11986d7f5d3SJohn Marino 	  if (qn > dn - qn)
12086d7f5d3SJohn Marino 	    mpn_mul (tp, qp, qn, dp + qn, dn - qn);
12186d7f5d3SJohn Marino 	  else
12286d7f5d3SJohn Marino 	    mpn_mul (tp, dp + qn, dn - qn, qp, qn);
12386d7f5d3SJohn Marino 	  mpn_incr_u (tp + qn, cy);
12486d7f5d3SJohn Marino 
12586d7f5d3SJohn Marino 	  rr = mpn_sub (np + qn, np + qn, nn - qn, tp, dn);
12686d7f5d3SJohn Marino 	  cy = 0;
12786d7f5d3SJohn Marino 	}
12886d7f5d3SJohn Marino 
12986d7f5d3SJohn Marino       np += qn;
13086d7f5d3SJohn Marino       qp += qn;
13186d7f5d3SJohn Marino 
13286d7f5d3SJohn Marino       qn = nn - dn - qn;
13386d7f5d3SJohn Marino       do
13486d7f5d3SJohn Marino 	{
13586d7f5d3SJohn Marino 	  rr += mpn_sub_1 (np + dn, np + dn, qn, cy);
13686d7f5d3SJohn Marino 	  cy = mpn_dcpi1_bdiv_qr_n (qp, np, dp, dn, dinv, tp);
13786d7f5d3SJohn Marino 	  qp += dn;
13886d7f5d3SJohn Marino 	  np += dn;
13986d7f5d3SJohn Marino 	  qn -= dn;
14086d7f5d3SJohn Marino 	}
14186d7f5d3SJohn Marino       while (qn > 0);
14286d7f5d3SJohn Marino       TMP_FREE;
14386d7f5d3SJohn Marino       return rr + cy;
14486d7f5d3SJohn Marino     }
14586d7f5d3SJohn Marino 
14686d7f5d3SJohn Marino   if (BELOW_THRESHOLD (qn, DC_BDIV_QR_THRESHOLD))
14786d7f5d3SJohn Marino     cy = mpn_sbpi1_bdiv_qr (qp, np, 2 * qn, dp, qn, dinv);
14886d7f5d3SJohn Marino   else
14986d7f5d3SJohn Marino     cy = mpn_dcpi1_bdiv_qr_n (qp, np, dp, qn, dinv, tp);
15086d7f5d3SJohn Marino 
15186d7f5d3SJohn Marino   rr = 0;
15286d7f5d3SJohn Marino   if (qn != dn)
15386d7f5d3SJohn Marino     {
15486d7f5d3SJohn Marino       if (qn > dn - qn)
15586d7f5d3SJohn Marino 	mpn_mul (tp, qp, qn, dp + qn, dn - qn);
15686d7f5d3SJohn Marino       else
15786d7f5d3SJohn Marino 	mpn_mul (tp, dp + qn, dn - qn, qp, qn);
15886d7f5d3SJohn Marino       mpn_incr_u (tp + qn, cy);
15986d7f5d3SJohn Marino 
16086d7f5d3SJohn Marino       rr = mpn_sub (np + qn, np + qn, nn - qn, tp, dn);
16186d7f5d3SJohn Marino       cy = 0;
16286d7f5d3SJohn Marino     }
16386d7f5d3SJohn Marino 
16486d7f5d3SJohn Marino   TMP_FREE;
16586d7f5d3SJohn Marino   return rr + cy;
16686d7f5d3SJohn Marino }
167