/* mpn_dcpi1_bdiv_qr -- divide-and-conquer Hensel division with precomputed
   inverse, returning quotient and remainder.

   Contributed to the GNU project by Niels Möller and Torbjorn Granlund.

   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.

Copyright 2006, 2007, 2009, 2010 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 3 of the License, or (at your
option) any later version.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */

#include "gmp.h"
#include "gmp-impl.h"


/* mpn_dcpi1_bdiv_qr_n computes Hensel binary division of N = {np, 2*n} by
   D = {dp, n}.

   Output:

      q = N * D^{-1} mod 2^{n * GMP_NUMB_BITS},

      r = (N - q * D) * 2^{-n * GMP_NUMB_BITS}

   Here n is the limb count of the divisor, which in this balanced case is
   also the limb count of the quotient.

   Stores q at qp. Stores the n least significant limbs of r at the high half
   of np, and returns the borrow from the subtraction N - q*D.

   D must be odd. dinv is (-D)^-1 mod 2^GMP_NUMB_BITS. */

4486d7f5d3SJohn Marino mp_size_t
mpn_dcpi1_bdiv_qr_n_itch(mp_size_t n)4586d7f5d3SJohn Marino mpn_dcpi1_bdiv_qr_n_itch (mp_size_t n)
4686d7f5d3SJohn Marino {
4786d7f5d3SJohn Marino return n;
4886d7f5d3SJohn Marino }
4986d7f5d3SJohn Marino
5086d7f5d3SJohn Marino mp_limb_t
mpn_dcpi1_bdiv_qr_n(mp_ptr qp,mp_ptr np,mp_srcptr dp,mp_size_t n,mp_limb_t dinv,mp_ptr tp)5186d7f5d3SJohn Marino mpn_dcpi1_bdiv_qr_n (mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t n,
5286d7f5d3SJohn Marino mp_limb_t dinv, mp_ptr tp)
5386d7f5d3SJohn Marino {
5486d7f5d3SJohn Marino mp_size_t lo, hi;
5586d7f5d3SJohn Marino mp_limb_t cy;
5686d7f5d3SJohn Marino mp_limb_t rh;
5786d7f5d3SJohn Marino
5886d7f5d3SJohn Marino lo = n >> 1; /* floor(n/2) */
5986d7f5d3SJohn Marino hi = n - lo; /* ceil(n/2) */
6086d7f5d3SJohn Marino
6186d7f5d3SJohn Marino if (BELOW_THRESHOLD (lo, DC_BDIV_QR_THRESHOLD))
6286d7f5d3SJohn Marino cy = mpn_sbpi1_bdiv_qr (qp, np, 2 * lo, dp, lo, dinv);
6386d7f5d3SJohn Marino else
6486d7f5d3SJohn Marino cy = mpn_dcpi1_bdiv_qr_n (qp, np, dp, lo, dinv, tp);
6586d7f5d3SJohn Marino
6686d7f5d3SJohn Marino mpn_mul (tp, dp + lo, hi, qp, lo);
6786d7f5d3SJohn Marino
6886d7f5d3SJohn Marino mpn_incr_u (tp + lo, cy);
6986d7f5d3SJohn Marino rh = mpn_sub (np + lo, np + lo, n + hi, tp, n);
7086d7f5d3SJohn Marino
7186d7f5d3SJohn Marino if (BELOW_THRESHOLD (hi, DC_BDIV_QR_THRESHOLD))
7286d7f5d3SJohn Marino cy = mpn_sbpi1_bdiv_qr (qp + lo, np + lo, 2 * hi, dp, hi, dinv);
7386d7f5d3SJohn Marino else
7486d7f5d3SJohn Marino cy = mpn_dcpi1_bdiv_qr_n (qp + lo, np + lo, dp, hi, dinv, tp);
7586d7f5d3SJohn Marino
7686d7f5d3SJohn Marino mpn_mul (tp, qp + lo, hi, dp + hi, lo);
7786d7f5d3SJohn Marino
7886d7f5d3SJohn Marino mpn_incr_u (tp + hi, cy);
7986d7f5d3SJohn Marino rh += mpn_sub_n (np + n, np + n, tp, n);
8086d7f5d3SJohn Marino
8186d7f5d3SJohn Marino return rh;
8286d7f5d3SJohn Marino }
8386d7f5d3SJohn Marino
8486d7f5d3SJohn Marino mp_limb_t
mpn_dcpi1_bdiv_qr(mp_ptr qp,mp_ptr np,mp_size_t nn,mp_srcptr dp,mp_size_t dn,mp_limb_t dinv)8586d7f5d3SJohn Marino mpn_dcpi1_bdiv_qr (mp_ptr qp, mp_ptr np, mp_size_t nn,
8686d7f5d3SJohn Marino mp_srcptr dp, mp_size_t dn, mp_limb_t dinv)
8786d7f5d3SJohn Marino {
8886d7f5d3SJohn Marino mp_size_t qn;
8986d7f5d3SJohn Marino mp_limb_t rr, cy;
9086d7f5d3SJohn Marino mp_ptr tp;
9186d7f5d3SJohn Marino TMP_DECL;
9286d7f5d3SJohn Marino
9386d7f5d3SJohn Marino TMP_MARK;
9486d7f5d3SJohn Marino
9586d7f5d3SJohn Marino ASSERT (dn >= 2); /* to adhere to mpn_sbpi1_div_qr's limits */
9686d7f5d3SJohn Marino ASSERT (nn - dn >= 1); /* to adhere to mpn_sbpi1_div_qr's limits */
9786d7f5d3SJohn Marino ASSERT (dp[0] & 1);
9886d7f5d3SJohn Marino
9986d7f5d3SJohn Marino tp = TMP_SALLOC_LIMBS (dn);
10086d7f5d3SJohn Marino
10186d7f5d3SJohn Marino qn = nn - dn;
10286d7f5d3SJohn Marino
10386d7f5d3SJohn Marino if (qn > dn)
10486d7f5d3SJohn Marino {
10586d7f5d3SJohn Marino /* Reduce qn mod dn without division, optimizing small operations. */
10686d7f5d3SJohn Marino do
10786d7f5d3SJohn Marino qn -= dn;
10886d7f5d3SJohn Marino while (qn > dn);
10986d7f5d3SJohn Marino
11086d7f5d3SJohn Marino /* Perform the typically smaller block first. */
11186d7f5d3SJohn Marino if (BELOW_THRESHOLD (qn, DC_BDIV_QR_THRESHOLD))
11286d7f5d3SJohn Marino cy = mpn_sbpi1_bdiv_qr (qp, np, 2 * qn, dp, qn, dinv);
11386d7f5d3SJohn Marino else
11486d7f5d3SJohn Marino cy = mpn_dcpi1_bdiv_qr_n (qp, np, dp, qn, dinv, tp);
11586d7f5d3SJohn Marino
11686d7f5d3SJohn Marino rr = 0;
11786d7f5d3SJohn Marino if (qn != dn)
11886d7f5d3SJohn Marino {
11986d7f5d3SJohn Marino if (qn > dn - qn)
12086d7f5d3SJohn Marino mpn_mul (tp, qp, qn, dp + qn, dn - qn);
12186d7f5d3SJohn Marino else
12286d7f5d3SJohn Marino mpn_mul (tp, dp + qn, dn - qn, qp, qn);
12386d7f5d3SJohn Marino mpn_incr_u (tp + qn, cy);
12486d7f5d3SJohn Marino
12586d7f5d3SJohn Marino rr = mpn_sub (np + qn, np + qn, nn - qn, tp, dn);
12686d7f5d3SJohn Marino cy = 0;
12786d7f5d3SJohn Marino }
12886d7f5d3SJohn Marino
12986d7f5d3SJohn Marino np += qn;
13086d7f5d3SJohn Marino qp += qn;
13186d7f5d3SJohn Marino
13286d7f5d3SJohn Marino qn = nn - dn - qn;
13386d7f5d3SJohn Marino do
13486d7f5d3SJohn Marino {
13586d7f5d3SJohn Marino rr += mpn_sub_1 (np + dn, np + dn, qn, cy);
13686d7f5d3SJohn Marino cy = mpn_dcpi1_bdiv_qr_n (qp, np, dp, dn, dinv, tp);
13786d7f5d3SJohn Marino qp += dn;
13886d7f5d3SJohn Marino np += dn;
13986d7f5d3SJohn Marino qn -= dn;
14086d7f5d3SJohn Marino }
14186d7f5d3SJohn Marino while (qn > 0);
14286d7f5d3SJohn Marino TMP_FREE;
14386d7f5d3SJohn Marino return rr + cy;
14486d7f5d3SJohn Marino }
14586d7f5d3SJohn Marino
14686d7f5d3SJohn Marino if (BELOW_THRESHOLD (qn, DC_BDIV_QR_THRESHOLD))
14786d7f5d3SJohn Marino cy = mpn_sbpi1_bdiv_qr (qp, np, 2 * qn, dp, qn, dinv);
14886d7f5d3SJohn Marino else
14986d7f5d3SJohn Marino cy = mpn_dcpi1_bdiv_qr_n (qp, np, dp, qn, dinv, tp);
15086d7f5d3SJohn Marino
15186d7f5d3SJohn Marino rr = 0;
15286d7f5d3SJohn Marino if (qn != dn)
15386d7f5d3SJohn Marino {
15486d7f5d3SJohn Marino if (qn > dn - qn)
15586d7f5d3SJohn Marino mpn_mul (tp, qp, qn, dp + qn, dn - qn);
15686d7f5d3SJohn Marino else
15786d7f5d3SJohn Marino mpn_mul (tp, dp + qn, dn - qn, qp, qn);
15886d7f5d3SJohn Marino mpn_incr_u (tp + qn, cy);
15986d7f5d3SJohn Marino
16086d7f5d3SJohn Marino rr = mpn_sub (np + qn, np + qn, nn - qn, tp, dn);
16186d7f5d3SJohn Marino cy = 0;
16286d7f5d3SJohn Marino }
16386d7f5d3SJohn Marino
16486d7f5d3SJohn Marino TMP_FREE;
16586d7f5d3SJohn Marino return rr + cy;
16686d7f5d3SJohn Marino }
167