/* mpn_divexact(qp,np,nn,dp,dn,tp) -- Divide N = {np,nn} by D = {dp,dn} storing
   the result in Q = {qp,nn-dn+1} expecting no remainder.  Overlap allowed
   between Q and N; all other overlap disallowed.

   Contributed to the GNU project by Torbjorn Granlund.

   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.

   Copyright 2006, 2007, 2009 Free Software Foundation, Inc.

   This file is part of the GNU MP Library.

   The GNU MP Library is free software; you can redistribute it and/or modify
   it under the terms of the GNU Lesser General Public License as published by
   the Free Software Foundation; either version 3 of the License, or (at your
   option) any later version.

   The GNU MP Library is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
   License for more details.

   You should have received a copy of the GNU Lesser General Public License
   along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */


#include "gmp.h"
#include "gmp-impl.h"
#include "longlong.h"
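
/* Illustrative usage sketch (not part of the library): the documented route
   to this operation is the mpz layer, e.g. mpz_divexact, which requires the
   divisor to divide the dividend exactly:

     mpz_t d, q, n;
     mpz_init_set_ui (d, 123456789);
     mpz_init_set_ui (q, 987654321);
     mpz_init (n);
     mpz_mul (n, d, q);        (now n = d*q, so the division is exact)
     mpz_divexact (q, n, d);   (q is 987654321 again)

   The same precondition holds at the mpn level below: {dp,dn} must divide
   {np,nn} exactly, and the quotient occupies nn-dn+1 limbs.  */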

#if 1
void
mpn_divexact (mp_ptr qp,
              mp_srcptr np, mp_size_t nn,
              mp_srcptr dp, mp_size_t dn)
{
  unsigned shift;
  mp_size_t qn;
  mp_ptr tp, wp;
  TMP_DECL;

  ASSERT (dn > 0);
  ASSERT (nn >= dn);
  ASSERT (dp[dn-1] > 0);

  while (dp[0] == 0)
    {
      ASSERT (np[0] == 0);
      dp++;
      np++;
      dn--;
      nn--;
    }

  if (dn == 1)
    {
      MPN_DIVREM_OR_DIVEXACT_1 (qp, np, nn, dp[0]);
      return;
    }

  TMP_MARK;

  qn = nn + 1 - dn;
  count_trailing_zeros (shift, dp[0]);

  if (shift > 0)
    {
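      /* Only the low limbs of the divisor can influence the qn-limb quotient
         computed below; when dn > qn we still shift qn+1 source limbs so that
         the bits coming in from above reach limb qn-1 of the shifted copy.  */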
      mp_size_t ss = (dn > qn) ? qn + 1 : dn;

      tp = TMP_ALLOC_LIMBS (ss);
      mpn_rshift (tp, dp, ss, shift);
      dp = tp;

      /* Since we have excluded dn == 1, we have nn > qn, and we need
         to shift one limb beyond qn.  */
      wp = TMP_ALLOC_LIMBS (qn + 1);
      mpn_rshift (wp, np, qn + 1, shift);
    }
  else
    {
      wp = TMP_ALLOC_LIMBS (qn);
      MPN_COPY (wp, np, qn);
    }

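  /* mpn_bdiv_q computes the quotient 2-adically, i.e. mod B^qn, and that
     result depends only on the low qn limbs of the divisor, so a longer
     divisor can safely be truncated here.  */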
  if (dn > qn)
    dn = qn;

  tp = TMP_ALLOC_LIMBS (mpn_bdiv_q_itch (qn, dn));
  mpn_bdiv_q (qp, wp, qn, dp, dn, tp);
  TMP_FREE;
}

#else

/* We use Jebelean's bidirectional exact division algorithm.  This is
   somewhat naively implemented, with equal quotient parts done by 2-adic
   division and truncating division.  Since 2-adic division is faster, it
   should be used for a larger chunk.

   This code is horrendously ugly, in all sorts of ways.

   * It was hacked without much care or thought, but with a testing program.
   * It handles scratch space frivolously, and furthermore the itch function
     is broken.
   * It doesn't provide any measures to deal with mu_divappr_q's +3 error.  We
     have yet to provoke an error due to this, though.
   * Algorithm selection leaves a lot to be desired.  In particular, the
     choice between DC and MU isn't a single cutoff point, but we treat it
     like one.
   * It makes the msb part 1 or 2 limbs larger than the lsb part, even though
     the latter is faster.  We should at least reverse this, but perhaps we
     should make the lsb part considerably larger.  (How do we tune this?)
*/
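
/* Rough sketch of the bidirectional split used below (informal; B denotes
   2^GMP_NUMB_BITS and the exact quotient Q = N/D has qn = nn - dn + 1 limbs):
   the low qn0 limbs of Q, i.e. Q mod B^qn0, are computed 2-adically (Hensel
   division) from the low limbs of N and D, while the remaining high limbs,
   roughly Q div B^(qn0-1), come from a truncating (approximate) division of
   the high limbs.  The two parts share the limb qp[qn0-1]; the value of that
   limb delivered by the truncating division (saved in qml) is later compared
   against the exact 2-adic value to correct the high part by at most one.  */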

mp_size_t
mpn_divexact_itch (mp_size_t nn, mp_size_t dn)
{
  return nn + dn;		/* FIXME this is not right */
}

void
mpn_divexact (mp_ptr qp,
              mp_srcptr np, mp_size_t nn,
              mp_srcptr dp, mp_size_t dn,
              mp_ptr scratch)
{
  mp_size_t qn;
  mp_size_t nn0, qn0;
  mp_size_t nn1, qn1;
  mp_ptr tp;
  mp_limb_t qml;
  mp_limb_t qh;
  int cnt;
  mp_ptr xdp;
  mp_limb_t di;
  mp_limb_t cy;
  gmp_pi1_t dinv;
  TMP_DECL;

  TMP_MARK;

  qn = nn - dn + 1;

  /* For small divisors or small quotients, don't use Jebelean's algorithm.  */
  if (dn < DIVEXACT_JEB_THRESHOLD || qn < DIVEXACT_JEB_THRESHOLD)
    {
      tp = scratch;
      MPN_COPY (tp, np, qn);
      binvert_limb (di, dp[0]);  di = -di;
      dn = MIN (dn, qn);
      mpn_sbpi1_bdiv_q (qp, tp, qn, dp, dn, di);
      TMP_FREE;
      return;
    }

  qn0 = ((nn - dn) >> 1) + 1;	/* low quotient size */

  /* If the quotient is much larger than the divisor, the bidirectional
     algorithm does not work as currently implemented.  Fall back to plain
     bdiv.  */
  if (qn0 > dn)
    {
      if (BELOW_THRESHOLD (dn, DC_BDIV_Q_THRESHOLD))
	{
	  tp = scratch;
	  MPN_COPY (tp, np, qn);
	  binvert_limb (di, dp[0]);  di = -di;
	  dn = MIN (dn, qn);
	  mpn_sbpi1_bdiv_q (qp, tp, qn, dp, dn, di);
	}
      else if (BELOW_THRESHOLD (dn, MU_BDIV_Q_THRESHOLD))
	{
	  tp = scratch;
	  MPN_COPY (tp, np, qn);
	  binvert_limb (di, dp[0]);  di = -di;
	  mpn_dcpi1_bdiv_q (qp, tp, qn, dp, dn, di);
	}
      else
	{
	  mpn_mu_bdiv_q (qp, np, qn, dp, dn, scratch);
	}
      TMP_FREE;
      return;
    }

  nn0 = qn0 + qn0;

  nn1 = nn0 - 1 + ((nn-dn) & 1);
  qn1 = qn0;
  if (LIKELY (qn0 != dn))
    {
      nn1 = nn1 + 1;
      qn1 = qn1 + 1;
      if (UNLIKELY (dp[dn - 1] == 1 && qn1 != dn))
	{
	  /* If the leading divisor limb == 1, i.e. has just one bit, we have
	     to include an extra limb in order to get the needed overlap.  */
	  /* FIXME: Now with the mu_divappr_q function, we should really need
	     more overlap.  That indicates one of two things: (1) The test code
	     is not good.  (2) We actually overlap too much by default.  */
	  nn1 = nn1 + 1;
	  qn1 = qn1 + 1;
	}
    }

  tp = TMP_ALLOC_LIMBS (nn1 + 1);

  count_leading_zeros (cnt, dp[dn - 1]);

  /* Normalize divisor, store into tmp area.  */
  if (cnt != 0)
    {
      xdp = TMP_ALLOC_LIMBS (qn1);
      mpn_lshift (xdp, dp + dn - qn1, qn1, cnt);
    }
  else
    {
      xdp = (mp_ptr) dp + dn - qn1;
    }

  /* Shift dividend according to the divisor normalization.  */
  /* FIXME: We compute too much here for XX_divappr_q, but these functions'
     interfaces want a pointer to the imaginary least significant limb, not
     to the least significant *used* limb.  Of course, we could leave nn1-qn1
     rubbish limbs in the low part, to save some time.  */
  if (cnt != 0)
    {
      cy = mpn_lshift (tp, np + nn - nn1, nn1, cnt);
      if (cy != 0)
	{
	  tp[nn1] = cy;
	  nn1++;
	}
    }
  else
    {
      /* FIXME: This copy is not needed for mpn_mu_divappr_q, except when the
	 mpn_sub_n right before is executed.  */
      MPN_COPY (tp, np + nn - nn1, nn1);
    }

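  /* Precompute the divisor inverse (from the two most significant limbs of
     the normalized divisor) that the sbpi1/dcpi1 divappr functions expect.  */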
  invert_pi1 (dinv, xdp[qn1 - 1], xdp[qn1 - 2]);
  if (BELOW_THRESHOLD (qn1, DC_DIVAPPR_Q_THRESHOLD))
    {
      qp[qn0 - 1 + nn1 - qn1] = mpn_sbpi1_divappr_q (qp + qn0 - 1, tp, nn1, xdp, qn1, dinv.inv32);
    }
  else if (BELOW_THRESHOLD (qn1, MU_DIVAPPR_Q_THRESHOLD))
    {
      qp[qn0 - 1 + nn1 - qn1] = mpn_dcpi1_divappr_q (qp + qn0 - 1, tp, nn1, xdp, qn1, &dinv);
    }
  else
    {
      /* FIXME: mpn_mu_divappr_q doesn't handle qh != 0.  Work around it with
	 a conditional subtraction here.  */
      qh = mpn_cmp (tp + nn1 - qn1, xdp, qn1) >= 0;
      if (qh)
	mpn_sub_n (tp + nn1 - qn1, tp + nn1 - qn1, xdp, qn1);
      mpn_mu_divappr_q (qp + qn0 - 1, tp, nn1, xdp, qn1, scratch);
      qp[qn0 - 1 + nn1 - qn1] = qh;
    }
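  /* Save the overlap limb delivered by the truncating division; the 2-adic
     pass below overwrites qp[0..qn0-1] with the exact low quotient limbs.  */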
  qml = qp[qn0 - 1];

  binvert_limb (di, dp[0]);  di = -di;

  if (BELOW_THRESHOLD (qn0, DC_BDIV_Q_THRESHOLD))
    {
      MPN_COPY (tp, np, qn0);
      mpn_sbpi1_bdiv_q (qp, tp, qn0, dp, qn0, di);
    }
  else if (BELOW_THRESHOLD (qn0, MU_BDIV_Q_THRESHOLD))
    {
      MPN_COPY (tp, np, qn0);
      mpn_dcpi1_bdiv_q (qp, tp, qn0, dp, qn0, di);
    }
  else
    {
      mpn_mu_bdiv_q (qp, np, qn0, dp, qn0, scratch);
    }

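  /* If the exact overlap limb is larger than the one the truncating division
     delivered, that division carried one unit too far into qp[qn0..];
     compensate.  */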
  if (qml < qp[qn0 - 1])
    mpn_decr_u (qp + qn0, 1);

  TMP_FREE;
}
#endif