xref: /dflybsd-src/contrib/gmp/mpn/generic/divexact.c (revision 86d7f5d305c6adaa56ff4582ece9859d73106103)
186d7f5d3SJohn Marino /* mpn_divexact(qp,np,nn,dp,dn,tp) -- Divide N = {np,nn} by D = {dp,dn} storing
286d7f5d3SJohn Marino    the result in Q = {qp,nn-dn+1} expecting no remainder.  Overlap allowed
386d7f5d3SJohn Marino    between Q and N; all other overlap disallowed.
486d7f5d3SJohn Marino 
586d7f5d3SJohn Marino    Contributed to the GNU project by Torbjorn Granlund.
686d7f5d3SJohn Marino 
786d7f5d3SJohn Marino    THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
886d7f5d3SJohn Marino    SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
986d7f5d3SJohn Marino    GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
1086d7f5d3SJohn Marino 
1186d7f5d3SJohn Marino Copyright 2006, 2007, 2009 Free Software Foundation, Inc.
1286d7f5d3SJohn Marino 
1386d7f5d3SJohn Marino This file is part of the GNU MP Library.
1486d7f5d3SJohn Marino 
1586d7f5d3SJohn Marino The GNU MP Library is free software; you can redistribute it and/or modify
1686d7f5d3SJohn Marino it under the terms of the GNU Lesser General Public License as published by
1786d7f5d3SJohn Marino the Free Software Foundation; either version 3 of the License, or (at your
1886d7f5d3SJohn Marino option) any later version.
1986d7f5d3SJohn Marino 
2086d7f5d3SJohn Marino The GNU MP Library is distributed in the hope that it will be useful, but
2186d7f5d3SJohn Marino WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
2286d7f5d3SJohn Marino or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
2386d7f5d3SJohn Marino License for more details.
2486d7f5d3SJohn Marino 
2586d7f5d3SJohn Marino You should have received a copy of the GNU Lesser General Public License
2686d7f5d3SJohn Marino along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
2786d7f5d3SJohn Marino 
2886d7f5d3SJohn Marino 
2986d7f5d3SJohn Marino #include "gmp.h"
3086d7f5d3SJohn Marino #include "gmp-impl.h"
3186d7f5d3SJohn Marino #include "longlong.h"
3286d7f5d3SJohn Marino 
3386d7f5d3SJohn Marino #if 1
3486d7f5d3SJohn Marino void
mpn_divexact(mp_ptr qp,mp_srcptr np,mp_size_t nn,mp_srcptr dp,mp_size_t dn)3586d7f5d3SJohn Marino mpn_divexact (mp_ptr qp,
3686d7f5d3SJohn Marino 	      mp_srcptr np, mp_size_t nn,
3786d7f5d3SJohn Marino 	      mp_srcptr dp, mp_size_t dn)
3886d7f5d3SJohn Marino {
3986d7f5d3SJohn Marino   unsigned shift;
4086d7f5d3SJohn Marino   mp_size_t qn;
4186d7f5d3SJohn Marino   mp_ptr tp, wp;
4286d7f5d3SJohn Marino   TMP_DECL;
4386d7f5d3SJohn Marino 
4486d7f5d3SJohn Marino   ASSERT (dn > 0);
4586d7f5d3SJohn Marino   ASSERT (nn >= dn);
4686d7f5d3SJohn Marino   ASSERT (dp[dn-1] > 0);
4786d7f5d3SJohn Marino 
4886d7f5d3SJohn Marino   while (dp[0] == 0)
4986d7f5d3SJohn Marino     {
5086d7f5d3SJohn Marino       ASSERT (np[0] == 0);
5186d7f5d3SJohn Marino       dp++;
5286d7f5d3SJohn Marino       np++;
5386d7f5d3SJohn Marino       dn--;
5486d7f5d3SJohn Marino       nn--;
5586d7f5d3SJohn Marino     }
5686d7f5d3SJohn Marino 
5786d7f5d3SJohn Marino   if (dn == 1)
5886d7f5d3SJohn Marino     {
5986d7f5d3SJohn Marino       MPN_DIVREM_OR_DIVEXACT_1 (qp, np, nn, dp[0]);
6086d7f5d3SJohn Marino       return;
6186d7f5d3SJohn Marino     }
6286d7f5d3SJohn Marino 
6386d7f5d3SJohn Marino   TMP_MARK;
6486d7f5d3SJohn Marino 
6586d7f5d3SJohn Marino   qn = nn + 1 - dn;
6686d7f5d3SJohn Marino   count_trailing_zeros (shift, dp[0]);
6786d7f5d3SJohn Marino 
6886d7f5d3SJohn Marino   if (shift > 0)
6986d7f5d3SJohn Marino     {
7086d7f5d3SJohn Marino       mp_size_t ss = (dn > qn) ? qn + 1 : dn;
7186d7f5d3SJohn Marino 
7286d7f5d3SJohn Marino       tp = TMP_ALLOC_LIMBS (ss);
7386d7f5d3SJohn Marino       mpn_rshift (tp, dp, ss, shift);
7486d7f5d3SJohn Marino       dp = tp;
7586d7f5d3SJohn Marino 
7686d7f5d3SJohn Marino       /* Since we have excluded dn == 1, we have nn > qn, and we need
7786d7f5d3SJohn Marino 	 to shift one limb beyond qn. */
7886d7f5d3SJohn Marino       wp = TMP_ALLOC_LIMBS (qn + 1);
7986d7f5d3SJohn Marino       mpn_rshift (wp, np, qn + 1, shift);
8086d7f5d3SJohn Marino     }
8186d7f5d3SJohn Marino   else
8286d7f5d3SJohn Marino     {
8386d7f5d3SJohn Marino       wp = TMP_ALLOC_LIMBS (qn);
8486d7f5d3SJohn Marino       MPN_COPY (wp, np, qn);
8586d7f5d3SJohn Marino     }
8686d7f5d3SJohn Marino 
8786d7f5d3SJohn Marino   if (dn > qn)
8886d7f5d3SJohn Marino     dn = qn;
8986d7f5d3SJohn Marino 
9086d7f5d3SJohn Marino   tp = TMP_ALLOC_LIMBS (mpn_bdiv_q_itch (qn, dn));
9186d7f5d3SJohn Marino   mpn_bdiv_q (qp, wp, qn, dp, dn, tp);
9286d7f5d3SJohn Marino   TMP_FREE;
9386d7f5d3SJohn Marino }
9486d7f5d3SJohn Marino 
9586d7f5d3SJohn Marino #else
9686d7f5d3SJohn Marino 
9786d7f5d3SJohn Marino /* We use the Jebelean's bidirectional exact division algorithm.  This is
9886d7f5d3SJohn Marino    somewhat naively implemented, with equal quotient parts done by 2-adic
9986d7f5d3SJohn Marino    division and truncating division.  Since 2-adic division is faster, it
10086d7f5d3SJohn Marino    should be used for a larger chunk.
10186d7f5d3SJohn Marino 
10286d7f5d3SJohn Marino    This code is horrendously ugly, in all sorts of ways.
10386d7f5d3SJohn Marino 
10486d7f5d3SJohn Marino    * It was hacked without much care or thought, but with a testing program.
10586d7f5d3SJohn Marino    * It handles scratch space frivolously, and furthermore the itch function
10686d7f5d3SJohn Marino      is broken.
10786d7f5d3SJohn Marino    * Doesn't provide any measures to deal with mu_divappr_q's +3 error.  We
10886d7f5d3SJohn Marino      have yet to provoke an error due to this, though.
10986d7f5d3SJohn Marino    * Algorithm selection leaves a lot to be desired.  In particular, the choice
11086d7f5d3SJohn Marino      between DC and MU isn't a point, but we treat it like one.
11186d7f5d3SJohn Marino    * It makes the msb part 1 or 2 limbs larger than the lsb part, in spite of
11286d7f5d3SJohn Marino      that the latter is faster.  We should at least reverse this, but perhaps
11386d7f5d3SJohn Marino      we should make the lsb part considerably larger.  (How do we tune this?)
11486d7f5d3SJohn Marino */
11586d7f5d3SJohn Marino 
11686d7f5d3SJohn Marino mp_size_t
mpn_divexact_itch(mp_size_t nn,mp_size_t dn)11786d7f5d3SJohn Marino mpn_divexact_itch (mp_size_t nn, mp_size_t dn)
11886d7f5d3SJohn Marino {
11986d7f5d3SJohn Marino   return nn + dn;		/* FIXME this is not right */
12086d7f5d3SJohn Marino }
12186d7f5d3SJohn Marino 
12286d7f5d3SJohn Marino void
mpn_divexact(mp_ptr qp,mp_srcptr np,mp_size_t nn,mp_srcptr dp,mp_size_t dn,mp_ptr scratch)12386d7f5d3SJohn Marino mpn_divexact (mp_ptr qp,
12486d7f5d3SJohn Marino 	      mp_srcptr np, mp_size_t nn,
12586d7f5d3SJohn Marino 	      mp_srcptr dp, mp_size_t dn,
12686d7f5d3SJohn Marino 	      mp_ptr scratch)
12786d7f5d3SJohn Marino {
12886d7f5d3SJohn Marino   mp_size_t qn;
12986d7f5d3SJohn Marino   mp_size_t nn0, qn0;
13086d7f5d3SJohn Marino   mp_size_t nn1, qn1;
13186d7f5d3SJohn Marino   mp_ptr tp;
13286d7f5d3SJohn Marino   mp_limb_t qml;
13386d7f5d3SJohn Marino   mp_limb_t qh;
13486d7f5d3SJohn Marino   int cnt;
13586d7f5d3SJohn Marino   mp_ptr xdp;
13686d7f5d3SJohn Marino   mp_limb_t di;
13786d7f5d3SJohn Marino   mp_limb_t cy;
13886d7f5d3SJohn Marino   gmp_pi1_t dinv;
13986d7f5d3SJohn Marino   TMP_DECL;
14086d7f5d3SJohn Marino 
14186d7f5d3SJohn Marino   TMP_MARK;
14286d7f5d3SJohn Marino 
14386d7f5d3SJohn Marino   qn = nn - dn + 1;
14486d7f5d3SJohn Marino 
14586d7f5d3SJohn Marino   /* For small divisors, and small quotients, don't use Jebelean's algorithm. */
14686d7f5d3SJohn Marino   if (dn < DIVEXACT_JEB_THRESHOLD || qn < DIVEXACT_JEB_THRESHOLD)
14786d7f5d3SJohn Marino     {
14886d7f5d3SJohn Marino       tp = scratch;
14986d7f5d3SJohn Marino       MPN_COPY (tp, np, qn);
15086d7f5d3SJohn Marino       binvert_limb (di, dp[0]);  di = -di;
15186d7f5d3SJohn Marino       dn = MIN (dn, qn);
15286d7f5d3SJohn Marino       mpn_sbpi1_bdiv_q (qp, tp, qn, dp, dn, di);
15386d7f5d3SJohn Marino       TMP_FREE;
15486d7f5d3SJohn Marino       return;
15586d7f5d3SJohn Marino     }
15686d7f5d3SJohn Marino 
15786d7f5d3SJohn Marino   qn0 = ((nn - dn) >> 1) + 1;	/* low quotient size */
15886d7f5d3SJohn Marino 
15986d7f5d3SJohn Marino   /* If quotient is much larger than the divisor, the bidirectional algorithm
16086d7f5d3SJohn Marino      does not work as currently implemented.  Fall back to plain bdiv.  */
16186d7f5d3SJohn Marino   if (qn0 > dn)
16286d7f5d3SJohn Marino     {
16386d7f5d3SJohn Marino       if (BELOW_THRESHOLD (dn, DC_BDIV_Q_THRESHOLD))
16486d7f5d3SJohn Marino 	{
16586d7f5d3SJohn Marino 	  tp = scratch;
16686d7f5d3SJohn Marino 	  MPN_COPY (tp, np, qn);
16786d7f5d3SJohn Marino 	  binvert_limb (di, dp[0]);  di = -di;
16886d7f5d3SJohn Marino 	  dn = MIN (dn, qn);
16986d7f5d3SJohn Marino 	  mpn_sbpi1_bdiv_q (qp, tp, qn, dp, dn, di);
17086d7f5d3SJohn Marino 	}
17186d7f5d3SJohn Marino       else if (BELOW_THRESHOLD (dn, MU_BDIV_Q_THRESHOLD))
17286d7f5d3SJohn Marino 	{
17386d7f5d3SJohn Marino 	  tp = scratch;
17486d7f5d3SJohn Marino 	  MPN_COPY (tp, np, qn);
17586d7f5d3SJohn Marino 	  binvert_limb (di, dp[0]);  di = -di;
17686d7f5d3SJohn Marino 	  mpn_dcpi1_bdiv_q (qp, tp, qn, dp, dn, di);
17786d7f5d3SJohn Marino 	}
17886d7f5d3SJohn Marino       else
17986d7f5d3SJohn Marino 	{
18086d7f5d3SJohn Marino 	  mpn_mu_bdiv_q (qp, np, qn, dp, dn, scratch);
18186d7f5d3SJohn Marino 	}
18286d7f5d3SJohn Marino       TMP_FREE;
18386d7f5d3SJohn Marino       return;
18486d7f5d3SJohn Marino     }
18586d7f5d3SJohn Marino 
18686d7f5d3SJohn Marino   nn0 = qn0 + qn0;
18786d7f5d3SJohn Marino 
18886d7f5d3SJohn Marino   nn1 = nn0 - 1 + ((nn-dn) & 1);
18986d7f5d3SJohn Marino   qn1 = qn0;
19086d7f5d3SJohn Marino   if (LIKELY (qn0 != dn))
19186d7f5d3SJohn Marino     {
19286d7f5d3SJohn Marino       nn1 = nn1 + 1;
19386d7f5d3SJohn Marino       qn1 = qn1 + 1;
19486d7f5d3SJohn Marino       if (UNLIKELY (dp[dn - 1] == 1 && qn1 != dn))
19586d7f5d3SJohn Marino 	{
19686d7f5d3SJohn Marino 	  /* If the leading divisor limb == 1, i.e. has just one bit, we have
19786d7f5d3SJohn Marino 	     to include an extra limb in order to get the needed overlap.  */
19886d7f5d3SJohn Marino 	  /* FIXME: Now with the mu_divappr_q function, we should really need
19986d7f5d3SJohn Marino 	     more overlap. That indicates one of two things: (1) The test code
20086d7f5d3SJohn Marino 	     is not good. (2) We actually overlap too much by default.  */
20186d7f5d3SJohn Marino 	  nn1 = nn1 + 1;
20286d7f5d3SJohn Marino 	  qn1 = qn1 + 1;
20386d7f5d3SJohn Marino 	}
20486d7f5d3SJohn Marino     }
20586d7f5d3SJohn Marino 
20686d7f5d3SJohn Marino   tp = TMP_ALLOC_LIMBS (nn1 + 1);
20786d7f5d3SJohn Marino 
20886d7f5d3SJohn Marino   count_leading_zeros (cnt, dp[dn - 1]);
20986d7f5d3SJohn Marino 
21086d7f5d3SJohn Marino   /* Normalize divisor, store into tmp area.  */
21186d7f5d3SJohn Marino   if (cnt != 0)
21286d7f5d3SJohn Marino     {
21386d7f5d3SJohn Marino       xdp = TMP_ALLOC_LIMBS (qn1);
21486d7f5d3SJohn Marino       mpn_lshift (xdp, dp + dn - qn1, qn1, cnt);
21586d7f5d3SJohn Marino     }
21686d7f5d3SJohn Marino   else
21786d7f5d3SJohn Marino     {
21886d7f5d3SJohn Marino       xdp = (mp_ptr) dp + dn - qn1;
21986d7f5d3SJohn Marino     }
22086d7f5d3SJohn Marino 
22186d7f5d3SJohn Marino   /* Shift dividend according to the divisor normalization.  */
22286d7f5d3SJohn Marino   /* FIXME: We compute too much here for XX_divappr_q, but these functions'
22386d7f5d3SJohn Marino      interfaces want a pointer to the imaginative least significant limb, not
22486d7f5d3SJohn Marino      to the least significant *used* limb.  Of course, we could leave nn1-qn1
22586d7f5d3SJohn Marino      rubbish limbs in the low part, to save some time.  */
22686d7f5d3SJohn Marino   if (cnt != 0)
22786d7f5d3SJohn Marino     {
22886d7f5d3SJohn Marino       cy = mpn_lshift (tp, np + nn - nn1, nn1, cnt);
22986d7f5d3SJohn Marino       if (cy != 0)
23086d7f5d3SJohn Marino 	{
23186d7f5d3SJohn Marino 	  tp[nn1] = cy;
23286d7f5d3SJohn Marino 	  nn1++;
23386d7f5d3SJohn Marino 	}
23486d7f5d3SJohn Marino     }
23586d7f5d3SJohn Marino   else
23686d7f5d3SJohn Marino     {
23786d7f5d3SJohn Marino       /* FIXME: This copy is not needed for mpn_mu_divappr_q, except when the
23886d7f5d3SJohn Marino 	 mpn_sub_n right before is executed.  */
23986d7f5d3SJohn Marino       MPN_COPY (tp, np + nn - nn1, nn1);
24086d7f5d3SJohn Marino     }
24186d7f5d3SJohn Marino 
24286d7f5d3SJohn Marino   invert_pi1 (dinv, xdp[qn1 - 1], xdp[qn1 - 2]);
24386d7f5d3SJohn Marino   if (BELOW_THRESHOLD (qn1, DC_DIVAPPR_Q_THRESHOLD))
24486d7f5d3SJohn Marino     {
24586d7f5d3SJohn Marino       qp[qn0 - 1 + nn1 - qn1] = mpn_sbpi1_divappr_q (qp + qn0 - 1, tp, nn1, xdp, qn1, dinv.inv32);
24686d7f5d3SJohn Marino     }
24786d7f5d3SJohn Marino   else if (BELOW_THRESHOLD (qn1, MU_DIVAPPR_Q_THRESHOLD))
24886d7f5d3SJohn Marino     {
24986d7f5d3SJohn Marino       qp[qn0 - 1 + nn1 - qn1] = mpn_dcpi1_divappr_q (qp + qn0 - 1, tp, nn1, xdp, qn1, &dinv);
25086d7f5d3SJohn Marino     }
25186d7f5d3SJohn Marino   else
25286d7f5d3SJohn Marino     {
25386d7f5d3SJohn Marino       /* FIXME: mpn_mu_divappr_q doesn't handle qh != 0.  Work around it with a
25486d7f5d3SJohn Marino 	 conditional subtraction here.  */
25586d7f5d3SJohn Marino       qh = mpn_cmp (tp + nn1 - qn1, xdp, qn1) >= 0;
25686d7f5d3SJohn Marino       if (qh)
25786d7f5d3SJohn Marino 	mpn_sub_n (tp + nn1 - qn1, tp + nn1 - qn1, xdp, qn1);
25886d7f5d3SJohn Marino       mpn_mu_divappr_q (qp + qn0 - 1, tp, nn1, xdp, qn1, scratch);
25986d7f5d3SJohn Marino       qp[qn0 - 1 + nn1 - qn1] = qh;
26086d7f5d3SJohn Marino     }
26186d7f5d3SJohn Marino   qml = qp[qn0 - 1];
26286d7f5d3SJohn Marino 
26386d7f5d3SJohn Marino   binvert_limb (di, dp[0]);  di = -di;
26486d7f5d3SJohn Marino 
26586d7f5d3SJohn Marino   if (BELOW_THRESHOLD (qn0, DC_BDIV_Q_THRESHOLD))
26686d7f5d3SJohn Marino     {
26786d7f5d3SJohn Marino       MPN_COPY (tp, np, qn0);
26886d7f5d3SJohn Marino       mpn_sbpi1_bdiv_q (qp, tp, qn0, dp, qn0, di);
26986d7f5d3SJohn Marino     }
27086d7f5d3SJohn Marino   else if (BELOW_THRESHOLD (qn0, MU_BDIV_Q_THRESHOLD))
27186d7f5d3SJohn Marino     {
27286d7f5d3SJohn Marino       MPN_COPY (tp, np, qn0);
27386d7f5d3SJohn Marino       mpn_dcpi1_bdiv_q (qp, tp, qn0, dp, qn0, di);
27486d7f5d3SJohn Marino     }
27586d7f5d3SJohn Marino   else
27686d7f5d3SJohn Marino     {
27786d7f5d3SJohn Marino       mpn_mu_bdiv_q (qp, np, qn0, dp, qn0, scratch);
27886d7f5d3SJohn Marino     }
27986d7f5d3SJohn Marino 
28086d7f5d3SJohn Marino   if (qml < qp[qn0 - 1])
28186d7f5d3SJohn Marino     mpn_decr_u (qp + qn0, 1);
28286d7f5d3SJohn Marino 
28386d7f5d3SJohn Marino   TMP_FREE;
28486d7f5d3SJohn Marino }
28586d7f5d3SJohn Marino #endif
286