xref: /dflybsd-src/contrib/gmp/mpn/generic/mu_div_q.c (revision 86d7f5d305c6adaa56ff4582ece9859d73106103)
186d7f5d3SJohn Marino /* mpn_mu_div_q.
286d7f5d3SJohn Marino 
386d7f5d3SJohn Marino    Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.
486d7f5d3SJohn Marino 
586d7f5d3SJohn Marino    THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
686d7f5d3SJohn Marino    SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
786d7f5d3SJohn Marino    GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
886d7f5d3SJohn Marino 
986d7f5d3SJohn Marino Copyright 2005, 2006, 2007, 2009, 2010 Free Software Foundation, Inc.
1086d7f5d3SJohn Marino 
1186d7f5d3SJohn Marino This file is part of the GNU MP Library.
1286d7f5d3SJohn Marino 
1386d7f5d3SJohn Marino The GNU MP Library is free software; you can redistribute it and/or modify
1486d7f5d3SJohn Marino it under the terms of the GNU Lesser General Public License as published by
1586d7f5d3SJohn Marino the Free Software Foundation; either version 3 of the License, or (at your
1686d7f5d3SJohn Marino option) any later version.
1786d7f5d3SJohn Marino 
1886d7f5d3SJohn Marino The GNU MP Library is distributed in the hope that it will be useful, but
1986d7f5d3SJohn Marino WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
2086d7f5d3SJohn Marino or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
2186d7f5d3SJohn Marino License for more details.
2286d7f5d3SJohn Marino 
2386d7f5d3SJohn Marino You should have received a copy of the GNU Lesser General Public License
2486d7f5d3SJohn Marino along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
2586d7f5d3SJohn Marino 
2686d7f5d3SJohn Marino 
2786d7f5d3SJohn Marino /*
2886d7f5d3SJohn Marino    The idea of the algorithm used herein is to compute a smaller inverted value
2986d7f5d3SJohn Marino    than used in the standard Barrett algorithm, and thus save time in the
3086d7f5d3SJohn Marino    Newton iterations, and pay just a small price when using the inverted value
3186d7f5d3SJohn Marino    for developing quotient bits.  This algorithm was presented at ICMS 2006.
3286d7f5d3SJohn Marino */
3386d7f5d3SJohn Marino 
3486d7f5d3SJohn Marino /*
3586d7f5d3SJohn Marino   Things to work on:
3686d7f5d3SJohn Marino 
3786d7f5d3SJohn Marino   1. This is a rudimentary implementation of mpn_mu_div_q.  The algorithm is
3886d7f5d3SJohn Marino      probably close to optimal, except when mpn_mu_divappr_q fails.
3986d7f5d3SJohn Marino 
4086d7f5d3SJohn Marino      An alternative which could be considered for much simpler code for the
4186d7f5d3SJohn Marino      complex qn>=dn arm would be to allocate a temporary nn+1 limb buffer, then
4286d7f5d3SJohn Marino      simply call mpn_mu_divappr_q.  Such a temporary allocation is
4386d7f5d3SJohn Marino      unfortunately very large.
4486d7f5d3SJohn Marino 
4586d7f5d3SJohn Marino   2. We used to fall back to mpn_mu_div_qr when we detect a possible
4686d7f5d3SJohn Marino      mpn_mu_divappr_q rounding problem, now we multiply and compare.
4786d7f5d3SJohn Marino      Unfortunately, since mpn_mu_divappr_q does not return the partial
4886d7f5d3SJohn Marino      remainder, this also doesn't become optimal.  A mpn_mu_divappr_qr could
4986d7f5d3SJohn Marino      solve that.
5086d7f5d3SJohn Marino 
5186d7f5d3SJohn Marino   3. The allocations done here should be made from the scratch area, which
5286d7f5d3SJohn Marino      then would need to be amended.
5386d7f5d3SJohn Marino */
5486d7f5d3SJohn Marino 
5586d7f5d3SJohn Marino #include <stdlib.h>		/* for NULL */
5686d7f5d3SJohn Marino #include "gmp.h"
5786d7f5d3SJohn Marino #include "gmp-impl.h"
5886d7f5d3SJohn Marino 
5986d7f5d3SJohn Marino 
6086d7f5d3SJohn Marino mp_limb_t
mpn_mu_div_q(mp_ptr qp,mp_srcptr np,mp_size_t nn,mp_srcptr dp,mp_size_t dn,mp_ptr scratch)6186d7f5d3SJohn Marino mpn_mu_div_q (mp_ptr qp,
6286d7f5d3SJohn Marino 	      mp_srcptr np, mp_size_t nn,
6386d7f5d3SJohn Marino 	      mp_srcptr dp, mp_size_t dn,
6486d7f5d3SJohn Marino 	      mp_ptr scratch)
6586d7f5d3SJohn Marino {
6686d7f5d3SJohn Marino   mp_ptr tp, rp, ip, this_ip;
6786d7f5d3SJohn Marino   mp_size_t qn, in, this_in;
6886d7f5d3SJohn Marino   mp_limb_t cy, qh;
6986d7f5d3SJohn Marino   TMP_DECL;
7086d7f5d3SJohn Marino 
7186d7f5d3SJohn Marino   TMP_MARK;
7286d7f5d3SJohn Marino 
7386d7f5d3SJohn Marino   qn = nn - dn;
7486d7f5d3SJohn Marino 
7586d7f5d3SJohn Marino   tp = TMP_BALLOC_LIMBS (qn + 1);
7686d7f5d3SJohn Marino 
7786d7f5d3SJohn Marino   if (qn >= dn)			/* nn >= 2*dn + 1 */
7886d7f5d3SJohn Marino     {
7986d7f5d3SJohn Marino       /* Find max inverse size needed by the two preinv calls.  FIXME: This is
8086d7f5d3SJohn Marino 	 not optimal, it underestimates the invariance.  */
8186d7f5d3SJohn Marino       if (dn != qn)
8286d7f5d3SJohn Marino 	{
8386d7f5d3SJohn Marino 	  mp_size_t in1, in2;
8486d7f5d3SJohn Marino 
8586d7f5d3SJohn Marino 	  in1 = mpn_mu_div_qr_choose_in (qn - dn, dn, 0);
8686d7f5d3SJohn Marino 	  in2 = mpn_mu_divappr_q_choose_in (dn + 1, dn, 0);
8786d7f5d3SJohn Marino 	  in = MAX (in1, in2);
8886d7f5d3SJohn Marino 	}
8986d7f5d3SJohn Marino       else
9086d7f5d3SJohn Marino 	{
9186d7f5d3SJohn Marino 	  in = mpn_mu_divappr_q_choose_in (dn + 1, dn, 0);
9286d7f5d3SJohn Marino 	}
9386d7f5d3SJohn Marino 
9486d7f5d3SJohn Marino       ip = TMP_BALLOC_LIMBS (in + 1);
9586d7f5d3SJohn Marino 
9686d7f5d3SJohn Marino       if (dn == in)
9786d7f5d3SJohn Marino 	{
9886d7f5d3SJohn Marino 	  MPN_COPY (scratch + 1, dp, in);
9986d7f5d3SJohn Marino 	  scratch[0] = 1;
10086d7f5d3SJohn Marino 	  mpn_invertappr (ip, scratch, in + 1, NULL);
10186d7f5d3SJohn Marino 	  MPN_COPY_INCR (ip, ip + 1, in);
10286d7f5d3SJohn Marino 	}
10386d7f5d3SJohn Marino       else
10486d7f5d3SJohn Marino 	{
10586d7f5d3SJohn Marino 	  cy = mpn_add_1 (scratch, dp + dn - (in + 1), in + 1, 1);
10686d7f5d3SJohn Marino 	  if (UNLIKELY (cy != 0))
10786d7f5d3SJohn Marino 	    MPN_ZERO (ip, in);
10886d7f5d3SJohn Marino 	  else
10986d7f5d3SJohn Marino 	    {
11086d7f5d3SJohn Marino 	      mpn_invertappr (ip, scratch, in + 1, NULL);
11186d7f5d3SJohn Marino 	      MPN_COPY_INCR (ip, ip + 1, in);
11286d7f5d3SJohn Marino 	    }
11386d7f5d3SJohn Marino 	}
11486d7f5d3SJohn Marino 
11586d7f5d3SJohn Marino        /* |_______________________|   dividend
11686d7f5d3SJohn Marino 			 |________|   divisor  */
11786d7f5d3SJohn Marino       rp = TMP_BALLOC_LIMBS (2 * dn + 1);
11886d7f5d3SJohn Marino 
11986d7f5d3SJohn Marino       this_in = mpn_mu_div_qr_choose_in (qn - dn, dn, 0);
12086d7f5d3SJohn Marino       this_ip = ip + in - this_in;
12186d7f5d3SJohn Marino       qh = mpn_preinv_mu_div_qr (tp + dn + 1, rp + dn + 1, np + dn, qn, dp, dn,
12286d7f5d3SJohn Marino 				 this_ip, this_in, scratch);
12386d7f5d3SJohn Marino 
12486d7f5d3SJohn Marino       MPN_COPY (rp + 1, np, dn);
12586d7f5d3SJohn Marino       rp[0] = 0;
12686d7f5d3SJohn Marino       this_in = mpn_mu_divappr_q_choose_in (dn + 1, dn, 0);
12786d7f5d3SJohn Marino       this_ip = ip + in - this_in;
12886d7f5d3SJohn Marino       cy = mpn_preinv_mu_divappr_q (tp, rp, 2 * dn + 1, dp, dn,
12986d7f5d3SJohn Marino 				    this_ip, this_in, scratch);
13086d7f5d3SJohn Marino 
13186d7f5d3SJohn Marino       if (UNLIKELY (cy != 0))
13286d7f5d3SJohn Marino 	{
13386d7f5d3SJohn Marino 	  /* Since the partial remainder fed to mpn_preinv_mu_divappr_q was
13486d7f5d3SJohn Marino 	     canonically reduced, replace the returned value of B^(qn-dn)+eps
13586d7f5d3SJohn Marino 	     by the largest possible value.  */
13686d7f5d3SJohn Marino 	  mp_size_t i;
13786d7f5d3SJohn Marino 	  for (i = 0; i < dn + 1; i++)
13886d7f5d3SJohn Marino 	    tp[i] = GMP_NUMB_MAX;
13986d7f5d3SJohn Marino 	}
14086d7f5d3SJohn Marino 
14186d7f5d3SJohn Marino       /* The max error of mpn_mu_divappr_q is +4.  If the low quotient limb is
14286d7f5d3SJohn Marino 	 greater than the max error, we cannot trust the quotient.  */
14386d7f5d3SJohn Marino       if (tp[0] > 4)
14486d7f5d3SJohn Marino 	{
14586d7f5d3SJohn Marino 	  MPN_COPY (qp, tp + 1, qn);
14686d7f5d3SJohn Marino 	}
14786d7f5d3SJohn Marino       else
14886d7f5d3SJohn Marino 	{
14986d7f5d3SJohn Marino 	  mp_limb_t cy;
15086d7f5d3SJohn Marino 	  mp_ptr pp;
15186d7f5d3SJohn Marino 
15286d7f5d3SJohn Marino 	  /* FIXME: can we use already allocated space? */
15386d7f5d3SJohn Marino 	  pp = TMP_BALLOC_LIMBS (nn);
15486d7f5d3SJohn Marino 	  mpn_mul (pp, tp + 1, qn, dp, dn);
15586d7f5d3SJohn Marino 
15686d7f5d3SJohn Marino 	  cy = (qh != 0) ? mpn_add_n (pp + qn, pp + qn, dp, dn) : 0;
15786d7f5d3SJohn Marino 
15886d7f5d3SJohn Marino 	  if (cy || mpn_cmp (pp, np, nn) > 0) /* At most is wrong by one, no cycle. */
15986d7f5d3SJohn Marino 	    qh -= mpn_sub_1 (qp, tp + 1, qn, 1);
16086d7f5d3SJohn Marino 	  else /* Same as above */
16186d7f5d3SJohn Marino 	    MPN_COPY (qp, tp + 1, qn);
16286d7f5d3SJohn Marino 	}
16386d7f5d3SJohn Marino     }
16486d7f5d3SJohn Marino   else
16586d7f5d3SJohn Marino     {
16686d7f5d3SJohn Marino        /* |_______________________|   dividend
16786d7f5d3SJohn Marino 		 |________________|   divisor  */
16886d7f5d3SJohn Marino 
16986d7f5d3SJohn Marino       /* FIXME: When nn = 2dn-1, qn becomes dn-1, and the numerator size passed
17086d7f5d3SJohn Marino 	 here becomes 2dn, i.e., more than nn.  This shouldn't hurt, since only
17186d7f5d3SJohn Marino 	 the most significant dn-1 limbs will actually be read, but it is not
17286d7f5d3SJohn Marino 	 pretty.  */
17386d7f5d3SJohn Marino 
17486d7f5d3SJohn Marino       qh = mpn_mu_divappr_q (tp, np + nn - (2 * qn + 2), 2 * qn + 2,
17586d7f5d3SJohn Marino 			     dp + dn - (qn + 1), qn + 1, scratch);
17686d7f5d3SJohn Marino 
17786d7f5d3SJohn Marino       /* The max error of mpn_mu_divappr_q is +4, but we get an additional
17886d7f5d3SJohn Marino          error from the divisor truncation.  */
17986d7f5d3SJohn Marino       if (tp[0] > 6)
18086d7f5d3SJohn Marino 	{
18186d7f5d3SJohn Marino 	  MPN_COPY (qp, tp + 1, qn);
18286d7f5d3SJohn Marino 	}
18386d7f5d3SJohn Marino       else
18486d7f5d3SJohn Marino 	{
18586d7f5d3SJohn Marino 	  mp_limb_t cy;
18686d7f5d3SJohn Marino 
18786d7f5d3SJohn Marino 	  /* FIXME: a shorter product should be enough; we may use already
18886d7f5d3SJohn Marino 	     allocated space... */
18986d7f5d3SJohn Marino 	  rp = TMP_BALLOC_LIMBS (nn);
19086d7f5d3SJohn Marino 	  mpn_mul (rp, dp, dn, tp + 1, qn);
19186d7f5d3SJohn Marino 
19286d7f5d3SJohn Marino 	  cy = (qh != 0) ? mpn_add_n (rp + qn, rp + qn, dp, dn) : 0;
19386d7f5d3SJohn Marino 
19486d7f5d3SJohn Marino 	  if (cy || mpn_cmp (rp, np, nn) > 0) /* At most is wrong by one, no cycle. */
19586d7f5d3SJohn Marino 	    qh -= mpn_sub_1 (qp, tp + 1, qn, 1);
19686d7f5d3SJohn Marino 	  else /* Same as above */
19786d7f5d3SJohn Marino 	    MPN_COPY (qp, tp + 1, qn);
19886d7f5d3SJohn Marino 	}
19986d7f5d3SJohn Marino     }
20086d7f5d3SJohn Marino 
20186d7f5d3SJohn Marino   TMP_FREE;
20286d7f5d3SJohn Marino   return qh;
20386d7f5d3SJohn Marino }
20486d7f5d3SJohn Marino 
20586d7f5d3SJohn Marino mp_size_t
mpn_mu_div_q_itch(mp_size_t nn,mp_size_t dn,int mua_k)20686d7f5d3SJohn Marino mpn_mu_div_q_itch (mp_size_t nn, mp_size_t dn, int mua_k)
20786d7f5d3SJohn Marino {
20886d7f5d3SJohn Marino   mp_size_t qn, itch1, itch2;
20986d7f5d3SJohn Marino 
21086d7f5d3SJohn Marino   qn = nn - dn;
21186d7f5d3SJohn Marino   if (qn >= dn)
21286d7f5d3SJohn Marino     {
21386d7f5d3SJohn Marino       itch1 = mpn_mu_div_qr_itch (qn, dn, mua_k);
21486d7f5d3SJohn Marino       itch2 = mpn_mu_divappr_q_itch (2 * dn + 1, dn, mua_k);
21586d7f5d3SJohn Marino       return MAX (itch1, itch2);
21686d7f5d3SJohn Marino     }
21786d7f5d3SJohn Marino   else
21886d7f5d3SJohn Marino     {
21986d7f5d3SJohn Marino       itch1 = mpn_mu_divappr_q_itch (2 * qn + 2, qn + 1, mua_k);
22086d7f5d3SJohn Marino       return itch1;
22186d7f5d3SJohn Marino     }
22286d7f5d3SJohn Marino }
223