mpn/generic/mu_div_q.c

/* mpn_mu_div_q.

   Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.

   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.

Copyright 2005, 2006, 2007, 2009, 2010 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 3 of the License, or (at your
option) any later version.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */


/*
   The idea of the algorithm used herein is to compute a smaller inverted value
   than used in the standard Barrett algorithm, and thus save time in the
   Newton iterations, and pay just a small price when using the inverted value
   for developing quotient bits.  This algorithm was presented at ICMS 2006.
*/

/*
  Things to work on:

  1. This is a rudimentary implementation of mpn_mu_div_q.  The algorithm is
     probably close to optimal, except when mpn_mu_divappr_q fails.

     An alternative which could be considered for much simpler code for the
     complex qn>=dn arm would be to allocate a temporary nn+1 limb buffer, then
     simply call mpn_mu_divappr_q.  Such a temporary allocation is
     unfortunately very large.

  2. We used to fall back to mpn_mu_div_qr when we detect a possible
     mpn_mu_divappr_q rounding problem, now we multiply and compare.
     Unfortunately, since mpn_mu_divappr_q does not return the partial
     remainder, this also doesn't become optimal.  A mpn_mu_divappr_qr could
     solve that.

  3. The allocations done here should be made from the scratch area, which
     then would need to be amended.
*/

#include <stdlib.h>		/* for NULL */
#include "gmp.h"
#include "gmp-impl.h"


86d7f5d3SJohn Marinomp_limb_t
86d7f5d3SJohn Marinompn_mu_div_q (mp_ptr qp,
86d7f5d3SJohn Marino	      mp_srcptr np, mp_size_t nn,
86d7f5d3SJohn Marino	      mp_srcptr dp, mp_size_t dn,
86d7f5d3SJohn Marino	      mp_ptr scratch)
86d7f5d3SJohn Marino{
86d7f5d3SJohn Marino  mp_ptr tp, rp, ip, this_ip;
86d7f5d3SJohn Marino  mp_size_t qn, in, this_in;
86d7f5d3SJohn Marino  mp_limb_t cy, qh;
86d7f5d3SJohn Marino  TMP_DECL;
86d7f5d3SJohn Marino
86d7f5d3SJohn Marino  TMP_MARK;
86d7f5d3SJohn Marino
86d7f5d3SJohn Marino  qn = nn - dn;
86d7f5d3SJohn Marino
86d7f5d3SJohn Marino  tp = TMP_BALLOC_LIMBS (qn + 1);
86d7f5d3SJohn Marino
86d7f5d3SJohn Marino  if (qn >= dn)			/* nn >= 2*dn + 1 */
86d7f5d3SJohn Marino    {
86d7f5d3SJohn Marino      /* Find max inverse size needed by the two preinv calls.  FIXME: This is
86d7f5d3SJohn Marino	 not optimal, it underestimates the invariance.  */
86d7f5d3SJohn Marino      if (dn != qn)
86d7f5d3SJohn Marino	{
86d7f5d3SJohn Marino	  mp_size_t in1, in2;
86d7f5d3SJohn Marino
86d7f5d3SJohn Marino	  in1 = mpn_mu_div_qr_choose_in (qn - dn, dn, 0);
86d7f5d3SJohn Marino	  in2 = mpn_mu_divappr_q_choose_in (dn + 1, dn, 0);
86d7f5d3SJohn Marino	  in = MAX (in1, in2);
86d7f5d3SJohn Marino	}
86d7f5d3SJohn Marino      else
86d7f5d3SJohn Marino	{
86d7f5d3SJohn Marino	  in = mpn_mu_divappr_q_choose_in (dn + 1, dn, 0);
86d7f5d3SJohn Marino	}
86d7f5d3SJohn Marino
86d7f5d3SJohn Marino      ip = TMP_BALLOC_LIMBS (in + 1);
86d7f5d3SJohn Marino
86d7f5d3SJohn Marino      if (dn == in)
86d7f5d3SJohn Marino	{
86d7f5d3SJohn Marino	  MPN_COPY (scratch + 1, dp, in);
86d7f5d3SJohn Marino	  scratch[0] = 1;
86d7f5d3SJohn Marino	  mpn_invertappr (ip, scratch, in + 1, NULL);
86d7f5d3SJohn Marino	  MPN_COPY_INCR (ip, ip + 1, in);
86d7f5d3SJohn Marino	}
86d7f5d3SJohn Marino      else
86d7f5d3SJohn Marino	{
86d7f5d3SJohn Marino	  cy = mpn_add_1 (scratch, dp + dn - (in + 1), in + 1, 1);
86d7f5d3SJohn Marino	  if (UNLIKELY (cy != 0))
86d7f5d3SJohn Marino	    MPN_ZERO (ip, in);
86d7f5d3SJohn Marino	  else
86d7f5d3SJohn Marino	    {
86d7f5d3SJohn Marino	      mpn_invertappr (ip, scratch, in + 1, NULL);
86d7f5d3SJohn Marino	      MPN_COPY_INCR (ip, ip + 1, in);
86d7f5d3SJohn Marino	    }
86d7f5d3SJohn Marino	}
86d7f5d3SJohn Marino
86d7f5d3SJohn Marino       /* |_______________________|   dividend
86d7f5d3SJohn Marino			 |________|   divisor  */
86d7f5d3SJohn Marino      rp = TMP_BALLOC_LIMBS (2 * dn + 1);
86d7f5d3SJohn Marino
86d7f5d3SJohn Marino      this_in = mpn_mu_div_qr_choose_in (qn - dn, dn, 0);
86d7f5d3SJohn Marino      this_ip = ip + in - this_in;
86d7f5d3SJohn Marino      qh = mpn_preinv_mu_div_qr (tp + dn + 1, rp + dn + 1, np + dn, qn, dp, dn,
86d7f5d3SJohn Marino				 this_ip, this_in, scratch);
86d7f5d3SJohn Marino
86d7f5d3SJohn Marino      MPN_COPY (rp + 1, np, dn);
86d7f5d3SJohn Marino      rp[0] = 0;
86d7f5d3SJohn Marino      this_in = mpn_mu_divappr_q_choose_in (dn + 1, dn, 0);
86d7f5d3SJohn Marino      this_ip = ip + in - this_in;
86d7f5d3SJohn Marino      cy = mpn_preinv_mu_divappr_q (tp, rp, 2 * dn + 1, dp, dn,
86d7f5d3SJohn Marino				    this_ip, this_in, scratch);
86d7f5d3SJohn Marino
86d7f5d3SJohn Marino      if (UNLIKELY (cy != 0))
86d7f5d3SJohn Marino	{
86d7f5d3SJohn Marino	  /* Since the partial remainder fed to mpn_preinv_mu_divappr_q was
86d7f5d3SJohn Marino	     canonically reduced, replace the returned value of B^(qn-dn)+eps
86d7f5d3SJohn Marino	     by the largest possible value.  */
86d7f5d3SJohn Marino	  mp_size_t i;
86d7f5d3SJohn Marino	  for (i = 0; i < dn + 1; i++)
86d7f5d3SJohn Marino	    tp[i] = GMP_NUMB_MAX;
86d7f5d3SJohn Marino	}
86d7f5d3SJohn Marino
86d7f5d3SJohn Marino      /* The max error of mpn_mu_divappr_q is +4.  If the low quotient limb is
86d7f5d3SJohn Marino	 greater than the max error, we cannot trust the quotient.  */
86d7f5d3SJohn Marino      if (tp[0] > 4)
86d7f5d3SJohn Marino	{
86d7f5d3SJohn Marino	  MPN_COPY (qp, tp + 1, qn);
86d7f5d3SJohn Marino	}
86d7f5d3SJohn Marino      else
86d7f5d3SJohn Marino	{
86d7f5d3SJohn Marino	  mp_limb_t cy;
86d7f5d3SJohn Marino	  mp_ptr pp;
86d7f5d3SJohn Marino
86d7f5d3SJohn Marino	  /* FIXME: can we use already allocated space? */
86d7f5d3SJohn Marino	  pp = TMP_BALLOC_LIMBS (nn);
86d7f5d3SJohn Marino	  mpn_mul (pp, tp + 1, qn, dp, dn);
86d7f5d3SJohn Marino
86d7f5d3SJohn Marino	  cy = (qh != 0) ? mpn_add_n (pp + qn, pp + qn, dp, dn) : 0;
86d7f5d3SJohn Marino
86d7f5d3SJohn Marino	  if (cy || mpn_cmp (pp, np, nn) > 0) /* At most is wrong by one, no cycle. */
86d7f5d3SJohn Marino	    qh -= mpn_sub_1 (qp, tp + 1, qn, 1);
86d7f5d3SJohn Marino	  else /* Same as above */
86d7f5d3SJohn Marino	    MPN_COPY (qp, tp + 1, qn);
86d7f5d3SJohn Marino	}
86d7f5d3SJohn Marino    }
86d7f5d3SJohn Marino  else
86d7f5d3SJohn Marino    {
86d7f5d3SJohn Marino       /* |_______________________|   dividend
86d7f5d3SJohn Marino		 |________________|   divisor  */
86d7f5d3SJohn Marino
86d7f5d3SJohn Marino      /* FIXME: When nn = 2dn-1, qn becomes dn-1, and the numerator size passed
86d7f5d3SJohn Marino	 here becomes 2dn, i.e., more than nn.  This shouldn't hurt, since only
86d7f5d3SJohn Marino	 the most significant dn-1 limbs will actually be read, but it is not
86d7f5d3SJohn Marino	 pretty.  */
86d7f5d3SJohn Marino
86d7f5d3SJohn Marino      qh = mpn_mu_divappr_q (tp, np + nn - (2 * qn + 2), 2 * qn + 2,
86d7f5d3SJohn Marino			     dp + dn - (qn + 1), qn + 1, scratch);
86d7f5d3SJohn Marino
86d7f5d3SJohn Marino      /* The max error of mpn_mu_divappr_q is +4, but we get an additional
86d7f5d3SJohn Marino         error from the divisor truncation.  */
86d7f5d3SJohn Marino      if (tp[0] > 6)
86d7f5d3SJohn Marino	{
86d7f5d3SJohn Marino	  MPN_COPY (qp, tp + 1, qn);
86d7f5d3SJohn Marino	}
86d7f5d3SJohn Marino      else
86d7f5d3SJohn Marino	{
86d7f5d3SJohn Marino	  mp_limb_t cy;
86d7f5d3SJohn Marino
86d7f5d3SJohn Marino	  /* FIXME: a shorter product should be enough; we may use already
86d7f5d3SJohn Marino	     allocated space... */
86d7f5d3SJohn Marino	  rp = TMP_BALLOC_LIMBS (nn);
86d7f5d3SJohn Marino	  mpn_mul (rp, dp, dn, tp + 1, qn);
86d7f5d3SJohn Marino
86d7f5d3SJohn Marino	  cy = (qh != 0) ? mpn_add_n (rp + qn, rp + qn, dp, dn) : 0;
86d7f5d3SJohn Marino
86d7f5d3SJohn Marino	  if (cy || mpn_cmp (rp, np, nn) > 0) /* At most is wrong by one, no cycle. */
86d7f5d3SJohn Marino	    qh -= mpn_sub_1 (qp, tp + 1, qn, 1);
86d7f5d3SJohn Marino	  else /* Same as above */
86d7f5d3SJohn Marino	    MPN_COPY (qp, tp + 1, qn);
86d7f5d3SJohn Marino	}
86d7f5d3SJohn Marino    }
86d7f5d3SJohn Marino
86d7f5d3SJohn Marino  TMP_FREE;
86d7f5d3SJohn Marino  return qh;
86d7f5d3SJohn Marino}
86d7f5d3SJohn Marino
86d7f5d3SJohn Marinomp_size_t
86d7f5d3SJohn Marinompn_mu_div_q_itch (mp_size_t nn, mp_size_t dn, int mua_k)
86d7f5d3SJohn Marino{
86d7f5d3SJohn Marino  mp_size_t qn, itch1, itch2;
86d7f5d3SJohn Marino
86d7f5d3SJohn Marino  qn = nn - dn;
86d7f5d3SJohn Marino  if (qn >= dn)
86d7f5d3SJohn Marino    {
86d7f5d3SJohn Marino      itch1 = mpn_mu_div_qr_itch (qn, dn, mua_k);
86d7f5d3SJohn Marino      itch2 = mpn_mu_divappr_q_itch (2 * dn + 1, dn, mua_k);
86d7f5d3SJohn Marino      return MAX (itch1, itch2);
86d7f5d3SJohn Marino    }
86d7f5d3SJohn Marino  else
86d7f5d3SJohn Marino    {
86d7f5d3SJohn Marino      itch1 = mpn_mu_divappr_q_itch (2 * qn + 2, qn + 1, mua_k);
86d7f5d3SJohn Marino      return itch1;
86d7f5d3SJohn Marino    }
86d7f5d3SJohn Marino}