/* mpn_mul -- Multiply two natural numbers.

Copyright (C) 1991, 1993, 1994, 1996 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or (at your
option) any later version.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
MA 02111-1307, USA. */

#include <config.h>
#include "gmp-impl.h"
/* Multiply the natural numbers u (pointed to by UP, with USIZE limbs)
   and v (pointed to by VP, with VSIZE limbs), and store the result at
   PRODP.  USIZE + VSIZE limbs are always stored, but the most
   significant of them may be zero even when the input operands are
   normalized.  Return the most significant limb of the result.

   NOTE: The space pointed to by PRODP is overwritten before U and V
   have been fully read, so overlapping the result with either operand
   is an error.

   Argument constraints:
   1. USIZE >= VSIZE.
   2. PRODP != UP and PRODP != VP, i.e. the destination
      must be distinct from the multiplier and the multiplicand.  */
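/* Usage sketch (illustrative only, not part of the library): multiply
   a 4-limb number U by a 2-limb number V.  The destination must
   provide USIZE + VSIZE = 6 limbs and may not overlap either input;
   the returned limb equals the stored top limb.

     mp_limb_t u[4], v[2], prod[6];
     mp_limb_t msl;
     ... fill u and v, least significant limb first ...
     msl = mpn_mul (prod, u, 4, v, 2);
     ... now msl == prod[5], which may be zero ...
*/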

/* If KARATSUBA_THRESHOLD is not already defined, define it to a
   value which is good on most machines.  */
#ifndef KARATSUBA_THRESHOLD
#define KARATSUBA_THRESHOLD 32
#endif
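/* Rationale: below the threshold, the schoolbook loop in mpn_mul wins
   on its lower per-limb overhead; above it, Karatsuba's
   O(n^log2(3)) ~ O(n^1.585) recursion is expected to be faster.  The
   best crossover point is machine-dependent, hence the override hook
   above.  */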

mp_limb_t
#if __STDC__
mpn_mul (mp_ptr prodp,
         mp_srcptr up, mp_size_t usize,
         mp_srcptr vp, mp_size_t vsize)
#else
mpn_mul (prodp, up, usize, vp, vsize)
     mp_ptr prodp;
     mp_srcptr up;
     mp_size_t usize;
     mp_srcptr vp;
     mp_size_t vsize;
#endif
{
  mp_ptr prod_endp = prodp + usize + vsize - 1;
  mp_limb_t cy;
  mp_ptr tspace;

  if (vsize < KARATSUBA_THRESHOLD)
    {
      /* Handle simple cases with traditional multiplication.

         This is the most critical code of the entire function.  All
         multiplies rely on this, both small and huge.  Small ones arrive
         here immediately.  Huge ones arrive here as this is the base case
         for Karatsuba's recursive algorithm below.  */
      mp_size_t i;
      mp_limb_t cy_limb;
      mp_limb_t v_limb;

      if (vsize == 0)
        return 0;

      /* Multiply by the first limb in V separately, as the result can be
         stored (not added) to PROD.  We also avoid a loop for zeroing.  */
      v_limb = vp[0];
      if (v_limb <= 1)
        {
          if (v_limb == 1)
            MPN_COPY (prodp, up, usize);
          else
            MPN_ZERO (prodp, usize);
          cy_limb = 0;
        }
      else
        cy_limb = mpn_mul_1 (prodp, up, usize, v_limb);

      prodp[usize] = cy_limb;
      prodp++;

      /* For each iteration in the outer loop, multiply one limb from
         U with one limb from V, and add it to PROD.  */
      for (i = 1; i < vsize; i++)
        {
          v_limb = vp[i];
          if (v_limb <= 1)
            {
              cy_limb = 0;
              if (v_limb == 1)
                cy_limb = mpn_add_n (prodp, prodp, up, usize);
            }
          else
            cy_limb = mpn_addmul_1 (prodp, up, usize, v_limb);

          prodp[usize] = cy_limb;
          prodp++;
        }
      return cy_limb;
    }

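  /* V is shorter than U.  Split U into vsize-limb chunks and multiply
     each chunk by V with the balanced routine, accumulating the
     partial products at increasing limb offsets.  MPN_MUL_N_RECURSE
     multiplies two equally sized operands; above the threshold it uses
     a Karatsuba-style recursion based on the identity (for
     x = x1*B + x0, y = y1*B + y0, with B a power of the limb base)

       x*y = x1*y1*B^2 + ((x1 + x0)*(y1 + y0) - x1*y1 - x0*y0)*B + x0*y0

     (or an equivalent subtractive variant), trading one of the four
     half-size multiplications for a few additions.  TSPACE supplies
     the scratch limbs the recursion needs.  */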
  tspace = (mp_ptr) alloca (2 * vsize * BYTES_PER_MP_LIMB);
  MPN_MUL_N_RECURSE (prodp, up, vp, vsize, tspace);

  prodp += vsize;
  up += vsize;
  usize -= vsize;
  if (usize >= vsize)
    {
      mp_ptr tp = (mp_ptr) alloca (2 * vsize * BYTES_PER_MP_LIMB);
      do
        {
          MPN_MUL_N_RECURSE (tp, up, vp, vsize, tspace);
          cy = mpn_add_n (prodp, prodp, tp, vsize);
          mpn_add_1 (prodp + vsize, tp + vsize, vsize, cy);
          prodp += vsize;
          up += vsize;
          usize -= vsize;
        }
      while (usize >= vsize);
    }

  /* True: usize < vsize.  */

  /* Make life simple: Recurse.  */

  if (usize != 0)
    {
      mpn_mul (tspace, vp, vsize, up, usize);
      cy = mpn_add_n (prodp, prodp, tspace, vsize);
      mpn_add_1 (prodp + vsize, tspace + vsize, usize, cy);
    }

  return *prod_endp;
}