/* mpn/generic/mod_34lsub1.c */

/* mpn_mod_34lsub1 -- remainder modulo 2^(GMP_NUMB_BITS*3/4)-1.

   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST
   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
   FUTURE GNU MP RELEASES.

Copyright 2000, 2001, 2002 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 3 of the License, or (at your
option) any later version.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */


#include "gmp.h"
#include "gmp-impl.h"


/* Calculate a remainder from {p,n} divided by 2^(GMP_NUMB_BITS*3/4)-1.
   The remainder is not fully reduced; it's any limb value congruent to
   {p,n} modulo that divisor.

   This implementation is only correct when GMP_NUMB_BITS is a multiple of
   4.

   FIXME: If GMP_NAIL_BITS is some silly big value during development then
   it's possible the carry accumulators c0,c1,c2 could overflow.

   General notes:

   The basic idea is to use a set of N accumulators (N=3 in this case) to
   effectively get a remainder mod 2^(GMP_NUMB_BITS*N)-1 followed at the end
   by a reduction to GMP_NUMB_BITS*N/M bits (M=4 in this case) for a
   remainder mod 2^(GMP_NUMB_BITS*N/M)-1.  N and M are chosen to give a good
   set of small prime factors in 2^(GMP_NUMB_BITS*N/M)-1.

   N=3 M=4 suits GMP_NUMB_BITS==32 and GMP_NUMB_BITS==64 quite well, giving
   a few more primes than a single accumulator N=1 does, and for no extra
   cost (assuming the processor has a decent number of registers).

   For strange nailified values of GMP_NUMB_BITS the idea would be to look
   for what N and M give good primes.  With GMP_NUMB_BITS not a power of 2
   the choices for M may be opened up a bit.  But such things are probably
   best done in separate code, not grafted on here.  */

86d7f5d3SJohn Marino#if GMP_NUMB_BITS % 4 == 0
86d7f5d3SJohn Marino
86d7f5d3SJohn Marino#define B1  (GMP_NUMB_BITS / 4)
86d7f5d3SJohn Marino#define B2  (B1 * 2)
86d7f5d3SJohn Marino#define B3  (B1 * 3)
86d7f5d3SJohn Marino
86d7f5d3SJohn Marino#define M1  ((CNST_LIMB(1) << B1) - 1)
86d7f5d3SJohn Marino#define M2  ((CNST_LIMB(1) << B2) - 1)
86d7f5d3SJohn Marino#define M3  ((CNST_LIMB(1) << B3) - 1)
86d7f5d3SJohn Marino
86d7f5d3SJohn Marino#define LOW0(n)      ((n) & M3)
86d7f5d3SJohn Marino#define HIGH0(n)     ((n) >> B3)
86d7f5d3SJohn Marino
86d7f5d3SJohn Marino#define LOW1(n)      (((n) & M2) << B1)
86d7f5d3SJohn Marino#define HIGH1(n)     ((n) >> B2)
86d7f5d3SJohn Marino
86d7f5d3SJohn Marino#define LOW2(n)      (((n) & M1) << B2)
86d7f5d3SJohn Marino#define HIGH2(n)     ((n) >> B1)
86d7f5d3SJohn Marino
86d7f5d3SJohn Marino#define PARTS0(n)    (LOW0(n) + HIGH0(n))
86d7f5d3SJohn Marino#define PARTS1(n)    (LOW1(n) + HIGH1(n))
86d7f5d3SJohn Marino#define PARTS2(n)    (LOW2(n) + HIGH2(n))
86d7f5d3SJohn Marino
86d7f5d3SJohn Marino#define ADD(c,a,val)                    \
86d7f5d3SJohn Marino  do {                                  \
86d7f5d3SJohn Marino    mp_limb_t  new_c;                   \
86d7f5d3SJohn Marino    ADDC_LIMB (new_c, a, a, val);       \
86d7f5d3SJohn Marino    (c) += new_c;                       \
86d7f5d3SJohn Marino  } while (0)
86d7f5d3SJohn Marino
86d7f5d3SJohn Marinomp_limb_t
86d7f5d3SJohn Marinompn_mod_34lsub1 (mp_srcptr p, mp_size_t n)
86d7f5d3SJohn Marino{
86d7f5d3SJohn Marino  mp_limb_t  c0 = 0;
86d7f5d3SJohn Marino  mp_limb_t  c1 = 0;
86d7f5d3SJohn Marino  mp_limb_t  c2 = 0;
86d7f5d3SJohn Marino  mp_limb_t  a0, a1, a2;
86d7f5d3SJohn Marino
86d7f5d3SJohn Marino  ASSERT (n >= 1);
86d7f5d3SJohn Marino  ASSERT (n/3 < GMP_NUMB_MAX);
86d7f5d3SJohn Marino
86d7f5d3SJohn Marino  a0 = a1 = a2 = 0;
86d7f5d3SJohn Marino  c0 = c1 = c2 = 0;
86d7f5d3SJohn Marino
86d7f5d3SJohn Marino  while ((n -= 3) >= 0)
86d7f5d3SJohn Marino    {
86d7f5d3SJohn Marino      ADD (c0, a0, p[0]);
86d7f5d3SJohn Marino      ADD (c1, a1, p[1]);
86d7f5d3SJohn Marino      ADD (c2, a2, p[2]);
86d7f5d3SJohn Marino      p += 3;
86d7f5d3SJohn Marino    }
86d7f5d3SJohn Marino
86d7f5d3SJohn Marino  if (n != -3)
86d7f5d3SJohn Marino    {
86d7f5d3SJohn Marino      ADD (c0, a0, p[0]);
86d7f5d3SJohn Marino      if (n != -2)
86d7f5d3SJohn Marino	ADD (c1, a1, p[1]);
86d7f5d3SJohn Marino    }
86d7f5d3SJohn Marino
86d7f5d3SJohn Marino  return
86d7f5d3SJohn Marino    PARTS0 (a0) + PARTS1 (a1) + PARTS2 (a2)
86d7f5d3SJohn Marino    + PARTS1 (c0) + PARTS2 (c1) + PARTS0 (c2);
86d7f5d3SJohn Marino}
86d7f5d3SJohn Marino
86d7f5d3SJohn Marino#endif