xref: /dflybsd-src/contrib/gmp/mpn/generic/mod_34lsub1.c (revision 86d7f5d305c6adaa56ff4582ece9859d73106103)
186d7f5d3SJohn Marino /* mpn_mod_34lsub1 -- remainder modulo 2^(GMP_NUMB_BITS*3/4)-1.
286d7f5d3SJohn Marino 
386d7f5d3SJohn Marino    THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST
486d7f5d3SJohn Marino    CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
586d7f5d3SJohn Marino    FUTURE GNU MP RELEASES.
686d7f5d3SJohn Marino 
786d7f5d3SJohn Marino Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
886d7f5d3SJohn Marino 
986d7f5d3SJohn Marino This file is part of the GNU MP Library.
1086d7f5d3SJohn Marino 
1186d7f5d3SJohn Marino The GNU MP Library is free software; you can redistribute it and/or modify
1286d7f5d3SJohn Marino it under the terms of the GNU Lesser General Public License as published by
1386d7f5d3SJohn Marino the Free Software Foundation; either version 3 of the License, or (at your
1486d7f5d3SJohn Marino option) any later version.
1586d7f5d3SJohn Marino 
1686d7f5d3SJohn Marino The GNU MP Library is distributed in the hope that it will be useful, but
1786d7f5d3SJohn Marino WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
1886d7f5d3SJohn Marino or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
1986d7f5d3SJohn Marino License for more details.
2086d7f5d3SJohn Marino 
2186d7f5d3SJohn Marino You should have received a copy of the GNU Lesser General Public License
2286d7f5d3SJohn Marino along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
2386d7f5d3SJohn Marino 
2486d7f5d3SJohn Marino 
2586d7f5d3SJohn Marino #include "gmp.h"
2686d7f5d3SJohn Marino #include "gmp-impl.h"
2786d7f5d3SJohn Marino 
2886d7f5d3SJohn Marino 
2986d7f5d3SJohn Marino /* Calculate a remainder from {p,n} divided by 2^(GMP_NUMB_BITS*3/4)-1.
3086d7f5d3SJohn Marino    The remainder is not fully reduced, it's any limb value congruent to
3186d7f5d3SJohn Marino    {p,n} modulo that divisor.
3286d7f5d3SJohn Marino 
3386d7f5d3SJohn Marino    This implementation is only correct when GMP_NUMB_BITS is a multiple of
3486d7f5d3SJohn Marino    4.
3586d7f5d3SJohn Marino 
3686d7f5d3SJohn Marino    FIXME: If GMP_NAIL_BITS is some silly big value during development then
3786d7f5d3SJohn Marino    it's possible the carry accumulators c0,c1,c2 could overflow.
3886d7f5d3SJohn Marino 
3986d7f5d3SJohn Marino    General notes:
4086d7f5d3SJohn Marino 
4186d7f5d3SJohn Marino    The basic idea is to use a set of N accumulators (N=3 in this case) to
4286d7f5d3SJohn Marino    effectively get a remainder mod 2^(GMP_NUMB_BITS*N)-1 followed at the end
4386d7f5d3SJohn Marino    by a reduction to GMP_NUMB_BITS*N/M bits (M=4 in this case) for a
4486d7f5d3SJohn Marino    remainder mod 2^(GMP_NUMB_BITS*N/M)-1.  N and M are chosen to give a good
4586d7f5d3SJohn Marino    set of small prime factors in 2^(GMP_NUMB_BITS*N/M)-1.
4686d7f5d3SJohn Marino 
4786d7f5d3SJohn Marino    N=3 M=4 suits GMP_NUMB_BITS==32 and GMP_NUMB_BITS==64 quite well, giving
4886d7f5d3SJohn Marino    a few more primes than a single accumulator N=1 does, and for no extra
4986d7f5d3SJohn Marino    cost (assuming the processor has a decent number of registers).
5086d7f5d3SJohn Marino 
5186d7f5d3SJohn Marino    For strange nailified values of GMP_NUMB_BITS the idea would be to look
5286d7f5d3SJohn Marino    for what N and M give good primes.  With GMP_NUMB_BITS not a power of 2
5386d7f5d3SJohn Marino    the choices for M may be opened up a bit.  But such things are probably
5486d7f5d3SJohn Marino    best done in separate code, not grafted on here.  */
5586d7f5d3SJohn Marino 
5686d7f5d3SJohn Marino #if GMP_NUMB_BITS % 4 == 0
5786d7f5d3SJohn Marino 
5886d7f5d3SJohn Marino #define B1  (GMP_NUMB_BITS / 4)
5986d7f5d3SJohn Marino #define B2  (B1 * 2)
6086d7f5d3SJohn Marino #define B3  (B1 * 3)
6186d7f5d3SJohn Marino 
6286d7f5d3SJohn Marino #define M1  ((CNST_LIMB(1) << B1) - 1)
6386d7f5d3SJohn Marino #define M2  ((CNST_LIMB(1) << B2) - 1)
6486d7f5d3SJohn Marino #define M3  ((CNST_LIMB(1) << B3) - 1)
6586d7f5d3SJohn Marino 
6686d7f5d3SJohn Marino #define LOW0(n)      ((n) & M3)
6786d7f5d3SJohn Marino #define HIGH0(n)     ((n) >> B3)
6886d7f5d3SJohn Marino 
6986d7f5d3SJohn Marino #define LOW1(n)      (((n) & M2) << B1)
7086d7f5d3SJohn Marino #define HIGH1(n)     ((n) >> B2)
7186d7f5d3SJohn Marino 
7286d7f5d3SJohn Marino #define LOW2(n)      (((n) & M1) << B2)
7386d7f5d3SJohn Marino #define HIGH2(n)     ((n) >> B1)
7486d7f5d3SJohn Marino 
7586d7f5d3SJohn Marino #define PARTS0(n)    (LOW0(n) + HIGH0(n))
7686d7f5d3SJohn Marino #define PARTS1(n)    (LOW1(n) + HIGH1(n))
7786d7f5d3SJohn Marino #define PARTS2(n)    (LOW2(n) + HIGH2(n))
7886d7f5d3SJohn Marino 
7986d7f5d3SJohn Marino #define ADD(c,a,val)                    \
8086d7f5d3SJohn Marino   do {                                  \
8186d7f5d3SJohn Marino     mp_limb_t  new_c;                   \
8286d7f5d3SJohn Marino     ADDC_LIMB (new_c, a, a, val);       \
8386d7f5d3SJohn Marino     (c) += new_c;                       \
8486d7f5d3SJohn Marino   } while (0)
8586d7f5d3SJohn Marino 
8686d7f5d3SJohn Marino mp_limb_t
mpn_mod_34lsub1(mp_srcptr p,mp_size_t n)8786d7f5d3SJohn Marino mpn_mod_34lsub1 (mp_srcptr p, mp_size_t n)
8886d7f5d3SJohn Marino {
8986d7f5d3SJohn Marino   mp_limb_t  c0 = 0;
9086d7f5d3SJohn Marino   mp_limb_t  c1 = 0;
9186d7f5d3SJohn Marino   mp_limb_t  c2 = 0;
9286d7f5d3SJohn Marino   mp_limb_t  a0, a1, a2;
9386d7f5d3SJohn Marino 
9486d7f5d3SJohn Marino   ASSERT (n >= 1);
9586d7f5d3SJohn Marino   ASSERT (n/3 < GMP_NUMB_MAX);
9686d7f5d3SJohn Marino 
9786d7f5d3SJohn Marino   a0 = a1 = a2 = 0;
9886d7f5d3SJohn Marino   c0 = c1 = c2 = 0;
9986d7f5d3SJohn Marino 
10086d7f5d3SJohn Marino   while ((n -= 3) >= 0)
10186d7f5d3SJohn Marino     {
10286d7f5d3SJohn Marino       ADD (c0, a0, p[0]);
10386d7f5d3SJohn Marino       ADD (c1, a1, p[1]);
10486d7f5d3SJohn Marino       ADD (c2, a2, p[2]);
10586d7f5d3SJohn Marino       p += 3;
10686d7f5d3SJohn Marino     }
10786d7f5d3SJohn Marino 
10886d7f5d3SJohn Marino   if (n != -3)
10986d7f5d3SJohn Marino     {
11086d7f5d3SJohn Marino       ADD (c0, a0, p[0]);
11186d7f5d3SJohn Marino       if (n != -2)
11286d7f5d3SJohn Marino 	ADD (c1, a1, p[1]);
11386d7f5d3SJohn Marino     }
11486d7f5d3SJohn Marino 
11586d7f5d3SJohn Marino   return
11686d7f5d3SJohn Marino     PARTS0 (a0) + PARTS1 (a1) + PARTS2 (a2)
11786d7f5d3SJohn Marino     + PARTS1 (c0) + PARTS2 (c1) + PARTS0 (c2);
11886d7f5d3SJohn Marino }
11986d7f5d3SJohn Marino 
12086d7f5d3SJohn Marino #endif
121