xref: /dflybsd-src/contrib/gmp/mpn/generic/mod_1_4.c (revision 86d7f5d305c6adaa56ff4582ece9859d73106103)
186d7f5d3SJohn Marino /* mpn_mod_1s_4p (ap, n, b, cps)
286d7f5d3SJohn Marino    Divide (ap,,n) by b.  Return the single-limb remainder.
386d7f5d3SJohn Marino    Requires that b < B / 4.
486d7f5d3SJohn Marino 
586d7f5d3SJohn Marino    Contributed to the GNU project by Torbjorn Granlund.
686d7f5d3SJohn Marino 
786d7f5d3SJohn Marino    THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
886d7f5d3SJohn Marino    SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
986d7f5d3SJohn Marino    GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
1086d7f5d3SJohn Marino 
1186d7f5d3SJohn Marino Copyright 2008, 2009 Free Software Foundation, Inc.
1286d7f5d3SJohn Marino 
1386d7f5d3SJohn Marino This file is part of the GNU MP Library.
1486d7f5d3SJohn Marino 
1586d7f5d3SJohn Marino The GNU MP Library is free software; you can redistribute it and/or modify
1686d7f5d3SJohn Marino it under the terms of the GNU Lesser General Public License as published by
1786d7f5d3SJohn Marino the Free Software Foundation; either version 3 of the License, or (at your
1886d7f5d3SJohn Marino option) any later version.
1986d7f5d3SJohn Marino 
2086d7f5d3SJohn Marino The GNU MP Library is distributed in the hope that it will be useful, but
2186d7f5d3SJohn Marino WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
2286d7f5d3SJohn Marino or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
2386d7f5d3SJohn Marino License for more details.
2486d7f5d3SJohn Marino 
2586d7f5d3SJohn Marino You should have received a copy of the GNU Lesser General Public License
2686d7f5d3SJohn Marino along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
2786d7f5d3SJohn Marino 
2886d7f5d3SJohn Marino #include "gmp.h"
2986d7f5d3SJohn Marino #include "gmp-impl.h"
3086d7f5d3SJohn Marino #include "longlong.h"
3186d7f5d3SJohn Marino 
3286d7f5d3SJohn Marino void
mpn_mod_1s_4p_cps(mp_limb_t cps[7],mp_limb_t b)3386d7f5d3SJohn Marino mpn_mod_1s_4p_cps (mp_limb_t cps[7], mp_limb_t b)
3486d7f5d3SJohn Marino {
3586d7f5d3SJohn Marino   mp_limb_t bi;
3686d7f5d3SJohn Marino   mp_limb_t B1modb, B2modb, B3modb, B4modb, B5modb;
3786d7f5d3SJohn Marino   int cnt;
3886d7f5d3SJohn Marino 
3986d7f5d3SJohn Marino   ASSERT (b <= (~(mp_limb_t) 0) / 4);
4086d7f5d3SJohn Marino 
4186d7f5d3SJohn Marino   count_leading_zeros (cnt, b);
4286d7f5d3SJohn Marino 
4386d7f5d3SJohn Marino   b <<= cnt;
4486d7f5d3SJohn Marino   invert_limb (bi, b);
4586d7f5d3SJohn Marino 
4686d7f5d3SJohn Marino   B1modb = -b * ((bi >> (GMP_LIMB_BITS-cnt)) | (CNST_LIMB(1) << cnt));
4786d7f5d3SJohn Marino   ASSERT (B1modb <= b);		/* NB: not fully reduced mod b */
4886d7f5d3SJohn Marino   udiv_rnd_preinv (B2modb, B1modb, b, bi);
4986d7f5d3SJohn Marino   udiv_rnd_preinv (B3modb, B2modb, b, bi);
5086d7f5d3SJohn Marino   udiv_rnd_preinv (B4modb, B3modb, b, bi);
5186d7f5d3SJohn Marino   udiv_rnd_preinv (B5modb, B4modb, b, bi);
5286d7f5d3SJohn Marino 
5386d7f5d3SJohn Marino   cps[0] = bi;
5486d7f5d3SJohn Marino   cps[1] = cnt;
5586d7f5d3SJohn Marino   cps[2] = B1modb >> cnt;
5686d7f5d3SJohn Marino   cps[3] = B2modb >> cnt;
5786d7f5d3SJohn Marino   cps[4] = B3modb >> cnt;
5886d7f5d3SJohn Marino   cps[5] = B4modb >> cnt;
5986d7f5d3SJohn Marino   cps[6] = B5modb >> cnt;
6086d7f5d3SJohn Marino 
6186d7f5d3SJohn Marino #if WANT_ASSERT
6286d7f5d3SJohn Marino   {
6386d7f5d3SJohn Marino     int i;
6486d7f5d3SJohn Marino     b = cps[2];
6586d7f5d3SJohn Marino     for (i = 3; i <= 6; i++)
6686d7f5d3SJohn Marino       {
6786d7f5d3SJohn Marino 	b += cps[i];
6886d7f5d3SJohn Marino 	ASSERT (b >= cps[i]);
6986d7f5d3SJohn Marino       }
7086d7f5d3SJohn Marino   }
7186d7f5d3SJohn Marino #endif
7286d7f5d3SJohn Marino }
7386d7f5d3SJohn Marino 
7486d7f5d3SJohn Marino mp_limb_t
mpn_mod_1s_4p(mp_srcptr ap,mp_size_t n,mp_limb_t b,mp_limb_t cps[7])7586d7f5d3SJohn Marino mpn_mod_1s_4p (mp_srcptr ap, mp_size_t n, mp_limb_t b, mp_limb_t cps[7])
7686d7f5d3SJohn Marino {
7786d7f5d3SJohn Marino   mp_limb_t rh, rl, bi, q, ph, pl, ch, cl, r;
7886d7f5d3SJohn Marino   mp_limb_t B1modb, B2modb, B3modb, B4modb, B5modb;
7986d7f5d3SJohn Marino   mp_size_t i;
8086d7f5d3SJohn Marino   int cnt;
8186d7f5d3SJohn Marino 
8286d7f5d3SJohn Marino   ASSERT (n >= 1);
8386d7f5d3SJohn Marino 
8486d7f5d3SJohn Marino   B1modb = cps[2];
8586d7f5d3SJohn Marino   B2modb = cps[3];
8686d7f5d3SJohn Marino   B3modb = cps[4];
8786d7f5d3SJohn Marino   B4modb = cps[5];
8886d7f5d3SJohn Marino   B5modb = cps[6];
8986d7f5d3SJohn Marino 
9086d7f5d3SJohn Marino   switch (n & 3)
9186d7f5d3SJohn Marino     {
9286d7f5d3SJohn Marino     case 0:
9386d7f5d3SJohn Marino       umul_ppmm (ph, pl, ap[n - 3], B1modb);
9486d7f5d3SJohn Marino       add_ssaaaa (ph, pl, ph, pl, 0, ap[n - 4]);
9586d7f5d3SJohn Marino       umul_ppmm (ch, cl, ap[n - 2], B2modb);
9686d7f5d3SJohn Marino       add_ssaaaa (ph, pl, ph, pl, ch, cl);
9786d7f5d3SJohn Marino       umul_ppmm (rh, rl, ap[n - 1], B3modb);
9886d7f5d3SJohn Marino       add_ssaaaa (rh, rl, rh, rl, ph, pl);
9986d7f5d3SJohn Marino       n -= 4;
10086d7f5d3SJohn Marino       break;
10186d7f5d3SJohn Marino     case 1:
10286d7f5d3SJohn Marino       rh = 0;
10386d7f5d3SJohn Marino       rl = ap[n - 1];
10486d7f5d3SJohn Marino       n -= 1;
10586d7f5d3SJohn Marino       break;
10686d7f5d3SJohn Marino     case 2:
10786d7f5d3SJohn Marino       umul_ppmm (ph, pl, ap[n - 1], B1modb);
10886d7f5d3SJohn Marino       add_ssaaaa (rh, rl, ph, pl, 0, ap[n - 2]);
10986d7f5d3SJohn Marino       n -= 2;
11086d7f5d3SJohn Marino       break;
11186d7f5d3SJohn Marino     case 3:
11286d7f5d3SJohn Marino       umul_ppmm (ph, pl, ap[n - 2], B1modb);
11386d7f5d3SJohn Marino       add_ssaaaa (ph, pl, ph, pl, 0, ap[n - 3]);
11486d7f5d3SJohn Marino       umul_ppmm (rh, rl, ap[n - 1], B2modb);
11586d7f5d3SJohn Marino       add_ssaaaa (rh, rl, rh, rl, ph, pl);
11686d7f5d3SJohn Marino       n -= 3;
11786d7f5d3SJohn Marino       break;
11886d7f5d3SJohn Marino     }
11986d7f5d3SJohn Marino 
12086d7f5d3SJohn Marino   for (i = n - 4; i >= 0; i -= 4)
12186d7f5d3SJohn Marino     {
12286d7f5d3SJohn Marino       /* rr = ap[i]				< B
12386d7f5d3SJohn Marino 	    + ap[i+1] * (B mod b)		<= (B-1)(b-1)
12486d7f5d3SJohn Marino 	    + ap[i+2] * (B^2 mod b)		<= (B-1)(b-1)
12586d7f5d3SJohn Marino 	    + ap[i+3] * (B^3 mod b)		<= (B-1)(b-1)
12686d7f5d3SJohn Marino 	    + LO(rr)  * (B^4 mod b)		<= (B-1)(b-1)
12786d7f5d3SJohn Marino 	    + HI(rr)  * (B^5 mod b)		<= (B-1)(b-1)
12886d7f5d3SJohn Marino       */
12986d7f5d3SJohn Marino       umul_ppmm (ph, pl, ap[i + 1], B1modb);
13086d7f5d3SJohn Marino       add_ssaaaa (ph, pl, ph, pl, 0, ap[i + 0]);
13186d7f5d3SJohn Marino 
13286d7f5d3SJohn Marino       umul_ppmm (ch, cl, ap[i + 2], B2modb);
13386d7f5d3SJohn Marino       add_ssaaaa (ph, pl, ph, pl, ch, cl);
13486d7f5d3SJohn Marino 
13586d7f5d3SJohn Marino       umul_ppmm (ch, cl, ap[i + 3], B3modb);
13686d7f5d3SJohn Marino       add_ssaaaa (ph, pl, ph, pl, ch, cl);
13786d7f5d3SJohn Marino 
13886d7f5d3SJohn Marino       umul_ppmm (ch, cl, rl, B4modb);
13986d7f5d3SJohn Marino       add_ssaaaa (ph, pl, ph, pl, ch, cl);
14086d7f5d3SJohn Marino 
14186d7f5d3SJohn Marino       umul_ppmm (rh, rl, rh, B5modb);
14286d7f5d3SJohn Marino       add_ssaaaa (rh, rl, rh, rl, ph, pl);
14386d7f5d3SJohn Marino     }
14486d7f5d3SJohn Marino 
14586d7f5d3SJohn Marino   bi = cps[0];
14686d7f5d3SJohn Marino   cnt = cps[1];
14786d7f5d3SJohn Marino 
14886d7f5d3SJohn Marino #if 1
14986d7f5d3SJohn Marino   umul_ppmm (rh, cl, rh, B1modb);
15086d7f5d3SJohn Marino   add_ssaaaa (rh, rl, rh, rl, 0, cl);
15186d7f5d3SJohn Marino   r = (rh << cnt) | (rl >> (GMP_LIMB_BITS - cnt));
15286d7f5d3SJohn Marino #else
15386d7f5d3SJohn Marino   udiv_qrnnd_preinv (q, r, rh >> (GMP_LIMB_BITS - cnt),
15486d7f5d3SJohn Marino 		     (rh << cnt) | (rl >> (GMP_LIMB_BITS - cnt)), b, bi);
15586d7f5d3SJohn Marino   ASSERT (q <= 4);	/* optimize for small quotient? */
15686d7f5d3SJohn Marino #endif
15786d7f5d3SJohn Marino 
15886d7f5d3SJohn Marino   udiv_qrnnd_preinv (q, r, r, rl << cnt, b, bi);
15986d7f5d3SJohn Marino 
16086d7f5d3SJohn Marino   return r >> cnt;
16186d7f5d3SJohn Marino }
162