xref: /dflybsd-src/contrib/gmp/mpn/generic/mod_1_3.c (revision 86d7f5d305c6adaa56ff4582ece9859d73106103)
/* mpn_mod_1s_3p (ap, n, b, cps)
   Divide (ap,,n) by b.  Return the single-limb remainder.
   Requires that b < B / 3.

   Contributed to the GNU project by Torbjorn Granlund.

   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.

Copyright 2008, 2009, 2010 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 3 of the License, or (at your
option) any later version.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
2786d7f5d3SJohn Marino 
2886d7f5d3SJohn Marino #include "gmp.h"
2986d7f5d3SJohn Marino #include "gmp-impl.h"
3086d7f5d3SJohn Marino #include "longlong.h"
3186d7f5d3SJohn Marino 
3286d7f5d3SJohn Marino void
mpn_mod_1s_3p_cps(mp_limb_t cps[6],mp_limb_t b)3386d7f5d3SJohn Marino mpn_mod_1s_3p_cps (mp_limb_t cps[6], mp_limb_t b)
3486d7f5d3SJohn Marino {
3586d7f5d3SJohn Marino   mp_limb_t bi;
3686d7f5d3SJohn Marino   mp_limb_t B1modb, B2modb, B3modb, B4modb;
3786d7f5d3SJohn Marino   int cnt;
3886d7f5d3SJohn Marino 
3986d7f5d3SJohn Marino   ASSERT (b <= (~(mp_limb_t) 0) / 3);
4086d7f5d3SJohn Marino 
4186d7f5d3SJohn Marino   count_leading_zeros (cnt, b);
4286d7f5d3SJohn Marino 
4386d7f5d3SJohn Marino   b <<= cnt;
4486d7f5d3SJohn Marino   invert_limb (bi, b);
4586d7f5d3SJohn Marino 
4686d7f5d3SJohn Marino   B1modb = -b * ((bi >> (GMP_LIMB_BITS-cnt)) | (CNST_LIMB(1) << cnt));
4786d7f5d3SJohn Marino   ASSERT (B1modb <= b);		/* NB: not fully reduced mod b */
4886d7f5d3SJohn Marino   udiv_rnd_preinv (B2modb, B1modb, b, bi);
4986d7f5d3SJohn Marino   udiv_rnd_preinv (B3modb, B2modb, b, bi);
5086d7f5d3SJohn Marino   udiv_rnd_preinv (B4modb, B3modb, b, bi);
5186d7f5d3SJohn Marino 
5286d7f5d3SJohn Marino   cps[0] = bi;
5386d7f5d3SJohn Marino   cps[1] = cnt;
5486d7f5d3SJohn Marino   cps[2] = B1modb >> cnt;
5586d7f5d3SJohn Marino   cps[3] = B2modb >> cnt;
5686d7f5d3SJohn Marino   cps[4] = B3modb >> cnt;
5786d7f5d3SJohn Marino   cps[5] = B4modb >> cnt;
5886d7f5d3SJohn Marino 
5986d7f5d3SJohn Marino #if WANT_ASSERT
6086d7f5d3SJohn Marino   {
6186d7f5d3SJohn Marino     int i;
6286d7f5d3SJohn Marino     b = cps[2];
6386d7f5d3SJohn Marino     for (i = 3; i <= 5; i++)
6486d7f5d3SJohn Marino       {
6586d7f5d3SJohn Marino 	b += cps[i];
6686d7f5d3SJohn Marino 	ASSERT (b >= cps[i]);
6786d7f5d3SJohn Marino       }
6886d7f5d3SJohn Marino   }
6986d7f5d3SJohn Marino #endif
7086d7f5d3SJohn Marino }
7186d7f5d3SJohn Marino 
7286d7f5d3SJohn Marino mp_limb_t
mpn_mod_1s_3p(mp_srcptr ap,mp_size_t n,mp_limb_t b,mp_limb_t cps[6])7386d7f5d3SJohn Marino mpn_mod_1s_3p (mp_srcptr ap, mp_size_t n, mp_limb_t b, mp_limb_t cps[6])
7486d7f5d3SJohn Marino {
7586d7f5d3SJohn Marino   mp_limb_t rh, rl, bi, q, ph, pl, ch, cl, r;
7686d7f5d3SJohn Marino   mp_limb_t B1modb, B2modb, B3modb, B4modb;
7786d7f5d3SJohn Marino   mp_size_t i;
7886d7f5d3SJohn Marino   int cnt;
7986d7f5d3SJohn Marino 
8086d7f5d3SJohn Marino   ASSERT (n >= 1);
8186d7f5d3SJohn Marino 
8286d7f5d3SJohn Marino   B1modb = cps[2];
8386d7f5d3SJohn Marino   B2modb = cps[3];
8486d7f5d3SJohn Marino   B3modb = cps[4];
8586d7f5d3SJohn Marino   B4modb = cps[5];
8686d7f5d3SJohn Marino 
8786d7f5d3SJohn Marino   /* We compute n mod 3 in a tricky way, which works except for when n is so
8886d7f5d3SJohn Marino      close to the maximum size that we don't need to support it.  The final
8986d7f5d3SJohn Marino      cast to int is a workaround for HP cc.  */
9086d7f5d3SJohn Marino   switch ((int) ((mp_limb_t) n * MODLIMB_INVERSE_3 >> (GMP_NUMB_BITS - 2)))
9186d7f5d3SJohn Marino     {
9286d7f5d3SJohn Marino     case 0:
9386d7f5d3SJohn Marino       umul_ppmm (ph, pl, ap[n - 2], B1modb);
9486d7f5d3SJohn Marino       add_ssaaaa (ph, pl, ph, pl, 0, ap[n - 3]);
9586d7f5d3SJohn Marino       umul_ppmm (rh, rl, ap[n - 1], B2modb);
9686d7f5d3SJohn Marino       add_ssaaaa (rh, rl, rh, rl, ph, pl);
9786d7f5d3SJohn Marino       n -= 3;
9886d7f5d3SJohn Marino       break;
9986d7f5d3SJohn Marino     case 2:	/* n mod 3 = 1 */
10086d7f5d3SJohn Marino       rh = 0;
10186d7f5d3SJohn Marino       rl = ap[n - 1];
10286d7f5d3SJohn Marino       n -= 1;
10386d7f5d3SJohn Marino       break;
10486d7f5d3SJohn Marino     case 1:	/* n mod 3 = 2 */
10586d7f5d3SJohn Marino       umul_ppmm (ph, pl, ap[n - 1], B1modb);
10686d7f5d3SJohn Marino       add_ssaaaa (rh, rl, ph, pl, 0, ap[n - 2]);
10786d7f5d3SJohn Marino       n -= 2;
10886d7f5d3SJohn Marino       break;
10986d7f5d3SJohn Marino     }
11086d7f5d3SJohn Marino 
11186d7f5d3SJohn Marino   for (i = n - 3; i >= 0; i -= 3)
11286d7f5d3SJohn Marino     {
11386d7f5d3SJohn Marino       /* rr = ap[i]				< B
11486d7f5d3SJohn Marino 	    + ap[i+1] * (B mod b)		<= (B-1)(b-1)
11586d7f5d3SJohn Marino 	    + ap[i+2] * (B^2 mod b)		<= (B-1)(b-1)
11686d7f5d3SJohn Marino 	    + LO(rr)  * (B^3 mod b)		<= (B-1)(b-1)
11786d7f5d3SJohn Marino 	    + HI(rr)  * (B^4 mod b)		<= (B-1)(b-1)
11886d7f5d3SJohn Marino       */
11986d7f5d3SJohn Marino       umul_ppmm (ph, pl, ap[i + 1], B1modb);
12086d7f5d3SJohn Marino       add_ssaaaa (ph, pl, ph, pl, 0, ap[i + 0]);
12186d7f5d3SJohn Marino 
12286d7f5d3SJohn Marino       umul_ppmm (ch, cl, ap[i + 2], B2modb);
12386d7f5d3SJohn Marino       add_ssaaaa (ph, pl, ph, pl, ch, cl);
12486d7f5d3SJohn Marino 
12586d7f5d3SJohn Marino       umul_ppmm (ch, cl, rl, B3modb);
12686d7f5d3SJohn Marino       add_ssaaaa (ph, pl, ph, pl, ch, cl);
12786d7f5d3SJohn Marino 
12886d7f5d3SJohn Marino       umul_ppmm (rh, rl, rh, B4modb);
12986d7f5d3SJohn Marino       add_ssaaaa (rh, rl, rh, rl, ph, pl);
13086d7f5d3SJohn Marino     }
13186d7f5d3SJohn Marino 
13286d7f5d3SJohn Marino   bi = cps[0];
13386d7f5d3SJohn Marino   cnt = cps[1];
13486d7f5d3SJohn Marino 
13586d7f5d3SJohn Marino #if 1
13686d7f5d3SJohn Marino   umul_ppmm (rh, cl, rh, B1modb);
13786d7f5d3SJohn Marino   add_ssaaaa (rh, rl, rh, rl, 0, cl);
13886d7f5d3SJohn Marino   r = (rh << cnt) | (rl >> (GMP_LIMB_BITS - cnt));
13986d7f5d3SJohn Marino #else
14086d7f5d3SJohn Marino   udiv_qrnnd_preinv (q, r, rh >> (GMP_LIMB_BITS - cnt),
14186d7f5d3SJohn Marino 		     (rh << cnt) | (rl >> (GMP_LIMB_BITS - cnt)), b, bi);
14286d7f5d3SJohn Marino   ASSERT (q <= 3);	/* optimize for small quotient? */
14386d7f5d3SJohn Marino #endif
14486d7f5d3SJohn Marino 
14586d7f5d3SJohn Marino   udiv_qrnnd_preinv (q, r, r, rl << cnt, b, bi);
14686d7f5d3SJohn Marino 
14786d7f5d3SJohn Marino   return r >> cnt;
14886d7f5d3SJohn Marino }
149