/* mpn_mod_1s_3p (ap, n, b, cps)
   Divide (ap,,n) by b.  Return the single-limb remainder.
   Requires that b < B / 3.

   Contributed to the GNU project by Torbjorn Granlund.

   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.

Copyright 2008, 2009, 2010 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 3 of the License, or (at your
option) any later version.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
2786d7f5d3SJohn Marino
2886d7f5d3SJohn Marino #include "gmp.h"
2986d7f5d3SJohn Marino #include "gmp-impl.h"
3086d7f5d3SJohn Marino #include "longlong.h"
3186d7f5d3SJohn Marino
3286d7f5d3SJohn Marino void
mpn_mod_1s_3p_cps(mp_limb_t cps[6],mp_limb_t b)3386d7f5d3SJohn Marino mpn_mod_1s_3p_cps (mp_limb_t cps[6], mp_limb_t b)
3486d7f5d3SJohn Marino {
3586d7f5d3SJohn Marino mp_limb_t bi;
3686d7f5d3SJohn Marino mp_limb_t B1modb, B2modb, B3modb, B4modb;
3786d7f5d3SJohn Marino int cnt;
3886d7f5d3SJohn Marino
3986d7f5d3SJohn Marino ASSERT (b <= (~(mp_limb_t) 0) / 3);
4086d7f5d3SJohn Marino
4186d7f5d3SJohn Marino count_leading_zeros (cnt, b);
4286d7f5d3SJohn Marino
4386d7f5d3SJohn Marino b <<= cnt;
4486d7f5d3SJohn Marino invert_limb (bi, b);
4586d7f5d3SJohn Marino
4686d7f5d3SJohn Marino B1modb = -b * ((bi >> (GMP_LIMB_BITS-cnt)) | (CNST_LIMB(1) << cnt));
4786d7f5d3SJohn Marino ASSERT (B1modb <= b); /* NB: not fully reduced mod b */
4886d7f5d3SJohn Marino udiv_rnd_preinv (B2modb, B1modb, b, bi);
4986d7f5d3SJohn Marino udiv_rnd_preinv (B3modb, B2modb, b, bi);
5086d7f5d3SJohn Marino udiv_rnd_preinv (B4modb, B3modb, b, bi);
5186d7f5d3SJohn Marino
5286d7f5d3SJohn Marino cps[0] = bi;
5386d7f5d3SJohn Marino cps[1] = cnt;
5486d7f5d3SJohn Marino cps[2] = B1modb >> cnt;
5586d7f5d3SJohn Marino cps[3] = B2modb >> cnt;
5686d7f5d3SJohn Marino cps[4] = B3modb >> cnt;
5786d7f5d3SJohn Marino cps[5] = B4modb >> cnt;
5886d7f5d3SJohn Marino
5986d7f5d3SJohn Marino #if WANT_ASSERT
6086d7f5d3SJohn Marino {
6186d7f5d3SJohn Marino int i;
6286d7f5d3SJohn Marino b = cps[2];
6386d7f5d3SJohn Marino for (i = 3; i <= 5; i++)
6486d7f5d3SJohn Marino {
6586d7f5d3SJohn Marino b += cps[i];
6686d7f5d3SJohn Marino ASSERT (b >= cps[i]);
6786d7f5d3SJohn Marino }
6886d7f5d3SJohn Marino }
6986d7f5d3SJohn Marino #endif
7086d7f5d3SJohn Marino }
7186d7f5d3SJohn Marino
7286d7f5d3SJohn Marino mp_limb_t
mpn_mod_1s_3p(mp_srcptr ap,mp_size_t n,mp_limb_t b,mp_limb_t cps[6])7386d7f5d3SJohn Marino mpn_mod_1s_3p (mp_srcptr ap, mp_size_t n, mp_limb_t b, mp_limb_t cps[6])
7486d7f5d3SJohn Marino {
7586d7f5d3SJohn Marino mp_limb_t rh, rl, bi, q, ph, pl, ch, cl, r;
7686d7f5d3SJohn Marino mp_limb_t B1modb, B2modb, B3modb, B4modb;
7786d7f5d3SJohn Marino mp_size_t i;
7886d7f5d3SJohn Marino int cnt;
7986d7f5d3SJohn Marino
8086d7f5d3SJohn Marino ASSERT (n >= 1);
8186d7f5d3SJohn Marino
8286d7f5d3SJohn Marino B1modb = cps[2];
8386d7f5d3SJohn Marino B2modb = cps[3];
8486d7f5d3SJohn Marino B3modb = cps[4];
8586d7f5d3SJohn Marino B4modb = cps[5];
8686d7f5d3SJohn Marino
8786d7f5d3SJohn Marino /* We compute n mod 3 in a tricky way, which works except for when n is so
8886d7f5d3SJohn Marino close to the maximum size that we don't need to support it. The final
8986d7f5d3SJohn Marino cast to int is a workaround for HP cc. */
9086d7f5d3SJohn Marino switch ((int) ((mp_limb_t) n * MODLIMB_INVERSE_3 >> (GMP_NUMB_BITS - 2)))
9186d7f5d3SJohn Marino {
9286d7f5d3SJohn Marino case 0:
9386d7f5d3SJohn Marino umul_ppmm (ph, pl, ap[n - 2], B1modb);
9486d7f5d3SJohn Marino add_ssaaaa (ph, pl, ph, pl, 0, ap[n - 3]);
9586d7f5d3SJohn Marino umul_ppmm (rh, rl, ap[n - 1], B2modb);
9686d7f5d3SJohn Marino add_ssaaaa (rh, rl, rh, rl, ph, pl);
9786d7f5d3SJohn Marino n -= 3;
9886d7f5d3SJohn Marino break;
9986d7f5d3SJohn Marino case 2: /* n mod 3 = 1 */
10086d7f5d3SJohn Marino rh = 0;
10186d7f5d3SJohn Marino rl = ap[n - 1];
10286d7f5d3SJohn Marino n -= 1;
10386d7f5d3SJohn Marino break;
10486d7f5d3SJohn Marino case 1: /* n mod 3 = 2 */
10586d7f5d3SJohn Marino umul_ppmm (ph, pl, ap[n - 1], B1modb);
10686d7f5d3SJohn Marino add_ssaaaa (rh, rl, ph, pl, 0, ap[n - 2]);
10786d7f5d3SJohn Marino n -= 2;
10886d7f5d3SJohn Marino break;
10986d7f5d3SJohn Marino }
11086d7f5d3SJohn Marino
11186d7f5d3SJohn Marino for (i = n - 3; i >= 0; i -= 3)
11286d7f5d3SJohn Marino {
11386d7f5d3SJohn Marino /* rr = ap[i] < B
11486d7f5d3SJohn Marino + ap[i+1] * (B mod b) <= (B-1)(b-1)
11586d7f5d3SJohn Marino + ap[i+2] * (B^2 mod b) <= (B-1)(b-1)
11686d7f5d3SJohn Marino + LO(rr) * (B^3 mod b) <= (B-1)(b-1)
11786d7f5d3SJohn Marino + HI(rr) * (B^4 mod b) <= (B-1)(b-1)
11886d7f5d3SJohn Marino */
11986d7f5d3SJohn Marino umul_ppmm (ph, pl, ap[i + 1], B1modb);
12086d7f5d3SJohn Marino add_ssaaaa (ph, pl, ph, pl, 0, ap[i + 0]);
12186d7f5d3SJohn Marino
12286d7f5d3SJohn Marino umul_ppmm (ch, cl, ap[i + 2], B2modb);
12386d7f5d3SJohn Marino add_ssaaaa (ph, pl, ph, pl, ch, cl);
12486d7f5d3SJohn Marino
12586d7f5d3SJohn Marino umul_ppmm (ch, cl, rl, B3modb);
12686d7f5d3SJohn Marino add_ssaaaa (ph, pl, ph, pl, ch, cl);
12786d7f5d3SJohn Marino
12886d7f5d3SJohn Marino umul_ppmm (rh, rl, rh, B4modb);
12986d7f5d3SJohn Marino add_ssaaaa (rh, rl, rh, rl, ph, pl);
13086d7f5d3SJohn Marino }
13186d7f5d3SJohn Marino
13286d7f5d3SJohn Marino bi = cps[0];
13386d7f5d3SJohn Marino cnt = cps[1];
13486d7f5d3SJohn Marino
13586d7f5d3SJohn Marino #if 1
13686d7f5d3SJohn Marino umul_ppmm (rh, cl, rh, B1modb);
13786d7f5d3SJohn Marino add_ssaaaa (rh, rl, rh, rl, 0, cl);
13886d7f5d3SJohn Marino r = (rh << cnt) | (rl >> (GMP_LIMB_BITS - cnt));
13986d7f5d3SJohn Marino #else
14086d7f5d3SJohn Marino udiv_qrnnd_preinv (q, r, rh >> (GMP_LIMB_BITS - cnt),
14186d7f5d3SJohn Marino (rh << cnt) | (rl >> (GMP_LIMB_BITS - cnt)), b, bi);
14286d7f5d3SJohn Marino ASSERT (q <= 3); /* optimize for small quotient? */
14386d7f5d3SJohn Marino #endif
14486d7f5d3SJohn Marino
14586d7f5d3SJohn Marino udiv_qrnnd_preinv (q, r, r, rl << cnt, b, bi);
14686d7f5d3SJohn Marino
14786d7f5d3SJohn Marino return r >> cnt;
14886d7f5d3SJohn Marino }
149