/* mpn_gcd_1 -- mpn and limb greatest common divisor.

Copyright 1994, 1996, 2000, 2001 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 3 of the License, or (at your
option) any later version.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */

#include "gmp.h"
#include "gmp-impl.h"
#include "longlong.h"

/* Selects which main-loop variant mpn_gcd_1 is compiled with (1 or 2).
   May be predefined by the build to override the default.  */
#ifndef GCD_1_METHOD
#define GCD_1_METHOD 2
#endif

/* Non-zero makes the GCD_1_METHOD == 2 loop count trailing zeros with the
   lookup table below instead of count_trailing_zeros.  Overridable in the
   same way as GCD_1_METHOD.  */
#ifndef USE_ZEROTAB
#define USE_ZEROTAB 0
#endif

#if USE_ZEROTAB
/* zerotab[x] is the number of trailing zero bits in the 4-bit value x.
   The entry for x == 0 is 4, meaning "all four bits were zero, look at the
   next nibble".  */
static const unsigned char zerotab[16] = {
  4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
};
#endif

3686d7f5d3SJohn Marino /* Does not work for U == 0 or V == 0. It would be tough to make it work for
3786d7f5d3SJohn Marino V == 0 since gcd(x,0) = x, and U does not generally fit in an mp_limb_t.
3886d7f5d3SJohn Marino
3986d7f5d3SJohn Marino The threshold for doing u%v when size==1 will vary by CPU according to
4086d7f5d3SJohn Marino the speed of a division and the code generated for the main loop. Any
4186d7f5d3SJohn Marino tuning for this is left to a CPU specific implementation. */
4286d7f5d3SJohn Marino
4386d7f5d3SJohn Marino mp_limb_t
mpn_gcd_1(mp_srcptr up,mp_size_t size,mp_limb_t vlimb)4486d7f5d3SJohn Marino mpn_gcd_1 (mp_srcptr up, mp_size_t size, mp_limb_t vlimb)
4586d7f5d3SJohn Marino {
4686d7f5d3SJohn Marino mp_limb_t ulimb;
4786d7f5d3SJohn Marino unsigned long zero_bits, u_low_zero_bits;
4886d7f5d3SJohn Marino
4986d7f5d3SJohn Marino ASSERT (size >= 1);
5086d7f5d3SJohn Marino ASSERT (vlimb != 0);
5186d7f5d3SJohn Marino ASSERT_MPN_NONZERO_P (up, size);
5286d7f5d3SJohn Marino
5386d7f5d3SJohn Marino ulimb = up[0];
5486d7f5d3SJohn Marino
5586d7f5d3SJohn Marino /* Need vlimb odd for modexact, want it odd to get common zeros. */
5686d7f5d3SJohn Marino count_trailing_zeros (zero_bits, vlimb);
5786d7f5d3SJohn Marino vlimb >>= zero_bits;
5886d7f5d3SJohn Marino
5986d7f5d3SJohn Marino if (size > 1)
6086d7f5d3SJohn Marino {
6186d7f5d3SJohn Marino /* Must get common zeros before the mod reduction. If ulimb==0 then
6286d7f5d3SJohn Marino vlimb already gives the common zeros. */
6386d7f5d3SJohn Marino if (ulimb != 0)
6486d7f5d3SJohn Marino {
6586d7f5d3SJohn Marino count_trailing_zeros (u_low_zero_bits, ulimb);
6686d7f5d3SJohn Marino zero_bits = MIN (zero_bits, u_low_zero_bits);
6786d7f5d3SJohn Marino }
6886d7f5d3SJohn Marino
6986d7f5d3SJohn Marino ulimb = MPN_MOD_OR_MODEXACT_1_ODD (up, size, vlimb);
7086d7f5d3SJohn Marino if (ulimb == 0)
7186d7f5d3SJohn Marino goto done;
7286d7f5d3SJohn Marino
7386d7f5d3SJohn Marino goto strip_u_maybe;
7486d7f5d3SJohn Marino }
7586d7f5d3SJohn Marino
7686d7f5d3SJohn Marino /* size==1, so up[0]!=0 */
7786d7f5d3SJohn Marino count_trailing_zeros (u_low_zero_bits, ulimb);
7886d7f5d3SJohn Marino ulimb >>= u_low_zero_bits;
7986d7f5d3SJohn Marino zero_bits = MIN (zero_bits, u_low_zero_bits);
8086d7f5d3SJohn Marino
8186d7f5d3SJohn Marino /* make u bigger */
8286d7f5d3SJohn Marino if (vlimb > ulimb)
8386d7f5d3SJohn Marino MP_LIMB_T_SWAP (ulimb, vlimb);
8486d7f5d3SJohn Marino
8586d7f5d3SJohn Marino /* if u is much bigger than v, reduce using a division rather than
8686d7f5d3SJohn Marino chipping away at it bit-by-bit */
8786d7f5d3SJohn Marino if ((ulimb >> 16) > vlimb)
8886d7f5d3SJohn Marino {
8986d7f5d3SJohn Marino ulimb %= vlimb;
9086d7f5d3SJohn Marino if (ulimb == 0)
9186d7f5d3SJohn Marino goto done;
9286d7f5d3SJohn Marino goto strip_u_maybe;
9386d7f5d3SJohn Marino }
9486d7f5d3SJohn Marino
9586d7f5d3SJohn Marino ASSERT (ulimb & 1);
9686d7f5d3SJohn Marino ASSERT (vlimb & 1);
9786d7f5d3SJohn Marino
9886d7f5d3SJohn Marino #if GCD_1_METHOD == 1
9986d7f5d3SJohn Marino while (ulimb != vlimb)
10086d7f5d3SJohn Marino {
10186d7f5d3SJohn Marino ASSERT (ulimb & 1);
10286d7f5d3SJohn Marino ASSERT (vlimb & 1);
10386d7f5d3SJohn Marino
10486d7f5d3SJohn Marino if (ulimb > vlimb)
10586d7f5d3SJohn Marino {
10686d7f5d3SJohn Marino ulimb -= vlimb;
10786d7f5d3SJohn Marino do
10886d7f5d3SJohn Marino {
10986d7f5d3SJohn Marino ulimb >>= 1;
11086d7f5d3SJohn Marino ASSERT (ulimb != 0);
11186d7f5d3SJohn Marino strip_u_maybe:
11286d7f5d3SJohn Marino ;
11386d7f5d3SJohn Marino }
11486d7f5d3SJohn Marino while ((ulimb & 1) == 0);
11586d7f5d3SJohn Marino }
11686d7f5d3SJohn Marino else /* vlimb > ulimb. */
11786d7f5d3SJohn Marino {
11886d7f5d3SJohn Marino vlimb -= ulimb;
11986d7f5d3SJohn Marino do
12086d7f5d3SJohn Marino {
12186d7f5d3SJohn Marino vlimb >>= 1;
12286d7f5d3SJohn Marino ASSERT (vlimb != 0);
12386d7f5d3SJohn Marino }
12486d7f5d3SJohn Marino while ((vlimb & 1) == 0);
12586d7f5d3SJohn Marino }
12686d7f5d3SJohn Marino }
12786d7f5d3SJohn Marino #else
12886d7f5d3SJohn Marino # if GCD_1_METHOD == 2
12986d7f5d3SJohn Marino
13086d7f5d3SJohn Marino ulimb >>= 1;
13186d7f5d3SJohn Marino vlimb >>= 1;
13286d7f5d3SJohn Marino
13386d7f5d3SJohn Marino while (ulimb != vlimb)
13486d7f5d3SJohn Marino {
13586d7f5d3SJohn Marino int c;
13686d7f5d3SJohn Marino mp_limb_t t = ulimb - vlimb;
13786d7f5d3SJohn Marino mp_limb_t vgtu = LIMB_HIGHBIT_TO_MASK (t);
13886d7f5d3SJohn Marino
13986d7f5d3SJohn Marino /* v <-- min (u, v) */
14086d7f5d3SJohn Marino vlimb += (vgtu & t);
14186d7f5d3SJohn Marino
14286d7f5d3SJohn Marino /* u <-- |u - v| */
14386d7f5d3SJohn Marino ulimb = (t ^ vgtu) - vgtu;
14486d7f5d3SJohn Marino
14586d7f5d3SJohn Marino #if USE_ZEROTAB
14686d7f5d3SJohn Marino /* Number of trailing zeros is the same no matter if we look at
14786d7f5d3SJohn Marino * t or ulimb, but using t gives more parallelism. */
14886d7f5d3SJohn Marino c = zerotab[t & 15];
14986d7f5d3SJohn Marino
15086d7f5d3SJohn Marino while (UNLIKELY (c == 4))
15186d7f5d3SJohn Marino {
15286d7f5d3SJohn Marino ulimb >>= 4;
15386d7f5d3SJohn Marino if (0)
15486d7f5d3SJohn Marino strip_u_maybe:
15586d7f5d3SJohn Marino vlimb >>= 1;
15686d7f5d3SJohn Marino
15786d7f5d3SJohn Marino c = zerotab[ulimb & 15];
15886d7f5d3SJohn Marino }
15986d7f5d3SJohn Marino #else
16086d7f5d3SJohn Marino if (0)
16186d7f5d3SJohn Marino {
16286d7f5d3SJohn Marino strip_u_maybe:
16386d7f5d3SJohn Marino vlimb >>= 1;
16486d7f5d3SJohn Marino t = ulimb;
16586d7f5d3SJohn Marino }
16686d7f5d3SJohn Marino count_trailing_zeros (c, t);
16786d7f5d3SJohn Marino #endif
16886d7f5d3SJohn Marino ulimb >>= (c + 1);
16986d7f5d3SJohn Marino }
17086d7f5d3SJohn Marino
17186d7f5d3SJohn Marino vlimb = (vlimb << 1) | 1;
17286d7f5d3SJohn Marino # else
17386d7f5d3SJohn Marino # error Unknown GCD_1_METHOD
17486d7f5d3SJohn Marino # endif
17586d7f5d3SJohn Marino #endif
17686d7f5d3SJohn Marino
17786d7f5d3SJohn Marino done:
17886d7f5d3SJohn Marino return vlimb << zero_bits;
17986d7f5d3SJohn Marino }
180