/* mpn_gcdext -- Extended Greatest Common Divisor.

Copyright 1996, 1998, 2000, 2001, 2002, 2003, 2004, 2005, 2008, 2009 Free Software
Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 3 of the License, or (at your
option) any later version.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */

#include "gmp.h"
#include "gmp-impl.h"
#include "longlong.h"

/* Temporary storage: 3*(n+1) for u. n+1 for the matrix-vector
   multiplications (if hgcd2 succeeds). If hgcd2 fails, n+1 limbs are
   needed for the division, with at most n for the quotient, and n+1 for
   the product q u0. In all, 4n + 3. */

3086d7f5d3SJohn Marino mp_size_t
mpn_gcdext_lehmer_n(mp_ptr gp,mp_ptr up,mp_size_t * usize,mp_ptr ap,mp_ptr bp,mp_size_t n,mp_ptr tp)3186d7f5d3SJohn Marino mpn_gcdext_lehmer_n (mp_ptr gp, mp_ptr up, mp_size_t *usize,
3286d7f5d3SJohn Marino mp_ptr ap, mp_ptr bp, mp_size_t n,
3386d7f5d3SJohn Marino mp_ptr tp)
3486d7f5d3SJohn Marino {
3586d7f5d3SJohn Marino mp_size_t ualloc = n + 1;
3686d7f5d3SJohn Marino
3786d7f5d3SJohn Marino /* Keeps track of the second row of the reduction matrix
3886d7f5d3SJohn Marino *
3986d7f5d3SJohn Marino * M = (v0, v1 ; u0, u1)
4086d7f5d3SJohn Marino *
4186d7f5d3SJohn Marino * which correspond to the first column of the inverse
4286d7f5d3SJohn Marino *
4386d7f5d3SJohn Marino * M^{-1} = (u1, -v1; -u0, v0)
4486d7f5d3SJohn Marino */
4586d7f5d3SJohn Marino
4686d7f5d3SJohn Marino mp_size_t un;
4786d7f5d3SJohn Marino mp_ptr u0;
4886d7f5d3SJohn Marino mp_ptr u1;
4986d7f5d3SJohn Marino mp_ptr u2;
5086d7f5d3SJohn Marino
5186d7f5d3SJohn Marino MPN_ZERO (tp, 3*ualloc);
5286d7f5d3SJohn Marino u0 = tp; tp += ualloc;
5386d7f5d3SJohn Marino u1 = tp; tp += ualloc;
5486d7f5d3SJohn Marino u2 = tp; tp += ualloc;
5586d7f5d3SJohn Marino
5686d7f5d3SJohn Marino u1[0] = 1; un = 1;
5786d7f5d3SJohn Marino
5886d7f5d3SJohn Marino /* FIXME: Handle n == 2 differently, after the loop? */
5986d7f5d3SJohn Marino while (n >= 2)
6086d7f5d3SJohn Marino {
6186d7f5d3SJohn Marino struct hgcd_matrix1 M;
6286d7f5d3SJohn Marino mp_limb_t ah, al, bh, bl;
6386d7f5d3SJohn Marino mp_limb_t mask;
6486d7f5d3SJohn Marino
6586d7f5d3SJohn Marino mask = ap[n-1] | bp[n-1];
6686d7f5d3SJohn Marino ASSERT (mask > 0);
6786d7f5d3SJohn Marino
6886d7f5d3SJohn Marino if (mask & GMP_NUMB_HIGHBIT)
6986d7f5d3SJohn Marino {
7086d7f5d3SJohn Marino ah = ap[n-1]; al = ap[n-2];
7186d7f5d3SJohn Marino bh = bp[n-1]; bl = bp[n-2];
7286d7f5d3SJohn Marino }
7386d7f5d3SJohn Marino else if (n == 2)
7486d7f5d3SJohn Marino {
7586d7f5d3SJohn Marino /* We use the full inputs without truncation, so we can
7686d7f5d3SJohn Marino safely shift left. */
7786d7f5d3SJohn Marino int shift;
7886d7f5d3SJohn Marino
7986d7f5d3SJohn Marino count_leading_zeros (shift, mask);
8086d7f5d3SJohn Marino ah = MPN_EXTRACT_NUMB (shift, ap[1], ap[0]);
8186d7f5d3SJohn Marino al = ap[0] << shift;
8286d7f5d3SJohn Marino bh = MPN_EXTRACT_NUMB (shift, bp[1], bp[0]);
8386d7f5d3SJohn Marino bl = bp[0] << shift;
8486d7f5d3SJohn Marino }
8586d7f5d3SJohn Marino else
8686d7f5d3SJohn Marino {
8786d7f5d3SJohn Marino int shift;
8886d7f5d3SJohn Marino
8986d7f5d3SJohn Marino count_leading_zeros (shift, mask);
9086d7f5d3SJohn Marino ah = MPN_EXTRACT_NUMB (shift, ap[n-1], ap[n-2]);
9186d7f5d3SJohn Marino al = MPN_EXTRACT_NUMB (shift, ap[n-2], ap[n-3]);
9286d7f5d3SJohn Marino bh = MPN_EXTRACT_NUMB (shift, bp[n-1], bp[n-2]);
9386d7f5d3SJohn Marino bl = MPN_EXTRACT_NUMB (shift, bp[n-2], bp[n-3]);
9486d7f5d3SJohn Marino }
9586d7f5d3SJohn Marino
9686d7f5d3SJohn Marino /* Try an mpn_nhgcd2 step */
9786d7f5d3SJohn Marino if (mpn_hgcd2 (ah, al, bh, bl, &M))
9886d7f5d3SJohn Marino {
9986d7f5d3SJohn Marino n = mpn_hgcd_mul_matrix1_inverse_vector (&M, tp, ap, bp, n);
10086d7f5d3SJohn Marino MP_PTR_SWAP (ap, tp);
10186d7f5d3SJohn Marino un = mpn_hgcd_mul_matrix1_vector(&M, u2, u0, u1, un);
10286d7f5d3SJohn Marino MP_PTR_SWAP (u0, u2);
10386d7f5d3SJohn Marino }
10486d7f5d3SJohn Marino else
10586d7f5d3SJohn Marino {
10686d7f5d3SJohn Marino /* mpn_hgcd2 has failed. Then either one of a or b is very
10786d7f5d3SJohn Marino small, or the difference is very small. Perform one
10886d7f5d3SJohn Marino subtraction followed by one division. */
10986d7f5d3SJohn Marino mp_size_t gn;
11086d7f5d3SJohn Marino mp_size_t updated_un = un;
11186d7f5d3SJohn Marino
11286d7f5d3SJohn Marino /* Temporary storage n for the quotient and ualloc for the
11386d7f5d3SJohn Marino new cofactor. */
11486d7f5d3SJohn Marino n = mpn_gcdext_subdiv_step (gp, &gn, up, usize, ap, bp, n,
11586d7f5d3SJohn Marino u0, u1, &updated_un, tp, u2);
11686d7f5d3SJohn Marino if (n == 0)
11786d7f5d3SJohn Marino return gn;
11886d7f5d3SJohn Marino
11986d7f5d3SJohn Marino un = updated_un;
12086d7f5d3SJohn Marino }
12186d7f5d3SJohn Marino }
12286d7f5d3SJohn Marino ASSERT_ALWAYS (ap[0] > 0);
12386d7f5d3SJohn Marino ASSERT_ALWAYS (bp[0] > 0);
12486d7f5d3SJohn Marino
12586d7f5d3SJohn Marino if (ap[0] == bp[0])
12686d7f5d3SJohn Marino {
12786d7f5d3SJohn Marino int c;
12886d7f5d3SJohn Marino
12986d7f5d3SJohn Marino /* Which cofactor to return now? Candidates are +u1 and -u0,
13086d7f5d3SJohn Marino depending on which of a and b was most recently reduced,
13186d7f5d3SJohn Marino which we don't keep track of. So compare and get the smallest
13286d7f5d3SJohn Marino one. */
13386d7f5d3SJohn Marino
13486d7f5d3SJohn Marino gp[0] = ap[0];
13586d7f5d3SJohn Marino
13686d7f5d3SJohn Marino MPN_CMP (c, u0, u1, un);
13786d7f5d3SJohn Marino ASSERT (c != 0 || (un == 1 && u0[0] == 1 && u1[0] == 1));
13886d7f5d3SJohn Marino if (c < 0)
13986d7f5d3SJohn Marino {
14086d7f5d3SJohn Marino MPN_NORMALIZE (u0, un);
14186d7f5d3SJohn Marino MPN_COPY (up, u0, un);
14286d7f5d3SJohn Marino *usize = -un;
14386d7f5d3SJohn Marino }
14486d7f5d3SJohn Marino else
14586d7f5d3SJohn Marino {
14686d7f5d3SJohn Marino MPN_NORMALIZE_NOT_ZERO (u1, un);
14786d7f5d3SJohn Marino MPN_COPY (up, u1, un);
14886d7f5d3SJohn Marino *usize = un;
14986d7f5d3SJohn Marino }
15086d7f5d3SJohn Marino return 1;
15186d7f5d3SJohn Marino }
15286d7f5d3SJohn Marino else
15386d7f5d3SJohn Marino {
15486d7f5d3SJohn Marino mp_limb_t uh, vh;
15586d7f5d3SJohn Marino mp_limb_signed_t u;
15686d7f5d3SJohn Marino mp_limb_signed_t v;
15786d7f5d3SJohn Marino int negate;
15886d7f5d3SJohn Marino
15986d7f5d3SJohn Marino gp[0] = mpn_gcdext_1 (&u, &v, ap[0], bp[0]);
16086d7f5d3SJohn Marino
16186d7f5d3SJohn Marino /* Set up = u u1 - v u0. Keep track of size, un grows by one or
16286d7f5d3SJohn Marino two limbs. */
16386d7f5d3SJohn Marino
16486d7f5d3SJohn Marino if (u == 0)
16586d7f5d3SJohn Marino {
16686d7f5d3SJohn Marino ASSERT (v == 1);
16786d7f5d3SJohn Marino MPN_NORMALIZE (u0, un);
16886d7f5d3SJohn Marino MPN_COPY (up, u0, un);
16986d7f5d3SJohn Marino *usize = -un;
17086d7f5d3SJohn Marino return 1;
17186d7f5d3SJohn Marino }
17286d7f5d3SJohn Marino else if (v == 0)
17386d7f5d3SJohn Marino {
17486d7f5d3SJohn Marino ASSERT (u == 1);
17586d7f5d3SJohn Marino MPN_NORMALIZE (u1, un);
17686d7f5d3SJohn Marino MPN_COPY (up, u1, un);
17786d7f5d3SJohn Marino *usize = un;
17886d7f5d3SJohn Marino return 1;
17986d7f5d3SJohn Marino }
18086d7f5d3SJohn Marino else if (u > 0)
18186d7f5d3SJohn Marino {
18286d7f5d3SJohn Marino negate = 0;
18386d7f5d3SJohn Marino ASSERT (v < 0);
18486d7f5d3SJohn Marino v = -v;
18586d7f5d3SJohn Marino }
18686d7f5d3SJohn Marino else
18786d7f5d3SJohn Marino {
18886d7f5d3SJohn Marino negate = 1;
18986d7f5d3SJohn Marino ASSERT (v > 0);
19086d7f5d3SJohn Marino u = -u;
19186d7f5d3SJohn Marino }
19286d7f5d3SJohn Marino
19386d7f5d3SJohn Marino uh = mpn_mul_1 (up, u1, un, u);
19486d7f5d3SJohn Marino vh = mpn_addmul_1 (up, u0, un, v);
19586d7f5d3SJohn Marino
19686d7f5d3SJohn Marino if ( (uh | vh) > 0)
19786d7f5d3SJohn Marino {
19886d7f5d3SJohn Marino uh += vh;
19986d7f5d3SJohn Marino up[un++] = uh;
20086d7f5d3SJohn Marino if (uh < vh)
20186d7f5d3SJohn Marino up[un++] = 1;
20286d7f5d3SJohn Marino }
20386d7f5d3SJohn Marino
20486d7f5d3SJohn Marino MPN_NORMALIZE_NOT_ZERO (up, un);
20586d7f5d3SJohn Marino
20686d7f5d3SJohn Marino *usize = negate ? -un : un;
20786d7f5d3SJohn Marino return 1;
20886d7f5d3SJohn Marino }
20986d7f5d3SJohn Marino }
210