/* mpn/gcd.c: mpn_gcd for gcd of two odd integers.

Copyright 1991, 1993, 1994, 1995, 1996, 1997, 1998, 2000, 2001, 2002, 2003,
2004, 2005, 2008 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 3 of the License, or (at your
option) any later version.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
2086d7f5d3SJohn Marino
2186d7f5d3SJohn Marino #include "gmp.h"
2286d7f5d3SJohn Marino #include "gmp-impl.h"
2386d7f5d3SJohn Marino #include "longlong.h"
2486d7f5d3SJohn Marino
/* Uses the HGCD operation described in

     N. Möller, On Schönhage's algorithm and subquadratic integer gcd
     computation, Math. Comp. 77 (2008), 589-607.

   to reduce inputs until they are of size below GCD_DC_THRESHOLD, and
   then uses Lehmer's algorithm.
*/
3386d7f5d3SJohn Marino
/* CHOOSE_P(n) selects the split point p used by mpn_gcd for operands of
 * n limbs: the hgcd step is run on the limbs from index p upwards.
 *
 * Some reasonable choices are n / 2 (same as in hgcd), and p = (n +
 * 2)/3, which gives a balanced multiplication in
 * mpn_hgcd_matrix_adjust. However, p = 2 n/3 gives slightly better
 * performance. The matrix-vector multiplication is then
 * 4:1-unbalanced, with matrix elements of size n/6, and vector
 * elements of size p = 2n/3. */

/* From analysis of the theoretical running time, it appears that when
 * multiplication takes time O(n^alpha), p should be chosen so that
 * the ratio of the time for the mpn_hgcd call, and the time for the
 * multiplication in mpn_hgcd_matrix_adjust, is roughly 1/(alpha -
 * 1). */
#ifdef TUNE_GCD_P
/* Tuning build: p comes from a per-size table for n < P_TABLE_SIZE and
   falls back to 2n/3 for larger n.  An entry of 0 makes mpn_gcd skip the
   hgcd reduction for that size.  */
#define P_TABLE_SIZE 10000
mp_size_t p_table[P_TABLE_SIZE];
#define CHOOSE_P(n) ( (n) < P_TABLE_SIZE ? p_table[n] : 2*(n)/3)
#else
/* Normal build: fixed choice p = floor (2n/3).  */
#define CHOOSE_P(n) (2*(n) / 3)
#endif
5386d7f5d3SJohn Marino
5486d7f5d3SJohn Marino mp_size_t
mpn_gcd(mp_ptr gp,mp_ptr up,mp_size_t usize,mp_ptr vp,mp_size_t n)5586d7f5d3SJohn Marino mpn_gcd (mp_ptr gp, mp_ptr up, mp_size_t usize, mp_ptr vp, mp_size_t n)
5686d7f5d3SJohn Marino {
5786d7f5d3SJohn Marino mp_size_t talloc;
5886d7f5d3SJohn Marino mp_size_t scratch;
5986d7f5d3SJohn Marino mp_size_t matrix_scratch;
6086d7f5d3SJohn Marino
6186d7f5d3SJohn Marino mp_size_t gn;
6286d7f5d3SJohn Marino mp_ptr tp;
6386d7f5d3SJohn Marino TMP_DECL;
6486d7f5d3SJohn Marino
6586d7f5d3SJohn Marino /* FIXME: Check for small sizes first, before setting up temporary
6686d7f5d3SJohn Marino storage etc. */
6786d7f5d3SJohn Marino talloc = MPN_GCD_LEHMER_N_ITCH(n);
6886d7f5d3SJohn Marino
6986d7f5d3SJohn Marino /* For initial division */
7086d7f5d3SJohn Marino scratch = usize - n + 1;
7186d7f5d3SJohn Marino if (scratch > talloc)
7286d7f5d3SJohn Marino talloc = scratch;
7386d7f5d3SJohn Marino
7486d7f5d3SJohn Marino #if TUNE_GCD_P
7586d7f5d3SJohn Marino if (CHOOSE_P (n) > 0)
7686d7f5d3SJohn Marino #else
7786d7f5d3SJohn Marino if (ABOVE_THRESHOLD (n, GCD_DC_THRESHOLD))
7886d7f5d3SJohn Marino #endif
7986d7f5d3SJohn Marino {
8086d7f5d3SJohn Marino mp_size_t hgcd_scratch;
8186d7f5d3SJohn Marino mp_size_t update_scratch;
8286d7f5d3SJohn Marino mp_size_t p = CHOOSE_P (n);
8386d7f5d3SJohn Marino mp_size_t scratch;
8486d7f5d3SJohn Marino #if TUNE_GCD_P
8586d7f5d3SJohn Marino /* Worst case, since we don't guarantee that n - CHOOSE_P(n)
8686d7f5d3SJohn Marino is increasing */
8786d7f5d3SJohn Marino matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n);
8886d7f5d3SJohn Marino hgcd_scratch = mpn_hgcd_itch (n);
8986d7f5d3SJohn Marino update_scratch = 2*(n - 1);
9086d7f5d3SJohn Marino #else
9186d7f5d3SJohn Marino matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n - p);
9286d7f5d3SJohn Marino hgcd_scratch = mpn_hgcd_itch (n - p);
9386d7f5d3SJohn Marino update_scratch = p + n - 1;
9486d7f5d3SJohn Marino #endif
9586d7f5d3SJohn Marino scratch = matrix_scratch + MAX(hgcd_scratch, update_scratch);
9686d7f5d3SJohn Marino if (scratch > talloc)
9786d7f5d3SJohn Marino talloc = scratch;
9886d7f5d3SJohn Marino }
9986d7f5d3SJohn Marino
10086d7f5d3SJohn Marino TMP_MARK;
10186d7f5d3SJohn Marino tp = TMP_ALLOC_LIMBS(talloc);
10286d7f5d3SJohn Marino
10386d7f5d3SJohn Marino if (usize > n)
10486d7f5d3SJohn Marino {
10586d7f5d3SJohn Marino mpn_tdiv_qr (tp, up, 0, up, usize, vp, n);
10686d7f5d3SJohn Marino
10786d7f5d3SJohn Marino if (mpn_zero_p (up, n))
10886d7f5d3SJohn Marino {
10986d7f5d3SJohn Marino MPN_COPY (gp, vp, n);
11086d7f5d3SJohn Marino TMP_FREE;
11186d7f5d3SJohn Marino return n;
11286d7f5d3SJohn Marino }
11386d7f5d3SJohn Marino }
11486d7f5d3SJohn Marino
11586d7f5d3SJohn Marino #if TUNE_GCD_P
11686d7f5d3SJohn Marino while (CHOOSE_P (n) > 0)
11786d7f5d3SJohn Marino #else
11886d7f5d3SJohn Marino while (ABOVE_THRESHOLD (n, GCD_DC_THRESHOLD))
11986d7f5d3SJohn Marino #endif
12086d7f5d3SJohn Marino {
12186d7f5d3SJohn Marino struct hgcd_matrix M;
12286d7f5d3SJohn Marino mp_size_t p = CHOOSE_P (n);
12386d7f5d3SJohn Marino mp_size_t matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n - p);
12486d7f5d3SJohn Marino mp_size_t nn;
12586d7f5d3SJohn Marino mpn_hgcd_matrix_init (&M, n - p, tp);
12686d7f5d3SJohn Marino nn = mpn_hgcd (up + p, vp + p, n - p, &M, tp + matrix_scratch);
12786d7f5d3SJohn Marino if (nn > 0)
12886d7f5d3SJohn Marino {
12986d7f5d3SJohn Marino ASSERT (M.n <= (n - p - 1)/2);
13086d7f5d3SJohn Marino ASSERT (M.n + p <= (p + n - 1) / 2);
13186d7f5d3SJohn Marino /* Temporary storage 2 (p + M->n) <= p + n - 1. */
13286d7f5d3SJohn Marino n = mpn_hgcd_matrix_adjust (&M, p + nn, up, vp, p, tp + matrix_scratch);
13386d7f5d3SJohn Marino }
13486d7f5d3SJohn Marino else
13586d7f5d3SJohn Marino {
13686d7f5d3SJohn Marino /* Temporary storage n */
13786d7f5d3SJohn Marino n = mpn_gcd_subdiv_step (gp, &gn, up, vp, n, tp);
13886d7f5d3SJohn Marino if (n == 0)
13986d7f5d3SJohn Marino {
14086d7f5d3SJohn Marino TMP_FREE;
14186d7f5d3SJohn Marino return gn;
14286d7f5d3SJohn Marino }
14386d7f5d3SJohn Marino }
14486d7f5d3SJohn Marino }
14586d7f5d3SJohn Marino
14686d7f5d3SJohn Marino gn = mpn_gcd_lehmer_n (gp, up, vp, n, tp);
14786d7f5d3SJohn Marino TMP_FREE;
14886d7f5d3SJohn Marino return gn;
14986d7f5d3SJohn Marino }
15086d7f5d3SJohn Marino
#ifdef TUNE_GCD_P
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include "speed.h"
15686d7f5d3SJohn Marino
/* qsort comparison callback for arrays of double.  Returns -1, 0 or 1
   according to whether *ap is less than, equal to, or greater than *bp.
   Values that are unordered with respect to each other (NaN) compare as
   equal.  */
static int
compare_double (const void *ap, const void *bp)
{
  double a = * (const double *) ap;
  double b = * (const double *) bp;

  /* Each relational test yields 0 or 1, so the difference is the sign.  */
  return (a > b) - (a < b);
}
17086d7f5d3SJohn Marino
17186d7f5d3SJohn Marino static double
median(double * v,size_t n)17286d7f5d3SJohn Marino median (double *v, size_t n)
17386d7f5d3SJohn Marino {
17486d7f5d3SJohn Marino qsort(v, n, sizeof(*v), compare_double);
17586d7f5d3SJohn Marino
17686d7f5d3SJohn Marino return v[n/2];
17786d7f5d3SJohn Marino }
17886d7f5d3SJohn Marino
/* Number of timed runs taken by TIME for each measurement.  */
#define TIME_REPS 5

/* Execute CODE TIME_REPS times, bracketing each run with
   speed_starttime () / speed_endtime (), and assign to RES the median of
   the values returned by speed_endtime ().  CODE is expanded inside the
   loop body, so it must not declare names that clash with
   time_measurement or time_i.  */
#define TIME(res, code) do {                                    \
  double time_measurement[TIME_REPS];                           \
  unsigned time_i;                                              \
                                                                \
  for (time_i = 0; time_i < TIME_REPS; time_i++)                \
    {                                                           \
      speed_starttime();                                        \
      code;                                                     \
      time_measurement[time_i] = speed_endtime();               \
    }                                                           \
  res = median(time_measurement, TIME_REPS);                    \
} while (0)
19186d7f5d3SJohn Marino
19286d7f5d3SJohn Marino int
main(int argc,char * argv)19386d7f5d3SJohn Marino main(int argc, char *argv)
19486d7f5d3SJohn Marino {
19586d7f5d3SJohn Marino gmp_randstate_t rands;
19686d7f5d3SJohn Marino mp_size_t n;
19786d7f5d3SJohn Marino mp_ptr ap;
19886d7f5d3SJohn Marino mp_ptr bp;
19986d7f5d3SJohn Marino mp_ptr up;
20086d7f5d3SJohn Marino mp_ptr vp;
20186d7f5d3SJohn Marino mp_ptr gp;
20286d7f5d3SJohn Marino mp_ptr tp;
20386d7f5d3SJohn Marino TMP_DECL;
20486d7f5d3SJohn Marino
20586d7f5d3SJohn Marino /* Unbuffered so if output is redirected to a file it isn't lost if the
20686d7f5d3SJohn Marino program is killed part way through. */
20786d7f5d3SJohn Marino setbuf (stdout, NULL);
20886d7f5d3SJohn Marino setbuf (stderr, NULL);
20986d7f5d3SJohn Marino
21086d7f5d3SJohn Marino gmp_randinit_default (rands);
21186d7f5d3SJohn Marino
21286d7f5d3SJohn Marino TMP_MARK;
21386d7f5d3SJohn Marino
21486d7f5d3SJohn Marino ap = TMP_ALLOC_LIMBS (P_TABLE_SIZE);
21586d7f5d3SJohn Marino bp = TMP_ALLOC_LIMBS (P_TABLE_SIZE);
21686d7f5d3SJohn Marino up = TMP_ALLOC_LIMBS (P_TABLE_SIZE);
21786d7f5d3SJohn Marino vp = TMP_ALLOC_LIMBS (P_TABLE_SIZE);
21886d7f5d3SJohn Marino gp = TMP_ALLOC_LIMBS (P_TABLE_SIZE);
21986d7f5d3SJohn Marino tp = TMP_ALLOC_LIMBS (MPN_GCD_LEHMER_N_ITCH (P_TABLE_SIZE));
22086d7f5d3SJohn Marino
22186d7f5d3SJohn Marino mpn_random (ap, P_TABLE_SIZE);
22286d7f5d3SJohn Marino mpn_random (bp, P_TABLE_SIZE);
22386d7f5d3SJohn Marino
22486d7f5d3SJohn Marino memset (p_table, 0, sizeof(p_table));
22586d7f5d3SJohn Marino
22686d7f5d3SJohn Marino for (n = 100; n++; n < P_TABLE_SIZE)
22786d7f5d3SJohn Marino {
22886d7f5d3SJohn Marino mp_size_t p;
22986d7f5d3SJohn Marino mp_size_t best_p;
23086d7f5d3SJohn Marino double best_time;
23186d7f5d3SJohn Marino double lehmer_time;
23286d7f5d3SJohn Marino
23386d7f5d3SJohn Marino if (ap[n-1] == 0)
23486d7f5d3SJohn Marino ap[n-1] = 1;
23586d7f5d3SJohn Marino
23686d7f5d3SJohn Marino if (bp[n-1] == 0)
23786d7f5d3SJohn Marino bp[n-1] = 1;
23886d7f5d3SJohn Marino
23986d7f5d3SJohn Marino p_table[n] = 0;
24086d7f5d3SJohn Marino TIME(lehmer_time, {
24186d7f5d3SJohn Marino MPN_COPY (up, ap, n);
24286d7f5d3SJohn Marino MPN_COPY (vp, bp, n);
24386d7f5d3SJohn Marino mpn_gcd_lehmer_n (gp, up, vp, n, tp);
24486d7f5d3SJohn Marino });
24586d7f5d3SJohn Marino
24686d7f5d3SJohn Marino best_time = lehmer_time;
24786d7f5d3SJohn Marino best_p = 0;
24886d7f5d3SJohn Marino
24986d7f5d3SJohn Marino for (p = n * 0.48; p < n * 0.77; p++)
25086d7f5d3SJohn Marino {
25186d7f5d3SJohn Marino double t;
25286d7f5d3SJohn Marino
25386d7f5d3SJohn Marino p_table[n] = p;
25486d7f5d3SJohn Marino
25586d7f5d3SJohn Marino TIME(t, {
25686d7f5d3SJohn Marino MPN_COPY (up, ap, n);
25786d7f5d3SJohn Marino MPN_COPY (vp, bp, n);
25886d7f5d3SJohn Marino mpn_gcd (gp, up, n, vp, n);
25986d7f5d3SJohn Marino });
26086d7f5d3SJohn Marino
26186d7f5d3SJohn Marino if (t < best_time)
26286d7f5d3SJohn Marino {
26386d7f5d3SJohn Marino best_time = t;
26486d7f5d3SJohn Marino best_p = p;
26586d7f5d3SJohn Marino }
26686d7f5d3SJohn Marino }
26786d7f5d3SJohn Marino printf("%6d %6d %5.3g", n, best_p, (double) best_p / n);
26886d7f5d3SJohn Marino if (best_p > 0)
26986d7f5d3SJohn Marino {
27086d7f5d3SJohn Marino double speedup = 100 * (lehmer_time - best_time) / lehmer_time;
27186d7f5d3SJohn Marino printf(" %5.3g%%", speedup);
27286d7f5d3SJohn Marino if (speedup < 1.0)
27386d7f5d3SJohn Marino {
27486d7f5d3SJohn Marino printf(" (ignored)");
27586d7f5d3SJohn Marino best_p = 0;
27686d7f5d3SJohn Marino }
27786d7f5d3SJohn Marino }
27886d7f5d3SJohn Marino printf("\n");
27986d7f5d3SJohn Marino
28086d7f5d3SJohn Marino p_table[n] = best_p;
28186d7f5d3SJohn Marino }
28286d7f5d3SJohn Marino TMP_FREE;
28386d7f5d3SJohn Marino gmp_randclear(rands);
28486d7f5d3SJohn Marino return 0;
28586d7f5d3SJohn Marino }
28686d7f5d3SJohn Marino #endif /* TUNE_GCD_P */
287