186d7f5d3SJohn Marino /* mpz_lucnum_ui -- calculate Lucas number.
286d7f5d3SJohn Marino
386d7f5d3SJohn Marino Copyright 2001, 2003, 2005 Free Software Foundation, Inc.
486d7f5d3SJohn Marino
586d7f5d3SJohn Marino This file is part of the GNU MP Library.
686d7f5d3SJohn Marino
786d7f5d3SJohn Marino The GNU MP Library is free software; you can redistribute it and/or modify
886d7f5d3SJohn Marino it under the terms of the GNU Lesser General Public License as published by
986d7f5d3SJohn Marino the Free Software Foundation; either version 3 of the License, or (at your
1086d7f5d3SJohn Marino option) any later version.
1186d7f5d3SJohn Marino
1286d7f5d3SJohn Marino The GNU MP Library is distributed in the hope that it will be useful, but
1386d7f5d3SJohn Marino WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
1486d7f5d3SJohn Marino or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
1586d7f5d3SJohn Marino License for more details.
1686d7f5d3SJohn Marino
1786d7f5d3SJohn Marino You should have received a copy of the GNU Lesser General Public License
1886d7f5d3SJohn Marino along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
1986d7f5d3SJohn Marino
2086d7f5d3SJohn Marino #include <stdio.h>
2186d7f5d3SJohn Marino #include "gmp.h"
2286d7f5d3SJohn Marino #include "gmp-impl.h"
2386d7f5d3SJohn Marino
2486d7f5d3SJohn Marino
/* Diagnostic hook.  As defined here it expands to nothing, so every
   TRACE (...) statement in this file disappears from the compiled code.
   Change this to "#define TRACE(x) x" for diagnostics.  */
#define TRACE(x)
2786d7f5d3SJohn Marino
2886d7f5d3SJohn Marino
/* Notes:

   For the +4 in L[2k+1] when k is odd, all L[4m+3] == 4, 5 or 7 mod 8, so
   there can't be an overflow applying +4 to just the low limb (since that
   would leave 0, 1, 2 or 3 mod 8).

   For the -4 in L[2k+1] when k is even, it seems (no proof) that
   L[3*2^(b-2)-3] == -4 mod 2^b, so for instance with a 32-bit limb
   L[0xBFFFFFFD] == 0xFFFFFFFC mod 2^32, and this implies a borrow from the
   low limb.  Obviously L[0xBFFFFFFD] is a huge number, but it's at least
   conceivable to calculate it, so it probably should be handled.

   For the -2 in L[2k] with k even, it seems (no proof) L[2^(b-1)] == -1 mod
   2^b, so for instance in 32-bits L[0x80000000] has a low limb of
   0xFFFFFFFF so there would have been a borrow.  Again L[0x80000000] is
   obviously huge, but probably should be made to work.  */
4686d7f5d3SJohn Marino void
mpz_lucnum_ui(mpz_ptr ln,unsigned long n)4786d7f5d3SJohn Marino mpz_lucnum_ui (mpz_ptr ln, unsigned long n)
4886d7f5d3SJohn Marino {
4986d7f5d3SJohn Marino mp_size_t lalloc, xalloc, lsize, xsize;
5086d7f5d3SJohn Marino mp_ptr lp, xp;
5186d7f5d3SJohn Marino mp_limb_t c;
5286d7f5d3SJohn Marino int zeros;
5386d7f5d3SJohn Marino TMP_DECL;
5486d7f5d3SJohn Marino
5586d7f5d3SJohn Marino TRACE (printf ("mpn_lucnum_ui n=%lu\n", n));
5686d7f5d3SJohn Marino
5786d7f5d3SJohn Marino if (n <= FIB_TABLE_LUCNUM_LIMIT)
5886d7f5d3SJohn Marino {
5986d7f5d3SJohn Marino /* L[n] = F[n] + 2F[n-1] */
6086d7f5d3SJohn Marino PTR(ln)[0] = FIB_TABLE(n) + 2 * FIB_TABLE ((int) n - 1);
6186d7f5d3SJohn Marino SIZ(ln) = 1;
6286d7f5d3SJohn Marino return;
6386d7f5d3SJohn Marino }
6486d7f5d3SJohn Marino
6586d7f5d3SJohn Marino /* +1 since L[n]=F[n]+2F[n-1] might be 1 limb bigger than F[n], further +1
6686d7f5d3SJohn Marino since square or mul used below might need an extra limb over the true
6786d7f5d3SJohn Marino size */
6886d7f5d3SJohn Marino lalloc = MPN_FIB2_SIZE (n) + 2;
6986d7f5d3SJohn Marino MPZ_REALLOC (ln, lalloc);
7086d7f5d3SJohn Marino lp = PTR (ln);
7186d7f5d3SJohn Marino
7286d7f5d3SJohn Marino TMP_MARK;
7386d7f5d3SJohn Marino xalloc = lalloc;
7486d7f5d3SJohn Marino xp = TMP_ALLOC_LIMBS (xalloc);
7586d7f5d3SJohn Marino
7686d7f5d3SJohn Marino /* Strip trailing zeros from n, until either an odd number is reached
7786d7f5d3SJohn Marino where the L[2k+1] formula can be used, or until n fits within the
7886d7f5d3SJohn Marino FIB_TABLE data. The table is preferred of course. */
7986d7f5d3SJohn Marino zeros = 0;
8086d7f5d3SJohn Marino for (;;)
8186d7f5d3SJohn Marino {
8286d7f5d3SJohn Marino if (n & 1)
8386d7f5d3SJohn Marino {
8486d7f5d3SJohn Marino /* L[2k+1] = 5*F[k-1]*(2*F[k]+F[k-1]) - 4*(-1)^k */
8586d7f5d3SJohn Marino
8686d7f5d3SJohn Marino mp_size_t yalloc, ysize;
8786d7f5d3SJohn Marino mp_ptr yp;
8886d7f5d3SJohn Marino
8986d7f5d3SJohn Marino TRACE (printf (" initial odd n=%lu\n", n));
9086d7f5d3SJohn Marino
9186d7f5d3SJohn Marino yalloc = MPN_FIB2_SIZE (n/2);
9286d7f5d3SJohn Marino yp = TMP_ALLOC_LIMBS (yalloc);
9386d7f5d3SJohn Marino ASSERT (xalloc >= yalloc);
9486d7f5d3SJohn Marino
9586d7f5d3SJohn Marino xsize = mpn_fib2_ui (xp, yp, n/2);
9686d7f5d3SJohn Marino
9786d7f5d3SJohn Marino /* possible high zero on F[k-1] */
9886d7f5d3SJohn Marino ysize = xsize;
9986d7f5d3SJohn Marino ysize -= (yp[ysize-1] == 0);
10086d7f5d3SJohn Marino ASSERT (yp[ysize-1] != 0);
10186d7f5d3SJohn Marino
10286d7f5d3SJohn Marino /* xp = 2*F[k] + F[k-1] */
10386d7f5d3SJohn Marino #if HAVE_NATIVE_mpn_addlsh1_n
10486d7f5d3SJohn Marino c = mpn_addlsh1_n (xp, yp, xp, xsize);
10586d7f5d3SJohn Marino #else
10686d7f5d3SJohn Marino c = mpn_lshift (xp, xp, xsize, 1);
10786d7f5d3SJohn Marino c += mpn_add_n (xp, xp, yp, xsize);
10886d7f5d3SJohn Marino #endif
10986d7f5d3SJohn Marino ASSERT (xalloc >= xsize+1);
11086d7f5d3SJohn Marino xp[xsize] = c;
11186d7f5d3SJohn Marino xsize += (c != 0);
11286d7f5d3SJohn Marino ASSERT (xp[xsize-1] != 0);
11386d7f5d3SJohn Marino
11486d7f5d3SJohn Marino ASSERT (lalloc >= xsize + ysize);
11586d7f5d3SJohn Marino c = mpn_mul (lp, xp, xsize, yp, ysize);
11686d7f5d3SJohn Marino lsize = xsize + ysize;
11786d7f5d3SJohn Marino lsize -= (c == 0);
11886d7f5d3SJohn Marino
11986d7f5d3SJohn Marino /* lp = 5*lp */
12086d7f5d3SJohn Marino #if HAVE_NATIVE_mpn_addlshift
12186d7f5d3SJohn Marino c = mpn_addlshift (lp, lp, lsize, 2);
12286d7f5d3SJohn Marino #else
12386d7f5d3SJohn Marino c = mpn_lshift (xp, lp, lsize, 2);
12486d7f5d3SJohn Marino c += mpn_add_n (lp, lp, xp, lsize);
12586d7f5d3SJohn Marino #endif
12686d7f5d3SJohn Marino ASSERT (lalloc >= lsize+1);
12786d7f5d3SJohn Marino lp[lsize] = c;
12886d7f5d3SJohn Marino lsize += (c != 0);
12986d7f5d3SJohn Marino
13086d7f5d3SJohn Marino /* lp = lp - 4*(-1)^k */
13186d7f5d3SJohn Marino if (n & 2)
13286d7f5d3SJohn Marino {
13386d7f5d3SJohn Marino /* no overflow, see comments above */
13486d7f5d3SJohn Marino ASSERT (lp[0] <= MP_LIMB_T_MAX-4);
13586d7f5d3SJohn Marino lp[0] += 4;
13686d7f5d3SJohn Marino }
13786d7f5d3SJohn Marino else
13886d7f5d3SJohn Marino {
13986d7f5d3SJohn Marino /* won't go negative */
14086d7f5d3SJohn Marino MPN_DECR_U (lp, lsize, CNST_LIMB(4));
14186d7f5d3SJohn Marino }
14286d7f5d3SJohn Marino
14386d7f5d3SJohn Marino TRACE (mpn_trace (" l",lp, lsize));
14486d7f5d3SJohn Marino break;
14586d7f5d3SJohn Marino }
14686d7f5d3SJohn Marino
14786d7f5d3SJohn Marino MP_PTR_SWAP (xp, lp); /* balance the swaps wanted in the L[2k] below */
14886d7f5d3SJohn Marino zeros++;
14986d7f5d3SJohn Marino n /= 2;
15086d7f5d3SJohn Marino
15186d7f5d3SJohn Marino if (n <= FIB_TABLE_LUCNUM_LIMIT)
15286d7f5d3SJohn Marino {
15386d7f5d3SJohn Marino /* L[n] = F[n] + 2F[n-1] */
15486d7f5d3SJohn Marino lp[0] = FIB_TABLE (n) + 2 * FIB_TABLE ((int) n - 1);
15586d7f5d3SJohn Marino lsize = 1;
15686d7f5d3SJohn Marino
15786d7f5d3SJohn Marino TRACE (printf (" initial small n=%lu\n", n);
15886d7f5d3SJohn Marino mpn_trace (" l",lp, lsize));
15986d7f5d3SJohn Marino break;
16086d7f5d3SJohn Marino }
16186d7f5d3SJohn Marino }
16286d7f5d3SJohn Marino
16386d7f5d3SJohn Marino for ( ; zeros != 0; zeros--)
16486d7f5d3SJohn Marino {
16586d7f5d3SJohn Marino /* L[2k] = L[k]^2 + 2*(-1)^k */
16686d7f5d3SJohn Marino
16786d7f5d3SJohn Marino TRACE (printf (" zeros=%d\n", zeros));
16886d7f5d3SJohn Marino
16986d7f5d3SJohn Marino ASSERT (xalloc >= 2*lsize);
17086d7f5d3SJohn Marino mpn_sqr (xp, lp, lsize);
17186d7f5d3SJohn Marino lsize *= 2;
17286d7f5d3SJohn Marino lsize -= (xp[lsize-1] == 0);
17386d7f5d3SJohn Marino
17486d7f5d3SJohn Marino /* First time around the loop k==n determines (-1)^k, after that k is
17586d7f5d3SJohn Marino always even and we set n=0 to indicate that. */
17686d7f5d3SJohn Marino if (n & 1)
17786d7f5d3SJohn Marino {
17886d7f5d3SJohn Marino /* L[n]^2 == 0 or 1 mod 4, like all squares, so +2 gives no carry */
17986d7f5d3SJohn Marino ASSERT (xp[0] <= MP_LIMB_T_MAX-2);
18086d7f5d3SJohn Marino xp[0] += 2;
18186d7f5d3SJohn Marino n = 0;
18286d7f5d3SJohn Marino }
18386d7f5d3SJohn Marino else
18486d7f5d3SJohn Marino {
18586d7f5d3SJohn Marino /* won't go negative */
18686d7f5d3SJohn Marino MPN_DECR_U (xp, lsize, CNST_LIMB(2));
18786d7f5d3SJohn Marino }
18886d7f5d3SJohn Marino
18986d7f5d3SJohn Marino MP_PTR_SWAP (xp, lp);
19086d7f5d3SJohn Marino ASSERT (lp[lsize-1] != 0);
19186d7f5d3SJohn Marino }
19286d7f5d3SJohn Marino
19386d7f5d3SJohn Marino /* should end up in the right spot after all the xp/lp swaps */
19486d7f5d3SJohn Marino ASSERT (lp == PTR(ln));
19586d7f5d3SJohn Marino SIZ(ln) = lsize;
19686d7f5d3SJohn Marino
19786d7f5d3SJohn Marino TMP_FREE;
19886d7f5d3SJohn Marino }
199