1 /* mpz_lucnum_ui -- calculate Lucas number. 2 3 Copyright 2001, 2003, 2005, 2011, 2012, 2015, 2016 Free Software Foundation, Inc. 4 5 This file is part of the GNU MP Library. 6 7 The GNU MP Library is free software; you can redistribute it and/or modify 8 it under the terms of either: 9 10 * the GNU Lesser General Public License as published by the Free 11 Software Foundation; either version 3 of the License, or (at your 12 option) any later version. 13 14 or 15 16 * the GNU General Public License as published by the Free Software 17 Foundation; either version 2 of the License, or (at your option) any 18 later version. 19 20 or both in parallel, as here. 21 22 The GNU MP Library is distributed in the hope that it will be useful, but 23 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 24 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 25 for more details. 26 27 You should have received copies of the GNU General Public License and the 28 GNU Lesser General Public License along with the GNU MP Library. If not, 29 see https://www.gnu.org/licenses/. */ 30 31 #include <stdio.h> 32 #include "gmp-impl.h" 33 34 35 /* change this to "#define TRACE(x) x" for diagnostics */ 36 #define TRACE(x) 37 38 39 /* Notes: 40 41 For the +4 in L[2k+1] when k is even, all L[4m+3] == 4, 5 or 7 mod 8, so 42 there can't be an overflow applying +4 to just the low limb (since that 43 would leave 0, 1, 2 or 3 mod 8). 44 45 For the -4 in L[2k+1] when k is even, it seems (no proof) that 46 L[3*2^(b-2)-3] == -4 mod 2^b, so for instance with a 32-bit limb 47 L[0xBFFFFFFD] == 0xFFFFFFFC mod 2^32, and this implies a borrow from the 48 low limb. Obviously L[0xBFFFFFFD] is a huge number, but it's at least 49 conceivable to calculate it, so it probably should be handled. 50 51 For the -2 in L[2k] with k even, it seems (no proof) L[2^(b-1)] == -1 mod 52 2^b, so for instance in 32-bits L[0x80000000] has a low limb of 53 0xFFFFFFFF so there would have been a borrow. Again L[0x80000000] is 54 obviously huge, but probably should be made to work. */ 55 56 void 57 mpz_lucnum_ui (mpz_ptr ln, unsigned long n) 58 { 59 mp_size_t lalloc, xalloc, lsize, xsize; 60 mp_ptr lp, xp; 61 mp_limb_t c; 62 int zeros; 63 TMP_DECL; 64 65 TRACE (printf ("mpn_lucnum_ui n=%lu\n", n)); 66 67 if (n <= FIB_TABLE_LUCNUM_LIMIT) 68 { 69 /* L[n] = F[n] + 2F[n-1] */ 70 MPZ_NEWALLOC (ln, 1)[0] = FIB_TABLE(n) + 2 * FIB_TABLE ((int) n - 1); 71 SIZ(ln) = 1; 72 return; 73 } 74 75 /* +1 since L[n]=F[n]+2F[n-1] might be 1 limb bigger than F[n], further +1 76 since square or mul used below might need an extra limb over the true 77 size */ 78 lalloc = MPN_FIB2_SIZE (n) + 2; 79 lp = MPZ_NEWALLOC (ln, lalloc); 80 81 TMP_MARK; 82 xalloc = lalloc; 83 xp = TMP_ALLOC_LIMBS (xalloc); 84 85 /* Strip trailing zeros from n, until either an odd number is reached 86 where the L[2k+1] formula can be used, or until n fits within the 87 FIB_TABLE data. The table is preferred of course. */ 88 zeros = 0; 89 for (;;) 90 { 91 if (n & 1) 92 { 93 /* L[2k+1] = 5*F[k-1]*(2*F[k]+F[k-1]) - 4*(-1)^k */ 94 95 mp_size_t yalloc, ysize; 96 mp_ptr yp; 97 98 TRACE (printf (" initial odd n=%lu\n", n)); 99 100 yalloc = MPN_FIB2_SIZE (n/2); 101 yp = TMP_ALLOC_LIMBS (yalloc); 102 ASSERT (xalloc >= yalloc); 103 104 xsize = mpn_fib2_ui (xp, yp, n/2); 105 106 /* possible high zero on F[k-1] */ 107 ysize = xsize; 108 ysize -= (yp[ysize-1] == 0); 109 ASSERT (yp[ysize-1] != 0); 110 111 /* xp = 2*F[k] + F[k-1] */ 112 #if HAVE_NATIVE_mpn_addlsh1_n 113 c = mpn_addlsh1_n (xp, yp, xp, xsize); 114 #else 115 c = mpn_lshift (xp, xp, xsize, 1); 116 c += mpn_add_n (xp, xp, yp, xsize); 117 #endif 118 ASSERT (xalloc >= xsize+1); 119 xp[xsize] = c; 120 xsize += (c != 0); 121 ASSERT (xp[xsize-1] != 0); 122 123 ASSERT (lalloc >= xsize + ysize); 124 c = mpn_mul (lp, xp, xsize, yp, ysize); 125 lsize = xsize + ysize; 126 lsize -= (c == 0); 127 128 /* lp = 5*lp */ 129 #if HAVE_NATIVE_mpn_addlsh2_n 130 c = mpn_addlsh2_n (lp, lp, lp, lsize); 131 #else 132 /* FIXME: Is this faster than mpn_mul_1 ? */ 133 c = mpn_lshift (xp, lp, lsize, 2); 134 c += mpn_add_n (lp, lp, xp, lsize); 135 #endif 136 ASSERT (lalloc >= lsize+1); 137 lp[lsize] = c; 138 lsize += (c != 0); 139 140 /* lp = lp - 4*(-1)^k */ 141 if (n & 2) 142 { 143 /* no overflow, see comments above */ 144 ASSERT (lp[0] <= MP_LIMB_T_MAX-4); 145 lp[0] += 4; 146 } 147 else 148 { 149 /* won't go negative */ 150 MPN_DECR_U (lp, lsize, CNST_LIMB(4)); 151 } 152 153 TRACE (mpn_trace (" l",lp, lsize)); 154 break; 155 } 156 157 MP_PTR_SWAP (xp, lp); /* balance the swaps wanted in the L[2k] below */ 158 zeros++; 159 n /= 2; 160 161 if (n <= FIB_TABLE_LUCNUM_LIMIT) 162 { 163 /* L[n] = F[n] + 2F[n-1] */ 164 lp[0] = FIB_TABLE (n) + 2 * FIB_TABLE ((int) n - 1); 165 lsize = 1; 166 167 TRACE (printf (" initial small n=%lu\n", n); 168 mpn_trace (" l",lp, lsize)); 169 break; 170 } 171 } 172 173 for ( ; zeros != 0; zeros--) 174 { 175 /* L[2k] = L[k]^2 + 2*(-1)^k */ 176 177 TRACE (printf (" zeros=%d\n", zeros)); 178 179 ASSERT (xalloc >= 2*lsize); 180 mpn_sqr (xp, lp, lsize); 181 lsize *= 2; 182 lsize -= (xp[lsize-1] == 0); 183 184 /* First time around the loop k==n determines (-1)^k, after that k is 185 always even and we set n=0 to indicate that. */ 186 if (n & 1) 187 { 188 /* L[n]^2 == 0 or 1 mod 4, like all squares, so +2 gives no carry */ 189 ASSERT (xp[0] <= MP_LIMB_T_MAX-2); 190 xp[0] += 2; 191 n = 0; 192 } 193 else 194 { 195 /* won't go negative */ 196 MPN_DECR_U (xp, lsize, CNST_LIMB(2)); 197 } 198 199 MP_PTR_SWAP (xp, lp); 200 ASSERT (lp[lsize-1] != 0); 201 } 202 203 /* should end up in the right spot after all the xp/lp swaps */ 204 ASSERT (lp == PTR(ln)); 205 SIZ(ln) = lsize; 206 207 TMP_FREE; 208 } 209