xref: /dflybsd-src/contrib/gmp/mpz/lucnum_ui.c (revision 86d7f5d305c6adaa56ff4582ece9859d73106103)
1*86d7f5d3SJohn Marino /* mpz_lucnum_ui -- calculate Lucas number.
2*86d7f5d3SJohn Marino 
3*86d7f5d3SJohn Marino Copyright 2001, 2003, 2005 Free Software Foundation, Inc.
4*86d7f5d3SJohn Marino 
5*86d7f5d3SJohn Marino This file is part of the GNU MP Library.
6*86d7f5d3SJohn Marino 
7*86d7f5d3SJohn Marino The GNU MP Library is free software; you can redistribute it and/or modify
8*86d7f5d3SJohn Marino it under the terms of the GNU Lesser General Public License as published by
9*86d7f5d3SJohn Marino the Free Software Foundation; either version 3 of the License, or (at your
10*86d7f5d3SJohn Marino option) any later version.
11*86d7f5d3SJohn Marino 
12*86d7f5d3SJohn Marino The GNU MP Library is distributed in the hope that it will be useful, but
13*86d7f5d3SJohn Marino WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14*86d7f5d3SJohn Marino or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
15*86d7f5d3SJohn Marino License for more details.
16*86d7f5d3SJohn Marino 
17*86d7f5d3SJohn Marino You should have received a copy of the GNU Lesser General Public License
18*86d7f5d3SJohn Marino along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
19*86d7f5d3SJohn Marino 
20*86d7f5d3SJohn Marino #include <stdio.h>
21*86d7f5d3SJohn Marino #include "gmp.h"
22*86d7f5d3SJohn Marino #include "gmp-impl.h"
23*86d7f5d3SJohn Marino 
24*86d7f5d3SJohn Marino 
25*86d7f5d3SJohn Marino /* change this to "#define TRACE(x) x" for diagnostics */
26*86d7f5d3SJohn Marino #define TRACE(x)
27*86d7f5d3SJohn Marino 
28*86d7f5d3SJohn Marino 
29*86d7f5d3SJohn Marino /* Notes:
30*86d7f5d3SJohn Marino 
31*86d7f5d3SJohn Marino    For the +4 in L[2k+1] when k is even, all L[4m+3] == 4, 5 or 7 mod 8, so
32*86d7f5d3SJohn Marino    there can't be an overflow applying +4 to just the low limb (since that
33*86d7f5d3SJohn Marino    would leave 0, 1, 2 or 3 mod 8).
34*86d7f5d3SJohn Marino 
35*86d7f5d3SJohn Marino    For the -4 in L[2k+1] when k is even, it seems (no proof) that
36*86d7f5d3SJohn Marino    L[3*2^(b-2)-3] == -4 mod 2^b, so for instance with a 32-bit limb
37*86d7f5d3SJohn Marino    L[0xBFFFFFFD] == 0xFFFFFFFC mod 2^32, and this implies a borrow from the
38*86d7f5d3SJohn Marino    low limb.  Obviously L[0xBFFFFFFD] is a huge number, but it's at least
39*86d7f5d3SJohn Marino    conceivable to calculate it, so it probably should be handled.
40*86d7f5d3SJohn Marino 
41*86d7f5d3SJohn Marino    For the -2 in L[2k] with k even, it seems (no proof) L[2^(b-1)] == -1 mod
42*86d7f5d3SJohn Marino    2^b, so for instance in 32-bits L[0x80000000] has a low limb of
43*86d7f5d3SJohn Marino    0xFFFFFFFF so there would have been a borrow.  Again L[0x80000000] is
44*86d7f5d3SJohn Marino    obviously huge, but probably should be made to work.  */
45*86d7f5d3SJohn Marino 
46*86d7f5d3SJohn Marino void
mpz_lucnum_ui(mpz_ptr ln,unsigned long n)47*86d7f5d3SJohn Marino mpz_lucnum_ui (mpz_ptr ln, unsigned long n)
48*86d7f5d3SJohn Marino {
49*86d7f5d3SJohn Marino   mp_size_t  lalloc, xalloc, lsize, xsize;
50*86d7f5d3SJohn Marino   mp_ptr     lp, xp;
51*86d7f5d3SJohn Marino   mp_limb_t  c;
52*86d7f5d3SJohn Marino   int        zeros;
53*86d7f5d3SJohn Marino   TMP_DECL;
54*86d7f5d3SJohn Marino 
55*86d7f5d3SJohn Marino   TRACE (printf ("mpn_lucnum_ui n=%lu\n", n));
56*86d7f5d3SJohn Marino 
57*86d7f5d3SJohn Marino   if (n <= FIB_TABLE_LUCNUM_LIMIT)
58*86d7f5d3SJohn Marino     {
59*86d7f5d3SJohn Marino       /* L[n] = F[n] + 2F[n-1] */
60*86d7f5d3SJohn Marino       PTR(ln)[0] = FIB_TABLE(n) + 2 * FIB_TABLE ((int) n - 1);
61*86d7f5d3SJohn Marino       SIZ(ln) = 1;
62*86d7f5d3SJohn Marino       return;
63*86d7f5d3SJohn Marino     }
64*86d7f5d3SJohn Marino 
65*86d7f5d3SJohn Marino   /* +1 since L[n]=F[n]+2F[n-1] might be 1 limb bigger than F[n], further +1
66*86d7f5d3SJohn Marino      since square or mul used below might need an extra limb over the true
67*86d7f5d3SJohn Marino      size */
68*86d7f5d3SJohn Marino   lalloc = MPN_FIB2_SIZE (n) + 2;
69*86d7f5d3SJohn Marino   MPZ_REALLOC (ln, lalloc);
70*86d7f5d3SJohn Marino   lp = PTR (ln);
71*86d7f5d3SJohn Marino 
72*86d7f5d3SJohn Marino   TMP_MARK;
73*86d7f5d3SJohn Marino   xalloc = lalloc;
74*86d7f5d3SJohn Marino   xp = TMP_ALLOC_LIMBS (xalloc);
75*86d7f5d3SJohn Marino 
76*86d7f5d3SJohn Marino   /* Strip trailing zeros from n, until either an odd number is reached
77*86d7f5d3SJohn Marino      where the L[2k+1] formula can be used, or until n fits within the
78*86d7f5d3SJohn Marino      FIB_TABLE data.  The table is preferred of course.  */
79*86d7f5d3SJohn Marino   zeros = 0;
80*86d7f5d3SJohn Marino   for (;;)
81*86d7f5d3SJohn Marino     {
82*86d7f5d3SJohn Marino       if (n & 1)
83*86d7f5d3SJohn Marino         {
84*86d7f5d3SJohn Marino           /* L[2k+1] = 5*F[k-1]*(2*F[k]+F[k-1]) - 4*(-1)^k */
85*86d7f5d3SJohn Marino 
86*86d7f5d3SJohn Marino           mp_size_t  yalloc, ysize;
87*86d7f5d3SJohn Marino           mp_ptr     yp;
88*86d7f5d3SJohn Marino 
89*86d7f5d3SJohn Marino           TRACE (printf ("  initial odd n=%lu\n", n));
90*86d7f5d3SJohn Marino 
91*86d7f5d3SJohn Marino           yalloc = MPN_FIB2_SIZE (n/2);
92*86d7f5d3SJohn Marino           yp = TMP_ALLOC_LIMBS (yalloc);
93*86d7f5d3SJohn Marino           ASSERT (xalloc >= yalloc);
94*86d7f5d3SJohn Marino 
95*86d7f5d3SJohn Marino           xsize = mpn_fib2_ui (xp, yp, n/2);
96*86d7f5d3SJohn Marino 
97*86d7f5d3SJohn Marino           /* possible high zero on F[k-1] */
98*86d7f5d3SJohn Marino           ysize = xsize;
99*86d7f5d3SJohn Marino           ysize -= (yp[ysize-1] == 0);
100*86d7f5d3SJohn Marino           ASSERT (yp[ysize-1] != 0);
101*86d7f5d3SJohn Marino 
102*86d7f5d3SJohn Marino           /* xp = 2*F[k] + F[k-1] */
103*86d7f5d3SJohn Marino #if HAVE_NATIVE_mpn_addlsh1_n
104*86d7f5d3SJohn Marino           c = mpn_addlsh1_n (xp, yp, xp, xsize);
105*86d7f5d3SJohn Marino #else
106*86d7f5d3SJohn Marino           c = mpn_lshift (xp, xp, xsize, 1);
107*86d7f5d3SJohn Marino           c += mpn_add_n (xp, xp, yp, xsize);
108*86d7f5d3SJohn Marino #endif
109*86d7f5d3SJohn Marino           ASSERT (xalloc >= xsize+1);
110*86d7f5d3SJohn Marino           xp[xsize] = c;
111*86d7f5d3SJohn Marino           xsize += (c != 0);
112*86d7f5d3SJohn Marino           ASSERT (xp[xsize-1] != 0);
113*86d7f5d3SJohn Marino 
114*86d7f5d3SJohn Marino           ASSERT (lalloc >= xsize + ysize);
115*86d7f5d3SJohn Marino           c = mpn_mul (lp, xp, xsize, yp, ysize);
116*86d7f5d3SJohn Marino           lsize = xsize + ysize;
117*86d7f5d3SJohn Marino           lsize -= (c == 0);
118*86d7f5d3SJohn Marino 
119*86d7f5d3SJohn Marino           /* lp = 5*lp */
120*86d7f5d3SJohn Marino #if HAVE_NATIVE_mpn_addlshift
121*86d7f5d3SJohn Marino           c = mpn_addlshift (lp, lp, lsize, 2);
122*86d7f5d3SJohn Marino #else
123*86d7f5d3SJohn Marino           c = mpn_lshift (xp, lp, lsize, 2);
124*86d7f5d3SJohn Marino           c += mpn_add_n (lp, lp, xp, lsize);
125*86d7f5d3SJohn Marino #endif
126*86d7f5d3SJohn Marino           ASSERT (lalloc >= lsize+1);
127*86d7f5d3SJohn Marino           lp[lsize] = c;
128*86d7f5d3SJohn Marino           lsize += (c != 0);
129*86d7f5d3SJohn Marino 
130*86d7f5d3SJohn Marino           /* lp = lp - 4*(-1)^k */
131*86d7f5d3SJohn Marino           if (n & 2)
132*86d7f5d3SJohn Marino             {
133*86d7f5d3SJohn Marino               /* no overflow, see comments above */
134*86d7f5d3SJohn Marino               ASSERT (lp[0] <= MP_LIMB_T_MAX-4);
135*86d7f5d3SJohn Marino               lp[0] += 4;
136*86d7f5d3SJohn Marino             }
137*86d7f5d3SJohn Marino           else
138*86d7f5d3SJohn Marino             {
139*86d7f5d3SJohn Marino               /* won't go negative */
140*86d7f5d3SJohn Marino               MPN_DECR_U (lp, lsize, CNST_LIMB(4));
141*86d7f5d3SJohn Marino             }
142*86d7f5d3SJohn Marino 
143*86d7f5d3SJohn Marino           TRACE (mpn_trace ("  l",lp, lsize));
144*86d7f5d3SJohn Marino           break;
145*86d7f5d3SJohn Marino         }
146*86d7f5d3SJohn Marino 
147*86d7f5d3SJohn Marino       MP_PTR_SWAP (xp, lp); /* balance the swaps wanted in the L[2k] below */
148*86d7f5d3SJohn Marino       zeros++;
149*86d7f5d3SJohn Marino       n /= 2;
150*86d7f5d3SJohn Marino 
151*86d7f5d3SJohn Marino       if (n <= FIB_TABLE_LUCNUM_LIMIT)
152*86d7f5d3SJohn Marino         {
153*86d7f5d3SJohn Marino           /* L[n] = F[n] + 2F[n-1] */
154*86d7f5d3SJohn Marino           lp[0] = FIB_TABLE (n) + 2 * FIB_TABLE ((int) n - 1);
155*86d7f5d3SJohn Marino           lsize = 1;
156*86d7f5d3SJohn Marino 
157*86d7f5d3SJohn Marino           TRACE (printf ("  initial small n=%lu\n", n);
158*86d7f5d3SJohn Marino                  mpn_trace ("  l",lp, lsize));
159*86d7f5d3SJohn Marino           break;
160*86d7f5d3SJohn Marino         }
161*86d7f5d3SJohn Marino     }
162*86d7f5d3SJohn Marino 
163*86d7f5d3SJohn Marino   for ( ; zeros != 0; zeros--)
164*86d7f5d3SJohn Marino     {
165*86d7f5d3SJohn Marino       /* L[2k] = L[k]^2 + 2*(-1)^k */
166*86d7f5d3SJohn Marino 
167*86d7f5d3SJohn Marino       TRACE (printf ("  zeros=%d\n", zeros));
168*86d7f5d3SJohn Marino 
169*86d7f5d3SJohn Marino       ASSERT (xalloc >= 2*lsize);
170*86d7f5d3SJohn Marino       mpn_sqr (xp, lp, lsize);
171*86d7f5d3SJohn Marino       lsize *= 2;
172*86d7f5d3SJohn Marino       lsize -= (xp[lsize-1] == 0);
173*86d7f5d3SJohn Marino 
174*86d7f5d3SJohn Marino       /* First time around the loop k==n determines (-1)^k, after that k is
175*86d7f5d3SJohn Marino          always even and we set n=0 to indicate that.  */
176*86d7f5d3SJohn Marino       if (n & 1)
177*86d7f5d3SJohn Marino         {
178*86d7f5d3SJohn Marino           /* L[n]^2 == 0 or 1 mod 4, like all squares, so +2 gives no carry */
179*86d7f5d3SJohn Marino           ASSERT (xp[0] <= MP_LIMB_T_MAX-2);
180*86d7f5d3SJohn Marino           xp[0] += 2;
181*86d7f5d3SJohn Marino           n = 0;
182*86d7f5d3SJohn Marino         }
183*86d7f5d3SJohn Marino       else
184*86d7f5d3SJohn Marino         {
185*86d7f5d3SJohn Marino           /* won't go negative */
186*86d7f5d3SJohn Marino           MPN_DECR_U (xp, lsize, CNST_LIMB(2));
187*86d7f5d3SJohn Marino         }
188*86d7f5d3SJohn Marino 
189*86d7f5d3SJohn Marino       MP_PTR_SWAP (xp, lp);
190*86d7f5d3SJohn Marino       ASSERT (lp[lsize-1] != 0);
191*86d7f5d3SJohn Marino     }
192*86d7f5d3SJohn Marino 
193*86d7f5d3SJohn Marino   /* should end up in the right spot after all the xp/lp swaps */
194*86d7f5d3SJohn Marino   ASSERT (lp == PTR(ln));
195*86d7f5d3SJohn Marino   SIZ(ln) = lsize;
196*86d7f5d3SJohn Marino 
197*86d7f5d3SJohn Marino   TMP_FREE;
198*86d7f5d3SJohn Marino }
199