xref: /netbsd-src/external/lgpl3/gmp/dist/mpz/lucnum_ui.c (revision 72c7faa4dbb41dbb0238d6b4a109da0d4b236dd4)
1 /* mpz_lucnum_ui -- calculate Lucas number.
2 
3 Copyright 2001, 2003, 2005, 2011, 2012, 2015, 2016 Free Software Foundation, Inc.
4 
5 This file is part of the GNU MP Library.
6 
7 The GNU MP Library is free software; you can redistribute it and/or modify
8 it under the terms of either:
9 
10   * the GNU Lesser General Public License as published by the Free
11     Software Foundation; either version 3 of the License, or (at your
12     option) any later version.
13 
14 or
15 
16   * the GNU General Public License as published by the Free Software
17     Foundation; either version 2 of the License, or (at your option) any
18     later version.
19 
20 or both in parallel, as here.
21 
22 The GNU MP Library is distributed in the hope that it will be useful, but
23 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
24 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
25 for more details.
26 
27 You should have received copies of the GNU General Public License and the
28 GNU Lesser General Public License along with the GNU MP Library.  If not,
29 see https://www.gnu.org/licenses/.  */
30 
31 #include <stdio.h>
32 #include "gmp-impl.h"
33 
34 
35 /* change this to "#define TRACE(x) x" for diagnostics */
36 #define TRACE(x)
37 
38 
39 /* Notes:
40 
41    For the +4 in L[2k+1] when k is even, all L[4m+3] == 4, 5 or 7 mod 8, so
42    there can't be an overflow applying +4 to just the low limb (since that
43    would leave 0, 1, 2 or 3 mod 8).
44 
45    For the -4 in L[2k+1] when k is even, it seems (no proof) that
46    L[3*2^(b-2)-3] == -4 mod 2^b, so for instance with a 32-bit limb
47    L[0xBFFFFFFD] == 0xFFFFFFFC mod 2^32, and this implies a borrow from the
48    low limb.  Obviously L[0xBFFFFFFD] is a huge number, but it's at least
49    conceivable to calculate it, so it probably should be handled.
50 
51    For the -2 in L[2k] with k even, it seems (no proof) L[2^(b-1)] == -1 mod
52    2^b, so for instance in 32-bits L[0x80000000] has a low limb of
53    0xFFFFFFFF so there would have been a borrow.  Again L[0x80000000] is
54    obviously huge, but probably should be made to work.  */
55 
56 void
mpz_lucnum_ui(mpz_ptr ln,unsigned long n)57 mpz_lucnum_ui (mpz_ptr ln, unsigned long n)
58 {
59   mp_size_t  lalloc, xalloc, lsize, xsize;
60   mp_ptr     lp, xp;
61   mp_limb_t  c;
62   int        zeros;
63   TMP_DECL;
64 
65   TRACE (printf ("mpn_lucnum_ui n=%lu\n", n));
66 
67   if (n <= FIB_TABLE_LUCNUM_LIMIT)
68     {
69       /* L[n] = F[n] + 2F[n-1] */
70       MPZ_NEWALLOC (ln, 1)[0] = FIB_TABLE(n) + 2 * FIB_TABLE ((int) n - 1);
71       SIZ(ln) = 1;
72       return;
73     }
74 
75   /* +1 since L[n]=F[n]+2F[n-1] might be 1 limb bigger than F[n], further +1
76      since square or mul used below might need an extra limb over the true
77      size */
78   lalloc = MPN_FIB2_SIZE (n) + 2;
79   lp = MPZ_NEWALLOC (ln, lalloc);
80 
81   TMP_MARK;
82   xalloc = lalloc;
83   xp = TMP_ALLOC_LIMBS (xalloc);
84 
85   /* Strip trailing zeros from n, until either an odd number is reached
86      where the L[2k+1] formula can be used, or until n fits within the
87      FIB_TABLE data.  The table is preferred of course.  */
88   zeros = 0;
89   for (;;)
90     {
91       if (n & 1)
92 	{
93 	  /* L[2k+1] = 5*F[k-1]*(2*F[k]+F[k-1]) - 4*(-1)^k */
94 
95 	  mp_size_t  yalloc, ysize;
96 	  mp_ptr     yp;
97 
98 	  TRACE (printf ("  initial odd n=%lu\n", n));
99 
100 	  yalloc = MPN_FIB2_SIZE (n/2);
101 	  yp = TMP_ALLOC_LIMBS (yalloc);
102 	  ASSERT (xalloc >= yalloc);
103 
104 	  xsize = mpn_fib2_ui (xp, yp, n/2);
105 
106 	  /* possible high zero on F[k-1] */
107 	  ysize = xsize;
108 	  ysize -= (yp[ysize-1] == 0);
109 	  ASSERT (yp[ysize-1] != 0);
110 
111 	  /* xp = 2*F[k] + F[k-1] */
112 #if HAVE_NATIVE_mpn_addlsh1_n
113 	  c = mpn_addlsh1_n (xp, yp, xp, xsize);
114 #else
115 	  c = mpn_lshift (xp, xp, xsize, 1);
116 	  c += mpn_add_n (xp, xp, yp, xsize);
117 #endif
118 	  ASSERT (xalloc >= xsize+1);
119 	  xp[xsize] = c;
120 	  xsize += (c != 0);
121 	  ASSERT (xp[xsize-1] != 0);
122 
123 	  ASSERT (lalloc >= xsize + ysize);
124 	  c = mpn_mul (lp, xp, xsize, yp, ysize);
125 	  lsize = xsize + ysize;
126 	  lsize -= (c == 0);
127 
128 	  /* lp = 5*lp */
129 #if HAVE_NATIVE_mpn_addlsh2_n
130 	  c = mpn_addlsh2_n (lp, lp, lp, lsize);
131 #else
132 	  /* FIXME: Is this faster than mpn_mul_1 ? */
133 	  c = mpn_lshift (xp, lp, lsize, 2);
134 	  c += mpn_add_n (lp, lp, xp, lsize);
135 #endif
136 	  ASSERT (lalloc >= lsize+1);
137 	  lp[lsize] = c;
138 	  lsize += (c != 0);
139 
140 	  /* lp = lp - 4*(-1)^k */
141 	  if (n & 2)
142 	    {
143 	      /* no overflow, see comments above */
144 	      ASSERT (lp[0] <= MP_LIMB_T_MAX-4);
145 	      lp[0] += 4;
146 	    }
147 	  else
148 	    {
149 	      /* won't go negative */
150 	      MPN_DECR_U (lp, lsize, CNST_LIMB(4));
151 	    }
152 
153 	  TRACE (mpn_trace ("  l",lp, lsize));
154 	  break;
155 	}
156 
157       MP_PTR_SWAP (xp, lp); /* balance the swaps wanted in the L[2k] below */
158       zeros++;
159       n /= 2;
160 
161       if (n <= FIB_TABLE_LUCNUM_LIMIT)
162 	{
163 	  /* L[n] = F[n] + 2F[n-1] */
164 	  lp[0] = FIB_TABLE (n) + 2 * FIB_TABLE ((int) n - 1);
165 	  lsize = 1;
166 
167 	  TRACE (printf ("  initial small n=%lu\n", n);
168 		 mpn_trace ("  l",lp, lsize));
169 	  break;
170 	}
171     }
172 
173   for ( ; zeros != 0; zeros--)
174     {
175       /* L[2k] = L[k]^2 + 2*(-1)^k */
176 
177       TRACE (printf ("  zeros=%d\n", zeros));
178 
179       ASSERT (xalloc >= 2*lsize);
180       mpn_sqr (xp, lp, lsize);
181       lsize *= 2;
182       lsize -= (xp[lsize-1] == 0);
183 
184       /* First time around the loop k==n determines (-1)^k, after that k is
185 	 always even and we set n=0 to indicate that.  */
186       if (n & 1)
187 	{
188 	  /* L[n]^2 == 0 or 1 mod 4, like all squares, so +2 gives no carry */
189 	  ASSERT (xp[0] <= MP_LIMB_T_MAX-2);
190 	  xp[0] += 2;
191 	  n = 0;
192 	}
193       else
194 	{
195 	  /* won't go negative */
196 	  MPN_DECR_U (xp, lsize, CNST_LIMB(2));
197 	}
198 
199       MP_PTR_SWAP (xp, lp);
200       ASSERT (lp[lsize-1] != 0);
201     }
202 
203   /* should end up in the right spot after all the xp/lp swaps */
204   ASSERT (lp == PTR(ln));
205   SIZ(ln) = lsize;
206 
207   TMP_FREE;
208 }
209