xref: /netbsd-src/external/lgpl3/gmp/dist/mpz/bin_ui.c (revision 72c7faa4dbb41dbb0238d6b4a109da0d4b236dd4)
1 /* mpz_bin_ui(RESULT, N, K) -- Set RESULT to N over K.
2 
3 Copyright 1998-2002, 2012, 2013, 2015, 2017-2018 Free Software Foundation, Inc.
4 
5 This file is part of the GNU MP Library.
6 
7 The GNU MP Library is free software; you can redistribute it and/or modify
8 it under the terms of either:
9 
10   * the GNU Lesser General Public License as published by the Free
11     Software Foundation; either version 3 of the License, or (at your
12     option) any later version.
13 
14 or
15 
16   * the GNU General Public License as published by the Free Software
17     Foundation; either version 2 of the License, or (at your option) any
18     later version.
19 
20 or both in parallel, as here.
21 
22 The GNU MP Library is distributed in the hope that it will be useful, but
23 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
24 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
25 for more details.
26 
27 You should have received copies of the GNU General Public License and the
28 GNU Lesser General Public License along with the GNU MP Library.  If not,
29 see https://www.gnu.org/licenses/.  */
30 
31 #include "gmp-impl.h"
32 
33 /* How many special cases? Minimum is 2: 0 and 1;
34  * also 3 {0,1,2} and 5 {0,1,2,3,4} are implemented.
35  */
36 #define APARTAJ_KALKULOJ 2
37 
38 /* Whether to use (1) or not (0) the function mpz_bin_uiui whenever
39  * the operands fit.
40  */
41 #define UZU_BIN_UIUI 0
42 
43 /* Whether to use a shortcut to precompute the product of four
44  * elements (1), or precompute only the product of a couple (0).
45  *
46  * In both cases the precomputed product is then updated with some
47  * linear operations to obtain the product of the next four (1)
48  * [or two (0)] operands.
49  */
50 #define KVAROPE 1
51 
52 static void
posmpz_init(mpz_ptr r)53 posmpz_init (mpz_ptr r)
54 {
55   mp_ptr rp;
56   ASSERT (SIZ (r) > 0);
57   rp = SIZ (r) + MPZ_REALLOC (r, SIZ (r) + 2);
58   *rp = 0;
59   *++rp = 0;
60 }
61 
62 /* Equivalent to mpz_add_ui (r, r, in), but faster when
63    0 < SIZ (r) < ALLOC (r) and limbs above SIZ (r) contain 0. */
64 static void
posmpz_inc_ui(mpz_ptr r,unsigned long in)65 posmpz_inc_ui (mpz_ptr r, unsigned long in)
66 {
67 #if BITS_PER_ULONG > GMP_NUMB_BITS
68   mpz_add_ui (r, r, in);
69 #else
70   ASSERT (SIZ (r) > 0);
71   MPN_INCR_U (PTR (r), SIZ (r) + 1, in);
72   SIZ (r) += (PTR (r)[SIZ (r)] != 0);
73 #endif
74 }
75 
76 /* Equivalent to mpz_sub_ui (r, r, in), but faster when
77    0 < SIZ (r) and we know in advance that the result is positive. */
78 static void
posmpz_dec_ui(mpz_ptr r,unsigned long in)79 posmpz_dec_ui (mpz_ptr r, unsigned long in)
80 {
81 #if BITS_PER_ULONG > GMP_NUMB_BITS
82   mpz_sub_ui (r, r, in);
83 #else
84   ASSERT (mpz_cmp_ui (r, in) >= 0);
85   MPN_DECR_U (PTR (r), SIZ (r), in);
86   SIZ (r) -= (PTR (r)[SIZ (r)-1] == 0);
87 #endif
88 }
89 
90 /* Equivalent to mpz_tdiv_q_2exp (r, r, 1), but faster when
91    0 < SIZ (r) and we know in advance that the result is positive. */
92 static void
posmpz_rsh1(mpz_ptr r)93 posmpz_rsh1 (mpz_ptr r)
94 {
95   mp_ptr rp;
96   mp_size_t rn;
97 
98   rn = SIZ (r);
99   rp = PTR (r);
100   ASSERT (rn > 0);
101   mpn_rshift (rp, rp, rn, 1);
102   SIZ (r) -= rp[rn - 1] == 0;
103 }
104 
105 /* Computes r = n(n+(2*k-1))/2
106    It uses a sqare instead of a product, computing
107    r = ((n+k-1)^2 + n - (k-1)^2)/2
108    As a side effect, sets t = n+k-1
109  */
110 static void
mpz_hmul_nbnpk(mpz_ptr r,mpz_srcptr n,unsigned long int k,mpz_ptr t)111 mpz_hmul_nbnpk (mpz_ptr r, mpz_srcptr n, unsigned long int k, mpz_ptr t)
112 {
113   ASSERT (k > 0 && SIZ(n) > 0);
114   --k;
115   mpz_add_ui (t, n, k);
116   mpz_mul (r, t, t);
117   mpz_add (r, r, n);
118   posmpz_rsh1 (r);
119   if (LIKELY (k <= (1UL << (BITS_PER_ULONG / 2))))
120     posmpz_dec_ui (r, (k + (k & 1))*(k >> 1));
121   else
122     {
123       mpz_t tmp;
124       mpz_init_set_ui (tmp, (k + (k & 1)));
125       mpz_mul_ui (tmp, tmp, k >> 1);
126       mpz_sub (r, r, tmp);
127       mpz_clear (tmp);
128     }
129 }
130 
131 #if KVAROPE
132 static void
rek_raising_fac4(mpz_ptr r,mpz_ptr p,mpz_ptr P,unsigned long int k,unsigned long int lk,mpz_ptr t)133 rek_raising_fac4 (mpz_ptr r, mpz_ptr p, mpz_ptr P, unsigned long int k, unsigned long int lk, mpz_ptr t)
134 {
135   if (k - lk < 5)
136     {
137       do {
138 	posmpz_inc_ui (p, 4*k+2);
139 	mpz_addmul_ui (P, p, 4*k);
140 	posmpz_dec_ui (P, k);
141 	mpz_mul (r, r, P);
142       } while (--k > lk);
143     }
144   else
145     {
146       mpz_t lt;
147       unsigned long int m;
148 
149       m = ((k + lk) >> 1) + 1;
150       rek_raising_fac4 (r, p, P, k, m, t);
151 
152       posmpz_inc_ui (p, 4*m+2);
153       mpz_addmul_ui (P, p, 4*m);
154       posmpz_dec_ui (P, m);
155       if (t == NULL)
156 	{
157 	  mpz_init_set (lt, P);
158 	  t = lt;
159 	}
160       else
161 	{
162 	  ALLOC (lt) = 0;
163 	  mpz_set (t, P);
164 	}
165       rek_raising_fac4 (t, p, P, m - 1, lk, NULL);
166 
167       mpz_mul (r, r, t);
168       mpz_clear (lt);
169     }
170 }
171 
172 /* Computes (n+1)(n+2)...(n+k)/2^(k/2 +k/4) using the helper function
173    rek_raising_fac4, and exploiting an idea inspired by a piece of
174    code that Fredrik Johansson wrote and by a comment by Niels Möller.
175 
176    Assume k = 4i then compute:
177      p  = (n+1)(n+4i)/2 - i
178 	  (n+1+1)(n+4i)/2 = p + i + (n+4i)/2
179 	  (n+1+1)(n+4i-1)/2 = p + i + ((n+4i)-(n+1+1))/2 = p + i + (n-n+4i-2)/2 = p + 3i-1
180      P  = (p + i)*(p+3i-1)/2 = (n+1)(n+2)(n+4i-1)(n+4i)/8
181      n' = n + 2
182      i' = i - 1
183 	  (n'-1)(n')(n'+4i'+1)(n'+4i'+2)/8 = P
184 	  (n'-1)(n'+4i'+2)/2 - i' - 1 = p
185 	  (n'-1+2)(n'+4i'+2)/2 - i' - 1 = p + (n'+4i'+2)
186 	  (n'-1+2)(n'+4i'+2-2)/2 - i' - 1 = p + (n'+4i'+2) - (n'-1+2) =  p + 4i' + 1
187 	  (n'-1+2)(n'+4i'+2-2)/2 - i' = p + 4i' + 2
188      p' = p + 4i' + 2 = (n'+1)(n'+4i')/2 - i'
189 	  p' - 4i' - 2 = p
190 	  (p' - 4i' - 2 + i)*(p' - 4i' - 2+3i-1)/2 = P
191 	  (p' - 4i' - 2 + i' + 1)*(p' - 4i' - 2 + 3i' + 3 - 1)/2 = P
192 	  (p' - 3i' - 1)*(p' - i')/2 = P
193 	  (p' - 3i' - 1 + 4i' + 1)*(p' - i' + 4i' - 1)/2 = P + (4i' + 1)*(p' - i')/2 + (p' - 3i' - 1 + 4i' + 1)*(4i' - 1)/2
194 	  (p' + i')*(p' + 3i' - 1)/2 = P + (4i')*(p' + p')/2 + (p' - i' - (p' + i'))/2
195 	  (p' + i')*(p' + 3i' - 1)/2 = P + 4i'p' + (p' - i' - p' - i')/2
196 	  (p' + i')*(p' + 3i' - 1)/2 = P + 4i'p' - i'
197      P' = P + 4i'p' - i'
198 
199    And compute the product P * P' * P" ...
200  */
201 
202 static void
mpz_raising_fac4(mpz_ptr r,mpz_ptr n,unsigned long int k,mpz_ptr t,mpz_ptr p)203 mpz_raising_fac4 (mpz_ptr r, mpz_ptr n, unsigned long int k, mpz_ptr t, mpz_ptr p)
204 {
205   ASSERT ((k >= APARTAJ_KALKULOJ) && (APARTAJ_KALKULOJ > 0));
206   posmpz_init (n);
207   posmpz_inc_ui (n, 1);
208   SIZ (r) = 0;
209   if (k & 1)
210     {
211       mpz_set (r, n);
212       posmpz_inc_ui (n, 1);
213     }
214   k >>= 1;
215   if (APARTAJ_KALKULOJ < 2 && k == 0)
216     return;
217 
218   mpz_hmul_nbnpk (p, n, k, t);
219   posmpz_init (p);
220 
221   if (k & 1)
222     {
223       if (SIZ (r))
224 	mpz_mul (r, r, p);
225       else
226 	mpz_set (r, p);
227       posmpz_inc_ui (p, k - 1);
228     }
229   k >>= 1;
230   if (APARTAJ_KALKULOJ < 4 && k == 0)
231     return;
232 
233   mpz_hmul_nbnpk (t, p, k, n);
234   if (SIZ (r))
235     mpz_mul (r, r, t);
236   else
237     mpz_set (r, t);
238 
239   if (APARTAJ_KALKULOJ > 8 || k > 1)
240     {
241       posmpz_dec_ui (p, k);
242       rek_raising_fac4 (r, p, t, k - 1, 0, n);
243     }
244 }
245 
246 #else /* KVAROPE */
247 
248 static void
rek_raising_fac(mpz_ptr r,mpz_ptr n,unsigned long int k,unsigned long int lk,mpz_ptr t1,mpz_ptr t2)249 rek_raising_fac (mpz_ptr r, mpz_ptr n, unsigned long int k, unsigned long int lk, mpz_ptr t1, mpz_ptr t2)
250 {
251   /* Should the threshold depend on SIZ (n) ? */
252   if (k - lk < 10)
253     {
254       do {
255 	posmpz_inc_ui (n, k);
256 	mpz_mul (r, r, n);
257 	--k;
258       } while (k > lk);
259     }
260   else
261     {
262       mpz_t t3;
263       unsigned long int m;
264 
265       m = ((k + lk) >> 1) + 1;
266       rek_raising_fac (r, n, k, m, t1, t2);
267 
268       posmpz_inc_ui (n, m);
269       if (t1 == NULL)
270 	{
271 	  mpz_init_set (t3, n);
272 	  t1 = t3;
273 	}
274       else
275 	{
276 	  ALLOC (t3) = 0;
277 	  mpz_set (t1, n);
278 	}
279       rek_raising_fac (t1, n, m - 1, lk, t2, NULL);
280 
281       mpz_mul (r, r, t1);
282       mpz_clear (t3);
283     }
284 }
285 
286 /* Computes (n+1)(n+2)...(n+k)/2^(k/2) using the helper function
287    rek_raising_fac, and exploiting an idea inspired by a piece of
288    code that Fredrik Johansson wrote.
289 
290    Force an even k = 2i then compute:
291      p  = (n+1)(n+2i)/2
292      i' = i - 1
293      p == (n+1)(n+2i'+2)/2
294      p' = p + i' == (n+2)(n+2i'+1)/2
295      n' = n + 1
296      p'== (n'+1)(n'+2i')/2 == (n+1 +1)(n+2i -1)/2
297 
298    And compute the product p * p' * p" ...
299 */
300 
301 static void
mpz_raising_fac(mpz_ptr r,mpz_ptr n,unsigned long int k,mpz_ptr t,mpz_ptr p)302 mpz_raising_fac (mpz_ptr r, mpz_ptr n, unsigned long int k, mpz_ptr t, mpz_ptr p)
303 {
304   unsigned long int hk;
305   ASSERT ((k >= APARTAJ_KALKULOJ) && (APARTAJ_KALKULOJ > 1));
306   mpz_add_ui (n, n, 1);
307   hk = k >> 1;
308   mpz_hmul_nbnpk (p, n, hk, t);
309 
310   if ((k & 1) != 0)
311     {
312       mpz_add_ui (t, t, hk + 1);
313       mpz_mul (r, t, p);
314     }
315   else
316     {
317       mpz_set (r, p);
318     }
319 
320   if ((APARTAJ_KALKULOJ > 3) || (hk > 1))
321     {
322       posmpz_init (p);
323       rek_raising_fac (r, p, hk - 1, 0, t, n);
324     }
325 }
326 #endif /* KVAROPE */
327 
328 /* This is a poor implementation.  Look at bin_uiui.c for improvement ideas.
329    In fact consider calling mpz_bin_uiui() when the arguments fit, leaving
330    the code here only for big n.
331 
332    The identity bin(n,k) = (-1)^k * bin(-n+k-1,k) can be found in Knuth vol
333    1 section 1.2.6 part G. */
334 
335 void
mpz_bin_ui(mpz_ptr r,mpz_srcptr n,unsigned long int k)336 mpz_bin_ui (mpz_ptr r, mpz_srcptr n, unsigned long int k)
337 {
338   mpz_t      ni;
339   mp_size_t  negate;
340 
341   if (SIZ (n) < 0)
342     {
343       /* bin(n,k) = (-1)^k * bin(-n+k-1,k), and set ni = -n+k-1 - k = -n-1 */
344       mpz_init (ni);
345       mpz_add_ui (ni, n, 1L);
346       mpz_neg (ni, ni);
347       negate = (k & 1);   /* (-1)^k */
348     }
349   else
350     {
351       /* bin(n,k) == 0 if k>n
352 	 (no test for this under the n<0 case, since -n+k-1 >= k there) */
353       if (mpz_cmp_ui (n, k) < 0)
354 	{
355 	  SIZ (r) = 0;
356 	  return;
357 	}
358 
359       /* set ni = n-k */
360       mpz_init (ni);
361       mpz_sub_ui (ni, n, k);
362       negate = 0;
363     }
364 
365   /* Now wanting bin(ni+k,k), with ni positive, and "negate" is the sign (0
366      for positive, 1 for negative). */
367 
368   /* Rewrite bin(n,k) as bin(n,n-k) if that is smaller.  In this case it's
369      whether ni+k-k < k meaning ni<k, and if so change to denominator ni+k-k
370      = ni, and new ni of ni+k-ni = k.  */
371   if (mpz_cmp_ui (ni, k) < 0)
372     {
373       unsigned long  tmp;
374       tmp = k;
375       k = mpz_get_ui (ni);
376       mpz_set_ui (ni, tmp);
377     }
378 
379   if (k < APARTAJ_KALKULOJ)
380     {
381       if (k == 0)
382 	{
383 	  SIZ (r) = 1;
384 	  MPZ_NEWALLOC (r, 1)[0] = 1;
385 	}
386 #if APARTAJ_KALKULOJ > 2
387       else if (k == 2)
388 	{
389 	  mpz_add_ui (ni, ni, 1);
390 	  mpz_mul (r, ni, ni);
391 	  mpz_add (r, r, ni);
392 	  posmpz_rsh1 (r);
393 	}
394 #endif
395 #if APARTAJ_KALKULOJ > 3
396       else if (k > 2)
397 	{ /* k = 3, 4 */
398 	  mpz_add_ui (ni, ni, 2); /* n+1 */
399 	  mpz_mul (r, ni, ni); /* (n+1)^2 */
400 	  mpz_sub_ui (r, r, 1); /* (n+1)^2-1 */
401 	  if (k == 3)
402 	    {
403 	      mpz_mul (r, r, ni); /* ((n+1)^2-1)(n+1) = n(n+1)(n+2) */
404 	      /* mpz_divexact_ui (r, r, 6); /\* 6=3<<1; div_by3 ? *\/ */
405 	      mpn_pi1_bdiv_q_1 (PTR(r), PTR(r), SIZ(r), 3, GMP_NUMB_MASK/3*2+1, 1);
406 	      MPN_NORMALIZE_NOT_ZERO (PTR(r), SIZ(r));
407 	    }
408 	  else /* k = 4 */
409 	    {
410 	      mpz_add (ni, ni, r); /* (n+1)^2+n */
411 	      mpz_mul (r, ni, ni); /* ((n+1)^2+n)^2 */
412 	      mpz_sub_ui (r, r, 1); /* ((n+1)^2+n)^2-1 = n(n+1)(n+2)(n+3) */
413 	      /* mpz_divexact_ui (r, r, 24); /\* 24=3<<3; div_by3 ? *\/ */
414 	      mpn_pi1_bdiv_q_1 (PTR(r), PTR(r), SIZ(r), 3, GMP_NUMB_MASK/3*2+1, 3);
415 	      MPN_NORMALIZE_NOT_ZERO (PTR(r), SIZ(r));
416 	    }
417 	}
418 #endif
419       else
420 	{ /* k = 1 */
421 	  mpz_add_ui (r, ni, 1);
422 	}
423     }
424 #if UZU_BIN_UIUI
425   else if (mpz_cmp_ui (ni, ULONG_MAX - k) <= 0)
426     {
427       mpz_bin_uiui (r, mpz_get_ui (ni) + k, k);
428     }
429 #endif
430   else
431     {
432       mp_limb_t count;
433       mpz_t num, den;
434 
435       mpz_init (num);
436       mpz_init (den);
437 
438 #if KVAROPE
439       mpz_raising_fac4 (num, ni, k, den, r);
440       popc_limb (count, k);
441       ASSERT (k - (k >> 1) - (k >> 2) - count >= 0);
442       mpz_tdiv_q_2exp (num, num, k - (k >> 1) - (k >> 2) - count);
443 #else
444       mpz_raising_fac (num, ni, k, den, r);
445       popc_limb (count, k);
446       ASSERT (k - (k >> 1) - count >= 0);
447       mpz_tdiv_q_2exp (num, num, k - (k >> 1) - count);
448 #endif
449 
450       mpz_oddfac_1(den, k, 0);
451 
452       mpz_divexact(r, num, den);
453       mpz_clear (num);
454       mpz_clear (den);
455     }
456   mpz_clear (ni);
457 
458   SIZ(r) = (SIZ(r) ^ -negate) + negate;
459 }
460