1 /* mpz_bin_uiui - compute n over k. 2 3 Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato. 4 5 Copyright 2010-2012, 2015-2018 Free Software Foundation, Inc. 6 7 This file is part of the GNU MP Library. 8 9 The GNU MP Library is free software; you can redistribute it and/or modify 10 it under the terms of either: 11 12 * the GNU Lesser General Public License as published by the Free 13 Software Foundation; either version 3 of the License, or (at your 14 option) any later version. 15 16 or 17 18 * the GNU General Public License as published by the Free Software 19 Foundation; either version 2 of the License, or (at your option) any 20 later version. 21 22 or both in parallel, as here. 23 24 The GNU MP Library is distributed in the hope that it will be useful, but 25 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 26 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 27 for more details. 28 29 You should have received copies of the GNU General Public License and the 30 GNU Lesser General Public License along with the GNU MP Library. If not, 31 see https://www.gnu.org/licenses/. */ 32 33 #include "gmp-impl.h" 34 #include "longlong.h" 35 36 #ifndef BIN_GOETGHELUCK_THRESHOLD 37 #define BIN_GOETGHELUCK_THRESHOLD 512 38 #endif 39 #ifndef BIN_UIUI_ENABLE_SMALLDC 40 #define BIN_UIUI_ENABLE_SMALLDC 1 41 #endif 42 #ifndef BIN_UIUI_RECURSIVE_SMALLDC 43 #define BIN_UIUI_RECURSIVE_SMALLDC (GMP_NUMB_BITS > 32) 44 #endif 45 46 /* Algorithm: 47 48 Accumulate chunks of factors first limb-by-limb (using one of mul0-mul8) 49 which are then accumulated into mpn numbers. The first inner loop 50 accumulates divisor factors, the 2nd inner loop accumulates exactly the same 51 number of dividend factors. We avoid accumulating more for the divisor, 52 even with its smaller factors, since we else cannot guarantee divisibility. 53 54 Since we know each division will yield an integer, we compute the quotient 55 using Hensel norm: If the quotient is limited by 2^t, we compute A / B mod 56 2^t. 57 58 Improvements: 59 60 (1) An obvious improvement to this code would be to compute mod 2^t 61 everywhere. Unfortunately, we cannot determine t beforehand, unless we 62 invoke some approximation, such as Stirling's formula. Of course, we don't 63 need t to be tight. However, it is not clear that this would help much, 64 our numbers are kept reasonably small already. 65 66 (2) Compute nmax/kmax semi-accurately, without scalar division or a loop. 67 Extracting the 3 msb, then doing a table lookup using cnt*8+msb as index, 68 would make it both reasonably accurate and fast. (We could use a table 69 stored into a limb, perhaps.) The table should take the removed factors of 70 2 into account (those done on-the-fly in mulN). 71 72 (3) The first time in the loop we compute the odd part of a 73 factorial in kp, we might use oddfac_1 for this task. 74 */ 75 76 /* This threshold determines how large divisor to accumulate before we call 77 bdiv. Perhaps we should never call bdiv, and accumulate all we are told, 78 since we are just basecase code anyway? Presumably, this depends on the 79 relative speed of the asymptotically fast code and this code. */ 80 #define SOME_THRESHOLD 20 81 82 /* Multiply-into-limb functions. These remove factors of 2 on-the-fly. FIXME: 83 All versions of MAXFACS don't take this 2 removal into account now, meaning 84 that then, shifting just adds some overhead. (We remove factors from the 85 completed limb anyway.) */ 86 87 static mp_limb_t 88 mul1 (mp_limb_t m) 89 { 90 return m; 91 } 92 93 static mp_limb_t 94 mul2 (mp_limb_t m) 95 { 96 /* We need to shift before multiplying, to avoid an overflow. */ 97 mp_limb_t m01 = (m | 1) * ((m + 1) >> 1); 98 return m01; 99 } 100 101 static mp_limb_t 102 mul3 (mp_limb_t m) 103 { 104 mp_limb_t m01 = (m + 0) * (m + 1) >> 1; 105 mp_limb_t m2 = (m + 2); 106 return m01 * m2; 107 } 108 109 static mp_limb_t 110 mul4 (mp_limb_t m) 111 { 112 mp_limb_t m03 = (m + 0) * (m + 3) >> 1; 113 return m03 * (m03 + 1); /* mul2 (m03) ? */ 114 } 115 116 static mp_limb_t 117 mul5 (mp_limb_t m) 118 { 119 mp_limb_t m03 = (m + 0) * (m + 3) >> 1; 120 mp_limb_t m034 = m03 * (m + 4); 121 return (m03 + 1) * m034; 122 } 123 124 static mp_limb_t 125 mul6 (mp_limb_t m) 126 { 127 mp_limb_t m05 = (m + 0) * (m + 5); 128 mp_limb_t m1234 = (m05 + 5) * (m05 + 5) >> 3; 129 return m1234 * (m05 >> 1); 130 } 131 132 static mp_limb_t 133 mul7 (mp_limb_t m) 134 { 135 mp_limb_t m05 = (m + 0) * (m + 5); 136 mp_limb_t m1234 = (m05 + 5) * (m05 + 5) >> 3; 137 mp_limb_t m056 = m05 * (m + 6) >> 1; 138 return m1234 * m056; 139 } 140 141 static mp_limb_t 142 mul8 (mp_limb_t m) 143 { 144 mp_limb_t m07 = (m + 0) * (m + 7); 145 mp_limb_t m0257 = m07 * (m07 + 10) >> 3; 146 mp_limb_t m1346 = m07 + 9 + m0257; 147 return m0257 * m1346; 148 } 149 150 /* 151 static mp_limb_t 152 mul9 (mp_limb_t m) 153 { 154 return (m + 8) * mul8 (m) ; 155 } 156 157 static mp_limb_t 158 mul10 (mp_limb_t m) 159 { 160 mp_limb_t m09 = (m + 0) * (m + 9); 161 mp_limb_t m18 = (m09 >> 1) + 4; 162 mp_limb_t m0369 = m09 * (m09 + 18) >> 3; 163 mp_limb_t m2457 = m09 * 2 + 35 + m0369; 164 return ((m0369 * m2457) >> 1) * m18; 165 } 166 */ 167 168 typedef mp_limb_t (* mulfunc_t) (mp_limb_t); 169 170 static const mulfunc_t mulfunc[] = {mul1,mul2,mul3,mul4,mul5,mul6,mul7,mul8 /* ,mul9,mul10 */}; 171 #define M (numberof(mulfunc)) 172 173 /* Number of factors-of-2 removed by the corresponding mulN function. */ 174 static const unsigned char tcnttab[] = {0, 1, 1, 2, 2, 4, 4, 6 /*,6,8*/}; 175 176 #if 1 177 /* This variant is inaccurate but share the code with other functions. */ 178 #define MAXFACS(max,l) \ 179 do { \ 180 (max) = log_n_max (l); \ 181 } while (0) 182 #else 183 184 /* This variant is exact(?) but uses a loop. It takes the 2 removal 185 of mulN into account. */ 186 static const unsigned long ftab[] = 187 #if GMP_NUMB_BITS == 64 188 /* 1 to 8 factors per iteration */ 189 {CNST_LIMB(0xffffffffffffffff),CNST_LIMB(0x16a09e667),0x32cbfc,0x16a08,0x24c0,0xa11,0x345,0x1ab /*,0xe9,0x8e */}; 190 #endif 191 #if GMP_NUMB_BITS == 32 192 /* 1 to 7 factors per iteration */ 193 {0xffffffff,0x16a09,0x7ff,0x168,0x6f,0x3d,0x20 /* ,0x17 */}; 194 #endif 195 196 #define MAXFACS(max,l) \ 197 do { \ 198 int __i; \ 199 for (__i = numberof (ftab) - 1; l > ftab[__i]; __i--) \ 200 ; \ 201 (max) = __i + 1; \ 202 } while (0) 203 #endif 204 205 /* Entry i contains (i!/2^t)^(-1) where t is chosen such that the parenthesis 206 is an odd integer. */ 207 static const mp_limb_t facinv[] = { ONE_LIMB_ODD_FACTORIAL_INVERSES_TABLE }; 208 209 static void 210 mpz_bdiv_bin_uiui (mpz_ptr r, unsigned long int n, unsigned long int k) 211 { 212 unsigned nmax, kmax, nmaxnow, numfac; 213 mp_ptr np, kp; 214 mp_size_t nn, kn, alloc; 215 mp_limb_t i, j, t, iii, jjj, cy, dinv; 216 int cnt; 217 mp_size_t maxn; 218 TMP_DECL; 219 220 ASSERT (k > ODD_FACTORIAL_TABLE_LIMIT); 221 TMP_MARK; 222 223 maxn = 1 + n / GMP_NUMB_BITS; /* absolutely largest result size (limbs) */ 224 225 /* FIXME: This allocation might be insufficient, but is usually way too 226 large. */ 227 alloc = SOME_THRESHOLD - 1 + MAX (3 * maxn / 2, SOME_THRESHOLD); 228 alloc = MIN (alloc, (mp_size_t) k) + 1; 229 TMP_ALLOC_LIMBS_2 (np, alloc, kp, SOME_THRESHOLD + 1); 230 231 MAXFACS (nmax, n); 232 ASSERT (nmax <= M); 233 MAXFACS (kmax, k); 234 ASSERT (kmax <= M); 235 ASSERT (k >= M); 236 237 i = n - k + 1; 238 239 np[0] = 1; nn = 1; 240 241 numfac = 1; 242 j = ODD_FACTORIAL_TABLE_LIMIT + 1; 243 jjj = ODD_FACTORIAL_TABLE_MAX; 244 ASSERT (__gmp_oddfac_table[ODD_FACTORIAL_TABLE_LIMIT] == ODD_FACTORIAL_TABLE_MAX); 245 246 while (1) 247 { 248 kp[0] = jjj; /* store new factors */ 249 kn = 1; 250 t = k - j + 1; 251 kmax = MIN (kmax, t); 252 253 while (kmax != 0 && kn < SOME_THRESHOLD) 254 { 255 jjj = mulfunc[kmax - 1] (j); 256 j += kmax; /* number of factors used */ 257 count_trailing_zeros (cnt, jjj); /* count low zeros */ 258 jjj >>= cnt; /* remove remaining low zeros */ 259 cy = mpn_mul_1 (kp, kp, kn, jjj); /* accumulate new factors */ 260 kp[kn] = cy; 261 kn += cy != 0; 262 t = k - j + 1; 263 kmax = MIN (kmax, t); 264 } 265 numfac = j - numfac; 266 267 while (numfac != 0) 268 { 269 nmaxnow = MIN (nmax, numfac); 270 iii = mulfunc[nmaxnow - 1] (i); 271 i += nmaxnow; /* number of factors used */ 272 count_trailing_zeros (cnt, iii); /* count low zeros */ 273 iii >>= cnt; /* remove remaining low zeros */ 274 cy = mpn_mul_1 (np, np, nn, iii); /* accumulate new factors */ 275 np[nn] = cy; 276 nn += cy != 0; 277 numfac -= nmaxnow; 278 } 279 280 ASSERT (nn < alloc); 281 282 binvert_limb (dinv, kp[0]); 283 nn += (np[nn - 1] >= kp[kn - 1]); 284 nn -= kn; 285 mpn_sbpi1_bdiv_q (np, np, nn, kp, MIN(kn,nn), -dinv); 286 mpn_neg (np, np, nn); 287 288 if (kmax == 0) 289 break; 290 numfac = j; 291 292 jjj = mulfunc[kmax - 1] (j); 293 j += kmax; /* number of factors used */ 294 count_trailing_zeros (cnt, jjj); /* count low zeros */ 295 jjj >>= cnt; /* remove remaining low zeros */ 296 } 297 298 /* Put back the right number of factors of 2. */ 299 popc_limb (cnt, n - k); 300 popc_limb (j, k); 301 cnt += j; 302 popc_limb (j, n); 303 cnt -= j; 304 if (cnt != 0) 305 { 306 ASSERT (cnt < GMP_NUMB_BITS); /* can happen, but not for intended use */ 307 cy = mpn_lshift (np, np, nn, cnt); 308 np[nn] = cy; 309 nn += cy != 0; 310 } 311 312 nn -= np[nn - 1] == 0; /* normalisation */ 313 314 kp = MPZ_NEWALLOC (r, nn); 315 SIZ(r) = nn; 316 MPN_COPY (kp, np, nn); 317 TMP_FREE; 318 } 319 320 static void 321 mpz_smallk_bin_uiui (mpz_ptr r, unsigned long int n, unsigned long int k) 322 { 323 unsigned nmax, numfac; 324 mp_ptr rp; 325 mp_size_t rn, alloc; 326 mp_limb_t i, iii, cy; 327 unsigned i2cnt, cnt; 328 329 MAXFACS (nmax, n); 330 nmax = MIN (nmax, M); 331 332 i = n - k + 1; 333 334 i2cnt = __gmp_fac2cnt_table[k / 2 - 1]; /* low zeros count */ 335 if (nmax >= k) 336 { 337 MPZ_NEWALLOC (r, 1) [0] = mulfunc[k - 1] (i) * facinv[k - 2] >> 338 (i2cnt - tcnttab[k - 1]); 339 SIZ(r) = 1; 340 return; 341 } 342 343 count_leading_zeros (cnt, (mp_limb_t) n); 344 cnt = GMP_LIMB_BITS - cnt; 345 alloc = cnt * k / GMP_NUMB_BITS + 3; /* FIXME: ensure rounding is enough. */ 346 rp = MPZ_NEWALLOC (r, alloc); 347 348 rp[0] = mulfunc[nmax - 1] (i); 349 rn = 1; 350 i += nmax; /* number of factors used */ 351 i2cnt -= tcnttab[nmax - 1]; /* low zeros count */ 352 numfac = k - nmax; 353 do 354 { 355 nmax = MIN (nmax, numfac); 356 iii = mulfunc[nmax - 1] (i); 357 i += nmax; /* number of factors used */ 358 i2cnt -= tcnttab[nmax - 1]; /* update low zeros count */ 359 cy = mpn_mul_1 (rp, rp, rn, iii); /* accumulate new factors */ 360 rp[rn] = cy; 361 rn += cy != 0; 362 numfac -= nmax; 363 } while (numfac != 0); 364 365 ASSERT (rn < alloc); 366 367 mpn_pi1_bdiv_q_1 (rp, rp, rn, __gmp_oddfac_table[k], facinv[k - 2], i2cnt); 368 /* A two-fold, branch-free normalisation is possible :*/ 369 /* rn -= rp[rn - 1] == 0; */ 370 /* rn -= rp[rn - 1] == 0; */ 371 MPN_NORMALIZE_NOT_ZERO (rp, rn); 372 373 SIZ(r) = rn; 374 } 375 376 /* Algorithm: 377 378 Plain and simply multiply things together. 379 380 We tabulate factorials (k!/2^t)^(-1) mod B (where t is chosen such 381 that k!/2^t is odd). 382 383 */ 384 385 static mp_limb_t 386 bc_bin_uiui (unsigned int n, unsigned int k) 387 { 388 return ((__gmp_oddfac_table[n] * facinv[k - 2] * facinv[n - k - 2]) 389 << (__gmp_fac2cnt_table[n / 2 - 1] - __gmp_fac2cnt_table[k / 2 - 1] - __gmp_fac2cnt_table[(n-k) / 2 - 1])) 390 & GMP_NUMB_MASK; 391 } 392 393 /* Algorithm: 394 395 Recursively exploit the relation 396 bin(n,k) = bin(n,k>>1)*bin(n-k>>1,k-k>>1)/bin(k,k>>1) . 397 398 Values for binomial(k,k>>1) that fit in a limb are precomputed 399 (with inverses). 400 */ 401 402 /* bin2kk[i - ODD_CENTRAL_BINOMIAL_OFFSET] = 403 binomial(i*2,i)/2^t (where t is chosen so that it is odd). */ 404 static const mp_limb_t bin2kk[] = { ONE_LIMB_ODD_CENTRAL_BINOMIAL_TABLE }; 405 406 /* bin2kkinv[i] = bin2kk[i]^-1 mod B */ 407 static const mp_limb_t bin2kkinv[] = { ONE_LIMB_ODD_CENTRAL_BINOMIAL_INVERSE_TABLE }; 408 409 /* bin2kk[i] = binomial((i+MIN_S)*2,i+MIN_S)/2^t. This table contains the t values. */ 410 static const unsigned char fac2bin[] = { CENTRAL_BINOMIAL_2FAC_TABLE }; 411 412 static void 413 mpz_smallkdc_bin_uiui (mpz_ptr r, unsigned long int n, unsigned long int k) 414 { 415 mp_ptr rp; 416 mp_size_t rn; 417 unsigned long int hk; 418 419 hk = k >> 1; 420 421 if ((! BIN_UIUI_RECURSIVE_SMALLDC) || hk <= ODD_FACTORIAL_TABLE_LIMIT) 422 mpz_smallk_bin_uiui (r, n, hk); 423 else 424 mpz_smallkdc_bin_uiui (r, n, hk); 425 k -= hk; 426 n -= hk; 427 if (n <= ODD_FACTORIAL_EXTTABLE_LIMIT) { 428 mp_limb_t cy; 429 rn = SIZ (r); 430 rp = MPZ_REALLOC (r, rn + 1); 431 cy = mpn_mul_1 (rp, rp, rn, bc_bin_uiui (n, k)); 432 rp [rn] = cy; 433 rn += cy != 0; 434 } else { 435 mp_limb_t buffer[ODD_CENTRAL_BINOMIAL_TABLE_LIMIT + 3]; 436 mpz_t t; 437 438 ALLOC (t) = ODD_CENTRAL_BINOMIAL_TABLE_LIMIT + 3; 439 PTR (t) = buffer; 440 if ((! BIN_UIUI_RECURSIVE_SMALLDC) || k <= ODD_FACTORIAL_TABLE_LIMIT) 441 mpz_smallk_bin_uiui (t, n, k); 442 else 443 mpz_smallkdc_bin_uiui (t, n, k); 444 mpz_mul (r, r, t); 445 rp = PTR (r); 446 rn = SIZ (r); 447 } 448 449 mpn_pi1_bdiv_q_1 (rp, rp, rn, bin2kk[k - ODD_CENTRAL_BINOMIAL_OFFSET], 450 bin2kkinv[k - ODD_CENTRAL_BINOMIAL_OFFSET], 451 fac2bin[k - ODD_CENTRAL_BINOMIAL_OFFSET] - (k != hk)); 452 /* A two-fold, branch-free normalisation is possible :*/ 453 /* rn -= rp[rn - 1] == 0; */ 454 /* rn -= rp[rn - 1] == 0; */ 455 MPN_NORMALIZE_NOT_ZERO (rp, rn); 456 457 SIZ(r) = rn; 458 } 459 460 /* mpz_goetgheluck_bin_uiui(RESULT, N, K) -- Set RESULT to binomial(N,K). 461 * 462 * Contributed to the GNU project by Marco Bodrato. 463 * 464 * Implementation of the algorithm by P. Goetgheluck, "Computing 465 * Binomial Coefficients", The American Mathematical Monthly, Vol. 94, 466 * No. 4 (April 1987), pp. 360-365. 467 * 468 * Acknowledgment: Peter Luschny did spot the slowness of the previous 469 * code and suggested the reference. 470 */ 471 472 /* TODO: Remove duplicated constants / macros / static functions... 473 */ 474 475 /*************************************************************/ 476 /* Section macros: common macros, for swing/fac/bin (&sieve) */ 477 /*************************************************************/ 478 479 #define FACTOR_LIST_APPEND(PR, MAX_PR, VEC, I) \ 480 if ((PR) > (MAX_PR)) { \ 481 (VEC)[(I)++] = (PR); \ 482 (PR) = 1; \ 483 } 484 485 #define FACTOR_LIST_STORE(P, PR, MAX_PR, VEC, I) \ 486 do { \ 487 if ((PR) > (MAX_PR)) { \ 488 (VEC)[(I)++] = (PR); \ 489 (PR) = (P); \ 490 } else \ 491 (PR) *= (P); \ 492 } while (0) 493 494 #define LOOP_ON_SIEVE_CONTINUE(prime,end,sieve) \ 495 __max_i = (end); \ 496 \ 497 do { \ 498 ++__i; \ 499 if (((sieve)[__index] & __mask) == 0) \ 500 { \ 501 mp_limb_t prime; \ 502 prime = id_to_n(__i) 503 504 #define LOOP_ON_SIEVE_BEGIN(prime,start,end,off,sieve) \ 505 do { \ 506 mp_limb_t __mask, __index, __max_i, __i; \ 507 \ 508 __i = (start)-(off); \ 509 __index = __i / GMP_LIMB_BITS; \ 510 __mask = CNST_LIMB(1) << (__i % GMP_LIMB_BITS); \ 511 __i += (off); \ 512 \ 513 LOOP_ON_SIEVE_CONTINUE(prime,end,sieve) 514 515 #define LOOP_ON_SIEVE_STOP \ 516 } \ 517 __mask = __mask << 1 | __mask >> (GMP_LIMB_BITS-1); \ 518 __index += __mask & 1; \ 519 } while (__i <= __max_i) 520 521 #define LOOP_ON_SIEVE_END \ 522 LOOP_ON_SIEVE_STOP; \ 523 } while (0) 524 525 /*********************************************************/ 526 /* Section sieve: sieving functions and tools for primes */ 527 /*********************************************************/ 528 529 #if WANT_ASSERT 530 static mp_limb_t 531 bit_to_n (mp_limb_t bit) { return (bit*3+4)|1; } 532 #endif 533 534 /* id_to_n (x) = bit_to_n (x-1) = (id*3+1)|1*/ 535 static mp_limb_t 536 id_to_n (mp_limb_t id) { return id*3+1+(id&1); } 537 538 /* n_to_bit (n) = ((n-1)&(-CNST_LIMB(2)))/3U-1 */ 539 static mp_limb_t 540 n_to_bit (mp_limb_t n) { return ((n-5)|1)/3U; } 541 542 static mp_size_t 543 primesieve_size (mp_limb_t n) { return n_to_bit(n) / GMP_LIMB_BITS + 1; } 544 545 /*********************************************************/ 546 /* Section binomial: fast binomial implementation */ 547 /*********************************************************/ 548 549 #define COUNT_A_PRIME(P, N, K, PR, MAX_PR, VEC, I) \ 550 do { \ 551 mp_limb_t __a, __b, __prime, __ma,__mb; \ 552 __prime = (P); \ 553 __a = (N); __b = (K); __mb = 0; \ 554 FACTOR_LIST_APPEND(PR, MAX_PR, VEC, I); \ 555 do { \ 556 __mb += __b % __prime; __b /= __prime; \ 557 __ma = __a % __prime; __a /= __prime; \ 558 if (__ma < __mb) { \ 559 __mb = 1; (PR) *= __prime; \ 560 } else __mb = 0; \ 561 } while (__a >= __prime); \ 562 } while (0) 563 564 #define SH_COUNT_A_PRIME(P, N, K, PR, MAX_PR, VEC, I) \ 565 do { \ 566 mp_limb_t __prime; \ 567 __prime = (P); \ 568 if (((N) % __prime) < ((K) % __prime)) { \ 569 FACTOR_LIST_STORE (__prime, PR, MAX_PR, VEC, I); \ 570 } \ 571 } while (0) 572 573 /* Returns an approximation of the sqare root of x. 574 * It gives: 575 * limb_apprsqrt (x) ^ 2 <= x < (limb_apprsqrt (x)+1) ^ 2 576 * or 577 * x <= limb_apprsqrt (x) ^ 2 <= x * 9/8 578 */ 579 static mp_limb_t 580 limb_apprsqrt (mp_limb_t x) 581 { 582 int s; 583 584 ASSERT (x > 2); 585 count_leading_zeros (s, x); 586 s = (GMP_LIMB_BITS - s) >> 1; 587 return ((CNST_LIMB(1) << s) + (x >> s)) >> 1; 588 } 589 590 static void 591 mpz_goetgheluck_bin_uiui (mpz_ptr r, unsigned long int n, unsigned long int k) 592 { 593 mp_limb_t *sieve, *factors, count; 594 mp_limb_t prod, max_prod; 595 mp_size_t j; 596 TMP_DECL; 597 598 ASSERT (BIN_GOETGHELUCK_THRESHOLD >= 13); 599 ASSERT (n >= 25); 600 601 TMP_MARK; 602 sieve = TMP_ALLOC_LIMBS (primesieve_size (n)); 603 604 count = gmp_primesieve (sieve, n) + 1; 605 factors = TMP_ALLOC_LIMBS (count / log_n_max (n) + 1); 606 607 max_prod = GMP_NUMB_MAX / n; 608 609 /* Handle primes = 2, 3 separately. */ 610 popc_limb (count, n - k); 611 popc_limb (j, k); 612 count += j; 613 popc_limb (j, n); 614 count -= j; 615 prod = CNST_LIMB(1) << count; 616 617 j = 0; 618 COUNT_A_PRIME (3, n, k, prod, max_prod, factors, j); 619 620 /* Accumulate prime factors from 5 to n/2 */ 621 { 622 mp_limb_t s; 623 624 s = limb_apprsqrt(n); 625 s = n_to_bit (s); 626 ASSERT (bit_to_n (s+1) * bit_to_n (s+1) > n); 627 ASSERT (s <= n_to_bit (n >> 1)); 628 LOOP_ON_SIEVE_BEGIN (prime, n_to_bit (5), s, 0,sieve); 629 COUNT_A_PRIME (prime, n, k, prod, max_prod, factors, j); 630 LOOP_ON_SIEVE_STOP; 631 632 ASSERT (max_prod <= GMP_NUMB_MAX / 2); 633 max_prod <<= 1; 634 635 LOOP_ON_SIEVE_CONTINUE (prime, n_to_bit (n >> 1),sieve); 636 SH_COUNT_A_PRIME (prime, n, k, prod, max_prod, factors, j); 637 LOOP_ON_SIEVE_END; 638 639 max_prod >>= 1; 640 } 641 642 /* Store primes from (n-k)+1 to n */ 643 ASSERT (n_to_bit (n - k) < n_to_bit (n)); 644 645 LOOP_ON_SIEVE_BEGIN (prime, n_to_bit (n - k) + 1, n_to_bit (n), 0,sieve); 646 FACTOR_LIST_STORE (prime, prod, max_prod, factors, j); 647 LOOP_ON_SIEVE_END; 648 649 if (LIKELY (j != 0)) 650 { 651 factors[j++] = prod; 652 mpz_prodlimbs (r, factors, j); 653 } 654 else 655 { 656 MPZ_NEWALLOC (r, 1)[0] = prod; 657 SIZ (r) = 1; 658 } 659 TMP_FREE; 660 } 661 662 #undef COUNT_A_PRIME 663 #undef SH_COUNT_A_PRIME 664 #undef LOOP_ON_SIEVE_END 665 #undef LOOP_ON_SIEVE_STOP 666 #undef LOOP_ON_SIEVE_BEGIN 667 #undef LOOP_ON_SIEVE_CONTINUE 668 669 /*********************************************************/ 670 /* End of implementation of Goetgheluck's algorithm */ 671 /*********************************************************/ 672 673 void 674 mpz_bin_uiui (mpz_ptr r, unsigned long int n, unsigned long int k) 675 { 676 if (UNLIKELY (n < k)) { 677 SIZ (r) = 0; 678 #if BITS_PER_ULONG > GMP_NUMB_BITS 679 } else if (UNLIKELY (n > GMP_NUMB_MAX)) { 680 mpz_t tmp; 681 682 mpz_init_set_ui (tmp, n); 683 mpz_bin_ui (r, tmp, k); 684 mpz_clear (tmp); 685 #endif 686 } else { 687 ASSERT (n <= GMP_NUMB_MAX); 688 /* Rewrite bin(n,k) as bin(n,n-k) if that is smaller. */ 689 k = MIN (k, n - k); 690 if (k < 2) { 691 MPZ_NEWALLOC (r, 1)[0] = k ? n : 1; /* 1 + ((-k) & (n-1)); */ 692 SIZ(r) = 1; 693 } else if (n <= ODD_FACTORIAL_EXTTABLE_LIMIT) { /* k >= 2, n >= 4 */ 694 MPZ_NEWALLOC (r, 1)[0] = bc_bin_uiui (n, k); 695 SIZ(r) = 1; 696 } else if (k <= ODD_FACTORIAL_TABLE_LIMIT) 697 mpz_smallk_bin_uiui (r, n, k); 698 else if (BIN_UIUI_ENABLE_SMALLDC && 699 k <= (BIN_UIUI_RECURSIVE_SMALLDC ? ODD_CENTRAL_BINOMIAL_TABLE_LIMIT : ODD_FACTORIAL_TABLE_LIMIT)* 2) 700 mpz_smallkdc_bin_uiui (r, n, k); 701 else if (ABOVE_THRESHOLD (k, BIN_GOETGHELUCK_THRESHOLD) && 702 k > (n >> 4)) /* k > ODD_FACTORIAL_TABLE_LIMIT */ 703 mpz_goetgheluck_bin_uiui (r, n, k); 704 else 705 mpz_bdiv_bin_uiui (r, n, k); 706 } 707 } 708