1 /* mpz_bin_uiui - compute n over k. 2 3 Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato. 4 5 Copyright 2010, 2011, 2012 Free Software Foundation, Inc. 6 7 This file is part of the GNU MP Library. 8 9 The GNU MP Library is free software; you can redistribute it and/or modify 10 it under the terms of the GNU Lesser General Public License as published by 11 the Free Software Foundation; either version 3 of the License, or (at your 12 option) any later version. 13 14 The GNU MP Library is distributed in the hope that it will be useful, but 15 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public 17 License for more details. 18 19 You should have received a copy of the GNU Lesser General Public License 20 along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ 21 22 #include "gmp.h" 23 #include "gmp-impl.h" 24 #include "longlong.h" 25 26 #ifndef BIN_GOETGHELUCK_THRESHOLD 27 #define BIN_GOETGHELUCK_THRESHOLD 1000 28 #endif 29 #ifndef BIN_UIUI_ENABLE_SMALLDC 30 #define BIN_UIUI_ENABLE_SMALLDC 1 31 #endif 32 #ifndef BIN_UIUI_RECURSIVE_SMALLDC 33 #define BIN_UIUI_RECURSIVE_SMALLDC (GMP_NUMB_BITS > 32) 34 #endif 35 36 /* Algorithm: 37 38 Accumulate chunks of factors first limb-by-limb (using one of mul0-mul8) 39 which are then accumulated into mpn numbers. The first inner loop 40 accumulates divisor factors, the 2nd inner loop accumulates exactly the same 41 number of dividend factors. We avoid accumulating more for the divisor, 42 even with its smaller factors, since we else cannot guarantee divisibility. 43 44 Since we know each division will yield an integer, we compute the quotient 45 using Hensel norm: If the quotient is limited by 2^t, we compute A / B mod 46 2^t. 47 48 Improvements: 49 50 (1) An obvious improvement to this code would be to compute mod 2^t 51 everywhere. Unfortunately, we cannot determine t beforehand, unless we 52 invoke some approximation, such as Stirling's formula. Of course, we don't 53 need t to be tight. However, it is not clear that this would help much, 54 our numbers are kept reasonably small already. 55 56 (2) Compute nmax/kmax semi-accurately, without scalar division or a loop. 57 Extracting the 3 msb, then doing a table lookup using cnt*8+msb as index, 58 would make it both reasonably accurate and fast. (We could use a table 59 stored into a limb, perhaps.) The table should take the removed factors of 60 2 into account (those done on-the-fly in mulN). 61 62 (3) The first time in the loop we compute the odd part of a 63 factorial in kp, we might use oddfac_1 for this task. 64 */ 65 66 /* This threshold determines how large divisor to accumulate before we call 67 bdiv. Perhaps we should never call bdiv, and accumulate all we are told, 68 since we are just basecase code anyway? Presumably, this depends on the 69 relative speed of the asymptotically fast code and this code. */ 70 #define SOME_THRESHOLD 20 71 72 /* Multiply-into-limb functions. These remove factors of 2 on-the-fly. FIXME: 73 All versions of MAXFACS don't take this 2 removal into account now, meaning 74 that then, shifting just adds some overhead. (We remove factors from the 75 completed limb anyway.) */ 76 77 static mp_limb_t 78 mul1 (mp_limb_t m) 79 { 80 return m; 81 } 82 83 static mp_limb_t 84 mul2 (mp_limb_t m) 85 { 86 /* We need to shift before multiplying, to avoid an overflow. */ 87 mp_limb_t m01 = (m | 1) * ((m + 1) >> 1); 88 return m01; 89 } 90 91 static mp_limb_t 92 mul3 (mp_limb_t m) 93 { 94 mp_limb_t m01 = (m + 0) * (m + 1) >> 1; 95 mp_limb_t m2 = (m + 2); 96 return m01 * m2; 97 } 98 99 static mp_limb_t 100 mul4 (mp_limb_t m) 101 { 102 mp_limb_t m01 = (m + 0) * (m + 1) >> 1; 103 mp_limb_t m23 = (m + 2) * (m + 3) >> 1; 104 return m01 * m23; 105 } 106 107 static mp_limb_t 108 mul5 (mp_limb_t m) 109 { 110 mp_limb_t m012 = (m + 0) * (m + 1) * (m + 2) >> 1; 111 mp_limb_t m34 = (m + 3) * (m + 4) >> 1; 112 return m012 * m34; 113 } 114 115 static mp_limb_t 116 mul6 (mp_limb_t m) 117 { 118 mp_limb_t m01 = (m + 0) * (m + 1); 119 mp_limb_t m23 = (m + 2) * (m + 3); 120 mp_limb_t m45 = (m + 4) * (m + 5) >> 1; 121 mp_limb_t m0123 = m01 * m23 >> 3; 122 return m0123 * m45; 123 } 124 125 static mp_limb_t 126 mul7 (mp_limb_t m) 127 { 128 mp_limb_t m01 = (m + 0) * (m + 1); 129 mp_limb_t m23 = (m + 2) * (m + 3); 130 mp_limb_t m456 = (m + 4) * (m + 5) * (m + 6) >> 1; 131 mp_limb_t m0123 = m01 * m23 >> 3; 132 return m0123 * m456; 133 } 134 135 static mp_limb_t 136 mul8 (mp_limb_t m) 137 { 138 mp_limb_t m01 = (m + 0) * (m + 1); 139 mp_limb_t m23 = (m + 2) * (m + 3); 140 mp_limb_t m45 = (m + 4) * (m + 5); 141 mp_limb_t m67 = (m + 6) * (m + 7); 142 mp_limb_t m0123 = m01 * m23 >> 3; 143 mp_limb_t m4567 = m45 * m67 >> 3; 144 return m0123 * m4567; 145 } 146 147 typedef mp_limb_t (* mulfunc_t) (mp_limb_t); 148 149 static const mulfunc_t mulfunc[] = {mul1,mul2,mul3,mul4,mul5,mul6,mul7,mul8}; 150 #define M (numberof(mulfunc)) 151 152 /* Number of factors-of-2 removed by the corresponding mulN functon. */ 153 static const unsigned char tcnttab[] = {0, 1, 1, 2, 2, 4, 4, 6}; 154 155 #if 1 156 /* This variant is inaccurate but share the code with other functions. */ 157 #define MAXFACS(max,l) \ 158 do { \ 159 (max) = log_n_max (l); \ 160 } while (0) 161 #else 162 163 /* This variant is exact(?) but uses a loop. It takes the 2 removal 164 of mulN into account. */ 165 static const unsigned long ftab[] = 166 #if GMP_NUMB_BITS == 64 167 /* 1 to 8 factors per iteration */ 168 {CNST_LIMB(0xffffffffffffffff),CNST_LIMB(0x100000000),0x32cbfe,0x16a0b,0x24c4,0xa16,0x34b,0x1b2 /*,0xdf,0x8d */}; 169 #endif 170 #if GMP_NUMB_BITS == 32 171 /* 1 to 7 factors per iteration */ 172 {0xffffffff,0x10000,0x801,0x16b,0x71,0x42,0x26 /* ,0x1e */}; 173 #endif 174 175 #define MAXFACS(max,l) \ 176 do { \ 177 int __i; \ 178 for (__i = numberof (ftab) - 1; l > ftab[__i]; __i--) \ 179 ; \ 180 (max) = __i + 1; \ 181 } while (0) 182 #endif 183 184 /* Entry i contains (i!/2^t)^(-1) where t is chosen such that the parenthesis 185 is an odd integer. */ 186 static const mp_limb_t facinv[] = { ONE_LIMB_ODD_FACTORIAL_INVERSES_TABLE }; 187 188 static void 189 mpz_bdiv_bin_uiui (mpz_ptr r, unsigned long int n, unsigned long int k) 190 { 191 int nmax, kmax, nmaxnow, numfac; 192 mp_ptr np, kp; 193 mp_size_t nn, kn, alloc; 194 mp_limb_t i, j, t, iii, jjj, cy, dinv; 195 mp_bitcnt_t i2cnt, j2cnt; 196 int cnt; 197 mp_size_t maxn; 198 TMP_DECL; 199 200 ASSERT (k > ODD_FACTORIAL_TABLE_LIMIT); 201 TMP_MARK; 202 203 maxn = 1 + n / GMP_NUMB_BITS; /* absolutely largest result size (limbs) */ 204 205 /* FIXME: This allocation might be insufficient, but is usually way too 206 large. */ 207 alloc = SOME_THRESHOLD - 1 + MAX (3 * maxn / 2, SOME_THRESHOLD); 208 alloc = MIN (alloc, k) + 1; 209 np = TMP_ALLOC_LIMBS (alloc); 210 kp = TMP_ALLOC_LIMBS (SOME_THRESHOLD + 1); 211 212 MAXFACS (nmax, n); 213 ASSERT (nmax <= M); 214 MAXFACS (kmax, k); 215 ASSERT (kmax <= M); 216 ASSERT (k >= M); 217 218 i = n - k + 1; 219 220 np[0] = 1; nn = 1; 221 222 i2cnt = 0; /* total low zeros in dividend */ 223 j2cnt = __gmp_fac2cnt_table[ODD_FACTORIAL_TABLE_LIMIT / 2 - 1]; 224 /* total low zeros in divisor */ 225 226 numfac = 1; 227 j = ODD_FACTORIAL_TABLE_LIMIT + 1; 228 jjj = ODD_FACTORIAL_TABLE_MAX; 229 ASSERT (__gmp_oddfac_table[ODD_FACTORIAL_TABLE_LIMIT] == ODD_FACTORIAL_TABLE_MAX); 230 231 while (1) 232 { 233 kp[0] = jjj; /* store new factors */ 234 kn = 1; 235 t = k - j + 1; 236 kmax = MIN (kmax, t); 237 238 while (kmax != 0 && kn < SOME_THRESHOLD) 239 { 240 jjj = mulfunc[kmax - 1] (j); 241 j += kmax; /* number of factors used */ 242 count_trailing_zeros (cnt, jjj); /* count low zeros */ 243 jjj >>= cnt; /* remove remaining low zeros */ 244 j2cnt += tcnttab[kmax - 1] + cnt; /* update low zeros count */ 245 cy = mpn_mul_1 (kp, kp, kn, jjj); /* accumulate new factors */ 246 kp[kn] = cy; 247 kn += cy != 0; 248 t = k - j + 1; 249 kmax = MIN (kmax, t); 250 } 251 numfac = j - numfac; 252 253 while (numfac != 0) 254 { 255 nmaxnow = MIN (nmax, numfac); 256 iii = mulfunc[nmaxnow - 1] (i); 257 i += nmaxnow; /* number of factors used */ 258 count_trailing_zeros (cnt, iii); /* count low zeros */ 259 iii >>= cnt; /* remove remaining low zeros */ 260 i2cnt += tcnttab[nmaxnow - 1] + cnt; /* update low zeros count */ 261 cy = mpn_mul_1 (np, np, nn, iii); /* accumulate new factors */ 262 np[nn] = cy; 263 nn += cy != 0; 264 numfac -= nmaxnow; 265 } 266 267 ASSERT (nn < alloc); 268 269 binvert_limb (dinv, kp[0]); 270 nn += (np[nn - 1] >= kp[kn - 1]); 271 nn -= kn; 272 mpn_sbpi1_bdiv_q (np, np, nn, kp, MIN(kn,nn), -dinv); 273 274 if (kmax == 0) 275 break; 276 numfac = j; 277 278 jjj = mulfunc[kmax - 1] (j); 279 j += kmax; /* number of factors used */ 280 count_trailing_zeros (cnt, jjj); /* count low zeros */ 281 jjj >>= cnt; /* remove remaining low zeros */ 282 j2cnt += tcnttab[kmax - 1] + cnt; /* update low zeros count */ 283 } 284 285 /* Put back the right number of factors of 2. */ 286 cnt = i2cnt - j2cnt; 287 if (cnt != 0) 288 { 289 ASSERT (cnt < GMP_NUMB_BITS); /* can happen, but not for intended use */ 290 cy = mpn_lshift (np, np, nn, cnt); 291 np[nn] = cy; 292 nn += cy != 0; 293 } 294 295 nn -= np[nn - 1] == 0; /* normalisation */ 296 297 kp = MPZ_NEWALLOC (r, nn); 298 SIZ(r) = nn; 299 MPN_COPY (kp, np, nn); 300 TMP_FREE; 301 } 302 303 static void 304 mpz_smallk_bin_uiui (mpz_ptr r, unsigned long int n, unsigned long int k) 305 { 306 int nmax, numfac; 307 mp_ptr rp; 308 mp_size_t rn, alloc; 309 mp_limb_t i, iii, cy; 310 mp_bitcnt_t i2cnt, cnt; 311 312 count_leading_zeros (cnt, (mp_limb_t) n); 313 cnt = GMP_LIMB_BITS - cnt; 314 alloc = cnt * k / GMP_NUMB_BITS + 3; /* FIXME: ensure rounding is enough. */ 315 rp = MPZ_NEWALLOC (r, alloc); 316 317 MAXFACS (nmax, n); 318 nmax = MIN (nmax, M); 319 320 i = n - k + 1; 321 322 nmax = MIN (nmax, k); 323 rp[0] = mulfunc[nmax - 1] (i); 324 rn = 1; 325 i += nmax; /* number of factors used */ 326 i2cnt = tcnttab[nmax - 1]; /* low zeros count */ 327 numfac = k - nmax; 328 while (numfac != 0) 329 { 330 nmax = MIN (nmax, numfac); 331 iii = mulfunc[nmax - 1] (i); 332 i += nmax; /* number of factors used */ 333 i2cnt += tcnttab[nmax - 1]; /* update low zeros count */ 334 cy = mpn_mul_1 (rp, rp, rn, iii); /* accumulate new factors */ 335 rp[rn] = cy; 336 rn += cy != 0; 337 numfac -= nmax; 338 } 339 340 ASSERT (rn < alloc); 341 342 mpn_pi1_bdiv_q_1 (rp, rp, rn, __gmp_oddfac_table[k], facinv[k - 2], 343 __gmp_fac2cnt_table[k / 2 - 1] - i2cnt); 344 /* A two-fold, branch-free normalisation is possible :*/ 345 /* rn -= rp[rn - 1] == 0; */ 346 /* rn -= rp[rn - 1] == 0; */ 347 MPN_NORMALIZE_NOT_ZERO (rp, rn); 348 349 SIZ(r) = rn; 350 } 351 352 /* Algorithm: 353 354 Plain and simply multiply things together. 355 356 We tabulate factorials (k!/2^t)^(-1) mod B (where t is chosen such 357 that k!/2^t is odd). 358 359 */ 360 361 static mp_limb_t 362 bc_bin_uiui (unsigned int n, unsigned int k) 363 { 364 return ((__gmp_oddfac_table[n] * facinv[k - 2] * facinv[n - k - 2]) 365 << (__gmp_fac2cnt_table[n / 2 - 1] - __gmp_fac2cnt_table[k / 2 - 1] - __gmp_fac2cnt_table[(n-k) / 2 - 1])) 366 & GMP_NUMB_MASK; 367 } 368 369 /* Algorithm: 370 371 Recursively exploit the relation 372 bin(n,k) = bin(n,k>>1)*bin(n-k>>1,k-k>>1)/bin(k,k>>1) . 373 374 Values for binomial(k,k>>1) that fit in a limb are precomputed 375 (with inverses). 376 */ 377 378 /* bin2kk[i - ODD_CENTRAL_BINOMIAL_OFFSET] = 379 binomial(i*2,i)/2^t (where t is chosen so that it is odd). */ 380 static const mp_limb_t bin2kk[] = { ONE_LIMB_ODD_CENTRAL_BINOMIAL_TABLE }; 381 382 /* bin2kkinv[i] = bin2kk[i]^-1 mod B */ 383 static const mp_limb_t bin2kkinv[] = { ONE_LIMB_ODD_CENTRAL_BINOMIAL_INVERSE_TABLE }; 384 385 /* bin2kk[i] = binomial((i+MIN_S)*2,i+MIN_S)/2^t. This table contains the t values. */ 386 static const unsigned char fac2bin[] = { CENTRAL_BINOMIAL_2FAC_TABLE }; 387 388 static void 389 mpz_smallkdc_bin_uiui (mpz_ptr r, unsigned long int n, unsigned long int k) 390 { 391 mp_ptr rp; 392 mp_size_t rn; 393 unsigned long int hk; 394 395 hk = k >> 1; 396 397 if ((! BIN_UIUI_RECURSIVE_SMALLDC) || hk <= ODD_FACTORIAL_TABLE_LIMIT) 398 mpz_smallk_bin_uiui (r, n, hk); 399 else 400 mpz_smallkdc_bin_uiui (r, n, hk); 401 k -= hk; 402 n -= hk; 403 if (n <= ODD_FACTORIAL_EXTTABLE_LIMIT) { 404 mp_limb_t cy; 405 rn = SIZ (r); 406 rp = MPZ_REALLOC (r, rn + 1); 407 cy = mpn_mul_1 (rp, rp, rn, bc_bin_uiui (n, k)); 408 rp [rn] = cy; 409 rn += cy != 0; 410 } else { 411 mp_limb_t buffer[ODD_CENTRAL_BINOMIAL_TABLE_LIMIT + 3]; 412 mpz_t t; 413 414 ALLOC (t) = ODD_CENTRAL_BINOMIAL_TABLE_LIMIT + 3; 415 PTR (t) = buffer; 416 if ((! BIN_UIUI_RECURSIVE_SMALLDC) || k <= ODD_FACTORIAL_TABLE_LIMIT) 417 mpz_smallk_bin_uiui (t, n, k); 418 else 419 mpz_smallkdc_bin_uiui (t, n, k); 420 mpz_mul (r, r, t); 421 rp = PTR (r); 422 rn = SIZ (r); 423 } 424 425 mpn_pi1_bdiv_q_1 (rp, rp, rn, bin2kk[k - ODD_CENTRAL_BINOMIAL_OFFSET], 426 bin2kkinv[k - ODD_CENTRAL_BINOMIAL_OFFSET], 427 fac2bin[k - ODD_CENTRAL_BINOMIAL_OFFSET] - (k != hk)); 428 /* A two-fold, branch-free normalisation is possible :*/ 429 /* rn -= rp[rn - 1] == 0; */ 430 /* rn -= rp[rn - 1] == 0; */ 431 MPN_NORMALIZE_NOT_ZERO (rp, rn); 432 433 SIZ(r) = rn; 434 } 435 436 /* mpz_goetgheluck_bin_uiui(RESULT, N, K) -- Set RESULT to binomial(N,K). 437 * 438 * Contributed to the GNU project by Marco Bodrato. 439 * 440 * Implementation of the algorithm by P. Goetgheluck, "Computing 441 * Binomial Coefficients", The American Mathematical Monthly, Vol. 94, 442 * No. 4 (April 1987), pp. 360-365. 443 * 444 * Acknowledgment: Peter Luschny did spot the slowness of the previous 445 * code and suggested the reference. 446 */ 447 448 /* TODO: Remove duplicated constants / macros / static functions... 449 */ 450 451 /*************************************************************/ 452 /* Section macros: common macros, for swing/fac/bin (&sieve) */ 453 /*************************************************************/ 454 455 #define FACTOR_LIST_APPEND(PR, MAX_PR, VEC, I) \ 456 if ((PR) > (MAX_PR)) { \ 457 (VEC)[(I)++] = (PR); \ 458 (PR) = 1; \ 459 } 460 461 #define FACTOR_LIST_STORE(P, PR, MAX_PR, VEC, I) \ 462 do { \ 463 if ((PR) > (MAX_PR)) { \ 464 (VEC)[(I)++] = (PR); \ 465 (PR) = (P); \ 466 } else \ 467 (PR) *= (P); \ 468 } while (0) 469 470 #define LOOP_ON_SIEVE_CONTINUE(prime,end,sieve) \ 471 __max_i = (end); \ 472 \ 473 do { \ 474 ++__i; \ 475 if (((sieve)[__index] & __mask) == 0) \ 476 { \ 477 (prime) = id_to_n(__i) 478 479 #define LOOP_ON_SIEVE_BEGIN(prime,start,end,off,sieve) \ 480 do { \ 481 mp_limb_t __mask, __index, __max_i, __i; \ 482 \ 483 __i = (start)-(off); \ 484 __index = __i / GMP_LIMB_BITS; \ 485 __mask = CNST_LIMB(1) << (__i % GMP_LIMB_BITS); \ 486 __i += (off); \ 487 \ 488 LOOP_ON_SIEVE_CONTINUE(prime,end,sieve) 489 490 #define LOOP_ON_SIEVE_STOP \ 491 } \ 492 __mask = __mask << 1 | __mask >> (GMP_LIMB_BITS-1); \ 493 __index += __mask & 1; \ 494 } while (__i <= __max_i) \ 495 496 #define LOOP_ON_SIEVE_END \ 497 LOOP_ON_SIEVE_STOP; \ 498 } while (0) 499 500 /*********************************************************/ 501 /* Section sieve: sieving functions and tools for primes */ 502 /*********************************************************/ 503 504 #if WANT_ASSERT 505 static mp_limb_t 506 bit_to_n (mp_limb_t bit) { return (bit*3+4)|1; } 507 #endif 508 509 /* id_to_n (x) = bit_to_n (x-1) = (id*3+1)|1*/ 510 static mp_limb_t 511 id_to_n (mp_limb_t id) { return id*3+1+(id&1); } 512 513 /* n_to_bit (n) = ((n-1)&(-CNST_LIMB(2)))/3U-1 */ 514 static mp_limb_t 515 n_to_bit (mp_limb_t n) { return ((n-5)|1)/3U; } 516 517 static mp_size_t 518 primesieve_size (mp_limb_t n) { return n_to_bit(n) / GMP_LIMB_BITS + 1; } 519 520 /*********************************************************/ 521 /* Section binomial: fast binomial implementation */ 522 /*********************************************************/ 523 524 #define COUNT_A_PRIME(P, N, K, PR, MAX_PR, VEC, I) \ 525 do { \ 526 mp_limb_t __a, __b, __prime, __ma,__mb; \ 527 __prime = (P); \ 528 __a = (N); __b = (K); __mb = 0; \ 529 FACTOR_LIST_APPEND(PR, MAX_PR, VEC, I); \ 530 do { \ 531 __mb += __b % __prime; __b /= __prime; \ 532 __ma = __a % __prime; __a /= __prime; \ 533 if (__ma < __mb) { \ 534 __mb = 1; (PR) *= __prime; \ 535 } else __mb = 0; \ 536 } while (__a >= __prime); \ 537 } while (0) 538 539 #define SH_COUNT_A_PRIME(P, N, K, PR, MAX_PR, VEC, I) \ 540 do { \ 541 mp_limb_t __prime; \ 542 __prime = (P); \ 543 if (((N) % __prime) < ((K) % __prime)) { \ 544 FACTOR_LIST_STORE (__prime, PR, MAX_PR, VEC, I); \ 545 } \ 546 } while (0) 547 548 /* Returns an approximation of the sqare root of x. * 549 * It gives: x <= limb_apprsqrt (x) ^ 2 < x * 9/4 */ 550 static mp_limb_t 551 limb_apprsqrt (mp_limb_t x) 552 { 553 int s; 554 555 ASSERT (x > 2); 556 count_leading_zeros (s, x - 1); 557 s = GMP_LIMB_BITS - 1 - s; 558 return (CNST_LIMB(1) << (s >> 1)) + (CNST_LIMB(1) << ((s - 1) >> 1)); 559 } 560 561 static void 562 mpz_goetgheluck_bin_uiui (mpz_ptr r, unsigned long int n, unsigned long int k) 563 { 564 mp_limb_t *sieve, *factors, count; 565 mp_limb_t prod, max_prod, j; 566 TMP_DECL; 567 568 ASSERT (BIN_GOETGHELUCK_THRESHOLD >= 13); 569 ASSERT (n >= 25); 570 571 TMP_MARK; 572 sieve = TMP_ALLOC_LIMBS (primesieve_size (n)); 573 574 count = gmp_primesieve (sieve, n) + 1; 575 factors = TMP_ALLOC_LIMBS (count / log_n_max (n) + 1); 576 577 max_prod = GMP_NUMB_MAX / n; 578 579 /* Handle primes = 2, 3 separately. */ 580 popc_limb (count, n - k); 581 popc_limb (j, k); 582 count += j; 583 popc_limb (j, n); 584 count -= j; 585 prod = CNST_LIMB(1) << count; 586 587 j = 0; 588 COUNT_A_PRIME (3, n, k, prod, max_prod, factors, j); 589 590 /* Accumulate prime factors from 5 to n/2 */ 591 { 592 mp_limb_t s; 593 594 { 595 mp_limb_t prime; 596 s = limb_apprsqrt(n); 597 s = n_to_bit (s); 598 LOOP_ON_SIEVE_BEGIN (prime, n_to_bit (5), s, 0,sieve); 599 COUNT_A_PRIME (prime, n, k, prod, max_prod, factors, j); 600 LOOP_ON_SIEVE_END; 601 s++; 602 } 603 604 ASSERT (max_prod <= GMP_NUMB_MAX / 2); 605 max_prod <<= 1; 606 ASSERT (bit_to_n (s) * bit_to_n (s) > n); 607 ASSERT (s <= n_to_bit (n >> 1)); 608 { 609 mp_limb_t prime; 610 611 LOOP_ON_SIEVE_BEGIN (prime, s, n_to_bit (n >> 1), 0,sieve); 612 SH_COUNT_A_PRIME (prime, n, k, prod, max_prod, factors, j); 613 LOOP_ON_SIEVE_END; 614 } 615 max_prod >>= 1; 616 } 617 618 /* Store primes from (n-k)+1 to n */ 619 ASSERT (n_to_bit (n - k) < n_to_bit (n)); 620 { 621 mp_limb_t prime; 622 LOOP_ON_SIEVE_BEGIN (prime, n_to_bit (n - k) + 1, n_to_bit (n), 0,sieve); 623 FACTOR_LIST_STORE (prime, prod, max_prod, factors, j); 624 LOOP_ON_SIEVE_END; 625 } 626 627 if (LIKELY (j != 0)) 628 { 629 factors[j++] = prod; 630 mpz_prodlimbs (r, factors, j); 631 } 632 else 633 { 634 PTR (r)[0] = prod; 635 SIZ (r) = 1; 636 } 637 TMP_FREE; 638 } 639 640 #undef COUNT_A_PRIME 641 #undef SH_COUNT_A_PRIME 642 #undef LOOP_ON_SIEVE_END 643 #undef LOOP_ON_SIEVE_STOP 644 #undef LOOP_ON_SIEVE_BEGIN 645 #undef LOOP_ON_SIEVE_CONTINUE 646 647 /*********************************************************/ 648 /* End of implementation of Goetgheluck's algorithm */ 649 /*********************************************************/ 650 651 void 652 mpz_bin_uiui (mpz_ptr r, unsigned long int n, unsigned long int k) 653 { 654 if (UNLIKELY (n < k)) { 655 SIZ (r) = 0; 656 #if BITS_PER_ULONG > GMP_NUMB_BITS 657 } else if (UNLIKELY (n > GMP_NUMB_MAX)) { 658 mpz_t tmp; 659 660 mpz_init_set_ui (tmp, n); 661 mpz_bin_ui (r, tmp, k); 662 mpz_clear (tmp); 663 #endif 664 } else { 665 ASSERT (n <= GMP_NUMB_MAX); 666 /* Rewrite bin(n,k) as bin(n,n-k) if that is smaller. */ 667 k = MIN (k, n - k); 668 if (k < 2) { 669 PTR(r)[0] = k ? n : 1; /* 1 + ((-k) & (n-1)); */ 670 SIZ(r) = 1; 671 } else if (n <= ODD_FACTORIAL_EXTTABLE_LIMIT) { /* k >= 2, n >= 4 */ 672 PTR(r)[0] = bc_bin_uiui (n, k); 673 SIZ(r) = 1; 674 } else if (k <= ODD_FACTORIAL_TABLE_LIMIT) 675 mpz_smallk_bin_uiui (r, n, k); 676 else if (BIN_UIUI_ENABLE_SMALLDC && 677 k <= (BIN_UIUI_RECURSIVE_SMALLDC ? ODD_CENTRAL_BINOMIAL_TABLE_LIMIT : ODD_FACTORIAL_TABLE_LIMIT)* 2) 678 mpz_smallkdc_bin_uiui (r, n, k); 679 else if (ABOVE_THRESHOLD (k, BIN_GOETGHELUCK_THRESHOLD) && 680 k > (n >> 4)) /* k > ODD_FACTORIAL_TABLE_LIMIT */ 681 mpz_goetgheluck_bin_uiui (r, n, k); 682 else 683 mpz_bdiv_bin_uiui (r, n, k); 684 } 685 } 686