1 /* Generate perfect square testing data. 2 3 Copyright 2002, 2003, 2004, 2012 Free Software Foundation, Inc. 4 5 This file is part of the GNU MP Library. 6 7 The GNU MP Library is free software; you can redistribute it and/or modify 8 it under the terms of the GNU Lesser General Public License as published by 9 the Free Software Foundation; either version 3 of the License, or (at your 10 option) any later version. 11 12 The GNU MP Library is distributed in the hope that it will be useful, but 13 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 14 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public 15 License for more details. 16 17 You should have received a copy of the GNU Lesser General Public License 18 along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ 19 20 #include <stdio.h> 21 #include <stdlib.h> 22 23 #include "bootstrap.c" 24 25 26 /* The aim of this program is to choose either mpn_mod_34lsub1 or mpn_mod_1 27 (plus a PERFSQR_PP modulus), and generate tables indicating quadratic 28 residues and non-residues modulo small factors of that modulus. 29 30 For the usual 32 or 64 bit cases mpn_mod_34lsub1 gets used. That 31 function exists specifically because 2^24-1 and 2^48-1 have nice sets of 32 prime factors. For other limb sizes it's considered, but if it doesn't 33 have good factors then mpn_mod_1 will be used instead. 34 35 When mpn_mod_1 is used, the modulus PERFSQR_PP is created from a 36 selection of small primes, chosen to fill PERFSQR_MOD_BITS of a limb, 37 with that bit count chosen so (2*GMP_LIMB_BITS)*2^PERFSQR_MOD_BITS <= 38 GMP_LIMB_MAX, allowing PERFSQR_MOD_IDX in mpn/generic/perfsqr.c to do its 39 calculation within a single limb. 40 41 In either case primes can be combined to make divisors. The table data 42 then effectively indicates remainders which are quadratic residues mod 43 all the primes. This sort of combining reduces the number of steps 44 needed after mpn_mod_34lsub1 or mpn_mod_1, saving code size and time. 45 Nothing is gained or lost in terms of detections, the same total fraction 46 of non-residues will be identified. 47 48 Nothing particularly sophisticated is attempted for combining factors to 49 make divisors. This is probably a kind of knapsack problem so it'd be 50 too hard to attempt anything completely general. For the usual 32 and 64 51 bit limbs we get a good enough result just pairing the biggest and 52 smallest which fit together, repeatedly. 53 54 Another aim is to get powerful combinations, ie. divisors which identify 55 biggest fraction of non-residues, and have those run first. Again for 56 the usual 32 and 64 bits it seems good enough just to pair for big 57 divisors then sort according to the resulting fraction of non-residues 58 identified. 59 60 Also in this program, a table sq_res_0x100 of residues modulo 256 is 61 generated. This simply fills bits into limbs of the appropriate 62 build-time GMP_LIMB_BITS each. 63 64 */ 65 66 67 /* Normally we aren't using const in gen*.c programs, so as not to have to 68 bother figuring out if it works, but using it with f_cmp_divisor and 69 f_cmp_fraction avoids warnings from the qsort calls. */ 70 71 /* Same tests as gmp.h. */ 72 #if defined (__STDC__) \ 73 || defined (__cplusplus) \ 74 || defined (_AIX) \ 75 || defined (__DECC) \ 76 || (defined (__mips) && defined (_SYSTYPE_SVR4)) \ 77 || defined (_MSC_VER) \ 78 || defined (_WIN32) 79 #define HAVE_CONST 1 80 #endif 81 82 #if ! HAVE_CONST 83 #define const 84 #endif 85 86 87 mpz_t *sq_res_0x100; /* table of limbs */ 88 int nsq_res_0x100; /* elements in sq_res_0x100 array */ 89 int sq_res_0x100_num; /* squares in sq_res_0x100 */ 90 double sq_res_0x100_fraction; /* sq_res_0x100_num / 256 */ 91 92 int mod34_bits; /* 3*GMP_NUMB_BITS/4 */ 93 int mod_bits; /* bits from PERFSQR_MOD_34 or MOD_PP */ 94 int max_divisor; /* all divisors <= max_divisor */ 95 int max_divisor_bits; /* ceil(log2(max_divisor)) */ 96 double total_fraction; /* of squares */ 97 mpz_t pp; /* product of primes, or 0 if mod_34lsub1 used */ 98 mpz_t pp_norm; /* pp shifted so NUMB high bit set */ 99 mpz_t pp_inverted; /* invert_limb style inverse */ 100 mpz_t mod_mask; /* 2^mod_bits-1 */ 101 char mod34_excuse[128]; /* why mod_34lsub1 not used (if it's not) */ 102 103 /* raw list of divisors of 2^mod34_bits-1 or pp, just to show in a comment */ 104 struct rawfactor_t { 105 int divisor; 106 int multiplicity; 107 }; 108 struct rawfactor_t *rawfactor; 109 int nrawfactor; 110 111 /* factors of 2^mod34_bits-1 or pp and associated data, after combining etc */ 112 struct factor_t { 113 int divisor; 114 mpz_t inverse; /* 1/divisor mod 2^mod_bits */ 115 mpz_t mask; /* indicating squares mod divisor */ 116 double fraction; /* squares/total */ 117 }; 118 struct factor_t *factor; 119 int nfactor; /* entries in use in factor array */ 120 int factor_alloc; /* entries allocated to factor array */ 121 122 123 int 124 f_cmp_divisor (const void *parg, const void *qarg) 125 { 126 const struct factor_t *p, *q; 127 p = parg; 128 q = qarg; 129 if (p->divisor > q->divisor) 130 return 1; 131 else if (p->divisor < q->divisor) 132 return -1; 133 else 134 return 0; 135 } 136 137 int 138 f_cmp_fraction (const void *parg, const void *qarg) 139 { 140 const struct factor_t *p, *q; 141 p = parg; 142 q = qarg; 143 if (p->fraction > q->fraction) 144 return 1; 145 else if (p->fraction < q->fraction) 146 return -1; 147 else 148 return 0; 149 } 150 151 /* Remove array[idx] by copying the remainder down, and adjust narray 152 accordingly. */ 153 #define COLLAPSE_ELEMENT(array, idx, narray) \ 154 do { \ 155 memmove (&(array)[idx], \ 156 &(array)[idx+1], \ 157 ((narray)-((idx)+1)) * sizeof (array[0])); \ 158 (narray)--; \ 159 } while (0) 160 161 162 /* return n*2^p mod m */ 163 int 164 mul_2exp_mod (int n, int p, int m) 165 { 166 int i; 167 for (i = 0; i < p; i++) 168 n = (2 * n) % m; 169 return n; 170 } 171 172 /* return -n mod m */ 173 int 174 neg_mod (int n, int m) 175 { 176 assert (n >= 0 && n < m); 177 return (n == 0 ? 0 : m-n); 178 } 179 180 /* Set "mask" to a value such that "mask & (1<<idx)" is non-zero if 181 "-(idx<<mod_bits)" can be a square modulo m. */ 182 void 183 square_mask (mpz_t mask, int m) 184 { 185 int p, i, r, idx; 186 187 p = mul_2exp_mod (1, mod_bits, m); 188 p = neg_mod (p, m); 189 190 mpz_set_ui (mask, 0L); 191 for (i = 0; i < m; i++) 192 { 193 r = (i * i) % m; 194 idx = (r * p) % m; 195 mpz_setbit (mask, (unsigned long) idx); 196 } 197 } 198 199 void 200 generate_sq_res_0x100 (int limb_bits) 201 { 202 int i, res; 203 204 nsq_res_0x100 = (0x100 + limb_bits - 1) / limb_bits; 205 sq_res_0x100 = xmalloc (nsq_res_0x100 * sizeof (*sq_res_0x100)); 206 207 for (i = 0; i < nsq_res_0x100; i++) 208 mpz_init_set_ui (sq_res_0x100[i], 0L); 209 210 for (i = 0; i < 0x100; i++) 211 { 212 res = (i * i) % 0x100; 213 mpz_setbit (sq_res_0x100[res / limb_bits], 214 (unsigned long) (res % limb_bits)); 215 } 216 217 sq_res_0x100_num = 0; 218 for (i = 0; i < nsq_res_0x100; i++) 219 sq_res_0x100_num += mpz_popcount (sq_res_0x100[i]); 220 sq_res_0x100_fraction = (double) sq_res_0x100_num / 256.0; 221 } 222 223 void 224 generate_mod (int limb_bits, int nail_bits) 225 { 226 int numb_bits = limb_bits - nail_bits; 227 int i, divisor; 228 229 mpz_init_set_ui (pp, 0L); 230 mpz_init_set_ui (pp_norm, 0L); 231 mpz_init_set_ui (pp_inverted, 0L); 232 233 /* no more than limb_bits many factors in a one limb modulus (and of 234 course in reality nothing like that many) */ 235 factor_alloc = limb_bits; 236 factor = xmalloc (factor_alloc * sizeof (*factor)); 237 rawfactor = xmalloc (factor_alloc * sizeof (*rawfactor)); 238 239 if (numb_bits % 4 != 0) 240 { 241 strcpy (mod34_excuse, "GMP_NUMB_BITS % 4 != 0"); 242 goto use_pp; 243 } 244 245 max_divisor = 2*limb_bits; 246 max_divisor_bits = log2_ceil (max_divisor); 247 248 if (numb_bits / 4 < max_divisor_bits) 249 { 250 /* Wind back to one limb worth of max_divisor, if that will let us use 251 mpn_mod_34lsub1. */ 252 max_divisor = limb_bits; 253 max_divisor_bits = log2_ceil (max_divisor); 254 255 if (numb_bits / 4 < max_divisor_bits) 256 { 257 strcpy (mod34_excuse, "GMP_NUMB_BITS / 4 too small"); 258 goto use_pp; 259 } 260 } 261 262 { 263 /* Can use mpn_mod_34lsub1, find small factors of 2^mod34_bits-1. */ 264 mpz_t m, q, r; 265 int multiplicity; 266 267 mod34_bits = (numb_bits / 4) * 3; 268 269 /* mpn_mod_34lsub1 returns a full limb value, PERFSQR_MOD_34 folds it at 270 the mod34_bits mark, adding the two halves for a remainder of at most 271 mod34_bits+1 many bits */ 272 mod_bits = mod34_bits + 1; 273 274 mpz_init_set_ui (m, 1L); 275 mpz_mul_2exp (m, m, mod34_bits); 276 mpz_sub_ui (m, m, 1L); 277 278 mpz_init (q); 279 mpz_init (r); 280 281 for (i = 3; i <= max_divisor; i++) 282 { 283 if (! isprime (i)) 284 continue; 285 286 mpz_tdiv_qr_ui (q, r, m, (unsigned long) i); 287 if (mpz_sgn (r) != 0) 288 continue; 289 290 /* if a repeated prime is found it's used as an i^n in one factor */ 291 divisor = 1; 292 multiplicity = 0; 293 do 294 { 295 if (divisor > max_divisor / i) 296 break; 297 multiplicity++; 298 mpz_set (m, q); 299 mpz_tdiv_qr_ui (q, r, m, (unsigned long) i); 300 } 301 while (mpz_sgn (r) == 0); 302 303 assert (nrawfactor < factor_alloc); 304 rawfactor[nrawfactor].divisor = i; 305 rawfactor[nrawfactor].multiplicity = multiplicity; 306 nrawfactor++; 307 } 308 309 mpz_clear (m); 310 mpz_clear (q); 311 mpz_clear (r); 312 } 313 314 if (nrawfactor <= 2) 315 { 316 mpz_t new_pp; 317 318 sprintf (mod34_excuse, "only %d small factor%s", 319 nrawfactor, nrawfactor == 1 ? "" : "s"); 320 321 use_pp: 322 /* reset to two limbs of max_divisor, in case the mpn_mod_34lsub1 code 323 tried with just one */ 324 max_divisor = 2*limb_bits; 325 max_divisor_bits = log2_ceil (max_divisor); 326 327 mpz_init (new_pp); 328 nrawfactor = 0; 329 mod_bits = MIN (numb_bits, limb_bits - max_divisor_bits); 330 331 /* one copy of each small prime */ 332 mpz_set_ui (pp, 1L); 333 for (i = 3; i <= max_divisor; i++) 334 { 335 if (! isprime (i)) 336 continue; 337 338 mpz_mul_ui (new_pp, pp, (unsigned long) i); 339 if (mpz_sizeinbase (new_pp, 2) > mod_bits) 340 break; 341 mpz_set (pp, new_pp); 342 343 assert (nrawfactor < factor_alloc); 344 rawfactor[nrawfactor].divisor = i; 345 rawfactor[nrawfactor].multiplicity = 1; 346 nrawfactor++; 347 } 348 349 /* Plus an extra copy of one or more of the primes selected, if that 350 still fits in max_divisor and the total in mod_bits. Usually only 351 3 or 5 will be candidates */ 352 for (i = nrawfactor-1; i >= 0; i--) 353 { 354 if (rawfactor[i].divisor > max_divisor / rawfactor[i].divisor) 355 continue; 356 mpz_mul_ui (new_pp, pp, (unsigned long) rawfactor[i].divisor); 357 if (mpz_sizeinbase (new_pp, 2) > mod_bits) 358 continue; 359 mpz_set (pp, new_pp); 360 361 rawfactor[i].multiplicity++; 362 } 363 364 mod_bits = mpz_sizeinbase (pp, 2); 365 366 mpz_set (pp_norm, pp); 367 while (mpz_sizeinbase (pp_norm, 2) < numb_bits) 368 mpz_add (pp_norm, pp_norm, pp_norm); 369 370 mpz_preinv_invert (pp_inverted, pp_norm, numb_bits); 371 372 mpz_clear (new_pp); 373 } 374 375 /* start the factor array */ 376 for (i = 0; i < nrawfactor; i++) 377 { 378 int j; 379 assert (nfactor < factor_alloc); 380 factor[nfactor].divisor = 1; 381 for (j = 0; j < rawfactor[i].multiplicity; j++) 382 factor[nfactor].divisor *= rawfactor[i].divisor; 383 nfactor++; 384 } 385 386 combine: 387 /* Combine entries in the factor array. Combine the smallest entry with 388 the biggest one that will fit with it (ie. under max_divisor), then 389 repeat that with the new smallest entry. */ 390 qsort (factor, nfactor, sizeof (factor[0]), f_cmp_divisor); 391 for (i = nfactor-1; i >= 1; i--) 392 { 393 if (factor[i].divisor <= max_divisor / factor[0].divisor) 394 { 395 factor[0].divisor *= factor[i].divisor; 396 COLLAPSE_ELEMENT (factor, i, nfactor); 397 goto combine; 398 } 399 } 400 401 total_fraction = 1.0; 402 for (i = 0; i < nfactor; i++) 403 { 404 mpz_init (factor[i].inverse); 405 mpz_invert_ui_2exp (factor[i].inverse, 406 (unsigned long) factor[i].divisor, 407 (unsigned long) mod_bits); 408 409 mpz_init (factor[i].mask); 410 square_mask (factor[i].mask, factor[i].divisor); 411 412 /* fraction of possible squares */ 413 factor[i].fraction = (double) mpz_popcount (factor[i].mask) 414 / factor[i].divisor; 415 416 /* total fraction of possible squares */ 417 total_fraction *= factor[i].fraction; 418 } 419 420 /* best tests first (ie. smallest fraction) */ 421 qsort (factor, nfactor, sizeof (factor[0]), f_cmp_fraction); 422 } 423 424 void 425 print (int limb_bits, int nail_bits) 426 { 427 int i; 428 mpz_t mhi, mlo; 429 430 printf ("/* This file generated by gen-psqr.c - DO NOT EDIT. */\n"); 431 printf ("\n"); 432 433 printf ("#if GMP_LIMB_BITS != %d || GMP_NAIL_BITS != %d\n", 434 limb_bits, nail_bits); 435 printf ("Error, error, this data is for %d bit limb and %d bit nail\n", 436 limb_bits, nail_bits); 437 printf ("#endif\n"); 438 printf ("\n"); 439 440 printf ("/* Non-zero bit indicates a quadratic residue mod 0x100.\n"); 441 printf (" This test identifies %.2f%% as non-squares (%d/256). */\n", 442 (1.0 - sq_res_0x100_fraction) * 100.0, 443 0x100 - sq_res_0x100_num); 444 printf ("static const mp_limb_t\n"); 445 printf ("sq_res_0x100[%d] = {\n", nsq_res_0x100); 446 for (i = 0; i < nsq_res_0x100; i++) 447 { 448 printf (" CNST_LIMB(0x"); 449 mpz_out_str (stdout, 16, sq_res_0x100[i]); 450 printf ("),\n"); 451 } 452 printf ("};\n"); 453 printf ("\n"); 454 455 if (mpz_sgn (pp) != 0) 456 { 457 printf ("/* mpn_mod_34lsub1 not used due to %s */\n", mod34_excuse); 458 printf ("/* PERFSQR_PP = "); 459 } 460 else 461 printf ("/* 2^%d-1 = ", mod34_bits); 462 for (i = 0; i < nrawfactor; i++) 463 { 464 if (i != 0) 465 printf (" * "); 466 printf ("%d", rawfactor[i].divisor); 467 if (rawfactor[i].multiplicity != 1) 468 printf ("^%d", rawfactor[i].multiplicity); 469 } 470 printf (" %s*/\n", mpz_sgn (pp) == 0 ? "... " : ""); 471 472 printf ("#define PERFSQR_MOD_BITS %d\n", mod_bits); 473 if (mpz_sgn (pp) != 0) 474 { 475 printf ("#define PERFSQR_PP CNST_LIMB(0x"); 476 mpz_out_str (stdout, 16, pp); 477 printf (")\n"); 478 printf ("#define PERFSQR_PP_NORM CNST_LIMB(0x"); 479 mpz_out_str (stdout, 16, pp_norm); 480 printf (")\n"); 481 printf ("#define PERFSQR_PP_INVERTED CNST_LIMB(0x"); 482 mpz_out_str (stdout, 16, pp_inverted); 483 printf (")\n"); 484 } 485 printf ("\n"); 486 487 mpz_init (mhi); 488 mpz_init (mlo); 489 490 printf ("/* This test identifies %.2f%% as non-squares. */\n", 491 (1.0 - total_fraction) * 100.0); 492 printf ("#define PERFSQR_MOD_TEST(up, usize) \\\n"); 493 printf (" do { \\\n"); 494 printf (" mp_limb_t r; \\\n"); 495 if (mpz_sgn (pp) != 0) 496 printf (" PERFSQR_MOD_PP (r, up, usize); \\\n"); 497 else 498 printf (" PERFSQR_MOD_34 (r, up, usize); \\\n"); 499 500 for (i = 0; i < nfactor; i++) 501 { 502 printf (" \\\n"); 503 printf (" /* %5.2f%% */ \\\n", 504 (1.0 - factor[i].fraction) * 100.0); 505 506 printf (" PERFSQR_MOD_%d (r, CNST_LIMB(%2d), CNST_LIMB(0x", 507 factor[i].divisor <= limb_bits ? 1 : 2, 508 factor[i].divisor); 509 mpz_out_str (stdout, 16, factor[i].inverse); 510 printf ("), \\\n"); 511 printf (" CNST_LIMB(0x"); 512 513 if ( factor[i].divisor <= limb_bits) 514 { 515 mpz_out_str (stdout, 16, factor[i].mask); 516 } 517 else 518 { 519 mpz_tdiv_r_2exp (mlo, factor[i].mask, (unsigned long) limb_bits); 520 mpz_tdiv_q_2exp (mhi, factor[i].mask, (unsigned long) limb_bits); 521 mpz_out_str (stdout, 16, mhi); 522 printf ("), CNST_LIMB(0x"); 523 mpz_out_str (stdout, 16, mlo); 524 } 525 printf (")); \\\n"); 526 } 527 528 printf (" } while (0)\n"); 529 printf ("\n"); 530 531 printf ("/* Grand total sq_res_0x100 and PERFSQR_MOD_TEST, %.2f%% non-squares. */\n", 532 (1.0 - (total_fraction * 44.0/256.0)) * 100.0); 533 printf ("\n"); 534 535 printf ("/* helper for tests/mpz/t-perfsqr.c */\n"); 536 printf ("#define PERFSQR_DIVISORS { 256,"); 537 for (i = 0; i < nfactor; i++) 538 printf (" %d,", factor[i].divisor); 539 printf (" }\n"); 540 541 542 mpz_clear (mhi); 543 mpz_clear (mlo); 544 } 545 546 int 547 main (int argc, char *argv[]) 548 { 549 int limb_bits, nail_bits; 550 551 if (argc != 3) 552 { 553 fprintf (stderr, "Usage: gen-psqr <limbbits> <nailbits>\n"); 554 exit (1); 555 } 556 557 limb_bits = atoi (argv[1]); 558 nail_bits = atoi (argv[2]); 559 560 if (limb_bits <= 0 561 || nail_bits < 0 562 || nail_bits >= limb_bits) 563 { 564 fprintf (stderr, "Invalid limb/nail bits: %d %d\n", 565 limb_bits, nail_bits); 566 exit (1); 567 } 568 569 generate_sq_res_0x100 (limb_bits); 570 generate_mod (limb_bits, nail_bits); 571 572 print (limb_bits, nail_bits); 573 574 return 0; 575 } 576