1 /* Run some tests on various mpn routines. 2 3 THIS IS A TEST PROGRAM USED ONLY FOR DEVELOPMENT. IT'S ALMOST CERTAIN TO 4 BE SUBJECT TO INCOMPATIBLE CHANGES IN FUTURE VERSIONS OF GMP. 5 6 Copyright 2000-2006, 2008, 2009, 2011, 2012 Free Software Foundation, Inc. 7 8 This file is part of the GNU MP Library test suite. 9 10 The GNU MP Library test suite is free software; you can redistribute it 11 and/or modify it under the terms of the GNU General Public License as 12 published by the Free Software Foundation; either version 3 of the License, 13 or (at your option) any later version. 14 15 The GNU MP Library test suite is distributed in the hope that it will be 16 useful, but WITHOUT ANY WARRANTY; without even the implied warranty of 17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General 18 Public License for more details. 19 20 You should have received a copy of the GNU General Public License along with 21 the GNU MP Library test suite. If not, see https://www.gnu.org/licenses/. */ 22 23 24 /* Usage: try [options] <function>... 25 26 For example, "./try mpn_add_n" to run tests of that function. 27 28 Combinations of alignments and overlaps are tested, with redzones above 29 or below the destinations, and with the sources write-protected. 30 31 The number of tests performed becomes ridiculously large with all the 32 combinations, and for that reason this can't be a part of a "make check", 33 it's meant only for development. The code isn't very pretty either. 34 35 During development it can help to disable the redzones, since seeing the 36 rest of the destination written can show where the wrong part is, or if 37 the dst pointers are off by 1 or whatever. The magic DEADVAL initial 38 fill (see below) will show locations never written. 39 40 The -s option can be used to test only certain size operands, which is 41 useful if some new code doesn't yet support say sizes less than the 42 unrolling, or whatever. 
43 44 When a problem occurs it'll of course be necessary to run the program 45 under gdb to find out quite where, how and why it's going wrong. Disable 46 the spinner with the -W option when doing this, or single stepping won't 47 work. Using the "-1" option to run with simple data can be useful. 48 49 New functions to test can be added in try_array[]. If a new TYPE is 50 required then add it to the existing constants, set up its parameters in 51 param_init(), and add it to the call() function. Extra parameter fields 52 can be added if necessary, or further interpretations given to existing 53 fields. 54 55 56 Portability: 57 58 This program is not designed for use on Cray vector systems under Unicos, 59 it will fail to compile due to missing _SC_PAGE_SIZE. Those systems 60 don't really have pages or mprotect. We could arrange to run the tests 61 without the redzones, but we haven't bothered currently. 62 63 64 Enhancements: 65 66 umul_ppmm support is not very good, lots of source data is generated 67 whereas only two limbs are needed. 68 69 Make a little scheme for interpreting the "SIZE" selections uniformly. 70 71 Make tr->size==SIZE_2 work, for the benefit of find_a which wants just 2 72 source limbs. Possibly increase the default repetitions in that case. 73 74 Automatically detect gdb and disable the spinner (use -W for now). 75 76 Make a way to re-run a failing case in the debugger. Have an option to 77 snapshot each test case before it's run so the data is available if a 78 segv occurs. (This should be more reliable than the current print_all() 79 in the signal handler.) 80 81 When alignment means a dst isn't hard against the redzone, check the 82 space in between remains unchanged. 83 84 When a source overlaps a destination, don't run both s[i].high 0 and 1, 85 as s[i].high has no effect. Maybe encode s[i].high into overlap->s[i]. 86 87 When partial overlaps aren't done, don't loop over source alignments 88 during overlaps. 
89 90 Try to make the looping code a bit less horrible. Right now it's pretty 91 hard to see what iterations are actually done. 92 93 Perhaps specific setups and loops for each style of function under test 94 would be clearer than a parameterized general loop. There's lots of 95 stuff common to all functions, but the exceptions get messy. 96 97 When there's no overlap, run with both src>dst and src<dst. A subtle 98 calling-conventions violation occurred in a P6 copy which depended on the 99 relative location of src and dst. 100 101 multiplier_N is more or less a third source region for the addmul_N 102 routines, and could be done with the redzoned region scheme. 103 104 */ 105 106 107 /* always do assertion checking */ 108 #define WANT_ASSERT 1 109 110 #include "config.h" 111 112 #include <errno.h> 113 #include <limits.h> 114 #include <signal.h> 115 #include <stdio.h> 116 #include <stdlib.h> 117 #include <string.h> 118 #include <time.h> 119 120 #if HAVE_UNISTD_H 121 #include <unistd.h> 122 #endif 123 124 #if HAVE_SYS_MMAN_H 125 #include <sys/mman.h> 126 #endif 127 128 #include "gmp-impl.h" 129 #include "longlong.h" 130 #include "tests.h" 131 132 133 #if !HAVE_DECL_OPTARG 134 extern char *optarg; 135 extern int optind, opterr; 136 #endif 137 138 #if ! HAVE_DECL_SYS_NERR 139 extern int sys_nerr; 140 #endif 141 142 #if ! HAVE_DECL_SYS_ERRLIST 143 extern char *sys_errlist[]; 144 #endif 145 146 #if ! HAVE_STRERROR 147 char * 148 strerror (int n) 149 { 150 if (n < 0 || n >= sys_nerr) 151 return "errno out of range"; 152 else 153 return sys_errlist[n]; 154 } 155 #endif 156 157 /* Rumour has it some systems lack a define of PROT_NONE. */ 158 #ifndef PROT_NONE 159 #define PROT_NONE 0 160 #endif 161 162 /* Dummy defines for when mprotect doesn't exist. */ 163 #ifndef PROT_READ 164 #define PROT_READ 0 165 #endif 166 #ifndef PROT_WRITE 167 #define PROT_WRITE 0 168 #endif 169 170 /* _SC_PAGESIZE is standard, but hpux 9 and possibly other systems have 171 _SC_PAGE_SIZE instead. 
*/ 172 #if defined (_SC_PAGE_SIZE) && ! defined (_SC_PAGESIZE) 173 #define _SC_PAGESIZE _SC_PAGE_SIZE 174 #endif 175 176 177 #ifdef EXTRA_PROTOS 178 EXTRA_PROTOS 179 #endif 180 #ifdef EXTRA_PROTOS2 181 EXTRA_PROTOS2 182 #endif 183 184 185 #define DEFAULT_REPETITIONS 10 186 187 int option_repetitions = DEFAULT_REPETITIONS; 188 int option_spinner = 1; 189 int option_redzones = 1; 190 int option_firstsize = 0; 191 int option_lastsize = 500; 192 int option_firstsize2 = 0; 193 194 #define ALIGNMENTS 4 195 #define OVERLAPS 4 196 #define CARRY_RANDOMS 5 197 #define MULTIPLIER_RANDOMS 5 198 #define DIVISOR_RANDOMS 5 199 #define FRACTION_COUNT 4 200 201 int option_print = 0; 202 203 #define DATA_TRAND 0 204 #define DATA_ZEROS 1 205 #define DATA_SEQ 2 206 #define DATA_FFS 3 207 #define DATA_2FD 4 208 int option_data = DATA_TRAND; 209 210 211 mp_size_t pagesize; 212 #define PAGESIZE_LIMBS (pagesize / GMP_LIMB_BYTES) 213 214 /* must be a multiple of the page size */ 215 #define REDZONE_BYTES (pagesize * 16) 216 #define REDZONE_LIMBS (REDZONE_BYTES / GMP_LIMB_BYTES) 217 218 219 #define MAX3(x,y,z) (MAX (x, MAX (y, z))) 220 221 #if GMP_LIMB_BITS == 32 222 #define DEADVAL CNST_LIMB(0xDEADBEEF) 223 #else 224 #define DEADVAL CNST_LIMB(0xDEADBEEFBADDCAFE) 225 #endif 226 227 228 struct region_t { 229 mp_ptr ptr; 230 mp_size_t size; 231 }; 232 233 234 #define TRAP_NOWHERE 0 235 #define TRAP_REF 1 236 #define TRAP_FUN 2 237 #define TRAP_SETUPS 3 238 int trap_location = TRAP_NOWHERE; 239 240 241 #define NUM_SOURCES 5 242 #define NUM_DESTS 2 243 244 struct source_t { 245 struct region_t region; 246 int high; 247 mp_size_t align; 248 mp_ptr p; 249 }; 250 251 struct source_t s[NUM_SOURCES]; 252 253 struct dest_t { 254 int high; 255 mp_size_t align; 256 mp_size_t size; 257 }; 258 259 struct dest_t d[NUM_DESTS]; 260 261 struct source_each_t { 262 mp_ptr p; 263 }; 264 265 struct dest_each_t { 266 struct region_t region; 267 mp_ptr p; 268 }; 269 270 mp_size_t size; 271 mp_size_t size2; 272 
unsigned long shift; 273 mp_limb_t carry; 274 mp_limb_t divisor; 275 mp_limb_t multiplier; 276 mp_limb_t multiplier_N[8]; 277 278 struct each_t { 279 const char *name; 280 struct dest_each_t d[NUM_DESTS]; 281 struct source_each_t s[NUM_SOURCES]; 282 mp_limb_t retval; 283 }; 284 285 struct each_t ref = { "Ref" }; 286 struct each_t fun = { "Fun" }; 287 288 #define SRC_SIZE(n) ((n) == 1 && tr->size2 ? size2 : size) 289 290 void validate_fail (void); 291 292 293 #if HAVE_TRY_NEW_C 294 #include "try-new.c" 295 #endif 296 297 298 typedef mp_limb_t (*tryfun_t) (ANYARGS); 299 300 struct try_t { 301 char retval; 302 303 char src[NUM_SOURCES]; 304 char dst[NUM_DESTS]; 305 306 #define SIZE_YES 1 307 #define SIZE_ALLOW_ZERO 2 308 #define SIZE_1 3 /* 1 limb */ 309 #define SIZE_2 4 /* 2 limbs */ 310 #define SIZE_3 5 /* 3 limbs */ 311 #define SIZE_4 6 /* 4 limbs */ 312 #define SIZE_6 7 /* 6 limbs */ 313 #define SIZE_FRACTION 8 /* size2 is fraction for divrem etc */ 314 #define SIZE_SIZE2 9 315 #define SIZE_PLUS_1 10 316 #define SIZE_SUM 11 317 #define SIZE_DIFF 12 318 #define SIZE_DIFF_PLUS_1 13 319 #define SIZE_DIFF_PLUS_3 14 320 #define SIZE_RETVAL 15 321 #define SIZE_CEIL_HALF 16 322 #define SIZE_GET_STR 17 323 #define SIZE_PLUS_MSIZE_SUB_1 18 /* size+msize-1 */ 324 #define SIZE_ODD 19 325 char size; 326 char size2; 327 char dst_size[NUM_DESTS]; 328 329 /* multiplier_N size in limbs */ 330 mp_size_t msize; 331 332 char dst_bytes[NUM_DESTS]; 333 334 char dst0_from_src1; 335 336 #define CARRY_BIT 1 /* single bit 0 or 1 */ 337 #define CARRY_3 2 /* 0, 1, 2 */ 338 #define CARRY_4 3 /* 0 to 3 */ 339 #define CARRY_LIMB 4 /* any limb value */ 340 #define CARRY_DIVISOR 5 /* carry<divisor */ 341 char carry; 342 343 /* a fudge to tell the output when to print negatives */ 344 char carry_sign; 345 346 char multiplier; 347 char shift; 348 349 #define DIVISOR_LIMB 1 350 #define DIVISOR_NORM 2 351 #define DIVISOR_ODD 3 352 char divisor; 353 354 #define DATA_NON_ZERO 1 355 #define DATA_GCD 2 
356 #define DATA_SRC0_ODD 3 357 #define DATA_SRC0_HIGHBIT 4 358 #define DATA_SRC1_ODD 5 359 #define DATA_SRC1_ODD_PRIME 6 360 #define DATA_SRC1_HIGHBIT 7 361 #define DATA_MULTIPLE_DIVISOR 8 362 #define DATA_UDIV_QRNND 9 363 #define DATA_DIV_QR_1 10 364 char data; 365 366 /* Default is allow full overlap. */ 367 #define OVERLAP_NONE 1 368 #define OVERLAP_LOW_TO_HIGH 2 369 #define OVERLAP_HIGH_TO_LOW 3 370 #define OVERLAP_NOT_SRCS 4 371 #define OVERLAP_NOT_SRC2 8 372 #define OVERLAP_NOT_DST2 16 373 char overlap; 374 375 tryfun_t reference; 376 const char *reference_name; 377 378 void (*validate) (void); 379 const char *validate_name; 380 }; 381 382 struct try_t *tr; 383 384 385 void 386 validate_mod_34lsub1 (void) 387 { 388 #define CNST_34LSUB1 ((CNST_LIMB(1) << (3 * (GMP_NUMB_BITS / 4))) - 1) 389 390 mp_srcptr ptr = s[0].p; 391 int error = 0; 392 mp_limb_t got, got_mod, want, want_mod; 393 394 ASSERT (size >= 1); 395 396 got = fun.retval; 397 got_mod = got % CNST_34LSUB1; 398 399 want = refmpn_mod_34lsub1 (ptr, size); 400 want_mod = want % CNST_34LSUB1; 401 402 if (got_mod != want_mod) 403 { 404 gmp_printf ("got 0x%MX reduced from 0x%MX\n", got_mod, got); 405 gmp_printf ("want 0x%MX reduced from 0x%MX\n", want_mod, want); 406 error = 1; 407 } 408 409 if (error) 410 validate_fail (); 411 } 412 413 void 414 validate_divexact_1 (void) 415 { 416 mp_srcptr src = s[0].p; 417 mp_srcptr dst = fun.d[0].p; 418 int error = 0; 419 420 ASSERT (size >= 1); 421 422 { 423 mp_ptr tp = refmpn_malloc_limbs (size); 424 mp_limb_t rem; 425 426 rem = refmpn_divrem_1 (tp, 0, src, size, divisor); 427 if (rem != 0) 428 { 429 gmp_printf ("Remainder a%%d == 0x%MX, mpn_divexact_1 undefined\n", rem); 430 error = 1; 431 } 432 if (! 
refmpn_equal_anynail (tp, dst, size)) 433 { 434 printf ("Quotient a/d wrong\n"); 435 mpn_trace ("fun ", dst, size); 436 mpn_trace ("want", tp, size); 437 error = 1; 438 } 439 free (tp); 440 } 441 442 if (error) 443 validate_fail (); 444 } 445 446 void 447 validate_bdiv_q_1 448 (void) 449 { 450 mp_srcptr src = s[0].p; 451 mp_srcptr dst = fun.d[0].p; 452 int error = 0; 453 454 ASSERT (size >= 1); 455 456 { 457 mp_ptr tp = refmpn_malloc_limbs (size + 1); 458 459 refmpn_mul_1 (tp, dst, size, divisor); 460 /* Set ignored low bits */ 461 tp[0] |= (src[0] & LOW_ZEROS_MASK (divisor)); 462 if (! refmpn_equal_anynail (tp, src, size)) 463 { 464 printf ("Bdiv wrong: res * divisor != src (mod B^size)\n"); 465 mpn_trace ("res ", dst, size); 466 mpn_trace ("src ", src, size); 467 error = 1; 468 } 469 free (tp); 470 } 471 472 if (error) 473 validate_fail (); 474 } 475 476 477 void 478 validate_modexact_1c_odd (void) 479 { 480 mp_srcptr ptr = s[0].p; 481 mp_limb_t r = fun.retval; 482 int error = 0; 483 484 ASSERT (size >= 1); 485 ASSERT (divisor & 1); 486 487 if ((r & GMP_NAIL_MASK) != 0) 488 printf ("r has non-zero nail\n"); 489 490 if (carry < divisor) 491 { 492 if (! (r < divisor)) 493 { 494 printf ("Don't have r < divisor\n"); 495 error = 1; 496 } 497 } 498 else /* carry >= divisor */ 499 { 500 if (! 
(r <= divisor)) 501 { 502 printf ("Don't have r <= divisor\n"); 503 error = 1; 504 } 505 } 506 507 { 508 mp_limb_t c = carry % divisor; 509 mp_ptr tp = refmpn_malloc_limbs (size+1); 510 mp_size_t k; 511 512 for (k = size-1; k <= size; k++) 513 { 514 /* set {tp,size+1} to r*b^k + a - c */ 515 refmpn_copyi (tp, ptr, size); 516 tp[size] = 0; 517 ASSERT_NOCARRY (refmpn_add_1 (tp+k, tp+k, size+1-k, r)); 518 if (refmpn_sub_1 (tp, tp, size+1, c)) 519 ASSERT_CARRY (mpn_add_1 (tp, tp, size+1, divisor)); 520 521 if (refmpn_mod_1 (tp, size+1, divisor) == 0) 522 goto good_remainder; 523 } 524 printf ("Remainder matches neither r*b^(size-1) nor r*b^size\n"); 525 error = 1; 526 527 good_remainder: 528 free (tp); 529 } 530 531 if (error) 532 validate_fail (); 533 } 534 535 void 536 validate_modexact_1_odd (void) 537 { 538 carry = 0; 539 validate_modexact_1c_odd (); 540 } 541 542 void 543 validate_div_qr_1_pi1 (void) 544 { 545 mp_srcptr up = ref.s[0].p; 546 mp_size_t un = size; 547 mp_size_t uh = ref.s[1].p[0]; 548 mp_srcptr qp = fun.d[0].p; 549 mp_limb_t r = fun.retval; 550 mp_limb_t cy; 551 int cmp; 552 mp_ptr tp; 553 if (r >= divisor) 554 { 555 gmp_printf ("Bad remainder %Md, d = %Md\n", r, divisor); 556 validate_fail (); 557 } 558 tp = refmpn_malloc_limbs (un); 559 cy = refmpn_mul_1 (tp, qp, un, divisor); 560 cy += refmpn_add_1 (tp, tp, un, r); 561 if (cy != uh || refmpn_cmp (tp, up, un) != 0) 562 { 563 gmp_printf ("Incorrect result, size %ld.\n" 564 "d = %Mx, u = %Mx, %Nx\n" 565 "got: r = %Mx, q = %Nx\n" 566 "q d + r = %Mx, %Nx", 567 (long) un, 568 divisor, uh, up, un, 569 r, qp, un, 570 cy, tp, un); 571 validate_fail (); 572 } 573 free (tp); 574 } 575 576 577 void 578 validate_sqrtrem (void) 579 { 580 mp_srcptr orig_ptr = s[0].p; 581 mp_size_t orig_size = size; 582 mp_size_t root_size = (size+1)/2; 583 mp_srcptr root_ptr = fun.d[0].p; 584 mp_size_t rem_size = fun.retval; 585 mp_srcptr rem_ptr = fun.d[1].p; 586 mp_size_t prod_size = 2*root_size; 587 mp_ptr p; 588 int error = 
0; 589 590 if (rem_size < 0 || rem_size > size) 591 { 592 printf ("Bad remainder size retval %ld\n", (long) rem_size); 593 validate_fail (); 594 } 595 596 p = refmpn_malloc_limbs (prod_size); 597 598 p[root_size] = refmpn_lshift (p, root_ptr, root_size, 1); 599 if (refmpn_cmp_twosizes (p,root_size+1, rem_ptr,rem_size) < 0) 600 { 601 printf ("Remainder bigger than 2*root\n"); 602 error = 1; 603 } 604 605 refmpn_sqr (p, root_ptr, root_size); 606 if (rem_size != 0) 607 refmpn_add (p, p, prod_size, rem_ptr, rem_size); 608 if (refmpn_cmp_twosizes (p,prod_size, orig_ptr,orig_size) != 0) 609 { 610 printf ("root^2+rem != original\n"); 611 mpn_trace ("prod", p, prod_size); 612 error = 1; 613 } 614 free (p); 615 616 if (error) 617 validate_fail (); 618 } 619 620 void 621 validate_sqrt (void) 622 { 623 mp_srcptr orig_ptr = s[0].p; 624 mp_size_t orig_size = size; 625 mp_size_t root_size = (size+1)/2; 626 mp_srcptr root_ptr = fun.d[0].p; 627 int perf_pow = (fun.retval == 0); 628 mp_size_t prod_size = 2*root_size; 629 mp_ptr p; 630 int error = 0; 631 632 p = refmpn_malloc_limbs (prod_size); 633 634 refmpn_sqr (p, root_ptr, root_size); 635 MPN_NORMALIZE (p, prod_size); 636 if (refmpn_cmp_twosizes (p,prod_size, orig_ptr,orig_size) != - !perf_pow) 637 { 638 printf ("root^2 bigger than original, or wrong return value.\n"); 639 mpn_trace ("prod...", p, prod_size); 640 error = 1; 641 } 642 643 refmpn_sub (p, orig_ptr,orig_size, p,prod_size); 644 MPN_NORMALIZE (p, prod_size); 645 if (prod_size >= root_size && 646 refmpn_sub (p, p,prod_size, root_ptr, root_size) == 0 && 647 refmpn_cmp_twosizes (p, prod_size, root_ptr, root_size) > 0) 648 { 649 printf ("(root+1)^2 smaller than original.\n"); 650 mpn_trace ("prod", p, prod_size); 651 error = 1; 652 } 653 free (p); 654 655 if (error) 656 validate_fail (); 657 } 658 659 660 /* These types are indexes into the param[] array and are arbitrary so long 661 as they're all distinct and within the size of param[]. 
Renumber 662 whenever necessary or desired. */ 663 664 enum { 665 TYPE_ADD = 1, TYPE_ADD_N, TYPE_ADD_NC, TYPE_SUB, TYPE_SUB_N, TYPE_SUB_NC, 666 667 TYPE_ADD_ERR1_N, TYPE_ADD_ERR2_N, TYPE_ADD_ERR3_N, 668 TYPE_SUB_ERR1_N, TYPE_SUB_ERR2_N, TYPE_SUB_ERR3_N, 669 670 TYPE_MUL_1, TYPE_MUL_1C, 671 672 TYPE_MUL_2, TYPE_MUL_3, TYPE_MUL_4, TYPE_MUL_5, TYPE_MUL_6, 673 674 TYPE_ADDMUL_1, TYPE_ADDMUL_1C, TYPE_SUBMUL_1, TYPE_SUBMUL_1C, 675 676 TYPE_ADDMUL_2, TYPE_ADDMUL_3, TYPE_ADDMUL_4, TYPE_ADDMUL_5, TYPE_ADDMUL_6, 677 TYPE_ADDMUL_7, TYPE_ADDMUL_8, 678 679 TYPE_ADDSUB_N, TYPE_ADDSUB_NC, 680 681 TYPE_RSHIFT, TYPE_LSHIFT, TYPE_LSHIFTC, 682 683 TYPE_COPY, TYPE_COPYI, TYPE_COPYD, TYPE_COM, 684 685 TYPE_ADDLSH1_N, TYPE_ADDLSH2_N, TYPE_ADDLSH_N, 686 TYPE_ADDLSH1_N_IP1, TYPE_ADDLSH2_N_IP1, TYPE_ADDLSH_N_IP1, 687 TYPE_ADDLSH1_N_IP2, TYPE_ADDLSH2_N_IP2, TYPE_ADDLSH_N_IP2, 688 TYPE_SUBLSH1_N, TYPE_SUBLSH2_N, TYPE_SUBLSH_N, 689 TYPE_SUBLSH1_N_IP1, TYPE_SUBLSH2_N_IP1, TYPE_SUBLSH_N_IP1, 690 TYPE_RSBLSH1_N, TYPE_RSBLSH2_N, TYPE_RSBLSH_N, 691 TYPE_RSH1ADD_N, TYPE_RSH1SUB_N, 692 693 TYPE_ADDLSH1_NC, TYPE_ADDLSH2_NC, TYPE_ADDLSH_NC, 694 TYPE_SUBLSH1_NC, TYPE_SUBLSH2_NC, TYPE_SUBLSH_NC, 695 TYPE_RSBLSH1_NC, TYPE_RSBLSH2_NC, TYPE_RSBLSH_NC, 696 697 TYPE_ADDCND_N, TYPE_SUBCND_N, 698 699 TYPE_MOD_1, TYPE_MOD_1C, TYPE_DIVMOD_1, TYPE_DIVMOD_1C, TYPE_DIVREM_1, 700 TYPE_DIVREM_1C, TYPE_PREINV_DIVREM_1, TYPE_DIVREM_2, TYPE_PREINV_MOD_1, 701 TYPE_DIV_QR_1N_PI1, 702 TYPE_MOD_34LSUB1, TYPE_UDIV_QRNND, TYPE_UDIV_QRNND_R, 703 704 TYPE_DIVEXACT_1, TYPE_BDIV_Q_1, TYPE_DIVEXACT_BY3, TYPE_DIVEXACT_BY3C, 705 TYPE_MODEXACT_1_ODD, TYPE_MODEXACT_1C_ODD, 706 707 TYPE_INVERT, TYPE_BINVERT, 708 709 TYPE_GCD, TYPE_GCD_1, TYPE_GCD_FINDA, TYPE_MPZ_JACOBI, TYPE_MPZ_KRONECKER, 710 TYPE_MPZ_KRONECKER_UI, TYPE_MPZ_KRONECKER_SI, TYPE_MPZ_UI_KRONECKER, 711 TYPE_MPZ_SI_KRONECKER, TYPE_MPZ_LEGENDRE, 712 713 TYPE_AND_N, TYPE_NAND_N, TYPE_ANDN_N, TYPE_IOR_N, TYPE_IORN_N, TYPE_NIOR_N, 714 TYPE_XOR_N, TYPE_XNOR_N, 715 716 
TYPE_MUL_MN, TYPE_MUL_N, TYPE_SQR, TYPE_UMUL_PPMM, TYPE_UMUL_PPMM_R, 717 TYPE_MULLO_N, TYPE_SQRLO, TYPE_MULMID_MN, TYPE_MULMID_N, 718 719 TYPE_SBPI1_DIV_QR, TYPE_TDIV_QR, 720 721 TYPE_SQRTREM, TYPE_SQRT, TYPE_ZERO, TYPE_GET_STR, TYPE_POPCOUNT, TYPE_HAMDIST, 722 723 TYPE_EXTRA 724 }; 725 726 struct try_t param[TYPE_EXTRA]; 727 728 729 void 730 param_init (void) 731 { 732 struct try_t *p; 733 734 #define COPY(index) memcpy (p, ¶m[index], sizeof (*p)) 735 736 #define REFERENCE(fun) \ 737 p->reference = (tryfun_t) fun; \ 738 p->reference_name = #fun 739 #define VALIDATE(fun) \ 740 p->validate = fun; \ 741 p->validate_name = #fun 742 743 744 p = ¶m[TYPE_ADD_N]; 745 p->retval = 1; 746 p->dst[0] = 1; 747 p->src[0] = 1; 748 p->src[1] = 1; 749 REFERENCE (refmpn_add_n); 750 751 p = ¶m[TYPE_ADD_NC]; 752 COPY (TYPE_ADD_N); 753 p->carry = CARRY_BIT; 754 REFERENCE (refmpn_add_nc); 755 756 p = ¶m[TYPE_SUB_N]; 757 COPY (TYPE_ADD_N); 758 REFERENCE (refmpn_sub_n); 759 760 p = ¶m[TYPE_SUB_NC]; 761 COPY (TYPE_ADD_NC); 762 REFERENCE (refmpn_sub_nc); 763 764 p = ¶m[TYPE_ADD]; 765 COPY (TYPE_ADD_N); 766 p->size = SIZE_ALLOW_ZERO; 767 p->size2 = 1; 768 REFERENCE (refmpn_add); 769 770 p = ¶m[TYPE_SUB]; 771 COPY (TYPE_ADD); 772 REFERENCE (refmpn_sub); 773 774 775 p = ¶m[TYPE_ADD_ERR1_N]; 776 p->retval = 1; 777 p->dst[0] = 1; 778 p->dst[1] = 1; 779 p->src[0] = 1; 780 p->src[1] = 1; 781 p->src[2] = 1; 782 p->dst_size[1] = SIZE_2; 783 p->carry = CARRY_BIT; 784 p->overlap = OVERLAP_NOT_DST2; 785 REFERENCE (refmpn_add_err1_n); 786 787 p = ¶m[TYPE_SUB_ERR1_N]; 788 COPY (TYPE_ADD_ERR1_N); 789 REFERENCE (refmpn_sub_err1_n); 790 791 p = ¶m[TYPE_ADD_ERR2_N]; 792 COPY (TYPE_ADD_ERR1_N); 793 p->src[3] = 1; 794 p->dst_size[1] = SIZE_4; 795 REFERENCE (refmpn_add_err2_n); 796 797 p = ¶m[TYPE_SUB_ERR2_N]; 798 COPY (TYPE_ADD_ERR2_N); 799 REFERENCE (refmpn_sub_err2_n); 800 801 p = ¶m[TYPE_ADD_ERR3_N]; 802 COPY (TYPE_ADD_ERR2_N); 803 p->src[4] = 1; 804 p->dst_size[1] = SIZE_6; 805 REFERENCE 
(refmpn_add_err3_n); 806 807 p = ¶m[TYPE_SUB_ERR3_N]; 808 COPY (TYPE_ADD_ERR3_N); 809 REFERENCE (refmpn_sub_err3_n); 810 811 p = ¶m[TYPE_ADDCND_N]; 812 COPY (TYPE_ADD_N); 813 p->carry = CARRY_BIT; 814 REFERENCE (refmpn_cnd_add_n); 815 816 p = ¶m[TYPE_SUBCND_N]; 817 COPY (TYPE_ADD_N); 818 p->carry = CARRY_BIT; 819 REFERENCE (refmpn_cnd_sub_n); 820 821 822 p = ¶m[TYPE_MUL_1]; 823 p->retval = 1; 824 p->dst[0] = 1; 825 p->src[0] = 1; 826 p->multiplier = 1; 827 p->overlap = OVERLAP_LOW_TO_HIGH; 828 REFERENCE (refmpn_mul_1); 829 830 p = ¶m[TYPE_MUL_1C]; 831 COPY (TYPE_MUL_1); 832 p->carry = CARRY_LIMB; 833 REFERENCE (refmpn_mul_1c); 834 835 836 p = ¶m[TYPE_MUL_2]; 837 p->retval = 1; 838 p->dst[0] = 1; 839 p->dst_size[0] = SIZE_PLUS_MSIZE_SUB_1; 840 p->src[0] = 1; 841 p->src[1] = 1; 842 p->msize = 2; 843 p->overlap = OVERLAP_NOT_SRC2; 844 REFERENCE (refmpn_mul_2); 845 846 p = ¶m[TYPE_MUL_3]; 847 COPY (TYPE_MUL_2); 848 p->msize = 3; 849 REFERENCE (refmpn_mul_3); 850 851 p = ¶m[TYPE_MUL_4]; 852 COPY (TYPE_MUL_2); 853 p->msize = 4; 854 REFERENCE (refmpn_mul_4); 855 856 p = ¶m[TYPE_MUL_5]; 857 COPY (TYPE_MUL_2); 858 p->msize = 5; 859 REFERENCE (refmpn_mul_5); 860 861 p = ¶m[TYPE_MUL_6]; 862 COPY (TYPE_MUL_2); 863 p->msize = 6; 864 REFERENCE (refmpn_mul_6); 865 866 867 p = ¶m[TYPE_ADDMUL_1]; 868 p->retval = 1; 869 p->dst[0] = 1; 870 p->src[0] = 1; 871 p->multiplier = 1; 872 p->dst0_from_src1 = 1; 873 REFERENCE (refmpn_addmul_1); 874 875 p = ¶m[TYPE_ADDMUL_1C]; 876 COPY (TYPE_ADDMUL_1); 877 p->carry = CARRY_LIMB; 878 REFERENCE (refmpn_addmul_1c); 879 880 p = ¶m[TYPE_SUBMUL_1]; 881 COPY (TYPE_ADDMUL_1); 882 REFERENCE (refmpn_submul_1); 883 884 p = ¶m[TYPE_SUBMUL_1C]; 885 COPY (TYPE_ADDMUL_1C); 886 REFERENCE (refmpn_submul_1c); 887 888 889 p = ¶m[TYPE_ADDMUL_2]; 890 p->retval = 1; 891 p->dst[0] = 1; 892 p->dst_size[0] = SIZE_PLUS_MSIZE_SUB_1; 893 p->src[0] = 1; 894 p->src[1] = 1; 895 p->msize = 2; 896 p->dst0_from_src1 = 1; 897 p->overlap = OVERLAP_NONE; 898 REFERENCE 
(refmpn_addmul_2); 899 900 p = ¶m[TYPE_ADDMUL_3]; 901 COPY (TYPE_ADDMUL_2); 902 p->msize = 3; 903 REFERENCE (refmpn_addmul_3); 904 905 p = ¶m[TYPE_ADDMUL_4]; 906 COPY (TYPE_ADDMUL_2); 907 p->msize = 4; 908 REFERENCE (refmpn_addmul_4); 909 910 p = ¶m[TYPE_ADDMUL_5]; 911 COPY (TYPE_ADDMUL_2); 912 p->msize = 5; 913 REFERENCE (refmpn_addmul_5); 914 915 p = ¶m[TYPE_ADDMUL_6]; 916 COPY (TYPE_ADDMUL_2); 917 p->msize = 6; 918 REFERENCE (refmpn_addmul_6); 919 920 p = ¶m[TYPE_ADDMUL_7]; 921 COPY (TYPE_ADDMUL_2); 922 p->msize = 7; 923 REFERENCE (refmpn_addmul_7); 924 925 p = ¶m[TYPE_ADDMUL_8]; 926 COPY (TYPE_ADDMUL_2); 927 p->msize = 8; 928 REFERENCE (refmpn_addmul_8); 929 930 931 p = ¶m[TYPE_AND_N]; 932 p->dst[0] = 1; 933 p->src[0] = 1; 934 p->src[1] = 1; 935 REFERENCE (refmpn_and_n); 936 937 p = ¶m[TYPE_ANDN_N]; 938 COPY (TYPE_AND_N); 939 REFERENCE (refmpn_andn_n); 940 941 p = ¶m[TYPE_NAND_N]; 942 COPY (TYPE_AND_N); 943 REFERENCE (refmpn_nand_n); 944 945 p = ¶m[TYPE_IOR_N]; 946 COPY (TYPE_AND_N); 947 REFERENCE (refmpn_ior_n); 948 949 p = ¶m[TYPE_IORN_N]; 950 COPY (TYPE_AND_N); 951 REFERENCE (refmpn_iorn_n); 952 953 p = ¶m[TYPE_NIOR_N]; 954 COPY (TYPE_AND_N); 955 REFERENCE (refmpn_nior_n); 956 957 p = ¶m[TYPE_XOR_N]; 958 COPY (TYPE_AND_N); 959 REFERENCE (refmpn_xor_n); 960 961 p = ¶m[TYPE_XNOR_N]; 962 COPY (TYPE_AND_N); 963 REFERENCE (refmpn_xnor_n); 964 965 966 p = ¶m[TYPE_ADDSUB_N]; 967 p->retval = 1; 968 p->dst[0] = 1; 969 p->dst[1] = 1; 970 p->src[0] = 1; 971 p->src[1] = 1; 972 REFERENCE (refmpn_add_n_sub_n); 973 974 p = ¶m[TYPE_ADDSUB_NC]; 975 COPY (TYPE_ADDSUB_N); 976 p->carry = CARRY_4; 977 REFERENCE (refmpn_add_n_sub_nc); 978 979 980 p = ¶m[TYPE_COPY]; 981 p->dst[0] = 1; 982 p->src[0] = 1; 983 p->overlap = OVERLAP_NONE; 984 p->size = SIZE_ALLOW_ZERO; 985 REFERENCE (refmpn_copy); 986 987 p = ¶m[TYPE_COPYI]; 988 p->dst[0] = 1; 989 p->src[0] = 1; 990 p->overlap = OVERLAP_LOW_TO_HIGH; 991 p->size = SIZE_ALLOW_ZERO; 992 REFERENCE (refmpn_copyi); 993 994 p = 
¶m[TYPE_COPYD]; 995 p->dst[0] = 1; 996 p->src[0] = 1; 997 p->overlap = OVERLAP_HIGH_TO_LOW; 998 p->size = SIZE_ALLOW_ZERO; 999 REFERENCE (refmpn_copyd); 1000 1001 p = ¶m[TYPE_COM]; 1002 p->dst[0] = 1; 1003 p->src[0] = 1; 1004 REFERENCE (refmpn_com); 1005 1006 1007 p = ¶m[TYPE_ADDLSH1_N]; 1008 COPY (TYPE_ADD_N); 1009 REFERENCE (refmpn_addlsh1_n); 1010 1011 p = ¶m[TYPE_ADDLSH2_N]; 1012 COPY (TYPE_ADD_N); 1013 REFERENCE (refmpn_addlsh2_n); 1014 1015 p = ¶m[TYPE_ADDLSH_N]; 1016 COPY (TYPE_ADD_N); 1017 p->shift = 1; 1018 REFERENCE (refmpn_addlsh_n); 1019 1020 p = ¶m[TYPE_ADDLSH1_N_IP1]; 1021 p->retval = 1; 1022 p->dst[0] = 1; 1023 p->src[0] = 1; 1024 p->dst0_from_src1 = 1; 1025 REFERENCE (refmpn_addlsh1_n_ip1); 1026 1027 p = ¶m[TYPE_ADDLSH2_N_IP1]; 1028 COPY (TYPE_ADDLSH1_N_IP1); 1029 REFERENCE (refmpn_addlsh2_n_ip1); 1030 1031 p = ¶m[TYPE_ADDLSH_N_IP1]; 1032 COPY (TYPE_ADDLSH1_N_IP1); 1033 p->shift = 1; 1034 REFERENCE (refmpn_addlsh_n_ip1); 1035 1036 p = ¶m[TYPE_ADDLSH1_N_IP2]; 1037 COPY (TYPE_ADDLSH1_N_IP1); 1038 REFERENCE (refmpn_addlsh1_n_ip2); 1039 1040 p = ¶m[TYPE_ADDLSH2_N_IP2]; 1041 COPY (TYPE_ADDLSH1_N_IP1); 1042 REFERENCE (refmpn_addlsh2_n_ip2); 1043 1044 p = ¶m[TYPE_ADDLSH_N_IP2]; 1045 COPY (TYPE_ADDLSH_N_IP1); 1046 REFERENCE (refmpn_addlsh_n_ip2); 1047 1048 p = ¶m[TYPE_SUBLSH1_N]; 1049 COPY (TYPE_ADD_N); 1050 REFERENCE (refmpn_sublsh1_n); 1051 1052 p = ¶m[TYPE_SUBLSH2_N]; 1053 COPY (TYPE_ADD_N); 1054 REFERENCE (refmpn_sublsh2_n); 1055 1056 p = ¶m[TYPE_SUBLSH_N]; 1057 COPY (TYPE_ADDLSH_N); 1058 REFERENCE (refmpn_sublsh_n); 1059 1060 p = ¶m[TYPE_SUBLSH1_N_IP1]; 1061 COPY (TYPE_ADDLSH1_N_IP1); 1062 REFERENCE (refmpn_sublsh1_n_ip1); 1063 1064 p = ¶m[TYPE_SUBLSH2_N_IP1]; 1065 COPY (TYPE_ADDLSH1_N_IP1); 1066 REFERENCE (refmpn_sublsh2_n_ip1); 1067 1068 p = ¶m[TYPE_SUBLSH_N_IP1]; 1069 COPY (TYPE_ADDLSH_N_IP1); 1070 REFERENCE (refmpn_sublsh_n_ip1); 1071 1072 p = ¶m[TYPE_RSBLSH1_N]; 1073 COPY (TYPE_ADD_N); 1074 REFERENCE (refmpn_rsblsh1_n); 1075 1076 p = 
¶m[TYPE_RSBLSH2_N]; 1077 COPY (TYPE_ADD_N); 1078 REFERENCE (refmpn_rsblsh2_n); 1079 1080 p = ¶m[TYPE_RSBLSH_N]; 1081 COPY (TYPE_ADDLSH_N); 1082 REFERENCE (refmpn_rsblsh_n); 1083 1084 p = ¶m[TYPE_RSH1ADD_N]; 1085 COPY (TYPE_ADD_N); 1086 REFERENCE (refmpn_rsh1add_n); 1087 1088 p = ¶m[TYPE_RSH1SUB_N]; 1089 COPY (TYPE_ADD_N); 1090 REFERENCE (refmpn_rsh1sub_n); 1091 1092 1093 p = ¶m[TYPE_ADDLSH1_NC]; 1094 COPY (TYPE_ADDLSH1_N); 1095 p->carry = CARRY_3; 1096 REFERENCE (refmpn_addlsh1_nc); 1097 1098 p = ¶m[TYPE_ADDLSH2_NC]; 1099 COPY (TYPE_ADDLSH2_N); 1100 p->carry = CARRY_4; /* FIXME */ 1101 REFERENCE (refmpn_addlsh2_nc); 1102 1103 p = ¶m[TYPE_ADDLSH_NC]; 1104 COPY (TYPE_ADDLSH_N); 1105 p->carry = CARRY_BIT; /* FIXME */ 1106 REFERENCE (refmpn_addlsh_nc); 1107 1108 p = ¶m[TYPE_SUBLSH1_NC]; 1109 COPY (TYPE_ADDLSH1_NC); 1110 REFERENCE (refmpn_sublsh1_nc); 1111 1112 p = ¶m[TYPE_SUBLSH2_NC]; 1113 COPY (TYPE_ADDLSH2_NC); 1114 REFERENCE (refmpn_sublsh2_nc); 1115 1116 p = ¶m[TYPE_SUBLSH_NC]; 1117 COPY (TYPE_ADDLSH_NC); 1118 REFERENCE (refmpn_sublsh_nc); 1119 1120 p = ¶m[TYPE_RSBLSH1_NC]; 1121 COPY (TYPE_RSBLSH1_N); 1122 p->carry = CARRY_BIT; /* FIXME */ 1123 REFERENCE (refmpn_rsblsh1_nc); 1124 1125 p = ¶m[TYPE_RSBLSH2_NC]; 1126 COPY (TYPE_RSBLSH2_N); 1127 p->carry = CARRY_4; /* FIXME */ 1128 REFERENCE (refmpn_rsblsh2_nc); 1129 1130 p = ¶m[TYPE_RSBLSH_NC]; 1131 COPY (TYPE_RSBLSH_N); 1132 p->carry = CARRY_BIT; /* FIXME */ 1133 REFERENCE (refmpn_rsblsh_nc); 1134 1135 1136 p = ¶m[TYPE_MOD_1]; 1137 p->retval = 1; 1138 p->src[0] = 1; 1139 p->size = SIZE_ALLOW_ZERO; 1140 p->divisor = DIVISOR_LIMB; 1141 REFERENCE (refmpn_mod_1); 1142 1143 p = ¶m[TYPE_MOD_1C]; 1144 COPY (TYPE_MOD_1); 1145 p->carry = CARRY_DIVISOR; 1146 REFERENCE (refmpn_mod_1c); 1147 1148 p = ¶m[TYPE_DIVMOD_1]; 1149 COPY (TYPE_MOD_1); 1150 p->dst[0] = 1; 1151 REFERENCE (refmpn_divmod_1); 1152 1153 p = ¶m[TYPE_DIVMOD_1C]; 1154 COPY (TYPE_DIVMOD_1); 1155 p->carry = CARRY_DIVISOR; 1156 REFERENCE (refmpn_divmod_1c); 1157 
1158 p = ¶m[TYPE_DIVREM_1]; 1159 COPY (TYPE_DIVMOD_1); 1160 p->size2 = SIZE_FRACTION; 1161 p->dst_size[0] = SIZE_SUM; 1162 REFERENCE (refmpn_divrem_1); 1163 1164 p = ¶m[TYPE_DIVREM_1C]; 1165 COPY (TYPE_DIVREM_1); 1166 p->carry = CARRY_DIVISOR; 1167 REFERENCE (refmpn_divrem_1c); 1168 1169 p = ¶m[TYPE_PREINV_DIVREM_1]; 1170 COPY (TYPE_DIVREM_1); 1171 p->size = SIZE_YES; /* ie. no size==0 */ 1172 REFERENCE (refmpn_preinv_divrem_1); 1173 1174 p = ¶m[TYPE_DIV_QR_1N_PI1]; 1175 p->retval = 1; 1176 p->src[0] = 1; 1177 p->src[1] = 1; 1178 /* SIZE_1 not supported. Always uses low limb only. */ 1179 p->size2 = 1; 1180 p->dst[0] = 1; 1181 p->divisor = DIVISOR_NORM; 1182 p->data = DATA_DIV_QR_1; 1183 VALIDATE (validate_div_qr_1_pi1); 1184 1185 p = ¶m[TYPE_PREINV_MOD_1]; 1186 p->retval = 1; 1187 p->src[0] = 1; 1188 p->divisor = DIVISOR_NORM; 1189 REFERENCE (refmpn_preinv_mod_1); 1190 1191 p = ¶m[TYPE_MOD_34LSUB1]; 1192 p->retval = 1; 1193 p->src[0] = 1; 1194 VALIDATE (validate_mod_34lsub1); 1195 1196 p = ¶m[TYPE_UDIV_QRNND]; 1197 p->retval = 1; 1198 p->src[0] = 1; 1199 p->dst[0] = 1; 1200 p->dst_size[0] = SIZE_1; 1201 p->divisor = UDIV_NEEDS_NORMALIZATION ? 
DIVISOR_NORM : DIVISOR_LIMB; 1202 p->data = DATA_UDIV_QRNND; 1203 p->overlap = OVERLAP_NONE; 1204 REFERENCE (refmpn_udiv_qrnnd); 1205 1206 p = ¶m[TYPE_UDIV_QRNND_R]; 1207 COPY (TYPE_UDIV_QRNND); 1208 REFERENCE (refmpn_udiv_qrnnd_r); 1209 1210 1211 p = ¶m[TYPE_DIVEXACT_1]; 1212 p->dst[0] = 1; 1213 p->src[0] = 1; 1214 p->divisor = DIVISOR_LIMB; 1215 p->data = DATA_MULTIPLE_DIVISOR; 1216 VALIDATE (validate_divexact_1); 1217 REFERENCE (refmpn_divmod_1); 1218 1219 p = ¶m[TYPE_BDIV_Q_1]; 1220 p->dst[0] = 1; 1221 p->src[0] = 1; 1222 p->divisor = DIVISOR_LIMB; 1223 VALIDATE (validate_bdiv_q_1); 1224 1225 p = ¶m[TYPE_DIVEXACT_BY3]; 1226 p->retval = 1; 1227 p->dst[0] = 1; 1228 p->src[0] = 1; 1229 REFERENCE (refmpn_divexact_by3); 1230 1231 p = ¶m[TYPE_DIVEXACT_BY3C]; 1232 COPY (TYPE_DIVEXACT_BY3); 1233 p->carry = CARRY_3; 1234 REFERENCE (refmpn_divexact_by3c); 1235 1236 1237 p = ¶m[TYPE_MODEXACT_1_ODD]; 1238 p->retval = 1; 1239 p->src[0] = 1; 1240 p->divisor = DIVISOR_ODD; 1241 VALIDATE (validate_modexact_1_odd); 1242 1243 p = ¶m[TYPE_MODEXACT_1C_ODD]; 1244 COPY (TYPE_MODEXACT_1_ODD); 1245 p->carry = CARRY_LIMB; 1246 VALIDATE (validate_modexact_1c_odd); 1247 1248 1249 p = ¶m[TYPE_GCD_1]; 1250 p->retval = 1; 1251 p->src[0] = 1; 1252 p->data = DATA_NON_ZERO; 1253 p->divisor = DIVISOR_LIMB; 1254 REFERENCE (refmpn_gcd_1); 1255 1256 p = ¶m[TYPE_GCD]; 1257 p->retval = 1; 1258 p->dst[0] = 1; 1259 p->src[0] = 1; 1260 p->src[1] = 1; 1261 p->size2 = 1; 1262 p->dst_size[0] = SIZE_RETVAL; 1263 p->overlap = OVERLAP_NOT_SRCS; 1264 p->data = DATA_GCD; 1265 REFERENCE (refmpn_gcd); 1266 1267 1268 p = ¶m[TYPE_MPZ_LEGENDRE]; 1269 p->retval = 1; 1270 p->src[0] = 1; 1271 p->size = SIZE_ALLOW_ZERO; 1272 p->src[1] = 1; 1273 p->data = DATA_SRC1_ODD_PRIME; 1274 p->size2 = 1; 1275 p->carry = CARRY_BIT; 1276 p->carry_sign = 1; 1277 REFERENCE (refmpz_legendre); 1278 1279 p = ¶m[TYPE_MPZ_JACOBI]; 1280 p->retval = 1; 1281 p->src[0] = 1; 1282 p->size = SIZE_ALLOW_ZERO; 1283 p->src[1] = 1; 1284 p->data = 
DATA_SRC1_ODD; 1285 p->size2 = 1; 1286 p->carry = CARRY_BIT; 1287 p->carry_sign = 1; 1288 REFERENCE (refmpz_jacobi); 1289 1290 p = ¶m[TYPE_MPZ_KRONECKER]; 1291 p->retval = 1; 1292 p->src[0] = 1; 1293 p->size = SIZE_ALLOW_ZERO; 1294 p->src[1] = 1; 1295 p->data = 0; 1296 p->size2 = 1; 1297 p->carry = CARRY_4; 1298 p->carry_sign = 1; 1299 REFERENCE (refmpz_kronecker); 1300 1301 1302 p = ¶m[TYPE_MPZ_KRONECKER_UI]; 1303 p->retval = 1; 1304 p->src[0] = 1; 1305 p->size = SIZE_ALLOW_ZERO; 1306 p->multiplier = 1; 1307 p->carry = CARRY_BIT; 1308 REFERENCE (refmpz_kronecker_ui); 1309 1310 p = ¶m[TYPE_MPZ_KRONECKER_SI]; 1311 COPY (TYPE_MPZ_KRONECKER_UI); 1312 REFERENCE (refmpz_kronecker_si); 1313 1314 p = ¶m[TYPE_MPZ_UI_KRONECKER]; 1315 COPY (TYPE_MPZ_KRONECKER_UI); 1316 REFERENCE (refmpz_ui_kronecker); 1317 1318 p = ¶m[TYPE_MPZ_SI_KRONECKER]; 1319 COPY (TYPE_MPZ_KRONECKER_UI); 1320 REFERENCE (refmpz_si_kronecker); 1321 1322 1323 p = ¶m[TYPE_SQR]; 1324 p->dst[0] = 1; 1325 p->src[0] = 1; 1326 p->dst_size[0] = SIZE_SUM; 1327 p->overlap = OVERLAP_NONE; 1328 REFERENCE (refmpn_sqr); 1329 1330 p = ¶m[TYPE_MUL_N]; 1331 COPY (TYPE_SQR); 1332 p->src[1] = 1; 1333 REFERENCE (refmpn_mul_n); 1334 1335 p = ¶m[TYPE_MULLO_N]; 1336 COPY (TYPE_MUL_N); 1337 p->dst_size[0] = 0; 1338 REFERENCE (refmpn_mullo_n); 1339 1340 p = ¶m[TYPE_SQRLO]; 1341 COPY (TYPE_SQR); 1342 p->dst_size[0] = 0; 1343 REFERENCE (refmpn_sqrlo); 1344 1345 p = ¶m[TYPE_MUL_MN]; 1346 COPY (TYPE_MUL_N); 1347 p->size2 = 1; 1348 REFERENCE (refmpn_mul_basecase); 1349 1350 p = ¶m[TYPE_MULMID_MN]; 1351 COPY (TYPE_MUL_MN); 1352 p->dst_size[0] = SIZE_DIFF_PLUS_3; 1353 REFERENCE (refmpn_mulmid_basecase); 1354 1355 p = ¶m[TYPE_MULMID_N]; 1356 COPY (TYPE_MUL_N); 1357 p->size = SIZE_ODD; 1358 p->size2 = SIZE_CEIL_HALF; 1359 p->dst_size[0] = SIZE_DIFF_PLUS_3; 1360 REFERENCE (refmpn_mulmid_n); 1361 1362 p = ¶m[TYPE_UMUL_PPMM]; 1363 p->retval = 1; 1364 p->src[0] = 1; 1365 p->dst[0] = 1; 1366 p->dst_size[0] = SIZE_1; 1367 p->overlap = 
OVERLAP_NONE; 1368 REFERENCE (refmpn_umul_ppmm); 1369 1370 p = ¶m[TYPE_UMUL_PPMM_R]; 1371 COPY (TYPE_UMUL_PPMM); 1372 REFERENCE (refmpn_umul_ppmm_r); 1373 1374 1375 p = ¶m[TYPE_RSHIFT]; 1376 p->retval = 1; 1377 p->dst[0] = 1; 1378 p->src[0] = 1; 1379 p->shift = 1; 1380 p->overlap = OVERLAP_LOW_TO_HIGH; 1381 REFERENCE (refmpn_rshift); 1382 1383 p = ¶m[TYPE_LSHIFT]; 1384 COPY (TYPE_RSHIFT); 1385 p->overlap = OVERLAP_HIGH_TO_LOW; 1386 REFERENCE (refmpn_lshift); 1387 1388 p = ¶m[TYPE_LSHIFTC]; 1389 COPY (TYPE_RSHIFT); 1390 p->overlap = OVERLAP_HIGH_TO_LOW; 1391 REFERENCE (refmpn_lshiftc); 1392 1393 1394 p = ¶m[TYPE_POPCOUNT]; 1395 p->retval = 1; 1396 p->src[0] = 1; 1397 REFERENCE (refmpn_popcount); 1398 1399 p = ¶m[TYPE_HAMDIST]; 1400 COPY (TYPE_POPCOUNT); 1401 p->src[1] = 1; 1402 REFERENCE (refmpn_hamdist); 1403 1404 1405 p = ¶m[TYPE_SBPI1_DIV_QR]; 1406 p->retval = 1; 1407 p->dst[0] = 1; 1408 p->dst[1] = 1; 1409 p->src[0] = 1; 1410 p->src[1] = 1; 1411 p->data = DATA_SRC1_HIGHBIT; 1412 p->size2 = 1; 1413 p->dst_size[0] = SIZE_DIFF; 1414 p->overlap = OVERLAP_NONE; 1415 REFERENCE (refmpn_sb_div_qr); 1416 1417 p = ¶m[TYPE_TDIV_QR]; 1418 p->dst[0] = 1; 1419 p->dst[1] = 1; 1420 p->src[0] = 1; 1421 p->src[1] = 1; 1422 p->size2 = 1; 1423 p->dst_size[0] = SIZE_DIFF_PLUS_1; 1424 p->dst_size[1] = SIZE_SIZE2; 1425 p->overlap = OVERLAP_NONE; 1426 REFERENCE (refmpn_tdiv_qr); 1427 1428 p = ¶m[TYPE_SQRTREM]; 1429 p->retval = 1; 1430 p->dst[0] = 1; 1431 p->dst[1] = 1; 1432 p->src[0] = 1; 1433 p->dst_size[0] = SIZE_CEIL_HALF; 1434 p->dst_size[1] = SIZE_RETVAL; 1435 p->overlap = OVERLAP_NONE; 1436 VALIDATE (validate_sqrtrem); 1437 REFERENCE (refmpn_sqrtrem); 1438 1439 p = ¶m[TYPE_SQRT]; 1440 p->retval = 1; 1441 p->dst[0] = 1; 1442 p->dst[1] = 0; 1443 p->src[0] = 1; 1444 p->dst_size[0] = SIZE_CEIL_HALF; 1445 p->overlap = OVERLAP_NONE; 1446 VALIDATE (validate_sqrt); 1447 1448 p = ¶m[TYPE_ZERO]; 1449 p->dst[0] = 1; 1450 p->size = SIZE_ALLOW_ZERO; 1451 REFERENCE (refmpn_zero); 1452 1453 p = 
¶m[TYPE_GET_STR]; 1454 p->retval = 1; 1455 p->src[0] = 1; 1456 p->size = SIZE_ALLOW_ZERO; 1457 p->dst[0] = 1; 1458 p->dst[1] = 1; 1459 p->dst_size[0] = SIZE_GET_STR; 1460 p->dst_bytes[0] = 1; 1461 p->overlap = OVERLAP_NONE; 1462 REFERENCE (refmpn_get_str); 1463 1464 p = ¶m[TYPE_BINVERT]; 1465 p->dst[0] = 1; 1466 p->src[0] = 1; 1467 p->data = DATA_SRC0_ODD; 1468 p->overlap = OVERLAP_NONE; 1469 REFERENCE (refmpn_binvert); 1470 1471 p = ¶m[TYPE_INVERT]; 1472 p->dst[0] = 1; 1473 p->src[0] = 1; 1474 p->data = DATA_SRC0_HIGHBIT; 1475 p->overlap = OVERLAP_NONE; 1476 REFERENCE (refmpn_invert); 1477 1478 #ifdef EXTRA_PARAM_INIT 1479 EXTRA_PARAM_INIT 1480 #endif 1481 } 1482 1483 1484 /* The following are macros if there's no native versions, so wrap them in 1485 functions that can be in try_array[]. */ 1486 1487 void 1488 MPN_COPY_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size) 1489 { MPN_COPY (rp, sp, size); } 1490 1491 void 1492 MPN_COPY_INCR_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size) 1493 { MPN_COPY_INCR (rp, sp, size); } 1494 1495 void 1496 MPN_COPY_DECR_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size) 1497 { MPN_COPY_DECR (rp, sp, size); } 1498 1499 void 1500 __GMPN_COPY_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size) 1501 { __GMPN_COPY (rp, sp, size); } 1502 1503 #ifdef __GMPN_COPY_INCR 1504 void 1505 __GMPN_COPY_INCR_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size) 1506 { __GMPN_COPY_INCR (rp, sp, size); } 1507 #endif 1508 1509 void 1510 mpn_com_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size) 1511 { mpn_com (rp, sp, size); } 1512 1513 void 1514 mpn_and_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size) 1515 { mpn_and_n (rp, s1, s2, size); } 1516 1517 void 1518 mpn_andn_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size) 1519 { mpn_andn_n (rp, s1, s2, size); } 1520 1521 void 1522 mpn_nand_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size) 1523 { mpn_nand_n (rp, s1, s2, size); } 1524 1525 void 1526 mpn_ior_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr 
s2, mp_size_t size) 1527 { mpn_ior_n (rp, s1, s2, size); } 1528 1529 void 1530 mpn_iorn_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size) 1531 { mpn_iorn_n (rp, s1, s2, size); } 1532 1533 void 1534 mpn_nior_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size) 1535 { mpn_nior_n (rp, s1, s2, size); } 1536 1537 void 1538 mpn_xor_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size) 1539 { mpn_xor_n (rp, s1, s2, size); } 1540 1541 void 1542 mpn_xnor_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size) 1543 { mpn_xnor_n (rp, s1, s2, size); } 1544 1545 mp_limb_t 1546 udiv_qrnnd_fun (mp_limb_t *remptr, mp_limb_t n1, mp_limb_t n0, mp_limb_t d) 1547 { 1548 mp_limb_t q; 1549 udiv_qrnnd (q, *remptr, n1, n0, d); 1550 return q; 1551 } 1552 1553 mp_limb_t 1554 mpn_divexact_by3_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size) 1555 { 1556 return mpn_divexact_by3 (rp, sp, size); 1557 } 1558 1559 #if HAVE_NATIVE_mpn_addlsh1_n_ip1 1560 mp_limb_t 1561 mpn_addlsh1_n_ip1_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size) 1562 { 1563 return mpn_addlsh1_n_ip1 (rp, sp, size); 1564 } 1565 #endif 1566 #if HAVE_NATIVE_mpn_addlsh2_n_ip1 1567 mp_limb_t 1568 mpn_addlsh2_n_ip1_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size) 1569 { 1570 return mpn_addlsh2_n_ip1 (rp, sp, size); 1571 } 1572 #endif 1573 #if HAVE_NATIVE_mpn_addlsh_n_ip1 1574 mp_limb_t 1575 mpn_addlsh_n_ip1_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned int sh) 1576 { 1577 return mpn_addlsh_n_ip1 (rp, sp, size, sh); 1578 } 1579 #endif 1580 #if HAVE_NATIVE_mpn_addlsh1_n_ip2 1581 mp_limb_t 1582 mpn_addlsh1_n_ip2_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size) 1583 { 1584 return mpn_addlsh1_n_ip2 (rp, sp, size); 1585 } 1586 #endif 1587 #if HAVE_NATIVE_mpn_addlsh2_n_ip2 1588 mp_limb_t 1589 mpn_addlsh2_n_ip2_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size) 1590 { 1591 return mpn_addlsh2_n_ip2 (rp, sp, size); 1592 } 1593 #endif 1594 #if HAVE_NATIVE_mpn_addlsh_n_ip2 1595 mp_limb_t 1596 mpn_addlsh_n_ip2_fun 
(mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned int sh) 1597 { 1598 return mpn_addlsh_n_ip2 (rp, sp, size, sh); 1599 } 1600 #endif 1601 #if HAVE_NATIVE_mpn_sublsh1_n_ip1 1602 mp_limb_t 1603 mpn_sublsh1_n_ip1_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size) 1604 { 1605 return mpn_sublsh1_n_ip1 (rp, sp, size); 1606 } 1607 #endif 1608 #if HAVE_NATIVE_mpn_sublsh2_n_ip1 1609 mp_limb_t 1610 mpn_sublsh2_n_ip1_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size) 1611 { 1612 return mpn_sublsh2_n_ip1 (rp, sp, size); 1613 } 1614 #endif 1615 #if HAVE_NATIVE_mpn_sublsh_n_ip1 1616 mp_limb_t 1617 mpn_sublsh_n_ip1_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned int sh) 1618 { 1619 return mpn_sublsh_n_ip1 (rp, sp, size, sh); 1620 } 1621 #endif 1622 1623 mp_limb_t 1624 mpn_modexact_1_odd_fun (mp_srcptr ptr, mp_size_t size, mp_limb_t divisor) 1625 { 1626 return mpn_modexact_1_odd (ptr, size, divisor); 1627 } 1628 1629 void 1630 mpn_toom22_mul_fun (mp_ptr dst, mp_srcptr src1, mp_srcptr src2, mp_size_t size) 1631 { 1632 mp_ptr tspace; 1633 TMP_DECL; 1634 TMP_MARK; 1635 tspace = TMP_ALLOC_LIMBS (mpn_toom22_mul_itch (size, size)); 1636 mpn_toom22_mul (dst, src1, size, src2, size, tspace); 1637 TMP_FREE; 1638 } 1639 void 1640 mpn_toom2_sqr_fun (mp_ptr dst, mp_srcptr src, mp_size_t size) 1641 { 1642 mp_ptr tspace; 1643 TMP_DECL; 1644 TMP_MARK; 1645 tspace = TMP_ALLOC_LIMBS (mpn_toom2_sqr_itch (size)); 1646 mpn_toom2_sqr (dst, src, size, tspace); 1647 TMP_FREE; 1648 } 1649 void 1650 mpn_toom33_mul_fun (mp_ptr dst, mp_srcptr src1, mp_srcptr src2, mp_size_t size) 1651 { 1652 mp_ptr tspace; 1653 TMP_DECL; 1654 TMP_MARK; 1655 tspace = TMP_ALLOC_LIMBS (mpn_toom33_mul_itch (size, size)); 1656 mpn_toom33_mul (dst, src1, size, src2, size, tspace); 1657 TMP_FREE; 1658 } 1659 void 1660 mpn_toom3_sqr_fun (mp_ptr dst, mp_srcptr src, mp_size_t size) 1661 { 1662 mp_ptr tspace; 1663 TMP_DECL; 1664 TMP_MARK; 1665 tspace = TMP_ALLOC_LIMBS (mpn_toom3_sqr_itch (size)); 1666 mpn_toom3_sqr (dst, src, size, 
tspace); 1667 TMP_FREE; 1668 } 1669 void 1670 mpn_toom44_mul_fun (mp_ptr dst, mp_srcptr src1, mp_srcptr src2, mp_size_t size) 1671 { 1672 mp_ptr tspace; 1673 TMP_DECL; 1674 TMP_MARK; 1675 tspace = TMP_ALLOC_LIMBS (mpn_toom44_mul_itch (size, size)); 1676 mpn_toom44_mul (dst, src1, size, src2, size, tspace); 1677 TMP_FREE; 1678 } 1679 void 1680 mpn_toom4_sqr_fun (mp_ptr dst, mp_srcptr src, mp_size_t size) 1681 { 1682 mp_ptr tspace; 1683 TMP_DECL; 1684 TMP_MARK; 1685 tspace = TMP_ALLOC_LIMBS (mpn_toom4_sqr_itch (size)); 1686 mpn_toom4_sqr (dst, src, size, tspace); 1687 TMP_FREE; 1688 } 1689 1690 void 1691 mpn_toom42_mulmid_fun (mp_ptr dst, mp_srcptr src1, mp_srcptr src2, 1692 mp_size_t size) 1693 { 1694 mp_ptr tspace; 1695 mp_size_t n; 1696 TMP_DECL; 1697 TMP_MARK; 1698 tspace = TMP_ALLOC_LIMBS (mpn_toom42_mulmid_itch (size)); 1699 mpn_toom42_mulmid (dst, src1, src2, size, tspace); 1700 TMP_FREE; 1701 } 1702 1703 mp_limb_t 1704 umul_ppmm_fun (mp_limb_t *lowptr, mp_limb_t m1, mp_limb_t m2) 1705 { 1706 mp_limb_t high; 1707 umul_ppmm (high, *lowptr, m1, m2); 1708 return high; 1709 } 1710 1711 void 1712 MPN_ZERO_fun (mp_ptr ptr, mp_size_t size) 1713 { MPN_ZERO (ptr, size); } 1714 1715 mp_size_t 1716 mpn_sqrt_fun (mp_ptr dst, mp_srcptr src, mp_size_t size) 1717 { return mpn_sqrtrem (dst, NULL, src, size); } 1718 1719 struct choice_t { 1720 const char *name; 1721 tryfun_t function; 1722 int type; 1723 mp_size_t minsize; 1724 }; 1725 1726 #define TRY(fun) #fun, (tryfun_t) fun 1727 #define TRY_FUNFUN(fun) #fun, (tryfun_t) fun##_fun 1728 1729 const struct choice_t choice_array[] = { 1730 { TRY(mpn_add), TYPE_ADD }, 1731 { TRY(mpn_sub), TYPE_SUB }, 1732 1733 { TRY(mpn_add_n), TYPE_ADD_N }, 1734 { TRY(mpn_sub_n), TYPE_SUB_N }, 1735 1736 #if HAVE_NATIVE_mpn_add_nc 1737 { TRY(mpn_add_nc), TYPE_ADD_NC }, 1738 #endif 1739 #if HAVE_NATIVE_mpn_sub_nc 1740 { TRY(mpn_sub_nc), TYPE_SUB_NC }, 1741 #endif 1742 1743 #if HAVE_NATIVE_mpn_add_n_sub_n 1744 { TRY(mpn_add_n_sub_n), TYPE_ADDSUB_N 
},
#endif
#if HAVE_NATIVE_mpn_add_n_sub_nc
  { TRY(mpn_add_n_sub_nc), TYPE_ADDSUB_NC },
#endif

  /* Continuation of the choice_array[] initializer.  Each entry is
     { name, function, TYPE_xxx [, minsize] }; entries inside #if/#ifdef
     are present only when the guarding symbol is true at preprocessing
     time.  */

  /* add/sub with error terms */
  { TRY(mpn_add_err1_n), TYPE_ADD_ERR1_N },
  { TRY(mpn_sub_err1_n), TYPE_SUB_ERR1_N },
  { TRY(mpn_add_err2_n), TYPE_ADD_ERR2_N },
  { TRY(mpn_sub_err2_n), TYPE_SUB_ERR2_N },
  { TRY(mpn_add_err3_n), TYPE_ADD_ERR3_N },
  { TRY(mpn_sub_err3_n), TYPE_SUB_ERR3_N },

  /* addmul/submul by one limb, optionally with carry-in */
  { TRY(mpn_addmul_1), TYPE_ADDMUL_1 },
  { TRY(mpn_submul_1), TYPE_SUBMUL_1 },
#if HAVE_NATIVE_mpn_addmul_1c
  { TRY(mpn_addmul_1c), TYPE_ADDMUL_1C },
#endif
#if HAVE_NATIVE_mpn_submul_1c
  { TRY(mpn_submul_1c), TYPE_SUBMUL_1C },
#endif

  /* addmul_N: the third initializer (minsize) equals N */
#if HAVE_NATIVE_mpn_addmul_2
  { TRY(mpn_addmul_2), TYPE_ADDMUL_2, 2 },
#endif
#if HAVE_NATIVE_mpn_addmul_3
  { TRY(mpn_addmul_3), TYPE_ADDMUL_3, 3 },
#endif
#if HAVE_NATIVE_mpn_addmul_4
  { TRY(mpn_addmul_4), TYPE_ADDMUL_4, 4 },
#endif
#if HAVE_NATIVE_mpn_addmul_5
  { TRY(mpn_addmul_5), TYPE_ADDMUL_5, 5 },
#endif
#if HAVE_NATIVE_mpn_addmul_6
  { TRY(mpn_addmul_6), TYPE_ADDMUL_6, 6 },
#endif
#if HAVE_NATIVE_mpn_addmul_7
  { TRY(mpn_addmul_7), TYPE_ADDMUL_7, 7 },
#endif
#if HAVE_NATIVE_mpn_addmul_8
  { TRY(mpn_addmul_8), TYPE_ADDMUL_8, 8 },
#endif

  /* complement and copies; TRY_FUNFUN entries go through the xxx_fun
     wrapper functions */
  { TRY_FUNFUN(mpn_com), TYPE_COM },

  { TRY_FUNFUN(MPN_COPY), TYPE_COPY },
  { TRY_FUNFUN(MPN_COPY_INCR), TYPE_COPYI },
  { TRY_FUNFUN(MPN_COPY_DECR), TYPE_COPYD },

  { TRY_FUNFUN(__GMPN_COPY), TYPE_COPY },
#ifdef __GMPN_COPY_INCR
  { TRY_FUNFUN(__GMPN_COPY_INCR), TYPE_COPYI },
#endif

#if HAVE_NATIVE_mpn_copyi
  { TRY(mpn_copyi), TYPE_COPYI },
#endif
#if HAVE_NATIVE_mpn_copyd
  { TRY(mpn_copyd), TYPE_COPYD },
#endif

  /* conditional add/sub, and the shift-and-add/sub/rsb family */
  { TRY(mpn_cnd_add_n), TYPE_ADDCND_N },
  { TRY(mpn_cnd_sub_n), TYPE_SUBCND_N },
#if HAVE_NATIVE_mpn_addlsh1_n == 1
  { TRY(mpn_addlsh1_n), TYPE_ADDLSH1_N },
#endif
#if HAVE_NATIVE_mpn_addlsh2_n == 1
  { TRY(mpn_addlsh2_n), TYPE_ADDLSH2_N },
#endif
#if HAVE_NATIVE_mpn_addlsh_n
  { TRY(mpn_addlsh_n), TYPE_ADDLSH_N },
#endif
#if HAVE_NATIVE_mpn_addlsh1_n_ip1
  { TRY_FUNFUN(mpn_addlsh1_n_ip1), TYPE_ADDLSH1_N_IP1 },
#endif
#if HAVE_NATIVE_mpn_addlsh2_n_ip1
  { TRY_FUNFUN(mpn_addlsh2_n_ip1), TYPE_ADDLSH2_N_IP1 },
#endif
#if HAVE_NATIVE_mpn_addlsh_n_ip1
  { TRY_FUNFUN(mpn_addlsh_n_ip1), TYPE_ADDLSH_N_IP1 },
#endif
#if HAVE_NATIVE_mpn_addlsh1_n_ip2
  { TRY_FUNFUN(mpn_addlsh1_n_ip2), TYPE_ADDLSH1_N_IP2 },
#endif
#if HAVE_NATIVE_mpn_addlsh2_n_ip2
  { TRY_FUNFUN(mpn_addlsh2_n_ip2), TYPE_ADDLSH2_N_IP2 },
#endif
#if HAVE_NATIVE_mpn_addlsh_n_ip2
  { TRY_FUNFUN(mpn_addlsh_n_ip2), TYPE_ADDLSH_N_IP2 },
#endif
#if HAVE_NATIVE_mpn_sublsh1_n == 1
  { TRY(mpn_sublsh1_n), TYPE_SUBLSH1_N },
#endif
#if HAVE_NATIVE_mpn_sublsh2_n == 1
  { TRY(mpn_sublsh2_n), TYPE_SUBLSH2_N },
#endif
#if HAVE_NATIVE_mpn_sublsh_n
  { TRY(mpn_sublsh_n), TYPE_SUBLSH_N },
#endif
#if HAVE_NATIVE_mpn_sublsh1_n_ip1
  { TRY_FUNFUN(mpn_sublsh1_n_ip1), TYPE_SUBLSH1_N_IP1 },
#endif
#if HAVE_NATIVE_mpn_sublsh2_n_ip1
  { TRY_FUNFUN(mpn_sublsh2_n_ip1), TYPE_SUBLSH2_N_IP1 },
#endif
#if HAVE_NATIVE_mpn_sublsh_n_ip1
  { TRY_FUNFUN(mpn_sublsh_n_ip1), TYPE_SUBLSH_N_IP1 },
#endif
#if HAVE_NATIVE_mpn_rsblsh1_n == 1
  { TRY(mpn_rsblsh1_n), TYPE_RSBLSH1_N },
#endif
#if HAVE_NATIVE_mpn_rsblsh2_n == 1
  { TRY(mpn_rsblsh2_n), TYPE_RSBLSH2_N },
#endif
#if HAVE_NATIVE_mpn_rsblsh_n
  { TRY(mpn_rsblsh_n), TYPE_RSBLSH_N },
#endif
#if HAVE_NATIVE_mpn_rsh1add_n
  { TRY(mpn_rsh1add_n), TYPE_RSH1ADD_N },
#endif
#if HAVE_NATIVE_mpn_rsh1sub_n
  { TRY(mpn_rsh1sub_n), TYPE_RSH1SUB_N },
#endif

  /* carry-in variants of the shift-and-add/sub/rsb family.
     NOTE(review): the rsblsh1_nc and rsblsh2_nc guards below test plain
     truth whereas the addlsh/sublsh 1/2 guards test "== 1" - confirm this
     difference is intentional.  */
#if HAVE_NATIVE_mpn_addlsh1_nc == 1
  { TRY(mpn_addlsh1_nc), TYPE_ADDLSH1_NC },
#endif
#if HAVE_NATIVE_mpn_addlsh2_nc == 1
  { TRY(mpn_addlsh2_nc), TYPE_ADDLSH2_NC },
#endif
#if HAVE_NATIVE_mpn_addlsh_nc
  { TRY(mpn_addlsh_nc), TYPE_ADDLSH_NC },
#endif
#if HAVE_NATIVE_mpn_sublsh1_nc == 1
  { TRY(mpn_sublsh1_nc), TYPE_SUBLSH1_NC },
#endif
#if HAVE_NATIVE_mpn_sublsh2_nc == 1
  { TRY(mpn_sublsh2_nc), TYPE_SUBLSH2_NC },
#endif
#if HAVE_NATIVE_mpn_sublsh_nc
  { TRY(mpn_sublsh_nc), TYPE_SUBLSH_NC },
#endif
#if HAVE_NATIVE_mpn_rsblsh1_nc
  { TRY(mpn_rsblsh1_nc), TYPE_RSBLSH1_NC },
#endif
#if HAVE_NATIVE_mpn_rsblsh2_nc
  { TRY(mpn_rsblsh2_nc), TYPE_RSBLSH2_NC },
#endif
#if HAVE_NATIVE_mpn_rsblsh_nc
  { TRY(mpn_rsblsh_nc), TYPE_RSBLSH_NC },
#endif

  /* logical operations, via the wrapper functions */
  { TRY_FUNFUN(mpn_and_n), TYPE_AND_N },
  { TRY_FUNFUN(mpn_andn_n), TYPE_ANDN_N },
  { TRY_FUNFUN(mpn_nand_n), TYPE_NAND_N },
  { TRY_FUNFUN(mpn_ior_n), TYPE_IOR_N },
  { TRY_FUNFUN(mpn_iorn_n), TYPE_IORN_N },
  { TRY_FUNFUN(mpn_nior_n), TYPE_NIOR_N },
  { TRY_FUNFUN(mpn_xor_n), TYPE_XOR_N },
  { TRY_FUNFUN(mpn_xnor_n), TYPE_XNOR_N },

  /* division and remainder by a single limb */
  { TRY(mpn_divrem_1), TYPE_DIVREM_1 },
#if USE_PREINV_DIVREM_1
  { TRY(mpn_preinv_divrem_1), TYPE_PREINV_DIVREM_1 },
#endif
  { TRY(mpn_mod_1), TYPE_MOD_1 },
#if USE_PREINV_MOD_1
  { TRY(mpn_preinv_mod_1), TYPE_PREINV_MOD_1 },
#endif
#if HAVE_NATIVE_mpn_divrem_1c
  { TRY(mpn_divrem_1c), TYPE_DIVREM_1C },
#endif
#if HAVE_NATIVE_mpn_mod_1c
  { TRY(mpn_mod_1c), TYPE_MOD_1C },
#endif
  { TRY(mpn_div_qr_1n_pi1), TYPE_DIV_QR_1N_PI1 },
#if GMP_NUMB_BITS % 4 == 0
  { TRY(mpn_mod_34lsub1), TYPE_MOD_34LSUB1 },
#endif

  { TRY_FUNFUN(udiv_qrnnd), TYPE_UDIV_QRNND, 2 },
#if HAVE_NATIVE_mpn_udiv_qrnnd
  { TRY(mpn_udiv_qrnnd), TYPE_UDIV_QRNND, 2 },
#endif
#if HAVE_NATIVE_mpn_udiv_qrnnd_r
  { TRY(mpn_udiv_qrnnd_r), TYPE_UDIV_QRNND_R, 2 },
#endif

  /* exact division and exact remainder */
  { TRY(mpn_divexact_1), TYPE_DIVEXACT_1 },
  { TRY(mpn_bdiv_q_1), TYPE_BDIV_Q_1 },
  { TRY_FUNFUN(mpn_divexact_by3), TYPE_DIVEXACT_BY3 },
  { TRY(mpn_divexact_by3c), TYPE_DIVEXACT_BY3C },

  { TRY_FUNFUN(mpn_modexact_1_odd), TYPE_MODEXACT_1_ODD },
  { TRY(mpn_modexact_1c_odd), TYPE_MODEXACT_1C_ODD },


  /* multi-limb division */
  { TRY(mpn_sbpi1_div_qr), TYPE_SBPI1_DIV_QR, 3},
  { TRY(mpn_tdiv_qr), TYPE_TDIV_QR },

  /* mul_1 and mul_N: for mul_N the third initializer (minsize) equals N */
  { TRY(mpn_mul_1), TYPE_MUL_1 },
#if HAVE_NATIVE_mpn_mul_1c
  { TRY(mpn_mul_1c), TYPE_MUL_1C },
#endif
#if HAVE_NATIVE_mpn_mul_2
  { TRY(mpn_mul_2), TYPE_MUL_2, 2 },
#endif
#if HAVE_NATIVE_mpn_mul_3
  { TRY(mpn_mul_3), TYPE_MUL_3, 3 },
#endif
#if HAVE_NATIVE_mpn_mul_4
  { TRY(mpn_mul_4), TYPE_MUL_4, 4 },
#endif
#if HAVE_NATIVE_mpn_mul_5
  { TRY(mpn_mul_5), TYPE_MUL_5, 5 },
#endif
#if HAVE_NATIVE_mpn_mul_6
  { TRY(mpn_mul_6), TYPE_MUL_6, 6 },
#endif

  /* shifts */
  { TRY(mpn_rshift), TYPE_RSHIFT },
  { TRY(mpn_lshift), TYPE_LSHIFT },
  { TRY(mpn_lshiftc), TYPE_LSHIFTC },


  /* basecase and general multiplication / squaring */
  { TRY(mpn_mul_basecase), TYPE_MUL_MN },
  { TRY(mpn_mulmid_basecase), TYPE_MULMID_MN },
  { TRY(mpn_mullo_basecase), TYPE_MULLO_N },
  { TRY(mpn_sqrlo_basecase), TYPE_SQRLO },
  { TRY(mpn_sqrlo), TYPE_SQRLO },
#if SQR_TOOM2_THRESHOLD > 0
  { TRY(mpn_sqr_basecase), TYPE_SQR },
#endif

  { TRY(mpn_mul), TYPE_MUL_MN },
  { TRY(mpn_mul_n), TYPE_MUL_N },
  { TRY(mpn_sqr), TYPE_SQR },

  { TRY_FUNFUN(umul_ppmm), TYPE_UMUL_PPMM, 2 },
#if HAVE_NATIVE_mpn_umul_ppmm
  { TRY(mpn_umul_ppmm), TYPE_UMUL_PPMM, 2 },
#endif
#if HAVE_NATIVE_mpn_umul_ppmm_r
  { TRY(mpn_umul_ppmm_r), TYPE_UMUL_PPMM_R, 2 },
#endif

  /* Toom routines, via the scratch-allocating wrappers; minsize comes
     from the matching MPN_TOOMxx_..._MINSIZE constant */
  { TRY_FUNFUN(mpn_toom22_mul), TYPE_MUL_N, MPN_TOOM22_MUL_MINSIZE },
  { TRY_FUNFUN(mpn_toom2_sqr), TYPE_SQR, MPN_TOOM2_SQR_MINSIZE },
  {
TRY_FUNFUN(mpn_toom33_mul), TYPE_MUL_N, MPN_TOOM33_MUL_MINSIZE }, 1994 { TRY_FUNFUN(mpn_toom3_sqr), TYPE_SQR, MPN_TOOM3_SQR_MINSIZE }, 1995 { TRY_FUNFUN(mpn_toom44_mul), TYPE_MUL_N, MPN_TOOM44_MUL_MINSIZE }, 1996 { TRY_FUNFUN(mpn_toom4_sqr), TYPE_SQR, MPN_TOOM4_SQR_MINSIZE }, 1997 1998 { TRY(mpn_mulmid_n), TYPE_MULMID_N, 1 }, 1999 { TRY(mpn_mulmid), TYPE_MULMID_MN, 1 }, 2000 { TRY_FUNFUN(mpn_toom42_mulmid), TYPE_MULMID_N, 2001 (2 * MPN_TOOM42_MULMID_MINSIZE - 1) }, 2002 2003 { TRY(mpn_gcd_1), TYPE_GCD_1 }, 2004 { TRY(mpn_gcd), TYPE_GCD }, 2005 { TRY(mpz_legendre), TYPE_MPZ_LEGENDRE }, 2006 { TRY(mpz_jacobi), TYPE_MPZ_JACOBI }, 2007 { TRY(mpz_kronecker), TYPE_MPZ_KRONECKER }, 2008 { TRY(mpz_kronecker_ui), TYPE_MPZ_KRONECKER_UI }, 2009 { TRY(mpz_kronecker_si), TYPE_MPZ_KRONECKER_SI }, 2010 { TRY(mpz_ui_kronecker), TYPE_MPZ_UI_KRONECKER }, 2011 { TRY(mpz_si_kronecker), TYPE_MPZ_SI_KRONECKER }, 2012 2013 { TRY(mpn_popcount), TYPE_POPCOUNT }, 2014 { TRY(mpn_hamdist), TYPE_HAMDIST }, 2015 2016 { TRY(mpn_sqrtrem), TYPE_SQRTREM }, 2017 { TRY_FUNFUN(mpn_sqrt), TYPE_SQRT }, 2018 2019 { TRY_FUNFUN(MPN_ZERO), TYPE_ZERO }, 2020 2021 { TRY(mpn_get_str), TYPE_GET_STR }, 2022 2023 { TRY(mpn_binvert), TYPE_BINVERT }, 2024 { TRY(mpn_invert), TYPE_INVERT }, 2025 2026 #ifdef EXTRA_ROUTINES 2027 EXTRA_ROUTINES 2028 #endif 2029 }; 2030 2031 const struct choice_t *choice = NULL; 2032 2033 2034 void 2035 mprotect_maybe (void *addr, size_t len, int prot) 2036 { 2037 if (!option_redzones) 2038 return; 2039 2040 #if HAVE_MPROTECT 2041 if (mprotect (addr, len, prot) != 0) 2042 { 2043 fprintf (stderr, "Cannot mprotect %p 0x%X 0x%X: %s\n", 2044 addr, (unsigned) len, prot, strerror (errno)); 2045 exit (1); 2046 } 2047 #else 2048 { 2049 static int warned = 0; 2050 if (!warned) 2051 { 2052 fprintf (stderr, 2053 "mprotect not available, bounds testing not performed\n"); 2054 warned = 1; 2055 } 2056 } 2057 #endif 2058 } 2059 2060 /* round "a" up to a multiple of "m" */ 2061 size_t 2062 
round_up_multiple (size_t a, size_t m) 2063 { 2064 unsigned long r; 2065 2066 r = a % m; 2067 if (r == 0) 2068 return a; 2069 else 2070 return a + (m - r); 2071 } 2072 2073 2074 /* On some systems it seems that only an mmap'ed region can be mprotect'ed, 2075 for instance HP-UX 10. 2076 2077 mmap will almost certainly return a pointer already aligned to a page 2078 boundary, but it's easy enough to share the alignment handling with the 2079 malloc case. */ 2080 2081 void 2082 malloc_region (struct region_t *r, mp_size_t n) 2083 { 2084 mp_ptr p; 2085 size_t nbytes; 2086 2087 ASSERT ((pagesize % GMP_LIMB_BYTES) == 0); 2088 2089 n = round_up_multiple (n, PAGESIZE_LIMBS); 2090 r->size = n; 2091 2092 nbytes = n*GMP_LIMB_BYTES + 2*REDZONE_BYTES + pagesize; 2093 2094 #if defined (MAP_ANONYMOUS) && ! defined (MAP_ANON) 2095 #define MAP_ANON MAP_ANONYMOUS 2096 #endif 2097 2098 #if HAVE_MMAP && defined (MAP_ANON) 2099 /* note must pass fd=-1 for MAP_ANON on BSD */ 2100 p = (mp_ptr) mmap (NULL, nbytes, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON, -1, 0); 2101 if (p == (void *) -1) 2102 { 2103 fprintf (stderr, "Cannot mmap %#x anon bytes: %s\n", 2104 (unsigned) nbytes, strerror (errno)); 2105 exit (1); 2106 } 2107 #else 2108 p = (mp_ptr) malloc (nbytes); 2109 ASSERT_ALWAYS (p != NULL); 2110 #endif 2111 2112 p = (mp_ptr) align_pointer (p, pagesize); 2113 2114 mprotect_maybe (p, REDZONE_BYTES, PROT_NONE); 2115 p += REDZONE_LIMBS; 2116 r->ptr = p; 2117 2118 mprotect_maybe (p + n, REDZONE_BYTES, PROT_NONE); 2119 } 2120 2121 void 2122 mprotect_region (const struct region_t *r, int prot) 2123 { 2124 mprotect_maybe (r->ptr, r->size, prot); 2125 } 2126 2127 2128 /* First four entries must be 0,1,2,3 for the benefit of CARRY_BIT, CARRY_3, 2129 and CARRY_4 */ 2130 mp_limb_t carry_array[] = { 2131 0, 1, 2, 3, 2132 4, 2133 CNST_LIMB(1) << 8, 2134 CNST_LIMB(1) << 16, 2135 GMP_NUMB_MAX 2136 }; 2137 int carry_index; 2138 2139 #define CARRY_COUNT \ 2140 ((tr->carry == CARRY_BIT) ? 
2 \ 2141 : tr->carry == CARRY_3 ? 3 \ 2142 : tr->carry == CARRY_4 ? 4 \ 2143 : (tr->carry == CARRY_LIMB || tr->carry == CARRY_DIVISOR) \ 2144 ? numberof(carry_array) + CARRY_RANDOMS \ 2145 : 1) 2146 2147 #define MPN_RANDOM_ALT(index,dst,size) \ 2148 (((index) & 1) ? refmpn_random (dst, size) : refmpn_random2 (dst, size)) 2149 2150 /* The dummy value after MPN_RANDOM_ALT ensures both sides of the ":" have 2151 the same type */ 2152 #define CARRY_ITERATION \ 2153 for (carry_index = 0; \ 2154 (carry_index < numberof (carry_array) \ 2155 ? (carry = carry_array[carry_index]) \ 2156 : (MPN_RANDOM_ALT (carry_index, &carry, 1), (mp_limb_t) 0)), \ 2157 (tr->carry == CARRY_DIVISOR ? carry %= divisor : 0), \ 2158 carry_index < CARRY_COUNT; \ 2159 carry_index++) 2160 2161 2162 mp_limb_t multiplier_array[] = { 2163 0, 1, 2, 3, 2164 CNST_LIMB(1) << 8, 2165 CNST_LIMB(1) << 16, 2166 GMP_NUMB_MAX - 2, 2167 GMP_NUMB_MAX - 1, 2168 GMP_NUMB_MAX 2169 }; 2170 int multiplier_index; 2171 2172 mp_limb_t divisor_array[] = { 2173 1, 2, 3, 2174 CNST_LIMB(1) << 8, 2175 CNST_LIMB(1) << 16, 2176 CNST_LIMB(1) << (GMP_NUMB_BITS/2 - 1), 2177 GMP_NUMB_MAX >> (GMP_NUMB_BITS/2), 2178 GMP_NUMB_HIGHBIT, 2179 GMP_NUMB_HIGHBIT + 1, 2180 GMP_NUMB_MAX - 2, 2181 GMP_NUMB_MAX - 1, 2182 GMP_NUMB_MAX 2183 }; 2184 2185 int divisor_index; 2186 2187 /* The dummy value after MPN_RANDOM_ALT ensures both sides of the ":" have 2188 the same type */ 2189 #define ARRAY_ITERATION(var, index, limit, array, randoms, cond) \ 2190 for (index = 0; \ 2191 (index < numberof (array) \ 2192 ? (var = array[index]) \ 2193 : (MPN_RANDOM_ALT (index, &var, 1), (mp_limb_t) 0)), \ 2194 index < limit; \ 2195 index++) 2196 2197 #define MULTIPLIER_COUNT \ 2198 (tr->multiplier \ 2199 ? 
numberof (multiplier_array) + MULTIPLIER_RANDOMS \ 2200 : 1) 2201 2202 #define MULTIPLIER_ITERATION \ 2203 ARRAY_ITERATION(multiplier, multiplier_index, MULTIPLIER_COUNT, \ 2204 multiplier_array, MULTIPLIER_RANDOMS, TRY_MULTIPLIER) 2205 2206 #define DIVISOR_COUNT \ 2207 (tr->divisor \ 2208 ? numberof (divisor_array) + DIVISOR_RANDOMS \ 2209 : 1) 2210 2211 #define DIVISOR_ITERATION \ 2212 ARRAY_ITERATION(divisor, divisor_index, DIVISOR_COUNT, divisor_array, \ 2213 DIVISOR_RANDOMS, TRY_DIVISOR) 2214 2215 2216 /* overlap_array[].s[i] is where s[i] should be, 0 or 1 means overlapping 2217 d[0] or d[1] respectively, -1 means a separate (write-protected) 2218 location. */ 2219 2220 struct overlap_t { 2221 int s[NUM_SOURCES]; 2222 } overlap_array[] = { 2223 { { -1, -1, -1, -1, -1 } }, 2224 { { 0, -1, -1, -1, -1 } }, 2225 { { -1, 0, -1, -1, -1 } }, 2226 { { 0, 0, -1, -1, -1 } }, 2227 { { 1, -1, -1, -1, -1 } }, 2228 { { -1, 1, -1, -1, -1 } }, 2229 { { 1, 1, -1, -1, -1 } }, 2230 { { 0, 1, -1, -1, -1 } }, 2231 { { 1, 0, -1, -1, -1 } }, 2232 }; 2233 2234 struct overlap_t *overlap, *overlap_limit; 2235 2236 #define OVERLAP_COUNT \ 2237 (tr->overlap & OVERLAP_NONE ? 1 \ 2238 : tr->overlap & OVERLAP_NOT_SRCS ? 3 \ 2239 : tr->overlap & OVERLAP_NOT_SRC2 ? 2 \ 2240 : tr->overlap & OVERLAP_NOT_DST2 ? 4 \ 2241 : tr->dst[1] ? 9 \ 2242 : tr->src[1] ? 4 \ 2243 : tr->dst[0] ? 
2 \ 2244 : 1) 2245 2246 #define OVERLAP_ITERATION \ 2247 for (overlap = &overlap_array[0], \ 2248 overlap_limit = &overlap_array[OVERLAP_COUNT]; \ 2249 overlap < overlap_limit; \ 2250 overlap++) 2251 2252 2253 int base = 10; 2254 2255 #define T_RAND_COUNT 2 2256 int t_rand; 2257 2258 void 2259 t_random (mp_ptr ptr, mp_size_t n) 2260 { 2261 if (n == 0) 2262 return; 2263 2264 switch (option_data) { 2265 case DATA_TRAND: 2266 switch (t_rand) { 2267 case 0: refmpn_random (ptr, n); break; 2268 case 1: refmpn_random2 (ptr, n); break; 2269 default: abort(); 2270 } 2271 break; 2272 case DATA_SEQ: 2273 { 2274 static mp_limb_t counter = 0; 2275 mp_size_t i; 2276 for (i = 0; i < n; i++) 2277 ptr[i] = ++counter; 2278 } 2279 break; 2280 case DATA_ZEROS: 2281 refmpn_zero (ptr, n); 2282 break; 2283 case DATA_FFS: 2284 refmpn_fill (ptr, n, GMP_NUMB_MAX); 2285 break; 2286 case DATA_2FD: 2287 /* Special value 0x2FFF...FFFD, which divided by 3 gives 0xFFF...FFF, 2288 inducing the q1_ff special case in the mul-by-inverse part of some 2289 versions of divrem_1 and mod_1. */ 2290 refmpn_fill (ptr, n, (mp_limb_t) -1); 2291 ptr[n-1] = 2; 2292 ptr[0] -= 2; 2293 break; 2294 2295 default: 2296 abort(); 2297 } 2298 } 2299 #define T_RAND_ITERATION \ 2300 for (t_rand = 0; t_rand < T_RAND_COUNT; t_rand++) 2301 2302 2303 void 2304 print_each (const struct each_t *e) 2305 { 2306 int i; 2307 2308 printf ("%s %s\n", e->name, e == &ref ? 
tr->reference_name : choice->name); 2309 if (tr->retval) 2310 mpn_trace (" retval", &e->retval, 1); 2311 2312 for (i = 0; i < NUM_DESTS; i++) 2313 { 2314 if (tr->dst[i]) 2315 { 2316 if (tr->dst_bytes[i]) 2317 byte_tracen (" d[%d]", i, e->d[i].p, d[i].size); 2318 else 2319 mpn_tracen (" d[%d]", i, e->d[i].p, d[i].size); 2320 printf (" located %p\n", (void *) (e->d[i].p)); 2321 } 2322 } 2323 2324 for (i = 0; i < NUM_SOURCES; i++) 2325 if (tr->src[i]) 2326 printf (" s[%d] located %p\n", i, (void *) (e->s[i].p)); 2327 } 2328 2329 2330 void 2331 print_all (void) 2332 { 2333 int i; 2334 2335 printf ("\n"); 2336 printf ("size %ld\n", (long) size); 2337 if (tr->size2) 2338 printf ("size2 %ld\n", (long) size2); 2339 2340 for (i = 0; i < NUM_DESTS; i++) 2341 if (d[i].size != size) 2342 printf ("d[%d].size %ld\n", i, (long) d[i].size); 2343 2344 if (tr->multiplier) 2345 mpn_trace (" multiplier", &multiplier, 1); 2346 if (tr->divisor) 2347 mpn_trace (" divisor", &divisor, 1); 2348 if (tr->shift) 2349 printf (" shift %lu\n", shift); 2350 if (tr->carry) 2351 mpn_trace (" carry", &carry, 1); 2352 if (tr->msize) 2353 mpn_trace (" multiplier_N", multiplier_N, tr->msize); 2354 2355 for (i = 0; i < NUM_DESTS; i++) 2356 if (tr->dst[i]) 2357 printf (" d[%d] %s, align %ld, size %ld\n", 2358 i, d[i].high ? "high" : "low", 2359 (long) d[i].align, (long) d[i].size); 2360 2361 for (i = 0; i < NUM_SOURCES; i++) 2362 { 2363 if (tr->src[i]) 2364 { 2365 printf (" s[%d] %s, align %ld, ", 2366 i, s[i].high ? "high" : "low", (long) s[i].align); 2367 switch (overlap->s[i]) { 2368 case -1: 2369 printf ("no overlap\n"); 2370 break; 2371 default: 2372 printf ("==d[%d]%s\n", 2373 overlap->s[i], 2374 tr->overlap == OVERLAP_LOW_TO_HIGH ? "+a" 2375 : tr->overlap == OVERLAP_HIGH_TO_LOW ? 
"-a" 2376 : ""); 2377 break; 2378 } 2379 printf (" s[%d]=", i); 2380 if (tr->carry_sign && (carry & (1 << i))) 2381 printf ("-"); 2382 mpn_trace (NULL, s[i].p, SRC_SIZE(i)); 2383 } 2384 } 2385 2386 if (tr->dst0_from_src1) 2387 mpn_trace (" d[0]", s[1].region.ptr, size); 2388 2389 if (tr->reference) 2390 print_each (&ref); 2391 print_each (&fun); 2392 } 2393 2394 void 2395 compare (void) 2396 { 2397 int error = 0; 2398 int i; 2399 2400 if (tr->retval && ref.retval != fun.retval) 2401 { 2402 gmp_printf ("Different return values (%Mu, %Mu)\n", 2403 ref.retval, fun.retval); 2404 error = 1; 2405 } 2406 2407 for (i = 0; i < NUM_DESTS; i++) 2408 { 2409 switch (tr->dst_size[i]) { 2410 case SIZE_RETVAL: 2411 case SIZE_GET_STR: 2412 d[i].size = ref.retval; 2413 break; 2414 } 2415 } 2416 2417 for (i = 0; i < NUM_DESTS; i++) 2418 { 2419 if (! tr->dst[i]) 2420 continue; 2421 2422 if (tr->dst_bytes[i]) 2423 { 2424 if (memcmp (ref.d[i].p, fun.d[i].p, d[i].size) != 0) 2425 { 2426 printf ("Different d[%d] data results, low diff at %ld, high diff at %ld\n", 2427 i, 2428 (long) byte_diff_lowest (ref.d[i].p, fun.d[i].p, d[i].size), 2429 (long) byte_diff_highest (ref.d[i].p, fun.d[i].p, d[i].size)); 2430 error = 1; 2431 } 2432 } 2433 else 2434 { 2435 if (d[i].size != 0 2436 && ! refmpn_equal_anynail (ref.d[i].p, fun.d[i].p, d[i].size)) 2437 { 2438 printf ("Different d[%d] data results, low diff at %ld, high diff at %ld\n", 2439 i, 2440 (long) mpn_diff_lowest (ref.d[i].p, fun.d[i].p, d[i].size), 2441 (long) mpn_diff_highest (ref.d[i].p, fun.d[i].p, d[i].size)); 2442 error = 1; 2443 } 2444 } 2445 } 2446 2447 if (error) 2448 { 2449 print_all(); 2450 abort(); 2451 } 2452 } 2453 2454 2455 /* The functions are cast if the return value should be a long rather than 2456 the default mp_limb_t. This is necessary under _LONG_LONG_LIMB. This 2457 might not be enough if some actual calling conventions checking is 2458 implemented on a long long limb system. 
/* Invoke FUNCTION on the operands held in E, passing the arguments in the
   order that belongs to choice->type, and store the return value (for the
   types that have one) in e->retval.  An unknown type prints a message and
   aborts.

   The functions are cast if the return value should be a long rather than
   the default mp_limb_t.  This is necessary under _LONG_LONG_LIMB.  This
   might not be enough if some actual calling conventions checking is
   implemented on a long long limb system.  */

void
call (struct each_t *e, tryfun_t function)
{
  switch (choice->type) {
  /* (dst, src0, size, src1, size2) */
  case TYPE_ADD:
  case TYPE_SUB:
    e->retval = CALLING_CONVENTIONS (function)
      (e->d[0].p, e->s[0].p, size, e->s[1].p, size2);
    break;

  /* (dst, src0, src1, size) */
  case TYPE_ADD_N:
  case TYPE_SUB_N:
  case TYPE_ADDLSH1_N:
  case TYPE_ADDLSH2_N:
  case TYPE_SUBLSH1_N:
  case TYPE_SUBLSH2_N:
  case TYPE_RSBLSH1_N:
  case TYPE_RSBLSH2_N:
  case TYPE_RSH1ADD_N:
  case TYPE_RSH1SUB_N:
    e->retval = CALLING_CONVENTIONS (function)
      (e->d[0].p, e->s[0].p, e->s[1].p, size);
    break;
  /* (dst, src0, src1, size, shift) */
  case TYPE_ADDLSH_N:
  case TYPE_SUBLSH_N:
  case TYPE_RSBLSH_N:
    e->retval = CALLING_CONVENTIONS (function)
      (e->d[0].p, e->s[0].p, e->s[1].p, size, shift);
    break;
  /* (dst, src0, src1, size, shift, carry) */
  case TYPE_ADDLSH_NC:
  case TYPE_SUBLSH_NC:
  case TYPE_RSBLSH_NC:
    e->retval = CALLING_CONVENTIONS (function)
      (e->d[0].p, e->s[0].p, e->s[1].p, size, shift, carry);
    break;
  /* (dst, src0, src1, size, carry) */
  case TYPE_ADDLSH1_NC:
  case TYPE_ADDLSH2_NC:
  case TYPE_SUBLSH1_NC:
  case TYPE_SUBLSH2_NC:
  case TYPE_RSBLSH1_NC:
  case TYPE_RSBLSH2_NC:
  case TYPE_ADD_NC:
  case TYPE_SUB_NC:
    e->retval = CALLING_CONVENTIONS (function)
      (e->d[0].p, e->s[0].p, e->s[1].p, size, carry);
    break;
  /* the "carry" value is passed as the leading argument here */
  case TYPE_ADDCND_N:
  case TYPE_SUBCND_N:
    e->retval = CALLING_CONVENTIONS (function)
      (carry, e->d[0].p, e->s[0].p, e->s[1].p, size);
    break;
  /* the err variants take a second destination and one, two or three
     further sources */
  case TYPE_ADD_ERR1_N:
  case TYPE_SUB_ERR1_N:
    e->retval = CALLING_CONVENTIONS (function)
      (e->d[0].p, e->s[0].p, e->s[1].p, e->d[1].p, e->s[2].p, size, carry);
    break;
  case TYPE_ADD_ERR2_N:
  case TYPE_SUB_ERR2_N:
    e->retval = CALLING_CONVENTIONS (function)
      (e->d[0].p, e->s[0].p, e->s[1].p, e->d[1].p, e->s[2].p, e->s[3].p, size, carry);
    break;
  case TYPE_ADD_ERR3_N:
  case TYPE_SUB_ERR3_N:
    e->retval = CALLING_CONVENTIONS (function)
      (e->d[0].p, e->s[0].p, e->s[1].p, e->d[1].p, e->s[2].p, e->s[3].p, e->s[4].p, size, carry);
    break;

  /* (dst, src0, size, multiplier) */
  case TYPE_MUL_1:
  case TYPE_ADDMUL_1:
  case TYPE_SUBMUL_1:
    e->retval = CALLING_CONVENTIONS (function)
      (e->d[0].p, e->s[0].p, size, multiplier);
    break;
  /* (dst, src0, size, multiplier, carry) */
  case TYPE_MUL_1C:
  case TYPE_ADDMUL_1C:
  case TYPE_SUBMUL_1C:
    e->retval = CALLING_CONVENTIONS (function)
      (e->d[0].p, e->s[0].p, size, multiplier, carry);
    break;

  /* multi-limb multiplier passed by pointer; a size of 1 aborts the run */
  case TYPE_MUL_2:
  case TYPE_MUL_3:
  case TYPE_MUL_4:
  case TYPE_MUL_5:
  case TYPE_MUL_6:
    if (size == 1)
      abort ();
    e->retval = CALLING_CONVENTIONS (function)
      (e->d[0].p, e->s[0].p, size, multiplier_N);
    break;

  /* same argument layout and size check as the TYPE_MUL_2.. group */
  case TYPE_ADDMUL_2:
  case TYPE_ADDMUL_3:
  case TYPE_ADDMUL_4:
  case TYPE_ADDMUL_5:
  case TYPE_ADDMUL_6:
  case TYPE_ADDMUL_7:
  case TYPE_ADDMUL_8:
    if (size == 1)
      abort ();
    e->retval = CALLING_CONVENTIONS (function)
      (e->d[0].p, e->s[0].p, size, multiplier_N);
    break;

  /* (dst, src0, src1, size), return value not recorded */
  case TYPE_AND_N:
  case TYPE_ANDN_N:
  case TYPE_NAND_N:
  case TYPE_IOR_N:
  case TYPE_IORN_N:
  case TYPE_NIOR_N:
  case TYPE_XOR_N:
  case TYPE_XNOR_N:
    CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, e->s[1].p, size);
    break;

  /* two destinations: (dst0, dst1, src0, src1, size [, carry]) */
  case TYPE_ADDSUB_N:
    e->retval = CALLING_CONVENTIONS (function)
      (e->d[0].p, e->d[1].p, e->s[0].p, e->s[1].p, size);
    break;
  case TYPE_ADDSUB_NC:
    e->retval = CALLING_CONVENTIONS (function)
      (e->d[0].p, e->d[1].p, e->s[0].p, e->s[1].p, size, carry);
    break;

  /* (dst, src0, size), return value not recorded */
  case TYPE_COPY:
  case TYPE_COPYI:
  case TYPE_COPYD:
  case TYPE_COM:
    CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, size);
    break;

  /* (dst, src0, size) with a return value */
  case TYPE_ADDLSH1_N_IP1:
  case TYPE_ADDLSH2_N_IP1:
  case TYPE_ADDLSH1_N_IP2:
  case TYPE_ADDLSH2_N_IP2:
  case TYPE_SUBLSH1_N_IP1:
  case TYPE_SUBLSH2_N_IP1:
  case TYPE_DIVEXACT_BY3:
    e->retval = CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, size);
    break;
  case TYPE_DIVEXACT_BY3C:
    e->retval = CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, size,
                                                carry);
    break;


  /* (dst, src0, size, divisor [, carry]) */
  case TYPE_DIVMOD_1:
  case TYPE_DIVEXACT_1:
  case TYPE_BDIV_Q_1:
    e->retval = CALLING_CONVENTIONS (function)
      (e->d[0].p, e->s[0].p, size, divisor);
    break;
  case TYPE_DIVMOD_1C:
    e->retval = CALLING_CONVENTIONS (function)
      (e->d[0].p, e->s[0].p, size, divisor, carry);
    break;
  /* the divrem forms pass size2 as the second argument, before the source */
  case TYPE_DIVREM_1:
    e->retval = CALLING_CONVENTIONS (function)
      (e->d[0].p, size2, e->s[0].p, size, divisor);
    break;
  case TYPE_DIVREM_1C:
    e->retval = CALLING_CONVENTIONS (function)
      (e->d[0].p, size2, e->s[0].p, size, divisor, carry);
    break;
  case TYPE_PREINV_DIVREM_1:
    {
      /* this local "shift" hides the global of the same name; the inverse
         is taken of the divisor shifted left by its leading zero count */
      mp_limb_t  dinv;
      unsigned   shift;
      shift = refmpn_count_leading_zeros (divisor);
      dinv = refmpn_invert_limb (divisor << shift);
      e->retval = CALLING_CONVENTIONS (function)
        (e->d[0].p, size2, e->s[0].p, size, divisor, dinv, shift);
    }
    break;
  /* no destination: (src0, size, divisor [, carry]) */
  case TYPE_MOD_1:
  case TYPE_MODEXACT_1_ODD:
    e->retval = CALLING_CONVENTIONS (function)
      (e->s[0].p, size, divisor);
    break;
  case TYPE_MOD_1C:
  case TYPE_MODEXACT_1C_ODD:
    e->retval = CALLING_CONVENTIONS (function)
      (e->s[0].p, size, divisor, carry);
    break;
  case TYPE_PREINV_MOD_1:
    e->retval = CALLING_CONVENTIONS (function)
      (e->s[0].p, size, divisor, refmpn_invert_limb (divisor));
    break;
  case TYPE_DIV_QR_1N_PI1:
    {
      /* the low limb of s[1] is passed by value as the fourth argument */
      mp_limb_t dinv = refmpn_invert_limb (divisor);
      e->retval = CALLING_CONVENTIONS (function)
        (e->d[0].p, e->s[0].p, size, e->s[1].p[0], divisor, dinv);
      break;
    }

  case TYPE_MOD_34LSUB1:
    e->retval = CALLING_CONVENTIONS (function) (e->s[0].p, size);
    break;

  /* two limbs of s[0] passed by value, high limb first; the _R form takes
     the destination pointer last instead of first */
  case TYPE_UDIV_QRNND:
    e->retval = CALLING_CONVENTIONS (function)
      (e->d[0].p, e->s[0].p[1], e->s[0].p[0], divisor);
    break;
  case TYPE_UDIV_QRNND_R:
    e->retval = CALLING_CONVENTIONS (function)
      (e->s[0].p[1], e->s[0].p[0], divisor, e->d[0].p);
    break;

  case TYPE_SBPI1_DIV_QR:
    {
      /* d[1] is loaded with a copy of s[0] and passed in its place, so the
         call works on the copy rather than on the source itself */
      gmp_pi1_t dinv;
      invert_pi1 (dinv, e->s[1].p[size2-1], e->s[1].p[size2-2]); /* FIXME: use refinvert_pi1 */
      refmpn_copyi (e->d[1].p, e->s[0].p, size);        /* dividend */
      refmpn_fill (e->d[0].p, size-size2, 0x98765432);  /* quotient */
      e->retval = CALLING_CONVENTIONS (function)
        (e->d[0].p, e->d[1].p, size, e->s[1].p, size2, dinv.inv32);
      refmpn_zero (e->d[1].p+size2, size-size2);    /* excess over remainder */
    }
    break;

  /* (dst0, dst1, 0, src0, size, src1, size2), return value not recorded */
  case TYPE_TDIV_QR:
    CALLING_CONVENTIONS (function) (e->d[0].p, e->d[1].p, 0,
                                    e->s[0].p, size, e->s[1].p, size2);
    break;

  case TYPE_GCD_1:
    /* Must have a non-zero src, but this probably isn't the best way to do
       it. */
    if (refmpn_zero_p (e->s[0].p, size))
      e->retval = 0;
    else
      e->retval = CALLING_CONVENTIONS (function) (e->s[0].p, size, divisor);
    break;

  case TYPE_GCD:
    /* Sources are destroyed, so they're saved and replaced, but a general
       approach to this might be better.  Note that it's still e->s[0].p and
       e->s[1].p that are passed, to get the desired alignments. */
    {
      mp_ptr  s0 = refmpn_malloc_limbs (size);
      mp_ptr  s1 = refmpn_malloc_limbs (size2);
      refmpn_copyi (s0, e->s[0].p, size);
      refmpn_copyi (s1, e->s[1].p, size2);

      /* both source regions are made writable before the call; nothing in
         this function puts the read-only protection back */
      mprotect_region (&s[0].region, PROT_READ|PROT_WRITE);
      mprotect_region (&s[1].region, PROT_READ|PROT_WRITE);
      e->retval = CALLING_CONVENTIONS (function) (e->d[0].p,
                                                  e->s[0].p, size,
                                                  e->s[1].p, size2);
      refmpn_copyi (e->s[0].p, s0, size);
      refmpn_copyi (e->s[1].p, s1, size2);
      free (s0);
      free (s1);
    }
    break;

  case TYPE_GCD_FINDA:
    {
      /* FIXME: do this with a flag */
      /* both limbs of c[] are taken from s[0].p[0], and each is bumped to 1
         if it would otherwise be zero */
      mp_limb_t  c[2];
      c[0] = e->s[0].p[0];
      c[0] += (c[0] == 0);
      c[1] = e->s[0].p[0];
      c[1] += (c[1] == 0);
      e->retval = CALLING_CONVENTIONS (function) (c);
    }
    break;

  /* The mpz cases build mpz_t values by hand, pointing straight at the
     source limbs.  The sign of each operand is chosen from "carry".  */
  case TYPE_MPZ_LEGENDRE:
  case TYPE_MPZ_JACOBI:
    {
      mpz_t  a, b;
      PTR(a) = e->s[0].p; SIZ(a) = (carry==0 ? size : -size);
      PTR(b) = e->s[1].p; SIZ(b) = size2;
      e->retval = CALLING_CONVENTIONS (function) (a, b);
    }
    break;
  case TYPE_MPZ_KRONECKER:
    {
      /* bit 0 of carry picks the sign of a, bit 1 the sign of b */
      mpz_t  a, b;
      PTR(a) = e->s[0].p; SIZ(a) = ((carry&1)==0 ? size : -size);
      PTR(b) = e->s[1].p; SIZ(b) = ((carry&2)==0 ? size2 : -size2);
      e->retval = CALLING_CONVENTIONS (function) (a, b);
    }
    break;
  case TYPE_MPZ_KRONECKER_UI:
    {
      mpz_t  a;
      PTR(a) = e->s[0].p; SIZ(a) = (carry==0 ? size : -size);
      e->retval = CALLING_CONVENTIONS(function) (a, (unsigned long)multiplier);
    }
    break;
  case TYPE_MPZ_KRONECKER_SI:
    {
      mpz_t  a;
      PTR(a) = e->s[0].p; SIZ(a) = (carry==0 ? size : -size);
      e->retval = CALLING_CONVENTIONS (function) (a, (long) multiplier);
    }
    break;
  case TYPE_MPZ_UI_KRONECKER:
    {
      mpz_t  b;
      PTR(b) = e->s[0].p; SIZ(b) = (carry==0 ? size : -size);
      e->retval = CALLING_CONVENTIONS(function) ((unsigned long)multiplier, b);
    }
    break;
  case TYPE_MPZ_SI_KRONECKER:
    {
      mpz_t  b;
      PTR(b) = e->s[0].p; SIZ(b) = (carry==0 ? size : -size);
      e->retval = CALLING_CONVENTIONS (function) ((long) multiplier, b);
    }
    break;

  /* multiplication and squaring, return value not recorded */
  case TYPE_MUL_MN:
  case TYPE_MULMID_MN:
    CALLING_CONVENTIONS (function)
      (e->d[0].p, e->s[0].p, size, e->s[1].p, size2);
    break;
  case TYPE_MUL_N:
  case TYPE_MULLO_N:
    CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, e->s[1].p, size);
    break;
  case TYPE_MULMID_N:
    /* the size handed over is (size+1)/2 rather than size */
    CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, e->s[1].p,
                                    (size + 1) / 2);
    break;
  case TYPE_SQR:
  case TYPE_SQRLO:
    CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, size);
    break;

  /* two limbs of s[0] passed by value; the _R form takes the destination
     pointer last instead of first */
  case TYPE_UMUL_PPMM:
    e->retval = CALLING_CONVENTIONS (function)
      (e->d[0].p, e->s[0].p[0], e->s[0].p[1]);
    break;
  case TYPE_UMUL_PPMM_R:
    e->retval = CALLING_CONVENTIONS (function)
      (e->s[0].p[0], e->s[0].p[1], e->d[0].p);
    break;

  /* (dst, src0, size, shift) */
  case TYPE_ADDLSH_N_IP1:
  case TYPE_ADDLSH_N_IP2:
  case TYPE_SUBLSH_N_IP1:
  case TYPE_LSHIFT:
  case TYPE_LSHIFTC:
  case TYPE_RSHIFT:
    e->retval = CALLING_CONVENTIONS (function)
      (e->d[0].p, e->s[0].p, size, shift);
    break;

  /* called through a pointer cast to return unsigned long */
  case TYPE_POPCOUNT:
    e->retval = (* (unsigned long (*)(ANYARGS))
                 CALLING_CONVENTIONS (function)) (e->s[0].p, size);
    break;
  case TYPE_HAMDIST:
    e->retval = (* (unsigned long (*)(ANYARGS))
                 CALLING_CONVENTIONS (function)) (e->s[0].p, e->s[1].p, size);
    break;

  /* called through a pointer cast to return long */
  case TYPE_SQRTREM:
    e->retval = (* (long (*)(ANYARGS)) CALLING_CONVENTIONS (function))
      (e->d[0].p, e->d[1].p, e->s[0].p, size);
    break;

  case TYPE_SQRT:
    e->retval = (* (long (*)(ANYARGS)) CALLING_CONVENTIONS (function))
      (e->d[0].p, e->s[0].p, size);
    break;

  case TYPE_ZERO:
    CALLING_CONVENTIONS (function) (e->d[0].p, size);
    break;

  case TYPE_GET_STR:
    {
      /* d[0] is a byte destination.  "fill" is the part of it that the
         string for this source does not need; it is set to 0xBA, below the
         string when d[0].high and above it otherwise.  */
      size_t  sizeinbase, fill;
      char    *dst;
      MPN_SIZEINBASE (sizeinbase, e->s[0].p, size, base);
      ASSERT_ALWAYS (sizeinbase <= d[0].size);
      fill = d[0].size - sizeinbase;
      if (d[0].high)
        {
          memset (e->d[0].p, 0xBA, fill);
          dst = (char *) e->d[0].p + fill;
        }
      else
        {
          dst = (char *) e->d[0].p;
          memset (dst + sizeinbase, 0xBA, fill);
        }
      if (POW2_P (base))
        {
          e->retval = CALLING_CONVENTIONS (function) (dst, base,
                                                      e->s[0].p, size);
        }
      else
        {
          /* for a non power of 2 base the function is given a copy of the
             source held in d[1] */
          refmpn_copy (e->d[1].p, e->s[0].p, size);
          e->retval = CALLING_CONVENTIONS (function) (dst, base,
                                                      e->d[1].p, size);
        }
      refmpn_zero (e->d[1].p, size);  /* clobbered or unused */
    }
    break;

  /* (dst, src0, size, scratch) with temporary scratch space sized by the
     matching _itch function */
  case TYPE_INVERT:
    {
      mp_ptr scratch;
      TMP_DECL;
      TMP_MARK;
      scratch = TMP_ALLOC_LIMBS (mpn_invert_itch (size));
      CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, size, scratch);
      TMP_FREE;
    }
    break;
  case TYPE_BINVERT:
    {
      mp_ptr scratch;
      TMP_DECL;
      TMP_MARK;
      scratch = TMP_ALLOC_LIMBS (mpn_binvert_itch (size));
      CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, size, scratch);
      TMP_FREE;
    }
    break;

  /* hook for additional cases supplied through the EXTRA_CALL macro */
#ifdef EXTRA_CALL
    EXTRA_CALL
#endif

  default:
    printf ("Unknown routine type %d\n", choice->type);
    abort ();
    break;
  }
}
2830 2831 case TYPE_ZERO: 2832 CALLING_CONVENTIONS (function) (e->d[0].p, size); 2833 break; 2834 2835 case TYPE_GET_STR: 2836 { 2837 size_t sizeinbase, fill; 2838 char *dst; 2839 MPN_SIZEINBASE (sizeinbase, e->s[0].p, size, base); 2840 ASSERT_ALWAYS (sizeinbase <= d[0].size); 2841 fill = d[0].size - sizeinbase; 2842 if (d[0].high) 2843 { 2844 memset (e->d[0].p, 0xBA, fill); 2845 dst = (char *) e->d[0].p + fill; 2846 } 2847 else 2848 { 2849 dst = (char *) e->d[0].p; 2850 memset (dst + sizeinbase, 0xBA, fill); 2851 } 2852 if (POW2_P (base)) 2853 { 2854 e->retval = CALLING_CONVENTIONS (function) (dst, base, 2855 e->s[0].p, size); 2856 } 2857 else 2858 { 2859 refmpn_copy (e->d[1].p, e->s[0].p, size); 2860 e->retval = CALLING_CONVENTIONS (function) (dst, base, 2861 e->d[1].p, size); 2862 } 2863 refmpn_zero (e->d[1].p, size); /* clobbered or unused */ 2864 } 2865 break; 2866 2867 case TYPE_INVERT: 2868 { 2869 mp_ptr scratch; 2870 TMP_DECL; 2871 TMP_MARK; 2872 scratch = TMP_ALLOC_LIMBS (mpn_invert_itch (size)); 2873 CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, size, scratch); 2874 TMP_FREE; 2875 } 2876 break; 2877 case TYPE_BINVERT: 2878 { 2879 mp_ptr scratch; 2880 TMP_DECL; 2881 TMP_MARK; 2882 scratch = TMP_ALLOC_LIMBS (mpn_binvert_itch (size)); 2883 CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, size, scratch); 2884 TMP_FREE; 2885 } 2886 break; 2887 2888 #ifdef EXTRA_CALL 2889 EXTRA_CALL 2890 #endif 2891 2892 default: 2893 printf ("Unknown routine type %d\n", choice->type); 2894 abort (); 2895 break; 2896 } 2897 } 2898 2899 2900 void 2901 pointer_setup (struct each_t *e) 2902 { 2903 int i, j; 2904 2905 for (i = 0; i < NUM_DESTS; i++) 2906 { 2907 switch (tr->dst_size[i]) { 2908 case 0: 2909 case SIZE_RETVAL: /* will be adjusted later */ 2910 d[i].size = size; 2911 break; 2912 2913 case SIZE_1: 2914 d[i].size = 1; 2915 break; 2916 case SIZE_2: 2917 d[i].size = 2; 2918 break; 2919 case SIZE_3: 2920 d[i].size = 3; 2921 break; 2922 case SIZE_4: 2923 
d[i].size = 4; 2924 break; 2925 case SIZE_6: 2926 d[i].size = 6; 2927 break; 2928 2929 case SIZE_PLUS_1: 2930 d[i].size = size+1; 2931 break; 2932 case SIZE_PLUS_MSIZE_SUB_1: 2933 d[i].size = size + tr->msize - 1; 2934 break; 2935 2936 case SIZE_SUM: 2937 if (tr->size2) 2938 d[i].size = size + size2; 2939 else 2940 d[i].size = 2*size; 2941 break; 2942 2943 case SIZE_SIZE2: 2944 d[i].size = size2; 2945 break; 2946 2947 case SIZE_DIFF: 2948 d[i].size = size - size2; 2949 break; 2950 2951 case SIZE_DIFF_PLUS_1: 2952 d[i].size = size - size2 + 1; 2953 break; 2954 2955 case SIZE_DIFF_PLUS_3: 2956 d[i].size = size - size2 + 3; 2957 break; 2958 2959 case SIZE_CEIL_HALF: 2960 d[i].size = (size+1)/2; 2961 break; 2962 2963 case SIZE_GET_STR: 2964 { 2965 mp_limb_t ff = GMP_NUMB_MAX; 2966 MPN_SIZEINBASE (d[i].size, &ff - (size-1), size, base); 2967 } 2968 break; 2969 2970 default: 2971 printf ("Unrecognised dst_size type %d\n", tr->dst_size[i]); 2972 abort (); 2973 } 2974 } 2975 2976 /* establish e->d[].p destinations */ 2977 for (i = 0; i < NUM_DESTS; i++) 2978 { 2979 mp_size_t offset = 0; 2980 2981 /* possible room for overlapping sources */ 2982 for (j = 0; j < numberof (overlap->s); j++) 2983 if (overlap->s[j] == i) 2984 offset = MAX (offset, s[j].align); 2985 2986 if (d[i].high) 2987 { 2988 if (tr->dst_bytes[i]) 2989 { 2990 e->d[i].p = (mp_ptr) 2991 ((char *) (e->d[i].region.ptr + e->d[i].region.size) 2992 - d[i].size - d[i].align); 2993 } 2994 else 2995 { 2996 e->d[i].p = e->d[i].region.ptr + e->d[i].region.size 2997 - d[i].size - d[i].align; 2998 if (tr->overlap == OVERLAP_LOW_TO_HIGH) 2999 e->d[i].p -= offset; 3000 } 3001 } 3002 else 3003 { 3004 if (tr->dst_bytes[i]) 3005 { 3006 e->d[i].p = (mp_ptr) ((char *) e->d[i].region.ptr + d[i].align); 3007 } 3008 else 3009 { 3010 e->d[i].p = e->d[i].region.ptr + d[i].align; 3011 if (tr->overlap == OVERLAP_HIGH_TO_LOW) 3012 e->d[i].p += offset; 3013 } 3014 } 3015 } 3016 3017 /* establish e->s[].p sources */ 3018 for (i = 0; i < 
NUM_SOURCES; i++) 3019 { 3020 int o = overlap->s[i]; 3021 switch (o) { 3022 case -1: 3023 /* no overlap */ 3024 e->s[i].p = s[i].p; 3025 break; 3026 case 0: 3027 case 1: 3028 /* overlap with d[o] */ 3029 if (tr->overlap == OVERLAP_HIGH_TO_LOW) 3030 e->s[i].p = e->d[o].p - s[i].align; 3031 else if (tr->overlap == OVERLAP_LOW_TO_HIGH) 3032 e->s[i].p = e->d[o].p + s[i].align; 3033 else if (tr->size2 == SIZE_FRACTION) 3034 e->s[i].p = e->d[o].p + size2; 3035 else 3036 e->s[i].p = e->d[o].p; 3037 break; 3038 default: 3039 abort(); 3040 break; 3041 } 3042 } 3043 } 3044 3045 3046 void 3047 validate_fail (void) 3048 { 3049 if (tr->reference) 3050 { 3051 trap_location = TRAP_REF; 3052 call (&ref, tr->reference); 3053 trap_location = TRAP_NOWHERE; 3054 } 3055 3056 print_all(); 3057 abort(); 3058 } 3059 3060 3061 void 3062 try_one (void) 3063 { 3064 int i; 3065 3066 if (option_spinner) 3067 spinner(); 3068 spinner_count++; 3069 3070 trap_location = TRAP_SETUPS; 3071 3072 if (tr->divisor == DIVISOR_NORM) 3073 divisor |= GMP_NUMB_HIGHBIT; 3074 if (tr->divisor == DIVISOR_ODD) 3075 divisor |= 1; 3076 3077 for (i = 0; i < NUM_SOURCES; i++) 3078 { 3079 if (s[i].high) 3080 s[i].p = s[i].region.ptr + s[i].region.size - SRC_SIZE(i) - s[i].align; 3081 else 3082 s[i].p = s[i].region.ptr + s[i].align; 3083 } 3084 3085 pointer_setup (&ref); 3086 pointer_setup (&fun); 3087 3088 ref.retval = 0x04152637; 3089 fun.retval = 0x8C9DAEBF; 3090 3091 t_random (multiplier_N, tr->msize); 3092 3093 for (i = 0; i < NUM_SOURCES; i++) 3094 { 3095 if (! 
/* Run a single test case using the current values of the global iteration
   variables (sizes, alignments, high/low placement, overlap, divisor and so
   on).  The steps are: position the source pointers, set up both each_t
   structures, generate and condition the source data, pre-fill the
   destinations, copy data into overlapping sources, then call the function
   and either validate it or compare it against the reference.
   trap_location is updated along the way so the signal handler can say
   where a fault happened.  */
void
try_one (void)
{
  int  i;

  if (option_spinner)
    spinner();
  spinner_count++;

  trap_location = TRAP_SETUPS;

  /* force the divisor into the form the routine type asks for */
  if (tr->divisor == DIVISOR_NORM)
    divisor |= GMP_NUMB_HIGHBIT;
  if (tr->divisor == DIVISOR_ODD)
    divisor |= 1;

  /* place each source at the top or bottom of its region */
  for (i = 0; i < NUM_SOURCES; i++)
    {
      if (s[i].high)
        s[i].p = s[i].region.ptr + s[i].region.size - SRC_SIZE(i) - s[i].align;
      else
        s[i].p = s[i].region.ptr + s[i].align;
    }

  pointer_setup (&ref);
  pointer_setup (&fun);

  /* distinct initial values, so a return value that is never stored shows
     up as a difference */
  ref.retval = 0x04152637;
  fun.retval = 0x8C9DAEBF;

  t_random (multiplier_N, tr->msize);

  for (i = 0; i < NUM_SOURCES; i++)
    {
      if (! tr->src[i])
        continue;

      /* the region is made writable only while its data is generated and
         adjusted, and is put back to read-only at the end of this loop body */
      mprotect_region (&s[i].region, PROT_READ|PROT_WRITE);
      t_random (s[i].p, SRC_SIZE(i));

      /* adjust the random data to satisfy the routine type's data
         requirement */
      switch (tr->data) {
      case DATA_NON_ZERO:
        if (refmpn_zero_p (s[i].p, SRC_SIZE(i)))
          s[i].p[0] = 1;
        break;

      case DATA_MULTIPLE_DIVISOR:
        /* same number of low zero bits as divisor */
        s[i].p[0] &= ~ LOW_ZEROS_MASK (divisor);
        /* then subtract the remainder modulo divisor */
        refmpn_sub_1 (s[i].p, s[i].p, size,
                      refmpn_mod_1 (s[i].p, size, divisor));
        break;

      case DATA_GCD:
        /* s[1] no more bits than s[0] */
        if (i == 1 && size2 == size)
          s[1].p[size-1] &= refmpn_msbone_mask (s[0].p[size-1]);

        /* high limb non-zero */
        s[i].p[SRC_SIZE(i)-1] += (s[i].p[SRC_SIZE(i)-1] == 0);

        /* odd */
        s[i].p[0] |= 1;
        break;

      case DATA_SRC0_ODD:
        if (i == 0)
          s[i].p[0] |= 1;
        break;

      case DATA_SRC1_ODD:
        if (i == 1)
          s[i].p[0] |= 1;
        break;

      case DATA_SRC1_ODD_PRIME:
        if (i == 1)
          {
            /* a value of 3 or less simply becomes 3 */
            if (refmpn_zero_p (s[i].p+1, SRC_SIZE(i)-1)
                && s[i].p[0] <=3)
              s[i].p[0] = 3;
            else
              {
                /* otherwise replace the data with the result of
                   mpz_nextprime on it, drawing fresh random data whenever
                   that result needs more limbs than the source has */
                mpz_t p;
                mpz_init (p);
                for (;;)
                  {
                    _mpz_realloc (p, SRC_SIZE(i));
                    MPN_COPY (PTR(p), s[i].p, SRC_SIZE(i));
                    SIZ(p) = SRC_SIZE(i);
                    MPN_NORMALIZE (PTR(p), SIZ(p));
                    mpz_nextprime (p, p);
                    if (mpz_size (p) <= SRC_SIZE(i))
                      break;

                    t_random (s[i].p, SRC_SIZE(i));
                  }
                MPN_COPY (s[i].p, PTR(p), SIZ(p));
                /* zero any high limbs the prime does not occupy */
                if (SIZ(p) < SRC_SIZE(i))
                  MPN_ZERO (s[i].p + SIZ(p), SRC_SIZE(i) - SIZ(p));
                mpz_clear (p);
              }
          }
        break;

      case DATA_SRC1_HIGHBIT:
        if (i == 1)
          {
            /* s[1] is size2 limbs when the type uses size2, else size */
            if (tr->size2)
              s[i].p[size2-1] |= GMP_NUMB_HIGHBIT;
            else
              s[i].p[size-1] |= GMP_NUMB_HIGHBIT;
          }
        break;

      case DATA_SRC0_HIGHBIT:
        if (i == 0)
          {
            s[i].p[size-1] |= GMP_NUMB_HIGHBIT;
          }
        break;

      case DATA_UDIV_QRNND:
        /* reduce limb 1 below the divisor */
        s[i].p[1] %= divisor;
        break;
      case DATA_DIV_QR_1:
        /* reduce the low limb of s[1] below the divisor */
        if (i == 1)
          s[i].p[0] %= divisor;
        break;
      }

      mprotect_region (&s[i].region, PROT_READ);
    }

  /* pre-fill the destinations, identically for ref and fun */
  for (i = 0; i < NUM_DESTS; i++)
    {
      if (! tr->dst[i])
        continue;

      if (tr->dst0_from_src1 && i==0)
        {
          /* d[0] starts as a copy taken from the bottom of the s[1] region,
             with any remaining limbs set to DEADVAL */
          mp_size_t  copy = MIN (d[0].size, SRC_SIZE(1));
          mp_size_t  fill = MAX (0, d[0].size - copy);
          MPN_COPY (fun.d[0].p, s[1].region.ptr, copy);
          MPN_COPY (ref.d[0].p, s[1].region.ptr, copy);
          refmpn_fill (fun.d[0].p + copy, fill, DEADVAL);
          refmpn_fill (ref.d[0].p + copy, fill, DEADVAL);
        }
      else if (tr->dst_bytes[i])
        {
          memset (ref.d[i].p, 0xBA, d[i].size);
          memset (fun.d[i].p, 0xBA, d[i].size);
        }
      else
        {
          refmpn_fill (ref.d[i].p, d[i].size, DEADVAL);
          refmpn_fill (fun.d[i].p, d[i].size, DEADVAL);
        }
    }

  /* a source whose pointer differs from s[i].p gets its own copy of the
     source data, for both ref and fun */
  for (i = 0; i < NUM_SOURCES; i++)
    {
      if (! tr->src[i])
        continue;

      if (ref.s[i].p != s[i].p)
        {
          refmpn_copyi (ref.s[i].p, s[i].p, SRC_SIZE(i));
          refmpn_copyi (fun.s[i].p, s[i].p, SRC_SIZE(i));
        }
    }

  if (option_print)
    print_all();

  if (tr->validate != NULL)
    {
      /* types with a validate function: only the function under test is
         called here, and the validate function does the checking */
      trap_location = TRAP_FUN;
      call (&fun, choice->function);
      trap_location = TRAP_NOWHERE;

      if (! CALLING_CONVENTIONS_CHECK ())
        {
          print_all();
          abort();
        }

      (*tr->validate) ();
    }
  else
    {
      /* otherwise run the reference and then the function under test, and
         compare the two sets of results */
      trap_location = TRAP_REF;
      call (&ref, tr->reference);
      trap_location = TRAP_FUN;
      call (&fun, choice->function);
      trap_location = TRAP_NOWHERE;

      if (! CALLING_CONVENTIONS_CHECK ())
        {
          print_all();
          abort();
        }

      compare ();
    }
}
/* The *_ITERATION macros below each expand to a bare "for (...)" header
   over one global test parameter, so they can be stacked one after another
   in front of a single statement (see try_many).  */

/* Loop "size" up to option_lastsize.  The start is the largest of
   option_firstsize, choice->minsize, and 0 or 1 depending on whether the
   type allows a zero size.  For SIZE_ODD types an even start is bumped to
   the next odd value and the step is 2.  */
#define SIZE_ITERATION                                          \
  for (size = MAX3 (option_firstsize,                           \
                    choice->minsize,                            \
                    (tr->size == SIZE_ALLOW_ZERO) ? 0 : 1),     \
         size += (tr->size == SIZE_ODD) && !(size & 1);         \
       size <= option_lastsize;                                 \
       size += (tr->size == SIZE_ODD) ? 2 : 1)

/* First value of size2 for the current "size", chosen by tr->size2; 0 when
   the type has no second size.  */
#define SIZE2_FIRST                                     \
  (tr->size2 == SIZE_2 ? 2                              \
   : tr->size2 == SIZE_FRACTION ? option_firstsize2     \
   : tr->size2 == SIZE_CEIL_HALF ? ((size + 1) / 2)     \
   : tr->size2 ?                                        \
   MAX (choice->minsize, (option_firstsize2 != 0        \
                          ? option_firstsize2 : 1))     \
   : 0)

/* Last value of size2 for the current "size"; for the general case size2
   runs up to "size" itself.  */
#define SIZE2_LAST                                      \
  (tr->size2 == SIZE_2 ? 2                              \
   : tr->size2 == SIZE_FRACTION ? FRACTION_COUNT-1      \
   : tr->size2 == SIZE_CEIL_HALF ? ((size + 1) / 2)     \
   : tr->size2 ? size                                   \
   : 0)

/* Loop "size2" over SIZE2_FIRST to SIZE2_LAST inclusive.  */
#define SIZE2_ITERATION \
  for (size2 = SIZE2_FIRST; size2 <= SIZE2_LAST; size2++)

/* Number of alignments tried for an operand: all ALIGNMENTS of them when
   cond is true (the operand is in use), otherwise just one.  */
#define ALIGN_COUNT(cond)  ((cond) ? ALIGNMENTS : 1)
#define ALIGN_ITERATION(w,n,cond) \
  for (w[n].align = 0; w[n].align < ALIGN_COUNT(cond); w[n].align++)

/* Placement loop for an operand: .high takes the values 0 and 1 when cond
   is non-zero, otherwise only 0.  */
#define HIGH_LIMIT(cond)  ((cond) != 0)
#define HIGH_COUNT(cond)  (HIGH_LIMIT (cond) + 1)
#define HIGH_ITERATION(w,n,cond) \
  for (w[n].high = 0; w[n].high <= HIGH_LIMIT(cond); w[n].high++)

/* Largest shift tried: GMP_NUMB_BITS-1 for types that take a shift, else 1
   so that the loop body runs exactly once.  */
#define SHIFT_LIMIT                                     \
  ((unsigned long) (tr->shift ? GMP_NUMB_BITS -1 : 1))

/* Loop "shift" over 1 to SHIFT_LIMIT inclusive.  */
#define SHIFT_ITERATION                                 \
  for (shift = 1; shift <= SHIFT_LIMIT; shift++)
GMP_NUMB_BITS -1 : 1)) 3307 3308 #define SHIFT_ITERATION \ 3309 for (shift = 1; shift <= SHIFT_LIMIT; shift++) 3310 3311 3312 void 3313 try_many (void) 3314 { 3315 int i; 3316 3317 { 3318 unsigned long total = 1; 3319 3320 total *= option_repetitions; 3321 total *= option_lastsize; 3322 if (tr->size2 == SIZE_FRACTION) total *= FRACTION_COUNT; 3323 else if (tr->size2) total *= (option_lastsize+1)/2; 3324 3325 total *= SHIFT_LIMIT; 3326 total *= MULTIPLIER_COUNT; 3327 total *= DIVISOR_COUNT; 3328 total *= CARRY_COUNT; 3329 total *= T_RAND_COUNT; 3330 3331 total *= HIGH_COUNT (tr->dst[0]); 3332 total *= HIGH_COUNT (tr->dst[1]); 3333 total *= HIGH_COUNT (tr->src[0]); 3334 total *= HIGH_COUNT (tr->src[1]); 3335 3336 total *= ALIGN_COUNT (tr->dst[0]); 3337 total *= ALIGN_COUNT (tr->dst[1]); 3338 total *= ALIGN_COUNT (tr->src[0]); 3339 total *= ALIGN_COUNT (tr->src[1]); 3340 3341 total *= OVERLAP_COUNT; 3342 3343 printf ("%s %lu\n", choice->name, total); 3344 } 3345 3346 spinner_count = 0; 3347 3348 for (i = 0; i < option_repetitions; i++) 3349 SIZE_ITERATION 3350 SIZE2_ITERATION 3351 3352 SHIFT_ITERATION 3353 MULTIPLIER_ITERATION 3354 DIVISOR_ITERATION 3355 CARRY_ITERATION /* must be after divisor */ 3356 T_RAND_ITERATION 3357 3358 HIGH_ITERATION(d,0, tr->dst[0]) 3359 HIGH_ITERATION(d,1, tr->dst[1]) 3360 HIGH_ITERATION(s,0, tr->src[0]) 3361 HIGH_ITERATION(s,1, tr->src[1]) 3362 3363 ALIGN_ITERATION(d,0, tr->dst[0]) 3364 ALIGN_ITERATION(d,1, tr->dst[1]) 3365 ALIGN_ITERATION(s,0, tr->src[0]) 3366 ALIGN_ITERATION(s,1, tr->src[1]) 3367 3368 OVERLAP_ITERATION 3369 try_one(); 3370 3371 printf("\n"); 3372 } 3373 3374 3375 /* Usually print_all() doesn't show much, but it might give a hint as to 3376 where the function was up to when it died. 
*/ 3377 void 3378 trap (int sig) 3379 { 3380 const char *name = "noname"; 3381 3382 switch (sig) { 3383 case SIGILL: name = "SIGILL"; break; 3384 #ifdef SIGBUS 3385 case SIGBUS: name = "SIGBUS"; break; 3386 #endif 3387 case SIGSEGV: name = "SIGSEGV"; break; 3388 case SIGFPE: name = "SIGFPE"; break; 3389 } 3390 3391 printf ("\n\nSIGNAL TRAP: %s\n", name); 3392 3393 switch (trap_location) { 3394 case TRAP_REF: 3395 printf (" in reference function: %s\n", tr->reference_name); 3396 break; 3397 case TRAP_FUN: 3398 printf (" in test function: %s\n", choice->name); 3399 print_all (); 3400 break; 3401 case TRAP_SETUPS: 3402 printf (" in parameter setups\n"); 3403 print_all (); 3404 break; 3405 default: 3406 printf (" somewhere unknown\n"); 3407 break; 3408 } 3409 exit (1); 3410 } 3411 3412 3413 void 3414 try_init (void) 3415 { 3416 #if HAVE_GETPAGESIZE 3417 /* Prefer getpagesize() over sysconf(), since on SunOS 4 sysconf() doesn't 3418 know _SC_PAGESIZE. */ 3419 pagesize = getpagesize (); 3420 #else 3421 #if HAVE_SYSCONF 3422 if ((pagesize = sysconf (_SC_PAGESIZE)) == -1) 3423 { 3424 /* According to the linux man page, sysconf doesn't set errno */ 3425 fprintf (stderr, "Cannot get sysconf _SC_PAGESIZE\n"); 3426 exit (1); 3427 } 3428 #else 3429 Error, error, cannot get page size 3430 #endif 3431 #endif 3432 3433 printf ("pagesize is 0x%lX bytes\n", pagesize); 3434 3435 signal (SIGILL, trap); 3436 #ifdef SIGBUS 3437 signal (SIGBUS, trap); 3438 #endif 3439 signal (SIGSEGV, trap); 3440 signal (SIGFPE, trap); 3441 3442 { 3443 int i; 3444 3445 for (i = 0; i < NUM_SOURCES; i++) 3446 { 3447 malloc_region (&s[i].region, 2*option_lastsize+ALIGNMENTS-1); 3448 printf ("s[%d] %p to %p (0x%lX bytes)\n", 3449 i, (void *) (s[i].region.ptr), 3450 (void *) (s[i].region.ptr + s[i].region.size), 3451 (long) s[i].region.size * GMP_LIMB_BYTES); 3452 } 3453 3454 #define INIT_EACH(e,es) \ 3455 for (i = 0; i < NUM_DESTS; i++) \ 3456 { \ 3457 malloc_region (&e.d[i].region, 
/* Match STR against PATTERN, where PATTERN may carry a single '*' wildcard
   either as its first or as its last character:

     "*tail"   matches any STR ending in "tail" (a lone "*" matches all)
     "head*"   matches any STR starting with "head"
     "exact"   matches only the identical string

   A '*' anywhere else is treated as an ordinary character.  Returns 1 on a
   match and 0 otherwise.  */
int
strmatch_wild (const char *pattern, const char *str)
{
  size_t  plen, slen;

  /* wildcard at start */
  if (pattern[0] == '*')
    {
      pattern++;
      plen = strlen (pattern);
      slen = strlen (str);
      return (plen == 0
              || (slen >= plen && memcmp (pattern, str+slen-plen, plen) == 0));
    }

  /* wildcard at end */
  plen = strlen (pattern);
  if (plen >= 1 && pattern[plen-1] == '*')
    {
      /* strncmp rather than memcmp: STR may be shorter than the prefix, and
         strncmp stops at its terminator instead of reading beyond it */
      return (strncmp (pattern, str, plen-1) == 0);
    }

  /* no wildcards */
  return (strcmp (pattern, str) == 0);
}
printf (" -S size2 starting size2 to test\n"); 3535 printf (" -s s1-s2 range of sizes to test\n"); 3536 printf (" -W don't show the spinner (use this in gdb)\n"); 3537 printf (" -z disable mprotect() redzones\n"); 3538 printf ("Default data is refmpn_random() and refmpn_random2().\n"); 3539 printf ("\n"); 3540 printf ("Functions that can be tested:\n"); 3541 3542 for (i = 0; i < numberof (choice_array); i++) 3543 { 3544 if (col + 1 + strlen (choice_array[i].name) > 79) 3545 { 3546 printf ("\n"); 3547 col = 0; 3548 } 3549 printf (" %s", choice_array[i].name); 3550 col += 1 + strlen (choice_array[i].name); 3551 } 3552 printf ("\n"); 3553 3554 exit(1); 3555 } 3556 3557 3558 int 3559 main (int argc, char *argv[]) 3560 { 3561 int i; 3562 3563 /* unbuffered output */ 3564 setbuf (stdout, NULL); 3565 setbuf (stderr, NULL); 3566 3567 /* default trace in hex, and in upper-case so can paste into bc */ 3568 mp_trace_base = -16; 3569 3570 param_init (); 3571 3572 { 3573 unsigned long seed = 123; 3574 int opt; 3575 3576 while ((opt = getopt(argc, argv, "19a:b:E:pRr:S:s:Wz")) != EOF) 3577 { 3578 switch (opt) { 3579 case '1': 3580 /* use limb data values 1, 2, 3, ... 
etc */ 3581 option_data = DATA_SEQ; 3582 break; 3583 case '9': 3584 /* use limb data values 0xFFF...FFF always */ 3585 option_data = DATA_FFS; 3586 break; 3587 case 'a': 3588 if (strcmp (optarg, "zeros") == 0) option_data = DATA_ZEROS; 3589 else if (strcmp (optarg, "seq") == 0) option_data = DATA_SEQ; 3590 else if (strcmp (optarg, "ffs") == 0) option_data = DATA_FFS; 3591 else if (strcmp (optarg, "2fd") == 0) option_data = DATA_2FD; 3592 else 3593 { 3594 fprintf (stderr, "unrecognised data option: %s\n", optarg); 3595 exit (1); 3596 } 3597 break; 3598 case 'b': 3599 mp_trace_base = atoi (optarg); 3600 break; 3601 case 'E': 3602 /* re-seed */ 3603 sscanf (optarg, "%lu", &seed); 3604 printf ("Re-seeding with %lu\n", seed); 3605 break; 3606 case 'p': 3607 option_print = 1; 3608 break; 3609 case 'R': 3610 /* randomize */ 3611 seed = time (NULL); 3612 printf ("Seeding with %lu, re-run using \"-E %lu\"\n", seed, seed); 3613 break; 3614 case 'r': 3615 option_repetitions = atoi (optarg); 3616 break; 3617 case 's': 3618 { 3619 char *p; 3620 option_firstsize = strtol (optarg, 0, 0); 3621 if ((p = strchr (optarg, '-')) != NULL) 3622 option_lastsize = strtol (p+1, 0, 0); 3623 } 3624 break; 3625 case 'S': 3626 /* -S <size> sets the starting size for the second of a two size 3627 routine (like mpn_mul_basecase) */ 3628 option_firstsize2 = strtol (optarg, 0, 0); 3629 break; 3630 case 'W': 3631 /* use this when running in the debugger */ 3632 option_spinner = 0; 3633 break; 3634 case 'z': 3635 /* disable redzones */ 3636 option_redzones = 0; 3637 break; 3638 case '?': 3639 usage (argv[0]); 3640 break; 3641 } 3642 } 3643 3644 gmp_randinit_default (__gmp_rands); 3645 __gmp_rands_initialized = 1; 3646 gmp_randseed_ui (__gmp_rands, seed); 3647 } 3648 3649 try_init(); 3650 3651 if (argc <= optind) 3652 usage (argv[0]); 3653 3654 for (i = optind; i < argc; i++) 3655 try_name (argv[i]); 3656 3657 return 0; 3658 } 3659