1 /* Run some tests on various mpn routines. 2 3 THIS IS A TEST PROGRAM USED ONLY FOR DEVELOPMENT. IT'S ALMOST CERTAIN TO 4 BE SUBJECT TO INCOMPATIBLE CHANGES IN FUTURE VERSIONS OF GMP. 5 6 Copyright 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2008, 2009, 2011, 2012 7 Free Software Foundation, Inc. 8 9 This file is part of the GNU MP Library test suite. 10 11 The GNU MP Library test suite is free software; you can redistribute it 12 and/or modify it under the terms of the GNU General Public License as 13 published by the Free Software Foundation; either version 3 of the License, 14 or (at your option) any later version. 15 16 The GNU MP Library test suite is distributed in the hope that it will be 17 useful, but WITHOUT ANY WARRANTY; without even the implied warranty of 18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General 19 Public License for more details. 20 21 You should have received a copy of the GNU General Public License along with 22 the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */ 23 24 25 /* Usage: try [options] <function>... 26 27 For example, "./try mpn_add_n" to run tests of that function. 28 29 Combinations of alignments and overlaps are tested, with redzones above 30 or below the destinations, and with the sources write-protected. 31 32 The number of tests performed becomes ridiculously large with all the 33 combinations, and for that reason this can't be a part of a "make check", 34 it's meant only for development. The code isn't very pretty either. 35 36 During development it can help to disable the redzones, since seeing the 37 rest of the destination written can show where the wrong part is, or if 38 the dst pointers are off by 1 or whatever. The magic DEADVAL initial 39 fill (see below) will show locations never written. 40 41 The -s option can be used to test only certain size operands, which is 42 useful if some new code doesn't yet support say sizes less than the 43 unrolling, or whatever. 44 45 When a problem occurs it'll of course be necessary to run the program 46 under gdb to find out quite where, how and why it's going wrong. Disable 47 the spinner with the -W option when doing this, or single stepping won't 48 work. Using the "-1" option to run with simple data can be useful. 49 50 New functions to test can be added in try_array[]. If a new TYPE is 51 required then add it to the existing constants, set up its parameters in 52 param_init(), and add it to the call() function. Extra parameter fields 53 can be added if necessary, or further interpretations given to existing 54 fields. 55 56 57 Portability: 58 59 This program is not designed for use on Cray vector systems under Unicos, 60 it will fail to compile due to missing _SC_PAGE_SIZE. Those systems 61 don't really have pages or mprotect. We could arrange to run the tests 62 without the redzones, but we haven't bothered currently. 63 64 65 Enhancements: 66 67 umul_ppmm support is not very good, lots of source data is generated 68 whereas only two limbs are needed. 69 70 Make a little scheme for interpreting the "SIZE" selections uniformly. 71 72 Make tr->size==SIZE_2 work, for the benefit of find_a which wants just 2 73 source limbs. Possibly increase the default repetitions in that case. 74 75 Automatically detect gdb and disable the spinner (use -W for now). 76 77 Make a way to re-run a failing case in the debugger. Have an option to 78 snapshot each test case before it's run so the data is available if a 79 segv occurs. (This should be more reliable than the current print_all() 80 in the signal handler.) 81 82 When alignment means a dst isn't hard against the redzone, check the 83 space in between remains unchanged. 84 85 When a source overlaps a destination, don't run both s[i].high 0 and 1, 86 as s[i].high has no effect. Maybe encode s[i].high into overlap->s[i]. 87 88 When partial overlaps aren't done, don't loop over source alignments 89 during overlaps. 90 91 Try to make the looping code a bit less horrible. Right now it's pretty 92 hard to see what iterations are actually done. 93 94 Perhaps specific setups and loops for each style of function under test 95 would be clearer than a parameterized general loop. There's lots of 96 stuff common to all functions, but the exceptions get messy. 97 98 When there's no overlap, run with both src>dst and src<dst. A subtle 99 calling-conventions violation occurred in a P6 copy which depended on the 100 relative location of src and dst. 101 102 multiplier_N is more or less a third source region for the addmul_N 103 routines, and could be done with the redzoned region scheme. 104 105 */ 106 107 108 /* always do assertion checking */ 109 #define WANT_ASSERT 1 110 111 #include "config.h" 112 113 #include <errno.h> 114 #include <limits.h> 115 #include <signal.h> 116 #include <stdio.h> 117 #include <stdlib.h> 118 #include <string.h> 119 #include <time.h> 120 121 #if HAVE_UNISTD_H 122 #include <unistd.h> 123 #endif 124 125 #if HAVE_SYS_MMAN_H 126 #include <sys/mman.h> 127 #endif 128 129 #include "gmp.h" 130 #include "gmp-impl.h" 131 #include "longlong.h" 132 #include "tests.h" 133 134 135 #if !HAVE_DECL_OPTARG 136 extern char *optarg; 137 extern int optind, opterr; 138 #endif 139 140 #if ! HAVE_DECL_SYS_NERR 141 extern int sys_nerr; 142 #endif 143 144 #if ! HAVE_DECL_SYS_ERRLIST 145 extern char *sys_errlist[]; 146 #endif 147 148 #if ! HAVE_STRERROR 149 char * 150 strerror (int n) 151 { 152 if (n < 0 || n >= sys_nerr) 153 return "errno out of range"; 154 else 155 return sys_errlist[n]; 156 } 157 #endif 158 159 /* Rumour has it some systems lack a define of PROT_NONE. */ 160 #ifndef PROT_NONE 161 #define PROT_NONE 0 162 #endif 163 164 /* Dummy defines for when mprotect doesn't exist. */ 165 #ifndef PROT_READ 166 #define PROT_READ 0 167 #endif 168 #ifndef PROT_WRITE 169 #define PROT_WRITE 0 170 #endif 171 172 /* _SC_PAGESIZE is standard, but hpux 9 and possibly other systems have 173 _SC_PAGE_SIZE instead. */ 174 #if defined (_SC_PAGE_SIZE) && ! defined (_SC_PAGESIZE) 175 #define _SC_PAGESIZE _SC_PAGE_SIZE 176 #endif 177 178 179 #ifdef EXTRA_PROTOS 180 EXTRA_PROTOS 181 #endif 182 #ifdef EXTRA_PROTOS2 183 EXTRA_PROTOS2 184 #endif 185 186 187 #define DEFAULT_REPETITIONS 10 188 189 int option_repetitions = DEFAULT_REPETITIONS; 190 int option_spinner = 1; 191 int option_redzones = 1; 192 int option_firstsize = 0; 193 int option_lastsize = 500; 194 int option_firstsize2 = 0; 195 196 #define ALIGNMENTS 4 197 #define OVERLAPS 4 198 #define CARRY_RANDOMS 5 199 #define MULTIPLIER_RANDOMS 5 200 #define DIVISOR_RANDOMS 5 201 #define FRACTION_COUNT 4 202 203 int option_print = 0; 204 205 #define DATA_TRAND 0 206 #define DATA_ZEROS 1 207 #define DATA_SEQ 2 208 #define DATA_FFS 3 209 #define DATA_2FD 4 210 int option_data = DATA_TRAND; 211 212 213 mp_size_t pagesize; 214 #define PAGESIZE_LIMBS (pagesize / BYTES_PER_MP_LIMB) 215 216 /* must be a multiple of the page size */ 217 #define REDZONE_BYTES (pagesize * 16) 218 #define REDZONE_LIMBS (REDZONE_BYTES / BYTES_PER_MP_LIMB) 219 220 221 #define MAX3(x,y,z) (MAX (x, MAX (y, z))) 222 223 #if GMP_LIMB_BITS == 32 224 #define DEADVAL CNST_LIMB(0xDEADBEEF) 225 #else 226 #define DEADVAL CNST_LIMB(0xDEADBEEFBADDCAFE) 227 #endif 228 229 230 struct region_t { 231 mp_ptr ptr; 232 mp_size_t size; 233 }; 234 235 236 #define TRAP_NOWHERE 0 237 #define TRAP_REF 1 238 #define TRAP_FUN 2 239 #define TRAP_SETUPS 3 240 int trap_location = TRAP_NOWHERE; 241 242 243 #define NUM_SOURCES 5 244 #define NUM_DESTS 2 245 246 struct source_t { 247 struct region_t region; 248 int high; 249 mp_size_t align; 250 mp_ptr p; 251 }; 252 253 struct source_t s[NUM_SOURCES]; 254 255 struct dest_t { 256 int high; 257 mp_size_t align; 258 mp_size_t size; 259 }; 260 261 struct dest_t d[NUM_DESTS]; 262 263 struct source_each_t { 264 mp_ptr p; 265 }; 266 267 struct dest_each_t { 268 struct region_t region; 269 mp_ptr p; 270 }; 271 272 mp_size_t size; 273 mp_size_t size2; 274 unsigned long shift; 275 mp_limb_t carry; 276 mp_limb_t divisor; 277 mp_limb_t multiplier; 278 mp_limb_t multiplier_N[8]; 279 280 struct each_t { 281 const char *name; 282 struct dest_each_t d[NUM_DESTS]; 283 struct source_each_t s[NUM_SOURCES]; 284 mp_limb_t retval; 285 }; 286 287 struct each_t ref = { "Ref" }; 288 struct each_t fun = { "Fun" }; 289 290 #define SRC_SIZE(n) ((n) == 1 && tr->size2 ? size2 : size) 291 292 void validate_fail (void); 293 294 295 #if HAVE_TRY_NEW_C 296 #include "try-new.c" 297 #endif 298 299 300 typedef mp_limb_t (*tryfun_t) (ANYARGS); 301 302 struct try_t { 303 char retval; 304 305 char src[NUM_SOURCES]; 306 char dst[NUM_DESTS]; 307 308 #define SIZE_YES 1 309 #define SIZE_ALLOW_ZERO 2 310 #define SIZE_1 3 /* 1 limb */ 311 #define SIZE_2 4 /* 2 limbs */ 312 #define SIZE_3 5 /* 3 limbs */ 313 #define SIZE_4 6 /* 4 limbs */ 314 #define SIZE_6 7 /* 6 limbs */ 315 #define SIZE_FRACTION 8 /* size2 is fraction for divrem etc */ 316 #define SIZE_SIZE2 9 317 #define SIZE_PLUS_1 10 318 #define SIZE_SUM 11 319 #define SIZE_DIFF 12 320 #define SIZE_DIFF_PLUS_1 13 321 #define SIZE_DIFF_PLUS_3 14 322 #define SIZE_RETVAL 15 323 #define SIZE_CEIL_HALF 16 324 #define SIZE_GET_STR 17 325 #define SIZE_PLUS_MSIZE_SUB_1 18 /* size+msize-1 */ 326 #define SIZE_ODD 19 327 char size; 328 char size2; 329 char dst_size[NUM_DESTS]; 330 331 /* multiplier_N size in limbs */ 332 mp_size_t msize; 333 334 char dst_bytes[NUM_DESTS]; 335 336 char dst0_from_src1; 337 338 #define CARRY_BIT 1 /* single bit 0 or 1 */ 339 #define CARRY_3 2 /* 0, 1, 2 */ 340 #define CARRY_4 3 /* 0 to 3 */ 341 #define CARRY_LIMB 4 /* any limb value */ 342 #define CARRY_DIVISOR 5 /* carry<divisor */ 343 char carry; 344 345 /* a fudge to tell the output when to print negatives */ 346 char carry_sign; 347 348 char multiplier; 349 char shift; 350 351 #define DIVISOR_LIMB 1 352 #define DIVISOR_NORM 2 353 #define DIVISOR_ODD 3 354 char divisor; 355 356 #define DATA_NON_ZERO 1 357 #define DATA_GCD 2 358 #define DATA_SRC0_ODD 3 359 #define DATA_SRC0_HIGHBIT 4 360 #define DATA_SRC1_ODD 5 361 #define DATA_SRC1_ODD_PRIME 6 362 #define DATA_SRC1_HIGHBIT 7 363 #define DATA_MULTIPLE_DIVISOR 8 364 #define DATA_UDIV_QRNND 9 365 char data; 366 367 /* Default is allow full overlap. */ 368 #define OVERLAP_NONE 1 369 #define OVERLAP_LOW_TO_HIGH 2 370 #define OVERLAP_HIGH_TO_LOW 3 371 #define OVERLAP_NOT_SRCS 4 372 #define OVERLAP_NOT_SRC2 8 373 #define OVERLAP_NOT_DST2 16 374 char overlap; 375 376 tryfun_t reference; 377 const char *reference_name; 378 379 void (*validate) (void); 380 const char *validate_name; 381 }; 382 383 struct try_t *tr; 384 385 386 void 387 validate_mod_34lsub1 (void) 388 { 389 #define CNST_34LSUB1 ((CNST_LIMB(1) << (3 * (GMP_NUMB_BITS / 4))) - 1) 390 391 mp_srcptr ptr = s[0].p; 392 int error = 0; 393 mp_limb_t got, got_mod, want, want_mod; 394 395 ASSERT (size >= 1); 396 397 got = fun.retval; 398 got_mod = got % CNST_34LSUB1; 399 400 want = refmpn_mod_34lsub1 (ptr, size); 401 want_mod = want % CNST_34LSUB1; 402 403 if (got_mod != want_mod) 404 { 405 gmp_printf ("got 0x%MX reduced from 0x%MX\n", got_mod, got); 406 gmp_printf ("want 0x%MX reduced from 0x%MX\n", want_mod, want); 407 error = 1; 408 } 409 410 if (error) 411 validate_fail (); 412 } 413 414 void 415 validate_divexact_1 (void) 416 { 417 mp_srcptr src = s[0].p; 418 mp_srcptr dst = fun.d[0].p; 419 int error = 0; 420 421 ASSERT (size >= 1); 422 423 { 424 mp_ptr tp = refmpn_malloc_limbs (size); 425 mp_limb_t rem; 426 427 rem = refmpn_divrem_1 (tp, 0, src, size, divisor); 428 if (rem != 0) 429 { 430 gmp_printf ("Remainder a%%d == 0x%MX, mpn_divexact_1 undefined\n", rem); 431 error = 1; 432 } 433 if (! refmpn_equal_anynail (tp, dst, size)) 434 { 435 printf ("Quotient a/d wrong\n"); 436 mpn_trace ("fun ", dst, size); 437 mpn_trace ("want", tp, size); 438 error = 1; 439 } 440 free (tp); 441 } 442 443 if (error) 444 validate_fail (); 445 } 446 447 void 448 validate_bdiv_q_1 449 (void) 450 { 451 mp_srcptr src = s[0].p; 452 mp_srcptr dst = fun.d[0].p; 453 int error = 0; 454 455 ASSERT (size >= 1); 456 457 { 458 mp_ptr tp = refmpn_malloc_limbs (size + 1); 459 460 refmpn_mul_1 (tp, dst, size, divisor); 461 /* Set ignored low bits */ 462 tp[0] |= (src[0] & LOW_ZEROS_MASK (divisor)); 463 if (! refmpn_equal_anynail (tp, src, size)) 464 { 465 printf ("Bdiv wrong: res * divisor != src (mod B^size)\n"); 466 mpn_trace ("res ", dst, size); 467 mpn_trace ("src ", src, size); 468 error = 1; 469 } 470 free (tp); 471 } 472 473 if (error) 474 validate_fail (); 475 } 476 477 478 void 479 validate_modexact_1c_odd (void) 480 { 481 mp_srcptr ptr = s[0].p; 482 mp_limb_t r = fun.retval; 483 int error = 0; 484 485 ASSERT (size >= 1); 486 ASSERT (divisor & 1); 487 488 if ((r & GMP_NAIL_MASK) != 0) 489 printf ("r has non-zero nail\n"); 490 491 if (carry < divisor) 492 { 493 if (! (r < divisor)) 494 { 495 printf ("Don't have r < divisor\n"); 496 error = 1; 497 } 498 } 499 else /* carry >= divisor */ 500 { 501 if (! (r <= divisor)) 502 { 503 printf ("Don't have r <= divisor\n"); 504 error = 1; 505 } 506 } 507 508 { 509 mp_limb_t c = carry % divisor; 510 mp_ptr tp = refmpn_malloc_limbs (size+1); 511 mp_size_t k; 512 513 for (k = size-1; k <= size; k++) 514 { 515 /* set {tp,size+1} to r*b^k + a - c */ 516 refmpn_copyi (tp, ptr, size); 517 tp[size] = 0; 518 ASSERT_NOCARRY (refmpn_add_1 (tp+k, tp+k, size+1-k, r)); 519 if (refmpn_sub_1 (tp, tp, size+1, c)) 520 ASSERT_CARRY (mpn_add_1 (tp, tp, size+1, divisor)); 521 522 if (refmpn_mod_1 (tp, size+1, divisor) == 0) 523 goto good_remainder; 524 } 525 printf ("Remainder matches neither r*b^(size-1) nor r*b^size\n"); 526 error = 1; 527 528 good_remainder: 529 free (tp); 530 } 531 532 if (error) 533 validate_fail (); 534 } 535 536 void 537 validate_modexact_1_odd (void) 538 { 539 carry = 0; 540 validate_modexact_1c_odd (); 541 } 542 543 544 void 545 validate_sqrtrem (void) 546 { 547 mp_srcptr orig_ptr = s[0].p; 548 mp_size_t orig_size = size; 549 mp_size_t root_size = (size+1)/2; 550 mp_srcptr root_ptr = fun.d[0].p; 551 mp_size_t rem_size = fun.retval; 552 mp_srcptr rem_ptr = fun.d[1].p; 553 mp_size_t prod_size = 2*root_size; 554 mp_ptr p; 555 int error = 0; 556 557 if (rem_size < 0 || rem_size > size) 558 { 559 printf ("Bad remainder size retval %ld\n", (long) rem_size); 560 validate_fail (); 561 } 562 563 p = refmpn_malloc_limbs (prod_size); 564 565 p[root_size] = refmpn_lshift (p, root_ptr, root_size, 1); 566 if (refmpn_cmp_twosizes (p,root_size+1, rem_ptr,rem_size) < 0) 567 { 568 printf ("Remainder bigger than 2*root\n"); 569 error = 1; 570 } 571 572 refmpn_sqr (p, root_ptr, root_size); 573 if (rem_size != 0) 574 refmpn_add (p, p, prod_size, rem_ptr, rem_size); 575 if (refmpn_cmp_twosizes (p,prod_size, orig_ptr,orig_size) != 0) 576 { 577 printf ("root^2+rem != original\n"); 578 mpn_trace ("prod", p, prod_size); 579 error = 1; 580 } 581 free (p); 582 583 if (error) 584 validate_fail (); 585 } 586 587 588 /* These types are indexes into the param[] array and are arbitrary so long 589 as they're all distinct and within the size of param[]. Renumber 590 whenever necessary or desired. */ 591 592 enum { 593 TYPE_ADD = 1, TYPE_ADD_N, TYPE_ADD_NC, TYPE_SUB, TYPE_SUB_N, TYPE_SUB_NC, 594 595 TYPE_ADD_ERR1_N, TYPE_ADD_ERR2_N, TYPE_ADD_ERR3_N, 596 TYPE_SUB_ERR1_N, TYPE_SUB_ERR2_N, TYPE_SUB_ERR3_N, 597 598 TYPE_MUL_1, TYPE_MUL_1C, 599 600 TYPE_MUL_2, TYPE_MUL_3, TYPE_MUL_4, TYPE_MUL_5, TYPE_MUL_6, 601 602 TYPE_ADDMUL_1, TYPE_ADDMUL_1C, TYPE_SUBMUL_1, TYPE_SUBMUL_1C, 603 604 TYPE_ADDMUL_2, TYPE_ADDMUL_3, TYPE_ADDMUL_4, TYPE_ADDMUL_5, TYPE_ADDMUL_6, 605 TYPE_ADDMUL_7, TYPE_ADDMUL_8, 606 607 TYPE_ADDSUB_N, TYPE_ADDSUB_NC, 608 609 TYPE_RSHIFT, TYPE_LSHIFT, TYPE_LSHIFTC, 610 611 TYPE_COPY, TYPE_COPYI, TYPE_COPYD, TYPE_COM, 612 613 TYPE_ADDLSH1_N, TYPE_ADDLSH2_N, TYPE_ADDLSH_N, 614 TYPE_ADDLSH1_N_IP1, TYPE_ADDLSH2_N_IP1, TYPE_ADDLSH_N_IP1, 615 TYPE_ADDLSH1_N_IP2, TYPE_ADDLSH2_N_IP2, TYPE_ADDLSH_N_IP2, 616 TYPE_SUBLSH1_N, TYPE_SUBLSH2_N, TYPE_SUBLSH_N, 617 TYPE_SUBLSH1_N_IP1, TYPE_SUBLSH2_N_IP1, TYPE_SUBLSH_N_IP1, 618 TYPE_RSBLSH1_N, TYPE_RSBLSH2_N, TYPE_RSBLSH_N, 619 TYPE_RSH1ADD_N, TYPE_RSH1SUB_N, 620 621 TYPE_ADDLSH1_NC, TYPE_ADDLSH2_NC, TYPE_ADDLSH_NC, 622 TYPE_SUBLSH1_NC, TYPE_SUBLSH2_NC, TYPE_SUBLSH_NC, 623 TYPE_RSBLSH1_NC, TYPE_RSBLSH2_NC, TYPE_RSBLSH_NC, 624 625 TYPE_ADDCND_N, TYPE_SUBCND_N, 626 627 TYPE_MOD_1, TYPE_MOD_1C, TYPE_DIVMOD_1, TYPE_DIVMOD_1C, TYPE_DIVREM_1, 628 TYPE_DIVREM_1C, TYPE_PREINV_DIVREM_1, TYPE_DIVREM_2, TYPE_PREINV_MOD_1, 629 TYPE_MOD_34LSUB1, TYPE_UDIV_QRNND, TYPE_UDIV_QRNND_R, 630 631 TYPE_DIVEXACT_1, TYPE_BDIV_Q_1, TYPE_DIVEXACT_BY3, TYPE_DIVEXACT_BY3C, 632 TYPE_MODEXACT_1_ODD, TYPE_MODEXACT_1C_ODD, 633 634 TYPE_INVERT, TYPE_BINVERT, 635 636 TYPE_GCD, TYPE_GCD_1, TYPE_GCD_FINDA, TYPE_MPZ_JACOBI, TYPE_MPZ_KRONECKER, 637 TYPE_MPZ_KRONECKER_UI, TYPE_MPZ_KRONECKER_SI, TYPE_MPZ_UI_KRONECKER, 638 TYPE_MPZ_SI_KRONECKER, TYPE_MPZ_LEGENDRE, 639 640 TYPE_AND_N, TYPE_NAND_N, TYPE_ANDN_N, TYPE_IOR_N, TYPE_IORN_N, TYPE_NIOR_N, 641 TYPE_XOR_N, TYPE_XNOR_N, 642 643 TYPE_MUL_MN, TYPE_MUL_N, TYPE_SQR, TYPE_UMUL_PPMM, TYPE_UMUL_PPMM_R, 644 TYPE_MULLO_N, TYPE_MULMID_MN, TYPE_MULMID_N, 645 646 TYPE_SBPI1_DIV_QR, TYPE_TDIV_QR, 647 648 TYPE_SQRTREM, TYPE_ZERO, TYPE_GET_STR, TYPE_POPCOUNT, TYPE_HAMDIST, 649 650 TYPE_EXTRA 651 }; 652 653 struct try_t param[TYPE_EXTRA]; 654 655 656 void 657 param_init (void) 658 { 659 struct try_t *p; 660 661 #define COPY(index) memcpy (p, ¶m[index], sizeof (*p)) 662 663 #if HAVE_STRINGIZE 664 #define REFERENCE(fun) \ 665 p->reference = (tryfun_t) fun; \ 666 p->reference_name = #fun 667 #define VALIDATE(fun) \ 668 p->validate = fun; \ 669 p->validate_name = #fun 670 #else 671 #define REFERENCE(fun) \ 672 p->reference = (tryfun_t) fun; \ 673 p->reference_name = "fun" 674 #define VALIDATE(fun) \ 675 p->validate = fun; \ 676 p->validate_name = "fun" 677 #endif 678 679 680 p = ¶m[TYPE_ADD_N]; 681 p->retval = 1; 682 p->dst[0] = 1; 683 p->src[0] = 1; 684 p->src[1] = 1; 685 REFERENCE (refmpn_add_n); 686 687 p = ¶m[TYPE_ADD_NC]; 688 COPY (TYPE_ADD_N); 689 p->carry = CARRY_BIT; 690 REFERENCE (refmpn_add_nc); 691 692 p = ¶m[TYPE_SUB_N]; 693 COPY (TYPE_ADD_N); 694 REFERENCE (refmpn_sub_n); 695 696 p = ¶m[TYPE_SUB_NC]; 697 COPY (TYPE_ADD_NC); 698 REFERENCE (refmpn_sub_nc); 699 700 p = ¶m[TYPE_ADD]; 701 COPY (TYPE_ADD_N); 702 p->size = SIZE_ALLOW_ZERO; 703 p->size2 = 1; 704 REFERENCE (refmpn_add); 705 706 p = ¶m[TYPE_SUB]; 707 COPY (TYPE_ADD); 708 REFERENCE (refmpn_sub); 709 710 711 p = ¶m[TYPE_ADD_ERR1_N]; 712 p->retval = 1; 713 p->dst[0] = 1; 714 p->dst[1] = 1; 715 p->src[0] = 1; 716 p->src[1] = 1; 717 p->src[2] = 1; 718 p->dst_size[1] = SIZE_2; 719 p->carry = CARRY_BIT; 720 p->overlap = OVERLAP_NOT_DST2; 721 REFERENCE (refmpn_add_err1_n); 722 723 p = ¶m[TYPE_SUB_ERR1_N]; 724 COPY (TYPE_ADD_ERR1_N); 725 REFERENCE (refmpn_sub_err1_n); 726 727 p = ¶m[TYPE_ADD_ERR2_N]; 728 COPY (TYPE_ADD_ERR1_N); 729 p->src[3] = 1; 730 p->dst_size[1] = SIZE_4; 731 REFERENCE (refmpn_add_err2_n); 732 733 p = ¶m[TYPE_SUB_ERR2_N]; 734 COPY (TYPE_ADD_ERR2_N); 735 REFERENCE (refmpn_sub_err2_n); 736 737 p = ¶m[TYPE_ADD_ERR3_N]; 738 COPY (TYPE_ADD_ERR2_N); 739 p->src[4] = 1; 740 p->dst_size[1] = SIZE_6; 741 REFERENCE (refmpn_add_err3_n); 742 743 p = ¶m[TYPE_SUB_ERR3_N]; 744 COPY (TYPE_ADD_ERR3_N); 745 REFERENCE (refmpn_sub_err3_n); 746 747 p = ¶m[TYPE_ADDCND_N]; 748 COPY (TYPE_ADD_N); 749 p->carry = CARRY_BIT; 750 REFERENCE (refmpn_addcnd_n); 751 752 p = ¶m[TYPE_SUBCND_N]; 753 COPY (TYPE_ADD_N); 754 p->carry = CARRY_BIT; 755 REFERENCE (refmpn_subcnd_n); 756 757 758 p = ¶m[TYPE_MUL_1]; 759 p->retval = 1; 760 p->dst[0] = 1; 761 p->src[0] = 1; 762 p->multiplier = 1; 763 p->overlap = OVERLAP_LOW_TO_HIGH; 764 REFERENCE (refmpn_mul_1); 765 766 p = ¶m[TYPE_MUL_1C]; 767 COPY (TYPE_MUL_1); 768 p->carry = CARRY_LIMB; 769 REFERENCE (refmpn_mul_1c); 770 771 772 p = ¶m[TYPE_MUL_2]; 773 p->retval = 1; 774 p->dst[0] = 1; 775 p->dst_size[0] = SIZE_PLUS_MSIZE_SUB_1; 776 p->src[0] = 1; 777 p->src[1] = 1; 778 p->msize = 2; 779 p->overlap = OVERLAP_NOT_SRC2; 780 REFERENCE (refmpn_mul_2); 781 782 p = ¶m[TYPE_MUL_3]; 783 COPY (TYPE_MUL_2); 784 p->msize = 3; 785 REFERENCE (refmpn_mul_3); 786 787 p = ¶m[TYPE_MUL_4]; 788 COPY (TYPE_MUL_2); 789 p->msize = 4; 790 REFERENCE (refmpn_mul_4); 791 792 p = ¶m[TYPE_MUL_5]; 793 COPY (TYPE_MUL_2); 794 p->msize = 5; 795 REFERENCE (refmpn_mul_5); 796 797 p = ¶m[TYPE_MUL_6]; 798 COPY (TYPE_MUL_2); 799 p->msize = 6; 800 REFERENCE (refmpn_mul_6); 801 802 803 p = ¶m[TYPE_ADDMUL_1]; 804 p->retval = 1; 805 p->dst[0] = 1; 806 p->src[0] = 1; 807 p->multiplier = 1; 808 p->dst0_from_src1 = 1; 809 REFERENCE (refmpn_addmul_1); 810 811 p = ¶m[TYPE_ADDMUL_1C]; 812 COPY (TYPE_ADDMUL_1); 813 p->carry = CARRY_LIMB; 814 REFERENCE (refmpn_addmul_1c); 815 816 p = ¶m[TYPE_SUBMUL_1]; 817 COPY (TYPE_ADDMUL_1); 818 REFERENCE (refmpn_submul_1); 819 820 p = ¶m[TYPE_SUBMUL_1C]; 821 COPY (TYPE_ADDMUL_1C); 822 REFERENCE (refmpn_submul_1c); 823 824 825 p = ¶m[TYPE_ADDMUL_2]; 826 p->retval = 1; 827 p->dst[0] = 1; 828 p->dst_size[0] = SIZE_PLUS_MSIZE_SUB_1; 829 p->src[0] = 1; 830 p->src[1] = 1; 831 p->msize = 2; 832 p->dst0_from_src1 = 1; 833 p->overlap = OVERLAP_NONE; 834 REFERENCE (refmpn_addmul_2); 835 836 p = ¶m[TYPE_ADDMUL_3]; 837 COPY (TYPE_ADDMUL_2); 838 p->msize = 3; 839 REFERENCE (refmpn_addmul_3); 840 841 p = ¶m[TYPE_ADDMUL_4]; 842 COPY (TYPE_ADDMUL_2); 843 p->msize = 4; 844 REFERENCE (refmpn_addmul_4); 845 846 p = ¶m[TYPE_ADDMUL_5]; 847 COPY (TYPE_ADDMUL_2); 848 p->msize = 5; 849 REFERENCE (refmpn_addmul_5); 850 851 p = ¶m[TYPE_ADDMUL_6]; 852 COPY (TYPE_ADDMUL_2); 853 p->msize = 6; 854 REFERENCE (refmpn_addmul_6); 855 856 p = ¶m[TYPE_ADDMUL_7]; 857 COPY (TYPE_ADDMUL_2); 858 p->msize = 7; 859 REFERENCE (refmpn_addmul_7); 860 861 p = ¶m[TYPE_ADDMUL_8]; 862 COPY (TYPE_ADDMUL_2); 863 p->msize = 8; 864 REFERENCE (refmpn_addmul_8); 865 866 867 p = ¶m[TYPE_AND_N]; 868 p->dst[0] = 1; 869 p->src[0] = 1; 870 p->src[1] = 1; 871 REFERENCE (refmpn_and_n); 872 873 p = ¶m[TYPE_ANDN_N]; 874 COPY (TYPE_AND_N); 875 REFERENCE (refmpn_andn_n); 876 877 p = ¶m[TYPE_NAND_N]; 878 COPY (TYPE_AND_N); 879 REFERENCE (refmpn_nand_n); 880 881 p = ¶m[TYPE_IOR_N]; 882 COPY (TYPE_AND_N); 883 REFERENCE (refmpn_ior_n); 884 885 p = ¶m[TYPE_IORN_N]; 886 COPY (TYPE_AND_N); 887 REFERENCE (refmpn_iorn_n); 888 889 p = ¶m[TYPE_NIOR_N]; 890 COPY (TYPE_AND_N); 891 REFERENCE (refmpn_nior_n); 892 893 p = ¶m[TYPE_XOR_N]; 894 COPY (TYPE_AND_N); 895 REFERENCE (refmpn_xor_n); 896 897 p = ¶m[TYPE_XNOR_N]; 898 COPY (TYPE_AND_N); 899 REFERENCE (refmpn_xnor_n); 900 901 902 p = ¶m[TYPE_ADDSUB_N]; 903 p->retval = 1; 904 p->dst[0] = 1; 905 p->dst[1] = 1; 906 p->src[0] = 1; 907 p->src[1] = 1; 908 REFERENCE (refmpn_add_n_sub_n); 909 910 p = ¶m[TYPE_ADDSUB_NC]; 911 COPY (TYPE_ADDSUB_N); 912 p->carry = CARRY_4; 913 REFERENCE (refmpn_add_n_sub_nc); 914 915 916 p = ¶m[TYPE_COPY]; 917 p->dst[0] = 1; 918 p->src[0] = 1; 919 p->overlap = OVERLAP_NONE; 920 p->size = SIZE_ALLOW_ZERO; 921 REFERENCE (refmpn_copy); 922 923 p = ¶m[TYPE_COPYI]; 924 p->dst[0] = 1; 925 p->src[0] = 1; 926 p->overlap = OVERLAP_LOW_TO_HIGH; 927 p->size = SIZE_ALLOW_ZERO; 928 REFERENCE (refmpn_copyi); 929 930 p = ¶m[TYPE_COPYD]; 931 p->dst[0] = 1; 932 p->src[0] = 1; 933 p->overlap = OVERLAP_HIGH_TO_LOW; 934 p->size = SIZE_ALLOW_ZERO; 935 REFERENCE (refmpn_copyd); 936 937 p = ¶m[TYPE_COM]; 938 p->dst[0] = 1; 939 p->src[0] = 1; 940 REFERENCE (refmpn_com); 941 942 943 p = ¶m[TYPE_ADDLSH1_N]; 944 COPY (TYPE_ADD_N); 945 REFERENCE (refmpn_addlsh1_n); 946 947 p = ¶m[TYPE_ADDLSH2_N]; 948 COPY (TYPE_ADD_N); 949 REFERENCE (refmpn_addlsh2_n); 950 951 p = ¶m[TYPE_ADDLSH_N]; 952 COPY (TYPE_ADD_N); 953 p->shift = 1; 954 REFERENCE (refmpn_addlsh_n); 955 956 p = ¶m[TYPE_ADDLSH1_N_IP1]; 957 p->retval = 1; 958 p->dst[0] = 1; 959 p->src[0] = 1; 960 p->dst0_from_src1 = 1; 961 REFERENCE (refmpn_addlsh1_n_ip1); 962 963 p = ¶m[TYPE_ADDLSH2_N_IP1]; 964 COPY (TYPE_ADDLSH1_N_IP1); 965 REFERENCE (refmpn_addlsh2_n_ip1); 966 967 p = ¶m[TYPE_ADDLSH_N_IP1]; 968 COPY (TYPE_ADDLSH1_N_IP1); 969 p->shift = 1; 970 REFERENCE (refmpn_addlsh_n_ip1); 971 972 p = ¶m[TYPE_ADDLSH1_N_IP2]; 973 COPY (TYPE_ADDLSH1_N_IP1); 974 REFERENCE (refmpn_addlsh1_n_ip2); 975 976 p = ¶m[TYPE_ADDLSH2_N_IP2]; 977 COPY (TYPE_ADDLSH1_N_IP1); 978 REFERENCE (refmpn_addlsh2_n_ip2); 979 980 p = ¶m[TYPE_ADDLSH_N_IP2]; 981 COPY (TYPE_ADDLSH_N_IP1); 982 REFERENCE (refmpn_addlsh_n_ip2); 983 984 p = ¶m[TYPE_SUBLSH1_N]; 985 COPY (TYPE_ADD_N); 986 REFERENCE (refmpn_sublsh1_n); 987 988 p = ¶m[TYPE_SUBLSH2_N]; 989 COPY (TYPE_ADD_N); 990 REFERENCE (refmpn_sublsh2_n); 991 992 p = ¶m[TYPE_SUBLSH_N]; 993 COPY (TYPE_ADDLSH_N); 994 REFERENCE (refmpn_sublsh_n); 995 996 p = ¶m[TYPE_SUBLSH1_N_IP1]; 997 COPY (TYPE_ADDLSH1_N_IP1); 998 REFERENCE (refmpn_sublsh1_n_ip1); 999 1000 p = ¶m[TYPE_SUBLSH2_N_IP1]; 1001 COPY (TYPE_ADDLSH1_N_IP1); 1002 REFERENCE (refmpn_sublsh2_n_ip1); 1003 1004 p = ¶m[TYPE_SUBLSH_N_IP1]; 1005 COPY (TYPE_ADDLSH_N_IP1); 1006 REFERENCE (refmpn_sublsh_n_ip1); 1007 1008 p = ¶m[TYPE_RSBLSH1_N]; 1009 COPY (TYPE_ADD_N); 1010 REFERENCE (refmpn_rsblsh1_n); 1011 1012 p = ¶m[TYPE_RSBLSH2_N]; 1013 COPY (TYPE_ADD_N); 1014 REFERENCE (refmpn_rsblsh2_n); 1015 1016 p = ¶m[TYPE_RSBLSH_N]; 1017 COPY (TYPE_ADDLSH_N); 1018 REFERENCE (refmpn_rsblsh_n); 1019 1020 p = ¶m[TYPE_RSH1ADD_N]; 1021 COPY (TYPE_ADD_N); 1022 REFERENCE (refmpn_rsh1add_n); 1023 1024 p = ¶m[TYPE_RSH1SUB_N]; 1025 COPY (TYPE_ADD_N); 1026 REFERENCE (refmpn_rsh1sub_n); 1027 1028 1029 p = ¶m[TYPE_ADDLSH1_NC]; 1030 COPY (TYPE_ADDLSH1_N); 1031 p->carry = CARRY_3; 1032 REFERENCE (refmpn_addlsh1_nc); 1033 1034 p = ¶m[TYPE_ADDLSH2_NC]; 1035 COPY (TYPE_ADDLSH2_N); 1036 p->carry = CARRY_4; /* FIXME */ 1037 REFERENCE (refmpn_addlsh2_nc); 1038 1039 p = ¶m[TYPE_ADDLSH_NC]; 1040 COPY (TYPE_ADDLSH_N); 1041 p->carry = CARRY_BIT; /* FIXME */ 1042 REFERENCE (refmpn_addlsh_nc); 1043 1044 p = ¶m[TYPE_SUBLSH1_NC]; 1045 COPY (TYPE_ADDLSH1_NC); 1046 REFERENCE (refmpn_sublsh1_nc); 1047 1048 p = ¶m[TYPE_SUBLSH2_NC]; 1049 COPY (TYPE_ADDLSH2_NC); 1050 REFERENCE (refmpn_sublsh2_nc); 1051 1052 p = ¶m[TYPE_SUBLSH_NC]; 1053 COPY (TYPE_ADDLSH_NC); 1054 REFERENCE (refmpn_sublsh_nc); 1055 1056 p = ¶m[TYPE_RSBLSH1_NC]; 1057 COPY (TYPE_RSBLSH1_N); 1058 p->carry = CARRY_BIT; /* FIXME */ 1059 REFERENCE (refmpn_rsblsh1_nc); 1060 1061 p = ¶m[TYPE_RSBLSH2_NC]; 1062 COPY (TYPE_RSBLSH2_N); 1063 p->carry = CARRY_4; /* FIXME */ 1064 REFERENCE (refmpn_rsblsh2_nc); 1065 1066 p = ¶m[TYPE_RSBLSH_NC]; 1067 COPY (TYPE_RSBLSH_N); 1068 p->carry = CARRY_BIT; /* FIXME */ 1069 REFERENCE (refmpn_rsblsh_nc); 1070 1071 1072 p = ¶m[TYPE_MOD_1]; 1073 p->retval = 1; 1074 p->src[0] = 1; 1075 p->size = SIZE_ALLOW_ZERO; 1076 p->divisor = DIVISOR_LIMB; 1077 REFERENCE (refmpn_mod_1); 1078 1079 p = ¶m[TYPE_MOD_1C]; 1080 COPY (TYPE_MOD_1); 1081 p->carry = CARRY_DIVISOR; 1082 REFERENCE (refmpn_mod_1c); 1083 1084 p = ¶m[TYPE_DIVMOD_1]; 1085 COPY (TYPE_MOD_1); 1086 p->dst[0] = 1; 1087 REFERENCE (refmpn_divmod_1); 1088 1089 p = ¶m[TYPE_DIVMOD_1C]; 1090 COPY (TYPE_DIVMOD_1); 1091 p->carry = CARRY_DIVISOR; 1092 REFERENCE (refmpn_divmod_1c); 1093 1094 p = ¶m[TYPE_DIVREM_1]; 1095 COPY (TYPE_DIVMOD_1); 1096 p->size2 = SIZE_FRACTION; 1097 p->dst_size[0] = SIZE_SUM; 1098 REFERENCE (refmpn_divrem_1); 1099 1100 p = ¶m[TYPE_DIVREM_1C]; 1101 COPY (TYPE_DIVREM_1); 1102 p->carry = CARRY_DIVISOR; 1103 REFERENCE (refmpn_divrem_1c); 1104 1105 p = ¶m[TYPE_PREINV_DIVREM_1]; 1106 COPY (TYPE_DIVREM_1); 1107 p->size = SIZE_YES; /* ie. no size==0 */ 1108 REFERENCE (refmpn_preinv_divrem_1); 1109 1110 p = ¶m[TYPE_PREINV_MOD_1]; 1111 p->retval = 1; 1112 p->src[0] = 1; 1113 p->divisor = DIVISOR_NORM; 1114 REFERENCE (refmpn_preinv_mod_1); 1115 1116 p = ¶m[TYPE_MOD_34LSUB1]; 1117 p->retval = 1; 1118 p->src[0] = 1; 1119 VALIDATE (validate_mod_34lsub1); 1120 1121 p = ¶m[TYPE_UDIV_QRNND]; 1122 p->retval = 1; 1123 p->src[0] = 1; 1124 p->dst[0] = 1; 1125 p->dst_size[0] = SIZE_1; 1126 p->divisor = UDIV_NEEDS_NORMALIZATION ? DIVISOR_NORM : DIVISOR_LIMB; 1127 p->data = DATA_UDIV_QRNND; 1128 p->overlap = OVERLAP_NONE; 1129 REFERENCE (refmpn_udiv_qrnnd); 1130 1131 p = ¶m[TYPE_UDIV_QRNND_R]; 1132 COPY (TYPE_UDIV_QRNND); 1133 REFERENCE (refmpn_udiv_qrnnd_r); 1134 1135 1136 p = ¶m[TYPE_DIVEXACT_1]; 1137 p->dst[0] = 1; 1138 p->src[0] = 1; 1139 p->divisor = DIVISOR_LIMB; 1140 p->data = DATA_MULTIPLE_DIVISOR; 1141 VALIDATE (validate_divexact_1); 1142 REFERENCE (refmpn_divmod_1); 1143 1144 p = ¶m[TYPE_BDIV_Q_1]; 1145 p->dst[0] = 1; 1146 p->src[0] = 1; 1147 p->divisor = DIVISOR_LIMB; 1148 VALIDATE (validate_bdiv_q_1); 1149 1150 p = ¶m[TYPE_DIVEXACT_BY3]; 1151 p->retval = 1; 1152 p->dst[0] = 1; 1153 p->src[0] = 1; 1154 REFERENCE (refmpn_divexact_by3); 1155 1156 p = ¶m[TYPE_DIVEXACT_BY3C]; 1157 COPY (TYPE_DIVEXACT_BY3); 1158 p->carry = CARRY_3; 1159 REFERENCE (refmpn_divexact_by3c); 1160 1161 1162 p = ¶m[TYPE_MODEXACT_1_ODD]; 1163 p->retval = 1; 1164 p->src[0] = 1; 1165 p->divisor = DIVISOR_ODD; 1166 VALIDATE (validate_modexact_1_odd); 1167 1168 p = ¶m[TYPE_MODEXACT_1C_ODD]; 1169 COPY (TYPE_MODEXACT_1_ODD); 1170 p->carry = CARRY_LIMB; 1171 VALIDATE (validate_modexact_1c_odd); 1172 1173 1174 p = ¶m[TYPE_GCD_1]; 1175 p->retval = 1; 1176 p->src[0] = 1; 1177 p->data = DATA_NON_ZERO; 1178 p->divisor = DIVISOR_LIMB; 1179 REFERENCE (refmpn_gcd_1); 1180 1181 p = ¶m[TYPE_GCD]; 1182 p->retval = 1; 1183 p->dst[0] = 1; 1184 p->src[0] = 1; 1185 p->src[1] = 1; 1186 p->size2 = 1; 1187 p->dst_size[0] = SIZE_RETVAL; 1188 p->overlap = OVERLAP_NOT_SRCS; 1189 p->data = DATA_GCD; 1190 REFERENCE (refmpn_gcd); 1191 1192 1193 p = ¶m[TYPE_MPZ_LEGENDRE]; 1194 p->retval = 1; 1195 p->src[0] = 1; 1196 p->size = SIZE_ALLOW_ZERO; 1197 p->src[1] = 1; 1198 p->data = DATA_SRC1_ODD_PRIME; 1199 p->size2 = 1; 1200 p->carry = CARRY_BIT; 1201 p->carry_sign = 1; 1202 REFERENCE (refmpz_legendre); 1203 1204 p = ¶m[TYPE_MPZ_JACOBI]; 1205 p->retval = 1; 1206 p->src[0] = 1; 1207 p->size = SIZE_ALLOW_ZERO; 1208 p->src[1] = 1; 1209 p->data = DATA_SRC1_ODD; 1210 p->size2 = 1; 1211 p->carry = CARRY_BIT; 1212 p->carry_sign = 1; 1213 REFERENCE (refmpz_jacobi); 1214 1215 p = ¶m[TYPE_MPZ_KRONECKER]; 1216 p->retval = 1; 1217 p->src[0] = 1; 1218 p->size = SIZE_ALLOW_ZERO; 1219 p->src[1] = 1; 1220 p->data = 0; 1221 p->size2 = 1; 1222 p->carry = CARRY_4; 1223 p->carry_sign = 1; 1224 REFERENCE (refmpz_kronecker); 1225 1226 1227 p = ¶m[TYPE_MPZ_KRONECKER_UI]; 1228 p->retval = 1; 1229 p->src[0] = 1; 1230 p->size = SIZE_ALLOW_ZERO; 1231 p->multiplier = 1; 1232 p->carry = CARRY_BIT; 1233 REFERENCE (refmpz_kronecker_ui); 1234 1235 p = ¶m[TYPE_MPZ_KRONECKER_SI]; 1236 COPY (TYPE_MPZ_KRONECKER_UI); 1237 REFERENCE (refmpz_kronecker_si); 1238 1239 p = ¶m[TYPE_MPZ_UI_KRONECKER]; 1240 COPY (TYPE_MPZ_KRONECKER_UI); 1241 REFERENCE (refmpz_ui_kronecker); 1242 1243 p = ¶m[TYPE_MPZ_SI_KRONECKER]; 1244 COPY (TYPE_MPZ_KRONECKER_UI); 1245 REFERENCE (refmpz_si_kronecker); 1246 1247 1248 p = ¶m[TYPE_SQR]; 1249 p->dst[0] = 1; 1250 p->src[0] = 1; 1251 p->dst_size[0] = SIZE_SUM; 1252 p->overlap = OVERLAP_NONE; 1253 REFERENCE (refmpn_sqr); 1254 1255 p = ¶m[TYPE_MUL_N]; 1256 COPY (TYPE_SQR); 1257 p->src[1] = 1; 1258 REFERENCE (refmpn_mul_n); 1259 1260 p = ¶m[TYPE_MULLO_N]; 1261 COPY (TYPE_MUL_N); 1262 p->dst_size[0] = 0; 1263 REFERENCE (refmpn_mullo_n); 1264 1265 p = ¶m[TYPE_MUL_MN]; 1266 COPY (TYPE_MUL_N); 1267 p->size2 = 1; 1268 REFERENCE (refmpn_mul_basecase); 1269 1270 p = ¶m[TYPE_MULMID_MN]; 1271 COPY (TYPE_MUL_MN); 1272 p->dst_size[0] = SIZE_DIFF_PLUS_3; 1273 REFERENCE (refmpn_mulmid_basecase); 1274 1275 p = ¶m[TYPE_MULMID_N]; 1276 COPY (TYPE_MUL_N); 1277 p->size = SIZE_ODD; 1278 p->size2 = SIZE_CEIL_HALF; 1279 p->dst_size[0] = SIZE_DIFF_PLUS_3; 1280 REFERENCE (refmpn_mulmid_n); 1281 1282 p = ¶m[TYPE_UMUL_PPMM]; 1283 p->retval = 1; 1284 p->src[0] = 1; 1285 p->dst[0] = 1; 1286 p->dst_size[0] = SIZE_1; 1287 p->overlap = OVERLAP_NONE; 1288 REFERENCE (refmpn_umul_ppmm); 1289 1290 p = ¶m[TYPE_UMUL_PPMM_R]; 1291 COPY (TYPE_UMUL_PPMM); 1292 REFERENCE (refmpn_umul_ppmm_r); 1293 1294 1295 p = ¶m[TYPE_RSHIFT]; 1296 p->retval = 1; 1297 p->dst[0] = 1; 1298 p->src[0] = 1; 1299 p->shift = 1; 1300 p->overlap = OVERLAP_LOW_TO_HIGH; 1301 REFERENCE (refmpn_rshift); 1302 1303 p = ¶m[TYPE_LSHIFT]; 1304 COPY (TYPE_RSHIFT); 1305 p->overlap = OVERLAP_HIGH_TO_LOW; 1306 REFERENCE (refmpn_lshift); 1307 1308 p = ¶m[TYPE_LSHIFTC]; 1309 COPY (TYPE_RSHIFT); 1310 p->overlap = OVERLAP_HIGH_TO_LOW; 1311 REFERENCE (refmpn_lshiftc); 1312 1313 1314 p = ¶m[TYPE_POPCOUNT]; 1315 p->retval = 1; 1316 p->src[0] = 1; 1317 REFERENCE (refmpn_popcount); 1318 1319 p = ¶m[TYPE_HAMDIST]; 1320 COPY (TYPE_POPCOUNT); 1321 p->src[1] = 1; 1322 REFERENCE (refmpn_hamdist); 1323 1324 1325 p = ¶m[TYPE_SBPI1_DIV_QR]; 1326 p->retval = 1; 1327 p->dst[0] = 1; 1328 p->dst[1] = 1; 1329 p->src[0] = 1; 1330 p->src[1] = 1; 1331 p->data = DATA_SRC1_HIGHBIT; 1332 p->size2 = 1; 1333 p->dst_size[0] = SIZE_DIFF; 1334 p->overlap = OVERLAP_NONE; 1335 REFERENCE (refmpn_sb_div_qr); 1336 1337 p = ¶m[TYPE_TDIV_QR]; 1338 p->dst[0] = 1; 1339 p->dst[1] = 1; 1340 p->src[0] = 1; 1341 p->src[1] = 1; 1342 p->size2 = 1; 1343 p->dst_size[0] = SIZE_DIFF_PLUS_1; 1344 p->dst_size[1] = SIZE_SIZE2; 1345 p->overlap = OVERLAP_NONE; 1346 REFERENCE (refmpn_tdiv_qr); 1347 1348 p = ¶m[TYPE_SQRTREM]; 1349 p->retval = 1; 1350 p->dst[0] = 1; 1351 p->dst[1] = 1; 1352 p->src[0] = 1; 1353 p->dst_size[0] = SIZE_CEIL_HALF; 1354 p->dst_size[1] = SIZE_RETVAL; 1355 p->overlap = OVERLAP_NONE; 1356 VALIDATE (validate_sqrtrem); 1357 REFERENCE (refmpn_sqrtrem); 1358 1359 p = ¶m[TYPE_ZERO]; 1360 p->dst[0] = 1; 1361 p->size = SIZE_ALLOW_ZERO; 1362 REFERENCE (refmpn_zero); 1363 1364 p = ¶m[TYPE_GET_STR]; 1365 p->retval = 1; 1366 p->src[0] = 1; 1367 p->size = SIZE_ALLOW_ZERO; 1368 p->dst[0] = 1; 1369 p->dst[1] = 1; 1370 p->dst_size[0] = SIZE_GET_STR; 1371 p->dst_bytes[0] = 1; 1372 p->overlap = OVERLAP_NONE; 1373 REFERENCE (refmpn_get_str); 1374 1375 p = ¶m[TYPE_BINVERT]; 1376 p->dst[0] = 1; 1377 p->src[0] = 1; 1378 p->data = DATA_SRC0_ODD; 1379 p->overlap = OVERLAP_NONE; 1380 REFERENCE (refmpn_binvert); 1381 1382 p = ¶m[TYPE_INVERT]; 1383 p->dst[0] = 1; 1384 p->src[0] = 1; 1385 p->data = DATA_SRC0_HIGHBIT; 1386 p->overlap = OVERLAP_NONE; 1387 REFERENCE (refmpn_invert); 1388 1389 #ifdef EXTRA_PARAM_INIT 1390 EXTRA_PARAM_INIT 1391 #endif 1392 } 1393 1394 1395 /* The following are macros if there's no native versions, so wrap them in 1396 functions that can be in try_array[]. */ 1397 1398 void 1399 MPN_COPY_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size) 1400 { MPN_COPY (rp, sp, size); } 1401 1402 void 1403 MPN_COPY_INCR_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size) 1404 { MPN_COPY_INCR (rp, sp, size); } 1405 1406 void 1407 MPN_COPY_DECR_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size) 1408 { MPN_COPY_DECR (rp, sp, size); } 1409 1410 void 1411 __GMPN_COPY_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size) 1412 { __GMPN_COPY (rp, sp, size); } 1413 1414 #ifdef __GMPN_COPY_INCR 1415 void 1416 __GMPN_COPY_INCR_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size) 1417 { __GMPN_COPY_INCR (rp, sp, size); } 1418 #endif 1419 1420 void 1421 mpn_com_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size) 1422 { mpn_com (rp, sp, size); } 1423 1424 void 1425 mpn_and_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size) 1426 { mpn_and_n (rp, s1, s2, size); } 1427 1428 void 1429 mpn_andn_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size) 1430 { mpn_andn_n (rp, s1, s2, size); } 1431 1432 void 1433 mpn_nand_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size) 1434 { mpn_nand_n (rp, s1, s2, size); } 1435 1436 void 1437 mpn_ior_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size) 1438 { mpn_ior_n (rp, s1, s2, size); } 1439 1440 void 1441 mpn_iorn_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size) 1442 { mpn_iorn_n (rp, s1, s2, size); } 1443 1444 void 1445 mpn_nior_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size) 1446 { mpn_nior_n (rp, s1, s2, size); } 1447 1448 void 1449 mpn_xor_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size) 1450 { mpn_xor_n (rp, s1, s2, size); } 1451 1452 void 1453 mpn_xnor_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size) 1454 { mpn_xnor_n (rp, s1, s2, size); } 1455 1456 mp_limb_t 1457 udiv_qrnnd_fun (mp_limb_t *remptr, mp_limb_t n1, mp_limb_t n0, mp_limb_t d) 1458 { 1459 mp_limb_t q; 1460 udiv_qrnnd (q, *remptr, n1, n0, d); 1461 return q; 1462 } 1463 1464 mp_limb_t 1465 mpn_divexact_by3_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size) 1466 { 1467 return mpn_divexact_by3 (rp, sp, size); 1468 } 1469 1470 #if HAVE_NATIVE_mpn_addlsh1_n_ip1 1471 mp_limb_t 1472 mpn_addlsh1_n_ip1_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size) 1473 { 1474 return mpn_addlsh1_n_ip1 (rp, sp, size); 1475 } 1476 #endif 1477 #if HAVE_NATIVE_mpn_addlsh2_n_ip1 1478 mp_limb_t 1479 mpn_addlsh2_n_ip1_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size) 1480 { 1481 return mpn_addlsh2_n_ip1 (rp, sp, size); 1482 } 1483 #endif 1484 #if HAVE_NATIVE_mpn_addlsh_n_ip1 1485 mp_limb_t 1486 mpn_addlsh_n_ip1_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned int sh) 1487 { 1488 return mpn_addlsh_n_ip1 (rp, sp, size, sh); 1489 } 1490 #endif 1491 #if HAVE_NATIVE_mpn_addlsh1_n_ip2 1492 mp_limb_t 1493 mpn_addlsh1_n_ip2_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size) 1494 { 1495 return mpn_addlsh1_n_ip2 (rp, sp, size); 1496 } 1497 #endif 1498 #if HAVE_NATIVE_mpn_addlsh2_n_ip2 1499 mp_limb_t 1500 mpn_addlsh2_n_ip2_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size) 1501 { 1502 return mpn_addlsh2_n_ip2 (rp, sp, size); 1503 } 1504 #endif 1505 #if HAVE_NATIVE_mpn_addlsh_n_ip2 1506 mp_limb_t 1507 mpn_addlsh_n_ip2_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned int sh) 1508 { 1509 return mpn_addlsh_n_ip2 (rp, sp, size, sh); 1510 } 1511 #endif 1512 #if HAVE_NATIVE_mpn_sublsh1_n_ip1 1513 mp_limb_t 1514 mpn_sublsh1_n_ip1_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size) 1515 { 1516 return mpn_sublsh1_n_ip1 (rp, sp, size); 1517 } 1518 #endif 1519 #if HAVE_NATIVE_mpn_sublsh2_n_ip1 1520 mp_limb_t 1521 mpn_sublsh2_n_ip1_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size) 1522 { 1523 return mpn_sublsh2_n_ip1 (rp, sp, size); 1524 } 1525 #endif 1526 #if HAVE_NATIVE_mpn_sublsh_n_ip1 1527 mp_limb_t 1528 mpn_sublsh_n_ip1_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned int sh) 1529 { 1530 return mpn_sublsh_n_ip1 (rp, sp, size, sh); 1531 } 1532 #endif 1533 1534 mp_limb_t 1535 mpn_modexact_1_odd_fun (mp_srcptr ptr, mp_size_t size, mp_limb_t divisor) 1536 { 1537 return mpn_modexact_1_odd (ptr, size, divisor); 1538 } 1539 1540 void 1541 mpn_toom22_mul_fun (mp_ptr dst, mp_srcptr src1, mp_srcptr src2, mp_size_t size) 1542 { 1543 mp_ptr tspace; 1544 TMP_DECL; 1545 TMP_MARK; 1546 tspace = TMP_ALLOC_LIMBS (mpn_toom22_mul_itch (size, size)); 1547 mpn_toom22_mul (dst, src1, size, src2, size, tspace); 1548 TMP_FREE; 1549 } 1550 void 1551 mpn_toom2_sqr_fun (mp_ptr dst, mp_srcptr src, mp_size_t size) 1552 { 1553 mp_ptr tspace; 1554 TMP_DECL; 1555 TMP_MARK; 1556 tspace = TMP_ALLOC_LIMBS (mpn_toom2_sqr_itch (size)); 1557 mpn_toom2_sqr (dst, src, size, tspace); 1558 TMP_FREE; 1559 } 1560 void 1561 mpn_toom33_mul_fun (mp_ptr dst, mp_srcptr src1, mp_srcptr src2, mp_size_t size) 1562 { 1563 mp_ptr tspace; 1564 TMP_DECL; 1565 TMP_MARK; 1566 tspace = TMP_ALLOC_LIMBS (mpn_toom33_mul_itch (size, size)); 1567 mpn_toom33_mul (dst, src1, size, src2, size, tspace); 1568 TMP_FREE; 1569 } 1570 void 1571 mpn_toom3_sqr_fun (mp_ptr dst, mp_srcptr src, mp_size_t size) 1572 { 1573 mp_ptr tspace; 1574 TMP_DECL; 1575 TMP_MARK; 1576 tspace = TMP_ALLOC_LIMBS (mpn_toom3_sqr_itch (size)); 1577 mpn_toom3_sqr (dst, src, size, tspace); 1578 TMP_FREE; 1579 } 1580 void 1581 mpn_toom44_mul_fun (mp_ptr dst, mp_srcptr src1, mp_srcptr src2, mp_size_t size) 1582 { 1583 mp_ptr tspace; 1584 TMP_DECL; 1585 TMP_MARK; 1586 tspace = TMP_ALLOC_LIMBS (mpn_toom44_mul_itch (size, size)); 1587 mpn_toom44_mul (dst, src1, size, src2, size, tspace); 1588 TMP_FREE; 1589 } 1590 void 1591 mpn_toom4_sqr_fun (mp_ptr dst, mp_srcptr src, mp_size_t size) 1592 { 1593 mp_ptr tspace; 1594 TMP_DECL; 1595 TMP_MARK; 1596 tspace = TMP_ALLOC_LIMBS (mpn_toom4_sqr_itch (size)); 1597 mpn_toom4_sqr (dst, src, size, tspace); 1598 TMP_FREE; 1599 } 1600 1601 void 1602 mpn_toom42_mulmid_fun (mp_ptr dst, mp_srcptr src1, mp_srcptr src2, 1603 mp_size_t size) 1604 { 1605 mp_ptr tspace; 1606 mp_size_t n; 1607 TMP_DECL; 1608 TMP_MARK; 1609 tspace = TMP_ALLOC_LIMBS (mpn_toom42_mulmid_itch (size)); 1610 mpn_toom42_mulmid (dst, src1, src2, size, tspace); 1611 TMP_FREE; 1612 } 1613 1614 mp_limb_t 1615 umul_ppmm_fun (mp_limb_t *lowptr, mp_limb_t m1, mp_limb_t m2) 1616 { 1617 mp_limb_t high; 1618 umul_ppmm (high, *lowptr, m1, m2); 1619 return high; 1620 } 1621 1622 void 1623 MPN_ZERO_fun (mp_ptr ptr, mp_size_t size) 1624 { MPN_ZERO (ptr, size); } 1625 1626 1627 struct choice_t { 1628 const char *name; 1629 tryfun_t function; 1630 int type; 1631 mp_size_t minsize; 1632 }; 1633 1634 #if HAVE_STRINGIZE 1635 #define TRY(fun) #fun, (tryfun_t) fun 1636 #define TRY_FUNFUN(fun) #fun, (tryfun_t) fun##_fun 1637 #else 1638 #define TRY(fun) "fun", (tryfun_t) fun 1639 #define TRY_FUNFUN(fun) "fun", (tryfun_t) fun/**/_fun 1640 #endif 1641 1642 const struct choice_t choice_array[] = { 1643 { TRY(mpn_add), TYPE_ADD }, 1644 { TRY(mpn_sub), TYPE_SUB }, 1645 1646 { TRY(mpn_add_n), TYPE_ADD_N }, 1647 { TRY(mpn_sub_n), TYPE_SUB_N }, 1648 1649 #if HAVE_NATIVE_mpn_add_nc 1650 { TRY(mpn_add_nc), TYPE_ADD_NC }, 1651 #endif 1652 #if HAVE_NATIVE_mpn_sub_nc 1653 { TRY(mpn_sub_nc), TYPE_SUB_NC }, 1654 #endif 1655 1656 #if HAVE_NATIVE_mpn_add_n_sub_n 1657 { TRY(mpn_add_n_sub_n), TYPE_ADDSUB_N }, 1658 #endif 1659 #if HAVE_NATIVE_mpn_add_n_sub_nc 1660 { TRY(mpn_add_n_sub_nc), TYPE_ADDSUB_NC }, 1661 #endif 1662 1663 { TRY(mpn_add_err1_n), TYPE_ADD_ERR1_N }, 1664 { TRY(mpn_sub_err1_n), TYPE_SUB_ERR1_N }, 1665 { TRY(mpn_add_err2_n), TYPE_ADD_ERR2_N }, 1666 { TRY(mpn_sub_err2_n), TYPE_SUB_ERR2_N }, 1667 { TRY(mpn_add_err3_n), TYPE_ADD_ERR3_N }, 1668 { TRY(mpn_sub_err3_n), TYPE_SUB_ERR3_N }, 1669 1670 { TRY(mpn_addmul_1), TYPE_ADDMUL_1 }, 1671 { TRY(mpn_submul_1), TYPE_SUBMUL_1 }, 1672 #if HAVE_NATIVE_mpn_addmul_1c 1673 { TRY(mpn_addmul_1c), TYPE_ADDMUL_1C }, 1674 #endif 1675 #if HAVE_NATIVE_mpn_submul_1c 1676 { TRY(mpn_submul_1c), TYPE_SUBMUL_1C }, 1677 #endif 1678 1679 #if HAVE_NATIVE_mpn_addmul_2 1680 { TRY(mpn_addmul_2), TYPE_ADDMUL_2, 2 }, 1681 #endif 1682 #if HAVE_NATIVE_mpn_addmul_3 1683 { TRY(mpn_addmul_3), TYPE_ADDMUL_3, 3 }, 1684 #endif 1685 #if HAVE_NATIVE_mpn_addmul_4 1686 { TRY(mpn_addmul_4), TYPE_ADDMUL_4, 4 }, 1687 #endif 1688 #if HAVE_NATIVE_mpn_addmul_5 1689 { TRY(mpn_addmul_5), TYPE_ADDMUL_5, 5 }, 1690 #endif 1691 #if HAVE_NATIVE_mpn_addmul_6 1692 { TRY(mpn_addmul_6), TYPE_ADDMUL_6, 6 }, 1693 #endif 1694 #if HAVE_NATIVE_mpn_addmul_7 1695 { TRY(mpn_addmul_7), TYPE_ADDMUL_7, 7 }, 1696 #endif 1697 #if HAVE_NATIVE_mpn_addmul_8 1698 { TRY(mpn_addmul_8), TYPE_ADDMUL_8, 8 }, 1699 #endif 1700 1701 { TRY_FUNFUN(mpn_com), TYPE_COM }, 1702 1703 { TRY_FUNFUN(MPN_COPY), TYPE_COPY }, 1704 { TRY_FUNFUN(MPN_COPY_INCR), TYPE_COPYI }, 1705 { TRY_FUNFUN(MPN_COPY_DECR), TYPE_COPYD }, 1706 1707 { TRY_FUNFUN(__GMPN_COPY), TYPE_COPY }, 1708 #ifdef __GMPN_COPY_INCR 1709 { TRY_FUNFUN(__GMPN_COPY_INCR), TYPE_COPYI }, 1710 #endif 1711 1712 #if HAVE_NATIVE_mpn_copyi 1713 { TRY(mpn_copyi), TYPE_COPYI }, 1714 #endif 1715 #if HAVE_NATIVE_mpn_copyd 1716 { TRY(mpn_copyd), TYPE_COPYD }, 1717 #endif 1718 1719 { TRY(mpn_addcnd_n), TYPE_ADDCND_N }, 1720 { TRY(mpn_subcnd_n), TYPE_SUBCND_N }, 1721 #if HAVE_NATIVE_mpn_addlsh1_n 1722 { TRY(mpn_addlsh1_n), TYPE_ADDLSH1_N }, 1723 #endif 1724 #if HAVE_NATIVE_mpn_addlsh2_n 1725 { TRY(mpn_addlsh2_n), TYPE_ADDLSH2_N }, 1726 #endif 1727 #if HAVE_NATIVE_mpn_addlsh_n 1728 { TRY(mpn_addlsh_n), TYPE_ADDLSH_N }, 1729 #endif 1730 #if HAVE_NATIVE_mpn_addlsh1_n_ip1 1731 { TRY_FUNFUN(mpn_addlsh1_n_ip1), TYPE_ADDLSH1_N_IP1 }, 1732 #endif 1733 #if HAVE_NATIVE_mpn_addlsh2_n_ip1 1734 { TRY_FUNFUN(mpn_addlsh2_n_ip1), TYPE_ADDLSH2_N_IP1 }, 1735 #endif 1736 #if HAVE_NATIVE_mpn_addlsh_n_ip1 1737 { TRY_FUNFUN(mpn_addlsh_n_ip1), TYPE_ADDLSH_N_IP1 }, 1738 #endif 1739 #if HAVE_NATIVE_mpn_addlsh1_n_ip2 1740 { TRY_FUNFUN(mpn_addlsh1_n_ip2), TYPE_ADDLSH1_N_IP2 }, 1741 #endif 1742 #if HAVE_NATIVE_mpn_addlsh2_n_ip2 1743 { TRY_FUNFUN(mpn_addlsh2_n_ip2), TYPE_ADDLSH2_N_IP2 }, 1744 #endif 1745 #if HAVE_NATIVE_mpn_addlsh_n_ip2 1746 { TRY_FUNFUN(mpn_addlsh_n_ip2), TYPE_ADDLSH_N_IP2 }, 1747 #endif 1748 #if HAVE_NATIVE_mpn_sublsh1_n 1749 { TRY(mpn_sublsh1_n), TYPE_SUBLSH1_N }, 1750 #endif 1751 #if HAVE_NATIVE_mpn_sublsh2_n 1752 { TRY(mpn_sublsh2_n), TYPE_SUBLSH2_N }, 1753 #endif 1754 #if HAVE_NATIVE_mpn_sublsh_n 1755 { TRY(mpn_sublsh_n), TYPE_SUBLSH_N }, 1756 #endif 1757 #if HAVE_NATIVE_mpn_sublsh1_n_ip1 1758 { TRY_FUNFUN(mpn_sublsh1_n_ip1), TYPE_SUBLSH1_N_IP1 }, 1759 #endif 1760 #if HAVE_NATIVE_mpn_sublsh2_n_ip1 1761 { TRY_FUNFUN(mpn_sublsh2_n_ip1), TYPE_SUBLSH2_N_IP1 }, 1762 #endif 1763 #if HAVE_NATIVE_mpn_sublsh_n_ip1 1764 { TRY_FUNFUN(mpn_sublsh_n_ip1), TYPE_SUBLSH_N_IP1 }, 1765 #endif 1766 #if HAVE_NATIVE_mpn_rsblsh1_n 1767 { TRY(mpn_rsblsh1_n), TYPE_RSBLSH1_N }, 1768 #endif 1769 #if HAVE_NATIVE_mpn_rsblsh2_n 1770 { TRY(mpn_rsblsh2_n), TYPE_RSBLSH2_N }, 1771 #endif 1772 #if HAVE_NATIVE_mpn_rsblsh_n 1773 { TRY(mpn_rsblsh_n), TYPE_RSBLSH_N }, 1774 #endif 1775 #if HAVE_NATIVE_mpn_rsh1add_n 1776 { TRY(mpn_rsh1add_n), TYPE_RSH1ADD_N }, 1777 #endif 1778 #if HAVE_NATIVE_mpn_rsh1sub_n 1779 { TRY(mpn_rsh1sub_n), TYPE_RSH1SUB_N }, 1780 #endif 1781 1782 #if HAVE_NATIVE_mpn_addlsh1_nc 1783 { TRY(mpn_addlsh1_nc), TYPE_ADDLSH1_NC }, 1784 #endif 1785 #if HAVE_NATIVE_mpn_addlsh2_nc 1786 { TRY(mpn_addlsh2_nc), TYPE_ADDLSH2_NC }, 1787 #endif 1788 #if HAVE_NATIVE_mpn_addlsh_nc 1789 { TRY(mpn_addlsh_nc), TYPE_ADDLSH_NC }, 1790 #endif 1791 #if HAVE_NATIVE_mpn_sublsh1_nc 1792 { TRY(mpn_sublsh1_nc), TYPE_SUBLSH1_NC }, 1793 #endif 1794 #if HAVE_NATIVE_mpn_sublsh2_nc 1795 { TRY(mpn_sublsh2_nc), TYPE_SUBLSH2_NC }, 1796 #endif 1797 #if HAVE_NATIVE_mpn_sublsh_nc 1798 { TRY(mpn_sublsh_nc), TYPE_SUBLSH_NC }, 1799 #endif 1800 #if HAVE_NATIVE_mpn_rsblsh1_nc 1801 { TRY(mpn_rsblsh1_nc), TYPE_RSBLSH1_NC }, 1802 #endif 1803 #if HAVE_NATIVE_mpn_rsblsh2_nc 1804 { TRY(mpn_rsblsh2_nc), TYPE_RSBLSH2_NC }, 1805 #endif 1806 #if HAVE_NATIVE_mpn_rsblsh_nc 1807 { TRY(mpn_rsblsh_nc), TYPE_RSBLSH_NC }, 1808 #endif 1809 1810 { TRY_FUNFUN(mpn_and_n), TYPE_AND_N }, 1811 { TRY_FUNFUN(mpn_andn_n), TYPE_ANDN_N }, 1812 { TRY_FUNFUN(mpn_nand_n), TYPE_NAND_N }, 1813 { TRY_FUNFUN(mpn_ior_n), TYPE_IOR_N }, 1814 { TRY_FUNFUN(mpn_iorn_n), TYPE_IORN_N }, 1815 { TRY_FUNFUN(mpn_nior_n), TYPE_NIOR_N }, 1816 { TRY_FUNFUN(mpn_xor_n), TYPE_XOR_N }, 1817 { TRY_FUNFUN(mpn_xnor_n), TYPE_XNOR_N }, 1818 1819 { TRY(mpn_divrem_1), TYPE_DIVREM_1 }, 1820 #if USE_PREINV_DIVREM_1 1821 { TRY(mpn_preinv_divrem_1), TYPE_PREINV_DIVREM_1 }, 1822 #endif 1823 { TRY(mpn_mod_1), TYPE_MOD_1 }, 1824 #if USE_PREINV_MOD_1 1825 { TRY(mpn_preinv_mod_1), TYPE_PREINV_MOD_1 }, 1826 #endif 1827 #if HAVE_NATIVE_mpn_divrem_1c 1828 { TRY(mpn_divrem_1c), TYPE_DIVREM_1C }, 1829 #endif 1830 #if HAVE_NATIVE_mpn_mod_1c 1831 { TRY(mpn_mod_1c), TYPE_MOD_1C }, 1832 #endif 1833 #if GMP_NUMB_BITS % 4 == 0 1834 { TRY(mpn_mod_34lsub1), TYPE_MOD_34LSUB1 }, 1835 #endif 1836 1837 { TRY_FUNFUN(udiv_qrnnd), TYPE_UDIV_QRNND, 2 }, 1838 #if HAVE_NATIVE_mpn_udiv_qrnnd 1839 { TRY(mpn_udiv_qrnnd), TYPE_UDIV_QRNND, 2 }, 1840 #endif 1841 #if HAVE_NATIVE_mpn_udiv_qrnnd_r 1842 { TRY(mpn_udiv_qrnnd_r), TYPE_UDIV_QRNND_R, 2 }, 1843 #endif 1844 1845 { TRY(mpn_divexact_1), TYPE_DIVEXACT_1 }, 1846 { TRY(mpn_bdiv_q_1), TYPE_BDIV_Q_1 }, 1847 { TRY_FUNFUN(mpn_divexact_by3), TYPE_DIVEXACT_BY3 }, 1848 { TRY(mpn_divexact_by3c), TYPE_DIVEXACT_BY3C }, 1849 1850 { TRY_FUNFUN(mpn_modexact_1_odd), TYPE_MODEXACT_1_ODD }, 1851 { TRY(mpn_modexact_1c_odd), TYPE_MODEXACT_1C_ODD }, 1852 1853 1854 { TRY(mpn_sbpi1_div_qr), TYPE_SBPI1_DIV_QR, 3}, 1855 { TRY(mpn_tdiv_qr), TYPE_TDIV_QR }, 1856 1857 { TRY(mpn_mul_1), TYPE_MUL_1 }, 1858 #if HAVE_NATIVE_mpn_mul_1c 1859 { TRY(mpn_mul_1c), TYPE_MUL_1C }, 1860 #endif 1861 #if HAVE_NATIVE_mpn_mul_2 1862 { TRY(mpn_mul_2), TYPE_MUL_2, 2 }, 1863 #endif 1864 #if HAVE_NATIVE_mpn_mul_3 1865 { TRY(mpn_mul_3), TYPE_MUL_3, 3 }, 1866 #endif 1867 #if HAVE_NATIVE_mpn_mul_4 1868 { TRY(mpn_mul_4), TYPE_MUL_4, 4 }, 1869 #endif 1870 #if HAVE_NATIVE_mpn_mul_5 1871 { TRY(mpn_mul_5), TYPE_MUL_5, 5 }, 1872 #endif 1873 #if HAVE_NATIVE_mpn_mul_6 1874 { TRY(mpn_mul_6), TYPE_MUL_6, 6 }, 1875 #endif 1876 1877 { TRY(mpn_rshift), TYPE_RSHIFT }, 1878 { TRY(mpn_lshift), TYPE_LSHIFT }, 1879 { TRY(mpn_lshiftc), TYPE_LSHIFTC }, 1880 1881 1882 { TRY(mpn_mul_basecase), TYPE_MUL_MN }, 1883 { TRY(mpn_mulmid_basecase), TYPE_MULMID_MN }, 1884 { TRY(mpn_mullo_basecase), TYPE_MULLO_N }, 1885 #if SQR_TOOM2_THRESHOLD > 0 1886 { TRY(mpn_sqr_basecase), TYPE_SQR }, 1887 #endif 1888 1889 { TRY(mpn_mul), TYPE_MUL_MN }, 1890 { TRY(mpn_mul_n), TYPE_MUL_N }, 1891 { TRY(mpn_sqr), TYPE_SQR }, 1892 1893 { TRY_FUNFUN(umul_ppmm), TYPE_UMUL_PPMM, 2 }, 1894 #if HAVE_NATIVE_mpn_umul_ppmm 1895 { TRY(mpn_umul_ppmm), TYPE_UMUL_PPMM, 2 }, 1896 #endif 1897 #if HAVE_NATIVE_mpn_umul_ppmm_r 1898 { TRY(mpn_umul_ppmm_r), TYPE_UMUL_PPMM_R, 2 }, 1899 #endif 1900 1901 { TRY_FUNFUN(mpn_toom22_mul), TYPE_MUL_N, MPN_TOOM22_MUL_MINSIZE }, 1902 { TRY_FUNFUN(mpn_toom2_sqr), TYPE_SQR, MPN_TOOM2_SQR_MINSIZE }, 1903 { TRY_FUNFUN(mpn_toom33_mul), TYPE_MUL_N, MPN_TOOM33_MUL_MINSIZE }, 1904 { TRY_FUNFUN(mpn_toom3_sqr), TYPE_SQR, MPN_TOOM3_SQR_MINSIZE }, 1905 { TRY_FUNFUN(mpn_toom44_mul), TYPE_MUL_N, MPN_TOOM44_MUL_MINSIZE }, 1906 { TRY_FUNFUN(mpn_toom4_sqr), TYPE_SQR, MPN_TOOM4_SQR_MINSIZE }, 1907 1908 { TRY(mpn_mulmid_n), TYPE_MULMID_N, 1 }, 1909 { TRY(mpn_mulmid), TYPE_MULMID_MN, 1 }, 1910 { TRY_FUNFUN(mpn_toom42_mulmid), TYPE_MULMID_N, 1911 (2 * MPN_TOOM42_MULMID_MINSIZE - 1) }, 1912 1913 { TRY(mpn_gcd_1), TYPE_GCD_1 }, 1914 { TRY(mpn_gcd), TYPE_GCD }, 1915 { TRY(mpz_legendre), TYPE_MPZ_LEGENDRE }, 1916 { TRY(mpz_jacobi), TYPE_MPZ_JACOBI }, 1917 { TRY(mpz_kronecker), TYPE_MPZ_KRONECKER }, 1918 { TRY(mpz_kronecker_ui), TYPE_MPZ_KRONECKER_UI }, 1919 { TRY(mpz_kronecker_si), TYPE_MPZ_KRONECKER_SI }, 1920 { TRY(mpz_ui_kronecker), TYPE_MPZ_UI_KRONECKER }, 1921 { TRY(mpz_si_kronecker), TYPE_MPZ_SI_KRONECKER }, 1922 1923 { TRY(mpn_popcount), TYPE_POPCOUNT }, 1924 { TRY(mpn_hamdist), TYPE_HAMDIST }, 1925 1926 { TRY(mpn_sqrtrem), TYPE_SQRTREM }, 1927 1928 { TRY_FUNFUN(MPN_ZERO), TYPE_ZERO }, 1929 1930 { TRY(mpn_get_str), TYPE_GET_STR }, 1931 1932 { TRY(mpn_binvert), TYPE_BINVERT }, 1933 { TRY(mpn_invert), TYPE_INVERT }, 1934 1935 #ifdef EXTRA_ROUTINES 1936 EXTRA_ROUTINES 1937 #endif 1938 }; 1939 1940 const struct choice_t *choice = NULL; 1941 1942 1943 void 1944 mprotect_maybe (void *addr, size_t len, int prot) 1945 { 1946 if (!option_redzones) 1947 return; 1948 1949 #if HAVE_MPROTECT 1950 if (mprotect (addr, len, prot) != 0) 1951 { 1952 fprintf (stderr, "Cannot mprotect %p 0x%X 0x%X: %s\n", 1953 addr, (unsigned) len, prot, strerror (errno)); 1954 exit (1); 1955 } 1956 #else 1957 { 1958 static int warned = 0; 1959 if (!warned) 1960 { 1961 fprintf (stderr, 1962 "mprotect not available, bounds testing not performed\n"); 1963 warned = 1; 1964 } 1965 } 1966 #endif 1967 } 1968 1969 /* round "a" up to a multiple of "m" */ 1970 size_t 1971 round_up_multiple (size_t a, size_t m) 1972 { 1973 unsigned long r; 1974 1975 r = a % m; 1976 if (r == 0) 1977 return a; 1978 else 1979 return a + (m - r); 1980 } 1981 1982 1983 /* On some systems it seems that only an mmap'ed region can be mprotect'ed, 1984 for instance HP-UX 10. 1985 1986 mmap will almost certainly return a pointer already aligned to a page 1987 boundary, but it's easy enough to share the alignment handling with the 1988 malloc case. */ 1989 1990 void 1991 malloc_region (struct region_t *r, mp_size_t n) 1992 { 1993 mp_ptr p; 1994 size_t nbytes; 1995 1996 ASSERT ((pagesize % BYTES_PER_MP_LIMB) == 0); 1997 1998 n = round_up_multiple (n, PAGESIZE_LIMBS); 1999 r->size = n; 2000 2001 nbytes = n*BYTES_PER_MP_LIMB + 2*REDZONE_BYTES + pagesize; 2002 2003 #if defined (MAP_ANONYMOUS) && ! defined (MAP_ANON) 2004 #define MAP_ANON MAP_ANONYMOUS 2005 #endif 2006 2007 #if HAVE_MMAP && defined (MAP_ANON) 2008 /* note must pass fd=-1 for MAP_ANON on BSD */ 2009 p = (mp_ptr) mmap (NULL, nbytes, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON, -1, 0); 2010 if (p == (void *) -1) 2011 { 2012 fprintf (stderr, "Cannot mmap %#x anon bytes: %s\n", 2013 (unsigned) nbytes, strerror (errno)); 2014 exit (1); 2015 } 2016 #else 2017 p = (mp_ptr) malloc (nbytes); 2018 ASSERT_ALWAYS (p != NULL); 2019 #endif 2020 2021 p = (mp_ptr) align_pointer (p, pagesize); 2022 2023 mprotect_maybe (p, REDZONE_BYTES, PROT_NONE); 2024 p += REDZONE_LIMBS; 2025 r->ptr = p; 2026 2027 mprotect_maybe (p + n, REDZONE_BYTES, PROT_NONE); 2028 } 2029 2030 void 2031 mprotect_region (const struct region_t *r, int prot) 2032 { 2033 mprotect_maybe (r->ptr, r->size, prot); 2034 } 2035 2036 2037 /* First four entries must be 0,1,2,3 for the benefit of CARRY_BIT, CARRY_3, 2038 and CARRY_4 */ 2039 mp_limb_t carry_array[] = { 2040 0, 1, 2, 3, 2041 4, 2042 CNST_LIMB(1) << 8, 2043 CNST_LIMB(1) << 16, 2044 GMP_NUMB_MAX 2045 }; 2046 int carry_index; 2047 2048 #define CARRY_COUNT \ 2049 ((tr->carry == CARRY_BIT) ? 2 \ 2050 : tr->carry == CARRY_3 ? 3 \ 2051 : tr->carry == CARRY_4 ? 4 \ 2052 : (tr->carry == CARRY_LIMB || tr->carry == CARRY_DIVISOR) \ 2053 ? numberof(carry_array) + CARRY_RANDOMS \ 2054 : 1) 2055 2056 #define MPN_RANDOM_ALT(index,dst,size) \ 2057 (((index) & 1) ? refmpn_random (dst, size) : refmpn_random2 (dst, size)) 2058 2059 /* The dummy value after MPN_RANDOM_ALT ensures both sides of the ":" have 2060 the same type */ 2061 #define CARRY_ITERATION \ 2062 for (carry_index = 0; \ 2063 (carry_index < numberof (carry_array) \ 2064 ? (carry = carry_array[carry_index]) \ 2065 : (MPN_RANDOM_ALT (carry_index, &carry, 1), (mp_limb_t) 0)), \ 2066 (tr->carry == CARRY_DIVISOR ? carry %= divisor : 0), \ 2067 carry_index < CARRY_COUNT; \ 2068 carry_index++) 2069 2070 2071 mp_limb_t multiplier_array[] = { 2072 0, 1, 2, 3, 2073 CNST_LIMB(1) << 8, 2074 CNST_LIMB(1) << 16, 2075 GMP_NUMB_MAX - 2, 2076 GMP_NUMB_MAX - 1, 2077 GMP_NUMB_MAX 2078 }; 2079 int multiplier_index; 2080 2081 mp_limb_t divisor_array[] = { 2082 1, 2, 3, 2083 CNST_LIMB(1) << 8, 2084 CNST_LIMB(1) << 16, 2085 CNST_LIMB(1) << (GMP_NUMB_BITS/2 - 1), 2086 GMP_NUMB_MAX >> (GMP_NUMB_BITS/2), 2087 GMP_NUMB_HIGHBIT, 2088 GMP_NUMB_HIGHBIT + 1, 2089 GMP_NUMB_MAX - 2, 2090 GMP_NUMB_MAX - 1, 2091 GMP_NUMB_MAX 2092 }; 2093 2094 int divisor_index; 2095 2096 /* The dummy value after MPN_RANDOM_ALT ensures both sides of the ":" have 2097 the same type */ 2098 #define ARRAY_ITERATION(var, index, limit, array, randoms, cond) \ 2099 for (index = 0; \ 2100 (index < numberof (array) \ 2101 ? (var = array[index]) \ 2102 : (MPN_RANDOM_ALT (index, &var, 1), (mp_limb_t) 0)), \ 2103 index < limit; \ 2104 index++) 2105 2106 #define MULTIPLIER_COUNT \ 2107 (tr->multiplier \ 2108 ? numberof (multiplier_array) + MULTIPLIER_RANDOMS \ 2109 : 1) 2110 2111 #define MULTIPLIER_ITERATION \ 2112 ARRAY_ITERATION(multiplier, multiplier_index, MULTIPLIER_COUNT, \ 2113 multiplier_array, MULTIPLIER_RANDOMS, TRY_MULTIPLIER) 2114 2115 #define DIVISOR_COUNT \ 2116 (tr->divisor \ 2117 ? numberof (divisor_array) + DIVISOR_RANDOMS \ 2118 : 1) 2119 2120 #define DIVISOR_ITERATION \ 2121 ARRAY_ITERATION(divisor, divisor_index, DIVISOR_COUNT, divisor_array, \ 2122 DIVISOR_RANDOMS, TRY_DIVISOR) 2123 2124 2125 /* overlap_array[].s[i] is where s[i] should be, 0 or 1 means overlapping 2126 d[0] or d[1] respectively, -1 means a separate (write-protected) 2127 location. */ 2128 2129 struct overlap_t { 2130 int s[NUM_SOURCES]; 2131 } overlap_array[] = { 2132 { { -1, -1, -1, -1, -1 } }, 2133 { { 0, -1, -1, -1, -1 } }, 2134 { { -1, 0, -1, -1, -1 } }, 2135 { { 0, 0, -1, -1, -1 } }, 2136 { { 1, -1, -1, -1, -1 } }, 2137 { { -1, 1, -1, -1, -1 } }, 2138 { { 1, 1, -1, -1, -1 } }, 2139 { { 0, 1, -1, -1, -1 } }, 2140 { { 1, 0, -1, -1, -1 } }, 2141 }; 2142 2143 struct overlap_t *overlap, *overlap_limit; 2144 2145 #define OVERLAP_COUNT \ 2146 (tr->overlap & OVERLAP_NONE ? 1 \ 2147 : tr->overlap & OVERLAP_NOT_SRCS ? 3 \ 2148 : tr->overlap & OVERLAP_NOT_SRC2 ? 2 \ 2149 : tr->overlap & OVERLAP_NOT_DST2 ? 4 \ 2150 : tr->dst[1] ? 9 \ 2151 : tr->src[1] ? 4 \ 2152 : tr->dst[0] ? 2 \ 2153 : 1) 2154 2155 #define OVERLAP_ITERATION \ 2156 for (overlap = &overlap_array[0], \ 2157 overlap_limit = &overlap_array[OVERLAP_COUNT]; \ 2158 overlap < overlap_limit; \ 2159 overlap++) 2160 2161 2162 int base = 10; 2163 2164 #define T_RAND_COUNT 2 2165 int t_rand; 2166 2167 void 2168 t_random (mp_ptr ptr, mp_size_t n) 2169 { 2170 if (n == 0) 2171 return; 2172 2173 switch (option_data) { 2174 case DATA_TRAND: 2175 switch (t_rand) { 2176 case 0: refmpn_random (ptr, n); break; 2177 case 1: refmpn_random2 (ptr, n); break; 2178 default: abort(); 2179 } 2180 break; 2181 case DATA_SEQ: 2182 { 2183 static mp_limb_t counter = 0; 2184 mp_size_t i; 2185 for (i = 0; i < n; i++) 2186 ptr[i] = ++counter; 2187 } 2188 break; 2189 case DATA_ZEROS: 2190 refmpn_zero (ptr, n); 2191 break; 2192 case DATA_FFS: 2193 refmpn_fill (ptr, n, GMP_NUMB_MAX); 2194 break; 2195 case DATA_2FD: 2196 /* Special value 0x2FFF...FFFD, which divided by 3 gives 0xFFF...FFF, 2197 inducing the q1_ff special case in the mul-by-inverse part of some 2198 versions of divrem_1 and mod_1. */ 2199 refmpn_fill (ptr, n, (mp_limb_t) -1); 2200 ptr[n-1] = 2; 2201 ptr[0] -= 2; 2202 break; 2203 2204 default: 2205 abort(); 2206 } 2207 } 2208 #define T_RAND_ITERATION \ 2209 for (t_rand = 0; t_rand < T_RAND_COUNT; t_rand++) 2210 2211 2212 void 2213 print_each (const struct each_t *e) 2214 { 2215 int i; 2216 2217 printf ("%s %s\n", e->name, e == &ref ? tr->reference_name : choice->name); 2218 if (tr->retval) 2219 mpn_trace (" retval", &e->retval, 1); 2220 2221 for (i = 0; i < NUM_DESTS; i++) 2222 { 2223 if (tr->dst[i]) 2224 { 2225 if (tr->dst_bytes[i]) 2226 byte_tracen (" d[%d]", i, e->d[i].p, d[i].size); 2227 else 2228 mpn_tracen (" d[%d]", i, e->d[i].p, d[i].size); 2229 printf (" located %p\n", (void *) (e->d[i].p)); 2230 } 2231 } 2232 2233 for (i = 0; i < NUM_SOURCES; i++) 2234 if (tr->src[i]) 2235 printf (" s[%d] located %p\n", i, (void *) (e->s[i].p)); 2236 } 2237 2238 2239 void 2240 print_all (void) 2241 { 2242 int i; 2243 2244 printf ("\n"); 2245 printf ("size %ld\n", (long) size); 2246 if (tr->size2) 2247 printf ("size2 %ld\n", (long) size2); 2248 2249 for (i = 0; i < NUM_DESTS; i++) 2250 if (d[i].size != size) 2251 printf ("d[%d].size %ld\n", i, (long) d[i].size); 2252 2253 if (tr->multiplier) 2254 mpn_trace (" multiplier", &multiplier, 1); 2255 if (tr->divisor) 2256 mpn_trace (" divisor", &divisor, 1); 2257 if (tr->shift) 2258 printf (" shift %lu\n", shift); 2259 if (tr->carry) 2260 mpn_trace (" carry", &carry, 1); 2261 if (tr->msize) 2262 mpn_trace (" multiplier_N", multiplier_N, tr->msize); 2263 2264 for (i = 0; i < NUM_DESTS; i++) 2265 if (tr->dst[i]) 2266 printf (" d[%d] %s, align %ld, size %ld\n", 2267 i, d[i].high ? "high" : "low", 2268 (long) d[i].align, (long) d[i].size); 2269 2270 for (i = 0; i < NUM_SOURCES; i++) 2271 { 2272 if (tr->src[i]) 2273 { 2274 printf (" s[%d] %s, align %ld, ", 2275 i, s[i].high ? "high" : "low", (long) s[i].align); 2276 switch (overlap->s[i]) { 2277 case -1: 2278 printf ("no overlap\n"); 2279 break; 2280 default: 2281 printf ("==d[%d]%s\n", 2282 overlap->s[i], 2283 tr->overlap == OVERLAP_LOW_TO_HIGH ? "+a" 2284 : tr->overlap == OVERLAP_HIGH_TO_LOW ? "-a" 2285 : ""); 2286 break; 2287 } 2288 printf (" s[%d]=", i); 2289 if (tr->carry_sign && (carry & (1 << i))) 2290 printf ("-"); 2291 mpn_trace (NULL, s[i].p, SRC_SIZE(i)); 2292 } 2293 } 2294 2295 if (tr->dst0_from_src1) 2296 mpn_trace (" d[0]", s[1].region.ptr, size); 2297 2298 if (tr->reference) 2299 print_each (&ref); 2300 print_each (&fun); 2301 } 2302 2303 void 2304 compare (void) 2305 { 2306 int error = 0; 2307 int i; 2308 2309 if (tr->retval && ref.retval != fun.retval) 2310 { 2311 gmp_printf ("Different return values (%Mu, %Mu)\n", 2312 ref.retval, fun.retval); 2313 error = 1; 2314 } 2315 2316 for (i = 0; i < NUM_DESTS; i++) 2317 { 2318 switch (tr->dst_size[i]) { 2319 case SIZE_RETVAL: 2320 case SIZE_GET_STR: 2321 d[i].size = ref.retval; 2322 break; 2323 } 2324 } 2325 2326 for (i = 0; i < NUM_DESTS; i++) 2327 { 2328 if (! tr->dst[i]) 2329 continue; 2330 2331 if (tr->dst_bytes[i]) 2332 { 2333 if (memcmp (ref.d[i].p, fun.d[i].p, d[i].size) != 0) 2334 { 2335 printf ("Different d[%d] data results, low diff at %ld, high diff at %ld\n", 2336 i, 2337 (long) byte_diff_lowest (ref.d[i].p, fun.d[i].p, d[i].size), 2338 (long) byte_diff_highest (ref.d[i].p, fun.d[i].p, d[i].size)); 2339 error = 1; 2340 } 2341 } 2342 else 2343 { 2344 if (d[i].size != 0 2345 && ! refmpn_equal_anynail (ref.d[i].p, fun.d[i].p, d[i].size)) 2346 { 2347 printf ("Different d[%d] data results, low diff at %ld, high diff at %ld\n", 2348 i, 2349 (long) mpn_diff_lowest (ref.d[i].p, fun.d[i].p, d[i].size), 2350 (long) mpn_diff_highest (ref.d[i].p, fun.d[i].p, d[i].size)); 2351 error = 1; 2352 } 2353 } 2354 } 2355 2356 if (error) 2357 { 2358 print_all(); 2359 abort(); 2360 } 2361 } 2362 2363 2364 /* The functions are cast if the return value should be a long rather than 2365 the default mp_limb_t. This is necessary under _LONG_LONG_LIMB. This 2366 might not be enough if some actual calling conventions checking is 2367 implemented on a long long limb system. */ 2368 2369 void 2370 call (struct each_t *e, tryfun_t function) 2371 { 2372 switch (choice->type) { 2373 case TYPE_ADD: 2374 case TYPE_SUB: 2375 e->retval = CALLING_CONVENTIONS (function) 2376 (e->d[0].p, e->s[0].p, size, e->s[1].p, size2); 2377 break; 2378 2379 case TYPE_ADD_N: 2380 case TYPE_SUB_N: 2381 case TYPE_ADDLSH1_N: 2382 case TYPE_ADDLSH2_N: 2383 case TYPE_SUBLSH1_N: 2384 case TYPE_SUBLSH2_N: 2385 case TYPE_RSBLSH1_N: 2386 case TYPE_RSBLSH2_N: 2387 case TYPE_RSH1ADD_N: 2388 case TYPE_RSH1SUB_N: 2389 e->retval = CALLING_CONVENTIONS (function) 2390 (e->d[0].p, e->s[0].p, e->s[1].p, size); 2391 break; 2392 case TYPE_ADDLSH_N: 2393 case TYPE_SUBLSH_N: 2394 case TYPE_RSBLSH_N: 2395 e->retval = CALLING_CONVENTIONS (function) 2396 (e->d[0].p, e->s[0].p, e->s[1].p, size, shift); 2397 break; 2398 case TYPE_ADDLSH_NC: 2399 case TYPE_SUBLSH_NC: 2400 case TYPE_RSBLSH_NC: 2401 e->retval = CALLING_CONVENTIONS (function) 2402 (e->d[0].p, e->s[0].p, e->s[1].p, size, shift, carry); 2403 break; 2404 case TYPE_ADDLSH1_NC: 2405 case TYPE_ADDLSH2_NC: 2406 case TYPE_SUBLSH1_NC: 2407 case TYPE_SUBLSH2_NC: 2408 case TYPE_RSBLSH1_NC: 2409 case TYPE_RSBLSH2_NC: 2410 case TYPE_ADD_NC: 2411 case TYPE_SUB_NC: 2412 case TYPE_ADDCND_N: 2413 case TYPE_SUBCND_N: 2414 e->retval = CALLING_CONVENTIONS (function) 2415 (e->d[0].p, e->s[0].p, e->s[1].p, size, carry); 2416 break; 2417 case TYPE_ADD_ERR1_N: 2418 case TYPE_SUB_ERR1_N: 2419 e->retval = CALLING_CONVENTIONS (function) 2420 (e->d[0].p, e->s[0].p, e->s[1].p, e->d[1].p, e->s[2].p, size, carry); 2421 break; 2422 case TYPE_ADD_ERR2_N: 2423 case TYPE_SUB_ERR2_N: 2424 e->retval = CALLING_CONVENTIONS (function) 2425 (e->d[0].p, e->s[0].p, e->s[1].p, e->d[1].p, e->s[2].p, e->s[3].p, size, carry); 2426 break; 2427 case TYPE_ADD_ERR3_N: 2428 case TYPE_SUB_ERR3_N: 2429 e->retval = CALLING_CONVENTIONS (function) 2430 (e->d[0].p, e->s[0].p, e->s[1].p, e->d[1].p, e->s[2].p, e->s[3].p, e->s[4].p, size, carry); 2431 break; 2432 2433 case TYPE_MUL_1: 2434 case TYPE_ADDMUL_1: 2435 case TYPE_SUBMUL_1: 2436 e->retval = CALLING_CONVENTIONS (function) 2437 (e->d[0].p, e->s[0].p, size, multiplier); 2438 break; 2439 case TYPE_MUL_1C: 2440 case TYPE_ADDMUL_1C: 2441 case TYPE_SUBMUL_1C: 2442 e->retval = CALLING_CONVENTIONS (function) 2443 (e->d[0].p, e->s[0].p, size, multiplier, carry); 2444 break; 2445 2446 case TYPE_MUL_2: 2447 case TYPE_MUL_3: 2448 case TYPE_MUL_4: 2449 case TYPE_MUL_5: 2450 case TYPE_MUL_6: 2451 if (size == 1) 2452 abort (); 2453 e->retval = CALLING_CONVENTIONS (function) 2454 (e->d[0].p, e->s[0].p, size, multiplier_N); 2455 break; 2456 2457 case TYPE_ADDMUL_2: 2458 case TYPE_ADDMUL_3: 2459 case TYPE_ADDMUL_4: 2460 case TYPE_ADDMUL_5: 2461 case TYPE_ADDMUL_6: 2462 case TYPE_ADDMUL_7: 2463 case TYPE_ADDMUL_8: 2464 if (size == 1) 2465 abort (); 2466 e->retval = CALLING_CONVENTIONS (function) 2467 (e->d[0].p, e->s[0].p, size, multiplier_N); 2468 break; 2469 2470 case TYPE_AND_N: 2471 case TYPE_ANDN_N: 2472 case TYPE_NAND_N: 2473 case TYPE_IOR_N: 2474 case TYPE_IORN_N: 2475 case TYPE_NIOR_N: 2476 case TYPE_XOR_N: 2477 case TYPE_XNOR_N: 2478 CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, e->s[1].p, size); 2479 break; 2480 2481 case TYPE_ADDSUB_N: 2482 e->retval = CALLING_CONVENTIONS (function) 2483 (e->d[0].p, e->d[1].p, e->s[0].p, e->s[1].p, size); 2484 break; 2485 case TYPE_ADDSUB_NC: 2486 e->retval = CALLING_CONVENTIONS (function) 2487 (e->d[0].p, e->d[1].p, e->s[0].p, e->s[1].p, size, carry); 2488 break; 2489 2490 case TYPE_COPY: 2491 case TYPE_COPYI: 2492 case TYPE_COPYD: 2493 case TYPE_COM: 2494 CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, size); 2495 break; 2496 2497 case TYPE_ADDLSH1_N_IP1: 2498 case TYPE_ADDLSH2_N_IP1: 2499 case TYPE_ADDLSH1_N_IP2: 2500 case TYPE_ADDLSH2_N_IP2: 2501 case TYPE_SUBLSH1_N_IP1: 2502 case TYPE_SUBLSH2_N_IP1: 2503 case TYPE_DIVEXACT_BY3: 2504 e->retval = CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, size); 2505 break; 2506 case TYPE_DIVEXACT_BY3C: 2507 e->retval = CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, size, 2508 carry); 2509 break; 2510 2511 2512 case TYPE_DIVMOD_1: 2513 case TYPE_DIVEXACT_1: 2514 case TYPE_BDIV_Q_1: 2515 e->retval = CALLING_CONVENTIONS (function) 2516 (e->d[0].p, e->s[0].p, size, divisor); 2517 break; 2518 case TYPE_DIVMOD_1C: 2519 e->retval = CALLING_CONVENTIONS (function) 2520 (e->d[0].p, e->s[0].p, size, divisor, carry); 2521 break; 2522 case TYPE_DIVREM_1: 2523 e->retval = CALLING_CONVENTIONS (function) 2524 (e->d[0].p, size2, e->s[0].p, size, divisor); 2525 break; 2526 case TYPE_DIVREM_1C: 2527 e->retval = CALLING_CONVENTIONS (function) 2528 (e->d[0].p, size2, e->s[0].p, size, divisor, carry); 2529 break; 2530 case TYPE_PREINV_DIVREM_1: 2531 { 2532 mp_limb_t dinv; 2533 unsigned shift; 2534 shift = refmpn_count_leading_zeros (divisor); 2535 dinv = refmpn_invert_limb (divisor << shift); 2536 e->retval = CALLING_CONVENTIONS (function) 2537 (e->d[0].p, size2, e->s[0].p, size, divisor, dinv, shift); 2538 } 2539 break; 2540 case TYPE_MOD_1: 2541 case TYPE_MODEXACT_1_ODD: 2542 e->retval = CALLING_CONVENTIONS (function) 2543 (e->s[0].p, size, divisor); 2544 break; 2545 case TYPE_MOD_1C: 2546 case TYPE_MODEXACT_1C_ODD: 2547 e->retval = CALLING_CONVENTIONS (function) 2548 (e->s[0].p, size, divisor, carry); 2549 break; 2550 case TYPE_PREINV_MOD_1: 2551 e->retval = CALLING_CONVENTIONS (function) 2552 (e->s[0].p, size, divisor, refmpn_invert_limb (divisor)); 2553 break; 2554 case TYPE_MOD_34LSUB1: 2555 e->retval = CALLING_CONVENTIONS (function) (e->s[0].p, size); 2556 break; 2557 2558 case TYPE_UDIV_QRNND: 2559 e->retval = CALLING_CONVENTIONS (function) 2560 (e->d[0].p, e->s[0].p[1], e->s[0].p[0], divisor); 2561 break; 2562 case TYPE_UDIV_QRNND_R: 2563 e->retval = CALLING_CONVENTIONS (function) 2564 (e->s[0].p[1], e->s[0].p[0], divisor, e->d[0].p); 2565 break; 2566 2567 case TYPE_SBPI1_DIV_QR: 2568 { 2569 gmp_pi1_t dinv; 2570 invert_pi1 (dinv, e->s[1].p[size2-1], e->s[1].p[size2-2]); /* FIXME: use refinvert_pi1 */ 2571 refmpn_copyi (e->d[1].p, e->s[0].p, size); /* dividend */ 2572 refmpn_fill (e->d[0].p, size-size2, 0x98765432); /* quotient */ 2573 e->retval = CALLING_CONVENTIONS (function) 2574 (e->d[0].p, e->d[1].p, size, e->s[1].p, size2, dinv.inv32); 2575 refmpn_zero (e->d[1].p+size2, size-size2); /* excess over remainder */ 2576 } 2577 break; 2578 2579 case TYPE_TDIV_QR: 2580 CALLING_CONVENTIONS (function) (e->d[0].p, e->d[1].p, 0, 2581 e->s[0].p, size, e->s[1].p, size2); 2582 break; 2583 2584 case TYPE_GCD_1: 2585 /* Must have a non-zero src, but this probably isn't the best way to do 2586 it. */ 2587 if (refmpn_zero_p (e->s[0].p, size)) 2588 e->retval = 0; 2589 else 2590 e->retval = CALLING_CONVENTIONS (function) (e->s[0].p, size, divisor); 2591 break; 2592 2593 case TYPE_GCD: 2594 /* Sources are destroyed, so they're saved and replaced, but a general 2595 approach to this might be better. Note that it's still e->s[0].p and 2596 e->s[1].p that are passed, to get the desired alignments. */ 2597 { 2598 mp_ptr s0 = refmpn_malloc_limbs (size); 2599 mp_ptr s1 = refmpn_malloc_limbs (size2); 2600 refmpn_copyi (s0, e->s[0].p, size); 2601 refmpn_copyi (s1, e->s[1].p, size2); 2602 2603 mprotect_region (&s[0].region, PROT_READ|PROT_WRITE); 2604 mprotect_region (&s[1].region, PROT_READ|PROT_WRITE); 2605 e->retval = CALLING_CONVENTIONS (function) (e->d[0].p, 2606 e->s[0].p, size, 2607 e->s[1].p, size2); 2608 refmpn_copyi (e->s[0].p, s0, size); 2609 refmpn_copyi (e->s[1].p, s1, size2); 2610 free (s0); 2611 free (s1); 2612 } 2613 break; 2614 2615 case TYPE_GCD_FINDA: 2616 { 2617 /* FIXME: do this with a flag */ 2618 mp_limb_t c[2]; 2619 c[0] = e->s[0].p[0]; 2620 c[0] += (c[0] == 0); 2621 c[1] = e->s[0].p[0]; 2622 c[1] += (c[1] == 0); 2623 e->retval = CALLING_CONVENTIONS (function) (c); 2624 } 2625 break; 2626 2627 case TYPE_MPZ_LEGENDRE: 2628 case TYPE_MPZ_JACOBI: 2629 { 2630 mpz_t a, b; 2631 PTR(a) = e->s[0].p; SIZ(a) = (carry==0 ? size : -size); 2632 PTR(b) = e->s[1].p; SIZ(b) = size2; 2633 e->retval = CALLING_CONVENTIONS (function) (a, b); 2634 } 2635 break; 2636 case TYPE_MPZ_KRONECKER: 2637 { 2638 mpz_t a, b; 2639 PTR(a) = e->s[0].p; SIZ(a) = ((carry&1)==0 ? size : -size); 2640 PTR(b) = e->s[1].p; SIZ(b) = ((carry&2)==0 ? size2 : -size2); 2641 e->retval = CALLING_CONVENTIONS (function) (a, b); 2642 } 2643 break; 2644 case TYPE_MPZ_KRONECKER_UI: 2645 { 2646 mpz_t a; 2647 PTR(a) = e->s[0].p; SIZ(a) = (carry==0 ? size : -size); 2648 e->retval = CALLING_CONVENTIONS(function) (a, (unsigned long)multiplier); 2649 } 2650 break; 2651 case TYPE_MPZ_KRONECKER_SI: 2652 { 2653 mpz_t a; 2654 PTR(a) = e->s[0].p; SIZ(a) = (carry==0 ? size : -size); 2655 e->retval = CALLING_CONVENTIONS (function) (a, (long) multiplier); 2656 } 2657 break; 2658 case TYPE_MPZ_UI_KRONECKER: 2659 { 2660 mpz_t b; 2661 PTR(b) = e->s[0].p; SIZ(b) = (carry==0 ? size : -size); 2662 e->retval = CALLING_CONVENTIONS(function) ((unsigned long)multiplier, b); 2663 } 2664 break; 2665 case TYPE_MPZ_SI_KRONECKER: 2666 { 2667 mpz_t b; 2668 PTR(b) = e->s[0].p; SIZ(b) = (carry==0 ? size : -size); 2669 e->retval = CALLING_CONVENTIONS (function) ((long) multiplier, b); 2670 } 2671 break; 2672 2673 case TYPE_MUL_MN: 2674 case TYPE_MULMID_MN: 2675 CALLING_CONVENTIONS (function) 2676 (e->d[0].p, e->s[0].p, size, e->s[1].p, size2); 2677 break; 2678 case TYPE_MUL_N: 2679 case TYPE_MULLO_N: 2680 CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, e->s[1].p, size); 2681 break; 2682 case TYPE_MULMID_N: 2683 CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, e->s[1].p, 2684 (size + 1) / 2); 2685 break; 2686 case TYPE_SQR: 2687 CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, size); 2688 break; 2689 2690 case TYPE_UMUL_PPMM: 2691 e->retval = CALLING_CONVENTIONS (function) 2692 (e->d[0].p, e->s[0].p[0], e->s[0].p[1]); 2693 break; 2694 case TYPE_UMUL_PPMM_R: 2695 e->retval = CALLING_CONVENTIONS (function) 2696 (e->s[0].p[0], e->s[0].p[1], e->d[0].p); 2697 break; 2698 2699 case TYPE_ADDLSH_N_IP1: 2700 case TYPE_ADDLSH_N_IP2: 2701 case TYPE_SUBLSH_N_IP1: 2702 case TYPE_LSHIFT: 2703 case TYPE_LSHIFTC: 2704 case TYPE_RSHIFT: 2705 e->retval = CALLING_CONVENTIONS (function) 2706 (e->d[0].p, e->s[0].p, size, shift); 2707 break; 2708 2709 case TYPE_POPCOUNT: 2710 e->retval = (* (unsigned long (*)(ANYARGS)) 2711 CALLING_CONVENTIONS (function)) (e->s[0].p, size); 2712 break; 2713 case TYPE_HAMDIST: 2714 e->retval = (* (unsigned long (*)(ANYARGS)) 2715 CALLING_CONVENTIONS (function)) (e->s[0].p, e->s[1].p, size); 2716 break; 2717 2718 case TYPE_SQRTREM: 2719 e->retval = (* (long (*)(ANYARGS)) CALLING_CONVENTIONS (function)) 2720 (e->d[0].p, e->d[1].p, e->s[0].p, size); 2721 break; 2722 2723 case TYPE_ZERO: 2724 CALLING_CONVENTIONS (function) (e->d[0].p, size); 2725 break; 2726 2727 case TYPE_GET_STR: 2728 { 2729 size_t sizeinbase, fill; 2730 char *dst; 2731 MPN_SIZEINBASE (sizeinbase, e->s[0].p, size, base); 2732 ASSERT_ALWAYS (sizeinbase <= d[0].size); 2733 fill = d[0].size - sizeinbase; 2734 if (d[0].high) 2735 { 2736 memset (e->d[0].p, 0xBA, fill); 2737 dst = (char *) e->d[0].p + fill; 2738 } 2739 else 2740 { 2741 dst = (char *) e->d[0].p; 2742 memset (dst + sizeinbase, 0xBA, fill); 2743 } 2744 if (POW2_P (base)) 2745 { 2746 e->retval = CALLING_CONVENTIONS (function) (dst, base, 2747 e->s[0].p, size); 2748 } 2749 else 2750 { 2751 refmpn_copy (e->d[1].p, e->s[0].p, size); 2752 e->retval = CALLING_CONVENTIONS (function) (dst, base, 2753 e->d[1].p, size); 2754 } 2755 refmpn_zero (e->d[1].p, size); /* clobbered or unused */ 2756 } 2757 break; 2758 2759 case TYPE_INVERT: 2760 { 2761 mp_ptr scratch; 2762 TMP_DECL; 2763 TMP_MARK; 2764 scratch = TMP_ALLOC_LIMBS (mpn_invert_itch (size)); 2765 CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, size, scratch); 2766 TMP_FREE; 2767 } 2768 break; 2769 case TYPE_BINVERT: 2770 { 2771 mp_ptr scratch; 2772 TMP_DECL; 2773 TMP_MARK; 2774 scratch = TMP_ALLOC_LIMBS (mpn_binvert_itch (size)); 2775 CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, size, scratch); 2776 TMP_FREE; 2777 } 2778 break; 2779 2780 #ifdef EXTRA_CALL 2781 EXTRA_CALL 2782 #endif 2783 2784 default: 2785 printf ("Unknown routine type %d\n", choice->type); 2786 abort (); 2787 break; 2788 } 2789 } 2790 2791 2792 void 2793 pointer_setup (struct each_t *e) 2794 { 2795 int i, j; 2796 2797 for (i = 0; i < NUM_DESTS; i++) 2798 { 2799 switch (tr->dst_size[i]) { 2800 case 0: 2801 case SIZE_RETVAL: /* will be adjusted later */ 2802 d[i].size = size; 2803 break; 2804 2805 case SIZE_1: 2806 d[i].size = 1; 2807 break; 2808 case SIZE_2: 2809 d[i].size = 2; 2810 break; 2811 case SIZE_3: 2812 d[i].size = 3; 2813 break; 2814 case SIZE_4: 2815 d[i].size = 4; 2816 break; 2817 case SIZE_6: 2818 d[i].size = 6; 2819 break; 2820 2821 case SIZE_PLUS_1: 2822 d[i].size = size+1; 2823 break; 2824 case SIZE_PLUS_MSIZE_SUB_1: 2825 d[i].size = size + tr->msize - 1; 2826 break; 2827 2828 case SIZE_SUM: 2829 if (tr->size2) 2830 d[i].size = size + size2; 2831 else 2832 d[i].size = 2*size; 2833 break; 2834 2835 case SIZE_SIZE2: 2836 d[i].size = size2; 2837 break; 2838 2839 case SIZE_DIFF: 2840 d[i].size = size - size2; 2841 break; 2842 2843 case SIZE_DIFF_PLUS_1: 2844 d[i].size = size - size2 + 1; 2845 break; 2846 2847 case SIZE_DIFF_PLUS_3: 2848 d[i].size = size - size2 + 3; 2849 break; 2850 2851 case SIZE_CEIL_HALF: 2852 d[i].size = (size+1)/2; 2853 break; 2854 2855 case SIZE_GET_STR: 2856 { 2857 mp_limb_t ff = GMP_NUMB_MAX; 2858 MPN_SIZEINBASE (d[i].size, &ff - (size-1), size, base); 2859 } 2860 break; 2861 2862 default: 2863 printf ("Unrecognised dst_size type %d\n", tr->dst_size[i]); 2864 abort (); 2865 } 2866 } 2867 2868 /* establish e->d[].p destinations */ 2869 for (i = 0; i < NUM_DESTS; i++) 2870 { 2871 mp_size_t offset = 0; 2872 2873 /* possible room for overlapping sources */ 2874 for (j = 0; j < numberof (overlap->s); j++) 2875 if (overlap->s[j] == i) 2876 offset = MAX (offset, s[j].align); 2877 2878 if (d[i].high) 2879 { 2880 if (tr->dst_bytes[i]) 2881 { 2882 e->d[i].p = (mp_ptr) 2883 ((char *) (e->d[i].region.ptr + e->d[i].region.size) 2884 - d[i].size - d[i].align); 2885 } 2886 else 2887 { 2888 e->d[i].p = e->d[i].region.ptr + e->d[i].region.size 2889 - d[i].size - d[i].align; 2890 if (tr->overlap == OVERLAP_LOW_TO_HIGH) 2891 e->d[i].p -= offset; 2892 } 2893 } 2894 else 2895 { 2896 if (tr->dst_bytes[i]) 2897 { 2898 e->d[i].p = (mp_ptr) ((char *) e->d[i].region.ptr + d[i].align); 2899 } 2900 else 2901 { 2902 e->d[i].p = e->d[i].region.ptr + d[i].align; 2903 if (tr->overlap == OVERLAP_HIGH_TO_LOW) 2904 e->d[i].p += offset; 2905 } 2906 } 2907 } 2908 2909 /* establish e->s[].p sources */ 2910 for (i = 0; i < NUM_SOURCES; i++) 2911 { 2912 int o = overlap->s[i]; 2913 switch (o) { 2914 case -1: 2915 /* no overlap */ 2916 e->s[i].p = s[i].p; 2917 break; 2918 case 0: 2919 case 1: 2920 /* overlap with d[o] */ 2921 if (tr->overlap == OVERLAP_HIGH_TO_LOW) 2922 e->s[i].p = e->d[o].p - s[i].align; 2923 else if (tr->overlap == OVERLAP_LOW_TO_HIGH) 2924 e->s[i].p = e->d[o].p + s[i].align; 2925 else if (tr->size2 == SIZE_FRACTION) 2926 e->s[i].p = e->d[o].p + size2; 2927 else 2928 e->s[i].p = e->d[o].p; 2929 break; 2930 default: 2931 abort(); 2932 break; 2933 } 2934 } 2935 } 2936 2937 2938 void 2939 validate_fail (void) 2940 { 2941 if (tr->reference) 2942 { 2943 trap_location = TRAP_REF; 2944 call (&ref, tr->reference); 2945 trap_location = TRAP_NOWHERE; 2946 } 2947 2948 print_all(); 2949 abort(); 2950 } 2951 2952 2953 void 2954 try_one (void) 2955 { 2956 int i; 2957 2958 if (option_spinner) 2959 spinner(); 2960 spinner_count++; 2961 2962 trap_location = TRAP_SETUPS; 2963 2964 if (tr->divisor == DIVISOR_NORM) 2965 divisor |= GMP_NUMB_HIGHBIT; 2966 if (tr->divisor == DIVISOR_ODD) 2967 divisor |= 1; 2968 2969 for (i = 0; i < NUM_SOURCES; i++) 2970 { 2971 if (s[i].high) 2972 s[i].p = s[i].region.ptr + s[i].region.size - SRC_SIZE(i) - s[i].align; 2973 else 2974 s[i].p = s[i].region.ptr + s[i].align; 2975 } 2976 2977 pointer_setup (&ref); 2978 pointer_setup (&fun); 2979 2980 ref.retval = 0x04152637; 2981 fun.retval = 0x8C9DAEBF; 2982 2983 t_random (multiplier_N, tr->msize); 2984 2985 for (i = 0; i < NUM_SOURCES; i++) 2986 { 2987 if (! tr->src[i]) 2988 continue; 2989 2990 mprotect_region (&s[i].region, PROT_READ|PROT_WRITE); 2991 t_random (s[i].p, SRC_SIZE(i)); 2992 2993 switch (tr->data) { 2994 case DATA_NON_ZERO: 2995 if (refmpn_zero_p (s[i].p, SRC_SIZE(i))) 2996 s[i].p[0] = 1; 2997 break; 2998 2999 case DATA_MULTIPLE_DIVISOR: 3000 /* same number of low zero bits as divisor */ 3001 s[i].p[0] &= ~ LOW_ZEROS_MASK (divisor); 3002 refmpn_sub_1 (s[i].p, s[i].p, size, 3003 refmpn_mod_1 (s[i].p, size, divisor)); 3004 break; 3005 3006 case DATA_GCD: 3007 /* s[1] no more bits than s[0] */ 3008 if (i == 1 && size2 == size) 3009 s[1].p[size-1] &= refmpn_msbone_mask (s[0].p[size-1]); 3010 3011 /* high limb non-zero */ 3012 s[i].p[SRC_SIZE(i)-1] += (s[i].p[SRC_SIZE(i)-1] == 0); 3013 3014 /* odd */ 3015 s[i].p[0] |= 1; 3016 break; 3017 3018 case DATA_SRC0_ODD: 3019 if (i == 0) 3020 s[i].p[0] |= 1; 3021 break; 3022 3023 case DATA_SRC1_ODD: 3024 if (i == 1) 3025 s[i].p[0] |= 1; 3026 break; 3027 3028 case DATA_SRC1_ODD_PRIME: 3029 if (i == 1) 3030 { 3031 if (refmpn_zero_p (s[i].p+1, SRC_SIZE(i)-1) 3032 && s[i].p[0] <=3) 3033 s[i].p[0] = 3; 3034 else 3035 { 3036 mpz_t p; 3037 mpz_init (p); 3038 for (;;) 3039 { 3040 _mpz_realloc (p, SRC_SIZE(i)); 3041 MPN_COPY (PTR(p), s[i].p, SRC_SIZE(i)); 3042 SIZ(p) = SRC_SIZE(i); 3043 MPN_NORMALIZE (PTR(p), SIZ(p)); 3044 mpz_nextprime (p, p); 3045 if (mpz_size (p) <= SRC_SIZE(i)) 3046 break; 3047 3048 t_random (s[i].p, SRC_SIZE(i)); 3049 } 3050 MPN_COPY (s[i].p, PTR(p), SIZ(p)); 3051 if (SIZ(p) < SRC_SIZE(i)) 3052 MPN_ZERO (s[i].p + SIZ(p), SRC_SIZE(i) - SIZ(p)); 3053 mpz_clear (p); 3054 } 3055 } 3056 break; 3057 3058 case DATA_SRC1_HIGHBIT: 3059 if (i == 1) 3060 { 3061 if (tr->size2) 3062 s[i].p[size2-1] |= GMP_NUMB_HIGHBIT; 3063 else 3064 s[i].p[size-1] |= GMP_NUMB_HIGHBIT; 3065 } 3066 break; 3067 3068 case DATA_SRC0_HIGHBIT: 3069 if (i == 0) 3070 { 3071 s[i].p[size-1] |= GMP_NUMB_HIGHBIT; 3072 } 3073 break; 3074 3075 case DATA_UDIV_QRNND: 3076 s[i].p[1] %= divisor; 3077 break; 3078 } 3079 3080 mprotect_region (&s[i].region, PROT_READ); 3081 } 3082 3083 for (i = 0; i < NUM_DESTS; i++) 3084 { 3085 if (! tr->dst[i]) 3086 continue; 3087 3088 if (tr->dst0_from_src1 && i==0) 3089 { 3090 mp_size_t copy = MIN (d[0].size, SRC_SIZE(1)); 3091 mp_size_t fill = MAX (0, d[0].size - copy); 3092 MPN_COPY (fun.d[0].p, s[1].region.ptr, copy); 3093 MPN_COPY (ref.d[0].p, s[1].region.ptr, copy); 3094 refmpn_fill (fun.d[0].p + copy, fill, DEADVAL); 3095 refmpn_fill (ref.d[0].p + copy, fill, DEADVAL); 3096 } 3097 else if (tr->dst_bytes[i]) 3098 { 3099 memset (ref.d[i].p, 0xBA, d[i].size); 3100 memset (fun.d[i].p, 0xBA, d[i].size); 3101 } 3102 else 3103 { 3104 refmpn_fill (ref.d[i].p, d[i].size, DEADVAL); 3105 refmpn_fill (fun.d[i].p, d[i].size, DEADVAL); 3106 } 3107 } 3108 3109 for (i = 0; i < NUM_SOURCES; i++) 3110 { 3111 if (! tr->src[i]) 3112 continue; 3113 3114 if (ref.s[i].p != s[i].p) 3115 { 3116 refmpn_copyi (ref.s[i].p, s[i].p, SRC_SIZE(i)); 3117 refmpn_copyi (fun.s[i].p, s[i].p, SRC_SIZE(i)); 3118 } 3119 } 3120 3121 if (option_print) 3122 print_all(); 3123 3124 if (tr->validate != NULL) 3125 { 3126 trap_location = TRAP_FUN; 3127 call (&fun, choice->function); 3128 trap_location = TRAP_NOWHERE; 3129 3130 if (! CALLING_CONVENTIONS_CHECK ()) 3131 { 3132 print_all(); 3133 abort(); 3134 } 3135 3136 (*tr->validate) (); 3137 } 3138 else 3139 { 3140 trap_location = TRAP_REF; 3141 call (&ref, tr->reference); 3142 trap_location = TRAP_FUN; 3143 call (&fun, choice->function); 3144 trap_location = TRAP_NOWHERE; 3145 3146 if (! CALLING_CONVENTIONS_CHECK ()) 3147 { 3148 print_all(); 3149 abort(); 3150 } 3151 3152 compare (); 3153 } 3154 } 3155 3156 3157 #define SIZE_ITERATION \ 3158 for (size = MAX3 (option_firstsize, \ 3159 choice->minsize, \ 3160 (tr->size == SIZE_ALLOW_ZERO) ? 0 : 1), \ 3161 size += (tr->size == SIZE_ODD) && !(size & 1); \ 3162 size <= option_lastsize; \ 3163 size += (tr->size == SIZE_ODD) ? 2 : 1) 3164 3165 #define SIZE2_FIRST \ 3166 (tr->size2 == SIZE_2 ? 2 \ 3167 : tr->size2 == SIZE_FRACTION ? option_firstsize2 \ 3168 : tr->size2 == SIZE_CEIL_HALF ? ((size + 1) / 2) \ 3169 : tr->size2 ? \ 3170 MAX (choice->minsize, (option_firstsize2 != 0 \ 3171 ? option_firstsize2 : 1)) \ 3172 : 0) 3173 3174 #define SIZE2_LAST \ 3175 (tr->size2 == SIZE_2 ? 2 \ 3176 : tr->size2 == SIZE_FRACTION ? FRACTION_COUNT-1 \ 3177 : tr->size2 == SIZE_CEIL_HALF ? ((size + 1) / 2) \ 3178 : tr->size2 ? size \ 3179 : 0) 3180 3181 #define SIZE2_ITERATION \ 3182 for (size2 = SIZE2_FIRST; size2 <= SIZE2_LAST; size2++) 3183 3184 #define ALIGN_COUNT(cond) ((cond) ? ALIGNMENTS : 1) 3185 #define ALIGN_ITERATION(w,n,cond) \ 3186 for (w[n].align = 0; w[n].align < ALIGN_COUNT(cond); w[n].align++) 3187 3188 #define HIGH_LIMIT(cond) ((cond) != 0) 3189 #define HIGH_COUNT(cond) (HIGH_LIMIT (cond) + 1) 3190 #define HIGH_ITERATION(w,n,cond) \ 3191 for (w[n].high = 0; w[n].high <= HIGH_LIMIT(cond); w[n].high++) 3192 3193 #define SHIFT_LIMIT \ 3194 ((unsigned long) (tr->shift ? GMP_NUMB_BITS -1 : 1)) 3195 3196 #define SHIFT_ITERATION \ 3197 for (shift = 1; shift <= SHIFT_LIMIT; shift++) 3198 3199 3200 void 3201 try_many (void) 3202 { 3203 int i; 3204 3205 { 3206 unsigned long total = 1; 3207 3208 total *= option_repetitions; 3209 total *= option_lastsize; 3210 if (tr->size2 == SIZE_FRACTION) total *= FRACTION_COUNT; 3211 else if (tr->size2) total *= (option_lastsize+1)/2; 3212 3213 total *= SHIFT_LIMIT; 3214 total *= MULTIPLIER_COUNT; 3215 total *= DIVISOR_COUNT; 3216 total *= CARRY_COUNT; 3217 total *= T_RAND_COUNT; 3218 3219 total *= HIGH_COUNT (tr->dst[0]); 3220 total *= HIGH_COUNT (tr->dst[1]); 3221 total *= HIGH_COUNT (tr->src[0]); 3222 total *= HIGH_COUNT (tr->src[1]); 3223 3224 total *= ALIGN_COUNT (tr->dst[0]); 3225 total *= ALIGN_COUNT (tr->dst[1]); 3226 total *= ALIGN_COUNT (tr->src[0]); 3227 total *= ALIGN_COUNT (tr->src[1]); 3228 3229 total *= OVERLAP_COUNT; 3230 3231 printf ("%s %lu\n", choice->name, total); 3232 } 3233 3234 spinner_count = 0; 3235 3236 for (i = 0; i < option_repetitions; i++) 3237 SIZE_ITERATION 3238 SIZE2_ITERATION 3239 3240 SHIFT_ITERATION 3241 MULTIPLIER_ITERATION 3242 DIVISOR_ITERATION 3243 CARRY_ITERATION /* must be after divisor */ 3244 T_RAND_ITERATION 3245 3246 HIGH_ITERATION(d,0, tr->dst[0]) 3247 HIGH_ITERATION(d,1, tr->dst[1]) 3248 HIGH_ITERATION(s,0, tr->src[0]) 3249 HIGH_ITERATION(s,1, tr->src[1]) 3250 3251 ALIGN_ITERATION(d,0, tr->dst[0]) 3252 ALIGN_ITERATION(d,1, tr->dst[1]) 3253 ALIGN_ITERATION(s,0, tr->src[0]) 3254 ALIGN_ITERATION(s,1, tr->src[1]) 3255 3256 OVERLAP_ITERATION 3257 try_one(); 3258 3259 printf("\n"); 3260 } 3261 3262 3263 /* Usually print_all() doesn't show much, but it might give a hint as to 3264 where the function was up to when it died. */ 3265 void 3266 trap (int sig) 3267 { 3268 const char *name = "noname"; 3269 3270 switch (sig) { 3271 case SIGILL: name = "SIGILL"; break; 3272 #ifdef SIGBUS 3273 case SIGBUS: name = "SIGBUS"; break; 3274 #endif 3275 case SIGSEGV: name = "SIGSEGV"; break; 3276 case SIGFPE: name = "SIGFPE"; break; 3277 } 3278 3279 printf ("\n\nSIGNAL TRAP: %s\n", name); 3280 3281 switch (trap_location) { 3282 case TRAP_REF: 3283 printf (" in reference function: %s\n", tr->reference_name); 3284 break; 3285 case TRAP_FUN: 3286 printf (" in test function: %s\n", choice->name); 3287 print_all (); 3288 break; 3289 case TRAP_SETUPS: 3290 printf (" in parameter setups\n"); 3291 print_all (); 3292 break; 3293 default: 3294 printf (" somewhere unknown\n"); 3295 break; 3296 } 3297 exit (1); 3298 } 3299 3300 3301 void 3302 try_init (void) 3303 { 3304 #if HAVE_GETPAGESIZE 3305 /* Prefer getpagesize() over sysconf(), since on SunOS 4 sysconf() doesn't 3306 know _SC_PAGESIZE. */ 3307 pagesize = getpagesize (); 3308 #else 3309 #if HAVE_SYSCONF 3310 if ((pagesize = sysconf (_SC_PAGESIZE)) == -1) 3311 { 3312 /* According to the linux man page, sysconf doesn't set errno */ 3313 fprintf (stderr, "Cannot get sysconf _SC_PAGESIZE\n"); 3314 exit (1); 3315 } 3316 #else 3317 Error, error, cannot get page size 3318 #endif 3319 #endif 3320 3321 printf ("pagesize is 0x%lX bytes\n", pagesize); 3322 3323 signal (SIGILL, trap); 3324 #ifdef SIGBUS 3325 signal (SIGBUS, trap); 3326 #endif 3327 signal (SIGSEGV, trap); 3328 signal (SIGFPE, trap); 3329 3330 { 3331 int i; 3332 3333 for (i = 0; i < NUM_SOURCES; i++) 3334 { 3335 malloc_region (&s[i].region, 2*option_lastsize+ALIGNMENTS-1); 3336 printf ("s[%d] %p to %p (0x%lX bytes)\n", 3337 i, (void *) (s[i].region.ptr), 3338 (void *) (s[i].region.ptr + s[i].region.size), 3339 (long) s[i].region.size * BYTES_PER_MP_LIMB); 3340 } 3341 3342 #define INIT_EACH(e,es) \ 3343 for (i = 0; i < NUM_DESTS; i++) \ 3344 { \ 3345 malloc_region (&e.d[i].region, 2*option_lastsize+ALIGNMENTS-1); \ 3346 printf ("%s d[%d] %p to %p (0x%lX bytes)\n", \ 3347 es, i, (void *) (e.d[i].region.ptr), \ 3348 (void *) (e.d[i].region.ptr + e.d[i].region.size), \ 3349 (long) e.d[i].region.size * BYTES_PER_MP_LIMB); \ 3350 } 3351 3352 INIT_EACH(ref, "ref"); 3353 INIT_EACH(fun, "fun"); 3354 } 3355 } 3356 3357 int 3358 strmatch_wild (const char *pattern, const char *str) 3359 { 3360 size_t plen, slen; 3361 3362 /* wildcard at start */ 3363 if (pattern[0] == '*') 3364 { 3365 pattern++; 3366 plen = strlen (pattern); 3367 slen = strlen (str); 3368 return (plen == 0 3369 || (slen >= plen && memcmp (pattern, str+slen-plen, plen) == 0)); 3370 } 3371 3372 /* wildcard at end */ 3373 plen = strlen (pattern); 3374 if (plen >= 1 && pattern[plen-1] == '*') 3375 return (memcmp (pattern, str, plen-1) == 0); 3376 3377 /* no wildcards */ 3378 return (strcmp (pattern, str) == 0); 3379 } 3380 3381 void 3382 try_name (const char *name) 3383 { 3384 int found = 0; 3385 int i; 3386 3387 for (i = 0; i < numberof (choice_array); i++) 3388 { 3389 if (strmatch_wild (name, choice_array[i].name)) 3390 { 3391 choice = &choice_array[i]; 3392 tr = ¶m[choice->type]; 3393 try_many (); 3394 found = 1; 3395 } 3396 } 3397 3398 if (!found) 3399 { 3400 printf ("%s unknown\n", name); 3401 /* exit (1); */ 3402 } 3403 } 3404 3405 3406 void 3407 usage (const char *prog) 3408 { 3409 int col = 0; 3410 int i; 3411 3412 printf ("Usage: %s [options] function...\n", prog); 3413 printf (" -1 use limb data 1,2,3,etc\n"); 3414 printf (" -9 use limb data all 0xFF..FFs\n"); 3415 printf (" -a zeros use limb data all zeros\n"); 3416 printf (" -a ffs use limb data all 0xFF..FFs (same as -9)\n"); 3417 printf (" -a 2fd use data 0x2FFF...FFFD\n"); 3418 printf (" -p print each case tried (try this if seg faulting)\n"); 3419 printf (" -R seed random numbers from time()\n"); 3420 printf (" -r reps set repetitions (default %d)\n", DEFAULT_REPETITIONS); 3421 printf (" -s size starting size to test\n"); 3422 printf (" -S size2 starting size2 to test\n"); 3423 printf (" -s s1-s2 range of sizes to test\n"); 3424 printf (" -W don't show the spinner (use this in gdb)\n"); 3425 printf (" -z disable mprotect() redzones\n"); 3426 printf ("Default data is refmpn_random() and refmpn_random2().\n"); 3427 printf ("\n"); 3428 printf ("Functions that can be tested:\n"); 3429 3430 for (i = 0; i < numberof (choice_array); i++) 3431 { 3432 if (col + 1 + strlen (choice_array[i].name) > 79) 3433 { 3434 printf ("\n"); 3435 col = 0; 3436 } 3437 printf (" %s", choice_array[i].name); 3438 col += 1 + strlen (choice_array[i].name); 3439 } 3440 printf ("\n"); 3441 3442 exit(1); 3443 } 3444 3445 3446 int 3447 main (int argc, char *argv[]) 3448 { 3449 int i; 3450 3451 /* unbuffered output */ 3452 setbuf (stdout, NULL); 3453 setbuf (stderr, NULL); 3454 3455 /* default trace in hex, and in upper-case so can paste into bc */ 3456 mp_trace_base = -16; 3457 3458 param_init (); 3459 3460 { 3461 unsigned long seed = 123; 3462 int opt; 3463 3464 while ((opt = getopt(argc, argv, "19a:b:E:pRr:S:s:Wz")) != EOF) 3465 { 3466 switch (opt) { 3467 case '1': 3468 /* use limb data values 1, 2, 3, ... etc */ 3469 option_data = DATA_SEQ; 3470 break; 3471 case '9': 3472 /* use limb data values 0xFFF...FFF always */ 3473 option_data = DATA_FFS; 3474 break; 3475 case 'a': 3476 if (strcmp (optarg, "zeros") == 0) option_data = DATA_ZEROS; 3477 else if (strcmp (optarg, "seq") == 0) option_data = DATA_SEQ; 3478 else if (strcmp (optarg, "ffs") == 0) option_data = DATA_FFS; 3479 else if (strcmp (optarg, "2fd") == 0) option_data = DATA_2FD; 3480 else 3481 { 3482 fprintf (stderr, "unrecognised data option: %s\n", optarg); 3483 exit (1); 3484 } 3485 break; 3486 case 'b': 3487 mp_trace_base = atoi (optarg); 3488 break; 3489 case 'E': 3490 /* re-seed */ 3491 sscanf (optarg, "%lu", &seed); 3492 printf ("Re-seeding with %lu\n", seed); 3493 break; 3494 case 'p': 3495 option_print = 1; 3496 break; 3497 case 'R': 3498 /* randomize */ 3499 seed = time (NULL); 3500 printf ("Seeding with %lu, re-run using \"-E %lu\"\n", seed, seed); 3501 break; 3502 case 'r': 3503 option_repetitions = atoi (optarg); 3504 break; 3505 case 's': 3506 { 3507 char *p; 3508 option_firstsize = strtol (optarg, 0, 0); 3509 if ((p = strchr (optarg, '-')) != NULL) 3510 option_lastsize = strtol (p+1, 0, 0); 3511 } 3512 break; 3513 case 'S': 3514 /* -S <size> sets the starting size for the second of a two size 3515 routine (like mpn_mul_basecase) */ 3516 option_firstsize2 = strtol (optarg, 0, 0); 3517 break; 3518 case 'W': 3519 /* use this when running in the debugger */ 3520 option_spinner = 0; 3521 break; 3522 case 'z': 3523 /* disable redzones */ 3524 option_redzones = 0; 3525 break; 3526 case '?': 3527 usage (argv[0]); 3528 break; 3529 } 3530 } 3531 3532 gmp_randinit_default (__gmp_rands); 3533 __gmp_rands_initialized = 1; 3534 gmp_randseed_ui (__gmp_rands, seed); 3535 } 3536 3537 try_init(); 3538 3539 if (argc <= optind) 3540 usage (argv[0]); 3541 3542 for (i = optind; i < argc; i++) 3543 try_name (argv[i]); 3544 3545 return 0; 3546 } 3547