/* Include file for internal GNU MP types and definitions.

   THE CONTENTS OF THIS FILE ARE FOR INTERNAL USE AND ARE ALMOST CERTAIN TO
   BE SUBJECT TO INCOMPATIBLE CHANGES IN FUTURE GNU MP RELEASES.

Copyright 1991, 1993, 1994, 1995, 1996, 1997, 1999, 2000, 2001, 2002, 2003,
2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Free Software
Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 3 of the License, or (at your
option) any later version.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */


/* __GMP_DECLSPEC must be given on any global data that will be accessed
   from outside libgmp, meaning from the test or development programs, or
   from libgmpxx.  Failing to do this will result in an incorrect address
   being used for the accesses.  On functions __GMP_DECLSPEC makes calls
   from outside libgmp more efficient, but they'll still work fine without
   it.  */


#ifndef __GMP_IMPL_H__
#define __GMP_IMPL_H__

#if defined _CRAY
#include <intrinsics.h>  /* for _popcnt */
#endif

/* limits.h is not used in general, since it's an ANSI-ism, and since on
   solaris gcc 2.95 under -mcpu=ultrasparc in ABI=32 ends up getting wrong
   values (the ABI=64 values).

   On Cray vector systems, however, we need the system limits.h since sizes
   of signed and unsigned types can differ there, depending on compiler
   options (eg. -hnofastmd), making our SHRT_MAX etc expressions fail.  For
   reference, int can be 46 or 64 bits, whereas uint is always 64 bits; and
   short can be 24, 32, 46 or 64 bits, and different for ushort.  */

#if defined _CRAY
#include <limits.h>
#endif

/* For fat.h and other fat binary stuff.
   No need for __GMP_ATTRIBUTE_PURE or __GMP_NOTHROW, since functions
   declared this way are only used to set function pointers in __gmpn_cpuvec,
   they're not called directly.  */
#define DECL_add_n(name) \
  __GMP_DECLSPEC mp_limb_t name (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)
#define DECL_addlsh1_n(name) \
  DECL_add_n (name)
#define DECL_addlsh2_n(name) \
  DECL_add_n (name)
#define DECL_addmul_1(name) \
  __GMP_DECLSPEC mp_limb_t name (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t)
#define DECL_addmul_2(name) \
  __GMP_DECLSPEC mp_limb_t name (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr)
#define DECL_bdiv_dbm1c(name) \
  __GMP_DECLSPEC mp_limb_t name (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t)
#define DECL_com(name) \
  __GMP_DECLSPEC void name (mp_ptr, mp_srcptr, mp_size_t)
#define DECL_copyd(name) \
  __GMP_DECLSPEC void name (mp_ptr, mp_srcptr, mp_size_t)
#define DECL_copyi(name) \
  DECL_copyd (name)
#define DECL_divexact_1(name) \
  __GMP_DECLSPEC void name (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t)
#define DECL_divexact_by3c(name) \
  __GMP_DECLSPEC mp_limb_t name (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t)
#define DECL_divrem_1(name) \
  __GMP_DECLSPEC mp_limb_t name (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t)
#define DECL_gcd_1(name) \
  __GMP_DECLSPEC mp_limb_t name (mp_srcptr, mp_size_t, mp_limb_t)
#define DECL_lshift(name) \
  __GMP_DECLSPEC mp_limb_t name (mp_ptr, mp_srcptr, mp_size_t, unsigned)
#define DECL_lshiftc(name) \
  DECL_lshift (name)
#define DECL_mod_1(name) \
  __GMP_DECLSPEC mp_limb_t name (mp_srcptr, mp_size_t, mp_limb_t)
#define DECL_mod_1_1p(name) \
  __GMP_DECLSPEC mp_limb_t name (mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t [])
#define DECL_mod_1_1p_cps(name) \
  __GMP_DECLSPEC void name (mp_limb_t cps[], mp_limb_t b)
#define DECL_mod_1s_2p(name) \
  DECL_mod_1_1p (name)
#define DECL_mod_1s_2p_cps(name) \
  DECL_mod_1_1p_cps (name)
#define DECL_mod_1s_4p(name) \
  DECL_mod_1_1p (name)
#define DECL_mod_1s_4p_cps(name) \
  DECL_mod_1_1p_cps (name)
#define DECL_mod_34lsub1(name) \
  __GMP_DECLSPEC mp_limb_t name (mp_srcptr, mp_size_t)
#define DECL_modexact_1c_odd(name) \
  __GMP_DECLSPEC mp_limb_t name (mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t)
#define DECL_mul_1(name) \
  DECL_addmul_1 (name)
#define DECL_mul_basecase(name) \
  __GMP_DECLSPEC void name (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t)
#define DECL_mullo_basecase(name) \
  __GMP_DECLSPEC void name (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)
#define DECL_preinv_divrem_1(name) \
  __GMP_DECLSPEC mp_limb_t name (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, int)
#define DECL_preinv_mod_1(name) \
  __GMP_DECLSPEC mp_limb_t name (mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t)
#define DECL_redc_1(name) \
  __GMP_DECLSPEC mp_limb_t name (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t)
#define DECL_redc_2(name) \
  __GMP_DECLSPEC mp_limb_t name (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr)
#define DECL_rshift(name) \
  DECL_lshift (name)
#define DECL_sqr_basecase(name) \
  __GMP_DECLSPEC void name (mp_ptr, mp_srcptr, mp_size_t)
#define DECL_sub_n(name) \
  DECL_add_n (name)
#define DECL_sublsh1_n(name) \
  DECL_add_n (name)
#define DECL_submul_1(name) \
  DECL_addmul_1 (name)
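/* Illustrative sketch (the _k8 suffix is hypothetical, not a name from this
   file): in a fat binary each CPU-specific entry point is declared with the
   matching DECL_ macro before being stored in __gmpn_cpuvec, e.g.

     DECL_add_n (__MPN(add_n_k8));
     DECL_mul_basecase (__MPN(mul_basecase_k8));

   each of which expands to the corresponding __GMP_DECLSPEC prototype
   above.  */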
#if ! defined (__GMP_WITHIN_CONFIGURE)
#include "config.h"
#include "gmp-mparam.h"
#include "fib_table.h"
#include "fac_table.h"
#include "mp_bases.h"
#if WANT_FAT_BINARY
#include "fat.h"
#endif
#endif

#if HAVE_INTTYPES_H      /* for uint_least32_t */
# include <inttypes.h>
#else
# if HAVE_STDINT_H
#  include <stdint.h>
# endif
#endif

#ifdef __cplusplus
#include <cstring>  /* for strlen */
#include <string>   /* for std::string */
#endif


#ifndef WANT_TMP_DEBUG  /* for TMP_ALLOC_LIMBS_2 and others */
#define WANT_TMP_DEBUG 0
#endif

/* The following tries to get a good version of alloca.  The tests are
   adapted from autoconf AC_FUNC_ALLOCA, with a couple of additions.
   Whether this succeeds is tested by GMP_FUNC_ALLOCA and HAVE_ALLOCA will
   be set up appropriately.

   ifndef alloca - a cpp define might already exist.
       glibc <stdlib.h> includes <alloca.h> which uses GCC __builtin_alloca.
       HP cc +Olibcalls adds a #define of alloca to __builtin_alloca.

   GCC __builtin_alloca - preferred whenever available.

   _AIX pragma - IBM compilers need a #pragma in "each module that needs to
       use alloca".  Pragma indented to protect pre-ANSI cpp's.  _IBMR2 was
       used in past versions of GMP, retained still in case it matters.

       The autoconf manual says this pragma needs to be at the start of a C
       file, apart from comments and preprocessor directives.  Is that true?
       xlc on aix 4.xxx doesn't seem to mind it being after prototypes etc
       from gmp.h.  */

#ifndef alloca
# ifdef __GNUC__
#  define alloca __builtin_alloca
# else
#  ifdef __DECC
#   define alloca(x) __ALLOCA(x)
#  else
#   ifdef _MSC_VER
#    include <malloc.h>
#    define alloca _alloca
#   else
#    if HAVE_ALLOCA_H
#     include <alloca.h>
#    else
#     if defined (_AIX) || defined (_IBMR2)
 #pragma alloca
#     else
#      if !defined (__NetBSD__)
char *alloca ();
#      endif
#     endif
#    endif
#   endif
#  endif
# endif
#endif


/* if not provided by gmp-mparam.h */
#ifndef BYTES_PER_MP_LIMB
#define BYTES_PER_MP_LIMB  SIZEOF_MP_LIMB_T
#endif
#define GMP_LIMB_BYTES  BYTES_PER_MP_LIMB
#ifndef GMP_LIMB_BITS
#define GMP_LIMB_BITS  (8 * SIZEOF_MP_LIMB_T)
#endif

#define BITS_PER_ULONG  (8 * SIZEOF_UNSIGNED_LONG)


/* gmp_uint_least32_t is an unsigned integer type with at least 32 bits. */
#if HAVE_UINT_LEAST32_T
typedef uint_least32_t      gmp_uint_least32_t;
#else
#if SIZEOF_UNSIGNED_SHORT >= 4
typedef unsigned short      gmp_uint_least32_t;
#else
#if SIZEOF_UNSIGNED >= 4
typedef unsigned            gmp_uint_least32_t;
#else
typedef unsigned long       gmp_uint_least32_t;
#endif
#endif
#endif


/* gmp_intptr_t, for pointer to integer casts */
#if HAVE_INTPTR_T
typedef intptr_t            gmp_intptr_t;
#else /* fallback */
typedef size_t              gmp_intptr_t;
#endif


/* pre-inverse types for truncating division and modulo */
typedef struct {mp_limb_t inv32;} gmp_pi1_t;
typedef struct {mp_limb_t inv21, inv32, inv53;} gmp_pi2_t;


/* "const" basically means a function does nothing but examine its arguments
   and give a return value, it doesn't read or write any memory (neither
   global nor pointed to by arguments), and has no other side-effects.  This
   is more restrictive than "pure".  See info node "(gcc)Function
   Attributes".  __GMP_NO_ATTRIBUTE_CONST_PURE lets tune/common.c etc turn
   this off when trying to write timing loops.  */
#if HAVE_ATTRIBUTE_CONST && ! defined (__GMP_NO_ATTRIBUTE_CONST_PURE)
#define ATTRIBUTE_CONST __attribute__ ((const))
#else
#define ATTRIBUTE_CONST
#endif

#if HAVE_ATTRIBUTE_NORETURN
#define ATTRIBUTE_NORETURN __attribute__ ((noreturn))
#else
#define ATTRIBUTE_NORETURN
#endif

/* "malloc" means a function behaves like malloc in that the pointer it
   returns doesn't alias anything.  */
#if HAVE_ATTRIBUTE_MALLOC
#define ATTRIBUTE_MALLOC __attribute__ ((malloc))
#else
#define ATTRIBUTE_MALLOC
#endif


#if ! HAVE_STRCHR
#define strchr(s,c)  index(s,c)
#endif

#if ! HAVE_MEMSET
#define memset(p, c, n)                 \
  do {                                  \
    ASSERT ((n) >= 0);                  \
    char *__memset__p = (p);            \
    int   __i;                          \
    for (__i = 0; __i < (n); __i++)     \
      __memset__p[__i] = (c);           \
  } while (0)
#endif

/* va_copy is standard in C99, and gcc provides __va_copy when in strict C89
   mode.  Falling back to a memcpy will give maximum portability, since it
   works no matter whether va_list is a pointer, struct or array.  */
#if ! defined (va_copy) && defined (__va_copy)
#define va_copy(dst,src)  __va_copy(dst,src)
#endif
#if ! defined (va_copy)
#define va_copy(dst,src) \
  do { memcpy (&(dst), &(src), sizeof (va_list)); } while (0)
#endif


/* HAVE_HOST_CPU_alpha_CIX is 1 on an alpha with the CIX instructions
   (ie. ctlz, ctpop, cttz).  */
#if HAVE_HOST_CPU_alphaev67 || HAVE_HOST_CPU_alphaev68  \
  || HAVE_HOST_CPU_alphaev7
#define HAVE_HOST_CPU_alpha_CIX 1
#endif


#if defined (__cplusplus)
extern "C" {
#endif


/* Usage: TMP_DECL;
          TMP_MARK;
          ptr = TMP_ALLOC (bytes);
          TMP_FREE;

   Small allocations should use TMP_SALLOC, big allocations should use
   TMP_BALLOC.  Allocations that might be small or big should use TMP_ALLOC.

   Functions that use just TMP_SALLOC should use TMP_SDECL, TMP_SMARK, and
   TMP_SFREE.

   TMP_DECL just declares a variable, but might be empty and so must be last
   in a list of variables.  TMP_MARK must be done before any TMP_ALLOC.
   TMP_ALLOC(0) is not allowed.  TMP_FREE doesn't need to be done if a
   TMP_MARK was made, but then no TMP_ALLOCs.  */
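/* Illustrative sketch (hypothetical helper, not a function from GMP): a
   typical function-local pattern with the macros above, using
   TMP_ALLOC_LIMBS from further down this file.

     static void
     example_scratch_user (mp_ptr rp, mp_srcptr ap, mp_size_t n)
     {
       mp_ptr  tp;
       TMP_DECL;

       TMP_MARK;
       tp = TMP_ALLOC_LIMBS (2 * n);     [assumes n > 0, so no TMP_ALLOC(0)]
       ...use {tp, 2*n} as scratch, e.g. to build {rp,n} from {ap,n}...
       TMP_FREE;
     }
*/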
/* The alignment in bytes, used for TMP_ALLOCed blocks, when alloca or
   __gmp_allocate_func doesn't already determine it.  Currently TMP_ALLOC
   isn't used for "double"s, so that's not in the union.  */
union tmp_align_t {
  mp_limb_t  l;
  char       *p;
};
#define __TMP_ALIGN  sizeof (union tmp_align_t)

/* Return "a" rounded upwards to a multiple of "m", if it isn't already.
   "a" must be an unsigned type.
   This is designed for use with a compile-time constant "m".
   The POW2 case is expected to be usual, and gcc 3.0 and up recognises
   "(-(8*n))%8" or the like is always zero, which means the rounding up in
   the WANT_TMP_NOTREENTRANT version of TMP_ALLOC below will be a noop.  */
#define ROUND_UP_MULTIPLE(a,m)          \
  (POW2_P(m) ? (a) + (-(a))%(m)         \
   : (a)+(m)-1 - (((a)+(m)-1) % (m)))

#if defined (WANT_TMP_ALLOCA) || defined (WANT_TMP_REENTRANT)
struct tmp_reentrant_t {
  struct tmp_reentrant_t  *next;
  size_t                  size;   /* bytes, including header */
};
__GMP_DECLSPEC void *__gmp_tmp_reentrant_alloc (struct tmp_reentrant_t **, size_t) ATTRIBUTE_MALLOC;
__GMP_DECLSPEC void  __gmp_tmp_reentrant_free (struct tmp_reentrant_t *);
#endif

#if WANT_TMP_ALLOCA
#define TMP_SDECL
#define TMP_DECL        struct tmp_reentrant_t *__tmp_marker
#define TMP_SMARK
#define TMP_MARK        __tmp_marker = 0
#define TMP_SALLOC(n)   alloca(n)
#define TMP_BALLOC(n)   __gmp_tmp_reentrant_alloc (&__tmp_marker, n)
#define TMP_ALLOC(n)                                            \
  (LIKELY ((n) < 65536) ? TMP_SALLOC(n) : TMP_BALLOC(n))
#define TMP_SFREE
#define TMP_FREE                                                \
  do {                                                          \
    if (UNLIKELY (__tmp_marker != 0))                           \
      __gmp_tmp_reentrant_free (__tmp_marker);                  \
  } while (0)
#endif

#if WANT_TMP_REENTRANT
#define TMP_SDECL       TMP_DECL
#define TMP_DECL        struct tmp_reentrant_t *__tmp_marker
#define TMP_SMARK       TMP_MARK
#define TMP_MARK        __tmp_marker = 0
#define TMP_SALLOC(n)   TMP_ALLOC(n)
#define TMP_BALLOC(n)   TMP_ALLOC(n)
#define TMP_ALLOC(n)    __gmp_tmp_reentrant_alloc (&__tmp_marker, n)
#define TMP_SFREE       TMP_FREE
#define TMP_FREE        __gmp_tmp_reentrant_free (__tmp_marker)
#endif

#if WANT_TMP_NOTREENTRANT
struct tmp_marker
{
  struct tmp_stack *which_chunk;
  void *alloc_point;
};
__GMP_DECLSPEC void *__gmp_tmp_alloc (unsigned long) ATTRIBUTE_MALLOC;
__GMP_DECLSPEC void __gmp_tmp_mark (struct tmp_marker *);
__GMP_DECLSPEC void __gmp_tmp_free (struct tmp_marker *);
#define TMP_SDECL       TMP_DECL
#define TMP_DECL        struct tmp_marker __tmp_marker
#define TMP_SMARK       TMP_MARK
#define TMP_MARK        __gmp_tmp_mark (&__tmp_marker)
#define TMP_SALLOC(n)   TMP_ALLOC(n)
#define TMP_BALLOC(n)   TMP_ALLOC(n)
#define TMP_ALLOC(n)                                                    \
  __gmp_tmp_alloc (ROUND_UP_MULTIPLE ((unsigned long) (n), __TMP_ALIGN))
#define TMP_SFREE       TMP_FREE
#define TMP_FREE        __gmp_tmp_free (&__tmp_marker)
#endif

#if WANT_TMP_DEBUG
/* See tal-debug.c for some comments. */
struct tmp_debug_t {
  struct tmp_debug_entry_t  *list;
  const char                *file;
  int                       line;
};
struct tmp_debug_entry_t {
  struct tmp_debug_entry_t  *next;
  char                      *block;
  size_t                    size;
};
__GMP_DECLSPEC void  __gmp_tmp_debug_mark (const char *, int, struct tmp_debug_t **,
                                           struct tmp_debug_t *,
                                           const char *, const char *);
__GMP_DECLSPEC void *__gmp_tmp_debug_alloc (const char *, int, int,
                                            struct tmp_debug_t **, const char *,
                                            size_t) ATTRIBUTE_MALLOC;
__GMP_DECLSPEC void  __gmp_tmp_debug_free (const char *, int, int,
                                           struct tmp_debug_t **,
                                           const char *, const char *);
#define TMP_SDECL TMP_DECL_NAME(__tmp_xmarker, "__tmp_marker")
#define TMP_DECL TMP_DECL_NAME(__tmp_xmarker, "__tmp_marker")
#define TMP_SMARK TMP_MARK_NAME(__tmp_xmarker, "__tmp_marker")
#define TMP_MARK TMP_MARK_NAME(__tmp_xmarker, "__tmp_marker")
#define TMP_SFREE TMP_FREE_NAME(__tmp_xmarker, "__tmp_marker")
#define TMP_FREE TMP_FREE_NAME(__tmp_xmarker, "__tmp_marker")
/* The marker variable is designed to provoke an uninitialized variable
   warning from the compiler if TMP_FREE is used without a TMP_MARK.
   __tmp_marker_inscope does the same for TMP_ALLOC.  Runtime tests pick
   these things up too.  */
#define TMP_DECL_NAME(marker, marker_name)                      \
  int marker;                                                   \
  int __tmp_marker_inscope;                                     \
  const char *__tmp_marker_name = marker_name;                  \
  struct tmp_debug_t  __tmp_marker_struct;                      \
  /* don't demand NULL, just cast a zero */                     \
  struct tmp_debug_t  *__tmp_marker = (struct tmp_debug_t *) 0
#define TMP_MARK_NAME(marker, marker_name)                      \
  do {                                                          \
    marker = 1;                                                 \
    __tmp_marker_inscope = 1;                                   \
    __gmp_tmp_debug_mark  (ASSERT_FILE, ASSERT_LINE,            \
                           &__tmp_marker, &__tmp_marker_struct, \
                           __tmp_marker_name, marker_name);     \
  } while (0)
#define TMP_SALLOC(n)   TMP_ALLOC(n)
#define TMP_BALLOC(n)   TMP_ALLOC(n)
#define TMP_ALLOC(size)                                                 \
  __gmp_tmp_debug_alloc (ASSERT_FILE, ASSERT_LINE,                      \
                         __tmp_marker_inscope,                          \
                         &__tmp_marker, __tmp_marker_name, size)
#define TMP_FREE_NAME(marker, marker_name)                      \
  do {                                                          \
    __gmp_tmp_debug_free  (ASSERT_FILE, ASSERT_LINE,            \
                           marker, &__tmp_marker,               \
                           __tmp_marker_name, marker_name);     \
  } while (0)
#endif /* WANT_TMP_DEBUG */


/* Allocating various types. */
#define TMP_ALLOC_TYPE(n,type)  ((type *) TMP_ALLOC ((n) * sizeof (type)))
#define TMP_SALLOC_TYPE(n,type) ((type *) TMP_SALLOC ((n) * sizeof (type)))
#define TMP_BALLOC_TYPE(n,type) ((type *) TMP_BALLOC ((n) * sizeof (type)))
#define TMP_ALLOC_LIMBS(n)      TMP_ALLOC_TYPE(n,mp_limb_t)
#define TMP_SALLOC_LIMBS(n)     TMP_SALLOC_TYPE(n,mp_limb_t)
#define TMP_BALLOC_LIMBS(n)     TMP_BALLOC_TYPE(n,mp_limb_t)
#define TMP_ALLOC_MP_PTRS(n)    TMP_ALLOC_TYPE(n,mp_ptr)
#define TMP_SALLOC_MP_PTRS(n)   TMP_SALLOC_TYPE(n,mp_ptr)
#define TMP_BALLOC_MP_PTRS(n)   TMP_BALLOC_TYPE(n,mp_ptr)

/* It's more efficient to allocate one block than two.  This is certainly
   true of the malloc methods, but it can even be true of alloca if that
   involves copying a chunk of stack (various RISCs), or a call to a stack
   bounds check (mingw).  In any case, when debugging keep separate blocks
   so a redzoning malloc debugger can protect each individually.  */
#define TMP_ALLOC_LIMBS_2(xp,xsize, yp,ysize)           \
  do {                                                  \
    if (WANT_TMP_DEBUG)                                 \
      {                                                 \
        (xp) = TMP_ALLOC_LIMBS (xsize);                 \
        (yp) = TMP_ALLOC_LIMBS (ysize);                 \
      }                                                 \
    else                                                \
      {                                                 \
        (xp) = TMP_ALLOC_LIMBS ((xsize) + (ysize));     \
        (yp) = (xp) + (xsize);                          \
      }                                                 \
  } while (0)


/* From gmp.h, nicer names for internal use. */
#define CRAY_Pragma(str)               __GMP_CRAY_Pragma(str)
#define MPN_CMP(result, xp, yp, size)  __GMPN_CMP(result, xp, yp, size)
#define LIKELY(cond)                   __GMP_LIKELY(cond)
#define UNLIKELY(cond)                 __GMP_UNLIKELY(cond)

#define ABS(x) ((x) >= 0 ? (x) : -(x))
#define ABS_CAST(T,x) ((x) >= 0 ? (T)(x) : -((T)((x) + 1) - 1))
#undef MIN
#define MIN(l,o) ((l) < (o) ? (l) : (o))
#undef MAX
#define MAX(h,i) ((h) > (i) ? (h) : (i))
#define numberof(x)  (sizeof (x) / sizeof ((x)[0]))

/* Field access macros. */
#define SIZ(x)   ((x)->_mp_size)
#define ABSIZ(x) ABS (SIZ (x))
#define PTR(x)   ((x)->_mp_d)
#define EXP(x)   ((x)->_mp_exp)
#define PREC(x)  ((x)->_mp_prec)
#define ALLOC(x) ((x)->_mp_alloc)
#define NUM(x)   mpq_numref(x)
#define DEN(x)   mpq_denref(x)

/* n-1 inverts any low zeros and the lowest one bit.  If n&(n-1) leaves zero
   then that lowest one bit must have been the only bit set.  n==0 will
   return true though, so avoid that.  */
#define POW2_P(n)  (((n) & ((n) - 1)) == 0)
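/* Worked example of ROUND_UP_MULTIPLE and POW2_P above (illustrative only):
   with an unsigned a, ROUND_UP_MULTIPLE(13,8) takes the POW2_P branch since
   POW2_P(8) is 1, giving 13 + ((-13) % 8) = 13 + 3 = 16, while a value that
   is already a multiple, such as 16, is returned unchanged.  */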
/* This is intended for constant THRESHOLDs only, where the compiler
   can completely fold the result.  */
#define LOG2C(n) \
 (((n) >=    0x1) + ((n) >=    0x2) + ((n) >=    0x4) + ((n) >=    0x8) + \
  ((n) >=   0x10) + ((n) >=   0x20) + ((n) >=   0x40) + ((n) >=   0x80) + \
  ((n) >=  0x100) + ((n) >=  0x200) + ((n) >=  0x400) + ((n) >=  0x800) + \
  ((n) >= 0x1000) + ((n) >= 0x2000) + ((n) >= 0x4000) + ((n) >= 0x8000))

/* The "short" defines are a bit different because shorts are promoted to
   ints by ~ or >> etc.

   #ifndef's are used since on some systems (HP?) header files other than
   limits.h set up these defines.  We could forcibly #undef in that case, but
   there seems no need to worry about that.  */

#ifndef ULONG_MAX
#define ULONG_MAX   __GMP_ULONG_MAX
#endif
#ifndef UINT_MAX
#define UINT_MAX    __GMP_UINT_MAX
#endif
#ifndef USHRT_MAX
#define USHRT_MAX   __GMP_USHRT_MAX
#endif
#define MP_LIMB_T_MAX      (~ (mp_limb_t) 0)

/* Must cast ULONG_MAX etc to unsigned long etc, since they might not be
   unsigned on a K&R compiler.  In particular the HP-UX 10 bundled K&R cc
   treats the plain decimal values in <limits.h> as signed.  */
#define ULONG_HIGHBIT      (ULONG_MAX ^ ((unsigned long) ULONG_MAX >> 1))
#define UINT_HIGHBIT       (UINT_MAX ^ ((unsigned) UINT_MAX >> 1))
#define USHRT_HIGHBIT      ((unsigned short) (USHRT_MAX ^ ((unsigned short) USHRT_MAX >> 1)))
#define GMP_LIMB_HIGHBIT   (MP_LIMB_T_MAX ^ (MP_LIMB_T_MAX >> 1))

#ifndef LONG_MIN
#define LONG_MIN           ((long) ULONG_HIGHBIT)
#endif
#ifndef LONG_MAX
#define LONG_MAX           (-(LONG_MIN+1))
#endif

#ifndef INT_MIN
#define INT_MIN            ((int) UINT_HIGHBIT)
#endif
#ifndef INT_MAX
#define INT_MAX            (-(INT_MIN+1))
#endif

#ifndef SHRT_MIN
#define SHRT_MIN           ((short) USHRT_HIGHBIT)
#endif
#ifndef SHRT_MAX
#define SHRT_MAX           ((short) (-(SHRT_MIN+1)))
#endif

#if __GMP_MP_SIZE_T_INT
#define MP_SIZE_T_MAX      INT_MAX
#define MP_SIZE_T_MIN      INT_MIN
#else
#define MP_SIZE_T_MAX      LONG_MAX
#define MP_SIZE_T_MIN      LONG_MIN
#endif

/* mp_exp_t is the same as mp_size_t */
#define MP_EXP_T_MAX   MP_SIZE_T_MAX
#define MP_EXP_T_MIN   MP_SIZE_T_MIN

#define LONG_HIGHBIT       LONG_MIN
#define INT_HIGHBIT        INT_MIN
#define SHRT_HIGHBIT       SHRT_MIN


#define GMP_NUMB_HIGHBIT  (CNST_LIMB(1) << (GMP_NUMB_BITS-1))

#if GMP_NAIL_BITS == 0
#define GMP_NAIL_LOWBIT   CNST_LIMB(0)
#else
#define GMP_NAIL_LOWBIT   (CNST_LIMB(1) << GMP_NUMB_BITS)
#endif

#if GMP_NAIL_BITS != 0
/* Set various *_THRESHOLD values to be used for nails.  Thus we avoid using
   code that has not yet been qualified.  */

#undef  DC_DIV_QR_THRESHOLD
#define DC_DIV_QR_THRESHOLD              50

#undef DIVREM_1_NORM_THRESHOLD
#undef DIVREM_1_UNNORM_THRESHOLD
#undef MOD_1_NORM_THRESHOLD
#undef MOD_1_UNNORM_THRESHOLD
#undef USE_PREINV_DIVREM_1
#undef DIVREM_2_THRESHOLD
#undef DIVEXACT_1_THRESHOLD
#define DIVREM_1_NORM_THRESHOLD           MP_SIZE_T_MAX  /* no preinv */
#define DIVREM_1_UNNORM_THRESHOLD         MP_SIZE_T_MAX  /* no preinv */
#define MOD_1_NORM_THRESHOLD              MP_SIZE_T_MAX  /* no preinv */
#define MOD_1_UNNORM_THRESHOLD            MP_SIZE_T_MAX  /* no preinv */
#define USE_PREINV_DIVREM_1               0  /* no preinv */
#define DIVREM_2_THRESHOLD                MP_SIZE_T_MAX  /* no preinv */

/* mpn/generic/mul_fft.c is not nails-capable. */
#undef  MUL_FFT_THRESHOLD
#undef  SQR_FFT_THRESHOLD
#define MUL_FFT_THRESHOLD MP_SIZE_T_MAX
#define SQR_FFT_THRESHOLD MP_SIZE_T_MAX
#endif

/* Swap macros. */

#define MP_LIMB_T_SWAP(x, y)                            \
  do {                                                  \
    mp_limb_t __mp_limb_t_swap__tmp = (x);              \
    (x) = (y);                                          \
    (y) = __mp_limb_t_swap__tmp;                        \
  } while (0)
#define MP_SIZE_T_SWAP(x, y)                            \
  do {                                                  \
    mp_size_t __mp_size_t_swap__tmp = (x);              \
    (x) = (y);                                          \
    (y) = __mp_size_t_swap__tmp;                        \
  } while (0)

#define MP_PTR_SWAP(x, y)                               \
  do {                                                  \
    mp_ptr __mp_ptr_swap__tmp = (x);                    \
    (x) = (y);                                          \
    (y) = __mp_ptr_swap__tmp;                           \
  } while (0)
#define MP_SRCPTR_SWAP(x, y)                            \
  do {                                                  \
    mp_srcptr __mp_srcptr_swap__tmp = (x);              \
    (x) = (y);                                          \
    (y) = __mp_srcptr_swap__tmp;                        \
  } while (0)

#define MPN_PTR_SWAP(xp,xs, yp,ys)                      \
  do {                                                  \
    MP_PTR_SWAP (xp, yp);                               \
    MP_SIZE_T_SWAP (xs, ys);                            \
  } while(0)
#define MPN_SRCPTR_SWAP(xp,xs, yp,ys)                   \
  do {                                                  \
    MP_SRCPTR_SWAP (xp, yp);                            \
    MP_SIZE_T_SWAP (xs, ys);                            \
  } while(0)

#define MPZ_PTR_SWAP(x, y)                              \
  do {                                                  \
    mpz_ptr __mpz_ptr_swap__tmp = (x);                  \
    (x) = (y);                                          \
    (y) = __mpz_ptr_swap__tmp;                          \
  } while (0)
#define MPZ_SRCPTR_SWAP(x, y)                           \
  do {                                                  \
    mpz_srcptr __mpz_srcptr_swap__tmp = (x);            \
    (x) = (y);                                          \
    (y) = __mpz_srcptr_swap__tmp;                       \
  } while (0)


/* Enhancement: __gmp_allocate_func could have "__attribute__ ((malloc))",
   but current gcc (3.0) doesn't seem to support that.  */
__GMP_DECLSPEC extern void * (*__gmp_allocate_func) (size_t);
__GMP_DECLSPEC extern void * (*__gmp_reallocate_func) (void *, size_t, size_t);
__GMP_DECLSPEC extern void   (*__gmp_free_func) (void *, size_t);

__GMP_DECLSPEC void *__gmp_default_allocate (size_t);
__GMP_DECLSPEC void *__gmp_default_reallocate (void *, size_t, size_t);
__GMP_DECLSPEC void __gmp_default_free (void *, size_t);

#define __GMP_ALLOCATE_FUNC_TYPE(n,type) \
  ((type *) (*__gmp_allocate_func) ((n) * sizeof (type)))
#define __GMP_ALLOCATE_FUNC_LIMBS(n)   __GMP_ALLOCATE_FUNC_TYPE (n, mp_limb_t)

#define __GMP_REALLOCATE_FUNC_TYPE(p, old_size, new_size, type) \
  ((type *) (*__gmp_reallocate_func)                            \
   (p, (old_size) * sizeof (type), (new_size) * sizeof (type)))
#define __GMP_REALLOCATE_FUNC_LIMBS(p, old_size, new_size) \
  __GMP_REALLOCATE_FUNC_TYPE(p, old_size, new_size, mp_limb_t)

#define __GMP_FREE_FUNC_TYPE(p,n,type) (*__gmp_free_func) (p, (n) * sizeof (type))
#define __GMP_FREE_FUNC_LIMBS(p,n)     __GMP_FREE_FUNC_TYPE (p, n, mp_limb_t)

#define __GMP_REALLOCATE_FUNC_MAYBE(ptr, oldsize, newsize)      \
  do {                                                          \
    if ((oldsize) != (newsize))                                 \
      (ptr) = (*__gmp_reallocate_func) (ptr, oldsize, newsize); \
  } while (0)

#define __GMP_REALLOCATE_FUNC_MAYBE_TYPE(ptr, oldsize, newsize, type)   \
  do {                                                                  \
    if ((oldsize) != (newsize))                                         \
      (ptr) = (type *) (*__gmp_reallocate_func)                         \
        (ptr, (oldsize) * sizeof (type), (newsize) * sizeof (type));    \
  } while (0)
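/* Illustrative sketch (not code from this file): allocating, growing and
   freeing a limb block through the user-settable memory functions, roughly
   the pattern the mpz layer follows.

     mp_ptr p = __GMP_ALLOCATE_FUNC_LIMBS (n);
     __GMP_REALLOCATE_FUNC_MAYBE_TYPE (p, n, 2 * n, mp_limb_t);
     __GMP_FREE_FUNC_LIMBS (p, 2 * n);
*/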
/* Dummy for non-gcc, code involving it will go dead. */
#if ! defined (__GNUC__) || __GNUC__ < 2
#define __builtin_constant_p(x)   0
#endif


/* In gcc 2.96 and up on i386, tail calls are optimized to jumps if the
   stack usage is compatible.  __attribute__ ((regparm (N))) helps by
   putting leading parameters in registers, avoiding extra stack.

   regparm cannot be used with calls going through the PLT, because the
   binding code there may clobber the registers (%eax, %edx, %ecx) used for
   the regparm parameters.  Calls to local (ie. static) functions could
   still use this, if we cared to differentiate locals and globals.

   On athlon-unknown-freebsd4.9 with gcc 3.3.3, regparm cannot be used with
   -p or -pg profiling, since that version of gcc doesn't realize the
   .mcount calls will clobber the parameter registers.  Other systems are
   ok, like debian with glibc 2.3.2 (mcount doesn't clobber), but we don't
   bother to try to detect this.  regparm is only an optimization so we just
   disable it when profiling (profiling being a slowdown anyway).  */

#if HAVE_HOST_CPU_FAMILY_x86 && __GMP_GNUC_PREREQ (2,96) && ! defined (PIC) \
  && ! WANT_PROFILING_PROF && ! WANT_PROFILING_GPROF
#define USE_LEADING_REGPARM 1
#else
#define USE_LEADING_REGPARM 0
#endif

/* Macros for altering parameter order according to regparm usage. */
#if USE_LEADING_REGPARM
#define REGPARM_2_1(a,b,x)    x,a,b
#define REGPARM_3_1(a,b,c,x)  x,a,b,c
#define REGPARM_ATTR(n) __attribute__ ((regparm (n)))
#else
#define REGPARM_2_1(a,b,x)    a,b,x
#define REGPARM_3_1(a,b,c,x)  a,b,c,x
#define REGPARM_ATTR(n)
#endif


/* ASM_L gives a local label for a gcc asm block, for use when temporary
   local labels like "1:" might not be available, which is the case for
   instance on the x86s (the SCO assembler doesn't support them).

   The label generated is made unique by including "%=" which is a unique
   number for each insn.  This ensures the same name can be used in multiple
   asm blocks, perhaps via a macro.  Since jumps between asm blocks are not
   allowed there's no need for a label to be usable outside a single
   block.  */

#define ASM_L(name)  LSYM_PREFIX "asm_%=_" #name


#if defined (__GNUC__) && HAVE_HOST_CPU_FAMILY_x86
#if 0
/* FIXME: Check that these actually improve things.
   FIXME: Need a cld after each std.
   FIXME: Can't have inputs in clobbered registers, must describe them as
   dummy outputs, and add volatile.  */
#define MPN_COPY_INCR(DST, SRC, N)                                      \
  __asm__ ("cld\n\trep\n\tmovsl" : :                                    \
           "D" (DST), "S" (SRC), "c" (N) :                              \
           "cx", "di", "si", "memory")
#define MPN_COPY_DECR(DST, SRC, N)                                      \
  __asm__ ("std\n\trep\n\tmovsl" : :                                    \
           "D" ((DST) + (N) - 1), "S" ((SRC) + (N) - 1), "c" (N) :      \
           "cx", "di", "si", "memory")
#endif
#endif


__GMP_DECLSPEC void __gmpz_aorsmul_1 (REGPARM_3_1 (mpz_ptr, mpz_srcptr, mp_limb_t, mp_size_t)) REGPARM_ATTR(1);
#define mpz_aorsmul_1(w,u,v,sub)  __gmpz_aorsmul_1 (REGPARM_3_1 (w, u, v, sub))

#define mpz_n_pow_ui __gmpz_n_pow_ui
__GMP_DECLSPEC void mpz_n_pow_ui (mpz_ptr, mp_srcptr, mp_size_t, unsigned long);


#define mpn_addmul_1c __MPN(addmul_1c)
__GMP_DECLSPEC mp_limb_t mpn_addmul_1c (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t);

#ifndef mpn_addmul_2  /* if not done with cpuvec in a fat binary */
#define mpn_addmul_2 __MPN(addmul_2)
__GMP_DECLSPEC mp_limb_t mpn_addmul_2 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
#endif

#define mpn_addmul_3 __MPN(addmul_3)
__GMP_DECLSPEC mp_limb_t mpn_addmul_3 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);

#define mpn_addmul_4 __MPN(addmul_4)
__GMP_DECLSPEC mp_limb_t mpn_addmul_4 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);

#define mpn_addmul_5 __MPN(addmul_5)
__GMP_DECLSPEC mp_limb_t mpn_addmul_5 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);

#define mpn_addmul_6 __MPN(addmul_6)
__GMP_DECLSPEC mp_limb_t mpn_addmul_6 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);

#define mpn_addmul_7 __MPN(addmul_7)
__GMP_DECLSPEC mp_limb_t mpn_addmul_7 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);

#define mpn_addmul_8 __MPN(addmul_8)
__GMP_DECLSPEC mp_limb_t mpn_addmul_8 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);

/* Alternative entry point in mpn_addmul_2 for the benefit of mpn_sqr_basecase.  */
#define mpn_addmul_2s __MPN(addmul_2s)
__GMP_DECLSPEC mp_limb_t mpn_addmul_2s (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);

/* mpn_addlsh1_n(c,a,b,n), when it exists, sets {c,n} to {a,n}+2*{b,n}, and
   returns the carry out (0, 1 or 2).  Use _ip1 when a=c.  */
#ifndef mpn_addlsh1_n  /* if not done with cpuvec in a fat binary */
#define mpn_addlsh1_n __MPN(addlsh1_n)
__GMP_DECLSPEC mp_limb_t mpn_addlsh1_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
#endif
#define mpn_addlsh1_nc __MPN(addlsh1_nc)
__GMP_DECLSPEC mp_limb_t mpn_addlsh1_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
#if HAVE_NATIVE_mpn_addlsh1_n && ! HAVE_NATIVE_mpn_addlsh1_n_ip1
#define mpn_addlsh1_n_ip1(dst,src,n) mpn_addlsh1_n(dst,dst,src,n)
#define HAVE_NATIVE_mpn_addlsh1_n_ip1 1
#else
#define mpn_addlsh1_n_ip1 __MPN(addlsh1_n_ip1)
__GMP_DECLSPEC mp_limb_t mpn_addlsh1_n_ip1 (mp_ptr, mp_srcptr, mp_size_t);
#endif
#if HAVE_NATIVE_mpn_addlsh1_nc && ! HAVE_NATIVE_mpn_addlsh1_nc_ip1
#define mpn_addlsh1_nc_ip1(dst,src,n,c) mpn_addlsh1_nc(dst,dst,src,n,c)
#define HAVE_NATIVE_mpn_addlsh1_nc_ip1 1
#else
#define mpn_addlsh1_nc_ip1 __MPN(addlsh1_nc_ip1)
__GMP_DECLSPEC mp_limb_t mpn_addlsh1_nc_ip1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
#endif
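/* Illustrative equivalence for mpn_addlsh1_n above (a two-pass sketch, not
   how the optimized code works): with tp an n-limb temporary,

     cy  = mpn_lshift (tp, bp, n, 1);     [bit shifted out, 0 or 1]
     cy += mpn_add_n  (cp, ap, tp, n);    [carry, 0 or 1]

   leaves the same {cp,n} and the same total cy (0, 1 or 2) that
   mpn_addlsh1_n (cp, ap, bp, n) produces in a single pass.  */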
#ifndef mpn_addlsh2_n  /* if not done with cpuvec in a fat binary */
/* mpn_addlsh2_n(c,a,b,n), when it exists, sets {c,n} to {a,n}+4*{b,n}, and
   returns the carry out (0, ..., 4).  Use _ip1 when a=c.  */
#define mpn_addlsh2_n __MPN(addlsh2_n)
__GMP_DECLSPEC mp_limb_t mpn_addlsh2_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
#endif
#define mpn_addlsh2_nc __MPN(addlsh2_nc)
__GMP_DECLSPEC mp_limb_t mpn_addlsh2_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
#if HAVE_NATIVE_mpn_addlsh2_n && ! HAVE_NATIVE_mpn_addlsh2_n_ip1
#define mpn_addlsh2_n_ip1(dst,src,n) mpn_addlsh2_n(dst,dst,src,n)
#define HAVE_NATIVE_mpn_addlsh2_n_ip1 1
#else
#define mpn_addlsh2_n_ip1 __MPN(addlsh2_n_ip1)
__GMP_DECLSPEC mp_limb_t mpn_addlsh2_n_ip1 (mp_ptr, mp_srcptr, mp_size_t);
#endif
#if HAVE_NATIVE_mpn_addlsh2_nc && ! HAVE_NATIVE_mpn_addlsh2_nc_ip1
#define mpn_addlsh2_nc_ip1(dst,src,n,c) mpn_addlsh2_nc(dst,dst,src,n,c)
#define HAVE_NATIVE_mpn_addlsh2_nc_ip1 1
#else
#define mpn_addlsh2_nc_ip1 __MPN(addlsh2_nc_ip1)
__GMP_DECLSPEC mp_limb_t mpn_addlsh2_nc_ip1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
#endif

/* mpn_addlsh_n(c,a,b,n,k), when it exists, sets {c,n} to {a,n}+2^k*{b,n}, and
   returns the carry out (0, ..., 2^k).  Use _ip1 when a=c.  */
#define mpn_addlsh_n __MPN(addlsh_n)
__GMP_DECLSPEC mp_limb_t mpn_addlsh_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, unsigned int);
#define mpn_addlsh_nc __MPN(addlsh_nc)
__GMP_DECLSPEC mp_limb_t mpn_addlsh_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, unsigned int, mp_limb_t);
#if HAVE_NATIVE_mpn_addlsh_n && ! HAVE_NATIVE_mpn_addlsh_n_ip1
#define mpn_addlsh_n_ip1(dst,src,n,s) mpn_addlsh_n(dst,dst,src,n,s)
#define HAVE_NATIVE_mpn_addlsh_n_ip1 1
#else
#define mpn_addlsh_n_ip1 __MPN(addlsh_n_ip1)
__GMP_DECLSPEC mp_limb_t mpn_addlsh_n_ip1 (mp_ptr, mp_srcptr, mp_size_t, unsigned int);
#endif
#if HAVE_NATIVE_mpn_addlsh_nc && ! HAVE_NATIVE_mpn_addlsh_nc_ip1
#define mpn_addlsh_nc_ip1(dst,src,n,s,c) mpn_addlsh_nc(dst,dst,src,n,s,c)
#define HAVE_NATIVE_mpn_addlsh_nc_ip1 1
#else
#define mpn_addlsh_nc_ip1 __MPN(addlsh_nc_ip1)
__GMP_DECLSPEC mp_limb_t mpn_addlsh_nc_ip1 (mp_ptr, mp_srcptr, mp_size_t, unsigned int, mp_limb_t);
#endif

#ifndef mpn_sublsh1_n  /* if not done with cpuvec in a fat binary */
/* mpn_sublsh1_n(c,a,b,n), when it exists, sets {c,n} to {a,n}-2*{b,n}, and
   returns the borrow out (0, 1 or 2).  Use _ip1 when a=c.  */
#define mpn_sublsh1_n __MPN(sublsh1_n)
__GMP_DECLSPEC mp_limb_t mpn_sublsh1_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
#endif
#define mpn_sublsh1_nc __MPN(sublsh1_nc)
__GMP_DECLSPEC mp_limb_t mpn_sublsh1_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
#if HAVE_NATIVE_mpn_sublsh1_n && ! HAVE_NATIVE_mpn_sublsh1_n_ip1
#define mpn_sublsh1_n_ip1(dst,src,n) mpn_sublsh1_n(dst,dst,src,n)
#define HAVE_NATIVE_mpn_sublsh1_n_ip1 1
#else
#define mpn_sublsh1_n_ip1 __MPN(sublsh1_n_ip1)
__GMP_DECLSPEC mp_limb_t mpn_sublsh1_n_ip1 (mp_ptr, mp_srcptr, mp_size_t);
#endif
#if HAVE_NATIVE_mpn_sublsh1_nc && ! HAVE_NATIVE_mpn_sublsh1_nc_ip1
#define mpn_sublsh1_nc_ip1(dst,src,n,c) mpn_sublsh1_nc(dst,dst,src,n,c)
#define HAVE_NATIVE_mpn_sublsh1_nc_ip1 1
#else
#define mpn_sublsh1_nc_ip1 __MPN(sublsh1_nc_ip1)
__GMP_DECLSPEC mp_limb_t mpn_sublsh1_nc_ip1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
#endif

/* mpn_rsblsh1_n(c,a,b,n), when it exists, sets {c,n} to 2*{b,n}-{a,n}, and
   returns the carry out (-1, 0, 1).  */
#define mpn_rsblsh1_n __MPN(rsblsh1_n)
__GMP_DECLSPEC mp_limb_signed_t mpn_rsblsh1_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
#define mpn_rsblsh1_nc __MPN(rsblsh1_nc)
__GMP_DECLSPEC mp_limb_signed_t mpn_rsblsh1_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);

/* mpn_sublsh2_n(c,a,b,n), when it exists, sets {c,n} to {a,n}-4*{b,n}, and
   returns the borrow out (0, ..., 4).  Use _ip1 when a=c.  */
#define mpn_sublsh2_n __MPN(sublsh2_n)
__GMP_DECLSPEC mp_limb_t mpn_sublsh2_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
#define mpn_sublsh2_nc __MPN(sublsh2_nc)
__GMP_DECLSPEC mp_limb_t mpn_sublsh2_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
#if HAVE_NATIVE_mpn_sublsh2_n && ! HAVE_NATIVE_mpn_sublsh2_n_ip1
#define mpn_sublsh2_n_ip1(dst,src,n) mpn_sublsh2_n(dst,dst,src,n)
#define HAVE_NATIVE_mpn_sublsh2_n_ip1 1
#else
#define mpn_sublsh2_n_ip1 __MPN(sublsh2_n_ip1)
__GMP_DECLSPEC mp_limb_t mpn_sublsh2_n_ip1 (mp_ptr, mp_srcptr, mp_size_t);
#endif
#if HAVE_NATIVE_mpn_sublsh2_nc && ! HAVE_NATIVE_mpn_sublsh2_nc_ip1
#define mpn_sublsh2_nc_ip1(dst,src,n,c) mpn_sublsh2_nc(dst,dst,src,n,c)
#define HAVE_NATIVE_mpn_sublsh2_nc_ip1 1
#else
#define mpn_sublsh2_nc_ip1 __MPN(sublsh2_nc_ip1)
__GMP_DECLSPEC mp_limb_t mpn_sublsh2_nc_ip1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
#endif

/* mpn_sublsh_n(c,a,b,n,k), when it exists, sets {c,n} to {a,n}-2^k*{b,n}, and
   returns the carry out (0, ..., 2^k).  Use _ip1 when a=c.  */
#define mpn_sublsh_n __MPN(sublsh_n)
__GMP_DECLSPEC mp_limb_t mpn_sublsh_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, unsigned int);
#if HAVE_NATIVE_mpn_sublsh_n && ! HAVE_NATIVE_mpn_sublsh_n_ip1
#define mpn_sublsh_n_ip1(dst,src,n,s) mpn_sublsh_n(dst,dst,src,n,s)
#define HAVE_NATIVE_mpn_sublsh_n_ip1 1
#else
#define mpn_sublsh_n_ip1 __MPN(sublsh_n_ip1)
__GMP_DECLSPEC mp_limb_t mpn_sublsh_n_ip1 (mp_ptr, mp_srcptr, mp_size_t, unsigned int);
#endif
#if HAVE_NATIVE_mpn_sublsh_nc && ! HAVE_NATIVE_mpn_sublsh_nc_ip1
#define mpn_sublsh_nc_ip1(dst,src,n,s,c) mpn_sublsh_nc(dst,dst,src,n,s,c)
#define HAVE_NATIVE_mpn_sublsh_nc_ip1 1
#else
#define mpn_sublsh_nc_ip1 __MPN(sublsh_nc_ip1)
__GMP_DECLSPEC mp_limb_t mpn_sublsh_nc_ip1 (mp_ptr, mp_srcptr, mp_size_t, unsigned int, mp_limb_t);
#endif

/* mpn_rsblsh2_n(c,a,b,n), when it exists, sets {c,n} to 4*{b,n}-{a,n}, and
   returns the carry out (-1, ..., 3).  */
#define mpn_rsblsh2_n __MPN(rsblsh2_n)
__GMP_DECLSPEC mp_limb_signed_t mpn_rsblsh2_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
#define mpn_rsblsh2_nc __MPN(rsblsh2_nc)
__GMP_DECLSPEC mp_limb_signed_t mpn_rsblsh2_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);

/* mpn_rsblsh_n(c,a,b,n,k), when it exists, sets {c,n} to 2^k*{b,n}-{a,n}, and
   returns the carry out (-1, 0, ..., 2^k-1).  */
#define mpn_rsblsh_n __MPN(rsblsh_n)
__GMP_DECLSPEC mp_limb_signed_t mpn_rsblsh_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, unsigned int);
#define mpn_rsblsh_nc __MPN(rsblsh_nc)
__GMP_DECLSPEC mp_limb_signed_t mpn_rsblsh_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, unsigned int, mp_limb_t);

/* mpn_rsh1add_n(c,a,b,n), when it exists, sets {c,n} to ({a,n} + {b,n}) >> 1,
   and returns the bit rshifted out (0 or 1).  */
#define mpn_rsh1add_n __MPN(rsh1add_n)
__GMP_DECLSPEC mp_limb_t mpn_rsh1add_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
#define mpn_rsh1add_nc __MPN(rsh1add_nc)
__GMP_DECLSPEC mp_limb_t mpn_rsh1add_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);

/* mpn_rsh1sub_n(c,a,b,n), when it exists, sets {c,n} to ({a,n} - {b,n}) >> 1,
   and returns the bit rshifted out (0 or 1).  If there's a borrow from the
   subtract, it's stored as a 1 in the high bit of c[n-1], like a twos
   complement negative.  */
#define mpn_rsh1sub_n __MPN(rsh1sub_n)
__GMP_DECLSPEC mp_limb_t mpn_rsh1sub_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
#define mpn_rsh1sub_nc __MPN(rsh1sub_nc)
__GMP_DECLSPEC mp_limb_t mpn_rsh1sub_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);

#ifndef mpn_lshiftc  /* if not done with cpuvec in a fat binary */
#define mpn_lshiftc __MPN(lshiftc)
__GMP_DECLSPEC mp_limb_t mpn_lshiftc (mp_ptr, mp_srcptr, mp_size_t, unsigned int);
#endif

#define mpn_add_err1_n __MPN(add_err1_n)
__GMP_DECLSPEC mp_limb_t mpn_add_err1_n (mp_ptr, mp_srcptr, mp_srcptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);

#define mpn_add_err2_n __MPN(add_err2_n)
__GMP_DECLSPEC mp_limb_t mpn_add_err2_n (mp_ptr, mp_srcptr, mp_srcptr, mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);

#define mpn_add_err3_n __MPN(add_err3_n)
__GMP_DECLSPEC mp_limb_t mpn_add_err3_n (mp_ptr, mp_srcptr, mp_srcptr, mp_ptr, mp_srcptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);

#define mpn_sub_err1_n __MPN(sub_err1_n)
__GMP_DECLSPEC mp_limb_t mpn_sub_err1_n (mp_ptr, mp_srcptr, mp_srcptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);

#define mpn_sub_err2_n __MPN(sub_err2_n)
__GMP_DECLSPEC mp_limb_t mpn_sub_err2_n (mp_ptr, mp_srcptr, mp_srcptr, mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);

#define mpn_sub_err3_n __MPN(sub_err3_n)
__GMP_DECLSPEC mp_limb_t mpn_sub_err3_n (mp_ptr, mp_srcptr, mp_srcptr, mp_ptr, mp_srcptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);

#define mpn_add_n_sub_n __MPN(add_n_sub_n)
__GMP_DECLSPEC mp_limb_t mpn_add_n_sub_n (mp_ptr, mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);

#define mpn_add_n_sub_nc __MPN(add_n_sub_nc)
__GMP_DECLSPEC mp_limb_t mpn_add_n_sub_nc (mp_ptr, mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);

#define mpn_addaddmul_1msb0 __MPN(addaddmul_1msb0)
__GMP_DECLSPEC mp_limb_t mpn_addaddmul_1msb0 (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t);

#define mpn_divrem_1c __MPN(divrem_1c)
__GMP_DECLSPEC mp_limb_t mpn_divrem_1c (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t);

#define mpn_dump __MPN(dump)
__GMP_DECLSPEC void mpn_dump (mp_srcptr, mp_size_t);

#define mpn_fib2_ui __MPN(fib2_ui)
__GMP_DECLSPEC mp_size_t mpn_fib2_ui (mp_ptr, mp_ptr, unsigned long);

/* Remap names of internal mpn functions.  */
#define __clz_tab        __MPN(clz_tab)
#define mpn_udiv_w_sdiv  __MPN(udiv_w_sdiv)

#define mpn_jacobi_base __MPN(jacobi_base)
__GMP_DECLSPEC int mpn_jacobi_base (mp_limb_t, mp_limb_t, int) ATTRIBUTE_CONST;

#define mpn_jacobi_2 __MPN(jacobi_2)
__GMP_DECLSPEC int mpn_jacobi_2 (mp_srcptr, mp_srcptr, unsigned);

#define mpn_jacobi_n __MPN(jacobi_n)
__GMP_DECLSPEC int mpn_jacobi_n (mp_ptr, mp_ptr, mp_size_t, unsigned);

#define mpn_mod_1c __MPN(mod_1c)
__GMP_DECLSPEC mp_limb_t mpn_mod_1c (mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t) __GMP_ATTRIBUTE_PURE;

#define mpn_mul_1c __MPN(mul_1c)
__GMP_DECLSPEC mp_limb_t mpn_mul_1c (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t);

#define mpn_mul_2 __MPN(mul_2)
__GMP_DECLSPEC mp_limb_t mpn_mul_2 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);

#define mpn_mul_3 __MPN(mul_3)
__GMP_DECLSPEC mp_limb_t mpn_mul_3 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);

#define mpn_mul_4 __MPN(mul_4)
__GMP_DECLSPEC mp_limb_t mpn_mul_4 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);

#define mpn_mul_5 __MPN(mul_5)
__GMP_DECLSPEC mp_limb_t mpn_mul_5 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);

#define mpn_mul_6 __MPN(mul_6)
__GMP_DECLSPEC mp_limb_t mpn_mul_6 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);

#ifndef mpn_mul_basecase  /* if not done with cpuvec in a fat binary */
#define mpn_mul_basecase __MPN(mul_basecase)
__GMP_DECLSPEC void mpn_mul_basecase (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t);
#endif

#define mpn_mullo_n __MPN(mullo_n)
__GMP_DECLSPEC void mpn_mullo_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);

#ifndef mpn_mullo_basecase  /* if not done with cpuvec in a fat binary */
#define mpn_mullo_basecase __MPN(mullo_basecase)
__GMP_DECLSPEC void mpn_mullo_basecase (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
#endif

#define mpn_sqr __MPN(sqr)
__GMP_DECLSPEC void mpn_sqr (mp_ptr, mp_srcptr, mp_size_t);

#ifndef mpn_sqr_basecase  /* if not done with cpuvec in a fat binary */
#define mpn_sqr_basecase __MPN(sqr_basecase)
__GMP_DECLSPEC void mpn_sqr_basecase (mp_ptr, mp_srcptr, mp_size_t);
#endif

#define mpn_mulmid_basecase __MPN(mulmid_basecase)
__GMP_DECLSPEC void mpn_mulmid_basecase (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t);

#define mpn_mulmid_n __MPN(mulmid_n)
__GMP_DECLSPEC void mpn_mulmid_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);

#define mpn_mulmid __MPN(mulmid)
__GMP_DECLSPEC void mpn_mulmid (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t);

#define mpn_submul_1c __MPN(submul_1c)
__GMP_DECLSPEC mp_limb_t mpn_submul_1c (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t);

#ifndef mpn_redc_1  /* if not done with cpuvec in a fat binary */
#define mpn_redc_1 __MPN(redc_1)
__GMP_DECLSPEC mp_limb_t mpn_redc_1 (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
#endif

#ifndef mpn_redc_2  /* if not done with cpuvec in a fat binary */
#define mpn_redc_2 __MPN(redc_2)
__GMP_DECLSPEC mp_limb_t mpn_redc_2 (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
#endif

#define mpn_redc_n __MPN(redc_n)
__GMP_DECLSPEC void mpn_redc_n (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);


#ifndef mpn_mod_1_1p_cps  /* if not done with cpuvec in a fat binary */
#define mpn_mod_1_1p_cps __MPN(mod_1_1p_cps)
__GMP_DECLSPEC void mpn_mod_1_1p_cps (mp_limb_t [4], mp_limb_t);
#endif
#ifndef mpn_mod_1_1p  /* if not done with cpuvec in a fat binary */
#define mpn_mod_1_1p __MPN(mod_1_1p)
__GMP_DECLSPEC mp_limb_t mpn_mod_1_1p (mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t [4]) __GMP_ATTRIBUTE_PURE;
#endif

#ifndef mpn_mod_1s_2p_cps  /* if not done with cpuvec in a fat binary */
#define mpn_mod_1s_2p_cps __MPN(mod_1s_2p_cps)
__GMP_DECLSPEC void mpn_mod_1s_2p_cps (mp_limb_t [5], mp_limb_t);
#endif
#ifndef mpn_mod_1s_2p  /* if not done with cpuvec in a fat binary */
#define mpn_mod_1s_2p __MPN(mod_1s_2p)
__GMP_DECLSPEC mp_limb_t mpn_mod_1s_2p (mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t [5]) __GMP_ATTRIBUTE_PURE;
#endif

#ifndef mpn_mod_1s_3p_cps  /* if not done with cpuvec in a fat binary */
#define mpn_mod_1s_3p_cps __MPN(mod_1s_3p_cps)
__GMP_DECLSPEC void mpn_mod_1s_3p_cps (mp_limb_t [6], mp_limb_t);
#endif
#ifndef mpn_mod_1s_3p  /* if not done with cpuvec in a fat binary */
#define mpn_mod_1s_3p __MPN(mod_1s_3p)
__GMP_DECLSPEC mp_limb_t mpn_mod_1s_3p (mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t [6]) __GMP_ATTRIBUTE_PURE;
#endif

#ifndef mpn_mod_1s_4p_cps  /* if not done with cpuvec in a fat binary */
#define mpn_mod_1s_4p_cps __MPN(mod_1s_4p_cps)
__GMP_DECLSPEC void mpn_mod_1s_4p_cps (mp_limb_t [7], mp_limb_t);
#endif
#ifndef mpn_mod_1s_4p  /* if not done with cpuvec in a fat binary */
#define mpn_mod_1s_4p __MPN(mod_1s_4p)
__GMP_DECLSPEC mp_limb_t mpn_mod_1s_4p (mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t [7]) __GMP_ATTRIBUTE_PURE;
#endif

#define mpn_bc_mulmod_bnm1 __MPN(bc_mulmod_bnm1)
__GMP_DECLSPEC void mpn_bc_mulmod_bnm1 (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_ptr);
#define mpn_mulmod_bnm1 __MPN(mulmod_bnm1)
__GMP_DECLSPEC void mpn_mulmod_bnm1 (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
#define mpn_mulmod_bnm1_next_size __MPN(mulmod_bnm1_next_size)
__GMP_DECLSPEC mp_size_t mpn_mulmod_bnm1_next_size (mp_size_t) ATTRIBUTE_CONST;
static inline mp_size_t
mpn_mulmod_bnm1_itch (mp_size_t rn, mp_size_t an, mp_size_t bn) {
  mp_size_t n, itch;
  n = rn >> 1;
  itch = rn + 4 +
    (an > n ? (bn > n ? rn : n) : 0);
  return itch;
}

#define mpn_sqrmod_bnm1 __MPN(sqrmod_bnm1)
__GMP_DECLSPEC void mpn_sqrmod_bnm1 (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
#define mpn_sqrmod_bnm1_next_size __MPN(sqrmod_bnm1_next_size)
__GMP_DECLSPEC mp_size_t mpn_sqrmod_bnm1_next_size (mp_size_t) ATTRIBUTE_CONST;
static inline mp_size_t
mpn_sqrmod_bnm1_itch (mp_size_t rn, mp_size_t an) {
  mp_size_t n, itch;
  n = rn >> 1;
  itch = rn + 3 +
    (an > n ? an : 0);
  return itch;
}

typedef __gmp_randstate_struct *gmp_randstate_ptr;
typedef const __gmp_randstate_struct *gmp_randstate_srcptr;

/* Pseudo-random number generator function pointers structure. */
typedef struct {
  void (*randseed_fn) (gmp_randstate_t, mpz_srcptr);
  void (*randget_fn) (gmp_randstate_t, mp_ptr, unsigned long int);
  void (*randclear_fn) (gmp_randstate_t);
  void (*randiset_fn) (gmp_randstate_ptr, gmp_randstate_srcptr);
} gmp_randfnptr_t;

/* Macro to obtain a void pointer to the function pointers structure. */
#define RNG_FNPTR(rstate) ((rstate)->_mp_algdata._mp_lc)

/* Macro to obtain a pointer to the generator's state.
   When used as an lvalue the rvalue needs to be cast to mp_ptr.  */
#define RNG_STATE(rstate) ((rstate)->_mp_seed->_mp_d)

/* Write a given number of random bits to rp. */
#define _gmp_rand(rp, state, bits)                              \
  do {                                                          \
    gmp_randstate_ptr  __rstate = (state);                      \
    (*((gmp_randfnptr_t *) RNG_FNPTR (__rstate))->randget_fn)   \
      (__rstate, rp, bits);                                     \
  } while (0)

__GMP_DECLSPEC void __gmp_randinit_mt_noseed (gmp_randstate_t);


/* __gmp_rands is the global state for the old-style random functions, and
   is also used in the test programs (hence the __GMP_DECLSPEC).

   There's no seeding here, so mpz_random etc will generate the same
   sequence every time.  This is not unlike the C library random functions
   if you don't seed them, so perhaps it's acceptable.  Digging up a seed
   from /dev/random or the like would work on many systems, but might
   encourage a false confidence, since it'd be pretty much impossible to do
   something that would work reliably everywhere.  In any case the new style
   functions are recommended to applications which care about randomness, so
   the old functions aren't too important.  */

__GMP_DECLSPEC extern char             __gmp_rands_initialized;
__GMP_DECLSPEC extern gmp_randstate_t  __gmp_rands;

#define RANDS                                           \
  ((__gmp_rands_initialized ? 0                         \
    : (__gmp_rands_initialized = 1,                     \
       __gmp_randinit_mt_noseed (__gmp_rands), 0)),     \
   __gmp_rands)

/* this is used by the test programs, to free memory */
#define RANDS_CLEAR()                   \
  do {                                  \
    if (__gmp_rands_initialized)        \
      {                                 \
        __gmp_rands_initialized = 0;    \
        gmp_randclear (__gmp_rands);    \
      }                                 \
  } while (0)
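/* Illustrative sketch (hypothetical snippet, not code from this file):
   drawing n limbs' worth of random bits from the lazily initialized global
   state, roughly as the test programs do.

     _gmp_rand (rp, RANDS, n * GMP_NUMB_BITS);   [rp must have n limbs]
     ...
     RANDS_CLEAR ();                             [e.g. at test program exit]
*/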
/* For a threshold between algorithms A and B, size>=thresh is where B
   should be used.  Special value MP_SIZE_T_MAX means only ever use A, or
   value 0 means only ever use B.  The tests for these special values will
   be compile-time constants, so the compiler should be able to eliminate
   the code for the unwanted algorithm.  */

#if ! defined (__GNUC__) || __GNUC__ < 2
#define ABOVE_THRESHOLD(size,thresh)    \
  ((thresh) == 0                        \
   || ((thresh) != MP_SIZE_T_MAX        \
       && (size) >= (thresh)))
#else
#define ABOVE_THRESHOLD(size,thresh)                                    \
  ((__builtin_constant_p (thresh) && (thresh) == 0)                     \
   || (!(__builtin_constant_p (thresh) && (thresh) == MP_SIZE_T_MAX)    \
       && (size) >= (thresh)))
#endif
#define BELOW_THRESHOLD(size,thresh)  (! ABOVE_THRESHOLD (size, thresh))
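/* Illustrative sketch of typical threshold use (MUL_TOOM22_THRESHOLD comes
   from gmp-mparam.h; the dispatch shown is schematic, not the real mpn_mul
   logic):

     if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
       mpn_mul_basecase (pp, ap, n, bp, n);
     else
       ...use a Toom or FFT path...
*/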
#define MPN_TOOM22_MUL_MINSIZE    4
#define MPN_TOOM2_SQR_MINSIZE     4

#define MPN_TOOM33_MUL_MINSIZE   17
#define MPN_TOOM3_SQR_MINSIZE    17

#define MPN_TOOM44_MUL_MINSIZE   30
#define MPN_TOOM4_SQR_MINSIZE    30

#define MPN_TOOM6H_MUL_MINSIZE   46
#define MPN_TOOM6_SQR_MINSIZE    46

#define MPN_TOOM8H_MUL_MINSIZE   86
#define MPN_TOOM8_SQR_MINSIZE    86

#define MPN_TOOM32_MUL_MINSIZE   10
#define MPN_TOOM42_MUL_MINSIZE   10
#define MPN_TOOM43_MUL_MINSIZE   25
#define MPN_TOOM53_MUL_MINSIZE   17
#define MPN_TOOM54_MUL_MINSIZE   31
#define MPN_TOOM63_MUL_MINSIZE   49

#define MPN_TOOM42_MULMID_MINSIZE    4

#define mpn_sqr_diagonal __MPN(sqr_diagonal)
__GMP_DECLSPEC void mpn_sqr_diagonal (mp_ptr, mp_srcptr, mp_size_t);

#define mpn_sqr_diag_addlsh1 __MPN(sqr_diag_addlsh1)
__GMP_DECLSPEC void mpn_sqr_diag_addlsh1 (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);

#define mpn_toom_interpolate_5pts __MPN(toom_interpolate_5pts)
__GMP_DECLSPEC void mpn_toom_interpolate_5pts (mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_size_t, int, mp_limb_t);

enum toom6_flags {toom6_all_pos = 0, toom6_vm1_neg = 1, toom6_vm2_neg = 2};
#define mpn_toom_interpolate_6pts __MPN(toom_interpolate_6pts)
__GMP_DECLSPEC void mpn_toom_interpolate_6pts (mp_ptr, mp_size_t, enum toom6_flags, mp_ptr, mp_ptr, mp_ptr, mp_size_t);

enum toom7_flags { toom7_w1_neg = 1, toom7_w3_neg = 2 };
#define mpn_toom_interpolate_7pts __MPN(toom_interpolate_7pts)
__GMP_DECLSPEC void mpn_toom_interpolate_7pts (mp_ptr, mp_size_t, enum toom7_flags, mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_ptr);

#define mpn_toom_interpolate_8pts __MPN(toom_interpolate_8pts)
__GMP_DECLSPEC void mpn_toom_interpolate_8pts (mp_ptr, mp_size_t, mp_ptr, mp_ptr, mp_size_t, mp_ptr);

#define mpn_toom_interpolate_12pts __MPN(toom_interpolate_12pts)
__GMP_DECLSPEC void mpn_toom_interpolate_12pts (mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_size_t, int, mp_ptr);

#define mpn_toom_interpolate_16pts __MPN(toom_interpolate_16pts)
__GMP_DECLSPEC void mpn_toom_interpolate_16pts (mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_size_t, int, mp_ptr);

#define mpn_toom_couple_handling __MPN(toom_couple_handling)
__GMP_DECLSPEC void mpn_toom_couple_handling (mp_ptr, mp_size_t, mp_ptr, int, mp_size_t, int, int);

#define mpn_toom_eval_dgr3_pm1 __MPN(toom_eval_dgr3_pm1)
__GMP_DECLSPEC int mpn_toom_eval_dgr3_pm1 (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_size_t, mp_ptr);

#define mpn_toom_eval_dgr3_pm2 __MPN(toom_eval_dgr3_pm2)
__GMP_DECLSPEC int mpn_toom_eval_dgr3_pm2 (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_size_t, mp_ptr);

#define mpn_toom_eval_pm1 __MPN(toom_eval_pm1)
__GMP_DECLSPEC int mpn_toom_eval_pm1 (mp_ptr, mp_ptr, unsigned, mp_srcptr, mp_size_t, mp_size_t, mp_ptr);

#define mpn_toom_eval_pm2 __MPN(toom_eval_pm2)
__GMP_DECLSPEC int mpn_toom_eval_pm2 (mp_ptr, mp_ptr, unsigned, mp_srcptr, mp_size_t, mp_size_t, mp_ptr);

#define mpn_toom_eval_pm2exp __MPN(toom_eval_pm2exp)
__GMP_DECLSPEC int mpn_toom_eval_pm2exp (mp_ptr, mp_ptr, unsigned, mp_srcptr, mp_size_t, mp_size_t, unsigned, mp_ptr);

#define mpn_toom_eval_pm2rexp __MPN(toom_eval_pm2rexp)
__GMP_DECLSPEC int mpn_toom_eval_pm2rexp (mp_ptr, mp_ptr, unsigned, mp_srcptr, mp_size_t, mp_size_t, unsigned, mp_ptr);

#define mpn_toom22_mul __MPN(toom22_mul)
__GMP_DECLSPEC void mpn_toom22_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);

#define mpn_toom32_mul __MPN(toom32_mul)
__GMP_DECLSPEC void mpn_toom32_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);

#define mpn_toom42_mul __MPN(toom42_mul)
__GMP_DECLSPEC void mpn_toom42_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);

#define mpn_toom52_mul __MPN(toom52_mul)
__GMP_DECLSPEC void mpn_toom52_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);

#define mpn_toom62_mul __MPN(toom62_mul)
__GMP_DECLSPEC void mpn_toom62_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);

#define mpn_toom2_sqr __MPN(toom2_sqr)
__GMP_DECLSPEC void mpn_toom2_sqr (mp_ptr, mp_srcptr, mp_size_t, mp_ptr);

#define mpn_toom33_mul __MPN(toom33_mul)
__GMP_DECLSPEC void mpn_toom33_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);

#define mpn_toom43_mul __MPN(toom43_mul)
__GMP_DECLSPEC void mpn_toom43_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);

#define mpn_toom53_mul __MPN(toom53_mul)
__GMP_DECLSPEC void mpn_toom53_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);

#define mpn_toom54_mul __MPN(toom54_mul)
__GMP_DECLSPEC void mpn_toom54_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);

#define mpn_toom63_mul __MPN(toom63_mul)
__GMP_DECLSPEC void mpn_toom63_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);

#define mpn_toom3_sqr __MPN(toom3_sqr)
__GMP_DECLSPEC void mpn_toom3_sqr (mp_ptr, mp_srcptr, mp_size_t, mp_ptr);

#define mpn_toom44_mul __MPN(toom44_mul)
__GMP_DECLSPEC void mpn_toom44_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);

#define mpn_toom4_sqr __MPN(toom4_sqr)
__GMP_DECLSPEC void mpn_toom4_sqr (mp_ptr, mp_srcptr, mp_size_t, mp_ptr);

#define mpn_toom6h_mul __MPN(toom6h_mul)
__GMP_DECLSPEC void mpn_toom6h_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);

#define mpn_toom6_sqr __MPN(toom6_sqr)
__GMP_DECLSPEC void mpn_toom6_sqr (mp_ptr, mp_srcptr, mp_size_t, mp_ptr);

#define mpn_toom8h_mul __MPN(toom8h_mul)
__GMP_DECLSPEC void mpn_toom8h_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);

#define mpn_toom8_sqr __MPN(toom8_sqr)
__GMP_DECLSPEC void mpn_toom8_sqr (mp_ptr, mp_srcptr, mp_size_t, mp_ptr);

#define mpn_toom42_mulmid __MPN(toom42_mulmid)
__GMP_DECLSPEC void mpn_toom42_mulmid (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_ptr);

#define mpn_fft_best_k __MPN(fft_best_k)
__GMP_DECLSPEC int mpn_fft_best_k (mp_size_t, int) ATTRIBUTE_CONST;

#define mpn_mul_fft __MPN(mul_fft)
__GMP_DECLSPEC mp_limb_t mpn_mul_fft (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, int);

#define mpn_mul_fft_full __MPN(mul_fft_full)
__GMP_DECLSPEC void mpn_mul_fft_full (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t);

#define mpn_nussbaumer_mul __MPN(nussbaumer_mul)
__GMP_DECLSPEC void mpn_nussbaumer_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t);

#define mpn_fft_next_size __MPN(fft_next_size)
__GMP_DECLSPEC mp_size_t mpn_fft_next_size (mp_size_t, int) ATTRIBUTE_CONST;

#define mpn_div_qr_2n_pi1 __MPN(div_qr_2n_pi1)
__GMP_DECLSPEC mp_limb_t mpn_div_qr_2n_pi1 (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, mp_limb_t);

#define mpn_div_qr_2u_pi1 __MPN(div_qr_2u_pi1)
__GMP_DECLSPEC mp_limb_t mpn_div_qr_2u_pi1 (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, int, mp_limb_t);

#define mpn_sbpi1_div_qr __MPN(sbpi1_div_qr)
__GMP_DECLSPEC mp_limb_t mpn_sbpi1_div_qr (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t);

#define mpn_sbpi1_div_q __MPN(sbpi1_div_q)
__GMP_DECLSPEC mp_limb_t mpn_sbpi1_div_q (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t);

#define mpn_sbpi1_divappr_q __MPN(sbpi1_divappr_q)
__GMP_DECLSPEC mp_limb_t mpn_sbpi1_divappr_q (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t);

#define mpn_dcpi1_div_qr __MPN(dcpi1_div_qr)
__GMP_DECLSPEC mp_limb_t mpn_dcpi1_div_qr (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, gmp_pi1_t *);
#define mpn_dcpi1_div_qr_n __MPN(dcpi1_div_qr_n)
__GMP_DECLSPEC mp_limb_t mpn_dcpi1_div_qr_n (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, gmp_pi1_t *, mp_ptr);

#define mpn_dcpi1_div_q __MPN(dcpi1_div_q)
__GMP_DECLSPEC mp_limb_t mpn_dcpi1_div_q (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, gmp_pi1_t *);

#define mpn_dcpi1_divappr_q __MPN(dcpi1_divappr_q)
__GMP_DECLSPEC mp_limb_t mpn_dcpi1_divappr_q (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, gmp_pi1_t *);
#define mpn_dcpi1_divappr_q_n __MPN(dcpi1_divappr_q_n)
__GMP_DECLSPEC mp_limb_t mpn_dcpi1_divappr_q_n (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, gmp_pi1_t *, mp_ptr);

#define mpn_mu_div_qr __MPN(mu_div_qr)
__GMP_DECLSPEC mp_limb_t mpn_mu_div_qr (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
#define mpn_mu_div_qr_itch __MPN(mu_div_qr_itch)
__GMP_DECLSPEC mp_size_t mpn_mu_div_qr_itch (mp_size_t, mp_size_t, int);
#define mpn_mu_div_qr_choose_in __MPN(mu_div_qr_choose_in)
__GMP_DECLSPEC mp_size_t mpn_mu_div_qr_choose_in (mp_size_t, mp_size_t, int);

#define mpn_preinv_mu_div_qr __MPN(preinv_mu_div_qr)
__GMP_DECLSPEC mp_limb_t mpn_preinv_mu_div_qr (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
#define mpn_preinv_mu_div_qr_itch __MPN(preinv_mu_div_qr_itch)
__GMP_DECLSPEC mp_size_t mpn_preinv_mu_div_qr_itch (mp_size_t, mp_size_t, mp_size_t);

#define mpn_mu_divappr_q __MPN(mu_divappr_q)
__GMP_DECLSPEC mp_limb_t mpn_mu_divappr_q (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
#define mpn_mu_divappr_q_itch __MPN(mu_divappr_q_itch)
__GMP_DECLSPEC mp_size_t mpn_mu_divappr_q_itch (mp_size_t, mp_size_t, int);
#define mpn_mu_divappr_q_choose_in __MPN(mu_divappr_q_choose_in)
__GMP_DECLSPEC mp_size_t mpn_mu_divappr_q_choose_in (mp_size_t, mp_size_t, int);

#define mpn_preinv_mu_divappr_q __MPN(preinv_mu_divappr_q)
__GMP_DECLSPEC mp_limb_t mpn_preinv_mu_divappr_q (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);

#define mpn_mu_div_q __MPN(mu_div_q)
__GMP_DECLSPEC mp_limb_t mpn_mu_div_q (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
#define mpn_mu_div_q_itch __MPN(mu_div_q_itch)
__GMP_DECLSPEC mp_size_t mpn_mu_div_q_itch (mp_size_t, mp_size_t, int);

#define mpn_div_q __MPN(div_q)
__GMP_DECLSPEC void mpn_div_q (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);

#define mpn_invert __MPN(invert)
__GMP_DECLSPEC void mpn_invert (mp_ptr, mp_srcptr, mp_size_t, mp_ptr);
#define mpn_invert_itch(n)  mpn_invertappr_itch(n)

#define mpn_ni_invertappr __MPN(ni_invertappr)
__GMP_DECLSPEC mp_limb_t mpn_ni_invertappr (mp_ptr, mp_srcptr, mp_size_t, mp_ptr);
#define mpn_invertappr __MPN(invertappr)
__GMP_DECLSPEC mp_limb_t mpn_invertappr (mp_ptr, mp_srcptr, mp_size_t, mp_ptr);
#define mpn_invertappr_itch(n)  (3 * (n) + 2)

#define mpn_binvert __MPN(binvert)
__GMP_DECLSPEC void mpn_binvert (mp_ptr, mp_srcptr, mp_size_t, mp_ptr);
#define mpn_binvert_itch __MPN(binvert_itch)
__GMP_DECLSPEC mp_size_t mpn_binvert_itch (mp_size_t);

#define mpn_bdiv_q_1 __MPN(bdiv_q_1)
__GMP_DECLSPEC mp_limb_t mpn_bdiv_q_1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);

#define mpn_pi1_bdiv_q_1 __MPN(pi1_bdiv_q_1)
__GMP_DECLSPEC mp_limb_t mpn_pi1_bdiv_q_1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, int);

#define mpn_sbpi1_bdiv_qr __MPN(sbpi1_bdiv_qr)
__GMP_DECLSPEC mp_limb_t mpn_sbpi1_bdiv_qr (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t);

#define mpn_sbpi1_bdiv_q __MPN(sbpi1_bdiv_q)
__GMP_DECLSPEC void mpn_sbpi1_bdiv_q (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t);

#define mpn_dcpi1_bdiv_qr __MPN(dcpi1_bdiv_qr)
__GMP_DECLSPEC mp_limb_t mpn_dcpi1_bdiv_qr (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t);
#define mpn_dcpi1_bdiv_qr_n_itch __MPN(dcpi1_bdiv_qr_n_itch)
__GMP_DECLSPEC mp_size_t mpn_dcpi1_bdiv_qr_n_itch (mp_size_t);

#define mpn_dcpi1_bdiv_qr_n __MPN(dcpi1_bdiv_qr_n)
__GMP_DECLSPEC mp_limb_t mpn_dcpi1_bdiv_qr_n (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_ptr);
#define mpn_dcpi1_bdiv_q __MPN(dcpi1_bdiv_q)
__GMP_DECLSPEC void mpn_dcpi1_bdiv_q (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t);

#define mpn_dcpi1_bdiv_q_n_itch __MPN(dcpi1_bdiv_q_n_itch)
__GMP_DECLSPEC mp_size_t mpn_dcpi1_bdiv_q_n_itch (mp_size_t);
#define mpn_dcpi1_bdiv_q_n __MPN(dcpi1_bdiv_q_n)
__GMP_DECLSPEC void mpn_dcpi1_bdiv_q_n (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_ptr);

#define mpn_mu_bdiv_qr __MPN(mu_bdiv_qr)
__GMP_DECLSPEC mp_limb_t mpn_mu_bdiv_qr (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
#define mpn_mu_bdiv_qr_itch __MPN(mu_bdiv_qr_itch)
__GMP_DECLSPEC mp_size_t mpn_mu_bdiv_qr_itch (mp_size_t, mp_size_t);

#define mpn_mu_bdiv_q __MPN(mu_bdiv_q)
__GMP_DECLSPEC void mpn_mu_bdiv_q (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
#define mpn_mu_bdiv_q_itch __MPN(mu_bdiv_q_itch)
__GMP_DECLSPEC mp_size_t mpn_mu_bdiv_q_itch (mp_size_t, mp_size_t);

#define mpn_bdiv_qr __MPN(bdiv_qr)
__GMP_DECLSPEC mp_limb_t mpn_bdiv_qr (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
#define mpn_bdiv_qr_itch __MPN(bdiv_qr_itch)
__GMP_DECLSPEC mp_size_t mpn_bdiv_qr_itch (mp_size_t, mp_size_t);

#define mpn_bdiv_q __MPN(bdiv_q)
__GMP_DECLSPEC void mpn_bdiv_q (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
#define mpn_bdiv_q_itch __MPN(bdiv_q_itch)
__GMP_DECLSPEC mp_size_t mpn_bdiv_q_itch (mp_size_t, mp_size_t);

#define mpn_divexact __MPN(divexact)
__GMP_DECLSPEC void mpn_divexact (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t);
#define
mpn_divexact_itch __MPN(divexact_itch) 1539 __GMP_DECLSPEC mp_size_t mpn_divexact_itch (mp_size_t, mp_size_t); 1540 1541 #ifndef mpn_bdiv_dbm1c /* if not done with cpuvec in a fat binary */ 1542 #define mpn_bdiv_dbm1c __MPN(bdiv_dbm1c) 1543 __GMP_DECLSPEC mp_limb_t mpn_bdiv_dbm1c (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t); 1544 #endif 1545 1546 #define mpn_bdiv_dbm1(dst, src, size, divisor) \ 1547 mpn_bdiv_dbm1c (dst, src, size, divisor, __GMP_CAST (mp_limb_t, 0)) 1548 1549 #define mpn_powm __MPN(powm) 1550 __GMP_DECLSPEC void mpn_powm (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr); 1551 #define mpn_powlo __MPN(powlo) 1552 __GMP_DECLSPEC void mpn_powlo (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_size_t, mp_ptr); 1553 #define mpn_powm_sec __MPN(powm_sec) 1554 __GMP_DECLSPEC void mpn_powm_sec (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr); 1555 #define mpn_powm_sec_itch __MPN(powm_sec_itch) 1556 __GMP_DECLSPEC mp_size_t mpn_powm_sec_itch (mp_size_t, mp_size_t, mp_size_t); 1557 #define mpn_tabselect __MPN(tabselect) 1558 __GMP_DECLSPEC void mpn_tabselect (volatile mp_limb_t *, volatile mp_limb_t *, mp_size_t, mp_size_t, mp_size_t); 1559 #define mpn_addcnd_n __MPN(addcnd_n) 1560 __GMP_DECLSPEC mp_limb_t mpn_addcnd_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t); 1561 #define mpn_subcnd_n __MPN(subcnd_n) 1562 __GMP_DECLSPEC mp_limb_t mpn_subcnd_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t); 1563 1564 #define mpn_sb_div_qr_sec __MPN(sb_div_qr_sec) 1565 __GMP_DECLSPEC void mpn_sb_div_qr_sec (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr); 1566 #define mpn_sbpi1_div_qr_sec __MPN(sbpi1_div_qr_sec) 1567 __GMP_DECLSPEC mp_limb_t mpn_sbpi1_div_qr_sec (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t, mp_ptr); 1568 #define mpn_sb_div_r_sec __MPN(sb_div_r_sec) 1569 __GMP_DECLSPEC void mpn_sb_div_r_sec (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr); 1570 #define mpn_sbpi1_div_r_sec __MPN(sbpi1_div_r_sec) 1571 __GMP_DECLSPEC void mpn_sbpi1_div_r_sec (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t, mp_ptr); 1572 1573 1574 #ifndef DIVEXACT_BY3_METHOD 1575 #if GMP_NUMB_BITS % 2 == 0 && ! 
defined (HAVE_NATIVE_mpn_divexact_by3c) 1576 #define DIVEXACT_BY3_METHOD 0 /* default to using mpn_bdiv_dbm1c */ 1577 #else 1578 #define DIVEXACT_BY3_METHOD 1 1579 #endif 1580 #endif 1581 1582 #if DIVEXACT_BY3_METHOD == 0 1583 #undef mpn_divexact_by3 1584 #define mpn_divexact_by3(dst,src,size) \ 1585 (3 & mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 3))) 1586 /* override mpn_divexact_by3c defined in gmp.h */ 1587 /* 1588 #undef mpn_divexact_by3c 1589 #define mpn_divexact_by3c(dst,src,size,cy) \ 1590 (3 & mpn_bdiv_dbm1c (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 3, GMP_NUMB_MASK / 3 * cy))) 1591 */ 1592 #endif 1593 1594 #if GMP_NUMB_BITS % 4 == 0 1595 #define mpn_divexact_by5(dst,src,size) \ 1596 (7 & 3 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 5))) 1597 #endif 1598 1599 #if GMP_NUMB_BITS % 3 == 0 1600 #define mpn_divexact_by7(dst,src,size) \ 1601 (7 & 1 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 7))) 1602 #endif 1603 1604 #if GMP_NUMB_BITS % 6 == 0 1605 #define mpn_divexact_by9(dst,src,size) \ 1606 (15 & 7 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 9))) 1607 #endif 1608 1609 #if GMP_NUMB_BITS % 10 == 0 1610 #define mpn_divexact_by11(dst,src,size) \ 1611 (15 & 5 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 11))) 1612 #endif 1613 1614 #if GMP_NUMB_BITS % 12 == 0 1615 #define mpn_divexact_by13(dst,src,size) \ 1616 (15 & 3 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 13))) 1617 #endif 1618 1619 #if GMP_NUMB_BITS % 4 == 0 1620 #define mpn_divexact_by15(dst,src,size) \ 1621 (15 & 1 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 15))) 1622 #endif 1623 1624 #define mpz_divexact_gcd __gmpz_divexact_gcd 1625 __GMP_DECLSPEC void mpz_divexact_gcd (mpz_ptr, mpz_srcptr, mpz_srcptr); 1626 1627 #define mpz_prodlimbs __gmpz_prodlimbs 1628 __GMP_DECLSPEC mp_size_t mpz_prodlimbs (mpz_ptr, mp_ptr, mp_size_t); 1629 1630 #define mpz_oddfac_1 __gmpz_oddfac_1 1631 __GMP_DECLSPEC void mpz_oddfac_1 (mpz_ptr, mp_limb_t, unsigned); 1632 1633 #define mpz_inp_str_nowhite __gmpz_inp_str_nowhite 1634 #ifdef _GMP_H_HAVE_FILE 1635 __GMP_DECLSPEC size_t mpz_inp_str_nowhite (mpz_ptr, FILE *, int, int, size_t); 1636 #endif 1637 1638 #define mpn_divisible_p __MPN(divisible_p) 1639 __GMP_DECLSPEC int mpn_divisible_p (mp_srcptr, mp_size_t, mp_srcptr, mp_size_t) __GMP_ATTRIBUTE_PURE; 1640 1641 #define mpn_rootrem __MPN(rootrem) 1642 __GMP_DECLSPEC mp_size_t mpn_rootrem (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t); 1643 1644 #define mpn_broot __MPN(broot) 1645 __GMP_DECLSPEC void mpn_broot (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t); 1646 1647 #define mpn_broot_invm1 __MPN(broot_invm1) 1648 __GMP_DECLSPEC void mpn_broot_invm1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t); 1649 1650 #define mpn_brootinv __MPN(brootinv) 1651 __GMP_DECLSPEC void mpn_brootinv (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_ptr); 1652 1653 #define mpn_bsqrt __MPN(bsqrt) 1654 __GMP_DECLSPEC void mpn_bsqrt (mp_ptr, mp_srcptr, mp_bitcnt_t, mp_ptr); 1655 1656 #define mpn_bsqrtinv __MPN(bsqrtinv) 1657 __GMP_DECLSPEC int mpn_bsqrtinv (mp_ptr, mp_srcptr, mp_bitcnt_t, mp_ptr); 1658 1659 #if defined (_CRAY) 1660 #define MPN_COPY_INCR(dst, src, n) \ 1661 do { \ 1662 int __i; /* Faster on some Crays with plain int */ \ 1663 _Pragma ("_CRI ivdep"); \ 1664 for (__i = 0; __i < (n); __i++) \ 1665 (dst)[__i] = (src)[__i]; \ 1666 } while (0) 1667 #endif 1668 1669 /* used 
by test programs, hence __GMP_DECLSPEC */ 1670 #ifndef mpn_copyi /* if not done with cpuvec in a fat binary */ 1671 #define mpn_copyi __MPN(copyi) 1672 __GMP_DECLSPEC void mpn_copyi (mp_ptr, mp_srcptr, mp_size_t); 1673 #endif 1674 1675 #if ! defined (MPN_COPY_INCR) && HAVE_NATIVE_mpn_copyi 1676 #define MPN_COPY_INCR(dst, src, size) \ 1677 do { \ 1678 ASSERT ((size) >= 0); \ 1679 ASSERT (MPN_SAME_OR_INCR_P (dst, src, size)); \ 1680 mpn_copyi (dst, src, size); \ 1681 } while (0) 1682 #endif 1683 1684 /* Copy N limbs from SRC to DST incrementing, N==0 allowed. */ 1685 #if ! defined (MPN_COPY_INCR) 1686 #define MPN_COPY_INCR(dst, src, n) \ 1687 do { \ 1688 ASSERT ((n) >= 0); \ 1689 ASSERT (MPN_SAME_OR_INCR_P (dst, src, n)); \ 1690 if ((n) != 0) \ 1691 { \ 1692 mp_size_t __n = (n) - 1; \ 1693 mp_ptr __dst = (dst); \ 1694 mp_srcptr __src = (src); \ 1695 mp_limb_t __x; \ 1696 __x = *__src++; \ 1697 if (__n != 0) \ 1698 { \ 1699 do \ 1700 { \ 1701 *__dst++ = __x; \ 1702 __x = *__src++; \ 1703 } \ 1704 while (--__n); \ 1705 } \ 1706 *__dst++ = __x; \ 1707 } \ 1708 } while (0) 1709 #endif 1710 1711 1712 #if defined (_CRAY) 1713 #define MPN_COPY_DECR(dst, src, n) \ 1714 do { \ 1715 int __i; /* Faster on some Crays with plain int */ \ 1716 _Pragma ("_CRI ivdep"); \ 1717 for (__i = (n) - 1; __i >= 0; __i--) \ 1718 (dst)[__i] = (src)[__i]; \ 1719 } while (0) 1720 #endif 1721 1722 /* used by test programs, hence __GMP_DECLSPEC */ 1723 #ifndef mpn_copyd /* if not done with cpuvec in a fat binary */ 1724 #define mpn_copyd __MPN(copyd) 1725 __GMP_DECLSPEC void mpn_copyd (mp_ptr, mp_srcptr, mp_size_t); 1726 #endif 1727 1728 #if ! defined (MPN_COPY_DECR) && HAVE_NATIVE_mpn_copyd 1729 #define MPN_COPY_DECR(dst, src, size) \ 1730 do { \ 1731 ASSERT ((size) >= 0); \ 1732 ASSERT (MPN_SAME_OR_DECR_P (dst, src, size)); \ 1733 mpn_copyd (dst, src, size); \ 1734 } while (0) 1735 #endif 1736 1737 /* Copy N limbs from SRC to DST decrementing, N==0 allowed. */ 1738 #if ! defined (MPN_COPY_DECR) 1739 #define MPN_COPY_DECR(dst, src, n) \ 1740 do { \ 1741 ASSERT ((n) >= 0); \ 1742 ASSERT (MPN_SAME_OR_DECR_P (dst, src, n)); \ 1743 if ((n) != 0) \ 1744 { \ 1745 mp_size_t __n = (n) - 1; \ 1746 mp_ptr __dst = (dst) + __n; \ 1747 mp_srcptr __src = (src) + __n; \ 1748 mp_limb_t __x; \ 1749 __x = *__src--; \ 1750 if (__n != 0) \ 1751 { \ 1752 do \ 1753 { \ 1754 *__dst-- = __x; \ 1755 __x = *__src--; \ 1756 } \ 1757 while (--__n); \ 1758 } \ 1759 *__dst-- = __x; \ 1760 } \ 1761 } while (0) 1762 #endif 1763 1764 1765 #ifndef MPN_COPY 1766 #define MPN_COPY(d,s,n) \ 1767 do { \ 1768 ASSERT (MPN_SAME_OR_SEPARATE_P (d, s, n)); \ 1769 MPN_COPY_INCR (d, s, n); \ 1770 } while (0) 1771 #endif 1772 1773 1774 /* Set {dst,size} to the limbs of {src,size} in reverse order. */ 1775 #define MPN_REVERSE(dst, src, size) \ 1776 do { \ 1777 mp_ptr __dst = (dst); \ 1778 mp_size_t __size = (size); \ 1779 mp_srcptr __src = (src) + __size - 1; \ 1780 mp_size_t __i; \ 1781 ASSERT ((size) >= 0); \ 1782 ASSERT (! MPN_OVERLAP_P (dst, size, src, size)); \ 1783 CRAY_Pragma ("_CRI ivdep"); \ 1784 for (__i = 0; __i < __size; __i++) \ 1785 { \ 1786 *__dst = *__src; \ 1787 __dst++; \ 1788 __src--; \ 1789 } \ 1790 } while (0) 1791 1792 1793 /* Zero n limbs at dst. 1794 1795 For power and powerpc we want an inline stu/bdnz loop for zeroing. On 1796 ppc630 for instance this is optimal since it can sustain only 1 store per 1797 cycle. 
1798 1799 gcc 2.95.x (for powerpc64 -maix64, or powerpc32) doesn't recognise the 1800 "for" loop in the generic code below can become stu/bdnz. The do/while 1801 here helps it get to that. The same caveat about plain -mpowerpc64 mode 1802 applies here as to __GMPN_COPY_INCR in gmp.h. 1803 1804 xlc 3.1 already generates stu/bdnz from the generic C, and does so from 1805 this loop too. 1806 1807 Enhancement: GLIBC does some trickery with dcbz to zero whole cache lines 1808 at a time. MPN_ZERO isn't all that important in GMP, so it might be more 1809 trouble than it's worth to do the same, though perhaps a call to memset 1810 would be good when on a GNU system. */ 1811 1812 #if HAVE_HOST_CPU_FAMILY_power || HAVE_HOST_CPU_FAMILY_powerpc 1813 #define MPN_ZERO(dst, n) \ 1814 do { \ 1815 ASSERT ((n) >= 0); \ 1816 if ((n) != 0) \ 1817 { \ 1818 mp_ptr __dst = (dst) - 1; \ 1819 mp_size_t __n = (n); \ 1820 do \ 1821 *++__dst = 0; \ 1822 while (--__n); \ 1823 } \ 1824 } while (0) 1825 #endif 1826 1827 #ifndef MPN_ZERO 1828 #define MPN_ZERO(dst, n) \ 1829 do { \ 1830 ASSERT ((n) >= 0); \ 1831 if ((n) != 0) \ 1832 { \ 1833 mp_ptr __dst = (dst); \ 1834 mp_size_t __n = (n); \ 1835 do \ 1836 *__dst++ = 0; \ 1837 while (--__n); \ 1838 } \ 1839 } while (0) 1840 #endif 1841 1842 1843 /* On the x86s repe/scasl doesn't seem useful, since it takes many cycles to 1844 start up and would need to strip a lot of zeros before it'd be faster 1845 than a simple cmpl loop. Here are some times in cycles for 1846 std/repe/scasl/cld and cld/repe/scasl (the latter would be for stripping 1847 low zeros). 1848 1849 std cld 1850 P5 18 16 1851 P6 46 38 1852 K6 36 13 1853 K7 21 20 1854 */ 1855 #ifndef MPN_NORMALIZE 1856 #define MPN_NORMALIZE(DST, NLIMBS) \ 1857 do { \ 1858 while ((NLIMBS) > 0) \ 1859 { \ 1860 if ((DST)[(NLIMBS) - 1] != 0) \ 1861 break; \ 1862 (NLIMBS)--; \ 1863 } \ 1864 } while (0) 1865 #endif 1866 #ifndef MPN_NORMALIZE_NOT_ZERO 1867 #define MPN_NORMALIZE_NOT_ZERO(DST, NLIMBS) \ 1868 do { \ 1869 while (1) \ 1870 { \ 1871 ASSERT ((NLIMBS) >= 1); \ 1872 if ((DST)[(NLIMBS) - 1] != 0) \ 1873 break; \ 1874 (NLIMBS)--; \ 1875 } \ 1876 } while (0) 1877 #endif 1878 1879 /* Strip least significant zero limbs from {ptr,size} by incrementing ptr 1880 and decrementing size. low should be ptr[0], and will be the new ptr[0] 1881 on returning. The number in {ptr,size} must be non-zero, ie. size!=0 and 1882 somewhere a non-zero limb. */ 1883 #define MPN_STRIP_LOW_ZEROS_NOT_ZERO(ptr, size, low) \ 1884 do { \ 1885 ASSERT ((size) >= 1); \ 1886 ASSERT ((low) == (ptr)[0]); \ 1887 \ 1888 while ((low) == 0) \ 1889 { \ 1890 (size)--; \ 1891 ASSERT ((size) >= 1); \ 1892 (ptr)++; \ 1893 (low) = *(ptr); \ 1894 } \ 1895 } while (0) 1896 1897 /* Initialize X of type mpz_t with space for NLIMBS limbs. X should be a 1898 temporary variable; it will be automatically cleared out at function 1899 return. We use __x here to make it possible to accept both mpz_ptr and 1900 mpz_t arguments. */ 1901 #define MPZ_TMP_INIT(X, NLIMBS) \ 1902 do { \ 1903 mpz_ptr __x = (X); \ 1904 ASSERT ((NLIMBS) >= 1); \ 1905 __x->_mp_alloc = (NLIMBS); \ 1906 __x->_mp_d = TMP_ALLOC_LIMBS (NLIMBS); \ 1907 } while (0) 1908 1909 #if WANT_ASSERT 1910 static inline void * 1911 _mpz_newalloc (mpz_ptr z, mp_size_t n) 1912 { 1913 void * res = _mpz_realloc(z,n); 1914 /* If we are checking the code, force a random change to limbs. 
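The point is that code which wrongly assumes the old limb contents survive this reallocation is then likely to fail noisily under WANT_ASSERT instead of appearing to work.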
*/ 1915 ((mp_ptr) res)[0] = ~ ((mp_ptr) res)[ALLOC (z) - 1]; 1916 return res; 1917 } 1918 #else 1919 #define _mpz_newalloc _mpz_realloc 1920 #endif 1921 /* Realloc for an mpz_t WHAT if it has less than NEEDED limbs. */ 1922 #define MPZ_REALLOC(z,n) (UNLIKELY ((n) > ALLOC(z)) \ 1923 ? (mp_ptr) _mpz_realloc(z,n) \ 1924 : PTR(z)) 1925 #define MPZ_NEWALLOC(z,n) (UNLIKELY ((n) > ALLOC(z)) \ 1926 ? (mp_ptr) _mpz_newalloc(z,n) \ 1927 : PTR(z)) 1928 1929 #define MPZ_EQUAL_1_P(z) (SIZ(z)==1 && PTR(z)[0] == 1) 1930 1931 1932 /* MPN_FIB2_SIZE(n) is the size in limbs required by mpn_fib2_ui for fp and 1933 f1p. 1934 1935 From Knuth vol 1 section 1.2.8, F[n] = phi^n/sqrt(5) rounded to the 1936 nearest integer, where phi=(1+sqrt(5))/2 is the golden ratio. So the 1937 number of bits required is n*log_2((1+sqrt(5))/2) = n*0.6942419. 1938 1939 The multiplier used is 23/32=0.71875 for efficient calculation on CPUs 1940 without good floating point. There's +2 for rounding up, and a further 1941 +2 since at the last step x limbs are doubled into a 2x+1 limb region 1942 whereas the actual F[2k] value might be only 2x-1 limbs. 1943 1944 Note that a division is done first, since on a 32-bit system it's at 1945 least conceivable to go right up to n==ULONG_MAX. (F[2^32-1] would be 1946 about 380Mbytes, plus temporary workspace of about 1.2Gbytes here and 1947 whatever a multiply of two 190Mbyte numbers takes.) 1948 1949 Enhancement: When GMP_NUMB_BITS is not a power of 2 the division could be 1950 worked into the multiplier. */ 1951 1952 #define MPN_FIB2_SIZE(n) \ 1953 ((mp_size_t) ((n) / 32 * 23 / GMP_NUMB_BITS) + 4) 1954 1955 1956 /* FIB_TABLE(n) returns the Fibonacci number F[n]. Must have n in the range 1957 -1 <= n <= FIB_TABLE_LIMIT (that constant in fib_table.h). 1958 1959 FIB_TABLE_LUCNUM_LIMIT (in fib_table.h) is the largest n for which L[n] = 1960 F[n] + 2*F[n-1] fits in a limb. 
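For example, with the usual convention F[-1] = 1, F[0] = 0 and F[1] = 1, the lookups FIB_TABLE(-1), FIB_TABLE(0) and FIB_TABLE(1) give 1, 0 and 1, which is why the table lookup below is offset by +1.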
*/ 1961 1962 __GMP_DECLSPEC extern const mp_limb_t __gmp_fib_table[]; 1963 #define FIB_TABLE(n) (__gmp_fib_table[(n)+1]) 1964 1965 extern const mp_limb_t __gmp_oddfac_table[]; 1966 extern const mp_limb_t __gmp_odd2fac_table[]; 1967 extern const unsigned char __gmp_fac2cnt_table[]; 1968 extern const mp_limb_t __gmp_limbroots_table[]; 1969 1970 /* n^log <= GMP_NUMB_MAX, a limb can store log factors less than n */ 1971 static inline unsigned 1972 log_n_max (mp_limb_t n) 1973 { 1974 unsigned log; 1975 for (log = 8; n > __gmp_limbroots_table[log - 1]; log--); 1976 return log; 1977 } 1978 1979 #define SIEVESIZE 512 /* FIXME: Allow gmp_init_primesieve to choose */ 1980 typedef struct 1981 { 1982 unsigned long d; /* current index in s[] */ 1983 unsigned long s0; /* number corresponding to s[0] */ 1984 unsigned long sqrt_s0; /* misnomer for sqrt(s[SIEVESIZE-1]) */ 1985 unsigned char s[SIEVESIZE + 1]; /* sieve table */ 1986 } gmp_primesieve_t; 1987 1988 #define gmp_init_primesieve __gmp_init_primesieve 1989 __GMP_DECLSPEC void gmp_init_primesieve (gmp_primesieve_t *); 1990 1991 #define gmp_nextprime __gmp_nextprime 1992 __GMP_DECLSPEC unsigned long int gmp_nextprime (gmp_primesieve_t *); 1993 1994 #define gmp_primesieve __gmp_primesieve 1995 __GMP_DECLSPEC mp_limb_t gmp_primesieve (mp_ptr, mp_limb_t); 1996 1997 1998 #ifndef MUL_TOOM22_THRESHOLD 1999 #define MUL_TOOM22_THRESHOLD 30 2000 #endif 2001 2002 #ifndef MUL_TOOM33_THRESHOLD 2003 #define MUL_TOOM33_THRESHOLD 100 2004 #endif 2005 2006 #ifndef MUL_TOOM44_THRESHOLD 2007 #define MUL_TOOM44_THRESHOLD 300 2008 #endif 2009 2010 #ifndef MUL_TOOM6H_THRESHOLD 2011 #define MUL_TOOM6H_THRESHOLD 350 2012 #endif 2013 2014 #ifndef SQR_TOOM6_THRESHOLD 2015 #define SQR_TOOM6_THRESHOLD MUL_TOOM6H_THRESHOLD 2016 #endif 2017 2018 #ifndef MUL_TOOM8H_THRESHOLD 2019 #define MUL_TOOM8H_THRESHOLD 450 2020 #endif 2021 2022 #ifndef SQR_TOOM8_THRESHOLD 2023 #define SQR_TOOM8_THRESHOLD MUL_TOOM8H_THRESHOLD 2024 #endif 2025 2026 #ifndef MUL_TOOM32_TO_TOOM43_THRESHOLD 2027 #define MUL_TOOM32_TO_TOOM43_THRESHOLD 100 2028 #endif 2029 2030 #ifndef MUL_TOOM32_TO_TOOM53_THRESHOLD 2031 #define MUL_TOOM32_TO_TOOM53_THRESHOLD 110 2032 #endif 2033 2034 #ifndef MUL_TOOM42_TO_TOOM53_THRESHOLD 2035 #define MUL_TOOM42_TO_TOOM53_THRESHOLD 100 2036 #endif 2037 2038 #ifndef MUL_TOOM42_TO_TOOM63_THRESHOLD 2039 #define MUL_TOOM42_TO_TOOM63_THRESHOLD 110 2040 #endif 2041 2042 #ifndef MUL_TOOM43_TO_TOOM54_THRESHOLD 2043 #define MUL_TOOM43_TO_TOOM54_THRESHOLD 150 2044 #endif 2045 2046 /* MUL_TOOM22_THRESHOLD_LIMIT is the maximum for MUL_TOOM22_THRESHOLD. In a 2047 normal build MUL_TOOM22_THRESHOLD is a constant and we use that. In a fat 2048 binary or tune program build MUL_TOOM22_THRESHOLD is a variable and a 2049 separate hard limit will have been defined. Similarly for TOOM3. */ 2050 #ifndef MUL_TOOM22_THRESHOLD_LIMIT 2051 #define MUL_TOOM22_THRESHOLD_LIMIT MUL_TOOM22_THRESHOLD 2052 #endif 2053 #ifndef MUL_TOOM33_THRESHOLD_LIMIT 2054 #define MUL_TOOM33_THRESHOLD_LIMIT MUL_TOOM33_THRESHOLD 2055 #endif 2056 #ifndef MULLO_BASECASE_THRESHOLD_LIMIT 2057 #define MULLO_BASECASE_THRESHOLD_LIMIT MULLO_BASECASE_THRESHOLD 2058 #endif 2059 2060 /* SQR_BASECASE_THRESHOLD is where mpn_sqr_basecase should take over from 2061 mpn_mul_basecase. Default is to use mpn_sqr_basecase from 0. (Note that we 2062 certainly always want it if there's a native assembler mpn_sqr_basecase.) 
2063 2064 If it turns out that mpn_toom2_sqr becomes faster than mpn_mul_basecase 2065 before mpn_sqr_basecase does, then SQR_BASECASE_THRESHOLD is the toom2 2066 threshold and SQR_TOOM2_THRESHOLD is 0. This oddity arises more or less 2067 because SQR_TOOM2_THRESHOLD represents the size up to which mpn_sqr_basecase 2068 should be used, and that may be never. */ 2069 2070 #ifndef SQR_BASECASE_THRESHOLD 2071 #define SQR_BASECASE_THRESHOLD 0 2072 #endif 2073 2074 #ifndef SQR_TOOM2_THRESHOLD 2075 #define SQR_TOOM2_THRESHOLD 50 2076 #endif 2077 2078 #ifndef SQR_TOOM3_THRESHOLD 2079 #define SQR_TOOM3_THRESHOLD 120 2080 #endif 2081 2082 #ifndef SQR_TOOM4_THRESHOLD 2083 #define SQR_TOOM4_THRESHOLD 400 2084 #endif 2085 2086 /* See comments above about MUL_TOOM33_THRESHOLD_LIMIT. */ 2087 #ifndef SQR_TOOM3_THRESHOLD_LIMIT 2088 #define SQR_TOOM3_THRESHOLD_LIMIT SQR_TOOM3_THRESHOLD 2089 #endif 2090 2091 #ifndef MULMID_TOOM42_THRESHOLD 2092 #define MULMID_TOOM42_THRESHOLD MUL_TOOM22_THRESHOLD 2093 #endif 2094 2095 #ifndef DC_DIV_QR_THRESHOLD 2096 #define DC_DIV_QR_THRESHOLD 50 2097 #endif 2098 2099 #ifndef DC_DIVAPPR_Q_THRESHOLD 2100 #define DC_DIVAPPR_Q_THRESHOLD 200 2101 #endif 2102 2103 #ifndef DC_BDIV_QR_THRESHOLD 2104 #define DC_BDIV_QR_THRESHOLD 50 2105 #endif 2106 2107 #ifndef DC_BDIV_Q_THRESHOLD 2108 #define DC_BDIV_Q_THRESHOLD 180 2109 #endif 2110 2111 #ifndef DIVEXACT_JEB_THRESHOLD 2112 #define DIVEXACT_JEB_THRESHOLD 25 2113 #endif 2114 2115 #ifndef INV_MULMOD_BNM1_THRESHOLD 2116 #define INV_MULMOD_BNM1_THRESHOLD (5*MULMOD_BNM1_THRESHOLD) 2117 #endif 2118 2119 #ifndef INV_APPR_THRESHOLD 2120 #define INV_APPR_THRESHOLD INV_NEWTON_THRESHOLD 2121 #endif 2122 2123 #ifndef INV_NEWTON_THRESHOLD 2124 #define INV_NEWTON_THRESHOLD 200 2125 #endif 2126 2127 #ifndef BINV_NEWTON_THRESHOLD 2128 #define BINV_NEWTON_THRESHOLD 300 2129 #endif 2130 2131 #ifndef MU_DIVAPPR_Q_THRESHOLD 2132 #define MU_DIVAPPR_Q_THRESHOLD 2000 2133 #endif 2134 2135 #ifndef MU_DIV_QR_THRESHOLD 2136 #define MU_DIV_QR_THRESHOLD 2000 2137 #endif 2138 2139 #ifndef MUPI_DIV_QR_THRESHOLD 2140 #define MUPI_DIV_QR_THRESHOLD 200 2141 #endif 2142 2143 #ifndef MU_BDIV_Q_THRESHOLD 2144 #define MU_BDIV_Q_THRESHOLD 2000 2145 #endif 2146 2147 #ifndef MU_BDIV_QR_THRESHOLD 2148 #define MU_BDIV_QR_THRESHOLD 2000 2149 #endif 2150 2151 #ifndef MULMOD_BNM1_THRESHOLD 2152 #define MULMOD_BNM1_THRESHOLD 16 2153 #endif 2154 2155 #ifndef SQRMOD_BNM1_THRESHOLD 2156 #define SQRMOD_BNM1_THRESHOLD 16 2157 #endif 2158 2159 #ifndef MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD 2160 #define MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD (INV_MULMOD_BNM1_THRESHOLD/2) 2161 #endif 2162 2163 #if HAVE_NATIVE_mpn_addmul_2 || HAVE_NATIVE_mpn_redc_2 2164 2165 #ifndef REDC_1_TO_REDC_2_THRESHOLD 2166 #define REDC_1_TO_REDC_2_THRESHOLD 15 2167 #endif 2168 #ifndef REDC_2_TO_REDC_N_THRESHOLD 2169 #define REDC_2_TO_REDC_N_THRESHOLD 100 2170 #endif 2171 2172 #else 2173 2174 #ifndef REDC_1_TO_REDC_N_THRESHOLD 2175 #define REDC_1_TO_REDC_N_THRESHOLD 100 2176 #endif 2177 2178 #endif /* HAVE_NATIVE_mpn_addmul_2 || HAVE_NATIVE_mpn_redc_2 */ 2179 2180 2181 /* First k to use for an FFT modF multiply. A modF FFT is an order 2182 log(2^k)/log(2^(k-1)) algorithm, so k=3 is merely 1.5 like karatsuba, 2183 whereas k=4 is 1.33 which is faster than toom3 at 1.485. */ 2184 #define FFT_FIRST_K 4 2185 2186 /* Threshold at which FFT should be used to do a modF NxN -> N multiply. 
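Here a modF multiply means a product reduced modulo 2^N+1, which is the operation mpn_mul_fft performs.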
*/ 2187 #ifndef MUL_FFT_MODF_THRESHOLD 2188 #define MUL_FFT_MODF_THRESHOLD (MUL_TOOM33_THRESHOLD * 3) 2189 #endif 2190 #ifndef SQR_FFT_MODF_THRESHOLD 2191 #define SQR_FFT_MODF_THRESHOLD (SQR_TOOM3_THRESHOLD * 3) 2192 #endif 2193 2194 /* Threshold at which FFT should be used to do an NxN -> 2N multiply. This 2195 will be a size where FFT is using k=7 or k=8, since an FFT-k used for an 2196 NxN->2N multiply and not recursing into itself is an order 2197 log(2^k)/log(2^(k-2)) algorithm, so it'll be at least k=7 at 1.39 which 2198 is the first better than toom3. */ 2199 #ifndef MUL_FFT_THRESHOLD 2200 #define MUL_FFT_THRESHOLD (MUL_FFT_MODF_THRESHOLD * 10) 2201 #endif 2202 #ifndef SQR_FFT_THRESHOLD 2203 #define SQR_FFT_THRESHOLD (SQR_FFT_MODF_THRESHOLD * 10) 2204 #endif 2205 2206 /* Table of thresholds for successive modF FFT "k"s. The first entry is 2207 where FFT_FIRST_K+1 should be used, the second FFT_FIRST_K+2, 2208 etc. See mpn_fft_best_k(). */ 2209 #ifndef MUL_FFT_TABLE 2210 #define MUL_FFT_TABLE \ 2211 { MUL_TOOM33_THRESHOLD * 4, /* k=5 */ \ 2212 MUL_TOOM33_THRESHOLD * 8, /* k=6 */ \ 2213 MUL_TOOM33_THRESHOLD * 16, /* k=7 */ \ 2214 MUL_TOOM33_THRESHOLD * 32, /* k=8 */ \ 2215 MUL_TOOM33_THRESHOLD * 96, /* k=9 */ \ 2216 MUL_TOOM33_THRESHOLD * 288, /* k=10 */ \ 2217 0 } 2218 #endif 2219 #ifndef SQR_FFT_TABLE 2220 #define SQR_FFT_TABLE \ 2221 { SQR_TOOM3_THRESHOLD * 4, /* k=5 */ \ 2222 SQR_TOOM3_THRESHOLD * 8, /* k=6 */ \ 2223 SQR_TOOM3_THRESHOLD * 16, /* k=7 */ \ 2224 SQR_TOOM3_THRESHOLD * 32, /* k=8 */ \ 2225 SQR_TOOM3_THRESHOLD * 96, /* k=9 */ \ 2226 SQR_TOOM3_THRESHOLD * 288, /* k=10 */ \ 2227 0 } 2228 #endif 2229 2230 struct fft_table_nk 2231 { 2232 unsigned int n:27; 2233 unsigned int k:5; 2234 }; 2235 2236 #ifndef FFT_TABLE_ATTRS 2237 #define FFT_TABLE_ATTRS static const 2238 #endif 2239 2240 #define MPN_FFT_TABLE_SIZE 16 2241 2242 2243 #ifndef DC_DIV_QR_THRESHOLD 2244 #define DC_DIV_QR_THRESHOLD (3 * MUL_TOOM22_THRESHOLD) 2245 #endif 2246 2247 #ifndef GET_STR_DC_THRESHOLD 2248 #define GET_STR_DC_THRESHOLD 18 2249 #endif 2250 2251 #ifndef GET_STR_PRECOMPUTE_THRESHOLD 2252 #define GET_STR_PRECOMPUTE_THRESHOLD 35 2253 #endif 2254 2255 #ifndef SET_STR_DC_THRESHOLD 2256 #define SET_STR_DC_THRESHOLD 750 2257 #endif 2258 2259 #ifndef SET_STR_PRECOMPUTE_THRESHOLD 2260 #define SET_STR_PRECOMPUTE_THRESHOLD 2000 2261 #endif 2262 2263 #ifndef FAC_ODD_THRESHOLD 2264 #define FAC_ODD_THRESHOLD 35 2265 #endif 2266 2267 #ifndef FAC_DSC_THRESHOLD 2268 #define FAC_DSC_THRESHOLD 400 2269 #endif 2270 2271 /* Return non-zero if xp,xsize and yp,ysize overlap. 2272 If xp+xsize<=yp there's no overlap, or if yp+ysize<=xp there's no 2273 overlap. If both these are false, there's an overlap. */ 2274 #define MPN_OVERLAP_P(xp, xsize, yp, ysize) \ 2275 ((xp) + (xsize) > (yp) && (yp) + (ysize) > (xp)) 2276 #define MEM_OVERLAP_P(xp, xsize, yp, ysize) \ 2277 ( (char *) (xp) + (xsize) > (char *) (yp) \ 2278 && (char *) (yp) + (ysize) > (char *) (xp)) 2279 2280 /* Return non-zero if xp,xsize and yp,ysize are either identical or not 2281 overlapping. Return zero if they're partially overlapping. */ 2282 #define MPN_SAME_OR_SEPARATE_P(xp, yp, size) \ 2283 MPN_SAME_OR_SEPARATE2_P(xp, size, yp, size) 2284 #define MPN_SAME_OR_SEPARATE2_P(xp, xsize, yp, ysize) \ 2285 ((xp) == (yp) || ! MPN_OVERLAP_P (xp, xsize, yp, ysize)) 2286 2287 /* Return non-zero if dst,dsize and src,ssize are either identical or 2288 overlapping in a way suitable for an incrementing/decrementing algorithm. 
2289 Return zero if they're partially overlapping in an unsuitable fashion. */ 2290 #define MPN_SAME_OR_INCR2_P(dst, dsize, src, ssize) \ 2291 ((dst) <= (src) || ! MPN_OVERLAP_P (dst, dsize, src, ssize)) 2292 #define MPN_SAME_OR_INCR_P(dst, src, size) \ 2293 MPN_SAME_OR_INCR2_P(dst, size, src, size) 2294 #define MPN_SAME_OR_DECR2_P(dst, dsize, src, ssize) \ 2295 ((dst) >= (src) || ! MPN_OVERLAP_P (dst, dsize, src, ssize)) 2296 #define MPN_SAME_OR_DECR_P(dst, src, size) \ 2297 MPN_SAME_OR_DECR2_P(dst, size, src, size) 2298 2299 2300 /* ASSERT() is a private assertion checking scheme, similar to <assert.h>. 2301 ASSERT() does the check only if WANT_ASSERT is selected, ASSERT_ALWAYS() 2302 does it always. Generally assertions are meant for development, but 2303 might help when looking for a problem later too. 2304 2305 Note that strings shouldn't be used within the ASSERT expression, 2306 eg. ASSERT(strcmp(s,"notgood")!=0), since the quotes upset the "expr" 2307 used in the !HAVE_STRINGIZE case (ie. K&R). */ 2308 2309 #ifdef __LINE__ 2310 #define ASSERT_LINE __LINE__ 2311 #else 2312 #define ASSERT_LINE -1 2313 #endif 2314 2315 #ifdef __FILE__ 2316 #define ASSERT_FILE __FILE__ 2317 #else 2318 #define ASSERT_FILE "" 2319 #endif 2320 2321 __GMP_DECLSPEC void __gmp_assert_header (const char *, int); 2322 __GMP_DECLSPEC void __gmp_assert_fail (const char *, int, const char *) ATTRIBUTE_NORETURN; 2323 2324 #if HAVE_STRINGIZE 2325 #define ASSERT_FAIL(expr) __gmp_assert_fail (ASSERT_FILE, ASSERT_LINE, #expr) 2326 #else 2327 #define ASSERT_FAIL(expr) __gmp_assert_fail (ASSERT_FILE, ASSERT_LINE, "expr") 2328 #endif 2329 2330 #define ASSERT_ALWAYS(expr) \ 2331 do { \ 2332 if (UNLIKELY (!(expr))) \ 2333 ASSERT_FAIL (expr); \ 2334 } while (0) 2335 2336 #if WANT_ASSERT 2337 #define ASSERT(expr) ASSERT_ALWAYS (expr) 2338 #else 2339 #define ASSERT(expr) do {} while (0) 2340 #endif 2341 2342 2343 /* ASSERT_CARRY checks the expression is non-zero, and ASSERT_NOCARRY checks 2344 that it's zero. In both cases if assertion checking is disabled the 2345 expression is still evaluated. These macros are meant for use with 2346 routines like mpn_add_n() where the return value represents a carry or 2347 whatever that should or shouldn't occur in some context. For example, 2348 ASSERT_NOCARRY (mpn_add_n (rp, s1p, s2p, size)); */ 2349 #if WANT_ASSERT 2350 #define ASSERT_CARRY(expr) ASSERT_ALWAYS ((expr) != 0) 2351 #define ASSERT_NOCARRY(expr) ASSERT_ALWAYS ((expr) == 0) 2352 #else 2353 #define ASSERT_CARRY(expr) (expr) 2354 #define ASSERT_NOCARRY(expr) (expr) 2355 #endif 2356 2357 2358 /* ASSERT_CODE includes code when assertion checking is wanted. This is the 2359 same as writing "#if WANT_ASSERT", but more compact. */ 2360 #if WANT_ASSERT 2361 #define ASSERT_CODE(expr) expr 2362 #else 2363 #define ASSERT_CODE(expr) 2364 #endif 2365 2366 2367 /* Test that an mpq_t is in fully canonical form. This can be used as 2368 protection on routines like mpq_equal which give wrong results on 2369 non-canonical inputs. 
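With assertion checking enabled, a typical use is simply ASSERT_MPQ_CANONICAL (q); at the top of such a routine.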
*/ 2370 #if WANT_ASSERT 2371 #define ASSERT_MPQ_CANONICAL(q) \ 2372 do { \ 2373 ASSERT (q->_mp_den._mp_size > 0); \ 2374 if (q->_mp_num._mp_size == 0) \ 2375 { \ 2376 /* zero should be 0/1 */ \ 2377 ASSERT (mpz_cmp_ui (mpq_denref(q), 1L) == 0); \ 2378 } \ 2379 else \ 2380 { \ 2381 /* no common factors */ \ 2382 mpz_t __g; \ 2383 mpz_init (__g); \ 2384 mpz_gcd (__g, mpq_numref(q), mpq_denref(q)); \ 2385 ASSERT (mpz_cmp_ui (__g, 1) == 0); \ 2386 mpz_clear (__g); \ 2387 } \ 2388 } while (0) 2389 #else 2390 #define ASSERT_MPQ_CANONICAL(q) do {} while (0) 2391 #endif 2392 2393 /* Check that the nail parts are zero. */ 2394 #define ASSERT_ALWAYS_LIMB(limb) \ 2395 do { \ 2396 mp_limb_t __nail = (limb) & GMP_NAIL_MASK; \ 2397 ASSERT_ALWAYS (__nail == 0); \ 2398 } while (0) 2399 #define ASSERT_ALWAYS_MPN(ptr, size) \ 2400 do { \ 2401 /* let whole loop go dead when no nails */ \ 2402 if (GMP_NAIL_BITS != 0) \ 2403 { \ 2404 mp_size_t __i; \ 2405 for (__i = 0; __i < (size); __i++) \ 2406 ASSERT_ALWAYS_LIMB ((ptr)[__i]); \ 2407 } \ 2408 } while (0) 2409 #if WANT_ASSERT 2410 #define ASSERT_LIMB(limb) ASSERT_ALWAYS_LIMB (limb) 2411 #define ASSERT_MPN(ptr, size) ASSERT_ALWAYS_MPN (ptr, size) 2412 #else 2413 #define ASSERT_LIMB(limb) do {} while (0) 2414 #define ASSERT_MPN(ptr, size) do {} while (0) 2415 #endif 2416 2417 2418 /* Assert that an mpn region {ptr,size} is zero, or non-zero. 2419 size==0 is allowed, and in that case {ptr,size} considered to be zero. */ 2420 #if WANT_ASSERT 2421 #define ASSERT_MPN_ZERO_P(ptr,size) \ 2422 do { \ 2423 mp_size_t __i; \ 2424 ASSERT ((size) >= 0); \ 2425 for (__i = 0; __i < (size); __i++) \ 2426 ASSERT ((ptr)[__i] == 0); \ 2427 } while (0) 2428 #define ASSERT_MPN_NONZERO_P(ptr,size) \ 2429 do { \ 2430 mp_size_t __i; \ 2431 int __nonzero = 0; \ 2432 ASSERT ((size) >= 0); \ 2433 for (__i = 0; __i < (size); __i++) \ 2434 if ((ptr)[__i] != 0) \ 2435 { \ 2436 __nonzero = 1; \ 2437 break; \ 2438 } \ 2439 ASSERT (__nonzero); \ 2440 } while (0) 2441 #else 2442 #define ASSERT_MPN_ZERO_P(ptr,size) do {} while (0) 2443 #define ASSERT_MPN_NONZERO_P(ptr,size) do {} while (0) 2444 #endif 2445 2446 2447 #if ! HAVE_NATIVE_mpn_com 2448 #undef mpn_com 2449 #define mpn_com(d,s,n) \ 2450 do { \ 2451 mp_ptr __d = (d); \ 2452 mp_srcptr __s = (s); \ 2453 mp_size_t __n = (n); \ 2454 ASSERT (__n >= 1); \ 2455 ASSERT (MPN_SAME_OR_SEPARATE_P (__d, __s, __n)); \ 2456 do \ 2457 *__d++ = (~ *__s++) & GMP_NUMB_MASK; \ 2458 while (--__n); \ 2459 } while (0) 2460 #endif 2461 2462 #define MPN_LOGOPS_N_INLINE(rp, up, vp, n, operation) \ 2463 do { \ 2464 mp_srcptr __up = (up); \ 2465 mp_srcptr __vp = (vp); \ 2466 mp_ptr __rp = (rp); \ 2467 mp_size_t __n = (n); \ 2468 mp_limb_t __a, __b; \ 2469 ASSERT (__n > 0); \ 2470 ASSERT (MPN_SAME_OR_SEPARATE_P (__rp, __up, __n)); \ 2471 ASSERT (MPN_SAME_OR_SEPARATE_P (__rp, __vp, __n)); \ 2472 __up += __n; \ 2473 __vp += __n; \ 2474 __rp += __n; \ 2475 __n = -__n; \ 2476 do { \ 2477 __a = __up[__n]; \ 2478 __b = __vp[__n]; \ 2479 __rp[__n] = operation; \ 2480 } while (++__n); \ 2481 } while (0) 2482 2483 2484 #if ! HAVE_NATIVE_mpn_and_n 2485 #undef mpn_and_n 2486 #define mpn_and_n(rp, up, vp, n) \ 2487 MPN_LOGOPS_N_INLINE (rp, up, vp, n, __a & __b) 2488 #endif 2489 2490 #if ! HAVE_NATIVE_mpn_andn_n 2491 #undef mpn_andn_n 2492 #define mpn_andn_n(rp, up, vp, n) \ 2493 MPN_LOGOPS_N_INLINE (rp, up, vp, n, __a & ~__b) 2494 #endif 2495 2496 #if ! 
HAVE_NATIVE_mpn_nand_n 2497 #undef mpn_nand_n 2498 #define mpn_nand_n(rp, up, vp, n) \ 2499 MPN_LOGOPS_N_INLINE (rp, up, vp, n, ~(__a & __b) & GMP_NUMB_MASK) 2500 #endif 2501 2502 #if ! HAVE_NATIVE_mpn_ior_n 2503 #undef mpn_ior_n 2504 #define mpn_ior_n(rp, up, vp, n) \ 2505 MPN_LOGOPS_N_INLINE (rp, up, vp, n, __a | __b) 2506 #endif 2507 2508 #if ! HAVE_NATIVE_mpn_iorn_n 2509 #undef mpn_iorn_n 2510 #define mpn_iorn_n(rp, up, vp, n) \ 2511 MPN_LOGOPS_N_INLINE (rp, up, vp, n, (__a | ~__b) & GMP_NUMB_MASK) 2512 #endif 2513 2514 #if ! HAVE_NATIVE_mpn_nior_n 2515 #undef mpn_nior_n 2516 #define mpn_nior_n(rp, up, vp, n) \ 2517 MPN_LOGOPS_N_INLINE (rp, up, vp, n, ~(__a | __b) & GMP_NUMB_MASK) 2518 #endif 2519 2520 #if ! HAVE_NATIVE_mpn_xor_n 2521 #undef mpn_xor_n 2522 #define mpn_xor_n(rp, up, vp, n) \ 2523 MPN_LOGOPS_N_INLINE (rp, up, vp, n, __a ^ __b) 2524 #endif 2525 2526 #if ! HAVE_NATIVE_mpn_xnor_n 2527 #undef mpn_xnor_n 2528 #define mpn_xnor_n(rp, up, vp, n) \ 2529 MPN_LOGOPS_N_INLINE (rp, up, vp, n, ~(__a ^ __b) & GMP_NUMB_MASK) 2530 #endif 2531 2532 #define mpn_trialdiv __MPN(trialdiv) 2533 __GMP_DECLSPEC mp_limb_t mpn_trialdiv (mp_srcptr, mp_size_t, mp_size_t, int *); 2534 2535 #define mpn_remove __MPN(remove) 2536 __GMP_DECLSPEC mp_bitcnt_t mpn_remove (mp_ptr, mp_size_t *, mp_ptr, mp_size_t, mp_ptr, mp_size_t, mp_bitcnt_t); 2537 2538 2539 /* ADDC_LIMB sets w=x+y and cout to 0 or 1 for a carry from that addition. */ 2540 #if GMP_NAIL_BITS == 0 2541 #define ADDC_LIMB(cout, w, x, y) \ 2542 do { \ 2543 mp_limb_t __x = (x); \ 2544 mp_limb_t __y = (y); \ 2545 mp_limb_t __w = __x + __y; \ 2546 (w) = __w; \ 2547 (cout) = __w < __x; \ 2548 } while (0) 2549 #else 2550 #define ADDC_LIMB(cout, w, x, y) \ 2551 do { \ 2552 mp_limb_t __w; \ 2553 ASSERT_LIMB (x); \ 2554 ASSERT_LIMB (y); \ 2555 __w = (x) + (y); \ 2556 (w) = __w & GMP_NUMB_MASK; \ 2557 (cout) = __w >> GMP_NUMB_BITS; \ 2558 } while (0) 2559 #endif 2560 2561 /* SUBC_LIMB sets w=x-y and cout to 0 or 1 for a borrow from that 2562 subtract. */ 2563 #if GMP_NAIL_BITS == 0 2564 #define SUBC_LIMB(cout, w, x, y) \ 2565 do { \ 2566 mp_limb_t __x = (x); \ 2567 mp_limb_t __y = (y); \ 2568 mp_limb_t __w = __x - __y; \ 2569 (w) = __w; \ 2570 (cout) = __w > __x; \ 2571 } while (0) 2572 #else 2573 #define SUBC_LIMB(cout, w, x, y) \ 2574 do { \ 2575 mp_limb_t __w = (x) - (y); \ 2576 (w) = __w & GMP_NUMB_MASK; \ 2577 (cout) = __w >> (GMP_LIMB_BITS-1); \ 2578 } while (0) 2579 #endif 2580 2581 2582 /* MPN_INCR_U does {ptr,size} += n, MPN_DECR_U does {ptr,size} -= n, both 2583 expecting no carry (or borrow) from that. 2584 2585 The size parameter is only for the benefit of assertion checking. In a 2586 normal build it's unused and the carry/borrow is just propagated as far 2587 as it needs to go. 2588 2589 On random data, usually only one or two limbs of {ptr,size} get updated, 2590 so there's no need for any sophisticated looping, just something compact 2591 and sensible. 2592 2593 FIXME: Switch all code from mpn_{incr,decr}_u to MPN_{INCR,DECR}_U, 2594 declaring their operand sizes, then remove the former. This is purely 2595 for the benefit of assertion checking. */ 2596 2597 #if defined (__GNUC__) && GMP_NAIL_BITS == 0 && ! defined (NO_ASM) \ 2598 && (defined(HAVE_HOST_CPU_FAMILY_x86) || defined(HAVE_HOST_CPU_FAMILY_x86_64)) \ 2599 && ! WANT_ASSERT 2600 /* Better flags handling than the generic C gives on i386, saving a few 2601 bytes of code and maybe a cycle or two. 
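The loop below applies incr at the low limb and then keeps adding (or subtracting) 1 into successive limbs for as long as the carry (or borrow) flag stays set, so on random data normally only one or two limbs are touched.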
*/ 2602 2603 #define MPN_IORD_U(ptr, incr, aors) \ 2604 do { \ 2605 mp_ptr __ptr_dummy; \ 2606 if (__builtin_constant_p (incr) && (incr) == 0) \ 2607 { \ 2608 } \ 2609 else if (__builtin_constant_p (incr) && (incr) == 1) \ 2610 { \ 2611 __asm__ __volatile__ \ 2612 ("\n" ASM_L(top) ":\n" \ 2613 "\t" aors "\t$1, (%0)\n" \ 2614 "\tlea\t%c2(%0), %0\n" \ 2615 "\tjc\t" ASM_L(top) \ 2616 : "=r" (__ptr_dummy) \ 2617 : "0" (ptr), "n" (sizeof(mp_limb_t)) \ 2618 : "memory"); \ 2619 } \ 2620 else \ 2621 { \ 2622 __asm__ __volatile__ \ 2623 ( aors "\t%2, (%0)\n" \ 2624 "\tjnc\t" ASM_L(done) "\n" \ 2625 ASM_L(top) ":\n" \ 2626 "\t" aors "\t$1, %c3(%0)\n" \ 2627 "\tlea\t%c3(%0), %0\n" \ 2628 "\tjc\t" ASM_L(top) "\n" \ 2629 ASM_L(done) ":\n" \ 2630 : "=r" (__ptr_dummy) \ 2631 : "0" (ptr), \ 2632 "ri" ((mp_limb_t) (incr)), "n" (sizeof(mp_limb_t)) \ 2633 : "memory"); \ 2634 } \ 2635 } while (0) 2636 2637 #if GMP_LIMB_BITS == 32 2638 #define MPN_INCR_U(ptr, size, incr) MPN_IORD_U (ptr, incr, "addl") 2639 #define MPN_DECR_U(ptr, size, incr) MPN_IORD_U (ptr, incr, "subl") 2640 #endif 2641 #if GMP_LIMB_BITS == 64 2642 #define MPN_INCR_U(ptr, size, incr) MPN_IORD_U (ptr, incr, "addq") 2643 #define MPN_DECR_U(ptr, size, incr) MPN_IORD_U (ptr, incr, "subq") 2644 #endif 2645 #define mpn_incr_u(ptr, incr) MPN_INCR_U (ptr, 0, incr) 2646 #define mpn_decr_u(ptr, incr) MPN_DECR_U (ptr, 0, incr) 2647 #endif 2648 2649 #if GMP_NAIL_BITS == 0 2650 #ifndef mpn_incr_u 2651 #define mpn_incr_u(p,incr) \ 2652 do { \ 2653 mp_limb_t __x; \ 2654 mp_ptr __p = (p); \ 2655 if (__builtin_constant_p (incr) && (incr) == 1) \ 2656 { \ 2657 while (++(*(__p++)) == 0) \ 2658 ; \ 2659 } \ 2660 else \ 2661 { \ 2662 __x = *__p + (incr); \ 2663 *__p = __x; \ 2664 if (__x < (incr)) \ 2665 while (++(*(++__p)) == 0) \ 2666 ; \ 2667 } \ 2668 } while (0) 2669 #endif 2670 #ifndef mpn_decr_u 2671 #define mpn_decr_u(p,incr) \ 2672 do { \ 2673 mp_limb_t __x; \ 2674 mp_ptr __p = (p); \ 2675 if (__builtin_constant_p (incr) && (incr) == 1) \ 2676 { \ 2677 while ((*(__p++))-- == 0) \ 2678 ; \ 2679 } \ 2680 else \ 2681 { \ 2682 __x = *__p; \ 2683 *__p = __x - (incr); \ 2684 if (__x < (incr)) \ 2685 while ((*(++__p))-- == 0) \ 2686 ; \ 2687 } \ 2688 } while (0) 2689 #endif 2690 #endif 2691 2692 #if GMP_NAIL_BITS >= 1 2693 #ifndef mpn_incr_u 2694 #define mpn_incr_u(p,incr) \ 2695 do { \ 2696 mp_limb_t __x; \ 2697 mp_ptr __p = (p); \ 2698 if (__builtin_constant_p (incr) && (incr) == 1) \ 2699 { \ 2700 do \ 2701 { \ 2702 __x = (*__p + 1) & GMP_NUMB_MASK; \ 2703 *__p++ = __x; \ 2704 } \ 2705 while (__x == 0); \ 2706 } \ 2707 else \ 2708 { \ 2709 __x = (*__p + (incr)); \ 2710 *__p++ = __x & GMP_NUMB_MASK; \ 2711 if (__x >> GMP_NUMB_BITS != 0) \ 2712 { \ 2713 do \ 2714 { \ 2715 __x = (*__p + 1) & GMP_NUMB_MASK; \ 2716 *__p++ = __x; \ 2717 } \ 2718 while (__x == 0); \ 2719 } \ 2720 } \ 2721 } while (0) 2722 #endif 2723 #ifndef mpn_decr_u 2724 #define mpn_decr_u(p,incr) \ 2725 do { \ 2726 mp_limb_t __x; \ 2727 mp_ptr __p = (p); \ 2728 if (__builtin_constant_p (incr) && (incr) == 1) \ 2729 { \ 2730 do \ 2731 { \ 2732 __x = *__p; \ 2733 *__p++ = (__x - 1) & GMP_NUMB_MASK; \ 2734 } \ 2735 while (__x == 0); \ 2736 } \ 2737 else \ 2738 { \ 2739 __x = *__p - (incr); \ 2740 *__p++ = __x & GMP_NUMB_MASK; \ 2741 if (__x >> GMP_NUMB_BITS != 0) \ 2742 { \ 2743 do \ 2744 { \ 2745 __x = *__p; \ 2746 *__p++ = (__x - 1) & GMP_NUMB_MASK; \ 2747 } \ 2748 while (__x == 0); \ 2749 } \ 2750 } \ 2751 } while (0) 2752 #endif 2753 #endif 2754 2755 #ifndef MPN_INCR_U 2756 #if WANT_ASSERT 
2757 #define MPN_INCR_U(ptr, size, n) \ 2758 do { \ 2759 ASSERT ((size) >= 1); \ 2760 ASSERT_NOCARRY (mpn_add_1 (ptr, ptr, size, n)); \ 2761 } while (0) 2762 #else 2763 #define MPN_INCR_U(ptr, size, n) mpn_incr_u (ptr, n) 2764 #endif 2765 #endif 2766 2767 #ifndef MPN_DECR_U 2768 #if WANT_ASSERT 2769 #define MPN_DECR_U(ptr, size, n) \ 2770 do { \ 2771 ASSERT ((size) >= 1); \ 2772 ASSERT_NOCARRY (mpn_sub_1 (ptr, ptr, size, n)); \ 2773 } while (0) 2774 #else 2775 #define MPN_DECR_U(ptr, size, n) mpn_decr_u (ptr, n) 2776 #endif 2777 #endif 2778 2779 2780 /* Structure for conversion between internal binary format and strings. */ 2781 struct bases 2782 { 2783 /* Number of digits in the conversion base that always fits in an mp_limb_t. 2784 For example, for base 10 on a machine where a mp_limb_t has 32 bits this 2785 is 9, since 10**9 is the largest number that fits into a mp_limb_t. */ 2786 int chars_per_limb; 2787 2788 /* log(2)/log(conversion_base) */ 2789 mp_limb_t logb2; 2790 2791 /* log(conversion_base)/log(2) */ 2792 mp_limb_t log2b; 2793 2794 /* base**chars_per_limb, i.e. the biggest number that fits a word, built by 2795 factors of base. Exception: For 2, 4, 8, etc, big_base is log2(base), 2796 i.e. the number of bits used to represent each digit in the base. */ 2797 mp_limb_t big_base; 2798 2799 /* A GMP_LIMB_BITS bit approximation to 1/big_base, represented as a 2800 fixed-point number. Instead of dividing by big_base an application can 2801 choose to multiply by big_base_inverted. */ 2802 mp_limb_t big_base_inverted; 2803 }; 2804 2805 #define mp_bases __MPN(bases) 2806 __GMP_DECLSPEC extern const struct bases mp_bases[257]; 2807 2808 2809 /* Compute the number of digits in base for nbits bits, making sure the result 2810 is never too small. The two variants of the macro implement the same 2811 function; the GT2 variant below works just for bases > 2. */ 2812 #define DIGITS_IN_BASE_FROM_BITS(res, nbits, b) \ 2813 do { \ 2814 mp_limb_t _ph, _dummy; \ 2815 size_t _nbits = (nbits); \ 2816 umul_ppmm (_ph, _dummy, mp_bases[b].logb2, _nbits); \ 2817 _ph += (_dummy + _nbits < _dummy); \ 2818 res = _ph + 1; \ 2819 } while (0) 2820 #define DIGITS_IN_BASEGT2_FROM_BITS(res, nbits, b) \ 2821 do { \ 2822 mp_limb_t _ph, _dummy; \ 2823 size_t _nbits = (nbits); \ 2824 umul_ppmm (_ph, _dummy, mp_bases[b].logb2 + 1, _nbits); \ 2825 res = _ph + 1; \ 2826 } while (0) 2827 2828 /* For power of 2 bases this is exact. For other bases the result is either 2829 exact or one too big. 2830 2831 To be exact always it'd be necessary to examine all the limbs of the 2832 operand, since numbers like 100..000 and 99...999 generally differ only 2833 in the lowest limb. It'd be possible to examine just a couple of high 2834 limbs to increase the probability of being exact, but that doesn't seem 2835 worth bothering with. */ 2836 2837 #define MPN_SIZEINBASE(result, ptr, size, base) \ 2838 do { \ 2839 int __lb_base, __cnt; \ 2840 size_t __totbits; \ 2841 \ 2842 ASSERT ((size) >= 0); \ 2843 ASSERT ((base) >= 2); \ 2844 ASSERT ((base) < numberof (mp_bases)); \ 2845 \ 2846 /* Special case for X == 0. */ \ 2847 if ((size) == 0) \ 2848 (result) = 1; \ 2849 else \ 2850 { \ 2851 /* Calculate the total number of significant bits of X. 
*/ \ 2852 count_leading_zeros (__cnt, (ptr)[(size)-1]); \ 2853 __totbits = (size_t) (size) * GMP_NUMB_BITS - (__cnt - GMP_NAIL_BITS);\ 2854 \ 2855 if (POW2_P (base)) \ 2856 { \ 2857 __lb_base = mp_bases[base].big_base; \ 2858 (result) = (__totbits + __lb_base - 1) / __lb_base; \ 2859 } \ 2860 else \ 2861 { \ 2862 DIGITS_IN_BASEGT2_FROM_BITS (result, __totbits, base); \ 2863 } \ 2864 } \ 2865 } while (0) 2866 2867 #define MPN_SIZEINBASE_2EXP(result, ptr, size, base2exp) \ 2868 do { \ 2869 int __cnt; \ 2870 mp_bitcnt_t __totbits; \ 2871 ASSERT ((size) > 0); \ 2872 ASSERT ((ptr)[(size)-1] != 0); \ 2873 count_leading_zeros (__cnt, (ptr)[(size)-1]); \ 2874 __totbits = (mp_bitcnt_t) (size) * GMP_NUMB_BITS - (__cnt - GMP_NAIL_BITS); \ 2875 (result) = (__totbits + (base2exp)-1) / (base2exp); \ 2876 } while (0) 2877 2878 2879 /* bit count to limb count, rounding up */ 2880 #define BITS_TO_LIMBS(n) (((n) + (GMP_NUMB_BITS - 1)) / GMP_NUMB_BITS) 2881 2882 /* MPN_SET_UI sets an mpn (ptr, cnt) to given ui. MPZ_FAKE_UI creates fake 2883 mpz_t from ui. The zp argument must have room for LIMBS_PER_ULONG limbs 2884 in both cases (LIMBS_PER_ULONG is also defined here.) */ 2885 #if BITS_PER_ULONG <= GMP_NUMB_BITS /* need one limb per ulong */ 2886 2887 #define LIMBS_PER_ULONG 1 2888 #define MPN_SET_UI(zp, zn, u) \ 2889 (zp)[0] = (u); \ 2890 (zn) = ((zp)[0] != 0); 2891 #define MPZ_FAKE_UI(z, zp, u) \ 2892 (zp)[0] = (u); \ 2893 PTR (z) = (zp); \ 2894 SIZ (z) = ((zp)[0] != 0); \ 2895 ASSERT_CODE (ALLOC (z) = 1); 2896 2897 #else /* need two limbs per ulong */ 2898 2899 #define LIMBS_PER_ULONG 2 2900 #define MPN_SET_UI(zp, zn, u) \ 2901 (zp)[0] = (u) & GMP_NUMB_MASK; \ 2902 (zp)[1] = (u) >> GMP_NUMB_BITS; \ 2903 (zn) = ((zp)[1] != 0 ? 2 : (zp)[0] != 0 ? 1 : 0); 2904 #define MPZ_FAKE_UI(z, zp, u) \ 2905 (zp)[0] = (u) & GMP_NUMB_MASK; \ 2906 (zp)[1] = (u) >> GMP_NUMB_BITS; \ 2907 SIZ (z) = ((zp)[1] != 0 ? 2 : (zp)[0] != 0 ? 1 : 0); \ 2908 PTR (z) = (zp); \ 2909 ASSERT_CODE (ALLOC (z) = 2); 2910 2911 #endif 2912 2913 2914 #if HAVE_HOST_CPU_FAMILY_x86 2915 #define TARGET_REGISTER_STARVED 1 2916 #else 2917 #define TARGET_REGISTER_STARVED 0 2918 #endif 2919 2920 2921 /* LIMB_HIGHBIT_TO_MASK(n) examines the high bit of a limb value and turns 1 2922 or 0 there into a limb 0xFF..FF or 0 respectively. 2923 2924 On most CPUs this is just an arithmetic right shift by GMP_LIMB_BITS-1, 2925 but C99 doesn't guarantee signed right shifts are arithmetic, so we have 2926 a little compile-time test and a fallback to a "? :" form. The latter is 2927 necessary for instance on Cray vector systems. 2928 2929 Recent versions of gcc (eg. 3.3) will in fact optimize a "? :" like this 2930 to an arithmetic right shift anyway, but it's good to get the desired 2931 shift on past versions too (in particular since an important use of 2932 LIMB_HIGHBIT_TO_MASK is in udiv_qrnnd_preinv). */ 2933 2934 #define LIMB_HIGHBIT_TO_MASK(n) \ 2935 (((mp_limb_signed_t) -1 >> 1) < 0 \ 2936 ? (mp_limb_signed_t) (n) >> (GMP_LIMB_BITS - 1) \ 2937 : (n) & GMP_LIMB_HIGHBIT ? MP_LIMB_T_MAX : CNST_LIMB(0)) 2938 2939 2940 /* Use a library function for invert_limb, if available. */ 2941 #define mpn_invert_limb __MPN(invert_limb) 2942 __GMP_DECLSPEC mp_limb_t mpn_invert_limb (mp_limb_t) ATTRIBUTE_CONST; 2943 #if ! 
defined (invert_limb) && HAVE_NATIVE_mpn_invert_limb 2944 #define invert_limb(invxl,xl) \ 2945 do { \ 2946 (invxl) = mpn_invert_limb (xl); \ 2947 } while (0) 2948 #endif 2949 2950 #ifndef invert_limb 2951 #define invert_limb(invxl,xl) \ 2952 do { \ 2953 mp_limb_t _dummy; \ 2954 ASSERT ((xl) != 0); \ 2955 udiv_qrnnd (invxl, _dummy, ~(xl), ~CNST_LIMB(0), xl); \ 2956 } while (0) \ 2957 #endif 2958 2959 #define invert_pi1(dinv, d1, d0) \ 2960 do { \ 2961 mp_limb_t _v, _p, _t1, _t0, _mask; \ 2962 invert_limb (_v, d1); \ 2963 _p = (d1) * _v; \ 2964 _p += (d0); \ 2965 if (_p < (d0)) \ 2966 { \ 2967 _v--; \ 2968 _mask = -(mp_limb_t) (_p >= (d1)); \ 2969 _p -= (d1); \ 2970 _v += _mask; \ 2971 _p -= _mask & (d1); \ 2972 } \ 2973 umul_ppmm (_t1, _t0, d0, _v); \ 2974 _p += _t1; \ 2975 if (_p < _t1) \ 2976 { \ 2977 _v--; \ 2978 if (UNLIKELY (_p >= (d1))) \ 2979 { \ 2980 if (_p > (d1) || _t0 >= (d0)) \ 2981 _v--; \ 2982 } \ 2983 } \ 2984 (dinv).inv32 = _v; \ 2985 } while (0) 2986 2987 2988 /* udiv_qrnnd_preinv -- Based on work by Niels Möller and Torbjörn Granlund. 2989 We write things strangely below, to help gcc. A more straightforward 2990 version: 2991 _r = (nl) - _qh * (d); 2992 _t = _r + (d); 2993 if (_r >= _ql) 2994 { 2995 _qh--; 2996 _r = _t; 2997 } 2998 For one operation shorter critical path, one may want to use this form: 2999 _p = _qh * (d) 3000 _s = (nl) + (d); 3001 _r = (nl) - _p; 3002 _t = _s - _p; 3003 if (_r >= _ql) 3004 { 3005 _qh--; 3006 _r = _t; 3007 } 3008 */ 3009 #define udiv_qrnnd_preinv(q, r, nh, nl, d, di) \ 3010 do { \ 3011 mp_limb_t _qh, _ql, _r, _mask; \ 3012 umul_ppmm (_qh, _ql, (nh), (di)); \ 3013 if (__builtin_constant_p (nl) && (nl) == 0) \ 3014 { \ 3015 _qh += (nh) + 1; \ 3016 _r = - _qh * (d); \ 3017 _mask = -(mp_limb_t) (_r > _ql); /* both > and >= are OK */ \ 3018 _qh += _mask; \ 3019 _r += _mask & (d); \ 3020 } \ 3021 else \ 3022 { \ 3023 add_ssaaaa (_qh, _ql, _qh, _ql, (nh) + 1, (nl)); \ 3024 _r = (nl) - _qh * (d); \ 3025 _mask = -(mp_limb_t) (_r > _ql); /* both > and >= are OK */ \ 3026 _qh += _mask; \ 3027 _r += _mask & (d); \ 3028 if (UNLIKELY (_r >= (d))) \ 3029 { \ 3030 _r -= (d); \ 3031 _qh++; \ 3032 } \ 3033 } \ 3034 (r) = _r; \ 3035 (q) = _qh; \ 3036 } while (0) 3037 3038 /* Dividing (NH, NL) by D, returning the remainder only. Unlike 3039 udiv_qrnnd_preinv, works also for the case NH == D, where the 3040 quotient doesn't quite fit in a single limb. */ 3041 #define udiv_rnnd_preinv(r, nh, nl, d, di) \ 3042 do { \ 3043 mp_limb_t _qh, _ql, _r, _mask; \ 3044 umul_ppmm (_qh, _ql, (nh), (di)); \ 3045 if (__builtin_constant_p (nl) && (nl) == 0) \ 3046 { \ 3047 _r = ~(_qh + (nh)) * (d); \ 3048 _mask = -(mp_limb_t) (_r > _ql); /* both > and >= are OK */ \ 3049 _r += _mask & (d); \ 3050 } \ 3051 else \ 3052 { \ 3053 add_ssaaaa (_qh, _ql, _qh, _ql, (nh) + 1, (nl)); \ 3054 _r = (nl) - _qh * (d); \ 3055 _mask = -(mp_limb_t) (_r > _ql); /* both > and >= are OK */ \ 3056 _r += _mask & (d); \ 3057 if (UNLIKELY (_r >= (d))) \ 3058 _r -= (d); \ 3059 } \ 3060 (r) = _r; \ 3061 } while (0) 3062 3063 /* Compute the quotient and remainder for n / d. Requires d 3064 >= B^2 / 2 and n < d B. di is the inverse 3065 3066 floor ((B^3 - 1) / (d0 + d1 B)) - B. 3067 3068 NOTE: Output variables are updated multiple times. Only some inputs 3069 and outputs may overlap.
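That inverse is what invert_pi1 above produces in its inv32 field. As an illustrative sketch only (the variable names here are hypothetical, not taken from any particular caller), the intended call pattern is roughly

      gmp_pi1_t inv;
      invert_pi1 (inv, d1, d0);
      udiv_qr_3by2 (q, r1, r0, n2, n1, n0, d1, d0, inv.inv32);

with d1 normalized (high bit set) and the two high numerator limbs n2,n1 less than d1,d0, leaving the quotient limb in q and the remainder in r1,r0.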
3070 */ 3071 #define udiv_qr_3by2(q, r1, r0, n2, n1, n0, d1, d0, dinv) \ 3072 do { \ 3073 mp_limb_t _q0, _t1, _t0, _mask; \ 3074 umul_ppmm ((q), _q0, (n2), (dinv)); \ 3075 add_ssaaaa ((q), _q0, (q), _q0, (n2), (n1)); \ 3076 \ 3077 /* Compute the two most significant limbs of n - q'd */ \ 3078 (r1) = (n1) - (d1) * (q); \ 3079 sub_ddmmss ((r1), (r0), (r1), (n0), (d1), (d0)); \ 3080 umul_ppmm (_t1, _t0, (d0), (q)); \ 3081 sub_ddmmss ((r1), (r0), (r1), (r0), _t1, _t0); \ 3082 (q)++; \ 3083 \ 3084 /* Conditionally adjust q and the remainders */ \ 3085 _mask = - (mp_limb_t) ((r1) >= _q0); \ 3086 (q) += _mask; \ 3087 add_ssaaaa ((r1), (r0), (r1), (r0), _mask & (d1), _mask & (d0)); \ 3088 if (UNLIKELY ((r1) >= (d1))) \ 3089 { \ 3090 if ((r1) > (d1) || (r0) >= (d0)) \ 3091 { \ 3092 (q)++; \ 3093 sub_ddmmss ((r1), (r0), (r1), (r0), (d1), (d0)); \ 3094 } \ 3095 } \ 3096 } while (0) 3097 3098 #ifndef mpn_preinv_divrem_1 /* if not done with cpuvec in a fat binary */ 3099 #define mpn_preinv_divrem_1 __MPN(preinv_divrem_1) 3100 __GMP_DECLSPEC mp_limb_t mpn_preinv_divrem_1 (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, int); 3101 #endif 3102 3103 3104 /* USE_PREINV_DIVREM_1 is whether to use mpn_preinv_divrem_1, as opposed to the 3105 plain mpn_divrem_1. The default is yes, since the few CISC chips where 3106 preinv is not good have defines saying so. */ 3107 #ifndef USE_PREINV_DIVREM_1 3108 #define USE_PREINV_DIVREM_1 1 3109 #endif 3110 3111 #if USE_PREINV_DIVREM_1 3112 #define MPN_DIVREM_OR_PREINV_DIVREM_1(qp,xsize,ap,size,d,dinv,shift) \ 3113 mpn_preinv_divrem_1 (qp, xsize, ap, size, d, dinv, shift) 3114 #else 3115 #define MPN_DIVREM_OR_PREINV_DIVREM_1(qp,xsize,ap,size,d,dinv,shift) \ 3116 mpn_divrem_1 (qp, xsize, ap, size, d) 3117 #endif 3118 3119 #ifndef PREINV_MOD_1_TO_MOD_1_THRESHOLD 3120 #define PREINV_MOD_1_TO_MOD_1_THRESHOLD 10 3121 #endif 3122 3123 /* This selection may seem backwards. The reason mpn_mod_1 typically takes 3124 over for larger sizes is that it uses the mod_1_1 function. */ 3125 #define MPN_MOD_OR_PREINV_MOD_1(src,size,divisor,inverse) \ 3126 (BELOW_THRESHOLD (size, PREINV_MOD_1_TO_MOD_1_THRESHOLD) \ 3127 ? mpn_preinv_mod_1 (src, size, divisor, inverse) \ 3128 : mpn_mod_1 (src, size, divisor)) 3129 3130 3131 #ifndef mpn_mod_34lsub1 /* if not done with cpuvec in a fat binary */ 3132 #define mpn_mod_34lsub1 __MPN(mod_34lsub1) 3133 __GMP_DECLSPEC mp_limb_t mpn_mod_34lsub1 (mp_srcptr, mp_size_t) __GMP_ATTRIBUTE_PURE; 3134 #endif 3135 3136 3137 /* DIVEXACT_1_THRESHOLD is at what size to use mpn_divexact_1, as opposed to 3138 plain mpn_divrem_1. Likewise BMOD_1_TO_MOD_1_THRESHOLD for 3139 mpn_modexact_1_odd against plain mpn_mod_1. On most CPUs divexact and 3140 modexact are faster at all sizes, so the defaults are 0. Those CPUs 3141 where this is not right have a tuned threshold. 
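These thresholds are consulted by the MPN_DIVREM_OR_DIVEXACT_1 and MPN_MOD_OR_MODEXACT_1_ODD selection macros below.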
*/ 3142 #ifndef DIVEXACT_1_THRESHOLD 3143 #define DIVEXACT_1_THRESHOLD 0 3144 #endif 3145 #ifndef BMOD_1_TO_MOD_1_THRESHOLD 3146 #define BMOD_1_TO_MOD_1_THRESHOLD 10 3147 #endif 3148 3149 #ifndef mpn_divexact_1 /* if not done with cpuvec in a fat binary */ 3150 #define mpn_divexact_1 __MPN(divexact_1) 3151 __GMP_DECLSPEC void mpn_divexact_1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t); 3152 #endif 3153 3154 #define MPN_DIVREM_OR_DIVEXACT_1(rp, up, n, d) \ 3155 do { \ 3156 if (BELOW_THRESHOLD (n, DIVEXACT_1_THRESHOLD)) \ 3157 ASSERT_NOCARRY (mpn_divrem_1 (rp, (mp_size_t) 0, up, n, d)); \ 3158 else \ 3159 { \ 3160 ASSERT (mpn_mod_1 (up, n, d) == 0); \ 3161 mpn_divexact_1 (rp, up, n, d); \ 3162 } \ 3163 } while (0) 3164 3165 #ifndef mpn_modexact_1c_odd /* if not done with cpuvec in a fat binary */ 3166 #define mpn_modexact_1c_odd __MPN(modexact_1c_odd) 3167 __GMP_DECLSPEC mp_limb_t mpn_modexact_1c_odd (mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t) __GMP_ATTRIBUTE_PURE; 3168 #endif 3169 3170 #if HAVE_NATIVE_mpn_modexact_1_odd 3171 #define mpn_modexact_1_odd __MPN(modexact_1_odd) 3172 __GMP_DECLSPEC mp_limb_t mpn_modexact_1_odd (mp_srcptr, mp_size_t, mp_limb_t) __GMP_ATTRIBUTE_PURE; 3173 #else 3174 #define mpn_modexact_1_odd(src,size,divisor) \ 3175 mpn_modexact_1c_odd (src, size, divisor, CNST_LIMB(0)) 3176 #endif 3177 3178 #define MPN_MOD_OR_MODEXACT_1_ODD(src,size,divisor) \ 3179 (BELOW_THRESHOLD (size, BMOD_1_TO_MOD_1_THRESHOLD) \ 3180 ? mpn_modexact_1_odd (src, size, divisor) \ 3181 : mpn_mod_1 (src, size, divisor)) 3182 3183 /* binvert_limb() sets inv to the multiplicative inverse of n modulo 3184 2^GMP_NUMB_BITS, ie. satisfying inv*n == 1 mod 2^GMP_NUMB_BITS. 3185 n must be odd (otherwise such an inverse doesn't exist). 3186 3187 This is not to be confused with invert_limb(), which is completely 3188 different. 3189 3190 The table lookup gives an inverse with the low 8 bits valid, and each 3191 multiply step doubles the number of bits. See Jebelean "An algorithm for 3192 exact division" end of section 4 (reference in gmp.texi). 3193 3194 Possible enhancement: Could use UHWtype until the last step, if half-size 3195 multiplies are faster (might help under _LONG_LONG_LIMB). 3196 3197 Alternative: As noted in Granlund and Montgomery "Division by Invariant 3198 Integers using Multiplication" (reference in gmp.texi), n itself gives a 3199 3-bit inverse immediately, and could be used instead of a table lookup. 3200 A 4-bit inverse can be obtained effectively from xoring bits 1 and 2 into 3201 bit 3, for instance with (((n + 2) & 4) << 1) ^ n. 
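   The multiply step below is Hensel lifting, i.e. Newton's iteration for
   an inverse modulo a power of 2: if inv*n == 1 mod 2^k then
   inv' = 2*inv - inv*inv*n gives inv'*n = 1 - (inv*n - 1)^2 == 1 mod 2^(2k),
   so each step doubles the number of correct low bits.  A small worked
   example (for illustration only): for n = 3 the table start is 0xAB,
   since 3 * 0xAB = 0x201 == 1 mod 2^8; one step extends that to an inverse
   valid mod 2^16, the next to mod 2^32, and so on up to GMP_NUMB_BITS.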
*/ 3202 3203 #define binvert_limb_table __gmp_binvert_limb_table 3204 __GMP_DECLSPEC extern const unsigned char binvert_limb_table[128]; 3205 3206 #define binvert_limb(inv,n) \ 3207 do { \ 3208 mp_limb_t __n = (n); \ 3209 mp_limb_t __inv; \ 3210 ASSERT ((__n & 1) == 1); \ 3211 \ 3212 __inv = binvert_limb_table[(__n/2) & 0x7F]; /* 8 */ \ 3213 if (GMP_NUMB_BITS > 8) __inv = 2 * __inv - __inv * __inv * __n; \ 3214 if (GMP_NUMB_BITS > 16) __inv = 2 * __inv - __inv * __inv * __n; \ 3215 if (GMP_NUMB_BITS > 32) __inv = 2 * __inv - __inv * __inv * __n; \ 3216 \ 3217 if (GMP_NUMB_BITS > 64) \ 3218 { \ 3219 int __invbits = 64; \ 3220 do { \ 3221 __inv = 2 * __inv - __inv * __inv * __n; \ 3222 __invbits *= 2; \ 3223 } while (__invbits < GMP_NUMB_BITS); \ 3224 } \ 3225 \ 3226 ASSERT ((__inv * __n & GMP_NUMB_MASK) == 1); \ 3227 (inv) = __inv & GMP_NUMB_MASK; \ 3228 } while (0) 3229 #define modlimb_invert binvert_limb /* backward compatibility */ 3230 3231 /* Multiplicative inverse of 3, modulo 2^GMP_NUMB_BITS. 3232 Eg. 0xAAAAAAAB for 32 bits, 0xAAAAAAAAAAAAAAAB for 64 bits. 3233 GMP_NUMB_MAX/3*2+1 is right when GMP_NUMB_BITS is even, but when it's odd 3234 we need to start from GMP_NUMB_MAX>>1. */ 3235 #define MODLIMB_INVERSE_3 (((GMP_NUMB_MAX >> (GMP_NUMB_BITS % 2)) / 3) * 2 + 1) 3236 3237 /* ceil(GMP_NUMB_MAX/3) and ceil(2*GMP_NUMB_MAX/3). 3238 These expressions work because GMP_NUMB_MAX%3 != 0 for all GMP_NUMB_BITS. */ 3239 #define GMP_NUMB_CEIL_MAX_DIV3 (GMP_NUMB_MAX / 3 + 1) 3240 #define GMP_NUMB_CEIL_2MAX_DIV3 ((GMP_NUMB_MAX>>1) / 3 + 1 + GMP_NUMB_HIGHBIT) 3241 3242 3243 /* Set r to -a mod d. a>=d is allowed. Can give r>d. All should be limbs. 3244 3245 It's not clear whether this is the best way to do this calculation. 3246 Anything congruent to -a would be fine for the one limb congruence 3247 tests. */ 3248 3249 #define NEG_MOD(r, a, d) \ 3250 do { \ 3251 ASSERT ((d) != 0); \ 3252 ASSERT_LIMB (a); \ 3253 ASSERT_LIMB (d); \ 3254 \ 3255 if ((a) <= (d)) \ 3256 { \ 3257 /* small a is reasonably likely */ \ 3258 (r) = (d) - (a); \ 3259 } \ 3260 else \ 3261 { \ 3262 unsigned __twos; \ 3263 mp_limb_t __dnorm; \ 3264 count_leading_zeros (__twos, d); \ 3265 __twos -= GMP_NAIL_BITS; \ 3266 __dnorm = (d) << __twos; \ 3267 (r) = ((a) <= __dnorm ? __dnorm : 2*__dnorm) - (a); \ 3268 } \ 3269 \ 3270 ASSERT_LIMB (r); \ 3271 } while (0) 3272 3273 /* A bit mask of all the least significant zero bits of n, or -1 if n==0. */ 3274 #define LOW_ZEROS_MASK(n) (((n) & -(n)) - 1) 3275 3276 3277 /* ULONG_PARITY sets "p" to 1 if there's an odd number of 1 bits in "n", or 3278 to 0 if there's an even number. "n" should be an unsigned long and "p" 3279 an int. */ 3280 3281 #if defined (__GNUC__) && ! defined (NO_ASM) && HAVE_HOST_CPU_alpha_CIX 3282 #define ULONG_PARITY(p, n) \ 3283 do { \ 3284 int __p; \ 3285 __asm__ ("ctpop %1, %0" : "=r" (__p) : "r" (n)); \ 3286 (p) = __p & 1; \ 3287 } while (0) 3288 #endif 3289 3290 /* Cray intrinsic _popcnt. */ 3291 #ifdef _CRAY 3292 #define ULONG_PARITY(p, n) \ 3293 do { \ 3294 (p) = _popcnt (n) & 1; \ 3295 } while (0) 3296 #endif 3297 3298 #if defined (__GNUC__) && ! defined (__INTEL_COMPILER) \ 3299 && ! 
defined (NO_ASM) && defined (__ia64) 3300 /* unsigned long is either 32 or 64 bits depending on the ABI, zero extend 3301 to a 64 bit unsigned long long for popcnt */ 3302 #define ULONG_PARITY(p, n) \ 3303 do { \ 3304 unsigned long long __n = (unsigned long) (n); \ 3305 int __p; \ 3306 __asm__ ("popcnt %0 = %1" : "=r" (__p) : "r" (__n)); \ 3307 (p) = __p & 1; \ 3308 } while (0) 3309 #endif 3310 3311 #if defined (__GNUC__) && ! defined (__INTEL_COMPILER) \ 3312 && ! defined (NO_ASM) && HAVE_HOST_CPU_FAMILY_x86 3313 #if __GMP_GNUC_PREREQ (3,1) 3314 #define __GMP_qm "=Qm" 3315 #define __GMP_q "=Q" 3316 #else 3317 #define __GMP_qm "=qm" 3318 #define __GMP_q "=q" 3319 #endif 3320 #define ULONG_PARITY(p, n) \ 3321 do { \ 3322 char __p; \ 3323 unsigned long __n = (n); \ 3324 __n ^= (__n >> 16); \ 3325 __asm__ ("xorb %h1, %b1\n\t" \ 3326 "setpo %0" \ 3327 : __GMP_qm (__p), __GMP_q (__n) \ 3328 : "1" (__n)); \ 3329 (p) = __p; \ 3330 } while (0) 3331 #endif 3332 3333 #if ! defined (ULONG_PARITY) 3334 #define ULONG_PARITY(p, n) \ 3335 do { \ 3336 unsigned long __n = (n); \ 3337 int __p = 0; \ 3338 do \ 3339 { \ 3340 __p ^= 0x96696996L >> (__n & 0x1F); \ 3341 __n >>= 5; \ 3342 } \ 3343 while (__n != 0); \ 3344 \ 3345 (p) = __p & 1; \ 3346 } while (0) 3347 #endif 3348 3349 3350 /* 3 cycles on 604 or 750 since shifts and rlwimi's can pair. gcc (as of 3351 version 3.1 at least) doesn't seem to know how to generate rlwimi for 3352 anything other than bit-fields, so use "asm". */ 3353 #if defined (__GNUC__) && ! defined (NO_ASM) \ 3354 && HAVE_HOST_CPU_FAMILY_powerpc && GMP_LIMB_BITS == 32 3355 #define BSWAP_LIMB(dst, src) \ 3356 do { \ 3357 mp_limb_t __bswapl_src = (src); \ 3358 mp_limb_t __tmp1 = __bswapl_src >> 24; /* low byte */ \ 3359 mp_limb_t __tmp2 = __bswapl_src << 24; /* high byte */ \ 3360 __asm__ ("rlwimi %0, %2, 24, 16, 23" /* 2nd low */ \ 3361 : "=r" (__tmp1) : "0" (__tmp1), "r" (__bswapl_src)); \ 3362 __asm__ ("rlwimi %0, %2, 8, 8, 15" /* 3nd high */ \ 3363 : "=r" (__tmp2) : "0" (__tmp2), "r" (__bswapl_src)); \ 3364 (dst) = __tmp1 | __tmp2; /* whole */ \ 3365 } while (0) 3366 #endif 3367 3368 /* bswap is available on i486 and up and is fast. A combination rorw $8 / 3369 roll $16 / rorw $8 is used in glibc for plain i386 (and in the linux 3370 kernel with xchgb instead of rorw), but this is not done here, because 3371 i386 means generic x86 and mixing word and dword operations will cause 3372 partial register stalls on P6 chips. */ 3373 #if defined (__GNUC__) && ! defined (NO_ASM) \ 3374 && HAVE_HOST_CPU_FAMILY_x86 && ! HAVE_HOST_CPU_i386 \ 3375 && GMP_LIMB_BITS == 32 3376 #define BSWAP_LIMB(dst, src) \ 3377 do { \ 3378 __asm__ ("bswap %0" : "=r" (dst) : "0" (src)); \ 3379 } while (0) 3380 #endif 3381 3382 #if defined (__GNUC__) && ! defined (NO_ASM) \ 3383 && defined (__amd64__) && GMP_LIMB_BITS == 64 3384 #define BSWAP_LIMB(dst, src) \ 3385 do { \ 3386 __asm__ ("bswap %q0" : "=r" (dst) : "0" (src)); \ 3387 } while (0) 3388 #endif 3389 3390 #if defined (__GNUC__) && ! defined (__INTEL_COMPILER) \ 3391 && ! defined (NO_ASM) && defined (__ia64) && GMP_LIMB_BITS == 64 3392 #define BSWAP_LIMB(dst, src) \ 3393 do { \ 3394 __asm__ ("mux1 %0 = %1, @rev" : "=r" (dst) : "r" (src)); \ 3395 } while (0) 3396 #endif 3397 3398 /* As per glibc. */ 3399 #if defined (__GNUC__) && ! 
defined (NO_ASM) \ 3400 && HAVE_HOST_CPU_FAMILY_m68k && GMP_LIMB_BITS == 32 3401 #define BSWAP_LIMB(dst, src) \ 3402 do { \ 3403 mp_limb_t __bswapl_src = (src); \ 3404 __asm__ ("ror%.w %#8, %0\n\t" \ 3405 "swap %0\n\t" \ 3406 "ror%.w %#8, %0" \ 3407 : "=d" (dst) \ 3408 : "0" (__bswapl_src)); \ 3409 } while (0) 3410 #endif 3411 3412 #if ! defined (BSWAP_LIMB) 3413 #if GMP_LIMB_BITS == 8 3414 #define BSWAP_LIMB(dst, src) \ 3415 do { (dst) = (src); } while (0) 3416 #endif 3417 #if GMP_LIMB_BITS == 16 3418 #define BSWAP_LIMB(dst, src) \ 3419 do { \ 3420 (dst) = ((src) << 8) + ((src) >> 8); \ 3421 } while (0) 3422 #endif 3423 #if GMP_LIMB_BITS == 32 3424 #define BSWAP_LIMB(dst, src) \ 3425 do { \ 3426 (dst) = \ 3427 ((src) << 24) \ 3428 + (((src) & 0xFF00) << 8) \ 3429 + (((src) >> 8) & 0xFF00) \ 3430 + ((src) >> 24); \ 3431 } while (0) 3432 #endif 3433 #if GMP_LIMB_BITS == 64 3434 #define BSWAP_LIMB(dst, src) \ 3435 do { \ 3436 (dst) = \ 3437 ((src) << 56) \ 3438 + (((src) & 0xFF00) << 40) \ 3439 + (((src) & 0xFF0000) << 24) \ 3440 + (((src) & 0xFF000000) << 8) \ 3441 + (((src) >> 8) & 0xFF000000) \ 3442 + (((src) >> 24) & 0xFF0000) \ 3443 + (((src) >> 40) & 0xFF00) \ 3444 + ((src) >> 56); \ 3445 } while (0) 3446 #endif 3447 #endif 3448 3449 #if ! defined (BSWAP_LIMB) 3450 #define BSWAP_LIMB(dst, src) \ 3451 do { \ 3452 mp_limb_t __bswapl_src = (src); \ 3453 mp_limb_t __dstl = 0; \ 3454 int __i; \ 3455 for (__i = 0; __i < BYTES_PER_MP_LIMB; __i++) \ 3456 { \ 3457 __dstl = (__dstl << 8) | (__bswapl_src & 0xFF); \ 3458 __bswapl_src >>= 8; \ 3459 } \ 3460 (dst) = __dstl; \ 3461 } while (0) 3462 #endif 3463 3464 3465 /* Apparently lwbrx might be slow on some PowerPC chips, so restrict it to 3466 those we know are fast. */ 3467 #if defined (__GNUC__) && ! defined (NO_ASM) \ 3468 && GMP_LIMB_BITS == 32 && HAVE_LIMB_BIG_ENDIAN \ 3469 && (HAVE_HOST_CPU_powerpc604 \ 3470 || HAVE_HOST_CPU_powerpc604e \ 3471 || HAVE_HOST_CPU_powerpc750 \ 3472 || HAVE_HOST_CPU_powerpc7400) 3473 #define BSWAP_LIMB_FETCH(limb, src) \ 3474 do { \ 3475 mp_srcptr __blf_src = (src); \ 3476 mp_limb_t __limb; \ 3477 __asm__ ("lwbrx %0, 0, %1" \ 3478 : "=r" (__limb) \ 3479 : "r" (__blf_src), \ 3480 "m" (*__blf_src)); \ 3481 (limb) = __limb; \ 3482 } while (0) 3483 #endif 3484 3485 #if ! defined (BSWAP_LIMB_FETCH) 3486 #define BSWAP_LIMB_FETCH(limb, src) BSWAP_LIMB (limb, *(src)) 3487 #endif 3488 3489 3490 /* On the same basis that lwbrx might be slow, restrict stwbrx to those we 3491 know are fast. FIXME: Is this necessary? */ 3492 #if defined (__GNUC__) && ! defined (NO_ASM) \ 3493 && GMP_LIMB_BITS == 32 && HAVE_LIMB_BIG_ENDIAN \ 3494 && (HAVE_HOST_CPU_powerpc604 \ 3495 || HAVE_HOST_CPU_powerpc604e \ 3496 || HAVE_HOST_CPU_powerpc750 \ 3497 || HAVE_HOST_CPU_powerpc7400) 3498 #define BSWAP_LIMB_STORE(dst, limb) \ 3499 do { \ 3500 mp_ptr __dst = (dst); \ 3501 mp_limb_t __limb = (limb); \ 3502 __asm__ ("stwbrx %1, 0, %2" \ 3503 : "=m" (*__dst) \ 3504 : "r" (__limb), \ 3505 "r" (__dst)); \ 3506 } while (0) 3507 #endif 3508 3509 #if ! defined (BSWAP_LIMB_STORE) 3510 #define BSWAP_LIMB_STORE(dst, limb) BSWAP_LIMB (*(dst), limb) 3511 #endif 3512 3513 3514 /* Byte swap limbs from {src,size} and store at {dst,size}. 
*/ 3515 #define MPN_BSWAP(dst, src, size) \ 3516 do { \ 3517 mp_ptr __dst = (dst); \ 3518 mp_srcptr __src = (src); \ 3519 mp_size_t __size = (size); \ 3520 mp_size_t __i; \ 3521 ASSERT ((size) >= 0); \ 3522 ASSERT (MPN_SAME_OR_SEPARATE_P (dst, src, size)); \ 3523 CRAY_Pragma ("_CRI ivdep"); \ 3524 for (__i = 0; __i < __size; __i++) \ 3525 { \ 3526 BSWAP_LIMB_FETCH (*__dst, __src); \ 3527 __dst++; \ 3528 __src++; \ 3529 } \ 3530 } while (0) 3531 3532 /* Byte swap limbs from {src,size} and store in reverse order at {dst,size}. */ 3533 #define MPN_BSWAP_REVERSE(dst, src, size) \ 3534 do { \ 3535 mp_ptr __dst = (dst); \ 3536 mp_size_t __size = (size); \ 3537 mp_srcptr __src = (src) + __size - 1; \ 3538 mp_size_t __i; \ 3539 ASSERT ((size) >= 0); \ 3540 ASSERT (! MPN_OVERLAP_P (dst, size, src, size)); \ 3541 CRAY_Pragma ("_CRI ivdep"); \ 3542 for (__i = 0; __i < __size; __i++) \ 3543 { \ 3544 BSWAP_LIMB_FETCH (*__dst, __src); \ 3545 __dst++; \ 3546 __src--; \ 3547 } \ 3548 } while (0) 3549 3550 3551 /* No processor claiming to be SPARC v9 compliant seems to 3552 implement the POPC instruction. Disable pattern for now. */ 3553 #if 0 3554 #if defined __GNUC__ && defined __sparc_v9__ && GMP_LIMB_BITS == 64 3555 #define popc_limb(result, input) \ 3556 do { \ 3557 DItype __res; \ 3558 __asm__ ("popc %1,%0" : "=r" (result) : "rI" (input)); \ 3559 } while (0) 3560 #endif 3561 #endif 3562 3563 #if defined (__GNUC__) && ! defined (NO_ASM) && HAVE_HOST_CPU_alpha_CIX 3564 #define popc_limb(result, input) \ 3565 do { \ 3566 __asm__ ("ctpop %1, %0" : "=r" (result) : "r" (input)); \ 3567 } while (0) 3568 #endif 3569 3570 /* Cray intrinsic. */ 3571 #ifdef _CRAY 3572 #define popc_limb(result, input) \ 3573 do { \ 3574 (result) = _popcnt (input); \ 3575 } while (0) 3576 #endif 3577 3578 #if defined (__GNUC__) && ! defined (__INTEL_COMPILER) \ 3579 && ! defined (NO_ASM) && defined (__ia64) && GMP_LIMB_BITS == 64 3580 #define popc_limb(result, input) \ 3581 do { \ 3582 __asm__ ("popcnt %0 = %1" : "=r" (result) : "r" (input)); \ 3583 } while (0) 3584 #endif 3585 3586 /* Cool population count of an mp_limb_t. 3587 You have to figure out how this works; we won't tell you! 3588 3589 The constants could also be expressed as: 3590 0x55... = [2^N / 3] = [(2^N-1)/3] 3591 0x33... = [2^N / 5] = [(2^N-1)/5] 3592 0x0f... = [2^N / 17] = [(2^N-1)/17] 3593 (N is GMP_LIMB_BITS, [] denotes truncation.) */ 3594 3595 #if ! defined (popc_limb) && GMP_LIMB_BITS == 8 3596 #define popc_limb(result, input) \ 3597 do { \ 3598 mp_limb_t __x = (input); \ 3599 __x -= (__x >> 1) & MP_LIMB_T_MAX/3; \ 3600 __x = ((__x >> 2) & MP_LIMB_T_MAX/5) + (__x & MP_LIMB_T_MAX/5); \ 3601 __x = ((__x >> 4) + __x); \ 3602 (result) = __x & 0x0f; \ 3603 } while (0) 3604 #endif 3605 3606 #if ! defined (popc_limb) && GMP_LIMB_BITS == 16 3607 #define popc_limb(result, input) \ 3608 do { \ 3609 mp_limb_t __x = (input); \ 3610 __x -= (__x >> 1) & MP_LIMB_T_MAX/3; \ 3611 __x = ((__x >> 2) & MP_LIMB_T_MAX/5) + (__x & MP_LIMB_T_MAX/5); \ 3612 __x = ((__x >> 4) + __x) & MP_LIMB_T_MAX/17; \ 3613 __x = ((__x >> 8) + __x); \ 3614 (result) = __x & 0xff; \ 3615 } while (0) 3616 #endif 3617 3618 #if !
defined (popc_limb) && GMP_LIMB_BITS == 32 3619 #define popc_limb(result, input) \ 3620 do { \ 3621 mp_limb_t __x = (input); \ 3622 __x -= (__x >> 1) & MP_LIMB_T_MAX/3; \ 3623 __x = ((__x >> 2) & MP_LIMB_T_MAX/5) + (__x & MP_LIMB_T_MAX/5); \ 3624 __x = ((__x >> 4) + __x) & MP_LIMB_T_MAX/17; \ 3625 __x = ((__x >> 8) + __x); \ 3626 __x = ((__x >> 16) + __x); \ 3627 (result) = __x & 0xff; \ 3628 } while (0) 3629 #endif 3630 3631 #if ! defined (popc_limb) && GMP_LIMB_BITS == 64 3632 #define popc_limb(result, input) \ 3633 do { \ 3634 mp_limb_t __x = (input); \ 3635 __x -= (__x >> 1) & MP_LIMB_T_MAX/3; \ 3636 __x = ((__x >> 2) & MP_LIMB_T_MAX/5) + (__x & MP_LIMB_T_MAX/5); \ 3637 __x = ((__x >> 4) + __x) & MP_LIMB_T_MAX/17; \ 3638 __x = ((__x >> 8) + __x); \ 3639 __x = ((__x >> 16) + __x); \ 3640 __x = ((__x >> 32) + __x); \ 3641 (result) = __x & 0xff; \ 3642 } while (0) 3643 #endif 3644 3645 3646 /* Define stuff for longlong.h. */ 3647 #if HAVE_ATTRIBUTE_MODE 3648 typedef unsigned int UQItype __attribute__ ((mode (QI))); 3649 typedef int SItype __attribute__ ((mode (SI))); 3650 typedef unsigned int USItype __attribute__ ((mode (SI))); 3651 typedef int DItype __attribute__ ((mode (DI))); 3652 typedef unsigned int UDItype __attribute__ ((mode (DI))); 3653 #else 3654 typedef unsigned char UQItype; 3655 typedef long SItype; 3656 typedef unsigned long USItype; 3657 #if HAVE_LONG_LONG 3658 typedef long long int DItype; 3659 typedef unsigned long long int UDItype; 3660 #else /* Assume `long' gives us a wide enough type. Needed for hppa2.0w. */ 3661 typedef long int DItype; 3662 typedef unsigned long int UDItype; 3663 #endif 3664 #endif 3665 3666 typedef mp_limb_t UWtype; 3667 typedef unsigned int UHWtype; 3668 #define W_TYPE_SIZE GMP_LIMB_BITS 3669 3670 /* Define ieee_double_extract and _GMP_IEEE_FLOATS. 3671 3672 Bit field packing is "implementation defined" according to C99, which 3673 leaves us at the compiler's mercy here. For some systems packing is 3674 defined in the ABI (eg. x86). In any case so far it seems universal that 3675 little endian systems pack from low to high, and big endian from high to 3676 low within the given type. 3677 3678 Within the fields we rely on the integer endianness being the same as the 3679 float endianness, this is true everywhere we know of and it'd be a fairly 3680 strange system that did anything else. 
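   As a concrete check of the layout (assuming ordinary IEEE 754 binary64
   and one of the field orders defined below), storing 1.0 gives sig = 0,
   exp = 0x3FF (the biased exponent) and manh = manl = 0, so for example

       union ieee_double_extract u;
       u.d = 1.0;
       ASSERT (u.s.exp == 0x3FF && u.s.manh == 0 && u.s.manl == 0);

   should hold, and any NaN or infinity has exp = 0x7FF, which is what
   DOUBLE_NAN_INF_ACTION further down relies on.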
*/ 3681 3682 #if HAVE_DOUBLE_IEEE_LITTLE_SWAPPED 3683 #define _GMP_IEEE_FLOATS 1 3684 union ieee_double_extract 3685 { 3686 struct 3687 { 3688 gmp_uint_least32_t manh:20; 3689 gmp_uint_least32_t exp:11; 3690 gmp_uint_least32_t sig:1; 3691 gmp_uint_least32_t manl:32; 3692 } s; 3693 double d; 3694 }; 3695 #endif 3696 3697 #if HAVE_DOUBLE_IEEE_LITTLE_ENDIAN 3698 #define _GMP_IEEE_FLOATS 1 3699 union ieee_double_extract 3700 { 3701 struct 3702 { 3703 gmp_uint_least32_t manl:32; 3704 gmp_uint_least32_t manh:20; 3705 gmp_uint_least32_t exp:11; 3706 gmp_uint_least32_t sig:1; 3707 } s; 3708 double d; 3709 }; 3710 #endif 3711 3712 #if HAVE_DOUBLE_IEEE_BIG_ENDIAN 3713 #define _GMP_IEEE_FLOATS 1 3714 union ieee_double_extract 3715 { 3716 struct 3717 { 3718 gmp_uint_least32_t sig:1; 3719 gmp_uint_least32_t exp:11; 3720 gmp_uint_least32_t manh:20; 3721 gmp_uint_least32_t manl:32; 3722 } s; 3723 double d; 3724 }; 3725 #endif 3726 3727 #if HAVE_DOUBLE_VAX_D 3728 union double_extract 3729 { 3730 struct 3731 { 3732 gmp_uint_least32_t man3:7; /* highest 7 bits */ 3733 gmp_uint_least32_t exp:8; /* excess-128 exponent */ 3734 gmp_uint_least32_t sig:1; 3735 gmp_uint_least32_t man2:16; 3736 gmp_uint_least32_t man1:16; 3737 gmp_uint_least32_t man0:16; /* lowest 16 bits */ 3738 } s; 3739 double d; 3740 }; 3741 #endif 3742 3743 /* Use (4.0 * ...) instead of (2.0 * ...) to work around buggy compilers 3744 that don't convert ulong->double correctly (eg. SunOS 4 native cc). */ 3745 #define MP_BASE_AS_DOUBLE (4.0 * ((mp_limb_t) 1 << (GMP_NUMB_BITS - 2))) 3746 /* Maximum number of limbs it will take to store any `double'. 3747 We assume doubles have 53 mantissa bits. */ 3748 #define LIMBS_PER_DOUBLE ((53 + GMP_NUMB_BITS - 2) / GMP_NUMB_BITS + 1) 3749 3750 __GMP_DECLSPEC int __gmp_extract_double (mp_ptr, double); 3751 3752 #define mpn_get_d __gmpn_get_d 3753 __GMP_DECLSPEC double mpn_get_d (mp_srcptr, mp_size_t, mp_size_t, long) __GMP_ATTRIBUTE_PURE; 3754 3755 3756 /* DOUBLE_NAN_INF_ACTION executes code a_nan if x is a NaN, or executes 3757 a_inf if x is an infinity. Both are considered unlikely values, for 3758 branch prediction. */ 3759 3760 #if _GMP_IEEE_FLOATS 3761 #define DOUBLE_NAN_INF_ACTION(x, a_nan, a_inf) \ 3762 do { \ 3763 union ieee_double_extract u; \ 3764 u.d = (x); \ 3765 if (UNLIKELY (u.s.exp == 0x7FF)) \ 3766 { \ 3767 if (u.s.manl == 0 && u.s.manh == 0) \ 3768 { a_inf; } \ 3769 else \ 3770 { a_nan; } \ 3771 } \ 3772 } while (0) 3773 #endif 3774 3775 #if HAVE_DOUBLE_VAX_D || HAVE_DOUBLE_VAX_G || HAVE_DOUBLE_CRAY_CFP 3776 /* no nans or infs in these formats */ 3777 #define DOUBLE_NAN_INF_ACTION(x, a_nan, a_inf) \ 3778 do { } while (0) 3779 #endif 3780 3781 #ifndef DOUBLE_NAN_INF_ACTION 3782 /* Unknown format, try something generic. 3783 NaN should be "unordered", so x!=x. 3784 Inf should be bigger than DBL_MAX. */ 3785 #define DOUBLE_NAN_INF_ACTION(x, a_nan, a_inf) \ 3786 do { \ 3787 { \ 3788 if (UNLIKELY ((x) != (x))) \ 3789 { a_nan; } \ 3790 else if (UNLIKELY ((x) > DBL_MAX || (x) < -DBL_MAX)) \ 3791 { a_inf; } \ 3792 } \ 3793 } while (0) 3794 #endif 3795 3796 /* On m68k, x86 and amd64, gcc (and maybe other compilers) can hold doubles 3797 in the coprocessor, which means a bigger exponent range than normal, and 3798 depending on the rounding mode, a bigger mantissa than normal. (See 3799 "Disappointments" in the gcc manual.) FORCE_DOUBLE stores and fetches 3800 "d" through memory to force any rounding and overflows to occur. 
3801 3802 On amd64, and on x86s with SSE2, gcc (depending on options) uses the xmm 3803 registers, where there's no such extra precision and no need for the 3804 FORCE_DOUBLE. We don't bother to detect this since the present uses for 3805 FORCE_DOUBLE are only in test programs and default generic C code. 3806 3807 Not quite sure that an "automatic volatile" will use memory, but it does 3808 in gcc. An asm("":"=m"(d):"0"(d)) can't be used to trick gcc, since 3809 apparently matching operands like "0" are only allowed on a register 3810 output. gcc 3.4 warns about this, though in fact it and past versions 3811 seem to put the operand through memory as hoped. */ 3812 3813 #if (HAVE_HOST_CPU_FAMILY_m68k || HAVE_HOST_CPU_FAMILY_x86 \ 3814 || defined (__amd64__)) 3815 #define FORCE_DOUBLE(d) \ 3816 do { volatile double __gmp_force = (d); (d) = __gmp_force; } while (0) 3817 #else 3818 #define FORCE_DOUBLE(d) do { } while (0) 3819 #endif 3820 3821 3822 __GMP_DECLSPEC extern const unsigned char __gmp_digit_value_tab[]; 3823 3824 __GMP_DECLSPEC extern int __gmp_junk; 3825 __GMP_DECLSPEC extern const int __gmp_0; 3826 __GMP_DECLSPEC void __gmp_exception (int) ATTRIBUTE_NORETURN; 3827 __GMP_DECLSPEC void __gmp_divide_by_zero (void) ATTRIBUTE_NORETURN; 3828 __GMP_DECLSPEC void __gmp_sqrt_of_negative (void) ATTRIBUTE_NORETURN; 3829 __GMP_DECLSPEC void __gmp_invalid_operation (void) ATTRIBUTE_NORETURN; 3830 #define GMP_ERROR(code) __gmp_exception (code) 3831 #define DIVIDE_BY_ZERO __gmp_divide_by_zero () 3832 #define SQRT_OF_NEGATIVE __gmp_sqrt_of_negative () 3833 3834 #if defined _LONG_LONG_LIMB 3835 #define CNST_LIMB(C) ((mp_limb_t) C##LL) 3836 #else /* not _LONG_LONG_LIMB */ 3837 #define CNST_LIMB(C) ((mp_limb_t) C##L) 3838 #endif /* _LONG_LONG_LIMB */ 3839 3840 /* Stuff used by mpn/generic/perfsqr.c and mpz/prime_p.c */ 3841 #if GMP_NUMB_BITS == 2 3842 #define PP 0x3 /* 3 */ 3843 #define PP_FIRST_OMITTED 5 3844 #endif 3845 #if GMP_NUMB_BITS == 4 3846 #define PP 0xF /* 3 x 5 */ 3847 #define PP_FIRST_OMITTED 7 3848 #endif 3849 #if GMP_NUMB_BITS == 8 3850 #define PP 0x69 /* 3 x 5 x 7 */ 3851 #define PP_FIRST_OMITTED 11 3852 #endif 3853 #if GMP_NUMB_BITS == 16 3854 #define PP 0x3AA7 /* 3 x 5 x 7 x 11 x 13 */ 3855 #define PP_FIRST_OMITTED 17 3856 #endif 3857 #if GMP_NUMB_BITS == 32 3858 #define PP 0xC0CFD797L /* 3 x 5 x 7 x 11 x ... x 29 */ 3859 #define PP_INVERTED 0x53E5645CL 3860 #define PP_FIRST_OMITTED 31 3861 #endif 3862 #if GMP_NUMB_BITS == 64 3863 #define PP CNST_LIMB(0xE221F97C30E94E1D) /* 3 x 5 x 7 x 11 x ... x 53 */ 3864 #define PP_INVERTED CNST_LIMB(0x21CFE6CFC938B36B) 3865 #define PP_FIRST_OMITTED 59 3866 #endif 3867 #ifndef PP_FIRST_OMITTED 3868 #define PP_FIRST_OMITTED 3 3869 #endif 3870 3871 /* BIT1 means a result value in bit 1 (second least significant bit), with a 3872 zero bit representing +1 and a one bit representing -1. Bits other than 3873 bit 1 are garbage. These are meant to be kept in "int"s, and casts are 3874 used to ensure the expressions are "int"s even if a and/or b might be 3875 other types. 3876 3877 JACOBI_TWOS_U_BIT1 and JACOBI_RECIP_UU_BIT1 are used in mpn_jacobi_base 3878 and their speed is important. Expressions are used rather than 3879 conditionals to accumulate sign changes, which effectively means XORs 3880 instead of conditional JUMPs. 
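   A small worked example (illustrative only): for a = 3, b = 7, both are
   3 mod 4, so reciprocity contributes a factor -1; JACOBI_RECIP_UU_BIT1
   below computes a & b = 3, whose bit 1 is set, and JACOBI_BIT1_TO_PN
   turns that into 1 - (3 & 2) = -1.  For a = 1, b = 3 instead, a & b = 1
   has bit 1 clear, giving +1.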
*/ 3881 3882 /* (a/0), with a signed; is 1 if a=+/-1, 0 otherwise */ 3883 #define JACOBI_S0(a) (((a) == 1) | ((a) == -1)) 3884 3885 /* (a/0), with a unsigned; is 1 if a=+/-1, 0 otherwise */ 3886 #define JACOBI_U0(a) ((a) == 1) 3887 3888 /* FIXME: JACOBI_LS0 and JACOBI_0LS are the same, so delete one and 3889 come up with a better name. */ 3890 3891 /* (a/0), with a given by low and size; 3892 is 1 if a=+/-1, 0 otherwise */ 3893 #define JACOBI_LS0(alow,asize) \ 3894 (((asize) == 1 || (asize) == -1) && (alow) == 1) 3895 3896 /* (a/0), with a an mpz_t; 3897 fetch of low limb always valid, even if size is zero */ 3898 #define JACOBI_Z0(a) JACOBI_LS0 (PTR(a)[0], SIZ(a)) 3899 3900 /* (0/b), with b unsigned; is 1 if b=1, 0 otherwise */ 3901 #define JACOBI_0U(b) ((b) == 1) 3902 3903 /* (0/b), with b unsigned; is 1 if b=+/-1, 0 otherwise */ 3904 #define JACOBI_0S(b) ((b) == 1 || (b) == -1) 3905 3906 /* (0/b), with b given by low and size; is 1 if b=+/-1, 0 otherwise */ 3907 #define JACOBI_0LS(blow,bsize) \ 3908 (((bsize) == 1 || (bsize) == -1) && (blow) == 1) 3909 3910 /* Convert a bit1 to +1 or -1. */ 3911 #define JACOBI_BIT1_TO_PN(result_bit1) \ 3912 (1 - ((int) (result_bit1) & 2)) 3913 3914 /* (2/b), with b unsigned and odd; 3915 is (-1)^((b^2-1)/8) which is 1 if b==1,7mod8 or -1 if b==3,5mod8 and 3916 hence obtained from (b>>1)^b */ 3917 #define JACOBI_TWO_U_BIT1(b) \ 3918 ((int) (((b) >> 1) ^ (b))) 3919 3920 /* (2/b)^twos, with b unsigned and odd */ 3921 #define JACOBI_TWOS_U_BIT1(twos, b) \ 3922 ((int) ((twos) << 1) & JACOBI_TWO_U_BIT1 (b)) 3923 3924 /* (2/b)^twos, with b unsigned and odd */ 3925 #define JACOBI_TWOS_U(twos, b) \ 3926 (JACOBI_BIT1_TO_PN (JACOBI_TWOS_U_BIT1 (twos, b))) 3927 3928 /* (-1/b), with b odd (signed or unsigned); 3929 is (-1)^((b-1)/2) */ 3930 #define JACOBI_N1B_BIT1(b) \ 3931 ((int) (b)) 3932 3933 /* (a/b) effect due to sign of a: signed/unsigned, b odd; 3934 is (-1/b) if a<0, or +1 if a>=0 */ 3935 #define JACOBI_ASGN_SU_BIT1(a, b) \ 3936 ((((a) < 0) << 1) & JACOBI_N1B_BIT1(b)) 3937 3938 /* (a/b) effect due to sign of b: signed/signed; 3939 is -1 if a and b both negative, +1 otherwise */ 3940 #define JACOBI_BSGN_SS_BIT1(a, b) \ 3941 ((((a)<0) & ((b)<0)) << 1) 3942 3943 /* (a/b) effect due to sign of b: signed/mpz; 3944 is -1 if a and b both negative, +1 otherwise */ 3945 #define JACOBI_BSGN_SZ_BIT1(a, b) \ 3946 JACOBI_BSGN_SS_BIT1 (a, SIZ(b)) 3947 3948 /* (a/b) effect due to sign of b: mpz/signed; 3949 is -1 if a and b both negative, +1 otherwise */ 3950 #define JACOBI_BSGN_ZS_BIT1(a, b) \ 3951 JACOBI_BSGN_SZ_BIT1 (b, a) 3952 3953 /* (a/b) reciprocity to switch to (b/a), a,b both unsigned and odd; 3954 is (-1)^((a-1)*(b-1)/4), which means +1 if either a,b==1mod4, or -1 if 3955 both a,b==3mod4, achieved in bit 1 by a&b. No ASSERT()s about a,b odd 3956 because this is used in a couple of places with only bit 1 of a or b 3957 valid. */ 3958 #define JACOBI_RECIP_UU_BIT1(a, b) \ 3959 ((int) ((a) & (b))) 3960 3961 /* Strip low zero limbs from {b_ptr,b_size} by incrementing b_ptr and 3962 decrementing b_size. b_low should be b_ptr[0] on entry, and will be 3963 updated for the new b_ptr. result_bit1 is updated according to the 3964 factors of 2 stripped, as per (a/2). 
*/ 3965 #define JACOBI_STRIP_LOW_ZEROS(result_bit1, a, b_ptr, b_size, b_low) \ 3966 do { \ 3967 ASSERT ((b_size) >= 1); \ 3968 ASSERT ((b_low) == (b_ptr)[0]); \ 3969 \ 3970 while (UNLIKELY ((b_low) == 0)) \ 3971 { \ 3972 (b_size)--; \ 3973 ASSERT ((b_size) >= 1); \ 3974 (b_ptr)++; \ 3975 (b_low) = *(b_ptr); \ 3976 \ 3977 ASSERT (((a) & 1) != 0); \ 3978 if ((GMP_NUMB_BITS % 2) == 1) \ 3979 (result_bit1) ^= JACOBI_TWO_U_BIT1(a); \ 3980 } \ 3981 } while (0) 3982 3983 /* Set a_rem to {a_ptr,a_size} reduced modulo b, either using mod_1 or 3984 modexact_1_odd, but in either case leaving a_rem<b. b must be odd and 3985 unsigned. modexact_1_odd effectively calculates -a mod b, and 3986 result_bit1 is adjusted for the factor of -1. 3987 3988 The way mpn_modexact_1_odd sometimes bases its remainder on a_size and 3989 sometimes on a_size-1 means if GMP_NUMB_BITS is odd we can't know what 3990 factor to introduce into result_bit1, so for that case use mpn_mod_1 3991 unconditionally. 3992 3993 FIXME: mpn_modexact_1_odd is more efficient, so some way to get it used 3994 for odd GMP_NUMB_BITS would be good. Perhaps it could mung its result, 3995 or not skip a divide step, or something. */ 3996 3997 #define JACOBI_MOD_OR_MODEXACT_1_ODD(result_bit1, a_rem, a_ptr, a_size, b) \ 3998 do { \ 3999 mp_srcptr __a_ptr = (a_ptr); \ 4000 mp_size_t __a_size = (a_size); \ 4001 mp_limb_t __b = (b); \ 4002 \ 4003 ASSERT (__a_size >= 1); \ 4004 ASSERT (__b & 1); \ 4005 \ 4006 if ((GMP_NUMB_BITS % 2) != 0 \ 4007 || ABOVE_THRESHOLD (__a_size, BMOD_1_TO_MOD_1_THRESHOLD)) \ 4008 { \ 4009 (a_rem) = mpn_mod_1 (__a_ptr, __a_size, __b); \ 4010 } \ 4011 else \ 4012 { \ 4013 (result_bit1) ^= JACOBI_N1B_BIT1 (__b); \ 4014 (a_rem) = mpn_modexact_1_odd (__a_ptr, __a_size, __b); \ 4015 } \ 4016 } while (0) 4017 4018 /* State for the Jacobi computation using Lehmer. */ 4019 #define jacobi_table __gmp_jacobi_table 4020 __GMP_DECLSPEC extern const unsigned char jacobi_table[208]; 4021 4022 /* Bit layout for the initial state. b must be odd. 4023 4024 3 2 1 0 4025 +--+--+--+--+ 4026 |a1|a0|b1| s| 4027 +--+--+--+--+ 4028 4029 */ 4030 static inline unsigned 4031 mpn_jacobi_init (unsigned a, unsigned b, unsigned s) 4032 { 4033 ASSERT (b & 1); 4034 ASSERT (s <= 1); 4035 return ((a & 3) << 2) + (b & 2) + s; 4036 } 4037 4038 static inline int 4039 mpn_jacobi_finish (unsigned bits) 4040 { 4041 /* (a, b) = (1,0) or (0,1) */ 4042 ASSERT ( (bits & 14) == 0); 4043 4044 return 1-2*(bits & 1); 4045 } 4046 4047 static inline unsigned 4048 mpn_jacobi_update (unsigned bits, unsigned denominator, unsigned q) 4049 { 4050 /* FIXME: Could halve table size by not including the e bit in the 4051 * index, and instead xor when updating. Then the lookup would be 4052 * like 4053 * 4054 * bits ^= table[((bits & 30) << 2) + (denominator << 2) + q]; 4055 */ 4056 4057 ASSERT (bits < 26); 4058 ASSERT (denominator < 2); 4059 ASSERT (q < 4); 4060 4061 /* For almost all calls, denominator is constant and quite often q 4062 is constant too. So use addition rather than or, so the compiler 4063 can put the constant part can into the offset of an indexed 4064 addressing instruction. 4065 4066 With constant denominator, the below table lookup is compiled to 4067 4068 C Constant q = 1, constant denominator = 1 4069 movzbl table+5(%eax,8), %eax 4070 4071 or 4072 4073 C q in %edx, constant denominator = 1 4074 movzbl table+4(%edx,%eax,8), %eax 4075 4076 One could maintain the state preshifted 3 bits, to save a shift 4077 here, but at least on x86, that's no real saving. 
4078 */ 4079 return bits = jacobi_table[(bits << 3) + (denominator << 2) + q]; 4080 } 4081 4082 /* Matrix multiplication */ 4083 #define mpn_matrix22_mul __MPN(matrix22_mul) 4084 __GMP_DECLSPEC void mpn_matrix22_mul (mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_srcptr, mp_srcptr, mp_srcptr, mp_size_t, mp_ptr); 4085 #define mpn_matrix22_mul_strassen __MPN(matrix22_mul_strassen) 4086 __GMP_DECLSPEC void mpn_matrix22_mul_strassen (mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_srcptr, mp_srcptr, mp_srcptr, mp_size_t, mp_ptr); 4087 #define mpn_matrix22_mul_itch __MPN(matrix22_mul_itch) 4088 __GMP_DECLSPEC mp_size_t mpn_matrix22_mul_itch (mp_size_t, mp_size_t); 4089 4090 #ifndef MATRIX22_STRASSEN_THRESHOLD 4091 #define MATRIX22_STRASSEN_THRESHOLD 30 4092 #endif 4093 4094 /* HGCD definitions */ 4095 4096 /* Extract one numb, shifting count bits left 4097 ________ ________ 4098 |___xh___||___xl___| 4099 |____r____| 4100 >count < 4101 4102 The count includes any nail bits, so it should work fine if count 4103 is computed using count_leading_zeros. If GMP_NAIL_BITS > 0, all of 4104 xh, xl and r include nail bits. Must have 0 < count < GMP_LIMB_BITS. 4105 4106 FIXME: Omit masking with GMP_NUMB_MASK, and let callers do that for 4107 those calls where the count high bits of xh may be non-zero. 4108 */ 4109 4110 #define MPN_EXTRACT_NUMB(count, xh, xl) \ 4111 ((((xh) << ((count) - GMP_NAIL_BITS)) & GMP_NUMB_MASK) | \ 4112 ((xl) >> (GMP_LIMB_BITS - (count)))) 4113 4114 4115 /* The matrix non-negative M = (u, u'; v,v') keeps track of the 4116 reduction (a;b) = M (alpha; beta) where alpha, beta are smaller 4117 than a, b. The determinant must always be one, so that M has an 4118 inverse (v', -u'; -v, u). Elements always fit in GMP_NUMB_BITS - 1 4119 bits. 
*/ 4120 struct hgcd_matrix1 4121 { 4122 mp_limb_t u[2][2]; 4123 }; 4124 4125 #define mpn_hgcd2 __MPN (hgcd2) 4126 __GMP_DECLSPEC int mpn_hgcd2 (mp_limb_t, mp_limb_t, mp_limb_t, mp_limb_t, struct hgcd_matrix1 *); 4127 4128 #define mpn_hgcd_mul_matrix1_vector __MPN (hgcd_mul_matrix1_vector) 4129 __GMP_DECLSPEC mp_size_t mpn_hgcd_mul_matrix1_vector (const struct hgcd_matrix1 *, mp_ptr, mp_srcptr, mp_ptr, mp_size_t); 4130 4131 #define mpn_matrix22_mul1_inverse_vector __MPN (matrix22_mul1_inverse_vector) 4132 __GMP_DECLSPEC mp_size_t mpn_matrix22_mul1_inverse_vector (const struct hgcd_matrix1 *, mp_ptr, mp_srcptr, mp_ptr, mp_size_t); 4133 4134 #define mpn_hgcd2_jacobi __MPN (hgcd2_jacobi) 4135 __GMP_DECLSPEC int mpn_hgcd2_jacobi (mp_limb_t, mp_limb_t, mp_limb_t, mp_limb_t, struct hgcd_matrix1 *, unsigned *); 4136 4137 struct hgcd_matrix 4138 { 4139 mp_size_t alloc; /* for sanity checking only */ 4140 mp_size_t n; 4141 mp_ptr p[2][2]; 4142 }; 4143 4144 #define MPN_HGCD_MATRIX_INIT_ITCH(n) (4 * ((n+1)/2 + 1)) 4145 4146 #define mpn_hgcd_matrix_init __MPN (hgcd_matrix_init) 4147 __GMP_DECLSPEC void mpn_hgcd_matrix_init (struct hgcd_matrix *, mp_size_t, mp_ptr); 4148 4149 #define mpn_hgcd_matrix_update_q __MPN (hgcd_matrix_update_q) 4150 __GMP_DECLSPEC void mpn_hgcd_matrix_update_q (struct hgcd_matrix *, mp_srcptr, mp_size_t, unsigned, mp_ptr); 4151 4152 #define mpn_hgcd_matrix_mul_1 __MPN (hgcd_matrix_mul_1) 4153 __GMP_DECLSPEC void mpn_hgcd_matrix_mul_1 (struct hgcd_matrix *, const struct hgcd_matrix1 *, mp_ptr); 4154 4155 #define mpn_hgcd_matrix_mul __MPN (hgcd_matrix_mul) 4156 __GMP_DECLSPEC void mpn_hgcd_matrix_mul (struct hgcd_matrix *, const struct hgcd_matrix *, mp_ptr); 4157 4158 #define mpn_hgcd_matrix_adjust __MPN (hgcd_matrix_adjust) 4159 __GMP_DECLSPEC mp_size_t mpn_hgcd_matrix_adjust (const struct hgcd_matrix *, mp_size_t, mp_ptr, mp_ptr, mp_size_t, mp_ptr); 4160 4161 #define mpn_hgcd_step __MPN(hgcd_step) 4162 __GMP_DECLSPEC mp_size_t mpn_hgcd_step (mp_size_t, mp_ptr, mp_ptr, mp_size_t, struct hgcd_matrix *, mp_ptr); 4163 4164 #define mpn_hgcd_reduce __MPN(hgcd_reduce) 4165 __GMP_DECLSPEC mp_size_t mpn_hgcd_reduce (struct hgcd_matrix *, mp_ptr, mp_ptr, mp_size_t, mp_size_t, mp_ptr); 4166 4167 #define mpn_hgcd_reduce_itch __MPN(hgcd_reduce_itch) 4168 __GMP_DECLSPEC mp_size_t mpn_hgcd_reduce_itch (mp_size_t, mp_size_t); 4169 4170 #define mpn_hgcd_itch __MPN (hgcd_itch) 4171 __GMP_DECLSPEC mp_size_t mpn_hgcd_itch (mp_size_t); 4172 4173 #define mpn_hgcd __MPN (hgcd) 4174 __GMP_DECLSPEC mp_size_t mpn_hgcd (mp_ptr, mp_ptr, mp_size_t, struct hgcd_matrix *, mp_ptr); 4175 4176 #define mpn_hgcd_appr_itch __MPN (hgcd_appr_itch) 4177 __GMP_DECLSPEC mp_size_t mpn_hgcd_appr_itch (mp_size_t); 4178 4179 #define mpn_hgcd_appr __MPN (hgcd_appr) 4180 __GMP_DECLSPEC int mpn_hgcd_appr (mp_ptr, mp_ptr, mp_size_t, struct hgcd_matrix *, mp_ptr); 4181 4182 #define mpn_hgcd_jacobi __MPN (hgcd_jacobi) 4183 __GMP_DECLSPEC mp_size_t mpn_hgcd_jacobi (mp_ptr, mp_ptr, mp_size_t, struct hgcd_matrix *, unsigned *, mp_ptr); 4184 4185 typedef void gcd_subdiv_step_hook(void *, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, int); 4186 4187 /* Needs storage for the quotient */ 4188 #define MPN_GCD_SUBDIV_STEP_ITCH(n) (n) 4189 4190 #define mpn_gcd_subdiv_step __MPN(gcd_subdiv_step) 4191 __GMP_DECLSPEC mp_size_t mpn_gcd_subdiv_step (mp_ptr, mp_ptr, mp_size_t, mp_size_t, gcd_subdiv_step_hook *, void *, mp_ptr); 4192 4193 struct gcdext_ctx 4194 { 4195 /* Result parameters. 
*/ 4196 mp_ptr gp; 4197 mp_size_t gn; 4198 mp_ptr up; 4199 mp_size_t *usize; 4200 4201 /* Cofactors updated in each step. */ 4202 mp_size_t un; 4203 mp_ptr u0, u1, tp; 4204 }; 4205 4206 #define mpn_gcdext_hook __MPN (gcdext_hook) 4207 gcd_subdiv_step_hook mpn_gcdext_hook; 4208 4209 #define MPN_GCDEXT_LEHMER_N_ITCH(n) (4*(n) + 3) 4210 4211 #define mpn_gcdext_lehmer_n __MPN(gcdext_lehmer_n) 4212 __GMP_DECLSPEC mp_size_t mpn_gcdext_lehmer_n (mp_ptr, mp_ptr, mp_size_t *, mp_ptr, mp_ptr, mp_size_t, mp_ptr); 4213 4214 /* 4*(an + 1) + 4*(bn + 1) + an */ 4215 #define MPN_GCDEXT_LEHMER_ITCH(an, bn) (5*(an) + 4*(bn) + 8) 4216 4217 #ifndef HGCD_THRESHOLD 4218 #define HGCD_THRESHOLD 400 4219 #endif 4220 4221 #ifndef HGCD_APPR_THRESHOLD 4222 #define HGCD_APPR_THRESHOLD 400 4223 #endif 4224 4225 #ifndef HGCD_REDUCE_THRESHOLD 4226 #define HGCD_REDUCE_THRESHOLD 1000 4227 #endif 4228 4229 #ifndef GCD_DC_THRESHOLD 4230 #define GCD_DC_THRESHOLD 1000 4231 #endif 4232 4233 #ifndef GCDEXT_DC_THRESHOLD 4234 #define GCDEXT_DC_THRESHOLD 600 4235 #endif 4236 4237 /* Definitions for mpn_set_str and mpn_get_str */ 4238 struct powers 4239 { 4240 mp_ptr p; /* actual power value */ 4241 mp_size_t n; /* # of limbs at p */ 4242 mp_size_t shift; /* weight of lowest limb, in limb base B */ 4243 size_t digits_in_base; /* number of corresponding digits */ 4244 int base; 4245 }; 4246 typedef struct powers powers_t; 4247 #define mpn_dc_set_str_powtab_alloc(n) ((n) + GMP_LIMB_BITS) 4248 #define mpn_dc_set_str_itch(n) ((n) + GMP_LIMB_BITS) 4249 #define mpn_dc_get_str_powtab_alloc(n) ((n) + 2 * GMP_LIMB_BITS) 4250 #define mpn_dc_get_str_itch(n) ((n) + GMP_LIMB_BITS) 4251 4252 #define mpn_dc_set_str __MPN(dc_set_str) 4253 __GMP_DECLSPEC mp_size_t mpn_dc_set_str (mp_ptr, const unsigned char *, size_t, const powers_t *, mp_ptr); 4254 #define mpn_bc_set_str __MPN(bc_set_str) 4255 __GMP_DECLSPEC mp_size_t mpn_bc_set_str (mp_ptr, const unsigned char *, size_t, int); 4256 #define mpn_set_str_compute_powtab __MPN(set_str_compute_powtab) 4257 __GMP_DECLSPEC void mpn_set_str_compute_powtab (powers_t *, mp_ptr, mp_size_t, int); 4258 4259 4260 /* __GMPF_BITS_TO_PREC applies a minimum 53 bits, rounds upwards to a whole 4261 limb and adds an extra limb. __GMPF_PREC_TO_BITS drops that extra limb, 4262 hence giving back the user's size in bits rounded up. Notice that 4263 converting prec->bits->prec gives an unchanged value. */ 4264 #define __GMPF_BITS_TO_PREC(n) \ 4265 ((mp_size_t) ((__GMP_MAX (53, n) + 2 * GMP_NUMB_BITS - 1) / GMP_NUMB_BITS)) 4266 #define __GMPF_PREC_TO_BITS(n) \ 4267 ((mp_bitcnt_t) (n) * GMP_NUMB_BITS - GMP_NUMB_BITS) 4268 4269 __GMP_DECLSPEC extern mp_size_t __gmp_default_fp_limb_precision; 4270 4271 /* Compute the number of base-b digits corresponding to nlimbs limbs, rounding 4272 down. */ 4273 #define DIGITS_IN_BASE_PER_LIMB(res, nlimbs, b) \ 4274 do { \ 4275 mp_limb_t _ph, _pl; \ 4276 umul_ppmm (_ph, _pl, \ 4277 mp_bases[b].logb2, GMP_NUMB_BITS * (mp_limb_t) (nlimbs));\ 4278 res = _ph; \ 4279 } while (0) 4280 4281 /* Compute the number of limbs corresponding to ndigits base-b digits, rounding 4282 up. */ 4283 #define LIMBS_PER_DIGIT_IN_BASE(res, ndigits, b) \ 4284 do { \ 4285 mp_limb_t _ph, _dummy; \ 4286 umul_ppmm (_ph, _dummy, mp_bases[b].log2b, (mp_limb_t) (ndigits)); \ 4287 res = 8 * _ph / GMP_NUMB_BITS + 2; \ 4288 } while (0) 4289 4290 4291 /* Set n to the number of significant digits an mpf of the given _mp_prec 4292 field, in the given base. 
This is a rounded up value, designed to ensure 4293 there's enough digits to reproduce all the guaranteed part of the value. 4294 4295 There are prec many limbs, but the high might be only "1" so forget it 4296 and just count prec-1 limbs into chars. +1 rounds that upwards, and a 4297 further +1 is because the limbs usually won't fall on digit boundaries. 4298 4299 FIXME: If base is a power of 2 and the bits per digit divides 4300 GMP_LIMB_BITS then the +2 is unnecessary. This happens always for 4301 base==2, and in base==16 with the current 32 or 64 bit limb sizes. */ 4302 4303 #define MPF_SIGNIFICANT_DIGITS(n, base, prec) \ 4304 do { \ 4305 size_t rawn; \ 4306 ASSERT (base >= 2 && base < numberof (mp_bases)); \ 4307 DIGITS_IN_BASE_PER_LIMB (rawn, (prec) - 1, base); \ 4308 n = rawn + 2; \ 4309 } while (0) 4310 4311 4312 /* Decimal point string, from the current C locale. Needs <langinfo.h> for 4313 nl_langinfo and constants, preferably with _GNU_SOURCE defined to get 4314 DECIMAL_POINT from glibc, and needs <locale.h> for localeconv, each under 4315 their respective #if HAVE_FOO_H. 4316 4317 GLIBC recommends nl_langinfo because getting only one facet can be 4318 faster, apparently. */ 4319 4320 /* DECIMAL_POINT seems to need _GNU_SOURCE defined to get it from glibc. */ 4321 #if HAVE_NL_LANGINFO && defined (DECIMAL_POINT) 4322 #define GMP_DECIMAL_POINT (nl_langinfo (DECIMAL_POINT)) 4323 #endif 4324 /* RADIXCHAR is deprecated, still in unix98 or some such. */ 4325 #if HAVE_NL_LANGINFO && defined (RADIXCHAR) && ! defined (GMP_DECIMAL_POINT) 4326 #define GMP_DECIMAL_POINT (nl_langinfo (RADIXCHAR)) 4327 #endif 4328 /* localeconv is slower since it returns all locale stuff */ 4329 #if HAVE_LOCALECONV && ! defined (GMP_DECIMAL_POINT) 4330 #define GMP_DECIMAL_POINT (localeconv()->decimal_point) 4331 #endif 4332 #if ! 
defined (GMP_DECIMAL_POINT) 4333 #define GMP_DECIMAL_POINT (".") 4334 #endif 4335 4336 4337 #define DOPRNT_CONV_FIXED 1 4338 #define DOPRNT_CONV_SCIENTIFIC 2 4339 #define DOPRNT_CONV_GENERAL 3 4340 4341 #define DOPRNT_JUSTIFY_NONE 0 4342 #define DOPRNT_JUSTIFY_LEFT 1 4343 #define DOPRNT_JUSTIFY_RIGHT 2 4344 #define DOPRNT_JUSTIFY_INTERNAL 3 4345 4346 #define DOPRNT_SHOWBASE_YES 1 4347 #define DOPRNT_SHOWBASE_NO 2 4348 #define DOPRNT_SHOWBASE_NONZERO 3 4349 4350 struct doprnt_params_t { 4351 int base; /* negative for upper case */ 4352 int conv; /* choices above */ 4353 const char *expfmt; /* exponent format */ 4354 int exptimes4; /* exponent multiply by 4 */ 4355 char fill; /* character */ 4356 int justify; /* choices above */ 4357 int prec; /* prec field, or -1 for all digits */ 4358 int showbase; /* choices above */ 4359 int showpoint; /* if radix point always shown */ 4360 int showtrailing; /* if trailing zeros wanted */ 4361 char sign; /* '+', ' ', or '\0' */ 4362 int width; /* width field */ 4363 }; 4364 4365 #if _GMP_H_HAVE_VA_LIST 4366 4367 typedef int (*doprnt_format_t) (void *, const char *, va_list); 4368 typedef int (*doprnt_memory_t) (void *, const char *, size_t); 4369 typedef int (*doprnt_reps_t) (void *, int, int); 4370 typedef int (*doprnt_final_t) (void *); 4371 4372 struct doprnt_funs_t { 4373 doprnt_format_t format; 4374 doprnt_memory_t memory; 4375 doprnt_reps_t reps; 4376 doprnt_final_t final; /* NULL if not required */ 4377 }; 4378 4379 extern const struct doprnt_funs_t __gmp_fprintf_funs; 4380 extern const struct doprnt_funs_t __gmp_sprintf_funs; 4381 extern const struct doprnt_funs_t __gmp_snprintf_funs; 4382 extern const struct doprnt_funs_t __gmp_obstack_printf_funs; 4383 extern const struct doprnt_funs_t __gmp_ostream_funs; 4384 4385 /* "buf" is a __gmp_allocate_func block of "alloc" many bytes. The first 4386 "size" of these have been written. "alloc > size" is maintained, so 4387 there's room to store a '\0' at the end. "result" is where the 4388 application wants the final block pointer. */ 4389 struct gmp_asprintf_t { 4390 char **result; 4391 char *buf; 4392 size_t size; 4393 size_t alloc; 4394 }; 4395 4396 #define GMP_ASPRINTF_T_INIT(d, output) \ 4397 do { \ 4398 (d).result = (output); \ 4399 (d).alloc = 256; \ 4400 (d).buf = (char *) (*__gmp_allocate_func) ((d).alloc); \ 4401 (d).size = 0; \ 4402 } while (0) 4403 4404 /* If a realloc is necessary, use twice the size actually required, so as to 4405 avoid repeated small reallocs. */ 4406 #define GMP_ASPRINTF_T_NEED(d, n) \ 4407 do { \ 4408 size_t alloc, newsize, newalloc; \ 4409 ASSERT ((d)->alloc >= (d)->size + 1); \ 4410 \ 4411 alloc = (d)->alloc; \ 4412 newsize = (d)->size + (n); \ 4413 if (alloc <= newsize) \ 4414 { \ 4415 newalloc = 2*newsize; \ 4416 (d)->alloc = newalloc; \ 4417 (d)->buf = __GMP_REALLOCATE_FUNC_TYPE ((d)->buf, \ 4418 alloc, newalloc, char); \ 4419 } \ 4420 } while (0) 4421 4422 __GMP_DECLSPEC int __gmp_asprintf_memory (struct gmp_asprintf_t *, const char *, size_t); 4423 __GMP_DECLSPEC int __gmp_asprintf_reps (struct gmp_asprintf_t *, int, int); 4424 __GMP_DECLSPEC int __gmp_asprintf_final (struct gmp_asprintf_t *); 4425 4426 /* buf is where to write the next output, and size is how much space is left 4427 there. If the application passed size==0 then that's what we'll have 4428 here, and nothing at all should be written. 
*/ 4429 struct gmp_snprintf_t { 4430 char *buf; 4431 size_t size; 4432 }; 4433 4434 /* Add the bytes printed by the call to the total retval, or bail out on an 4435 error. */ 4436 #define DOPRNT_ACCUMULATE(call) \ 4437 do { \ 4438 int __ret; \ 4439 __ret = call; \ 4440 if (__ret == -1) \ 4441 goto error; \ 4442 retval += __ret; \ 4443 } while (0) 4444 #define DOPRNT_ACCUMULATE_FUN(fun, params) \ 4445 do { \ 4446 ASSERT ((fun) != NULL); \ 4447 DOPRNT_ACCUMULATE ((*(fun)) params); \ 4448 } while (0) 4449 4450 #define DOPRNT_FORMAT(fmt, ap) \ 4451 DOPRNT_ACCUMULATE_FUN (funs->format, (data, fmt, ap)) 4452 #define DOPRNT_MEMORY(ptr, len) \ 4453 DOPRNT_ACCUMULATE_FUN (funs->memory, (data, ptr, len)) 4454 #define DOPRNT_REPS(c, n) \ 4455 DOPRNT_ACCUMULATE_FUN (funs->reps, (data, c, n)) 4456 4457 #define DOPRNT_STRING(str) DOPRNT_MEMORY (str, strlen (str)) 4458 4459 #define DOPRNT_REPS_MAYBE(c, n) \ 4460 do { \ 4461 if ((n) != 0) \ 4462 DOPRNT_REPS (c, n); \ 4463 } while (0) 4464 #define DOPRNT_MEMORY_MAYBE(ptr, len) \ 4465 do { \ 4466 if ((len) != 0) \ 4467 DOPRNT_MEMORY (ptr, len); \ 4468 } while (0) 4469 4470 __GMP_DECLSPEC int __gmp_doprnt (const struct doprnt_funs_t *, void *, const char *, va_list); 4471 __GMP_DECLSPEC int __gmp_doprnt_integer (const struct doprnt_funs_t *, void *, const struct doprnt_params_t *, const char *); 4472 4473 #define __gmp_doprnt_mpf __gmp_doprnt_mpf2 4474 __GMP_DECLSPEC int __gmp_doprnt_mpf (const struct doprnt_funs_t *, void *, const struct doprnt_params_t *, const char *, mpf_srcptr); 4475 4476 __GMP_DECLSPEC int __gmp_replacement_vsnprintf (char *, size_t, const char *, va_list); 4477 #endif /* _GMP_H_HAVE_VA_LIST */ 4478 4479 4480 typedef int (*gmp_doscan_scan_t) (void *, const char *, ...); 4481 typedef void *(*gmp_doscan_step_t) (void *, int); 4482 typedef int (*gmp_doscan_get_t) (void *); 4483 typedef int (*gmp_doscan_unget_t) (int, void *); 4484 4485 struct gmp_doscan_funs_t { 4486 gmp_doscan_scan_t scan; 4487 gmp_doscan_step_t step; 4488 gmp_doscan_get_t get; 4489 gmp_doscan_unget_t unget; 4490 }; 4491 extern const struct gmp_doscan_funs_t __gmp_fscanf_funs; 4492 extern const struct gmp_doscan_funs_t __gmp_sscanf_funs; 4493 4494 #if _GMP_H_HAVE_VA_LIST 4495 __GMP_DECLSPEC int __gmp_doscan (const struct gmp_doscan_funs_t *, void *, const char *, va_list); 4496 #endif 4497 4498 4499 /* For testing and debugging. 
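   For instance, a test program might check an mpz_t after each operation
   with something like this (an illustrative sketch, not a quote from the
   test suite):

       mpz_t x;
       mpz_init_set_ui (x, 123);
       mpz_mul (x, x, x);
       MPZ_CHECK_FORMAT (x);
       mpz_clear (x);

   MPZ_CHECK_FORMAT below asserts that the most significant limb is
   non-zero (unless the value is zero), that the allocation covers the
   size, and that the limb data passes ASSERT_ALWAYS_MPN.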
*/ 4500 #define MPZ_CHECK_FORMAT(z) \ 4501 do { \ 4502 ASSERT_ALWAYS (SIZ(z) == 0 || PTR(z)[ABSIZ(z) - 1] != 0); \ 4503 ASSERT_ALWAYS (ALLOC(z) >= ABSIZ(z)); \ 4504 ASSERT_ALWAYS_MPN (PTR(z), ABSIZ(z)); \ 4505 } while (0) 4506 4507 #define MPQ_CHECK_FORMAT(q) \ 4508 do { \ 4509 MPZ_CHECK_FORMAT (mpq_numref (q)); \ 4510 MPZ_CHECK_FORMAT (mpq_denref (q)); \ 4511 ASSERT_ALWAYS (SIZ(mpq_denref(q)) >= 1); \ 4512 \ 4513 if (SIZ(mpq_numref(q)) == 0) \ 4514 { \ 4515 /* should have zero as 0/1 */ \ 4516 ASSERT_ALWAYS (SIZ(mpq_denref(q)) == 1 \ 4517 && PTR(mpq_denref(q))[0] == 1); \ 4518 } \ 4519 else \ 4520 { \ 4521 /* should have no common factors */ \ 4522 mpz_t g; \ 4523 mpz_init (g); \ 4524 mpz_gcd (g, mpq_numref(q), mpq_denref(q)); \ 4525 ASSERT_ALWAYS (mpz_cmp_ui (g, 1) == 0); \ 4526 mpz_clear (g); \ 4527 } \ 4528 } while (0) 4529 4530 #define MPF_CHECK_FORMAT(f) \ 4531 do { \ 4532 ASSERT_ALWAYS (PREC(f) >= __GMPF_BITS_TO_PREC(53)); \ 4533 ASSERT_ALWAYS (ABSIZ(f) <= PREC(f)+1); \ 4534 if (SIZ(f) == 0) \ 4535 ASSERT_ALWAYS (EXP(f) == 0); \ 4536 if (SIZ(f) != 0) \ 4537 ASSERT_ALWAYS (PTR(f)[ABSIZ(f) - 1] != 0); \ 4538 } while (0) 4539 4540 4541 #define MPZ_PROVOKE_REALLOC(z) \ 4542 do { ALLOC(z) = ABSIZ(z); } while (0) 4543 4544 4545 /* Enhancement: The "mod" and "gcd_1" functions below could have 4546 __GMP_ATTRIBUTE_PURE, but currently (gcc 3.3) that's not supported on 4547 function pointers, only actual functions. It probably doesn't make much 4548 difference to the gmp code, since hopefully we arrange calls so there's 4549 no great need for the compiler to move things around. */ 4550 4551 #if WANT_FAT_BINARY && (HAVE_HOST_CPU_FAMILY_x86 || HAVE_HOST_CPU_FAMILY_x86_64) 4552 /* NOTE: The function pointers in this struct are also in CPUVEC_FUNCS_LIST 4553 in mpn/x86/x86-defs.m4. Be sure to update that when changing here. 
*/ 4554 struct cpuvec_t { 4555 DECL_add_n ((*add_n)); 4556 DECL_addlsh1_n ((*addlsh1_n)); 4557 DECL_addlsh2_n ((*addlsh2_n)); 4558 DECL_addmul_1 ((*addmul_1)); 4559 DECL_addmul_2 ((*addmul_2)); 4560 DECL_bdiv_dbm1c ((*bdiv_dbm1c)); 4561 DECL_com ((*com)); 4562 DECL_copyd ((*copyd)); 4563 DECL_copyi ((*copyi)); 4564 DECL_divexact_1 ((*divexact_1)); 4565 DECL_divrem_1 ((*divrem_1)); 4566 DECL_gcd_1 ((*gcd_1)); 4567 DECL_lshift ((*lshift)); 4568 DECL_lshiftc ((*lshiftc)); 4569 DECL_mod_1 ((*mod_1)); 4570 DECL_mod_1_1p ((*mod_1_1p)); 4571 DECL_mod_1_1p_cps ((*mod_1_1p_cps)); 4572 DECL_mod_1s_2p ((*mod_1s_2p)); 4573 DECL_mod_1s_2p_cps ((*mod_1s_2p_cps)); 4574 DECL_mod_1s_4p ((*mod_1s_4p)); 4575 DECL_mod_1s_4p_cps ((*mod_1s_4p_cps)); 4576 DECL_mod_34lsub1 ((*mod_34lsub1)); 4577 DECL_modexact_1c_odd ((*modexact_1c_odd)); 4578 DECL_mul_1 ((*mul_1)); 4579 DECL_mul_basecase ((*mul_basecase)); 4580 DECL_mullo_basecase ((*mullo_basecase)); 4581 DECL_preinv_divrem_1 ((*preinv_divrem_1)); 4582 DECL_preinv_mod_1 ((*preinv_mod_1)); 4583 DECL_redc_1 ((*redc_1)); 4584 DECL_redc_2 ((*redc_2)); 4585 DECL_rshift ((*rshift)); 4586 DECL_sqr_basecase ((*sqr_basecase)); 4587 DECL_sub_n ((*sub_n)); 4588 DECL_sublsh1_n ((*sublsh1_n)); 4589 DECL_submul_1 ((*submul_1)); 4590 mp_size_t mul_toom22_threshold; 4591 mp_size_t mul_toom33_threshold; 4592 mp_size_t sqr_toom2_threshold; 4593 mp_size_t sqr_toom3_threshold; 4594 mp_size_t bmod_1_to_mod_1_threshold; 4595 }; 4596 __GMP_DECLSPEC extern struct cpuvec_t __gmpn_cpuvec; 4597 __GMP_DECLSPEC extern int __gmpn_cpuvec_initialized; 4598 #endif /* x86 fat binary */ 4599 4600 __GMP_DECLSPEC void __gmpn_cpuvec_init (void); 4601 4602 /* Get a threshold "field" from __gmpn_cpuvec, running __gmpn_cpuvec_init() 4603 if that hasn't yet been done (to establish the right values). */ 4604 #define CPUVEC_THRESHOLD(field) \ 4605 ((LIKELY (__gmpn_cpuvec_initialized) ? 0 : (__gmpn_cpuvec_init (), 0)), \ 4606 __gmpn_cpuvec.field) 4607 4608 4609 #if HAVE_NATIVE_mpn_add_nc 4610 #define mpn_add_nc __MPN(add_nc) 4611 __GMP_DECLSPEC mp_limb_t mpn_add_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t); 4612 #else 4613 static inline 4614 mp_limb_t 4615 mpn_add_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_t ci) 4616 { 4617 mp_limb_t co; 4618 co = mpn_add_n (rp, up, vp, n); 4619 co += mpn_add_1 (rp, rp, n, ci); 4620 return co; 4621 } 4622 #endif 4623 4624 #if HAVE_NATIVE_mpn_sub_nc 4625 #define mpn_sub_nc __MPN(sub_nc) 4626 __GMP_DECLSPEC mp_limb_t mpn_sub_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t); 4627 #else 4628 static inline mp_limb_t 4629 mpn_sub_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_t ci) 4630 { 4631 mp_limb_t co; 4632 co = mpn_sub_n (rp, up, vp, n); 4633 co += mpn_sub_1 (rp, rp, n, ci); 4634 return co; 4635 } 4636 #endif 4637 4638 static inline int 4639 mpn_zero_p (mp_srcptr ap, mp_size_t n) 4640 { 4641 mp_size_t i; 4642 for (i = n - 1; i >= 0; i--) 4643 { 4644 if (ap[i] != 0) 4645 return 0; 4646 } 4647 return 1; 4648 } 4649 4650 #if TUNE_PROGRAM_BUILD 4651 /* Some extras wanted when recompiling some .c files for use by the tune 4652 program. Not part of a normal build. 4653 4654 It's necessary to keep these thresholds as #defines (just to an 4655 identically named variable), since various defaults are established based 4656 on #ifdef in the .c files. For some this is not so (the defaults are 4657 instead established above), but all are done this way for consistency. 
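   For example, after the redefinitions below a reference to
   MUL_TOOM22_THRESHOLD in a recompiled mpn/generic source file reads the
   global variable mul_toom22_threshold instead of a compile-time constant,
   so the tune program can vary the crossover at run time while timing.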
*/ 4658 4659 #undef MUL_TOOM22_THRESHOLD 4660 #define MUL_TOOM22_THRESHOLD mul_toom22_threshold 4661 extern mp_size_t mul_toom22_threshold; 4662 4663 #undef MUL_TOOM33_THRESHOLD 4664 #define MUL_TOOM33_THRESHOLD mul_toom33_threshold 4665 extern mp_size_t mul_toom33_threshold; 4666 4667 #undef MUL_TOOM44_THRESHOLD 4668 #define MUL_TOOM44_THRESHOLD mul_toom44_threshold 4669 extern mp_size_t mul_toom44_threshold; 4670 4671 #undef MUL_TOOM6H_THRESHOLD 4672 #define MUL_TOOM6H_THRESHOLD mul_toom6h_threshold 4673 extern mp_size_t mul_toom6h_threshold; 4674 4675 #undef MUL_TOOM8H_THRESHOLD 4676 #define MUL_TOOM8H_THRESHOLD mul_toom8h_threshold 4677 extern mp_size_t mul_toom8h_threshold; 4678 4679 #undef MUL_TOOM32_TO_TOOM43_THRESHOLD 4680 #define MUL_TOOM32_TO_TOOM43_THRESHOLD mul_toom32_to_toom43_threshold 4681 extern mp_size_t mul_toom32_to_toom43_threshold; 4682 4683 #undef MUL_TOOM32_TO_TOOM53_THRESHOLD 4684 #define MUL_TOOM32_TO_TOOM53_THRESHOLD mul_toom32_to_toom53_threshold 4685 extern mp_size_t mul_toom32_to_toom53_threshold; 4686 4687 #undef MUL_TOOM42_TO_TOOM53_THRESHOLD 4688 #define MUL_TOOM42_TO_TOOM53_THRESHOLD mul_toom42_to_toom53_threshold 4689 extern mp_size_t mul_toom42_to_toom53_threshold; 4690 4691 #undef MUL_TOOM42_TO_TOOM63_THRESHOLD 4692 #define MUL_TOOM42_TO_TOOM63_THRESHOLD mul_toom42_to_toom63_threshold 4693 extern mp_size_t mul_toom42_to_toom63_threshold; 4694 4695 #undef MUL_TOOM43_TO_TOOM54_THRESHOLD 4696 #define MUL_TOOM43_TO_TOOM54_THRESHOLD mul_toom43_to_toom54_threshold; 4697 extern mp_size_t mul_toom43_to_toom54_threshold; 4698 4699 #undef MUL_FFT_THRESHOLD 4700 #define MUL_FFT_THRESHOLD mul_fft_threshold 4701 extern mp_size_t mul_fft_threshold; 4702 4703 #undef MUL_FFT_MODF_THRESHOLD 4704 #define MUL_FFT_MODF_THRESHOLD mul_fft_modf_threshold 4705 extern mp_size_t mul_fft_modf_threshold; 4706 4707 #undef MUL_FFT_TABLE 4708 #define MUL_FFT_TABLE { 0 } 4709 4710 #undef MUL_FFT_TABLE3 4711 #define MUL_FFT_TABLE3 { {0,0} } 4712 4713 /* A native mpn_sqr_basecase is not tuned and SQR_BASECASE_THRESHOLD should 4714 remain as zero (always use it). */ 4715 #if ! 
HAVE_NATIVE_mpn_sqr_basecase 4716 #undef SQR_BASECASE_THRESHOLD 4717 #define SQR_BASECASE_THRESHOLD sqr_basecase_threshold 4718 extern mp_size_t sqr_basecase_threshold; 4719 #endif 4720 4721 #if TUNE_PROGRAM_BUILD_SQR 4722 #undef SQR_TOOM2_THRESHOLD 4723 #define SQR_TOOM2_THRESHOLD SQR_TOOM2_MAX_GENERIC 4724 #else 4725 #undef SQR_TOOM2_THRESHOLD 4726 #define SQR_TOOM2_THRESHOLD sqr_toom2_threshold 4727 extern mp_size_t sqr_toom2_threshold; 4728 #endif 4729 4730 #undef SQR_TOOM3_THRESHOLD 4731 #define SQR_TOOM3_THRESHOLD sqr_toom3_threshold 4732 extern mp_size_t sqr_toom3_threshold; 4733 4734 #undef SQR_TOOM4_THRESHOLD 4735 #define SQR_TOOM4_THRESHOLD sqr_toom4_threshold 4736 extern mp_size_t sqr_toom4_threshold; 4737 4738 #undef SQR_TOOM6_THRESHOLD 4739 #define SQR_TOOM6_THRESHOLD sqr_toom6_threshold 4740 extern mp_size_t sqr_toom6_threshold; 4741 4742 #undef SQR_TOOM8_THRESHOLD 4743 #define SQR_TOOM8_THRESHOLD sqr_toom8_threshold 4744 extern mp_size_t sqr_toom8_threshold; 4745 4746 #undef SQR_FFT_THRESHOLD 4747 #define SQR_FFT_THRESHOLD sqr_fft_threshold 4748 extern mp_size_t sqr_fft_threshold; 4749 4750 #undef SQR_FFT_MODF_THRESHOLD 4751 #define SQR_FFT_MODF_THRESHOLD sqr_fft_modf_threshold 4752 extern mp_size_t sqr_fft_modf_threshold; 4753 4754 #undef SQR_FFT_TABLE 4755 #define SQR_FFT_TABLE { 0 } 4756 4757 #undef SQR_FFT_TABLE3 4758 #define SQR_FFT_TABLE3 { {0,0} } 4759 4760 #undef MULLO_BASECASE_THRESHOLD 4761 #define MULLO_BASECASE_THRESHOLD mullo_basecase_threshold 4762 extern mp_size_t mullo_basecase_threshold; 4763 4764 #undef MULLO_DC_THRESHOLD 4765 #define MULLO_DC_THRESHOLD mullo_dc_threshold 4766 extern mp_size_t mullo_dc_threshold; 4767 4768 #undef MULLO_MUL_N_THRESHOLD 4769 #define MULLO_MUL_N_THRESHOLD mullo_mul_n_threshold 4770 extern mp_size_t mullo_mul_n_threshold; 4771 4772 #undef MULMID_TOOM42_THRESHOLD 4773 #define MULMID_TOOM42_THRESHOLD mulmid_toom42_threshold 4774 extern mp_size_t mulmid_toom42_threshold; 4775 4776 #undef DIV_QR_2_PI2_THRESHOLD 4777 #define DIV_QR_2_PI2_THRESHOLD div_qr_2_pi2_threshold 4778 extern mp_size_t div_qr_2_pi2_threshold; 4779 4780 #undef DC_DIV_QR_THRESHOLD 4781 #define DC_DIV_QR_THRESHOLD dc_div_qr_threshold 4782 extern mp_size_t dc_div_qr_threshold; 4783 4784 #undef DC_DIVAPPR_Q_THRESHOLD 4785 #define DC_DIVAPPR_Q_THRESHOLD dc_divappr_q_threshold 4786 extern mp_size_t dc_divappr_q_threshold; 4787 4788 #undef DC_BDIV_Q_THRESHOLD 4789 #define DC_BDIV_Q_THRESHOLD dc_bdiv_q_threshold 4790 extern mp_size_t dc_bdiv_q_threshold; 4791 4792 #undef DC_BDIV_QR_THRESHOLD 4793 #define DC_BDIV_QR_THRESHOLD dc_bdiv_qr_threshold 4794 extern mp_size_t dc_bdiv_qr_threshold; 4795 4796 #undef MU_DIV_QR_THRESHOLD 4797 #define MU_DIV_QR_THRESHOLD mu_div_qr_threshold 4798 extern mp_size_t mu_div_qr_threshold; 4799 4800 #undef MU_DIVAPPR_Q_THRESHOLD 4801 #define MU_DIVAPPR_Q_THRESHOLD mu_divappr_q_threshold 4802 extern mp_size_t mu_divappr_q_threshold; 4803 4804 #undef MUPI_DIV_QR_THRESHOLD 4805 #define MUPI_DIV_QR_THRESHOLD mupi_div_qr_threshold 4806 extern mp_size_t mupi_div_qr_threshold; 4807 4808 #undef MU_BDIV_QR_THRESHOLD 4809 #define MU_BDIV_QR_THRESHOLD mu_bdiv_qr_threshold 4810 extern mp_size_t mu_bdiv_qr_threshold; 4811 4812 #undef MU_BDIV_Q_THRESHOLD 4813 #define MU_BDIV_Q_THRESHOLD mu_bdiv_q_threshold 4814 extern mp_size_t mu_bdiv_q_threshold; 4815 4816 #undef INV_MULMOD_BNM1_THRESHOLD 4817 #define INV_MULMOD_BNM1_THRESHOLD inv_mulmod_bnm1_threshold 4818 extern mp_size_t inv_mulmod_bnm1_threshold; 4819 4820 #undef INV_NEWTON_THRESHOLD 4821 #define 

#undef INV_APPR_THRESHOLD
#define INV_APPR_THRESHOLD inv_appr_threshold
extern mp_size_t inv_appr_threshold;

#undef BINV_NEWTON_THRESHOLD
#define BINV_NEWTON_THRESHOLD binv_newton_threshold
extern mp_size_t binv_newton_threshold;

#undef REDC_1_TO_REDC_2_THRESHOLD
#define REDC_1_TO_REDC_2_THRESHOLD redc_1_to_redc_2_threshold
extern mp_size_t redc_1_to_redc_2_threshold;

#undef REDC_2_TO_REDC_N_THRESHOLD
#define REDC_2_TO_REDC_N_THRESHOLD redc_2_to_redc_n_threshold
extern mp_size_t redc_2_to_redc_n_threshold;

#undef REDC_1_TO_REDC_N_THRESHOLD
#define REDC_1_TO_REDC_N_THRESHOLD redc_1_to_redc_n_threshold
extern mp_size_t redc_1_to_redc_n_threshold;

#undef MATRIX22_STRASSEN_THRESHOLD
#define MATRIX22_STRASSEN_THRESHOLD matrix22_strassen_threshold
extern mp_size_t matrix22_strassen_threshold;

#undef HGCD_THRESHOLD
#define HGCD_THRESHOLD hgcd_threshold
extern mp_size_t hgcd_threshold;

#undef HGCD_APPR_THRESHOLD
#define HGCD_APPR_THRESHOLD hgcd_appr_threshold
extern mp_size_t hgcd_appr_threshold;

#undef HGCD_REDUCE_THRESHOLD
#define HGCD_REDUCE_THRESHOLD hgcd_reduce_threshold
extern mp_size_t hgcd_reduce_threshold;

#undef GCD_DC_THRESHOLD
#define GCD_DC_THRESHOLD gcd_dc_threshold
extern mp_size_t gcd_dc_threshold;

#undef GCDEXT_DC_THRESHOLD
#define GCDEXT_DC_THRESHOLD gcdext_dc_threshold
extern mp_size_t gcdext_dc_threshold;

#undef DIVREM_1_NORM_THRESHOLD
#define DIVREM_1_NORM_THRESHOLD divrem_1_norm_threshold
extern mp_size_t divrem_1_norm_threshold;

#undef DIVREM_1_UNNORM_THRESHOLD
#define DIVREM_1_UNNORM_THRESHOLD divrem_1_unnorm_threshold
extern mp_size_t divrem_1_unnorm_threshold;

#undef MOD_1_NORM_THRESHOLD
#define MOD_1_NORM_THRESHOLD mod_1_norm_threshold
extern mp_size_t mod_1_norm_threshold;

#undef MOD_1_UNNORM_THRESHOLD
#define MOD_1_UNNORM_THRESHOLD mod_1_unnorm_threshold
extern mp_size_t mod_1_unnorm_threshold;

#undef MOD_1_1P_METHOD
#define MOD_1_1P_METHOD mod_1_1p_method
extern int mod_1_1p_method;

#undef MOD_1N_TO_MOD_1_1_THRESHOLD
#define MOD_1N_TO_MOD_1_1_THRESHOLD mod_1n_to_mod_1_1_threshold
extern mp_size_t mod_1n_to_mod_1_1_threshold;

#undef MOD_1U_TO_MOD_1_1_THRESHOLD
#define MOD_1U_TO_MOD_1_1_THRESHOLD mod_1u_to_mod_1_1_threshold
extern mp_size_t mod_1u_to_mod_1_1_threshold;

#undef MOD_1_1_TO_MOD_1_2_THRESHOLD
#define MOD_1_1_TO_MOD_1_2_THRESHOLD mod_1_1_to_mod_1_2_threshold
extern mp_size_t mod_1_1_to_mod_1_2_threshold;

#undef MOD_1_2_TO_MOD_1_4_THRESHOLD
#define MOD_1_2_TO_MOD_1_4_THRESHOLD mod_1_2_to_mod_1_4_threshold
extern mp_size_t mod_1_2_to_mod_1_4_threshold;

#undef PREINV_MOD_1_TO_MOD_1_THRESHOLD
#define PREINV_MOD_1_TO_MOD_1_THRESHOLD preinv_mod_1_to_mod_1_threshold
extern mp_size_t preinv_mod_1_to_mod_1_threshold;

#if ! UDIV_PREINV_ALWAYS
#undef DIVREM_2_THRESHOLD
#define DIVREM_2_THRESHOLD divrem_2_threshold
extern mp_size_t divrem_2_threshold;
#endif

#undef MULMOD_BNM1_THRESHOLD
#define MULMOD_BNM1_THRESHOLD mulmod_bnm1_threshold
extern mp_size_t mulmod_bnm1_threshold;

#undef SQRMOD_BNM1_THRESHOLD
#define SQRMOD_BNM1_THRESHOLD sqrmod_bnm1_threshold
extern mp_size_t sqrmod_bnm1_threshold;

#undef GET_STR_DC_THRESHOLD
#define GET_STR_DC_THRESHOLD get_str_dc_threshold
extern mp_size_t get_str_dc_threshold;

#undef GET_STR_PRECOMPUTE_THRESHOLD
#define GET_STR_PRECOMPUTE_THRESHOLD get_str_precompute_threshold
extern mp_size_t get_str_precompute_threshold;

#undef SET_STR_DC_THRESHOLD
#define SET_STR_DC_THRESHOLD set_str_dc_threshold
extern mp_size_t set_str_dc_threshold;

#undef SET_STR_PRECOMPUTE_THRESHOLD
#define SET_STR_PRECOMPUTE_THRESHOLD set_str_precompute_threshold
extern mp_size_t set_str_precompute_threshold;

#undef FAC_ODD_THRESHOLD
#define FAC_ODD_THRESHOLD fac_odd_threshold
extern mp_size_t fac_odd_threshold;

#undef FAC_DSC_THRESHOLD
#define FAC_DSC_THRESHOLD fac_dsc_threshold
extern mp_size_t fac_dsc_threshold;

#undef FFT_TABLE_ATTRS
#define FFT_TABLE_ATTRS
extern mp_size_t mpn_fft_table[2][MPN_FFT_TABLE_SIZE];
#define FFT_TABLE3_SIZE 2000	/* generous space for tuning */
extern struct fft_table_nk mpn_fft_table3[2][FFT_TABLE3_SIZE];

/* Sizes the tune program tests up to, used in a couple of recompilations. */
#undef MUL_TOOM22_THRESHOLD_LIMIT
#undef MUL_TOOM33_THRESHOLD_LIMIT
#undef MULLO_BASECASE_THRESHOLD_LIMIT
#undef SQR_TOOM3_THRESHOLD_LIMIT
#define SQR_TOOM2_MAX_GENERIC           200
#define MUL_TOOM22_THRESHOLD_LIMIT      700
#define MUL_TOOM33_THRESHOLD_LIMIT      700
#define SQR_TOOM3_THRESHOLD_LIMIT       400
#define MUL_TOOM44_THRESHOLD_LIMIT     1000
#define SQR_TOOM4_THRESHOLD_LIMIT      1000
#define MUL_TOOM6H_THRESHOLD_LIMIT     1100
#define SQR_TOOM6_THRESHOLD_LIMIT      1100
#define MUL_TOOM8H_THRESHOLD_LIMIT     1200
#define SQR_TOOM8_THRESHOLD_LIMIT      1200
#define MULLO_BASECASE_THRESHOLD_LIMIT  200
#define GET_STR_THRESHOLD_LIMIT         150
#define FAC_DSC_THRESHOLD_LIMIT        2048

#endif /* TUNE_PROGRAM_BUILD */

#if defined (__cplusplus)
}
#endif

/* FIXME: Make these itch functions less conservative.  Also consider making
   them dependent on just 'an', and computing the allocation directly from
   'an' instead of via n.  */

/* toom22/toom2: Scratch need is 2*(an + k), k is the recursion depth.
   k is the smallest k such that
     ceil(an/2^k) < MUL_TOOM22_THRESHOLD,
   which implies that
     k = bitsize of floor ((an-1)/(MUL_TOOM22_THRESHOLD-1))
       = 1 + floor (log_2 (floor ((an-1)/(MUL_TOOM22_THRESHOLD-1))))
*/
#define mpn_toom22_mul_itch(an, bn) \
  (2 * ((an) + GMP_NUMB_BITS))
#define mpn_toom2_sqr_itch(an) \
  (2 * ((an) + GMP_NUMB_BITS))
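
/* Illustrative sketch only, not used by GMP: the exact recursion depth k
   described in the comment above could be computed as below (assuming
   an > 0).  Since k is just the bit size of
   floor ((an-1)/(MUL_TOOM22_THRESHOLD-1)), it cannot exceed GMP_NUMB_BITS
   for any feasible operand size, which is why 2*(an + GMP_NUMB_BITS) above
   is a safe over-estimate of the exact 2*(an + k).  */
#if 0
  mp_size_t q = (an - 1) / (MUL_TOOM22_THRESHOLD - 1);
  int k = 0;
  while (q != 0)	/* count bits: k = 1 + floor (log_2 (q)) for q >= 1 */
    {
      k++;
      q >>= 1;
    }
  /* exact scratch for mpn_toom22_mul would then be 2 * (an + k) limbs */
#endif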

/* toom33/toom3: Scratch need is 5an/2 + 10k, k is the recursion depth.
   We use 3an + C, so that we can use a smaller constant.
 */
#define mpn_toom33_mul_itch(an, bn) \
  (3 * (an) + GMP_NUMB_BITS)
#define mpn_toom3_sqr_itch(an) \
  (3 * (an) + GMP_NUMB_BITS)

/* toom44/toom4: Scratch need is 8an/3 + 13k, k is the recursion depth.
   We use 3an + C, so that we can use a smaller constant.
 */
#define mpn_toom44_mul_itch(an, bn) \
  (3 * (an) + GMP_NUMB_BITS)
#define mpn_toom4_sqr_itch(an) \
  (3 * (an) + GMP_NUMB_BITS)

#define mpn_toom6_sqr_itch(n)                                           \
  (((n) - SQR_TOOM6_THRESHOLD)*2 +                                      \
   MAX(SQR_TOOM6_THRESHOLD*2 + GMP_NUMB_BITS*6,                         \
       mpn_toom4_sqr_itch(SQR_TOOM6_THRESHOLD)))

#define MUL_TOOM6H_MIN                                                  \
  ((MUL_TOOM6H_THRESHOLD > MUL_TOOM44_THRESHOLD) ?                      \
    MUL_TOOM6H_THRESHOLD : MUL_TOOM44_THRESHOLD)
#define mpn_toom6_mul_n_itch(n)                                         \
  (((n) - MUL_TOOM6H_MIN)*2 +                                           \
   MAX(MUL_TOOM6H_MIN*2 + GMP_NUMB_BITS*6,                              \
       mpn_toom44_mul_itch(MUL_TOOM6H_MIN,MUL_TOOM6H_MIN)))

static inline mp_size_t
mpn_toom6h_mul_itch (mp_size_t an, mp_size_t bn) {
  mp_size_t estimatedN;
  estimatedN = (an + bn) / (size_t) 10 + 1;
  return mpn_toom6_mul_n_itch (estimatedN * 6);
}

#define mpn_toom8_sqr_itch(n)                                           \
  ((((n)*15)>>3) - ((SQR_TOOM8_THRESHOLD*15)>>3) +                      \
   MAX(((SQR_TOOM8_THRESHOLD*15)>>3) + GMP_NUMB_BITS*6,                 \
       mpn_toom6_sqr_itch(SQR_TOOM8_THRESHOLD)))

#define MUL_TOOM8H_MIN                                                  \
  ((MUL_TOOM8H_THRESHOLD > MUL_TOOM6H_MIN) ?                            \
    MUL_TOOM8H_THRESHOLD : MUL_TOOM6H_MIN)
#define mpn_toom8_mul_n_itch(n)                                         \
  ((((n)*15)>>3) - ((MUL_TOOM8H_MIN*15)>>3) +                           \
   MAX(((MUL_TOOM8H_MIN*15)>>3) + GMP_NUMB_BITS*6,                      \
       mpn_toom6_mul_n_itch(MUL_TOOM8H_MIN)))

static inline mp_size_t
mpn_toom8h_mul_itch (mp_size_t an, mp_size_t bn) {
  mp_size_t estimatedN;
  estimatedN = (an + bn) / (size_t) 14 + 1;
  return mpn_toom8_mul_n_itch (estimatedN * 8);
}

static inline mp_size_t
mpn_toom32_mul_itch (mp_size_t an, mp_size_t bn)
{
  mp_size_t n = 1 + (2 * an >= 3 * bn ? (an - 1) / (size_t) 3 : (bn - 1) >> 1);
  mp_size_t itch = 2 * n + 1;

  return itch;
}

static inline mp_size_t
mpn_toom42_mul_itch (mp_size_t an, mp_size_t bn)
{
  mp_size_t n = an >= 2 * bn ? (an + 3) >> 2 : (bn + 1) >> 1;
  return 6 * n + 3;
}

static inline mp_size_t
mpn_toom43_mul_itch (mp_size_t an, mp_size_t bn)
{
  mp_size_t n = 1 + (3 * an >= 4 * bn ? (an - 1) >> 2 : (bn - 1) / (size_t) 3);

  return 6*n + 4;
}

static inline mp_size_t
mpn_toom52_mul_itch (mp_size_t an, mp_size_t bn)
{
  mp_size_t n = 1 + (2 * an >= 5 * bn ? (an - 1) / (size_t) 5 : (bn - 1) >> 1);
  return 6*n + 4;
}

static inline mp_size_t
mpn_toom53_mul_itch (mp_size_t an, mp_size_t bn)
{
  mp_size_t n = 1 + (3 * an >= 5 * bn ? (an - 1) / (size_t) 5 : (bn - 1) / (size_t) 3);
  return 10 * n + 10;
}

static inline mp_size_t
mpn_toom62_mul_itch (mp_size_t an, mp_size_t bn)
{
  mp_size_t n = 1 + (an >= 3 * bn ? (an - 1) / (size_t) 6 : (bn - 1) >> 1);
  return 10 * n + 10;
}

static inline mp_size_t
mpn_toom63_mul_itch (mp_size_t an, mp_size_t bn)
{
  mp_size_t n = 1 + (an >= 2 * bn ? (an - 1) / (size_t) 6 : (bn - 1) / (size_t) 3);
  return 9 * n + 3;
}

static inline mp_size_t
mpn_toom54_mul_itch (mp_size_t an, mp_size_t bn)
{
  mp_size_t n = 1 + (4 * an >= 5 * bn ? (an - 1) / (size_t) 5 : (bn - 1) / (size_t) 4);
  return 9 * n + 3;
}
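
/* Illustrative sketch only: a typical caller sizes a scratch block with the
   matching itch function and hands it to the multiplication routine, using
   the TMP_* allocation machinery from this file.  For example, multiplying
   {ap,an} by {bp,bn} into {pp,an+bn} with mpn_toom32_mul:  */
#if 0
  mp_ptr scratch;
  TMP_DECL;
  TMP_MARK;
  scratch = TMP_ALLOC_LIMBS (mpn_toom32_mul_itch (an, bn));
  mpn_toom32_mul (pp, ap, an, bp, bn, scratch);
  TMP_FREE;
#endif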

/* let S(n) = space required for input size n,
   then S(n) = 3 floor(n/2) + 1 + S(floor(n/2)).
   Unrolling the recurrence gives S(n) <= 3n + (recursion depth), and the
   depth is roughly log_2(n) <= GMP_NUMB_BITS, hence the bound below.  */
#define mpn_toom42_mulmid_itch(n) \
  (3 * (n) + GMP_NUMB_BITS)

#if 0
#define mpn_fft_mul mpn_mul_fft_full
#else
#define mpn_fft_mul mpn_nussbaumer_mul
#endif

#ifdef __cplusplus

/* A little helper for a null-terminated __gmp_allocate_func string.
   The destructor ensures it's freed even if an exception is thrown.
   The len field is needed by the destructor, and can be used by anyone else
   to avoid a second strlen pass over the data.

   Since our input is a C string, using strlen is correct.  Perhaps it'd be
   more C++-ish style to use std::char_traits<char>::length, but char_traits
   isn't available in gcc 2.95.4.  */

class gmp_allocated_string {
 public:
  char *str;
  size_t len;
  gmp_allocated_string(char *arg)
  {
    str = arg;
    len = std::strlen (str);
  }
  ~gmp_allocated_string()
  {
    (*__gmp_free_func) (str, len+1);
  }
};

std::istream &__gmpz_operator_in_nowhite (std::istream &, mpz_ptr, char);
int __gmp_istream_set_base (std::istream &, char &, bool &, bool &);
void __gmp_istream_set_digits (std::string &, std::istream &, char &, bool &, int);
void __gmp_doprnt_params_from_ios (struct doprnt_params_t *, std::ios &);
std::ostream& __gmp_doprnt_integer_ostream (std::ostream &, struct doprnt_params_t *, char *);
extern const struct doprnt_funs_t __gmp_asprintf_funs_noformat;

#endif /* __cplusplus */

#endif /* __GMP_IMPL_H__ */