1 /* Speed measuring program. 2 3 Copyright 1999, 2000, 2001, 2002, 2003, 2005, 2006, 2008, 2009, 2010 Free 4 Software Foundation, Inc. 5 6 This file is part of the GNU MP Library. 7 8 The GNU MP Library is free software; you can redistribute it and/or modify 9 it under the terms of the GNU Lesser General Public License as published by 10 the Free Software Foundation; either version 3 of the License, or (at your 11 option) any later version. 12 13 The GNU MP Library is distributed in the hope that it will be useful, but 14 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public 16 License for more details. 17 18 You should have received a copy of the GNU Lesser General Public License 19 along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ 20 21 /* Usage message is in the code below, run with no arguments to print it. 22 See README for interesting applications. 23 24 To add a new routine foo(), create a speed_foo() function in the style of 25 the existing ones and add an entry in the routine[] array. Put FLAG_R if 26 speed_foo() wants an "r" parameter. 27 28 The routines don't have help messages or descriptions, but most have 29 suggestive names. See the source code for full details. 30 31 */ 32 33 #include "config.h" 34 35 #include <limits.h> 36 #include <stdio.h> 37 #include <stdlib.h> 38 #include <string.h> 39 40 #if HAVE_UNISTD_H 41 #include <unistd.h> /* for getpid, R_OK */ 42 #endif 43 44 #if TIME_WITH_SYS_TIME 45 # include <sys/time.h> /* for struct timeval */ 46 # include <time.h> 47 #else 48 # if HAVE_SYS_TIME_H 49 # include <sys/time.h> 50 # else 51 # include <time.h> 52 # endif 53 #endif 54 55 #if HAVE_SYS_RESOURCE_H 56 #include <sys/resource.h> /* for getrusage() */ 57 #endif 58 59 60 #include "gmp.h" 61 #include "gmp-impl.h" 62 #include "longlong.h" /* for the benefit of speed-many.c */ 63 #include "tests.h" 64 #include "speed.h" 65 66 67 #if !HAVE_DECL_OPTARG 68 extern char *optarg; 69 extern int optind, opterr; 70 #endif 71 72 #if !HAVE_STRTOUL 73 #define strtoul(p,e,b) (unsigned long) strtol(p,e,b) 74 #endif 75 76 #ifdef SPEED_EXTRA_PROTOS 77 SPEED_EXTRA_PROTOS 78 #endif 79 #ifdef SPEED_EXTRA_PROTOS2 80 SPEED_EXTRA_PROTOS2 81 #endif 82 83 84 #define MPN_FILL(ptr, size, n) \ 85 do { \ 86 mp_size_t __i; \ 87 ASSERT ((size) >= 0); \ 88 for (__i = 0; __i < (size); __i++) \ 89 (ptr)[__i] = (n); \ 90 } while (0) 91 92 93 #if GMP_LIMB_BITS == 32 94 #define GMP_NUMB_0xAA (CNST_LIMB(0xAAAAAAAA) & GMP_NUMB_MASK) 95 #endif 96 #if GMP_LIMB_BITS == 64 97 #define GMP_NUMB_0xAA (CNST_LIMB(0xAAAAAAAAAAAAAAAA) & GMP_NUMB_MASK) 98 #endif 99 100 101 #define CMP_ABSOLUTE 1 102 #define CMP_RATIO 2 103 #define CMP_DIFFERENCE 3 104 #define CMP_DIFFPREV 4 105 int option_cmp = CMP_ABSOLUTE; 106 107 #define UNIT_SECONDS 1 108 #define UNIT_CYCLES 2 109 #define UNIT_CYCLESPERLIMB 3 110 int option_unit = UNIT_SECONDS; 111 112 #define DATA_RANDOM 1 113 #define DATA_RANDOM2 2 114 #define DATA_ZEROS 3 115 #define DATA_AAS 4 116 #define DATA_FFS 5 117 #define DATA_2FD 6 118 int option_data = DATA_RANDOM; 119 120 int option_square = 0; 121 double option_factor = 0.0; 122 mp_size_t option_step = 1; 123 int option_gnuplot = 0; 124 char *option_gnuplot_basename; 125 struct size_array_t { 126 mp_size_t start, end; 127 } *size_array = NULL; 128 mp_size_t size_num = 0; 129 mp_size_t size_allocnum = 0; 130 int option_resource_usage = 0; 131 long option_seed = 123456789; 132 133 struct speed_params sp; 134 135 #define COLUMN_WIDTH 13 /* for the free-form output */ 136 137 #define FLAG_R (1<<0) /* require ".r" */ 138 #define FLAG_R_OPTIONAL (1<<1) /* optional ".r" */ 139 #define FLAG_RSIZE (1<<2) 140 #define FLAG_NODATA (1<<3) /* don't alloc xp, yp */ 141 142 const struct routine_t { 143 /* constants */ 144 const char *name; 145 speed_function_t fun; 146 int flag; 147 } routine[] = { 148 149 { "noop", speed_noop }, 150 { "noop_wxs", speed_noop_wxs }, 151 { "noop_wxys", speed_noop_wxys }, 152 153 { "mpn_add_n", speed_mpn_add_n, FLAG_R_OPTIONAL }, 154 { "mpn_sub_n", speed_mpn_sub_n, FLAG_R_OPTIONAL }, 155 156 #if HAVE_NATIVE_mpn_add_n_sub_n 157 { "mpn_add_n_sub_n", speed_mpn_add_n_sub_n, FLAG_R_OPTIONAL }, 158 #endif 159 160 { "mpn_addmul_1", speed_mpn_addmul_1, FLAG_R }, 161 { "mpn_submul_1", speed_mpn_submul_1, FLAG_R }, 162 #if HAVE_NATIVE_mpn_addmul_2 163 { "mpn_addmul_2", speed_mpn_addmul_2, FLAG_R_OPTIONAL }, 164 #endif 165 #if HAVE_NATIVE_mpn_addmul_3 166 { "mpn_addmul_3", speed_mpn_addmul_3, FLAG_R_OPTIONAL }, 167 #endif 168 #if HAVE_NATIVE_mpn_addmul_4 169 { "mpn_addmul_4", speed_mpn_addmul_4, FLAG_R_OPTIONAL }, 170 #endif 171 #if HAVE_NATIVE_mpn_addmul_5 172 { "mpn_addmul_5", speed_mpn_addmul_5, FLAG_R_OPTIONAL }, 173 #endif 174 #if HAVE_NATIVE_mpn_addmul_6 175 { "mpn_addmul_6", speed_mpn_addmul_6, FLAG_R_OPTIONAL }, 176 #endif 177 #if HAVE_NATIVE_mpn_addmul_7 178 { "mpn_addmul_7", speed_mpn_addmul_7, FLAG_R_OPTIONAL }, 179 #endif 180 #if HAVE_NATIVE_mpn_addmul_8 181 { "mpn_addmul_8", speed_mpn_addmul_8, FLAG_R_OPTIONAL }, 182 #endif 183 { "mpn_mul_1", speed_mpn_mul_1, FLAG_R }, 184 { "mpn_mul_1_inplace", speed_mpn_mul_1_inplace, FLAG_R }, 185 #if HAVE_NATIVE_mpn_mul_2 186 { "mpn_mul_2", speed_mpn_mul_2, FLAG_R_OPTIONAL }, 187 #endif 188 #if HAVE_NATIVE_mpn_mul_3 189 { "mpn_mul_3", speed_mpn_mul_3, FLAG_R_OPTIONAL }, 190 #endif 191 #if HAVE_NATIVE_mpn_mul_4 192 { "mpn_mul_4", speed_mpn_mul_4, FLAG_R_OPTIONAL }, 193 #endif 194 195 { "mpn_divrem_1", speed_mpn_divrem_1, FLAG_R }, 196 { "mpn_divrem_1f", speed_mpn_divrem_1f, FLAG_R }, 197 #if HAVE_NATIVE_mpn_divrem_1c 198 { "mpn_divrem_1c", speed_mpn_divrem_1c, FLAG_R }, 199 { "mpn_divrem_1cf", speed_mpn_divrem_1cf,FLAG_R }, 200 #endif 201 { "mpn_mod_1", speed_mpn_mod_1, FLAG_R_OPTIONAL }, 202 #if HAVE_NATIVE_mpn_mod_1c 203 { "mpn_mod_1c", speed_mpn_mod_1c, FLAG_R_OPTIONAL }, 204 #endif 205 { "mpn_preinv_divrem_1", speed_mpn_preinv_divrem_1, FLAG_R }, 206 { "mpn_preinv_divrem_1f", speed_mpn_preinv_divrem_1f, FLAG_R }, 207 { "mpn_preinv_mod_1", speed_mpn_preinv_mod_1, FLAG_R }, 208 209 { "mpn_mod_1_1", speed_mpn_mod_1_1, FLAG_R_OPTIONAL }, 210 { "mpn_mod_1s_2", speed_mpn_mod_1_2, FLAG_R_OPTIONAL }, 211 { "mpn_mod_1s_3", speed_mpn_mod_1_3, FLAG_R_OPTIONAL }, 212 { "mpn_mod_1s_4", speed_mpn_mod_1_4, FLAG_R_OPTIONAL }, 213 214 { "mpn_divrem_1_div", speed_mpn_divrem_1_div, FLAG_R }, 215 { "mpn_divrem_1_inv", speed_mpn_divrem_1_inv, FLAG_R }, 216 { "mpn_divrem_1f_div", speed_mpn_divrem_1f_div, FLAG_R }, 217 { "mpn_divrem_1f_inv", speed_mpn_divrem_1f_inv, FLAG_R }, 218 { "mpn_mod_1_div", speed_mpn_mod_1_div, FLAG_R }, 219 { "mpn_mod_1_inv", speed_mpn_mod_1_inv, FLAG_R }, 220 221 { "mpn_divrem_2", speed_mpn_divrem_2, }, 222 { "mpn_divrem_2_div", speed_mpn_divrem_2_div, }, 223 { "mpn_divrem_2_inv", speed_mpn_divrem_2_inv, }, 224 225 { "mpn_divexact_1", speed_mpn_divexact_1, FLAG_R }, 226 { "mpn_divexact_by3", speed_mpn_divexact_by3 }, 227 228 { "mpn_bdiv_q_1", speed_mpn_bdiv_q_1, FLAG_R_OPTIONAL }, 229 { "mpn_pi1_bdiv_q_1", speed_mpn_pi1_bdiv_q_1, FLAG_R_OPTIONAL }, 230 { "mpn_bdiv_dbm1c", speed_mpn_bdiv_dbm1c, FLAG_R_OPTIONAL }, 231 232 #if HAVE_NATIVE_mpn_modexact_1_odd 233 { "mpn_modexact_1_odd", speed_mpn_modexact_1_odd, FLAG_R }, 234 #endif 235 { "mpn_modexact_1c_odd", speed_mpn_modexact_1c_odd, FLAG_R }, 236 237 #if GMP_NUMB_BITS % 4 == 0 238 { "mpn_mod_34lsub1", speed_mpn_mod_34lsub1 }, 239 #endif 240 241 { "mpn_lshift", speed_mpn_lshift, FLAG_R }, 242 { "mpn_lshiftc", speed_mpn_lshiftc, FLAG_R }, 243 { "mpn_rshift", speed_mpn_rshift, FLAG_R }, 244 245 { "mpn_and_n", speed_mpn_and_n, FLAG_R_OPTIONAL }, 246 { "mpn_andn_n", speed_mpn_andn_n, FLAG_R_OPTIONAL }, 247 { "mpn_nand_n", speed_mpn_nand_n, FLAG_R_OPTIONAL }, 248 { "mpn_ior_n", speed_mpn_ior_n, FLAG_R_OPTIONAL }, 249 { "mpn_iorn_n", speed_mpn_iorn_n, FLAG_R_OPTIONAL }, 250 { "mpn_nior_n", speed_mpn_nior_n, FLAG_R_OPTIONAL }, 251 { "mpn_xor_n", speed_mpn_xor_n, FLAG_R_OPTIONAL }, 252 { "mpn_xnor_n", speed_mpn_xnor_n, FLAG_R_OPTIONAL }, 253 { "mpn_com", speed_mpn_com }, 254 255 { "mpn_popcount", speed_mpn_popcount }, 256 { "mpn_hamdist", speed_mpn_hamdist }, 257 258 { "mpn_matrix22_mul", speed_mpn_matrix22_mul }, 259 260 { "mpn_hgcd", speed_mpn_hgcd }, 261 { "mpn_hgcd_lehmer", speed_mpn_hgcd_lehmer }, 262 263 { "mpn_gcd_1", speed_mpn_gcd_1, FLAG_R_OPTIONAL }, 264 { "mpn_gcd_1N", speed_mpn_gcd_1N, FLAG_R_OPTIONAL }, 265 266 { "mpn_gcd", speed_mpn_gcd }, 267 #if 0 268 { "mpn_gcd_binary", speed_mpn_gcd_binary }, 269 { "mpn_gcd_accel", speed_mpn_gcd_accel }, 270 { "find_a", speed_find_a, FLAG_NODATA }, 271 #endif 272 273 { "mpn_gcdext", speed_mpn_gcdext }, 274 { "mpn_gcdext_single", speed_mpn_gcdext_single }, 275 { "mpn_gcdext_double", speed_mpn_gcdext_double }, 276 { "mpn_gcdext_one_single", speed_mpn_gcdext_one_single }, 277 { "mpn_gcdext_one_double", speed_mpn_gcdext_one_double }, 278 #if 0 279 { "mpn_gcdext_lehmer", speed_mpn_gcdext_lehmer }, 280 #endif 281 { "mpz_jacobi", speed_mpz_jacobi }, 282 { "mpn_jacobi_base", speed_mpn_jacobi_base }, 283 { "mpn_jacobi_base_1", speed_mpn_jacobi_base_1 }, 284 { "mpn_jacobi_base_2", speed_mpn_jacobi_base_2 }, 285 { "mpn_jacobi_base_3", speed_mpn_jacobi_base_3 }, 286 287 { "mpn_mul", speed_mpn_mul, FLAG_R_OPTIONAL }, 288 { "mpn_mul_basecase", speed_mpn_mul_basecase,FLAG_R_OPTIONAL }, 289 { "mpn_sqr_basecase", speed_mpn_sqr_basecase }, 290 #if HAVE_NATIVE_mpn_sqr_diagonal 291 { "mpn_sqr_diagonal", speed_mpn_sqr_diagonal }, 292 #endif 293 294 { "mpn_mul_n", speed_mpn_mul_n }, 295 { "mpn_sqr", speed_mpn_sqr }, 296 297 { "mpn_toom2_sqr", speed_mpn_toom2_sqr }, 298 { "mpn_toom3_sqr", speed_mpn_toom3_sqr }, 299 { "mpn_toom4_sqr", speed_mpn_toom4_sqr }, 300 { "mpn_toom6_sqr", speed_mpn_toom6_sqr }, 301 { "mpn_toom8_sqr", speed_mpn_toom8_sqr }, 302 { "mpn_toom22_mul", speed_mpn_toom22_mul }, 303 { "mpn_toom33_mul", speed_mpn_toom33_mul }, 304 { "mpn_toom44_mul", speed_mpn_toom44_mul }, 305 { "mpn_toom6h_mul", speed_mpn_toom6h_mul }, 306 { "mpn_toom8h_mul", speed_mpn_toom8h_mul }, 307 { "mpn_toom32_mul", speed_mpn_toom32_mul }, 308 { "mpn_toom42_mul", speed_mpn_toom42_mul }, 309 { "mpn_toom43_mul", speed_mpn_toom43_mul }, 310 { "mpn_toom63_mul", speed_mpn_toom63_mul }, 311 { "mpn_nussbaumer_mul", speed_mpn_nussbaumer_mul }, 312 { "mpn_nussbaumer_mul_sqr",speed_mpn_nussbaumer_mul_sqr}, 313 #if WANT_OLD_FFT_FULL 314 { "mpn_mul_fft_full", speed_mpn_mul_fft_full }, 315 { "mpn_mul_fft_full_sqr", speed_mpn_mul_fft_full_sqr }, 316 #endif 317 { "mpn_mul_fft", speed_mpn_mul_fft, FLAG_R_OPTIONAL }, 318 { "mpn_mul_fft_sqr", speed_mpn_mul_fft_sqr, FLAG_R_OPTIONAL }, 319 320 { "mpn_mullo_n", speed_mpn_mullo_n }, 321 { "mpn_mullo_basecase", speed_mpn_mullo_basecase }, 322 323 { "mpn_bc_mulmod_bnm1", speed_mpn_bc_mulmod_bnm1 }, 324 { "mpn_mulmod_bnm1", speed_mpn_mulmod_bnm1 }, 325 { "mpn_mulmod_bnm1_rounded", speed_mpn_mulmod_bnm1_rounded }, 326 { "mpn_sqrmod_bnm1", speed_mpn_sqrmod_bnm1 }, 327 328 { "mpn_invert", speed_mpn_invert }, 329 { "mpn_invertappr", speed_mpn_invertappr }, 330 { "mpn_ni_invertappr", speed_mpn_ni_invertappr }, 331 { "mpn_binvert", speed_mpn_binvert }, 332 333 { "mpn_sbpi1_div_qr", speed_mpn_sbpi1_div_qr, FLAG_R_OPTIONAL}, 334 { "mpn_dcpi1_div_qr", speed_mpn_dcpi1_div_qr, FLAG_R_OPTIONAL}, 335 { "mpn_mu_div_qr", speed_mpn_mu_div_qr, FLAG_R_OPTIONAL}, 336 { "mpn_mupi_div_qr", speed_mpn_mupi_div_qr, FLAG_R_OPTIONAL}, 337 { "mpn_sbpi1_divappr_q", speed_mpn_sbpi1_divappr_q, FLAG_R_OPTIONAL}, 338 { "mpn_dcpi1_divappr_q", speed_mpn_dcpi1_divappr_q, FLAG_R_OPTIONAL}, 339 340 { "mpn_sbpi1_bdiv_qr", speed_mpn_sbpi1_bdiv_qr }, 341 { "mpn_dcpi1_bdiv_qr", speed_mpn_dcpi1_bdiv_qr }, 342 { "mpn_sbpi1_bdiv_q", speed_mpn_sbpi1_bdiv_q }, 343 { "mpn_dcpi1_bdiv_q", speed_mpn_dcpi1_bdiv_q }, 344 345 { "mpn_get_str", speed_mpn_get_str, FLAG_R_OPTIONAL }, 346 { "mpn_set_str", speed_mpn_set_str, FLAG_R_OPTIONAL }, 347 { "mpn_set_str_basecase", speed_mpn_bc_set_str, FLAG_R_OPTIONAL }, 348 349 { "mpn_sqrtrem", speed_mpn_sqrtrem }, 350 { "mpn_rootrem", speed_mpn_rootrem, FLAG_R }, 351 352 { "mpn_fib2_ui", speed_mpn_fib2_ui, FLAG_NODATA }, 353 { "mpz_fib_ui", speed_mpz_fib_ui, FLAG_NODATA }, 354 { "mpz_fib2_ui", speed_mpz_fib2_ui, FLAG_NODATA }, 355 { "mpz_lucnum_ui", speed_mpz_lucnum_ui, FLAG_NODATA }, 356 { "mpz_lucnum2_ui", speed_mpz_lucnum2_ui, FLAG_NODATA }, 357 358 { "mpz_add", speed_mpz_add }, 359 { "mpz_bin_uiui", speed_mpz_bin_uiui, FLAG_NODATA | FLAG_R_OPTIONAL }, 360 { "mpz_fac_ui", speed_mpz_fac_ui, FLAG_NODATA }, 361 { "mpz_powm", speed_mpz_powm }, 362 { "mpz_powm_mod", speed_mpz_powm_mod }, 363 { "mpz_powm_redc", speed_mpz_powm_redc }, 364 { "mpz_powm_ui", speed_mpz_powm_ui, FLAG_R_OPTIONAL }, 365 366 { "mpz_mod", speed_mpz_mod }, 367 { "mpn_redc_1", speed_mpn_redc_1 }, 368 { "mpn_redc_2", speed_mpn_redc_2 }, 369 { "mpn_redc_n", speed_mpn_redc_n }, 370 371 { "MPN_COPY", speed_MPN_COPY }, 372 { "MPN_COPY_INCR", speed_MPN_COPY_INCR }, 373 { "MPN_COPY_DECR", speed_MPN_COPY_DECR }, 374 { "memcpy", speed_memcpy }, 375 #if HAVE_NATIVE_mpn_copyi 376 { "mpn_copyi", speed_mpn_copyi }, 377 #endif 378 #if HAVE_NATIVE_mpn_copyd 379 { "mpn_copyd", speed_mpn_copyd }, 380 #endif 381 #if HAVE_NATIVE_mpn_addlsh1_n 382 { "mpn_addlsh1_n", speed_mpn_addlsh1_n }, 383 #endif 384 #if HAVE_NATIVE_mpn_sublsh1_n 385 { "mpn_sublsh1_n", speed_mpn_sublsh1_n }, 386 #endif 387 #if HAVE_NATIVE_mpn_rsblsh1_n 388 { "mpn_rsblsh1_n", speed_mpn_rsblsh1_n }, 389 #endif 390 #if HAVE_NATIVE_mpn_addlsh2_n 391 { "mpn_addlsh2_n", speed_mpn_addlsh2_n }, 392 #endif 393 #if HAVE_NATIVE_mpn_sublsh2_n 394 { "mpn_sublsh2_n", speed_mpn_sublsh2_n }, 395 #endif 396 #if HAVE_NATIVE_mpn_rsblsh2_n 397 { "mpn_rsblsh2_n", speed_mpn_rsblsh2_n }, 398 #endif 399 #if HAVE_NATIVE_mpn_rsh1add_n 400 { "mpn_rsh1add_n", speed_mpn_rsh1add_n }, 401 #endif 402 #if HAVE_NATIVE_mpn_rsh1sub_n 403 { "mpn_rsh1sub_n", speed_mpn_rsh1sub_n }, 404 #endif 405 406 { "MPN_ZERO", speed_MPN_ZERO }, 407 408 { "binvert_limb", speed_binvert_limb, FLAG_NODATA }, 409 { "binvert_limb_mul1", speed_binvert_limb_mul1, FLAG_NODATA }, 410 { "binvert_limb_loop", speed_binvert_limb_loop, FLAG_NODATA }, 411 { "binvert_limb_cond", speed_binvert_limb_cond, FLAG_NODATA }, 412 { "binvert_limb_arith", speed_binvert_limb_arith, FLAG_NODATA }, 413 414 { "malloc_free", speed_malloc_free }, 415 { "malloc_realloc_free", speed_malloc_realloc_free }, 416 { "gmp_allocate_free", speed_gmp_allocate_free }, 417 { "gmp_allocate_reallocate_free", speed_gmp_allocate_reallocate_free }, 418 { "mpz_init_clear", speed_mpz_init_clear }, 419 { "mpq_init_clear", speed_mpq_init_clear }, 420 { "mpf_init_clear", speed_mpf_init_clear }, 421 { "mpz_init_realloc_clear", speed_mpz_init_realloc_clear }, 422 423 { "umul_ppmm", speed_umul_ppmm, FLAG_R_OPTIONAL }, 424 #if HAVE_NATIVE_mpn_umul_ppmm 425 { "mpn_umul_ppmm", speed_mpn_umul_ppmm, FLAG_R_OPTIONAL }, 426 #endif 427 #if HAVE_NATIVE_mpn_umul_ppmm_r 428 { "mpn_umul_ppmm_r", speed_mpn_umul_ppmm_r, FLAG_R_OPTIONAL }, 429 #endif 430 431 { "count_leading_zeros", speed_count_leading_zeros, FLAG_NODATA | FLAG_R_OPTIONAL }, 432 { "count_trailing_zeros", speed_count_trailing_zeros, FLAG_NODATA | FLAG_R_OPTIONAL }, 433 434 { "udiv_qrnnd", speed_udiv_qrnnd, FLAG_R_OPTIONAL }, 435 { "udiv_qrnnd_preinv1", speed_udiv_qrnnd_preinv1, FLAG_R_OPTIONAL }, 436 { "udiv_qrnnd_preinv2", speed_udiv_qrnnd_preinv2, FLAG_R_OPTIONAL }, 437 { "udiv_qrnnd_c", speed_udiv_qrnnd_c, FLAG_R_OPTIONAL }, 438 #if HAVE_NATIVE_mpn_udiv_qrnnd 439 { "mpn_udiv_qrnnd", speed_mpn_udiv_qrnnd, FLAG_R_OPTIONAL }, 440 #endif 441 #if HAVE_NATIVE_mpn_udiv_qrnnd_r 442 { "mpn_udiv_qrnnd_r", speed_mpn_udiv_qrnnd_r, FLAG_R_OPTIONAL }, 443 #endif 444 { "invert_limb", speed_invert_limb, FLAG_R_OPTIONAL }, 445 446 { "operator_div", speed_operator_div, FLAG_R_OPTIONAL }, 447 { "operator_mod", speed_operator_mod, FLAG_R_OPTIONAL }, 448 449 { "gmp_randseed", speed_gmp_randseed, FLAG_R_OPTIONAL }, 450 { "gmp_randseed_ui", speed_gmp_randseed_ui, FLAG_R_OPTIONAL | FLAG_NODATA }, 451 { "mpz_urandomb", speed_mpz_urandomb, FLAG_R_OPTIONAL | FLAG_NODATA }, 452 453 #ifdef SPEED_EXTRA_ROUTINES 454 SPEED_EXTRA_ROUTINES 455 #endif 456 #ifdef SPEED_EXTRA_ROUTINES2 457 SPEED_EXTRA_ROUTINES2 458 #endif 459 }; 460 461 462 struct choice_t { 463 const struct routine_t *p; 464 mp_limb_t r; 465 double scale; 466 double time; 467 int no_time; 468 double prev_time; 469 const char *name; 470 }; 471 struct choice_t *choice; 472 int num_choices = 0; 473 474 475 void 476 data_fill (mp_ptr ptr, mp_size_t size) 477 { 478 switch (option_data) { 479 case DATA_RANDOM: 480 mpn_random (ptr, size); 481 break; 482 case DATA_RANDOM2: 483 mpn_random2 (ptr, size); 484 break; 485 case DATA_ZEROS: 486 MPN_ZERO (ptr, size); 487 break; 488 case DATA_AAS: 489 MPN_FILL (ptr, size, GMP_NUMB_0xAA); 490 break; 491 case DATA_FFS: 492 MPN_FILL (ptr, size, GMP_NUMB_MAX); 493 break; 494 case DATA_2FD: 495 MPN_FILL (ptr, size, GMP_NUMB_MAX); 496 ptr[0] -= 2; 497 break; 498 default: 499 abort(); 500 /*NOTREACHED*/ 501 } 502 } 503 504 /* The code here handling the various combinations of output options isn't 505 too attractive, but it works and is fairly clean. */ 506 507 #define SIZE_TO_DIVISOR(n) \ 508 (option_square == 1 ? (n)*(n) \ 509 : option_square == 2 ? (n)*((n)+1)/2 \ 510 : (n)) 511 512 void 513 run_one (FILE *fp, struct speed_params *s, mp_size_t prev_size) 514 { 515 const char *first_open_fastest, *first_open_notfastest, *first_close; 516 int i, fastest, want_data; 517 double fastest_time; 518 TMP_DECL; 519 520 TMP_MARK; 521 522 /* allocate data, unless all routines are NODATA */ 523 want_data = 0; 524 for (i = 0; i < num_choices; i++) 525 want_data |= ((choice[i].p->flag & FLAG_NODATA) == 0); 526 527 if (want_data) 528 { 529 SPEED_TMP_ALLOC_LIMBS (sp.xp, s->size, s->align_xp); 530 SPEED_TMP_ALLOC_LIMBS (sp.yp, s->size, s->align_yp); 531 532 data_fill (s->xp, s->size); 533 data_fill (s->yp, s->size); 534 } 535 else 536 { 537 sp.xp = NULL; 538 sp.yp = NULL; 539 } 540 541 if (prev_size == -1 && option_cmp == CMP_DIFFPREV) 542 { 543 first_open_fastest = "(#"; 544 first_open_notfastest = " ("; 545 first_close = ")"; 546 } 547 else 548 { 549 first_open_fastest = "#"; 550 first_open_notfastest = " "; 551 first_close = ""; 552 } 553 554 fastest = -1; 555 fastest_time = -1.0; 556 for (i = 0; i < num_choices; i++) 557 { 558 s->r = choice[i].r; 559 choice[i].time = speed_measure (choice[i].p->fun, s); 560 choice[i].no_time = (choice[i].time == -1.0); 561 if (! choice[i].no_time) 562 choice[i].time *= choice[i].scale; 563 564 /* Apply the effect of CMP_DIFFPREV, but the new choice[i].prev_time 565 is before any differences. */ 566 { 567 double t; 568 t = choice[i].time; 569 if (t != -1.0 && option_cmp == CMP_DIFFPREV && prev_size != -1) 570 { 571 if (choice[i].prev_time == -1.0) 572 choice[i].no_time = 1; 573 else 574 choice[i].time = choice[i].time - choice[i].prev_time; 575 } 576 choice[i].prev_time = t; 577 } 578 579 if (choice[i].no_time) 580 continue; 581 582 /* Look for the fastest after CMP_DIFFPREV has been applied, but 583 before CMP_RATIO or CMP_DIFFERENCE. There's only a fastest shown 584 if there's more than one routine. */ 585 if (num_choices > 1 && (fastest == -1 || choice[i].time < fastest_time)) 586 { 587 fastest = i; 588 fastest_time = choice[i].time; 589 } 590 591 if (option_cmp == CMP_DIFFPREV) 592 { 593 /* Conversion for UNIT_CYCLESPERLIMB differs in CMP_DIFFPREV. */ 594 if (option_unit == UNIT_CYCLES) 595 choice[i].time /= speed_cycletime; 596 else if (option_unit == UNIT_CYCLESPERLIMB) 597 { 598 if (prev_size == -1) 599 choice[i].time /= speed_cycletime; 600 else 601 choice[i].time /= (speed_cycletime 602 * (SIZE_TO_DIVISOR(s->size) 603 - SIZE_TO_DIVISOR(prev_size))); 604 } 605 } 606 else 607 { 608 if (option_unit == UNIT_CYCLES) 609 choice[i].time /= speed_cycletime; 610 else if (option_unit == UNIT_CYCLESPERLIMB) 611 choice[i].time /= (speed_cycletime * SIZE_TO_DIVISOR(s->size)); 612 613 if (option_cmp == CMP_RATIO && i > 0) 614 { 615 /* A ratio isn't affected by the units chosen. */ 616 if (choice[0].no_time || choice[0].time == 0.0) 617 choice[i].no_time = 1; 618 else 619 choice[i].time /= choice[0].time; 620 } 621 else if (option_cmp == CMP_DIFFERENCE && i > 0) 622 { 623 if (choice[0].no_time) 624 { 625 choice[i].no_time = 1; 626 continue; 627 } 628 choice[i].time -= choice[0].time; 629 } 630 } 631 } 632 633 if (option_gnuplot) 634 { 635 /* In CMP_DIFFPREV, don't print anything for the first size, start 636 with the second where an actual difference is available. 637 638 In CMP_RATIO, print the first column as 1.0. 639 640 The 9 decimals printed is much more than the expected precision of 641 the measurements actually. */ 642 643 if (! (option_cmp == CMP_DIFFPREV && prev_size == -1)) 644 { 645 fprintf (fp, "%-6ld ", s->size); 646 for (i = 0; i < num_choices; i++) 647 fprintf (fp, " %.9e", 648 choice[i].no_time ? 0.0 649 : (option_cmp == CMP_RATIO && i == 0) ? 1.0 650 : choice[i].time); 651 fprintf (fp, "\n"); 652 } 653 } 654 else 655 { 656 fprintf (fp, "%-6ld ", s->size); 657 for (i = 0; i < num_choices; i++) 658 { 659 char buf[128]; 660 int decimals; 661 662 if (choice[i].no_time) 663 { 664 fprintf (fp, " %*s", COLUMN_WIDTH, "n/a"); 665 } 666 else 667 {if (option_unit == UNIT_CYCLESPERLIMB 668 || (option_cmp == CMP_RATIO && i > 0)) 669 decimals = 4; 670 else if (option_unit == UNIT_CYCLES) 671 decimals = 2; 672 else 673 decimals = 9; 674 675 sprintf (buf, "%s%.*f%s", 676 i == fastest ? first_open_fastest : first_open_notfastest, 677 decimals, choice[i].time, first_close); 678 fprintf (fp, " %*s", COLUMN_WIDTH, buf); 679 } 680 } 681 fprintf (fp, "\n"); 682 } 683 684 TMP_FREE; 685 } 686 687 void 688 run_all (FILE *fp) 689 { 690 mp_size_t prev_size; 691 int i; 692 TMP_DECL; 693 694 TMP_MARK; 695 SPEED_TMP_ALLOC_LIMBS (sp.xp_block, SPEED_BLOCK_SIZE, sp.align_xp); 696 SPEED_TMP_ALLOC_LIMBS (sp.yp_block, SPEED_BLOCK_SIZE, sp.align_yp); 697 698 data_fill (sp.xp_block, SPEED_BLOCK_SIZE); 699 data_fill (sp.yp_block, SPEED_BLOCK_SIZE); 700 701 for (i = 0; i < size_num; i++) 702 { 703 sp.size = size_array[i].start; 704 prev_size = -1; 705 for (;;) 706 { 707 mp_size_t step; 708 709 if (option_data == DATA_2FD && sp.size >= 2) 710 sp.xp[sp.size-1] = 2; 711 712 run_one (fp, &sp, prev_size); 713 prev_size = sp.size; 714 715 if (option_data == DATA_2FD && sp.size >= 2) 716 sp.xp[sp.size-1] = MP_LIMB_T_MAX; 717 718 if (option_factor != 0.0) 719 { 720 step = (mp_size_t) (sp.size * option_factor - sp.size); 721 if (step < 1) 722 step = 1; 723 } 724 else 725 step = 1; 726 if (step < option_step) 727 step = option_step; 728 729 sp.size += step; 730 if (sp.size > size_array[i].end) 731 break; 732 } 733 } 734 735 TMP_FREE; 736 } 737 738 739 FILE * 740 fopen_for_write (const char *filename) 741 { 742 FILE *fp; 743 if ((fp = fopen (filename, "w")) == NULL) 744 { 745 fprintf (stderr, "Cannot create %s\n", filename); 746 exit(1); 747 } 748 return fp; 749 } 750 751 void 752 fclose_written (FILE *fp, const char *filename) 753 { 754 int err; 755 756 err = ferror (fp); 757 err |= fclose (fp); 758 759 if (err) 760 { 761 fprintf (stderr, "Error writing %s\n", filename); 762 exit(1); 763 } 764 } 765 766 767 void 768 run_gnuplot (int argc, char *argv[]) 769 { 770 char *plot_filename; 771 char *data_filename; 772 FILE *fp; 773 int i; 774 775 plot_filename = (char *) (*__gmp_allocate_func) 776 (strlen (option_gnuplot_basename) + 20); 777 data_filename = (char *) (*__gmp_allocate_func) 778 (strlen (option_gnuplot_basename) + 20); 779 780 sprintf (plot_filename, "%s.gnuplot", option_gnuplot_basename); 781 sprintf (data_filename, "%s.data", option_gnuplot_basename); 782 783 fp = fopen_for_write (plot_filename); 784 785 fprintf (fp, "# Generated with:\n"); 786 fprintf (fp, "#"); 787 for (i = 0; i < argc; i++) 788 fprintf (fp, " %s", argv[i]); 789 fprintf (fp, "\n"); 790 fprintf (fp, "\n"); 791 792 fprintf (fp, "reset\n"); 793 794 /* Putting the key at the top left is usually good, and you can change it 795 interactively if it's not. */ 796 fprintf (fp, "set key left\n"); 797 798 /* designed to make it possible to see crossovers easily */ 799 fprintf (fp, "set data style lines\n"); 800 801 fprintf (fp, "plot "); 802 for (i = 0; i < num_choices; i++) 803 { 804 fprintf (fp, " \"%s\" using 1:%d", data_filename, i+2); 805 fprintf (fp, " title \"%s\"", choice[i].name); 806 807 if (i != num_choices-1) 808 fprintf (fp, ", \\"); 809 fprintf (fp, "\n"); 810 } 811 812 fprintf (fp, "load \"-\"\n"); 813 fclose_written (fp, plot_filename); 814 815 fp = fopen_for_write (data_filename); 816 817 /* Unbuffered so you can see where the program was up to if it crashes or 818 you kill it. */ 819 setbuf (fp, NULL); 820 821 run_all (fp); 822 fclose_written (fp, data_filename); 823 } 824 825 826 /* Return a limb with n many one bits (starting from the least significant) */ 827 828 #define LIMB_ONES(n) \ 829 ((n) == GMP_LIMB_BITS ? MP_LIMB_T_MAX \ 830 : (n) == 0 ? CNST_LIMB(0) \ 831 : (CNST_LIMB(1) << (n)) - 1) 832 833 mp_limb_t 834 r_string (const char *s) 835 { 836 const char *s_orig = s; 837 long n; 838 839 if (strcmp (s, "aas") == 0) 840 return GMP_NUMB_0xAA; 841 842 { 843 mpz_t z; 844 mp_limb_t l; 845 int set, siz; 846 847 mpz_init (z); 848 set = mpz_set_str (z, s, 0); 849 siz = SIZ(z); 850 l = (siz == 0 ? 0 : siz > 0 ? PTR(z)[0] : -PTR(z)[0]); 851 mpz_clear (z); 852 if (set == 0) 853 { 854 if (siz > 1 || siz < -1) 855 printf ("Warning, r parameter %s truncated to %d bits\n", 856 s_orig, GMP_LIMB_BITS); 857 return l; 858 } 859 } 860 861 if (s[0] == '0' && (s[1] == 'x' || s[1] == 'X')) 862 n = strtoul (s+2, (char **) &s, 16); 863 else 864 n = strtol (s, (char **) &s, 10); 865 866 if (strcmp (s, "bits") == 0) 867 { 868 mp_limb_t l; 869 if (n > GMP_LIMB_BITS) 870 { 871 fprintf (stderr, "%ld bit parameter invalid (max %d bits)\n", 872 n, GMP_LIMB_BITS); 873 exit (1); 874 } 875 mpn_random (&l, 1); 876 return (l | (CNST_LIMB(1) << (n-1))) & LIMB_ONES(n); 877 } 878 else if (strcmp (s, "ones") == 0) 879 { 880 if (n > GMP_LIMB_BITS) 881 { 882 fprintf (stderr, "%ld bit parameter invalid (max %d bits)\n", 883 n, GMP_LIMB_BITS); 884 exit (1); 885 } 886 return LIMB_ONES (n); 887 } 888 else if (*s != '\0') 889 { 890 fprintf (stderr, "invalid r parameter: %s\n", s_orig); 891 exit (1); 892 } 893 894 return n; 895 } 896 897 898 void 899 routine_find (struct choice_t *c, const char *s_orig) 900 { 901 const char *s; 902 int i; 903 size_t nlen; 904 905 c->name = s_orig; 906 s = strchr (s_orig, '*'); 907 if (s != NULL) 908 { 909 c->scale = atof(s_orig); 910 s++; 911 } 912 else 913 { 914 c->scale = 1.0; 915 s = s_orig; 916 } 917 918 for (i = 0; i < numberof (routine); i++) 919 { 920 nlen = strlen (routine[i].name); 921 if (memcmp (s, routine[i].name, nlen) != 0) 922 continue; 923 924 if (s[nlen] == '.') 925 { 926 /* match, with a .r parameter */ 927 928 if (! (routine[i].flag & (FLAG_R|FLAG_R_OPTIONAL))) 929 { 930 fprintf (stderr, 931 "Choice %s bad: doesn't take a \".<r>\" parameter\n", 932 s_orig); 933 exit (1); 934 } 935 936 c->p = &routine[i]; 937 c->r = r_string (s + nlen + 1); 938 return; 939 } 940 941 if (s[nlen] == '\0') 942 { 943 /* match, with no parameter */ 944 945 if (routine[i].flag & FLAG_R) 946 { 947 fprintf (stderr, 948 "Choice %s bad: needs a \".<r>\" parameter\n", 949 s_orig); 950 exit (1); 951 } 952 953 c->p = &routine[i]; 954 c->r = 0; 955 return; 956 } 957 } 958 959 fprintf (stderr, "Choice %s unrecognised\n", s_orig); 960 exit (1); 961 } 962 963 964 void 965 usage (void) 966 { 967 int i; 968 969 speed_time_init (); 970 971 printf ("Usage: speed [-options] -s size <routine>...\n"); 972 printf ("Measure the speed of some routines.\n"); 973 printf ("Times are in seconds, accuracy is shown.\n"); 974 printf ("\n"); 975 printf (" -p num set precision as number of time units each routine must run\n"); 976 printf (" -s size[-end][,size[-end]]... sizes to measure\n"); 977 printf (" single sizes or ranges, sep with comma or use multiple -s\n"); 978 printf (" -t step step through sizes by given amount\n"); 979 printf (" -f factor step through sizes by given factor (eg. 1.05)\n"); 980 printf (" -r show times as ratios of the first routine\n"); 981 printf (" -d show times as difference from the first routine\n"); 982 printf (" -D show times as difference from previous size shown\n"); 983 printf (" -c show times in CPU cycles\n"); 984 printf (" -C show times in cycles per limb\n"); 985 printf (" -u print resource usage (memory) at end\n"); 986 printf (" -P name output plot files \"name.gnuplot\" and \"name.data\"\n"); 987 printf (" -a <type> use given data: random(default), random2, zeros, aas, ffs, 2fd\n"); 988 printf (" -x, -y, -w, -W <align> specify data alignments, sources and dests\n"); 989 printf (" -o addrs print addresses of data blocks\n"); 990 printf ("\n"); 991 printf ("If both -t and -f are used, it means step by the factor or the step, whichever\n"); 992 printf ("is greater.\n"); 993 printf ("If both -C and -D are used, it means cycles per however many limbs between a\n"); 994 printf ("size and the previous size.\n"); 995 printf ("\n"); 996 printf ("After running with -P, plots can be viewed with Gnuplot or Quickplot.\n"); 997 printf ("\"gnuplot name.gnuplot\" (use \"set logscale xy; replot\" at the prompt for\n"); 998 printf ("a log/log plot).\n"); 999 printf ("\"quickplot -s name.data\" (has interactive zooming, and note -s is important\n"); 1000 printf ("when viewing more than one routine, it means same axis scales for all data).\n"); 1001 printf ("\n"); 1002 printf ("The available routines are as follows.\n"); 1003 printf ("\n"); 1004 1005 for (i = 0; i < numberof (routine); i++) 1006 { 1007 if (routine[i].flag & FLAG_R) 1008 printf ("\t%s.r\n", routine[i].name); 1009 else if (routine[i].flag & FLAG_R_OPTIONAL) 1010 printf ("\t%s (optional .r)\n", routine[i].name); 1011 else 1012 printf ("\t%s\n", routine[i].name); 1013 } 1014 printf ("\n"); 1015 printf ("Routines with a \".r\" need an extra parameter, for example mpn_lshift.6\n"); 1016 printf ("r should be in decimal, or use 0xN for hexadecimal.\n"); 1017 printf ("\n"); 1018 printf ("Special forms for r are \"<N>bits\" for a random N bit number, \"<N>ones\" for\n"); 1019 printf ("N one bits, or \"aas\" for 0xAA..AA.\n"); 1020 printf ("\n"); 1021 printf ("Times for sizes out of the range accepted by a routine are shown as 0.\n"); 1022 printf ("The fastest routine at each size is marked with a # (free form output only).\n"); 1023 printf ("\n"); 1024 printf ("%s", speed_time_string); 1025 printf ("\n"); 1026 printf ("Gnuplot home page http://www.gnuplot.info/\n"); 1027 printf ("Quickplot home page http://quickplot.sourceforge.net/\n"); 1028 } 1029 1030 void 1031 check_align_option (const char *name, mp_size_t align) 1032 { 1033 if (align < 0 || align > SPEED_TMP_ALLOC_ADJUST_MASK) 1034 { 1035 fprintf (stderr, "Alignment request out of range: %s %ld\n", 1036 name, (long) align); 1037 fprintf (stderr, " should be 0 to %d (limbs), inclusive\n", 1038 SPEED_TMP_ALLOC_ADJUST_MASK); 1039 exit (1); 1040 } 1041 } 1042 1043 int 1044 main (int argc, char *argv[]) 1045 { 1046 int i; 1047 int opt; 1048 1049 /* Unbuffered so output goes straight out when directed to a pipe or file 1050 and isn't lost on killing the program half way. */ 1051 setbuf (stdout, NULL); 1052 1053 for (;;) 1054 { 1055 opt = getopt(argc, argv, "a:CcDdEFf:o:p:P:rRs:t:ux:y:w:W:z"); 1056 if (opt == EOF) 1057 break; 1058 1059 switch (opt) { 1060 case 'a': 1061 if (strcmp (optarg, "random") == 0) option_data = DATA_RANDOM; 1062 else if (strcmp (optarg, "random2") == 0) option_data = DATA_RANDOM2; 1063 else if (strcmp (optarg, "zeros") == 0) option_data = DATA_ZEROS; 1064 else if (strcmp (optarg, "aas") == 0) option_data = DATA_AAS; 1065 else if (strcmp (optarg, "ffs") == 0) option_data = DATA_FFS; 1066 else if (strcmp (optarg, "2fd") == 0) option_data = DATA_2FD; 1067 else 1068 { 1069 fprintf (stderr, "unrecognised data option: %s\n", optarg); 1070 exit (1); 1071 } 1072 break; 1073 case 'C': 1074 if (option_unit != UNIT_SECONDS) goto bad_unit; 1075 option_unit = UNIT_CYCLESPERLIMB; 1076 break; 1077 case 'c': 1078 if (option_unit != UNIT_SECONDS) 1079 { 1080 bad_unit: 1081 fprintf (stderr, "cannot use more than one of -c, -C\n"); 1082 exit (1); 1083 } 1084 option_unit = UNIT_CYCLES; 1085 break; 1086 case 'D': 1087 if (option_cmp != CMP_ABSOLUTE) goto bad_cmp; 1088 option_cmp = CMP_DIFFPREV; 1089 break; 1090 case 'd': 1091 if (option_cmp != CMP_ABSOLUTE) 1092 { 1093 bad_cmp: 1094 fprintf (stderr, "cannot use more than one of -d, -D, -r\n"); 1095 exit (1); 1096 } 1097 option_cmp = CMP_DIFFERENCE; 1098 break; 1099 case 'E': 1100 option_square = 1; 1101 break; 1102 case 'F': 1103 option_square = 2; 1104 break; 1105 case 'f': 1106 option_factor = atof (optarg); 1107 if (option_factor <= 1.0) 1108 { 1109 fprintf (stderr, "-f factor must be > 1.0\n"); 1110 exit (1); 1111 } 1112 break; 1113 case 'o': 1114 speed_option_set (optarg); 1115 break; 1116 case 'P': 1117 option_gnuplot = 1; 1118 option_gnuplot_basename = optarg; 1119 break; 1120 case 'p': 1121 speed_precision = atoi (optarg); 1122 break; 1123 case 'R': 1124 option_seed = time (NULL); 1125 break; 1126 case 'r': 1127 if (option_cmp != CMP_ABSOLUTE) 1128 goto bad_cmp; 1129 option_cmp = CMP_RATIO; 1130 break; 1131 case 's': 1132 { 1133 char *s; 1134 for (s = strtok (optarg, ","); s != NULL; s = strtok (NULL, ",")) 1135 { 1136 if (size_num == size_allocnum) 1137 { 1138 size_array = (struct size_array_t *) 1139 __gmp_allocate_or_reallocate 1140 (size_array, 1141 size_allocnum * sizeof(size_array[0]), 1142 (size_allocnum+10) * sizeof(size_array[0])); 1143 size_allocnum += 10; 1144 } 1145 if (sscanf (s, "%ld-%ld", 1146 &size_array[size_num].start, 1147 &size_array[size_num].end) != 2) 1148 { 1149 size_array[size_num].start = size_array[size_num].end 1150 = atol (s); 1151 } 1152 1153 if (size_array[size_num].start < 0 1154 || size_array[size_num].end < 0 1155 || size_array[size_num].start > size_array[size_num].end) 1156 { 1157 fprintf (stderr, "invalid size parameter: %s\n", s); 1158 exit (1); 1159 } 1160 1161 size_num++; 1162 } 1163 } 1164 break; 1165 case 't': 1166 option_step = atol (optarg); 1167 if (option_step < 1) 1168 { 1169 fprintf (stderr, "-t step must be >= 1\n"); 1170 exit (1); 1171 } 1172 break; 1173 case 'u': 1174 option_resource_usage = 1; 1175 break; 1176 case 'z': 1177 sp.cache = 1; 1178 break; 1179 case 'x': 1180 sp.align_xp = atol (optarg); 1181 check_align_option ("-x", sp.align_xp); 1182 break; 1183 case 'y': 1184 sp.align_yp = atol (optarg); 1185 check_align_option ("-y", sp.align_yp); 1186 break; 1187 case 'w': 1188 sp.align_wp = atol (optarg); 1189 check_align_option ("-w", sp.align_wp); 1190 break; 1191 case 'W': 1192 sp.align_wp2 = atol (optarg); 1193 check_align_option ("-W", sp.align_wp2); 1194 break; 1195 case '?': 1196 exit(1); 1197 } 1198 } 1199 1200 if (optind >= argc) 1201 { 1202 usage (); 1203 exit (1); 1204 } 1205 1206 if (size_num == 0) 1207 { 1208 fprintf (stderr, "-s <size> must be specified\n"); 1209 exit (1); 1210 } 1211 1212 gmp_randinit_default (__gmp_rands); 1213 __gmp_rands_initialized = 1; 1214 gmp_randseed_ui (__gmp_rands, option_seed); 1215 1216 choice = (struct choice_t *) (*__gmp_allocate_func) 1217 ((argc - optind) * sizeof(choice[0])); 1218 for ( ; optind < argc; optind++) 1219 { 1220 struct choice_t c; 1221 routine_find (&c, argv[optind]); 1222 choice[num_choices] = c; 1223 num_choices++; 1224 } 1225 1226 if ((option_cmp == CMP_RATIO || option_cmp == CMP_DIFFERENCE) && 1227 num_choices < 2) 1228 { 1229 fprintf (stderr, "WARNING, -d or -r does nothing when only one routine requested\n"); 1230 } 1231 1232 speed_time_init (); 1233 if (option_unit == UNIT_CYCLES || option_unit == UNIT_CYCLESPERLIMB) 1234 speed_cycletime_need_cycles (); 1235 else 1236 speed_cycletime_need_seconds (); 1237 1238 if (option_gnuplot) 1239 { 1240 run_gnuplot (argc, argv); 1241 } 1242 else 1243 { 1244 if (option_unit == UNIT_SECONDS) 1245 printf ("overhead %.9f secs", speed_measure (speed_noop, NULL)); 1246 else 1247 printf ("overhead %.2f cycles", 1248 speed_measure (speed_noop, NULL) / speed_cycletime); 1249 printf (", precision %d units of %.2e secs", 1250 speed_precision, speed_unittime); 1251 1252 if (speed_cycletime == 1.0 || speed_cycletime == 0.0) 1253 printf (", CPU freq unknown\n"); 1254 else 1255 printf (", CPU freq %.2f MHz\n", 1e-6/speed_cycletime); 1256 1257 printf (" "); 1258 for (i = 0; i < num_choices; i++) 1259 printf (" %*s", COLUMN_WIDTH, choice[i].name); 1260 printf ("\n"); 1261 1262 run_all (stdout); 1263 } 1264 1265 if (option_resource_usage) 1266 { 1267 #if HAVE_GETRUSAGE 1268 { 1269 /* This doesn't give data sizes on linux 2.0.x, only utime. */ 1270 struct rusage r; 1271 if (getrusage (RUSAGE_SELF, &r) != 0) 1272 perror ("getrusage"); 1273 else 1274 printf ("getrusage(): utime %ld.%06ld data %ld stack %ld maxresident %ld\n", 1275 r.ru_utime.tv_sec, r.ru_utime.tv_usec, 1276 r.ru_idrss, r.ru_isrss, r.ru_ixrss); 1277 } 1278 #else 1279 printf ("getrusage() not available\n"); 1280 #endif 1281 1282 /* Linux kernel. */ 1283 { 1284 char buf[128]; 1285 sprintf (buf, "/proc/%d/status", getpid()); 1286 if (access (buf, R_OK) == 0) 1287 { 1288 sprintf (buf, "cat /proc/%d/status", getpid()); 1289 system (buf); 1290 } 1291 1292 } 1293 } 1294 1295 return 0; 1296 } 1297