/* Speed measuring program.

Copyright 1999, 2000, 2001, 2002, 2003, 2005, 2006, 2008, 2009, 2010,
2011, 2012 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 3 of the License, or (at your
option) any later version.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */

/* Usage message is in the code below, run with no arguments to print it.
   See README for interesting applications.

   To add a new routine foo(), create a speed_foo() function in the style of
   the existing ones and add an entry in the routine[] array.  Put FLAG_R if
   speed_foo() wants an "r" parameter.

   The routines don't have help messages or descriptions, but most have
   suggestive names.  See the source code for full details.

*/

#include "config.h"

#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#if HAVE_UNISTD_H
#include <unistd.h>  /* for getpid, R_OK */
#endif

#if TIME_WITH_SYS_TIME
# include <sys/time.h>  /* for struct timeval */
# include <time.h>
#else
# if HAVE_SYS_TIME_H
# include <sys/time.h>
# else
# include <time.h>
# endif
#endif

#if HAVE_SYS_RESOURCE_H
#include <sys/resource.h>  /* for getrusage() */
#endif


#include "gmp.h"
#include "gmp-impl.h"
#include "longlong.h"   /* for the benefit of speed-many.c */
#include "tests.h"
#include "speed.h"


#if !HAVE_DECL_OPTARG
extern char *optarg;
extern int optind, opterr;
#endif

/* Fallback for systems without strtoul: parse as signed and convert.  */
#if !HAVE_STRTOUL
#define strtoul(p,e,b)  (unsigned long) strtol(p,e,b)
#endif

#ifdef SPEED_EXTRA_PROTOS
SPEED_EXTRA_PROTOS
#endif
#ifdef SPEED_EXTRA_PROTOS2
SPEED_EXTRA_PROTOS2
#endif


/* Store the limb value n into each of the size limbs at ptr.  size must
   not be negative.  */
#define MPN_FILL(ptr, size, n)          \
  do {                                  \
    mp_size_t __i;                      \
    ASSERT ((size) >= 0);               \
    for (__i = 0; __i < (size); __i++)  \
      (ptr)[__i] = (n);                 \
  } while (0)


/* A limb of repeating 0xAA bytes, masked to the numb bits.  Only defined
   here for 32-bit and 64-bit limbs.  */
#if GMP_LIMB_BITS == 32
#define GMP_NUMB_0xAA  (CNST_LIMB(0xAAAAAAAA) & GMP_NUMB_MASK)
#endif
#if GMP_LIMB_BITS == 64
#define GMP_NUMB_0xAA  (CNST_LIMB(0xAAAAAAAAAAAAAAAA) & GMP_NUMB_MASK)
#endif


/* How run_one() presents each time: as measured (CMP_ABSOLUTE), divided by
   the first routine's time (CMP_RATIO), with the first routine's time
   subtracted (CMP_DIFFERENCE), or with the same routine's time at the
   previous size subtracted (CMP_DIFFPREV).  */
#define CMP_ABSOLUTE     1
#define CMP_RATIO        2
#define CMP_DIFFERENCE   3
#define CMP_DIFFPREV     4
int  option_cmp = CMP_ABSOLUTE;

/* Output unit: run_one() divides by speed_cycletime for UNIT_CYCLES, and
   additionally by the size divisor for UNIT_CYCLESPERLIMB.  */
#define UNIT_SECONDS        1
#define UNIT_CYCLES         2
#define UNIT_CYCLESPERLIMB  3
int  option_unit = UNIT_SECONDS;

/* Operand data pattern, selected with -a and produced by data_fill().  */
#define DATA_RANDOM   1
#define DATA_RANDOM2  2
#define DATA_ZEROS    3
#define DATA_AAS      4
#define DATA_FFS      5
#define DATA_2FD      6
int  option_data = DATA_RANDOM;

/* option_square: 0 normally, 1 for -E, 2 for -F; selects the divisor used
   by SIZE_TO_DIVISOR.
   option_factor: -f multiplicative size step, 0.0 when not given.
   option_step: -t additive size step, used by run_all() as the minimum
   step between sizes.
   option_gnuplot, option_gnuplot_basename: set by -P; run_gnuplot() writes
   "<basename>.gnuplot" and "<basename>.data".  */
int        option_square = 0;
double     option_factor = 0.0;
mp_size_t  option_step = 1;
int        option_gnuplot = 0;
char      *option_gnuplot_basename;
/* The list of size ranges to measure; run_all() walks size_array[0] to
   size_array[size_num-1], measuring from start up to end inclusive.
   size_allocnum is presumably the allocated capacity of size_array (its
   use is outside this part of the file).  */
struct size_array_t {
  mp_size_t start, end;
} *size_array = NULL;
mp_size_t  size_num = 0;
mp_size_t  size_allocnum = 0;
int        option_resource_usage = 0;
long       option_seed = 123456789;

/* Parameters handed to the speed_xxx() routines; run_all() and run_one()
   fill in the size and data pointers.  */
struct speed_params  sp;

#define COLUMN_WIDTH  13  /* for the free-form output */

/* Flags for routine[].flag.  FLAG_RSIZE is not referenced in the visible
   part of this file.  */
#define FLAG_R            (1<<0)  /* require ".r" */
#define FLAG_R_OPTIONAL   (1<<1)  /* optional ".r" */
#define FLAG_RSIZE        (1<<2)
#define FLAG_NODATA       (1<<3)  /* don't alloc xp, yp */

/* Table of measurable routines: the name accepted on the command line, the
   function passed to speed_measure(), and FLAG_xxx bits.  Entries without
   an explicit flag get 0.  routine_find() searches this table in order and
   usage() prints it.  */
const struct routine_t {
  /* constants */
  const char        *name;
  speed_function_t  fun;
  int               flag;
} routine[] = {

  { "noop",              speed_noop                 },
  { "noop_wxs",          speed_noop_wxs             },
  { "noop_wxys",         speed_noop_wxys            },

  { "mpn_add_n",         speed_mpn_add_n,     FLAG_R_OPTIONAL },
  { "mpn_sub_n",         speed_mpn_sub_n,     FLAG_R_OPTIONAL },

  { "mpn_add_err1_n",    speed_mpn_add_err1_n       },
  { "mpn_add_err2_n",    speed_mpn_add_err2_n       },
  { "mpn_add_err3_n",    speed_mpn_add_err3_n       },
  { "mpn_sub_err1_n",    speed_mpn_sub_err1_n       },
  { "mpn_sub_err2_n",    speed_mpn_sub_err2_n       },
  { "mpn_sub_err3_n",    speed_mpn_sub_err3_n       },

#if HAVE_NATIVE_mpn_add_n_sub_n
  { "mpn_add_n_sub_n",   speed_mpn_add_n_sub_n,     FLAG_R_OPTIONAL },
#endif

  { "mpn_addmul_1",      speed_mpn_addmul_1,  FLAG_R },
  { "mpn_submul_1",      speed_mpn_submul_1,  FLAG_R },
#if HAVE_NATIVE_mpn_addmul_2
  { "mpn_addmul_2",      speed_mpn_addmul_2,  FLAG_R_OPTIONAL },
#endif
#if HAVE_NATIVE_mpn_addmul_3
  { "mpn_addmul_3",      speed_mpn_addmul_3,  FLAG_R_OPTIONAL },
#endif
#if HAVE_NATIVE_mpn_addmul_4
  { "mpn_addmul_4",      speed_mpn_addmul_4,  FLAG_R_OPTIONAL },
#endif
#if HAVE_NATIVE_mpn_addmul_5
  { "mpn_addmul_5",      speed_mpn_addmul_5,  FLAG_R_OPTIONAL },
#endif
#if HAVE_NATIVE_mpn_addmul_6
  { "mpn_addmul_6",      speed_mpn_addmul_6,  FLAG_R_OPTIONAL },
#endif
#if HAVE_NATIVE_mpn_addmul_7
  { "mpn_addmul_7",      speed_mpn_addmul_7,  FLAG_R_OPTIONAL },
#endif
#if HAVE_NATIVE_mpn_addmul_8
  { "mpn_addmul_8",      speed_mpn_addmul_8,  FLAG_R_OPTIONAL },
#endif
  { "mpn_mul_1",         speed_mpn_mul_1,     FLAG_R },
  { "mpn_mul_1_inplace", speed_mpn_mul_1_inplace, FLAG_R },
#if HAVE_NATIVE_mpn_mul_2
  { "mpn_mul_2",         speed_mpn_mul_2,     FLAG_R_OPTIONAL },
#endif
#if HAVE_NATIVE_mpn_mul_3
  { "mpn_mul_3",         speed_mpn_mul_3,     FLAG_R_OPTIONAL },
#endif
#if HAVE_NATIVE_mpn_mul_4
  { "mpn_mul_4",         speed_mpn_mul_4,     FLAG_R_OPTIONAL },
#endif
#if HAVE_NATIVE_mpn_mul_5
  { "mpn_mul_5",         speed_mpn_mul_5,     FLAG_R_OPTIONAL },
#endif
#if HAVE_NATIVE_mpn_mul_6
  { "mpn_mul_6",         speed_mpn_mul_6,     FLAG_R_OPTIONAL },
#endif

  { "mpn_divrem_1",      speed_mpn_divrem_1,  FLAG_R },
  { "mpn_divrem_1f",     speed_mpn_divrem_1f, FLAG_R },
#if HAVE_NATIVE_mpn_divrem_1c
  { "mpn_divrem_1c",     speed_mpn_divrem_1c, FLAG_R },
  { "mpn_divrem_1cf",    speed_mpn_divrem_1cf,FLAG_R },
#endif
  { "mpn_mod_1",         speed_mpn_mod_1,     FLAG_R },
#if HAVE_NATIVE_mpn_mod_1c
  { "mpn_mod_1c",        speed_mpn_mod_1c,    FLAG_R },
#endif
  { "mpn_preinv_divrem_1",  speed_mpn_preinv_divrem_1,  FLAG_R },
  { "mpn_preinv_divrem_1f", speed_mpn_preinv_divrem_1f, FLAG_R },
  { "mpn_preinv_mod_1",  speed_mpn_preinv_mod_1, FLAG_R },

  { "mpn_mod_1_1",       speed_mpn_mod_1_1,   FLAG_R },
  { "mpn_mod_1_1_1",     speed_mpn_mod_1_1_1, FLAG_R },
  { "mpn_mod_1_1_2",     speed_mpn_mod_1_1_2, FLAG_R },
  { "mpn_mod_1s_2",      speed_mpn_mod_1_2,   FLAG_R },
  { "mpn_mod_1s_3",      speed_mpn_mod_1_3,   FLAG_R },
  { "mpn_mod_1s_4",      speed_mpn_mod_1_4,   FLAG_R },

  { "mpn_divrem_1_div",  speed_mpn_divrem_1_div,  FLAG_R },
  { "mpn_divrem_1_inv",  speed_mpn_divrem_1_inv,  FLAG_R },
  { "mpn_divrem_1f_div", speed_mpn_divrem_1f_div, FLAG_R },
  { "mpn_divrem_1f_inv", speed_mpn_divrem_1f_inv, FLAG_R },
  { "mpn_mod_1_div",     speed_mpn_mod_1_div,     FLAG_R },
  { "mpn_mod_1_inv",     speed_mpn_mod_1_inv,     FLAG_R },

  { "mpn_divrem_2",      speed_mpn_divrem_2,        },
  { "mpn_divrem_2_div",  speed_mpn_divrem_2_div,    },
  { "mpn_divrem_2_inv",  speed_mpn_divrem_2_inv,    },

  { "mpn_div_qr_2n",     speed_mpn_div_qr_2n,       },
  { "mpn_div_qr_2u",     speed_mpn_div_qr_2u,       },

  { "mpn_divexact_1",    speed_mpn_divexact_1,    FLAG_R },
  { "mpn_divexact_by3",  speed_mpn_divexact_by3          },

  { "mpn_bdiv_q_1",      speed_mpn_bdiv_q_1,      FLAG_R },
  { "mpn_pi1_bdiv_q_1",  speed_mpn_pi1_bdiv_q_1,  FLAG_R_OPTIONAL },
  { "mpn_bdiv_dbm1c",    speed_mpn_bdiv_dbm1c,    FLAG_R_OPTIONAL },

#if HAVE_NATIVE_mpn_modexact_1_odd
  { "mpn_modexact_1_odd",  speed_mpn_modexact_1_odd,  FLAG_R },
#endif
  { "mpn_modexact_1c_odd", speed_mpn_modexact_1c_odd, FLAG_R },

#if GMP_NUMB_BITS % 4 == 0
  { "mpn_mod_34lsub1",   speed_mpn_mod_34lsub1 },
#endif

  { "mpn_lshift",        speed_mpn_lshift, FLAG_R   },
  { "mpn_lshiftc",       speed_mpn_lshiftc, FLAG_R   },
  { "mpn_rshift",        speed_mpn_rshift, FLAG_R   },

  { "mpn_and_n",         speed_mpn_and_n,  FLAG_R_OPTIONAL },
  { "mpn_andn_n",        speed_mpn_andn_n, FLAG_R_OPTIONAL },
  { "mpn_nand_n",        speed_mpn_nand_n, FLAG_R_OPTIONAL },
  { "mpn_ior_n",         speed_mpn_ior_n,  FLAG_R_OPTIONAL },
  { "mpn_iorn_n",        speed_mpn_iorn_n, FLAG_R_OPTIONAL },
  { "mpn_nior_n",        speed_mpn_nior_n, FLAG_R_OPTIONAL },
  { "mpn_xor_n",         speed_mpn_xor_n,  FLAG_R_OPTIONAL },
  { "mpn_xnor_n",        speed_mpn_xnor_n, FLAG_R_OPTIONAL },
  { "mpn_com",           speed_mpn_com              },

  { "mpn_popcount",      speed_mpn_popcount         },
  { "mpn_hamdist",       speed_mpn_hamdist          },

  { "mpn_matrix22_mul",  speed_mpn_matrix22_mul     },

  { "mpn_hgcd",          speed_mpn_hgcd             },
  { "mpn_hgcd_lehmer",   speed_mpn_hgcd_lehmer      },
  { "mpn_hgcd_appr",     speed_mpn_hgcd_appr        },
  { "mpn_hgcd_appr_lehmer", speed_mpn_hgcd_appr_lehmer },

  { "mpn_hgcd_reduce",   speed_mpn_hgcd_reduce      },
  { "mpn_hgcd_reduce_1", speed_mpn_hgcd_reduce_1    },
  { "mpn_hgcd_reduce_2", speed_mpn_hgcd_reduce_2    },

  { "mpn_gcd_1",         speed_mpn_gcd_1,  FLAG_R_OPTIONAL },
  { "mpn_gcd_1N",        speed_mpn_gcd_1N, FLAG_R_OPTIONAL },

  { "mpn_gcd",           speed_mpn_gcd                    },

  { "mpn_gcdext",            speed_mpn_gcdext            },
  { "mpn_gcdext_single",     speed_mpn_gcdext_single     },
  { "mpn_gcdext_double",     speed_mpn_gcdext_double     },
  { "mpn_gcdext_one_single", speed_mpn_gcdext_one_single },
  { "mpn_gcdext_one_double", speed_mpn_gcdext_one_double },
#if 0
  { "mpn_gcdext_lehmer",     speed_mpn_gcdext_lehmer     },
#endif
  { "mpz_jacobi",        speed_mpz_jacobi           },
  { "mpn_jacobi_base",   speed_mpn_jacobi_base      },
  { "mpn_jacobi_base_1", speed_mpn_jacobi_base_1    },
  { "mpn_jacobi_base_2", speed_mpn_jacobi_base_2    },
  { "mpn_jacobi_base_3", speed_mpn_jacobi_base_3    },
  { "mpn_jacobi_base_4", speed_mpn_jacobi_base_4    },

  { "mpn_mul",           speed_mpn_mul,         FLAG_R_OPTIONAL },
  { "mpn_mul_basecase",  speed_mpn_mul_basecase,FLAG_R_OPTIONAL },
  { "mpn_sqr_basecase",  speed_mpn_sqr_basecase     },
#if HAVE_NATIVE_mpn_sqr_diagonal
  { "mpn_sqr_diagonal",  speed_mpn_sqr_diagonal     },
#endif
#if HAVE_NATIVE_mpn_sqr_diag_addlsh1
  { "mpn_sqr_diag_addlsh1", speed_mpn_sqr_diag_addlsh1 },
#endif

  { "mpn_mul_n",         speed_mpn_mul_n            },
  { "mpn_sqr",           speed_mpn_sqr              },

  { "mpn_toom2_sqr",     speed_mpn_toom2_sqr        },
  { "mpn_toom3_sqr",     speed_mpn_toom3_sqr        },
  { "mpn_toom4_sqr",     speed_mpn_toom4_sqr        },
  { "mpn_toom6_sqr",     speed_mpn_toom6_sqr        },
  { "mpn_toom8_sqr",     speed_mpn_toom8_sqr        },
  { "mpn_toom22_mul",    speed_mpn_toom22_mul       },
  { "mpn_toom33_mul",    speed_mpn_toom33_mul       },
  { "mpn_toom44_mul",    speed_mpn_toom44_mul       },
  { "mpn_toom6h_mul",    speed_mpn_toom6h_mul       },
  { "mpn_toom8h_mul",    speed_mpn_toom8h_mul       },
  { "mpn_toom32_mul",    speed_mpn_toom32_mul       },
  { "mpn_toom42_mul",    speed_mpn_toom42_mul       },
  { "mpn_toom43_mul",    speed_mpn_toom43_mul       },
  { "mpn_toom63_mul",    speed_mpn_toom63_mul       },
  { "mpn_nussbaumer_mul",    speed_mpn_nussbaumer_mul    },
  { "mpn_nussbaumer_mul_sqr",speed_mpn_nussbaumer_mul_sqr},
#if WANT_OLD_FFT_FULL
  { "mpn_mul_fft_full",      speed_mpn_mul_fft_full      },
  { "mpn_mul_fft_full_sqr",  speed_mpn_mul_fft_full_sqr  },
#endif
  { "mpn_mul_fft",       speed_mpn_mul_fft,     FLAG_R_OPTIONAL },
  { "mpn_mul_fft_sqr",   speed_mpn_mul_fft_sqr, FLAG_R_OPTIONAL },

  { "mpn_mullo_n",        speed_mpn_mullo_n         },
  { "mpn_mullo_basecase", speed_mpn_mullo_basecase  },

  { "mpn_mulmid_basecase",  speed_mpn_mulmid_basecase, FLAG_R_OPTIONAL },
  { "mpn_toom42_mulmid",    speed_mpn_toom42_mulmid },
  { "mpn_mulmid_n",         speed_mpn_mulmid_n },
  { "mpn_mulmid",           speed_mpn_mulmid, FLAG_R_OPTIONAL },

  { "mpn_bc_mulmod_bnm1",      speed_mpn_bc_mulmod_bnm1      },
  { "mpn_mulmod_bnm1",         speed_mpn_mulmod_bnm1         },
  { "mpn_mulmod_bnm1_rounded", speed_mpn_mulmod_bnm1_rounded },
  { "mpn_sqrmod_bnm1",         speed_mpn_sqrmod_bnm1         },

  { "mpn_invert",              speed_mpn_invert              },
  { "mpn_invertappr",          speed_mpn_invertappr          },
  { "mpn_ni_invertappr",       speed_mpn_ni_invertappr       },
  { "mpn_binvert",             speed_mpn_binvert             },

  { "mpn_sbpi1_div_qr",        speed_mpn_sbpi1_div_qr,    FLAG_R_OPTIONAL},
  { "mpn_dcpi1_div_qr",        speed_mpn_dcpi1_div_qr,    FLAG_R_OPTIONAL},
  { "mpn_mu_div_qr",           speed_mpn_mu_div_qr,       FLAG_R_OPTIONAL},
  { "mpn_mupi_div_qr",         speed_mpn_mupi_div_qr,     FLAG_R_OPTIONAL},
  { "mpn_sbpi1_divappr_q",     speed_mpn_sbpi1_divappr_q, FLAG_R_OPTIONAL},
  { "mpn_dcpi1_divappr_q",     speed_mpn_dcpi1_divappr_q, FLAG_R_OPTIONAL},

  { "mpn_sbpi1_bdiv_qr",       speed_mpn_sbpi1_bdiv_qr       },
  { "mpn_dcpi1_bdiv_qr",       speed_mpn_dcpi1_bdiv_qr       },
  { "mpn_sbpi1_bdiv_q",        speed_mpn_sbpi1_bdiv_q        },
  { "mpn_dcpi1_bdiv_q",        speed_mpn_dcpi1_bdiv_q        },

  { "mpn_broot",               speed_mpn_broot,       FLAG_R },
  { "mpn_broot_invm1",         speed_mpn_broot_invm1, FLAG_R },
  { "mpn_brootinv",            speed_mpn_brootinv,    FLAG_R },

  { "mpn_get_str",          speed_mpn_get_str,     FLAG_R_OPTIONAL },
  { "mpn_set_str",          speed_mpn_set_str,     FLAG_R_OPTIONAL },
  { "mpn_set_str_basecase", speed_mpn_bc_set_str,  FLAG_R_OPTIONAL },

  { "mpn_sqrtrem",       speed_mpn_sqrtrem          },
  { "mpn_rootrem",       speed_mpn_rootrem, FLAG_R  },

  { "mpn_fib2_ui",       speed_mpn_fib2_ui,    FLAG_NODATA },
  { "mpz_fib_ui",        speed_mpz_fib_ui,     FLAG_NODATA },
  { "mpz_fib2_ui",       speed_mpz_fib2_ui,    FLAG_NODATA },
  { "mpz_lucnum_ui",     speed_mpz_lucnum_ui,  FLAG_NODATA },
  { "mpz_lucnum2_ui",    speed_mpz_lucnum2_ui, FLAG_NODATA },

  { "mpz_add",           speed_mpz_add              },
  { "mpz_bin_uiui",      speed_mpz_bin_uiui, FLAG_NODATA | FLAG_R_OPTIONAL },
  { "mpz_bin_ui",        speed_mpz_bin_ui,   FLAG_NODATA | FLAG_R_OPTIONAL },
  { "mpz_fac_ui",        speed_mpz_fac_ui,   FLAG_NODATA   },
  { "mpz_powm",          speed_mpz_powm             },
  { "mpz_powm_mod",      speed_mpz_powm_mod         },
  { "mpz_powm_redc",     speed_mpz_powm_redc        },
  { "mpz_powm_sec",      speed_mpz_powm_sec        },
  { "mpz_powm_ui",       speed_mpz_powm_ui,  FLAG_R_OPTIONAL },

  { "mpz_mod",           speed_mpz_mod              },
  { "mpn_redc_1",        speed_mpn_redc_1           },
  { "mpn_redc_2",        speed_mpn_redc_2           },
  { "mpn_redc_n",        speed_mpn_redc_n           },

  { "MPN_COPY",          speed_MPN_COPY             },
  { "MPN_COPY_INCR",     speed_MPN_COPY_INCR        },
  { "MPN_COPY_DECR",     speed_MPN_COPY_DECR        },
  { "memcpy",            speed_memcpy               },
#if HAVE_NATIVE_mpn_copyi
  { "mpn_copyi",         speed_mpn_copyi            },
#endif
#if HAVE_NATIVE_mpn_copyd
  { "mpn_copyd",         speed_mpn_copyd            },
#endif
  { "mpn_tabselect",     speed_mpn_tabselect, FLAG_R_OPTIONAL },
#if HAVE_NATIVE_mpn_addlsh1_n
  { "mpn_addlsh1_n",     speed_mpn_addlsh1_n, FLAG_R_OPTIONAL },
#endif
#if HAVE_NATIVE_mpn_sublsh1_n
  { "mpn_sublsh1_n",     speed_mpn_sublsh1_n, FLAG_R_OPTIONAL },
#endif
#if HAVE_NATIVE_mpn_addlsh1_n_ip1
  { "mpn_addlsh1_n_ip1", speed_mpn_addlsh1_n_ip1    },
#endif
#if HAVE_NATIVE_mpn_addlsh1_n_ip2
  { "mpn_addlsh1_n_ip2", speed_mpn_addlsh1_n_ip2    },
#endif
#if HAVE_NATIVE_mpn_sublsh1_n_ip1
  { "mpn_sublsh1_n_ip1", speed_mpn_sublsh1_n_ip1    },
#endif
#if HAVE_NATIVE_mpn_rsblsh1_n
  { "mpn_rsblsh1_n",     speed_mpn_rsblsh1_n, FLAG_R_OPTIONAL },
#endif
#if HAVE_NATIVE_mpn_addlsh2_n
  { "mpn_addlsh2_n",     speed_mpn_addlsh2_n, FLAG_R_OPTIONAL },
#endif
#if HAVE_NATIVE_mpn_sublsh2_n
  { "mpn_sublsh2_n",     speed_mpn_sublsh2_n, FLAG_R_OPTIONAL },
#endif
#if HAVE_NATIVE_mpn_addlsh2_n_ip1
  { "mpn_addlsh2_n_ip1", speed_mpn_addlsh2_n_ip1    },
#endif
#if HAVE_NATIVE_mpn_addlsh2_n_ip2
  { "mpn_addlsh2_n_ip2", speed_mpn_addlsh2_n_ip2    },
#endif
#if HAVE_NATIVE_mpn_sublsh2_n_ip1
  { "mpn_sublsh2_n_ip1", speed_mpn_sublsh2_n_ip1    },
#endif
#if HAVE_NATIVE_mpn_rsblsh2_n
  { "mpn_rsblsh2_n",     speed_mpn_rsblsh2_n, FLAG_R_OPTIONAL },
#endif
#if HAVE_NATIVE_mpn_addlsh_n
  { "mpn_addlsh_n",      speed_mpn_addlsh_n, FLAG_R_OPTIONAL },
#endif
#if HAVE_NATIVE_mpn_sublsh_n
  { "mpn_sublsh_n",      speed_mpn_sublsh_n, FLAG_R_OPTIONAL },
#endif
#if HAVE_NATIVE_mpn_addlsh_n_ip1
  { "mpn_addlsh_n_ip1",  speed_mpn_addlsh_n_ip1     },
#endif
#if HAVE_NATIVE_mpn_addlsh_n_ip2
  { "mpn_addlsh_n_ip2",  speed_mpn_addlsh_n_ip2     },
#endif
#if HAVE_NATIVE_mpn_sublsh_n_ip1
  { "mpn_sublsh_n_ip1",  speed_mpn_sublsh_n_ip1     },
#endif
#if HAVE_NATIVE_mpn_rsblsh_n
  { "mpn_rsblsh_n",      speed_mpn_rsblsh_n, FLAG_R_OPTIONAL },
#endif
#if HAVE_NATIVE_mpn_rsh1add_n
  { "mpn_rsh1add_n",     speed_mpn_rsh1add_n, FLAG_R_OPTIONAL },
#endif
#if HAVE_NATIVE_mpn_rsh1sub_n
  { "mpn_rsh1sub_n",     speed_mpn_rsh1sub_n, FLAG_R_OPTIONAL },
#endif

  { "mpn_addcnd_n",      speed_mpn_addcnd_n, FLAG_R_OPTIONAL },
  { "mpn_subcnd_n",      speed_mpn_subcnd_n, FLAG_R_OPTIONAL },

  { "MPN_ZERO",          speed_MPN_ZERO             },

  { "binvert_limb",       speed_binvert_limb,       FLAG_NODATA },
  { "binvert_limb_mul1",  speed_binvert_limb_mul1,  FLAG_NODATA },
  { "binvert_limb_loop",  speed_binvert_limb_loop,  FLAG_NODATA },
  { "binvert_limb_cond",  speed_binvert_limb_cond,  FLAG_NODATA },
  { "binvert_limb_arith", speed_binvert_limb_arith, FLAG_NODATA },

  { "malloc_free",                  speed_malloc_free                  },
  { "malloc_realloc_free",          speed_malloc_realloc_free          },
  { "gmp_allocate_free",            speed_gmp_allocate_free            },
  { "gmp_allocate_reallocate_free", speed_gmp_allocate_reallocate_free },
  { "mpz_init_clear",               speed_mpz_init_clear               },
  { "mpq_init_clear",               speed_mpq_init_clear               },
  { "mpf_init_clear",               speed_mpf_init_clear               },
  { "mpz_init_realloc_clear",       speed_mpz_init_realloc_clear       },

  { "umul_ppmm",         speed_umul_ppmm,     FLAG_R_OPTIONAL },
#if HAVE_NATIVE_mpn_umul_ppmm
  { "mpn_umul_ppmm",     speed_mpn_umul_ppmm, FLAG_R_OPTIONAL },
#endif
#if HAVE_NATIVE_mpn_umul_ppmm_r
  { "mpn_umul_ppmm_r",   speed_mpn_umul_ppmm_r, FLAG_R_OPTIONAL },
#endif

  { "count_leading_zeros",  speed_count_leading_zeros,  FLAG_NODATA | FLAG_R_OPTIONAL },
  { "count_trailing_zeros", speed_count_trailing_zeros, FLAG_NODATA | FLAG_R_OPTIONAL },

  { "udiv_qrnnd",             speed_udiv_qrnnd,             FLAG_R_OPTIONAL },
  { "udiv_qrnnd_c",           speed_udiv_qrnnd_c,           FLAG_R_OPTIONAL },
#if HAVE_NATIVE_mpn_udiv_qrnnd
  { "mpn_udiv_qrnnd",         speed_mpn_udiv_qrnnd,         FLAG_R_OPTIONAL },
#endif
#if HAVE_NATIVE_mpn_udiv_qrnnd_r
  { "mpn_udiv_qrnnd_r",       speed_mpn_udiv_qrnnd_r,       FLAG_R_OPTIONAL },
#endif
  { "invert_limb",            speed_invert_limb,            FLAG_R_OPTIONAL },

  { "operator_div",           speed_operator_div,           FLAG_R_OPTIONAL },
  { "operator_mod",           speed_operator_mod,           FLAG_R_OPTIONAL },

  { "gmp_randseed",    speed_gmp_randseed,    FLAG_R_OPTIONAL               },
  { "gmp_randseed_ui", speed_gmp_randseed_ui, FLAG_R_OPTIONAL | FLAG_NODATA },
  { "mpz_urandomb",    speed_mpz_urandomb,    FLAG_R_OPTIONAL | FLAG_NODATA },

#ifdef SPEED_EXTRA_ROUTINES
  SPEED_EXTRA_ROUTINES
#endif
#ifdef SPEED_EXTRA_ROUTINES2
  SPEED_EXTRA_ROUTINES2
#endif
};
*name; 542 }; 543 struct choice_t *choice; 544 int num_choices = 0; 545 546 547 void 548 data_fill (mp_ptr ptr, mp_size_t size) 549 { 550 switch (option_data) { 551 case DATA_RANDOM: 552 mpn_random (ptr, size); 553 break; 554 case DATA_RANDOM2: 555 mpn_random2 (ptr, size); 556 break; 557 case DATA_ZEROS: 558 MPN_ZERO (ptr, size); 559 break; 560 case DATA_AAS: 561 MPN_FILL (ptr, size, GMP_NUMB_0xAA); 562 break; 563 case DATA_FFS: 564 MPN_FILL (ptr, size, GMP_NUMB_MAX); 565 break; 566 case DATA_2FD: 567 MPN_FILL (ptr, size, GMP_NUMB_MAX); 568 ptr[0] -= 2; 569 break; 570 default: 571 abort(); 572 /*NOTREACHED*/ 573 } 574 } 575 576 /* The code here handling the various combinations of output options isn't 577 too attractive, but it works and is fairly clean. */ 578 579 #define SIZE_TO_DIVISOR(n) \ 580 (option_square == 1 ? (n)*(n) \ 581 : option_square == 2 ? (n)*((n)+1)/2 \ 582 : (n)) 583 584 void 585 run_one (FILE *fp, struct speed_params *s, mp_size_t prev_size) 586 { 587 const char *first_open_fastest, *first_open_notfastest, *first_close; 588 int i, fastest, want_data; 589 double fastest_time; 590 TMP_DECL; 591 592 TMP_MARK; 593 594 /* allocate data, unless all routines are NODATA */ 595 want_data = 0; 596 for (i = 0; i < num_choices; i++) 597 want_data |= ((choice[i].p->flag & FLAG_NODATA) == 0); 598 599 if (want_data) 600 { 601 SPEED_TMP_ALLOC_LIMBS (sp.xp, s->size, s->align_xp); 602 SPEED_TMP_ALLOC_LIMBS (sp.yp, s->size, s->align_yp); 603 604 data_fill (s->xp, s->size); 605 data_fill (s->yp, s->size); 606 } 607 else 608 { 609 sp.xp = NULL; 610 sp.yp = NULL; 611 } 612 613 if (prev_size == -1 && option_cmp == CMP_DIFFPREV) 614 { 615 first_open_fastest = "(#"; 616 first_open_notfastest = " ("; 617 first_close = ")"; 618 } 619 else 620 { 621 first_open_fastest = "#"; 622 first_open_notfastest = " "; 623 first_close = ""; 624 } 625 626 fastest = -1; 627 fastest_time = -1.0; 628 for (i = 0; i < num_choices; i++) 629 { 630 s->r = choice[i].r; 631 choice[i].time = 
speed_measure (choice[i].p->fun, s); 632 choice[i].no_time = (choice[i].time == -1.0); 633 if (! choice[i].no_time) 634 choice[i].time *= choice[i].scale; 635 636 /* Apply the effect of CMP_DIFFPREV, but the new choice[i].prev_time 637 is before any differences. */ 638 { 639 double t; 640 t = choice[i].time; 641 if (t != -1.0 && option_cmp == CMP_DIFFPREV && prev_size != -1) 642 { 643 if (choice[i].prev_time == -1.0) 644 choice[i].no_time = 1; 645 else 646 choice[i].time = choice[i].time - choice[i].prev_time; 647 } 648 choice[i].prev_time = t; 649 } 650 651 if (choice[i].no_time) 652 continue; 653 654 /* Look for the fastest after CMP_DIFFPREV has been applied, but 655 before CMP_RATIO or CMP_DIFFERENCE. There's only a fastest shown 656 if there's more than one routine. */ 657 if (num_choices > 1 && (fastest == -1 || choice[i].time < fastest_time)) 658 { 659 fastest = i; 660 fastest_time = choice[i].time; 661 } 662 663 if (option_cmp == CMP_DIFFPREV) 664 { 665 /* Conversion for UNIT_CYCLESPERLIMB differs in CMP_DIFFPREV. */ 666 if (option_unit == UNIT_CYCLES) 667 choice[i].time /= speed_cycletime; 668 else if (option_unit == UNIT_CYCLESPERLIMB) 669 { 670 if (prev_size == -1) 671 choice[i].time /= speed_cycletime; 672 else 673 choice[i].time /= (speed_cycletime 674 * (SIZE_TO_DIVISOR(s->size) 675 - SIZE_TO_DIVISOR(prev_size))); 676 } 677 } 678 else 679 { 680 if (option_unit == UNIT_CYCLES) 681 choice[i].time /= speed_cycletime; 682 else if (option_unit == UNIT_CYCLESPERLIMB) 683 choice[i].time /= (speed_cycletime * SIZE_TO_DIVISOR(s->size)); 684 685 if (option_cmp == CMP_RATIO && i > 0) 686 { 687 /* A ratio isn't affected by the units chosen. 
*/ 688 if (choice[0].no_time || choice[0].time == 0.0) 689 choice[i].no_time = 1; 690 else 691 choice[i].time /= choice[0].time; 692 } 693 else if (option_cmp == CMP_DIFFERENCE && i > 0) 694 { 695 if (choice[0].no_time) 696 { 697 choice[i].no_time = 1; 698 continue; 699 } 700 choice[i].time -= choice[0].time; 701 } 702 } 703 } 704 705 if (option_gnuplot) 706 { 707 /* In CMP_DIFFPREV, don't print anything for the first size, start 708 with the second where an actual difference is available. 709 710 In CMP_RATIO, print the first column as 1.0. 711 712 The 9 decimals printed is much more than the expected precision of 713 the measurements actually. */ 714 715 if (! (option_cmp == CMP_DIFFPREV && prev_size == -1)) 716 { 717 fprintf (fp, "%-6ld ", s->size); 718 for (i = 0; i < num_choices; i++) 719 fprintf (fp, " %.9e", 720 choice[i].no_time ? 0.0 721 : (option_cmp == CMP_RATIO && i == 0) ? 1.0 722 : choice[i].time); 723 fprintf (fp, "\n"); 724 } 725 } 726 else 727 { 728 fprintf (fp, "%-6ld ", s->size); 729 for (i = 0; i < num_choices; i++) 730 { 731 char buf[128]; 732 int decimals; 733 734 if (choice[i].no_time) 735 { 736 fprintf (fp, " %*s", COLUMN_WIDTH, "n/a"); 737 } 738 else 739 {if (option_unit == UNIT_CYCLESPERLIMB 740 || (option_cmp == CMP_RATIO && i > 0)) 741 decimals = 4; 742 else if (option_unit == UNIT_CYCLES) 743 decimals = 2; 744 else 745 decimals = 9; 746 747 sprintf (buf, "%s%.*f%s", 748 i == fastest ? 
first_open_fastest : first_open_notfastest, 749 decimals, choice[i].time, first_close); 750 fprintf (fp, " %*s", COLUMN_WIDTH, buf); 751 } 752 } 753 fprintf (fp, "\n"); 754 } 755 756 TMP_FREE; 757 } 758 759 void 760 run_all (FILE *fp) 761 { 762 mp_size_t prev_size; 763 int i; 764 TMP_DECL; 765 766 TMP_MARK; 767 SPEED_TMP_ALLOC_LIMBS (sp.xp_block, SPEED_BLOCK_SIZE, sp.align_xp); 768 SPEED_TMP_ALLOC_LIMBS (sp.yp_block, SPEED_BLOCK_SIZE, sp.align_yp); 769 770 data_fill (sp.xp_block, SPEED_BLOCK_SIZE); 771 data_fill (sp.yp_block, SPEED_BLOCK_SIZE); 772 773 for (i = 0; i < size_num; i++) 774 { 775 sp.size = size_array[i].start; 776 prev_size = -1; 777 for (;;) 778 { 779 mp_size_t step; 780 781 if (option_data == DATA_2FD && sp.size >= 2) 782 sp.xp[sp.size-1] = 2; 783 784 run_one (fp, &sp, prev_size); 785 prev_size = sp.size; 786 787 if (option_data == DATA_2FD && sp.size >= 2) 788 sp.xp[sp.size-1] = MP_LIMB_T_MAX; 789 790 if (option_factor != 0.0) 791 { 792 step = (mp_size_t) (sp.size * option_factor - sp.size); 793 if (step < 1) 794 step = 1; 795 } 796 else 797 step = 1; 798 if (step < option_step) 799 step = option_step; 800 801 sp.size += step; 802 if (sp.size > size_array[i].end) 803 break; 804 } 805 } 806 807 TMP_FREE; 808 } 809 810 811 FILE * 812 fopen_for_write (const char *filename) 813 { 814 FILE *fp; 815 if ((fp = fopen (filename, "w")) == NULL) 816 { 817 fprintf (stderr, "Cannot create %s\n", filename); 818 exit(1); 819 } 820 return fp; 821 } 822 823 void 824 fclose_written (FILE *fp, const char *filename) 825 { 826 int err; 827 828 err = ferror (fp); 829 err |= fclose (fp); 830 831 if (err) 832 { 833 fprintf (stderr, "Error writing %s\n", filename); 834 exit(1); 835 } 836 } 837 838 839 void 840 run_gnuplot (int argc, char *argv[]) 841 { 842 char *plot_filename; 843 char *data_filename; 844 FILE *fp; 845 int i; 846 847 plot_filename = (char *) (*__gmp_allocate_func) 848 (strlen (option_gnuplot_basename) + 20); 849 data_filename = (char *) 
(*__gmp_allocate_func) 850 (strlen (option_gnuplot_basename) + 20); 851 852 sprintf (plot_filename, "%s.gnuplot", option_gnuplot_basename); 853 sprintf (data_filename, "%s.data", option_gnuplot_basename); 854 855 fp = fopen_for_write (plot_filename); 856 857 fprintf (fp, "# Generated with:\n"); 858 fprintf (fp, "#"); 859 for (i = 0; i < argc; i++) 860 fprintf (fp, " %s", argv[i]); 861 fprintf (fp, "\n"); 862 fprintf (fp, "\n"); 863 864 fprintf (fp, "reset\n"); 865 866 /* Putting the key at the top left is usually good, and you can change it 867 interactively if it's not. */ 868 fprintf (fp, "set key left\n"); 869 870 /* designed to make it possible to see crossovers easily */ 871 fprintf (fp, "set style data lines\n"); 872 873 fprintf (fp, "plot "); 874 for (i = 0; i < num_choices; i++) 875 { 876 fprintf (fp, " \"%s\" using 1:%d", data_filename, i+2); 877 fprintf (fp, " title \"%s\"", choice[i].name); 878 879 if (i != num_choices-1) 880 fprintf (fp, ", \\"); 881 fprintf (fp, "\n"); 882 } 883 884 fprintf (fp, "load \"-\"\n"); 885 fclose_written (fp, plot_filename); 886 887 fp = fopen_for_write (data_filename); 888 889 /* Unbuffered so you can see where the program was up to if it crashes or 890 you kill it. */ 891 setbuf (fp, NULL); 892 893 run_all (fp); 894 fclose_written (fp, data_filename); 895 } 896 897 898 /* Return a limb with n many one bits (starting from the least significant) */ 899 900 #define LIMB_ONES(n) \ 901 ((n) == GMP_LIMB_BITS ? MP_LIMB_T_MAX \ 902 : (n) == 0 ? CNST_LIMB(0) \ 903 : (CNST_LIMB(1) << (n)) - 1) 904 905 mp_limb_t 906 r_string (const char *s) 907 { 908 const char *s_orig = s; 909 long n; 910 911 if (strcmp (s, "aas") == 0) 912 return GMP_NUMB_0xAA; 913 914 { 915 mpz_t z; 916 mp_limb_t l; 917 int set, siz; 918 919 mpz_init (z); 920 set = mpz_set_str (z, s, 0); 921 siz = SIZ(z); 922 l = (siz == 0 ? 0 : siz > 0 ? 
PTR(z)[0] : -PTR(z)[0]); 923 mpz_clear (z); 924 if (set == 0) 925 { 926 if (siz > 1 || siz < -1) 927 printf ("Warning, r parameter %s truncated to %d bits\n", 928 s_orig, GMP_LIMB_BITS); 929 return l; 930 } 931 } 932 933 if (s[0] == '0' && (s[1] == 'x' || s[1] == 'X')) 934 n = strtoul (s+2, (char **) &s, 16); 935 else 936 n = strtol (s, (char **) &s, 10); 937 938 if (strcmp (s, "bits") == 0) 939 { 940 mp_limb_t l; 941 if (n > GMP_LIMB_BITS) 942 { 943 fprintf (stderr, "%ld bit parameter invalid (max %d bits)\n", 944 n, GMP_LIMB_BITS); 945 exit (1); 946 } 947 mpn_random (&l, 1); 948 return (l | (CNST_LIMB(1) << (n-1))) & LIMB_ONES(n); 949 } 950 else if (strcmp (s, "ones") == 0) 951 { 952 if (n > GMP_LIMB_BITS) 953 { 954 fprintf (stderr, "%ld bit parameter invalid (max %d bits)\n", 955 n, GMP_LIMB_BITS); 956 exit (1); 957 } 958 return LIMB_ONES (n); 959 } 960 else if (*s != '\0') 961 { 962 fprintf (stderr, "invalid r parameter: %s\n", s_orig); 963 exit (1); 964 } 965 966 return n; 967 } 968 969 970 void 971 routine_find (struct choice_t *c, const char *s_orig) 972 { 973 const char *s; 974 int i; 975 size_t nlen; 976 977 c->name = s_orig; 978 s = strchr (s_orig, '*'); 979 if (s != NULL) 980 { 981 c->scale = atof(s_orig); 982 s++; 983 } 984 else 985 { 986 c->scale = 1.0; 987 s = s_orig; 988 } 989 990 for (i = 0; i < numberof (routine); i++) 991 { 992 nlen = strlen (routine[i].name); 993 if (memcmp (s, routine[i].name, nlen) != 0) 994 continue; 995 996 if (s[nlen] == '.') 997 { 998 /* match, with a .r parameter */ 999 1000 if (! 
(routine[i].flag & (FLAG_R|FLAG_R_OPTIONAL))) 1001 { 1002 fprintf (stderr, 1003 "Choice %s bad: doesn't take a \".<r>\" parameter\n", 1004 s_orig); 1005 exit (1); 1006 } 1007 1008 c->p = &routine[i]; 1009 c->r = r_string (s + nlen + 1); 1010 return; 1011 } 1012 1013 if (s[nlen] == '\0') 1014 { 1015 /* match, with no parameter */ 1016 1017 if (routine[i].flag & FLAG_R) 1018 { 1019 fprintf (stderr, 1020 "Choice %s bad: needs a \".<r>\" parameter\n", 1021 s_orig); 1022 exit (1); 1023 } 1024 1025 c->p = &routine[i]; 1026 c->r = 0; 1027 return; 1028 } 1029 } 1030 1031 fprintf (stderr, "Choice %s unrecognised\n", s_orig); 1032 exit (1); 1033 } 1034 1035 1036 void 1037 usage (void) 1038 { 1039 int i; 1040 1041 speed_time_init (); 1042 1043 printf ("Usage: speed [-options] -s size <routine>...\n"); 1044 printf ("Measure the speed of some routines.\n"); 1045 printf ("Times are in seconds, accuracy is shown.\n"); 1046 printf ("\n"); 1047 printf (" -p num set precision as number of time units each routine must run\n"); 1048 printf (" -s size[-end][,size[-end]]... sizes to measure\n"); 1049 printf (" single sizes or ranges, sep with comma or use multiple -s\n"); 1050 printf (" -t step step through sizes by given amount\n"); 1051 printf (" -f factor step through sizes by given factor (eg. 
1.05)\n"); 1052 printf (" -r show times as ratios of the first routine\n"); 1053 printf (" -d show times as difference from the first routine\n"); 1054 printf (" -D show times as difference from previous size shown\n"); 1055 printf (" -c show times in CPU cycles\n"); 1056 printf (" -C show times in cycles per limb\n"); 1057 printf (" -u print resource usage (memory) at end\n"); 1058 printf (" -P name output plot files \"name.gnuplot\" and \"name.data\"\n"); 1059 printf (" -a <type> use given data: random(default), random2, zeros, aas, ffs, 2fd\n"); 1060 printf (" -x, -y, -w, -W <align> specify data alignments, sources and dests\n"); 1061 printf (" -o addrs print addresses of data blocks\n"); 1062 printf ("\n"); 1063 printf ("If both -t and -f are used, it means step by the factor or the step, whichever\n"); 1064 printf ("is greater.\n"); 1065 printf ("If both -C and -D are used, it means cycles per however many limbs between a\n"); 1066 printf ("size and the previous size.\n"); 1067 printf ("\n"); 1068 printf ("After running with -P, plots can be viewed with Gnuplot or Quickplot.\n"); 1069 printf ("\"gnuplot name.gnuplot\" (use \"set logscale xy; replot\" at the prompt for\n"); 1070 printf ("a log/log plot).\n"); 1071 printf ("\"quickplot -s name.data\" (has interactive zooming, and note -s is important\n"); 1072 printf ("when viewing more than one routine, it means same axis scales for all data).\n"); 1073 printf ("\n"); 1074 printf ("The available routines are as follows.\n"); 1075 printf ("\n"); 1076 1077 for (i = 0; i < numberof (routine); i++) 1078 { 1079 if (routine[i].flag & FLAG_R) 1080 printf ("\t%s.r\n", routine[i].name); 1081 else if (routine[i].flag & FLAG_R_OPTIONAL) 1082 printf ("\t%s (optional .r)\n", routine[i].name); 1083 else 1084 printf ("\t%s\n", routine[i].name); 1085 } 1086 printf ("\n"); 1087 printf ("Routines with a \".r\" need an extra parameter, for example mpn_lshift.6\n"); 1088 printf ("r should be in decimal, or use 0xN for 
hexadecimal.\n"); 1089 printf ("\n"); 1090 printf ("Special forms for r are \"<N>bits\" for a random N bit number, \"<N>ones\" for\n"); 1091 printf ("N one bits, or \"aas\" for 0xAA..AA.\n"); 1092 printf ("\n"); 1093 printf ("Times for sizes out of the range accepted by a routine are shown as 0.\n"); 1094 printf ("The fastest routine at each size is marked with a # (free form output only).\n"); 1095 printf ("\n"); 1096 printf ("%s", speed_time_string); 1097 printf ("\n"); 1098 printf ("Gnuplot home page http://www.gnuplot.info/\n"); 1099 printf ("Quickplot home page http://quickplot.sourceforge.net/\n"); 1100 } 1101 1102 void 1103 check_align_option (const char *name, mp_size_t align) 1104 { 1105 if (align < 0 || align > SPEED_TMP_ALLOC_ADJUST_MASK) 1106 { 1107 fprintf (stderr, "Alignment request out of range: %s %ld\n", 1108 name, (long) align); 1109 fprintf (stderr, " should be 0 to %d (limbs), inclusive\n", 1110 SPEED_TMP_ALLOC_ADJUST_MASK); 1111 exit (1); 1112 } 1113 } 1114 1115 int 1116 main (int argc, char *argv[]) 1117 { 1118 int i; 1119 int opt; 1120 1121 /* Unbuffered so output goes straight out when directed to a pipe or file 1122 and isn't lost on killing the program half way. 
*/ 1123 setbuf (stdout, NULL); 1124 1125 for (;;) 1126 { 1127 opt = getopt(argc, argv, "a:CcDdEFf:o:p:P:rRs:t:ux:y:w:W:z"); 1128 if (opt == EOF) 1129 break; 1130 1131 switch (opt) { 1132 case 'a': 1133 if (strcmp (optarg, "random") == 0) option_data = DATA_RANDOM; 1134 else if (strcmp (optarg, "random2") == 0) option_data = DATA_RANDOM2; 1135 else if (strcmp (optarg, "zeros") == 0) option_data = DATA_ZEROS; 1136 else if (strcmp (optarg, "aas") == 0) option_data = DATA_AAS; 1137 else if (strcmp (optarg, "ffs") == 0) option_data = DATA_FFS; 1138 else if (strcmp (optarg, "2fd") == 0) option_data = DATA_2FD; 1139 else 1140 { 1141 fprintf (stderr, "unrecognised data option: %s\n", optarg); 1142 exit (1); 1143 } 1144 break; 1145 case 'C': 1146 if (option_unit != UNIT_SECONDS) goto bad_unit; 1147 option_unit = UNIT_CYCLESPERLIMB; 1148 break; 1149 case 'c': 1150 if (option_unit != UNIT_SECONDS) 1151 { 1152 bad_unit: 1153 fprintf (stderr, "cannot use more than one of -c, -C\n"); 1154 exit (1); 1155 } 1156 option_unit = UNIT_CYCLES; 1157 break; 1158 case 'D': 1159 if (option_cmp != CMP_ABSOLUTE) goto bad_cmp; 1160 option_cmp = CMP_DIFFPREV; 1161 break; 1162 case 'd': 1163 if (option_cmp != CMP_ABSOLUTE) 1164 { 1165 bad_cmp: 1166 fprintf (stderr, "cannot use more than one of -d, -D, -r\n"); 1167 exit (1); 1168 } 1169 option_cmp = CMP_DIFFERENCE; 1170 break; 1171 case 'E': 1172 option_square = 1; 1173 break; 1174 case 'F': 1175 option_square = 2; 1176 break; 1177 case 'f': 1178 option_factor = atof (optarg); 1179 if (option_factor <= 1.0) 1180 { 1181 fprintf (stderr, "-f factor must be > 1.0\n"); 1182 exit (1); 1183 } 1184 break; 1185 case 'o': 1186 speed_option_set (optarg); 1187 break; 1188 case 'P': 1189 option_gnuplot = 1; 1190 option_gnuplot_basename = optarg; 1191 break; 1192 case 'p': 1193 speed_precision = atoi (optarg); 1194 break; 1195 case 'R': 1196 option_seed = time (NULL); 1197 break; 1198 case 'r': 1199 if (option_cmp != CMP_ABSOLUTE) 1200 goto bad_cmp; 1201 
option_cmp = CMP_RATIO; 1202 break; 1203 case 's': 1204 { 1205 char *s; 1206 for (s = strtok (optarg, ","); s != NULL; s = strtok (NULL, ",")) 1207 { 1208 if (size_num == size_allocnum) 1209 { 1210 size_array = (struct size_array_t *) 1211 __gmp_allocate_or_reallocate 1212 (size_array, 1213 size_allocnum * sizeof(size_array[0]), 1214 (size_allocnum+10) * sizeof(size_array[0])); 1215 size_allocnum += 10; 1216 } 1217 if (sscanf (s, "%ld-%ld", 1218 &size_array[size_num].start, 1219 &size_array[size_num].end) != 2) 1220 { 1221 size_array[size_num].start = size_array[size_num].end 1222 = atol (s); 1223 } 1224 1225 if (size_array[size_num].start < 0 1226 || size_array[size_num].end < 0 1227 || size_array[size_num].start > size_array[size_num].end) 1228 { 1229 fprintf (stderr, "invalid size parameter: %s\n", s); 1230 exit (1); 1231 } 1232 1233 size_num++; 1234 } 1235 } 1236 break; 1237 case 't': 1238 option_step = atol (optarg); 1239 if (option_step < 1) 1240 { 1241 fprintf (stderr, "-t step must be >= 1\n"); 1242 exit (1); 1243 } 1244 break; 1245 case 'u': 1246 option_resource_usage = 1; 1247 break; 1248 case 'z': 1249 sp.cache = 1; 1250 break; 1251 case 'x': 1252 sp.align_xp = atol (optarg); 1253 check_align_option ("-x", sp.align_xp); 1254 break; 1255 case 'y': 1256 sp.align_yp = atol (optarg); 1257 check_align_option ("-y", sp.align_yp); 1258 break; 1259 case 'w': 1260 sp.align_wp = atol (optarg); 1261 check_align_option ("-w", sp.align_wp); 1262 break; 1263 case 'W': 1264 sp.align_wp2 = atol (optarg); 1265 check_align_option ("-W", sp.align_wp2); 1266 break; 1267 case '?': 1268 exit(1); 1269 } 1270 } 1271 1272 if (optind >= argc) 1273 { 1274 usage (); 1275 exit (1); 1276 } 1277 1278 if (size_num == 0) 1279 { 1280 fprintf (stderr, "-s <size> must be specified\n"); 1281 exit (1); 1282 } 1283 1284 gmp_randinit_default (__gmp_rands); 1285 __gmp_rands_initialized = 1; 1286 gmp_randseed_ui (__gmp_rands, option_seed); 1287 1288 choice = (struct choice_t *) 
(*__gmp_allocate_func) 1289 ((argc - optind) * sizeof(choice[0])); 1290 for ( ; optind < argc; optind++) 1291 { 1292 struct choice_t c; 1293 routine_find (&c, argv[optind]); 1294 choice[num_choices] = c; 1295 num_choices++; 1296 } 1297 1298 if ((option_cmp == CMP_RATIO || option_cmp == CMP_DIFFERENCE) && 1299 num_choices < 2) 1300 { 1301 fprintf (stderr, "WARNING, -d or -r does nothing when only one routine requested\n"); 1302 } 1303 1304 speed_time_init (); 1305 if (option_unit == UNIT_CYCLES || option_unit == UNIT_CYCLESPERLIMB) 1306 speed_cycletime_need_cycles (); 1307 else 1308 speed_cycletime_need_seconds (); 1309 1310 if (option_gnuplot) 1311 { 1312 run_gnuplot (argc, argv); 1313 } 1314 else 1315 { 1316 if (option_unit == UNIT_SECONDS) 1317 printf ("overhead %.9f secs", speed_measure (speed_noop, NULL)); 1318 else 1319 printf ("overhead %.2f cycles", 1320 speed_measure (speed_noop, NULL) / speed_cycletime); 1321 printf (", precision %d units of %.2e secs", 1322 speed_precision, speed_unittime); 1323 1324 if (speed_cycletime == 1.0 || speed_cycletime == 0.0) 1325 printf (", CPU freq unknown\n"); 1326 else 1327 printf (", CPU freq %.2f MHz\n", 1e-6/speed_cycletime); 1328 1329 printf (" "); 1330 for (i = 0; i < num_choices; i++) 1331 printf (" %*s", COLUMN_WIDTH, choice[i].name); 1332 printf ("\n"); 1333 1334 run_all (stdout); 1335 } 1336 1337 if (option_resource_usage) 1338 { 1339 #if HAVE_GETRUSAGE 1340 { 1341 /* This doesn't give data sizes on linux 2.0.x, only utime. */ 1342 struct rusage r; 1343 if (getrusage (RUSAGE_SELF, &r) != 0) 1344 perror ("getrusage"); 1345 else 1346 printf ("getrusage(): utime %ld.%06ld data %ld stack %ld maxresident %ld\n", 1347 r.ru_utime.tv_sec, r.ru_utime.tv_usec, 1348 r.ru_idrss, r.ru_isrss, r.ru_ixrss); 1349 } 1350 #else 1351 printf ("getrusage() not available\n"); 1352 #endif 1353 1354 /* Linux kernel. 
*/ 1355 { 1356 char buf[128]; 1357 sprintf (buf, "/proc/%d/status", getpid()); 1358 if (access (buf, R_OK) == 0) 1359 { 1360 sprintf (buf, "cat /proc/%d/status", getpid()); 1361 system (buf); 1362 } 1363 1364 } 1365 } 1366 1367 return 0; 1368 } 1369