xref: /netbsd-src/external/lgpl3/gmp/dist/tune/speed.c (revision 6de51c519f1b899da63c1bf576f478920b89083f)
1 /* Speed measuring program.
2 
3 Copyright 1999, 2000, 2001, 2002, 2003, 2005, 2006, 2008, 2009, 2010 Free
4 Software Foundation, Inc.
5 
6 This file is part of the GNU MP Library.
7 
8 The GNU MP Library is free software; you can redistribute it and/or modify
9 it under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or (at your
11 option) any later version.
12 
13 The GNU MP Library is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
16 License for more details.
17 
18 You should have received a copy of the GNU Lesser General Public License
19 along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
20 
21 /* Usage message is in the code below, run with no arguments to print it.
22    See README for interesting applications.
23 
24    To add a new routine foo(), create a speed_foo() function in the style of
25    the existing ones and add an entry in the routine[] array.  Put FLAG_R if
26    speed_foo() wants an "r" parameter.
27 
28    The routines don't have help messages or descriptions, but most have
29    suggestive names.  See the source code for full details.
30 
31 */
32 
33 #include "config.h"
34 
35 #include <limits.h>
36 #include <stdio.h>
37 #include <stdlib.h>
38 #include <string.h>
39 
40 #if HAVE_UNISTD_H
41 #include <unistd.h>  /* for getpid, R_OK */
42 #endif
43 
44 #if TIME_WITH_SYS_TIME
45 # include <sys/time.h>  /* for struct timeval */
46 # include <time.h>
47 #else
48 # if HAVE_SYS_TIME_H
49 #  include <sys/time.h>
50 # else
51 #  include <time.h>
52 # endif
53 #endif
54 
55 #if HAVE_SYS_RESOURCE_H
56 #include <sys/resource.h>  /* for getrusage() */
57 #endif
58 
59 
60 #include "gmp.h"
61 #include "gmp-impl.h"
62 #include "longlong.h"  /* for the benefit of speed-many.c */
63 #include "tests.h"
64 #include "speed.h"
65 
66 
67 #if !HAVE_DECL_OPTARG
68 extern char *optarg;
69 extern int optind, opterr;
70 #endif
71 
72 #if !HAVE_STRTOUL
73 #define strtoul(p,e,b)  (unsigned long) strtol(p,e,b)
74 #endif
75 
76 #ifdef SPEED_EXTRA_PROTOS
77 SPEED_EXTRA_PROTOS
78 #endif
79 #ifdef SPEED_EXTRA_PROTOS2
80 SPEED_EXTRA_PROTOS2
81 #endif
82 
83 
/* Store the value N into each of the SIZE limbs starting at PTR.  SIZE must
   not be negative; a SIZE of zero stores nothing.  The arguments are
   evaluated more than once, so they should be free of side effects.  */
#define MPN_FILL(ptr, size, n)                  \
  do {                                          \
    mp_size_t mpn_fill_idx_ = 0;                \
    ASSERT ((size) >= 0);                       \
    while (mpn_fill_idx_ < (size))              \
      {                                         \
        (ptr)[mpn_fill_idx_] = (n);             \
        mpn_fill_idx_++;                        \
      }                                         \
  } while (0)
91 
92 
/* A limb holding the alternating bit pattern 0xAA...AA, masked down to the
   numb bits.  Only defined for 32-bit and 64-bit limbs.  */
#if GMP_LIMB_BITS == 32
#  define GMP_NUMB_0xAA  (GMP_NUMB_MASK & CNST_LIMB(0xAAAAAAAA))
#elif GMP_LIMB_BITS == 64
#  define GMP_NUMB_0xAA  (GMP_NUMB_MASK & CNST_LIMB(0xAAAAAAAAAAAAAAAA))
#endif
99 
100 
/* How the time of each routine is reported (see run_one):
     CMP_ABSOLUTE    the time itself
     CMP_RATIO       routines after the first as a ratio to the first
     CMP_DIFFERENCE  routines after the first as a difference from the first
     CMP_DIFFPREV    each routine as a difference from its own time at the
                     previously measured size  */
#define CMP_ABSOLUTE     1
#define CMP_RATIO        2
#define CMP_DIFFERENCE   3
#define CMP_DIFFPREV     4
int  option_cmp = CMP_ABSOLUTE;

/* Units for the reported times (see run_one): seconds as measured, divided
   by speed_cycletime, or additionally divided by SIZE_TO_DIVISOR of the
   size.  */
#define UNIT_SECONDS        1
#define UNIT_CYCLES         2
#define UNIT_CYCLESPERLIMB  3
int  option_unit = UNIT_SECONDS;

/* Kind of operand data generated by data_fill.  */
#define DATA_RANDOM   1
#define DATA_RANDOM2  2
#define DATA_ZEROS    3
#define DATA_AAS      4
#define DATA_FFS      5
#define DATA_2FD      6
int  option_data = DATA_RANDOM;

/* Selects the divisor used for UNIT_CYCLESPERLIMB, see SIZE_TO_DIVISOR:
   1 means size*size, 2 means size*(size+1)/2, anything else the size.  */
int        option_square = 0;
/* When non-zero, run_all steps from one size to the next by multiplying by
   this factor; the step is never smaller than option_step.  */
double     option_factor = 0.0;
mp_size_t  option_step = 1;
/* Non-zero selects gnuplot style output in run_one; the basename is used by
   run_gnuplot to form the ".gnuplot" and ".data" file names.  */
int        option_gnuplot = 0;
char      *option_gnuplot_basename;
/* The size ranges to measure, start to end inclusive.  run_all walks the
   first size_num entries.  size_allocnum is presumably the allocated
   capacity of the array -- it is not used in this part of the file.  */
struct size_array_t {
  mp_size_t start, end;
} *size_array = NULL;
mp_size_t  size_num = 0;
mp_size_t  size_allocnum = 0;
/* NOTE(review): the next two are not referenced in this part of the file;
   judging by the names, a resource usage report switch and the random seed. */
int        option_resource_usage = 0;
long       option_seed = 123456789;

/* Parameters handed to the measuring functions.  run_all passes &sp to
   run_one for every size.  */
struct speed_params  sp;
134 
/* Width of each result column printed by run_one.  */
#define COLUMN_WIDTH  13  /* for the free-form output */

/* Bits for the flag field of a routine[] entry.  routine_find rejects a
   ".r" suffix unless FLAG_R or FLAG_R_OPTIONAL is set, and insists on one
   when FLAG_R is set.  run_one skips allocating xp and yp when every chosen
   routine has FLAG_NODATA.  FLAG_RSIZE is not referenced in this part of
   the file.  */
#define FLAG_R            (1<<0)  /* require ".r" */
#define FLAG_R_OPTIONAL   (1<<1)  /* optional ".r" */
#define FLAG_RSIZE        (1<<2)
#define FLAG_NODATA       (1<<3)  /* don't alloc xp, yp */
141 
/* Table of the routines that can be measured.  routine_find looks a name
   from the user up in this table, and run_one passes the selected fun to
   speed_measure.  Entries guarded by #if exist only when the corresponding
   configuration macro is non-zero; entries that give no flag value get a
   flag of zero.  */
const struct routine_t {
  /* constants */
  const char        *name;  /* name matched against the user's choice */
  speed_function_t  fun;    /* measuring function for this routine */
  int               flag;   /* FLAG_* bits */
} routine[] = {

  { "noop",              speed_noop                 },
  { "noop_wxs",          speed_noop_wxs             },
  { "noop_wxys",         speed_noop_wxys            },

  { "mpn_add_n",         speed_mpn_add_n,     FLAG_R_OPTIONAL },
  { "mpn_sub_n",         speed_mpn_sub_n,     FLAG_R_OPTIONAL },

#if HAVE_NATIVE_mpn_add_n_sub_n
  { "mpn_add_n_sub_n",      speed_mpn_add_n_sub_n,     FLAG_R_OPTIONAL },
#endif

  { "mpn_addmul_1",      speed_mpn_addmul_1,  FLAG_R },
  { "mpn_submul_1",      speed_mpn_submul_1,  FLAG_R },
#if HAVE_NATIVE_mpn_addmul_2
  { "mpn_addmul_2",      speed_mpn_addmul_2,  FLAG_R_OPTIONAL },
#endif
#if HAVE_NATIVE_mpn_addmul_3
  { "mpn_addmul_3",      speed_mpn_addmul_3,  FLAG_R_OPTIONAL },
#endif
#if HAVE_NATIVE_mpn_addmul_4
  { "mpn_addmul_4",      speed_mpn_addmul_4,  FLAG_R_OPTIONAL },
#endif
#if HAVE_NATIVE_mpn_addmul_5
  { "mpn_addmul_5",      speed_mpn_addmul_5,  FLAG_R_OPTIONAL },
#endif
#if HAVE_NATIVE_mpn_addmul_6
  { "mpn_addmul_6",      speed_mpn_addmul_6,  FLAG_R_OPTIONAL },
#endif
#if HAVE_NATIVE_mpn_addmul_7
  { "mpn_addmul_7",      speed_mpn_addmul_7,  FLAG_R_OPTIONAL },
#endif
#if HAVE_NATIVE_mpn_addmul_8
  { "mpn_addmul_8",      speed_mpn_addmul_8,  FLAG_R_OPTIONAL },
#endif
  { "mpn_mul_1",         speed_mpn_mul_1,     FLAG_R },
  { "mpn_mul_1_inplace", speed_mpn_mul_1_inplace, FLAG_R },
#if HAVE_NATIVE_mpn_mul_2
  { "mpn_mul_2",         speed_mpn_mul_2,     FLAG_R_OPTIONAL },
#endif
#if HAVE_NATIVE_mpn_mul_3
  { "mpn_mul_3",         speed_mpn_mul_3,     FLAG_R_OPTIONAL },
#endif
#if HAVE_NATIVE_mpn_mul_4
  { "mpn_mul_4",         speed_mpn_mul_4,     FLAG_R_OPTIONAL },
#endif

  { "mpn_divrem_1",      speed_mpn_divrem_1,  FLAG_R },
  { "mpn_divrem_1f",     speed_mpn_divrem_1f, FLAG_R },
#if HAVE_NATIVE_mpn_divrem_1c
  { "mpn_divrem_1c",     speed_mpn_divrem_1c, FLAG_R },
  { "mpn_divrem_1cf",    speed_mpn_divrem_1cf,FLAG_R },
#endif
  { "mpn_mod_1",         speed_mpn_mod_1,     FLAG_R_OPTIONAL },
#if HAVE_NATIVE_mpn_mod_1c
  { "mpn_mod_1c",        speed_mpn_mod_1c,    FLAG_R_OPTIONAL },
#endif
  { "mpn_preinv_divrem_1",  speed_mpn_preinv_divrem_1,  FLAG_R },
  { "mpn_preinv_divrem_1f", speed_mpn_preinv_divrem_1f, FLAG_R },
  { "mpn_preinv_mod_1",  speed_mpn_preinv_mod_1, FLAG_R },

  { "mpn_mod_1_1",       speed_mpn_mod_1_1,       FLAG_R_OPTIONAL },
  { "mpn_mod_1s_2",      speed_mpn_mod_1_2,       FLAG_R_OPTIONAL },
  { "mpn_mod_1s_3",      speed_mpn_mod_1_3,       FLAG_R_OPTIONAL },
  { "mpn_mod_1s_4",      speed_mpn_mod_1_4,       FLAG_R_OPTIONAL },

  { "mpn_divrem_1_div",  speed_mpn_divrem_1_div,  FLAG_R },
  { "mpn_divrem_1_inv",  speed_mpn_divrem_1_inv,  FLAG_R },
  { "mpn_divrem_1f_div", speed_mpn_divrem_1f_div, FLAG_R },
  { "mpn_divrem_1f_inv", speed_mpn_divrem_1f_inv, FLAG_R },
  { "mpn_mod_1_div",     speed_mpn_mod_1_div,     FLAG_R },
  { "mpn_mod_1_inv",     speed_mpn_mod_1_inv,     FLAG_R },

  { "mpn_divrem_2",      speed_mpn_divrem_2,        },
  { "mpn_divrem_2_div",  speed_mpn_divrem_2_div,    },
  { "mpn_divrem_2_inv",  speed_mpn_divrem_2_inv,    },

  { "mpn_divexact_1",    speed_mpn_divexact_1,    FLAG_R },
  { "mpn_divexact_by3",  speed_mpn_divexact_by3          },

  { "mpn_bdiv_q_1",      speed_mpn_bdiv_q_1,      FLAG_R_OPTIONAL },
  { "mpn_pi1_bdiv_q_1",  speed_mpn_pi1_bdiv_q_1,  FLAG_R_OPTIONAL },
  { "mpn_bdiv_dbm1c",    speed_mpn_bdiv_dbm1c,    FLAG_R_OPTIONAL },

#if HAVE_NATIVE_mpn_modexact_1_odd
  { "mpn_modexact_1_odd",  speed_mpn_modexact_1_odd,  FLAG_R },
#endif
  { "mpn_modexact_1c_odd", speed_mpn_modexact_1c_odd, FLAG_R },

#if GMP_NUMB_BITS % 4 == 0
  { "mpn_mod_34lsub1",   speed_mpn_mod_34lsub1 },
#endif

  { "mpn_lshift",        speed_mpn_lshift, FLAG_R   },
  { "mpn_lshiftc",       speed_mpn_lshiftc, FLAG_R   },
  { "mpn_rshift",        speed_mpn_rshift, FLAG_R   },

  { "mpn_and_n",         speed_mpn_and_n,  FLAG_R_OPTIONAL },
  { "mpn_andn_n",        speed_mpn_andn_n, FLAG_R_OPTIONAL },
  { "mpn_nand_n",        speed_mpn_nand_n, FLAG_R_OPTIONAL },
  { "mpn_ior_n",         speed_mpn_ior_n,  FLAG_R_OPTIONAL },
  { "mpn_iorn_n",        speed_mpn_iorn_n, FLAG_R_OPTIONAL },
  { "mpn_nior_n",        speed_mpn_nior_n, FLAG_R_OPTIONAL },
  { "mpn_xor_n",         speed_mpn_xor_n,  FLAG_R_OPTIONAL },
  { "mpn_xnor_n",        speed_mpn_xnor_n, FLAG_R_OPTIONAL },
  { "mpn_com",           speed_mpn_com              },

  { "mpn_popcount",      speed_mpn_popcount         },
  { "mpn_hamdist",       speed_mpn_hamdist          },

  { "mpn_matrix22_mul",  speed_mpn_matrix22_mul     },

  { "mpn_hgcd",          speed_mpn_hgcd             },
  { "mpn_hgcd_lehmer",   speed_mpn_hgcd_lehmer      },

  { "mpn_gcd_1",         speed_mpn_gcd_1,  FLAG_R_OPTIONAL },
  { "mpn_gcd_1N",        speed_mpn_gcd_1N, FLAG_R_OPTIONAL },

  { "mpn_gcd",           speed_mpn_gcd                    },
#if 0
  { "mpn_gcd_binary",    speed_mpn_gcd_binary             },
  { "mpn_gcd_accel",     speed_mpn_gcd_accel              },
  { "find_a",            speed_find_a,        FLAG_NODATA },
#endif

  { "mpn_gcdext",            speed_mpn_gcdext            },
  { "mpn_gcdext_single",     speed_mpn_gcdext_single     },
  { "mpn_gcdext_double",     speed_mpn_gcdext_double     },
  { "mpn_gcdext_one_single", speed_mpn_gcdext_one_single },
  { "mpn_gcdext_one_double", speed_mpn_gcdext_one_double },
#if 0
  { "mpn_gcdext_lehmer",     speed_mpn_gcdext_lehmer     },
#endif
  { "mpz_jacobi",        speed_mpz_jacobi           },
  { "mpn_jacobi_base",   speed_mpn_jacobi_base      },
  { "mpn_jacobi_base_1", speed_mpn_jacobi_base_1    },
  { "mpn_jacobi_base_2", speed_mpn_jacobi_base_2    },
  { "mpn_jacobi_base_3", speed_mpn_jacobi_base_3    },

  { "mpn_mul",           speed_mpn_mul,         FLAG_R_OPTIONAL },
  { "mpn_mul_basecase",  speed_mpn_mul_basecase,FLAG_R_OPTIONAL },
  { "mpn_sqr_basecase",  speed_mpn_sqr_basecase     },
#if HAVE_NATIVE_mpn_sqr_diagonal
  { "mpn_sqr_diagonal",  speed_mpn_sqr_diagonal     },
#endif

  { "mpn_mul_n",         speed_mpn_mul_n            },
  { "mpn_sqr",           speed_mpn_sqr              },

  { "mpn_toom2_sqr",     speed_mpn_toom2_sqr        },
  { "mpn_toom3_sqr",     speed_mpn_toom3_sqr        },
  { "mpn_toom4_sqr",     speed_mpn_toom4_sqr        },
  { "mpn_toom6_sqr",     speed_mpn_toom6_sqr        },
  { "mpn_toom8_sqr",     speed_mpn_toom8_sqr        },
  { "mpn_toom22_mul",    speed_mpn_toom22_mul       },
  { "mpn_toom33_mul",    speed_mpn_toom33_mul       },
  { "mpn_toom44_mul",    speed_mpn_toom44_mul       },
  { "mpn_toom6h_mul",    speed_mpn_toom6h_mul       },
  { "mpn_toom8h_mul",    speed_mpn_toom8h_mul       },
  { "mpn_toom32_mul",    speed_mpn_toom32_mul       },
  { "mpn_toom42_mul",    speed_mpn_toom42_mul       },
  { "mpn_toom43_mul",    speed_mpn_toom43_mul       },
  { "mpn_toom63_mul",    speed_mpn_toom63_mul       },
  { "mpn_nussbaumer_mul",    speed_mpn_nussbaumer_mul    },
  { "mpn_nussbaumer_mul_sqr",speed_mpn_nussbaumer_mul_sqr},
#if WANT_OLD_FFT_FULL
  { "mpn_mul_fft_full",      speed_mpn_mul_fft_full      },
  { "mpn_mul_fft_full_sqr",  speed_mpn_mul_fft_full_sqr  },
#endif
  { "mpn_mul_fft",       speed_mpn_mul_fft,     FLAG_R_OPTIONAL },
  { "mpn_mul_fft_sqr",   speed_mpn_mul_fft_sqr, FLAG_R_OPTIONAL },

  { "mpn_mullo_n",        speed_mpn_mullo_n         },
  { "mpn_mullo_basecase", speed_mpn_mullo_basecase  },

  { "mpn_bc_mulmod_bnm1",      speed_mpn_bc_mulmod_bnm1      },
  { "mpn_mulmod_bnm1",         speed_mpn_mulmod_bnm1         },
  { "mpn_mulmod_bnm1_rounded", speed_mpn_mulmod_bnm1_rounded },
  { "mpn_sqrmod_bnm1",         speed_mpn_sqrmod_bnm1         },

  { "mpn_invert",              speed_mpn_invert              },
  { "mpn_invertappr",          speed_mpn_invertappr          },
  { "mpn_ni_invertappr",       speed_mpn_ni_invertappr       },
  { "mpn_binvert",             speed_mpn_binvert             },

  { "mpn_sbpi1_div_qr",        speed_mpn_sbpi1_div_qr,    FLAG_R_OPTIONAL},
  { "mpn_dcpi1_div_qr",        speed_mpn_dcpi1_div_qr,    FLAG_R_OPTIONAL},
  { "mpn_mu_div_qr",           speed_mpn_mu_div_qr,       FLAG_R_OPTIONAL},
  { "mpn_mupi_div_qr",         speed_mpn_mupi_div_qr,     FLAG_R_OPTIONAL},
  { "mpn_sbpi1_divappr_q",     speed_mpn_sbpi1_divappr_q, FLAG_R_OPTIONAL},
  { "mpn_dcpi1_divappr_q",     speed_mpn_dcpi1_divappr_q, FLAG_R_OPTIONAL},

  { "mpn_sbpi1_bdiv_qr",       speed_mpn_sbpi1_bdiv_qr       },
  { "mpn_dcpi1_bdiv_qr",       speed_mpn_dcpi1_bdiv_qr       },
  { "mpn_sbpi1_bdiv_q",        speed_mpn_sbpi1_bdiv_q        },
  { "mpn_dcpi1_bdiv_q",        speed_mpn_dcpi1_bdiv_q        },

  { "mpn_get_str",          speed_mpn_get_str,     FLAG_R_OPTIONAL },
  { "mpn_set_str",          speed_mpn_set_str,     FLAG_R_OPTIONAL },
  { "mpn_set_str_basecase", speed_mpn_bc_set_str,  FLAG_R_OPTIONAL },

  { "mpn_sqrtrem",       speed_mpn_sqrtrem          },
  { "mpn_rootrem",       speed_mpn_rootrem, FLAG_R  },

  { "mpn_fib2_ui",       speed_mpn_fib2_ui,    FLAG_NODATA },
  { "mpz_fib_ui",        speed_mpz_fib_ui,     FLAG_NODATA },
  { "mpz_fib2_ui",       speed_mpz_fib2_ui,    FLAG_NODATA },
  { "mpz_lucnum_ui",     speed_mpz_lucnum_ui,  FLAG_NODATA },
  { "mpz_lucnum2_ui",    speed_mpz_lucnum2_ui, FLAG_NODATA },

  { "mpz_add",           speed_mpz_add              },
  { "mpz_bin_uiui",      speed_mpz_bin_uiui, FLAG_NODATA | FLAG_R_OPTIONAL },
  { "mpz_fac_ui",        speed_mpz_fac_ui,   FLAG_NODATA   },
  { "mpz_powm",          speed_mpz_powm             },
  { "mpz_powm_mod",      speed_mpz_powm_mod         },
  { "mpz_powm_redc",     speed_mpz_powm_redc        },
  { "mpz_powm_ui",       speed_mpz_powm_ui,  FLAG_R_OPTIONAL },

  { "mpz_mod",           speed_mpz_mod              },
  { "mpn_redc_1",        speed_mpn_redc_1           },
  { "mpn_redc_2",        speed_mpn_redc_2           },
  { "mpn_redc_n",        speed_mpn_redc_n           },

  { "MPN_COPY",          speed_MPN_COPY             },
  { "MPN_COPY_INCR",     speed_MPN_COPY_INCR        },
  { "MPN_COPY_DECR",     speed_MPN_COPY_DECR        },
  { "memcpy",            speed_memcpy               },
#if HAVE_NATIVE_mpn_copyi
  { "mpn_copyi",         speed_mpn_copyi            },
#endif
#if HAVE_NATIVE_mpn_copyd
  { "mpn_copyd",         speed_mpn_copyd            },
#endif
#if HAVE_NATIVE_mpn_addlsh1_n
  { "mpn_addlsh1_n",     speed_mpn_addlsh1_n        },
#endif
#if HAVE_NATIVE_mpn_sublsh1_n
  { "mpn_sublsh1_n",     speed_mpn_sublsh1_n        },
#endif
#if HAVE_NATIVE_mpn_rsblsh1_n
  { "mpn_rsblsh1_n",     speed_mpn_rsblsh1_n        },
#endif
#if HAVE_NATIVE_mpn_addlsh2_n
  { "mpn_addlsh2_n",     speed_mpn_addlsh2_n        },
#endif
#if HAVE_NATIVE_mpn_sublsh2_n
  { "mpn_sublsh2_n",     speed_mpn_sublsh2_n        },
#endif
#if HAVE_NATIVE_mpn_rsblsh2_n
  { "mpn_rsblsh2_n",     speed_mpn_rsblsh2_n        },
#endif
#if HAVE_NATIVE_mpn_rsh1add_n
  { "mpn_rsh1add_n",     speed_mpn_rsh1add_n        },
#endif
#if HAVE_NATIVE_mpn_rsh1sub_n
  { "mpn_rsh1sub_n",     speed_mpn_rsh1sub_n        },
#endif

  { "MPN_ZERO",          speed_MPN_ZERO             },

  { "binvert_limb",       speed_binvert_limb,       FLAG_NODATA },
  { "binvert_limb_mul1",  speed_binvert_limb_mul1,  FLAG_NODATA },
  { "binvert_limb_loop",  speed_binvert_limb_loop,  FLAG_NODATA },
  { "binvert_limb_cond",  speed_binvert_limb_cond,  FLAG_NODATA },
  { "binvert_limb_arith", speed_binvert_limb_arith, FLAG_NODATA },

  { "malloc_free",                  speed_malloc_free                  },
  { "malloc_realloc_free",          speed_malloc_realloc_free          },
  { "gmp_allocate_free",            speed_gmp_allocate_free            },
  { "gmp_allocate_reallocate_free", speed_gmp_allocate_reallocate_free },
  { "mpz_init_clear",               speed_mpz_init_clear               },
  { "mpq_init_clear",               speed_mpq_init_clear               },
  { "mpf_init_clear",               speed_mpf_init_clear               },
  { "mpz_init_realloc_clear",       speed_mpz_init_realloc_clear       },

  { "umul_ppmm",         speed_umul_ppmm,     FLAG_R_OPTIONAL },
#if HAVE_NATIVE_mpn_umul_ppmm
  { "mpn_umul_ppmm",     speed_mpn_umul_ppmm, FLAG_R_OPTIONAL },
#endif
#if HAVE_NATIVE_mpn_umul_ppmm_r
  { "mpn_umul_ppmm_r",   speed_mpn_umul_ppmm_r, FLAG_R_OPTIONAL },
#endif

  { "count_leading_zeros",  speed_count_leading_zeros,  FLAG_NODATA | FLAG_R_OPTIONAL },
  { "count_trailing_zeros", speed_count_trailing_zeros, FLAG_NODATA | FLAG_R_OPTIONAL },

  { "udiv_qrnnd",             speed_udiv_qrnnd,             FLAG_R_OPTIONAL },
  { "udiv_qrnnd_preinv1",     speed_udiv_qrnnd_preinv1,     FLAG_R_OPTIONAL },
  { "udiv_qrnnd_preinv2",     speed_udiv_qrnnd_preinv2,     FLAG_R_OPTIONAL },
  { "udiv_qrnnd_c",           speed_udiv_qrnnd_c,           FLAG_R_OPTIONAL },
#if HAVE_NATIVE_mpn_udiv_qrnnd
  { "mpn_udiv_qrnnd",         speed_mpn_udiv_qrnnd,         FLAG_R_OPTIONAL },
#endif
#if HAVE_NATIVE_mpn_udiv_qrnnd_r
  { "mpn_udiv_qrnnd_r",       speed_mpn_udiv_qrnnd_r,       FLAG_R_OPTIONAL },
#endif
  { "invert_limb",            speed_invert_limb,            FLAG_R_OPTIONAL },

  { "operator_div",           speed_operator_div,           FLAG_R_OPTIONAL },
  { "operator_mod",           speed_operator_mod,           FLAG_R_OPTIONAL },

  { "gmp_randseed",    speed_gmp_randseed,    FLAG_R_OPTIONAL               },
  { "gmp_randseed_ui", speed_gmp_randseed_ui, FLAG_R_OPTIONAL | FLAG_NODATA },
  { "mpz_urandomb",    speed_mpz_urandomb,    FLAG_R_OPTIONAL | FLAG_NODATA },

  /* Further entries can be supplied at compile time through these macros. */
#ifdef SPEED_EXTRA_ROUTINES
  SPEED_EXTRA_ROUTINES
#endif
#ifdef SPEED_EXTRA_ROUTINES2
  SPEED_EXTRA_ROUTINES2
#endif
};
460 
461 
/* One routine selected for measuring, as filled in by routine_find and
   updated by run_one for each size.  */
struct choice_t {
  const struct routine_t  *p;     /* the matching routine[] entry */
  mp_limb_t               r;      /* value of the ".r" parameter, 0 if none */
  double                  scale;  /* multiplier applied to the measured time,
                                     from a "scale*" prefix, otherwise 1.0 */
  double                  time;   /* result for the current size, after the
                                     option_cmp and option_unit adjustments */
  int                     no_time;   /* non-zero if there's no result to show */
  double                  prev_time; /* scaled time at the previous size, or
                                        -1.0 if there was none */
  const char              *name;  /* the choice string as given, used as the
                                     gnuplot title */
};
/* The selected routines, num_choices of them.  The array is set up
   elsewhere in the file.  */
struct choice_t  *choice;
int  num_choices = 0;
473 
474 
475 void
476 data_fill (mp_ptr ptr, mp_size_t size)
477 {
478   switch (option_data) {
479   case DATA_RANDOM:
480     mpn_random (ptr, size);
481     break;
482   case DATA_RANDOM2:
483     mpn_random2 (ptr, size);
484     break;
485   case DATA_ZEROS:
486     MPN_ZERO (ptr, size);
487     break;
488   case DATA_AAS:
489     MPN_FILL (ptr, size, GMP_NUMB_0xAA);
490     break;
491   case DATA_FFS:
492     MPN_FILL (ptr, size, GMP_NUMB_MAX);
493     break;
494   case DATA_2FD:
495     MPN_FILL (ptr, size, GMP_NUMB_MAX);
496     ptr[0] -= 2;
497     break;
498   default:
499     abort();
500     /*NOTREACHED*/
501   }
502 }
503 
504 /* The code here handling the various combinations of output options isn't
505    too attractive, but it works and is fairly clean.  */
506 
/* The amount of work taken to correspond to size N when reporting cycles
   per limb: N*(N+1)/2 if option_square is 2, N*N if option_square is 1,
   and plain N otherwise.  */
#define SIZE_TO_DIVISOR(n)                      \
  (option_square == 2 ? (n)*((n)+1)/2           \
   : option_square == 1 ? (n)*(n)               \
   : (n))
511 
512 void
513 run_one (FILE *fp, struct speed_params *s, mp_size_t prev_size)
514 {
515   const char  *first_open_fastest, *first_open_notfastest, *first_close;
516   int         i, fastest, want_data;
517   double      fastest_time;
518   TMP_DECL;
519 
520   TMP_MARK;
521 
522   /* allocate data, unless all routines are NODATA */
523   want_data = 0;
524   for (i = 0; i < num_choices; i++)
525     want_data |= ((choice[i].p->flag & FLAG_NODATA) == 0);
526 
527   if (want_data)
528     {
529       SPEED_TMP_ALLOC_LIMBS (sp.xp, s->size, s->align_xp);
530       SPEED_TMP_ALLOC_LIMBS (sp.yp, s->size, s->align_yp);
531 
532       data_fill (s->xp, s->size);
533       data_fill (s->yp, s->size);
534     }
535   else
536     {
537       sp.xp = NULL;
538       sp.yp = NULL;
539     }
540 
541   if (prev_size == -1 && option_cmp == CMP_DIFFPREV)
542     {
543       first_open_fastest = "(#";
544       first_open_notfastest = " (";
545       first_close = ")";
546     }
547   else
548     {
549       first_open_fastest = "#";
550       first_open_notfastest = " ";
551       first_close = "";
552     }
553 
554   fastest = -1;
555   fastest_time = -1.0;
556   for (i = 0; i < num_choices; i++)
557     {
558       s->r = choice[i].r;
559       choice[i].time = speed_measure (choice[i].p->fun, s);
560       choice[i].no_time = (choice[i].time == -1.0);
561       if (! choice[i].no_time)
562         choice[i].time *= choice[i].scale;
563 
564       /* Apply the effect of CMP_DIFFPREV, but the new choice[i].prev_time
565          is before any differences.  */
566       {
567         double     t;
568         t = choice[i].time;
569         if (t != -1.0 && option_cmp == CMP_DIFFPREV && prev_size != -1)
570           {
571             if (choice[i].prev_time == -1.0)
572               choice[i].no_time = 1;
573             else
574               choice[i].time = choice[i].time - choice[i].prev_time;
575           }
576         choice[i].prev_time = t;
577       }
578 
579       if (choice[i].no_time)
580         continue;
581 
582       /* Look for the fastest after CMP_DIFFPREV has been applied, but
583          before CMP_RATIO or CMP_DIFFERENCE.  There's only a fastest shown
584          if there's more than one routine.  */
585       if (num_choices > 1 && (fastest == -1 || choice[i].time < fastest_time))
586         {
587           fastest = i;
588           fastest_time = choice[i].time;
589         }
590 
591       if (option_cmp == CMP_DIFFPREV)
592         {
593           /* Conversion for UNIT_CYCLESPERLIMB differs in CMP_DIFFPREV. */
594           if (option_unit == UNIT_CYCLES)
595             choice[i].time /= speed_cycletime;
596           else if (option_unit == UNIT_CYCLESPERLIMB)
597             {
598               if (prev_size == -1)
599                 choice[i].time /= speed_cycletime;
600               else
601                 choice[i].time /=  (speed_cycletime
602                                     * (SIZE_TO_DIVISOR(s->size)
603                                        - SIZE_TO_DIVISOR(prev_size)));
604             }
605         }
606       else
607         {
608           if (option_unit == UNIT_CYCLES)
609             choice[i].time /= speed_cycletime;
610           else if (option_unit == UNIT_CYCLESPERLIMB)
611             choice[i].time /= (speed_cycletime * SIZE_TO_DIVISOR(s->size));
612 
613           if (option_cmp == CMP_RATIO && i > 0)
614             {
615               /* A ratio isn't affected by the units chosen. */
616               if (choice[0].no_time || choice[0].time == 0.0)
617                 choice[i].no_time = 1;
618               else
619                 choice[i].time /= choice[0].time;
620             }
621           else if (option_cmp == CMP_DIFFERENCE && i > 0)
622             {
623               if (choice[0].no_time)
624                 {
625                   choice[i].no_time = 1;
626                   continue;
627                 }
628               choice[i].time -= choice[0].time;
629             }
630         }
631     }
632 
633   if (option_gnuplot)
634     {
635       /* In CMP_DIFFPREV, don't print anything for the first size, start
636          with the second where an actual difference is available.
637 
638          In CMP_RATIO, print the first column as 1.0.
639 
640          The 9 decimals printed is much more than the expected precision of
641          the measurements actually. */
642 
643       if (! (option_cmp == CMP_DIFFPREV && prev_size == -1))
644         {
645           fprintf (fp, "%-6ld ", s->size);
646           for (i = 0; i < num_choices; i++)
647             fprintf (fp, "  %.9e",
648                      choice[i].no_time ? 0.0
649                      : (option_cmp == CMP_RATIO && i == 0) ? 1.0
650                      : choice[i].time);
651           fprintf (fp, "\n");
652         }
653     }
654   else
655     {
656       fprintf (fp, "%-6ld ", s->size);
657       for (i = 0; i < num_choices; i++)
658         {
659           char  buf[128];
660           int   decimals;
661 
662           if (choice[i].no_time)
663             {
664               fprintf (fp, " %*s", COLUMN_WIDTH, "n/a");
665             }
666           else
667             {if (option_unit == UNIT_CYCLESPERLIMB
668                  || (option_cmp == CMP_RATIO && i > 0))
669                 decimals = 4;
670               else if (option_unit == UNIT_CYCLES)
671                 decimals = 2;
672               else
673                 decimals = 9;
674 
675               sprintf (buf, "%s%.*f%s",
676                        i == fastest ? first_open_fastest : first_open_notfastest,
677                        decimals, choice[i].time, first_close);
678               fprintf (fp, " %*s", COLUMN_WIDTH, buf);
679             }
680         }
681       fprintf (fp, "\n");
682     }
683 
684   TMP_FREE;
685 }
686 
687 void
688 run_all (FILE *fp)
689 {
690   mp_size_t  prev_size;
691   int        i;
692   TMP_DECL;
693 
694   TMP_MARK;
695   SPEED_TMP_ALLOC_LIMBS (sp.xp_block, SPEED_BLOCK_SIZE, sp.align_xp);
696   SPEED_TMP_ALLOC_LIMBS (sp.yp_block, SPEED_BLOCK_SIZE, sp.align_yp);
697 
698   data_fill (sp.xp_block, SPEED_BLOCK_SIZE);
699   data_fill (sp.yp_block, SPEED_BLOCK_SIZE);
700 
701   for (i = 0; i < size_num; i++)
702     {
703       sp.size = size_array[i].start;
704       prev_size = -1;
705       for (;;)
706         {
707           mp_size_t  step;
708 
709           if (option_data == DATA_2FD && sp.size >= 2)
710             sp.xp[sp.size-1] = 2;
711 
712           run_one (fp, &sp, prev_size);
713           prev_size = sp.size;
714 
715           if (option_data == DATA_2FD && sp.size >= 2)
716             sp.xp[sp.size-1] = MP_LIMB_T_MAX;
717 
718           if (option_factor != 0.0)
719             {
720               step = (mp_size_t) (sp.size * option_factor - sp.size);
721               if (step < 1)
722                 step = 1;
723             }
724           else
725             step = 1;
726           if (step < option_step)
727             step = option_step;
728 
729           sp.size += step;
730           if (sp.size > size_array[i].end)
731             break;
732         }
733     }
734 
735   TMP_FREE;
736 }
737 
738 
/* Open FILENAME for writing and return the stream.  If it can't be opened
   a message is printed to stderr and the program exits with status 1, so
   the return value is never NULL.  */
FILE *
fopen_for_write (const char *filename)
{
  FILE  *stream = fopen (filename, "w");

  if (stream == NULL)
    {
      fprintf (stderr, "Cannot create %s\n", filename);
      exit(1);
    }

  return stream;
}
750 
/* Close FP, a stream that has been written to.  If the stream had its error
   indicator set, or the close itself fails, print a message naming FILENAME
   to stderr and exit with status 1.  */
void
fclose_written (FILE *fp, const char *filename)
{
  /* The error indicator has to be read before the stream is closed.  The
     close is attempted whether or not an error was already recorded.  */
  int  write_failed = ferror (fp);
  int  close_failed = fclose (fp);

  if (write_failed == 0 && close_failed == 0)
    return;

  fprintf (stderr, "Error writing %s\n", filename);
  exit(1);
}
765 
766 
767 void
768 run_gnuplot (int argc, char *argv[])
769 {
770   char  *plot_filename;
771   char  *data_filename;
772   FILE  *fp;
773   int   i;
774 
775   plot_filename = (char *) (*__gmp_allocate_func)
776     (strlen (option_gnuplot_basename) + 20);
777   data_filename = (char *) (*__gmp_allocate_func)
778     (strlen (option_gnuplot_basename) + 20);
779 
780   sprintf (plot_filename, "%s.gnuplot", option_gnuplot_basename);
781   sprintf (data_filename, "%s.data",    option_gnuplot_basename);
782 
783   fp = fopen_for_write (plot_filename);
784 
785   fprintf (fp, "# Generated with:\n");
786   fprintf (fp, "#");
787   for (i = 0; i < argc; i++)
788     fprintf (fp, " %s", argv[i]);
789   fprintf (fp, "\n");
790   fprintf (fp, "\n");
791 
792   fprintf (fp, "reset\n");
793 
794   /* Putting the key at the top left is usually good, and you can change it
795      interactively if it's not. */
796   fprintf (fp, "set key left\n");
797 
798   /* designed to make it possible to see crossovers easily */
799   fprintf (fp, "set data style lines\n");
800 
801   fprintf (fp, "plot ");
802   for (i = 0; i < num_choices; i++)
803     {
804       fprintf (fp, " \"%s\" using 1:%d", data_filename, i+2);
805       fprintf (fp, " title \"%s\"", choice[i].name);
806 
807       if (i != num_choices-1)
808         fprintf (fp, ", \\");
809       fprintf (fp, "\n");
810     }
811 
812   fprintf (fp, "load \"-\"\n");
813   fclose_written (fp, plot_filename);
814 
815   fp = fopen_for_write (data_filename);
816 
817   /* Unbuffered so you can see where the program was up to if it crashes or
818      you kill it. */
819   setbuf (fp, NULL);
820 
821   run_all (fp);
822   fclose_written (fp, data_filename);
823 }
824 
825 
/* A limb whose N least significant bits are ones and the rest zeros, for N
   from 0 to GMP_LIMB_BITS inclusive.  The two ends are treated specially so
   the shift count is always less than the limb width.  */

#define LIMB_ONES(n)                            \
  ((n) == 0 ? CNST_LIMB(0)                      \
   : (n) == GMP_LIMB_BITS ? MP_LIMB_T_MAX       \
   : (CNST_LIMB(1) << (n)) - 1)
832 
833 mp_limb_t
834 r_string (const char *s)
835 {
836   const char  *s_orig = s;
837   long        n;
838 
839   if (strcmp (s, "aas") == 0)
840     return GMP_NUMB_0xAA;
841 
842   {
843     mpz_t      z;
844     mp_limb_t  l;
845     int        set, siz;
846 
847     mpz_init (z);
848     set = mpz_set_str (z, s, 0);
849     siz = SIZ(z);
850     l = (siz == 0 ? 0 : siz > 0 ? PTR(z)[0] : -PTR(z)[0]);
851     mpz_clear (z);
852     if (set == 0)
853       {
854         if (siz > 1 || siz < -1)
855           printf ("Warning, r parameter %s truncated to %d bits\n",
856                   s_orig, GMP_LIMB_BITS);
857         return l;
858       }
859   }
860 
861   if (s[0] == '0' && (s[1] == 'x' || s[1] == 'X'))
862     n = strtoul (s+2, (char **) &s, 16);
863   else
864     n = strtol (s, (char **) &s, 10);
865 
866   if (strcmp (s, "bits") == 0)
867     {
868       mp_limb_t  l;
869       if (n > GMP_LIMB_BITS)
870         {
871           fprintf (stderr, "%ld bit parameter invalid (max %d bits)\n",
872                    n, GMP_LIMB_BITS);
873           exit (1);
874         }
875       mpn_random (&l, 1);
876       return (l | (CNST_LIMB(1) << (n-1))) & LIMB_ONES(n);
877     }
878   else  if (strcmp (s, "ones") == 0)
879     {
880       if (n > GMP_LIMB_BITS)
881         {
882           fprintf (stderr, "%ld bit parameter invalid (max %d bits)\n",
883                    n, GMP_LIMB_BITS);
884           exit (1);
885         }
886       return LIMB_ONES (n);
887     }
888   else if (*s != '\0')
889     {
890       fprintf (stderr, "invalid r parameter: %s\n", s_orig);
891       exit (1);
892     }
893 
894   return n;
895 }
896 
897 
898 void
899 routine_find (struct choice_t *c, const char *s_orig)
900 {
901   const char  *s;
902   int     i;
903   size_t  nlen;
904 
905   c->name = s_orig;
906   s = strchr (s_orig, '*');
907   if (s != NULL)
908     {
909       c->scale = atof(s_orig);
910       s++;
911     }
912   else
913     {
914       c->scale = 1.0;
915       s = s_orig;
916     }
917 
918   for (i = 0; i < numberof (routine); i++)
919     {
920       nlen = strlen (routine[i].name);
921       if (memcmp (s, routine[i].name, nlen) != 0)
922         continue;
923 
924       if (s[nlen] == '.')
925         {
926           /* match, with a .r parameter */
927 
928           if (! (routine[i].flag & (FLAG_R|FLAG_R_OPTIONAL)))
929             {
930               fprintf (stderr,
931                        "Choice %s bad: doesn't take a \".<r>\" parameter\n",
932                        s_orig);
933               exit (1);
934             }
935 
936           c->p = &routine[i];
937           c->r = r_string (s + nlen + 1);
938           return;
939         }
940 
941       if (s[nlen] == '\0')
942         {
943           /* match, with no parameter */
944 
945           if (routine[i].flag & FLAG_R)
946             {
947               fprintf (stderr,
948                        "Choice %s bad: needs a \".<r>\" parameter\n",
949                        s_orig);
950               exit (1);
951             }
952 
953           c->p = &routine[i];
954           c->r = 0;
955           return;
956         }
957     }
958 
959   fprintf (stderr, "Choice %s unrecognised\n", s_orig);
960   exit (1);
961 }
962 
963 
964 void
965 usage (void)
966 {
967   int  i;
968 
969   speed_time_init ();
970 
971   printf ("Usage: speed [-options] -s size <routine>...\n");
972   printf ("Measure the speed of some routines.\n");
973   printf ("Times are in seconds, accuracy is shown.\n");
974   printf ("\n");
975   printf ("   -p num     set precision as number of time units each routine must run\n");
976   printf ("   -s size[-end][,size[-end]]...   sizes to measure\n");
977   printf ("              single sizes or ranges, sep with comma or use multiple -s\n");
978   printf ("   -t step    step through sizes by given amount\n");
979   printf ("   -f factor  step through sizes by given factor (eg. 1.05)\n");
980   printf ("   -r         show times as ratios of the first routine\n");
981   printf ("   -d         show times as difference from the first routine\n");
982   printf ("   -D         show times as difference from previous size shown\n");
983   printf ("   -c         show times in CPU cycles\n");
984   printf ("   -C         show times in cycles per limb\n");
985   printf ("   -u         print resource usage (memory) at end\n");
986   printf ("   -P name    output plot files \"name.gnuplot\" and \"name.data\"\n");
987   printf ("   -a <type>  use given data: random(default), random2, zeros, aas, ffs, 2fd\n");
988   printf ("   -x, -y, -w, -W <align>  specify data alignments, sources and dests\n");
989   printf ("   -o addrs   print addresses of data blocks\n");
990   printf ("\n");
991   printf ("If both -t and -f are used, it means step by the factor or the step, whichever\n");
992   printf ("is greater.\n");
993   printf ("If both -C and -D are used, it means cycles per however many limbs between a\n");
994   printf ("size and the previous size.\n");
995   printf ("\n");
996   printf ("After running with -P, plots can be viewed with Gnuplot or Quickplot.\n");
997   printf ("\"gnuplot name.gnuplot\" (use \"set logscale xy; replot\" at the prompt for\n");
998   printf ("a log/log plot).\n");
999   printf ("\"quickplot -s name.data\" (has interactive zooming, and note -s is important\n");
1000   printf ("when viewing more than one routine, it means same axis scales for all data).\n");
1001   printf ("\n");
1002   printf ("The available routines are as follows.\n");
1003   printf ("\n");
1004 
1005   for (i = 0; i < numberof (routine); i++)
1006     {
1007       if (routine[i].flag & FLAG_R)
1008         printf ("\t%s.r\n", routine[i].name);
1009       else if (routine[i].flag & FLAG_R_OPTIONAL)
1010         printf ("\t%s (optional .r)\n", routine[i].name);
1011       else
1012         printf ("\t%s\n", routine[i].name);
1013     }
1014   printf ("\n");
1015   printf ("Routines with a \".r\" need an extra parameter, for example mpn_lshift.6\n");
1016   printf ("r should be in decimal, or use 0xN for hexadecimal.\n");
1017   printf ("\n");
1018   printf ("Special forms for r are \"<N>bits\" for a random N bit number, \"<N>ones\" for\n");
1019   printf ("N one bits, or \"aas\" for 0xAA..AA.\n");
1020   printf ("\n");
1021   printf ("Times for sizes out of the range accepted by a routine are shown as 0.\n");
1022   printf ("The fastest routine at each size is marked with a # (free form output only).\n");
1023   printf ("\n");
1024   printf ("%s", speed_time_string);
1025   printf ("\n");
1026   printf ("Gnuplot home page http://www.gnuplot.info/\n");
1027   printf ("Quickplot home page http://quickplot.sourceforge.net/\n");
1028 }
1029 
1030 void
1031 check_align_option (const char *name, mp_size_t align)
1032 {
1033   if (align < 0 || align > SPEED_TMP_ALLOC_ADJUST_MASK)
1034     {
1035       fprintf (stderr, "Alignment request out of range: %s %ld\n",
1036                name, (long) align);
1037       fprintf (stderr, "  should be 0 to %d (limbs), inclusive\n",
1038                SPEED_TMP_ALLOC_ADJUST_MASK);
1039       exit (1);
1040     }
1041 }
1042 
1043 int
1044 main (int argc, char *argv[])
1045 {
1046   int  i;
1047   int  opt;
1048 
1049   /* Unbuffered so output goes straight out when directed to a pipe or file
1050      and isn't lost on killing the program half way.  */
1051   setbuf (stdout, NULL);
1052 
1053   for (;;)
1054     {
1055       opt = getopt(argc, argv, "a:CcDdEFf:o:p:P:rRs:t:ux:y:w:W:z");
1056       if (opt == EOF)
1057         break;
1058 
1059       switch (opt) {
1060       case 'a':
1061         if (strcmp (optarg, "random") == 0)       option_data = DATA_RANDOM;
1062         else if (strcmp (optarg, "random2") == 0) option_data = DATA_RANDOM2;
1063         else if (strcmp (optarg, "zeros") == 0)   option_data = DATA_ZEROS;
1064         else if (strcmp (optarg, "aas") == 0)     option_data = DATA_AAS;
1065         else if (strcmp (optarg, "ffs") == 0)     option_data = DATA_FFS;
1066         else if (strcmp (optarg, "2fd") == 0)     option_data = DATA_2FD;
1067         else
1068           {
1069             fprintf (stderr, "unrecognised data option: %s\n", optarg);
1070             exit (1);
1071           }
1072         break;
1073       case 'C':
1074         if (option_unit  != UNIT_SECONDS) goto bad_unit;
1075         option_unit = UNIT_CYCLESPERLIMB;
1076         break;
1077       case 'c':
1078         if (option_unit != UNIT_SECONDS)
1079           {
1080           bad_unit:
1081             fprintf (stderr, "cannot use more than one of -c, -C\n");
1082             exit (1);
1083           }
1084         option_unit = UNIT_CYCLES;
1085         break;
1086       case 'D':
1087         if (option_cmp != CMP_ABSOLUTE) goto bad_cmp;
1088         option_cmp = CMP_DIFFPREV;
1089         break;
1090       case 'd':
1091         if (option_cmp != CMP_ABSOLUTE)
1092           {
1093           bad_cmp:
1094             fprintf (stderr, "cannot use more than one of -d, -D, -r\n");
1095             exit (1);
1096           }
1097         option_cmp = CMP_DIFFERENCE;
1098         break;
1099       case 'E':
1100         option_square = 1;
1101         break;
1102       case 'F':
1103         option_square = 2;
1104         break;
1105       case 'f':
1106         option_factor = atof (optarg);
1107         if (option_factor <= 1.0)
1108           {
1109             fprintf (stderr, "-f factor must be > 1.0\n");
1110             exit (1);
1111           }
1112         break;
1113       case 'o':
1114         speed_option_set (optarg);
1115         break;
1116       case 'P':
1117         option_gnuplot = 1;
1118         option_gnuplot_basename = optarg;
1119         break;
1120       case 'p':
1121         speed_precision = atoi (optarg);
1122         break;
1123       case 'R':
1124         option_seed = time (NULL);
1125         break;
1126       case 'r':
1127         if (option_cmp != CMP_ABSOLUTE)
1128           goto bad_cmp;
1129         option_cmp = CMP_RATIO;
1130         break;
1131       case 's':
1132         {
1133           char  *s;
1134           for (s = strtok (optarg, ","); s != NULL; s = strtok (NULL, ","))
1135             {
1136               if (size_num == size_allocnum)
1137                 {
1138                   size_array = (struct size_array_t *)
1139                     __gmp_allocate_or_reallocate
1140                     (size_array,
1141                      size_allocnum * sizeof(size_array[0]),
1142                      (size_allocnum+10) * sizeof(size_array[0]));
1143                   size_allocnum += 10;
1144                 }
1145               if (sscanf (s, "%ld-%ld",
1146                           &size_array[size_num].start,
1147                           &size_array[size_num].end) != 2)
1148                 {
1149                   size_array[size_num].start = size_array[size_num].end
1150                     = atol (s);
1151                 }
1152 
1153               if (size_array[size_num].start < 0
1154                   || size_array[size_num].end < 0
1155                   || size_array[size_num].start > size_array[size_num].end)
1156                 {
1157                   fprintf (stderr, "invalid size parameter: %s\n", s);
1158                   exit (1);
1159                 }
1160 
1161               size_num++;
1162             }
1163         }
1164         break;
1165       case 't':
1166         option_step = atol (optarg);
1167         if (option_step < 1)
1168           {
1169             fprintf (stderr, "-t step must be >= 1\n");
1170             exit (1);
1171           }
1172         break;
1173       case 'u':
1174         option_resource_usage = 1;
1175         break;
1176       case 'z':
1177         sp.cache = 1;
1178         break;
1179       case 'x':
1180         sp.align_xp = atol (optarg);
1181         check_align_option ("-x", sp.align_xp);
1182         break;
1183       case 'y':
1184         sp.align_yp = atol (optarg);
1185         check_align_option ("-y", sp.align_yp);
1186         break;
1187       case 'w':
1188         sp.align_wp = atol (optarg);
1189         check_align_option ("-w", sp.align_wp);
1190         break;
1191       case 'W':
1192         sp.align_wp2 = atol (optarg);
1193         check_align_option ("-W", sp.align_wp2);
1194         break;
1195       case '?':
1196         exit(1);
1197       }
1198     }
1199 
1200   if (optind >= argc)
1201     {
1202       usage ();
1203       exit (1);
1204     }
1205 
1206   if (size_num == 0)
1207     {
1208       fprintf (stderr, "-s <size> must be specified\n");
1209       exit (1);
1210     }
1211 
1212   gmp_randinit_default (__gmp_rands);
1213   __gmp_rands_initialized = 1;
1214   gmp_randseed_ui (__gmp_rands, option_seed);
1215 
1216   choice = (struct choice_t *) (*__gmp_allocate_func)
1217     ((argc - optind) * sizeof(choice[0]));
1218   for ( ; optind < argc; optind++)
1219     {
1220       struct choice_t  c;
1221       routine_find (&c, argv[optind]);
1222       choice[num_choices] = c;
1223       num_choices++;
1224     }
1225 
1226   if ((option_cmp == CMP_RATIO || option_cmp == CMP_DIFFERENCE) &&
1227       num_choices < 2)
1228     {
1229       fprintf (stderr, "WARNING, -d or -r does nothing when only one routine requested\n");
1230     }
1231 
1232   speed_time_init ();
1233   if (option_unit == UNIT_CYCLES || option_unit == UNIT_CYCLESPERLIMB)
1234     speed_cycletime_need_cycles ();
1235   else
1236     speed_cycletime_need_seconds ();
1237 
1238   if (option_gnuplot)
1239     {
1240       run_gnuplot (argc, argv);
1241     }
1242   else
1243     {
1244       if (option_unit == UNIT_SECONDS)
1245         printf ("overhead %.9f secs", speed_measure (speed_noop, NULL));
1246       else
1247         printf ("overhead %.2f cycles",
1248                 speed_measure (speed_noop, NULL) / speed_cycletime);
1249       printf (", precision %d units of %.2e secs",
1250               speed_precision, speed_unittime);
1251 
1252       if (speed_cycletime == 1.0 || speed_cycletime == 0.0)
1253         printf (", CPU freq unknown\n");
1254       else
1255         printf (", CPU freq %.2f MHz\n", 1e-6/speed_cycletime);
1256 
1257       printf ("       ");
1258       for (i = 0; i < num_choices; i++)
1259         printf (" %*s", COLUMN_WIDTH, choice[i].name);
1260       printf ("\n");
1261 
1262       run_all (stdout);
1263     }
1264 
1265   if (option_resource_usage)
1266     {
1267 #if HAVE_GETRUSAGE
1268       {
1269         /* This doesn't give data sizes on linux 2.0.x, only utime. */
1270         struct rusage  r;
1271         if (getrusage (RUSAGE_SELF, &r) != 0)
1272           perror ("getrusage");
1273         else
1274           printf ("getrusage(): utime %ld.%06ld data %ld stack %ld maxresident %ld\n",
1275                   r.ru_utime.tv_sec, r.ru_utime.tv_usec,
1276                   r.ru_idrss, r.ru_isrss, r.ru_ixrss);
1277       }
1278 #else
1279       printf ("getrusage() not available\n");
1280 #endif
1281 
1282       /* Linux kernel. */
1283       {
1284         char  buf[128];
1285         sprintf (buf, "/proc/%d/status", getpid());
1286         if (access (buf, R_OK) == 0)
1287           {
1288             sprintf (buf, "cat /proc/%d/status", getpid());
1289             system (buf);
1290           }
1291 
1292       }
1293     }
1294 
1295   return 0;
1296 }
1297