xref: /netbsd-src/external/lgpl3/gmp/dist/tune/speed.c (revision d909946ca08dceb44d7d0f22ec9488679695d976)
1 /* Speed measuring program.
2 
3 Copyright 1999, 2000, 2001, 2002, 2003, 2005, 2006, 2008, 2009, 2010,
4 2011, 2012 Free Software Foundation, Inc.
5 
6 This file is part of the GNU MP Library.
7 
8 The GNU MP Library is free software; you can redistribute it and/or modify
9 it under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or (at your
11 option) any later version.
12 
13 The GNU MP Library is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
16 License for more details.
17 
18 You should have received a copy of the GNU Lesser General Public License
19 along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
20 
21 /* Usage message is in the code below, run with no arguments to print it.
22    See README for interesting applications.
23 
24    To add a new routine foo(), create a speed_foo() function in the style of
25    the existing ones and add an entry in the routine[] array.  Put FLAG_R if
26    speed_foo() wants an "r" parameter.
27 
28    The routines don't have help messages or descriptions, but most have
29    suggestive names.  See the source code for full details.
30 
31 */
32 
33 #include "config.h"
34 
35 #include <limits.h>
36 #include <stdio.h>
37 #include <stdlib.h>
38 #include <string.h>
39 
40 #if HAVE_UNISTD_H
41 #include <unistd.h>  /* for getpid, R_OK */
42 #endif
43 
44 #if TIME_WITH_SYS_TIME
45 # include <sys/time.h>  /* for struct timeval */
46 # include <time.h>
47 #else
48 # if HAVE_SYS_TIME_H
49 #  include <sys/time.h>
50 # else
51 #  include <time.h>
52 # endif
53 #endif
54 
55 #if HAVE_SYS_RESOURCE_H
56 #include <sys/resource.h>  /* for getrusage() */
57 #endif
58 
59 
60 #include "gmp.h"
61 #include "gmp-impl.h"
62 #include "longlong.h"  /* for the benefit of speed-many.c */
63 #include "tests.h"
64 #include "speed.h"
65 
66 
67 #if !HAVE_DECL_OPTARG
68 extern char *optarg;
69 extern int optind, opterr;
70 #endif
71 
72 #if !HAVE_STRTOUL
73 #define strtoul(p,e,b)  (unsigned long) strtol(p,e,b)
74 #endif
75 
76 #ifdef SPEED_EXTRA_PROTOS
77 SPEED_EXTRA_PROTOS
78 #endif
79 #ifdef SPEED_EXTRA_PROTOS2
80 SPEED_EXTRA_PROTOS2
81 #endif
82 
83 
/* Store the limb value N into each of the SIZE limbs starting at PTR.
   SIZE must not be negative (checked with ASSERT).  PTR, SIZE and N are
   plain macro arguments and are re-evaluated on every pass of the loop,
   so they should be free of side effects.  */
#define MPN_FILL(ptr, size, n)                  \
  do {                                          \
    mp_size_t __fill_idx = 0;                   \
    ASSERT ((size) >= 0);                       \
    while (__fill_idx < (size))                 \
      {                                         \
        (ptr)[__fill_idx] = (n);                \
        __fill_idx++;                           \
      }                                         \
  } while (0)
91 
92 
/* GMP_NUMB_0xAA is a limb constant made of repeated 0xAA bytes, ANDed with
   GMP_NUMB_MASK.  It is only defined when GMP_LIMB_BITS is 32 or 64.  */
#if GMP_LIMB_BITS == 32
#define GMP_NUMB_0xAA  (CNST_LIMB(0xAAAAAAAA) & GMP_NUMB_MASK)
#elif GMP_LIMB_BITS == 64
#define GMP_NUMB_0xAA  (CNST_LIMB(0xAAAAAAAAAAAAAAAA) & GMP_NUMB_MASK)
#endif
99 
100 
/* How run_one reports each routine's time: as measured (CMP_ABSOLUTE), as a
   ratio to the first routine (CMP_RATIO), as a difference from the first
   routine (CMP_DIFFERENCE), or as a difference from the same routine's
   time at the previous size (CMP_DIFFPREV).  */
#define CMP_ABSOLUTE     1
#define CMP_RATIO        2
#define CMP_DIFFERENCE   3
#define CMP_DIFFPREV     4
int  option_cmp = CMP_ABSOLUTE;

/* Unit of the reported times.  For UNIT_CYCLES run_one divides the time by
   speed_cycletime; for UNIT_CYCLESPERLIMB it additionally divides by
   SIZE_TO_DIVISOR of the size.  */
#define UNIT_SECONDS        1
#define UNIT_CYCLES         2
#define UNIT_CYCLESPERLIMB  3
int  option_unit = UNIT_SECONDS;

/* Operand pattern produced by data_fill: mpn_random, mpn_random2, all zero
   limbs, GMP_NUMB_0xAA limbs, GMP_NUMB_MAX limbs, or GMP_NUMB_MAX limbs with
   2 subtracted from the lowest limb (DATA_2FD).  */
#define DATA_RANDOM   1
#define DATA_RANDOM2  2
#define DATA_ZEROS    3
#define DATA_AAS      4
#define DATA_FFS      5
#define DATA_2FD      6
int  option_data = DATA_RANDOM;

/* option_square selects the per-limb divisor in SIZE_TO_DIVISOR: 1 gives
   n*n, 2 gives n*(n+1)/2, anything else gives n.  */
int        option_square = 0;
/* When non-zero, run_all advances the size by size*option_factor - size
   (at least 1) instead of by 1.  */
double     option_factor = 0.0;
/* Lower bound on the size increment used by run_all.  */
mp_size_t  option_step = 1;
/* Non-zero makes run_one write plain data rows instead of the free-form
   columns.  */
int        option_gnuplot = 0;
/* Basename to which run_gnuplot appends ".gnuplot" and ".data".  */
char      *option_gnuplot_basename;
/* Size ranges walked by run_all, each from start up to and including end;
   size_num is the number of ranges in use.  size_allocnum is not used in
   this part of the file -- presumably the allocated capacity of
   size_array, TODO confirm.  */
struct size_array_t {
  mp_size_t start, end;
} *size_array = NULL;
mp_size_t  size_num = 0;
mp_size_t  size_allocnum = 0;
/* NOTE(review): the next two options are consumed outside this part of the
   file; their exact effect is not visible here.  */
int        option_resource_usage = 0;
long       option_seed = 123456789;

/* Measurement parameters shared by run_all and run_one.  */
struct speed_params  sp;

#define COLUMN_WIDTH  13  /* for the free-form output */

/* Bits for the flag field of a routine[] entry.  FLAG_RSIZE is not
   referenced in this part of the file.  */
#define FLAG_R            (1<<0)  /* require ".r" */
#define FLAG_R_OPTIONAL   (1<<1)  /* optional ".r" */
#define FLAG_RSIZE        (1<<2)
#define FLAG_NODATA       (1<<3)  /* don't alloc xp, yp */
141 
/* Table of the routines that can be measured.  Each entry gives a name, the
   speed_* function that run_one hands to speed_measure, and a mask of
   FLAG_* bits; entries that leave the flag out get 0 from the normal
   aggregate initialisation rules.  Entries inside #if groups exist only
   in builds where the condition holds.  The name is presumably what gets
   matched against the command line -- the lookup is outside this part of
   the file.  */
const struct routine_t {
  /* constants */
  const char        *name;  /* routine name */
  speed_function_t  fun;    /* measuring function */
  int               flag;   /* FLAG_* bits, 0 when omitted below */
} routine[] = {

  { "noop",              speed_noop                 },
  { "noop_wxs",          speed_noop_wxs             },
  { "noop_wxys",         speed_noop_wxys            },

  { "mpn_add_n",         speed_mpn_add_n,     FLAG_R_OPTIONAL },
  { "mpn_sub_n",         speed_mpn_sub_n,     FLAG_R_OPTIONAL },

  { "mpn_add_err1_n",    speed_mpn_add_err1_n    },
  { "mpn_add_err2_n",    speed_mpn_add_err2_n    },
  { "mpn_add_err3_n",    speed_mpn_add_err3_n    },
  { "mpn_sub_err1_n",    speed_mpn_sub_err1_n    },
  { "mpn_sub_err2_n",    speed_mpn_sub_err2_n    },
  { "mpn_sub_err3_n",    speed_mpn_sub_err3_n    },

#if HAVE_NATIVE_mpn_add_n_sub_n
  { "mpn_add_n_sub_n",      speed_mpn_add_n_sub_n,     FLAG_R_OPTIONAL },
#endif

  { "mpn_addmul_1",      speed_mpn_addmul_1,  FLAG_R },
  { "mpn_submul_1",      speed_mpn_submul_1,  FLAG_R },
#if HAVE_NATIVE_mpn_addmul_2
  { "mpn_addmul_2",      speed_mpn_addmul_2,  FLAG_R_OPTIONAL },
#endif
#if HAVE_NATIVE_mpn_addmul_3
  { "mpn_addmul_3",      speed_mpn_addmul_3,  FLAG_R_OPTIONAL },
#endif
#if HAVE_NATIVE_mpn_addmul_4
  { "mpn_addmul_4",      speed_mpn_addmul_4,  FLAG_R_OPTIONAL },
#endif
#if HAVE_NATIVE_mpn_addmul_5
  { "mpn_addmul_5",      speed_mpn_addmul_5,  FLAG_R_OPTIONAL },
#endif
#if HAVE_NATIVE_mpn_addmul_6
  { "mpn_addmul_6",      speed_mpn_addmul_6,  FLAG_R_OPTIONAL },
#endif
#if HAVE_NATIVE_mpn_addmul_7
  { "mpn_addmul_7",      speed_mpn_addmul_7,  FLAG_R_OPTIONAL },
#endif
#if HAVE_NATIVE_mpn_addmul_8
  { "mpn_addmul_8",      speed_mpn_addmul_8,  FLAG_R_OPTIONAL },
#endif
  { "mpn_mul_1",         speed_mpn_mul_1,     FLAG_R },
  { "mpn_mul_1_inplace", speed_mpn_mul_1_inplace, FLAG_R },
#if HAVE_NATIVE_mpn_mul_2
  { "mpn_mul_2",         speed_mpn_mul_2,     FLAG_R_OPTIONAL },
#endif
#if HAVE_NATIVE_mpn_mul_3
  { "mpn_mul_3",         speed_mpn_mul_3,     FLAG_R_OPTIONAL },
#endif
#if HAVE_NATIVE_mpn_mul_4
  { "mpn_mul_4",         speed_mpn_mul_4,     FLAG_R_OPTIONAL },
#endif
#if HAVE_NATIVE_mpn_mul_5
  { "mpn_mul_5",         speed_mpn_mul_5,     FLAG_R_OPTIONAL },
#endif
#if HAVE_NATIVE_mpn_mul_6
  { "mpn_mul_6",         speed_mpn_mul_6,     FLAG_R_OPTIONAL },
#endif

  { "mpn_divrem_1",      speed_mpn_divrem_1,  FLAG_R },
  { "mpn_divrem_1f",     speed_mpn_divrem_1f, FLAG_R },
#if HAVE_NATIVE_mpn_divrem_1c
  { "mpn_divrem_1c",     speed_mpn_divrem_1c, FLAG_R },
  { "mpn_divrem_1cf",    speed_mpn_divrem_1cf,FLAG_R },
#endif
  { "mpn_mod_1",         speed_mpn_mod_1,     FLAG_R },
#if HAVE_NATIVE_mpn_mod_1c
  { "mpn_mod_1c",        speed_mpn_mod_1c,    FLAG_R },
#endif
  { "mpn_preinv_divrem_1",  speed_mpn_preinv_divrem_1,  FLAG_R },
  { "mpn_preinv_divrem_1f", speed_mpn_preinv_divrem_1f, FLAG_R },
  { "mpn_preinv_mod_1",  speed_mpn_preinv_mod_1, FLAG_R },

  { "mpn_mod_1_1",       speed_mpn_mod_1_1,       FLAG_R },
  { "mpn_mod_1_1_1",     speed_mpn_mod_1_1_1,     FLAG_R },
  { "mpn_mod_1_1_2",     speed_mpn_mod_1_1_2,     FLAG_R },
  { "mpn_mod_1s_2",      speed_mpn_mod_1_2,       FLAG_R },
  { "mpn_mod_1s_3",      speed_mpn_mod_1_3,       FLAG_R },
  { "mpn_mod_1s_4",      speed_mpn_mod_1_4,       FLAG_R },

  { "mpn_divrem_1_div",  speed_mpn_divrem_1_div,  FLAG_R },
  { "mpn_divrem_1_inv",  speed_mpn_divrem_1_inv,  FLAG_R },
  { "mpn_divrem_1f_div", speed_mpn_divrem_1f_div, FLAG_R },
  { "mpn_divrem_1f_inv", speed_mpn_divrem_1f_inv, FLAG_R },
  { "mpn_mod_1_div",     speed_mpn_mod_1_div,     FLAG_R },
  { "mpn_mod_1_inv",     speed_mpn_mod_1_inv,     FLAG_R },

  { "mpn_divrem_2",      speed_mpn_divrem_2,        },
  { "mpn_divrem_2_div",  speed_mpn_divrem_2_div,    },
  { "mpn_divrem_2_inv",  speed_mpn_divrem_2_inv,    },

  { "mpn_div_qr_2n",     speed_mpn_div_qr_2n,       },
  { "mpn_div_qr_2u",     speed_mpn_div_qr_2u,       },

  { "mpn_divexact_1",    speed_mpn_divexact_1,    FLAG_R },
  { "mpn_divexact_by3",  speed_mpn_divexact_by3          },

  { "mpn_bdiv_q_1",      speed_mpn_bdiv_q_1,      FLAG_R },
  { "mpn_pi1_bdiv_q_1",  speed_mpn_pi1_bdiv_q_1,  FLAG_R_OPTIONAL },
  { "mpn_bdiv_dbm1c",    speed_mpn_bdiv_dbm1c,    FLAG_R_OPTIONAL },

#if HAVE_NATIVE_mpn_modexact_1_odd
  { "mpn_modexact_1_odd",  speed_mpn_modexact_1_odd,  FLAG_R },
#endif
  { "mpn_modexact_1c_odd", speed_mpn_modexact_1c_odd, FLAG_R },

#if GMP_NUMB_BITS % 4 == 0
  { "mpn_mod_34lsub1",   speed_mpn_mod_34lsub1 },
#endif

  { "mpn_lshift",        speed_mpn_lshift, FLAG_R   },
  { "mpn_lshiftc",       speed_mpn_lshiftc, FLAG_R   },
  { "mpn_rshift",        speed_mpn_rshift, FLAG_R   },

  { "mpn_and_n",         speed_mpn_and_n,  FLAG_R_OPTIONAL },
  { "mpn_andn_n",        speed_mpn_andn_n, FLAG_R_OPTIONAL },
  { "mpn_nand_n",        speed_mpn_nand_n, FLAG_R_OPTIONAL },
  { "mpn_ior_n",         speed_mpn_ior_n,  FLAG_R_OPTIONAL },
  { "mpn_iorn_n",        speed_mpn_iorn_n, FLAG_R_OPTIONAL },
  { "mpn_nior_n",        speed_mpn_nior_n, FLAG_R_OPTIONAL },
  { "mpn_xor_n",         speed_mpn_xor_n,  FLAG_R_OPTIONAL },
  { "mpn_xnor_n",        speed_mpn_xnor_n, FLAG_R_OPTIONAL },
  { "mpn_com",           speed_mpn_com              },

  { "mpn_popcount",      speed_mpn_popcount         },
  { "mpn_hamdist",       speed_mpn_hamdist          },

  { "mpn_matrix22_mul",  speed_mpn_matrix22_mul     },

  { "mpn_hgcd",          speed_mpn_hgcd             },
  { "mpn_hgcd_lehmer",   speed_mpn_hgcd_lehmer      },
  { "mpn_hgcd_appr",     speed_mpn_hgcd_appr        },
  { "mpn_hgcd_appr_lehmer", speed_mpn_hgcd_appr_lehmer },

  { "mpn_hgcd_reduce",   speed_mpn_hgcd_reduce      },
  { "mpn_hgcd_reduce_1", speed_mpn_hgcd_reduce_1    },
  { "mpn_hgcd_reduce_2", speed_mpn_hgcd_reduce_2    },

  { "mpn_gcd_1",         speed_mpn_gcd_1,  FLAG_R_OPTIONAL },
  { "mpn_gcd_1N",        speed_mpn_gcd_1N, FLAG_R_OPTIONAL },

  { "mpn_gcd",           speed_mpn_gcd                    },

  { "mpn_gcdext",            speed_mpn_gcdext            },
  { "mpn_gcdext_single",     speed_mpn_gcdext_single     },
  { "mpn_gcdext_double",     speed_mpn_gcdext_double     },
  { "mpn_gcdext_one_single", speed_mpn_gcdext_one_single },
  { "mpn_gcdext_one_double", speed_mpn_gcdext_one_double },
#if 0
  { "mpn_gcdext_lehmer",     speed_mpn_gcdext_lehmer     },
#endif
  { "mpz_jacobi",        speed_mpz_jacobi           },
  { "mpn_jacobi_base",   speed_mpn_jacobi_base      },
  { "mpn_jacobi_base_1", speed_mpn_jacobi_base_1    },
  { "mpn_jacobi_base_2", speed_mpn_jacobi_base_2    },
  { "mpn_jacobi_base_3", speed_mpn_jacobi_base_3    },
  { "mpn_jacobi_base_4", speed_mpn_jacobi_base_4    },

  { "mpn_mul",           speed_mpn_mul,         FLAG_R_OPTIONAL },
  { "mpn_mul_basecase",  speed_mpn_mul_basecase,FLAG_R_OPTIONAL },
  { "mpn_sqr_basecase",  speed_mpn_sqr_basecase     },
#if HAVE_NATIVE_mpn_sqr_diagonal
  { "mpn_sqr_diagonal",  speed_mpn_sqr_diagonal     },
#endif
#if HAVE_NATIVE_mpn_sqr_diag_addlsh1
  { "mpn_sqr_diag_addlsh1", speed_mpn_sqr_diag_addlsh1 },
#endif

  { "mpn_mul_n",         speed_mpn_mul_n            },
  { "mpn_sqr",           speed_mpn_sqr              },

  { "mpn_toom2_sqr",     speed_mpn_toom2_sqr        },
  { "mpn_toom3_sqr",     speed_mpn_toom3_sqr        },
  { "mpn_toom4_sqr",     speed_mpn_toom4_sqr        },
  { "mpn_toom6_sqr",     speed_mpn_toom6_sqr        },
  { "mpn_toom8_sqr",     speed_mpn_toom8_sqr        },
  { "mpn_toom22_mul",    speed_mpn_toom22_mul       },
  { "mpn_toom33_mul",    speed_mpn_toom33_mul       },
  { "mpn_toom44_mul",    speed_mpn_toom44_mul       },
  { "mpn_toom6h_mul",    speed_mpn_toom6h_mul       },
  { "mpn_toom8h_mul",    speed_mpn_toom8h_mul       },
  { "mpn_toom32_mul",    speed_mpn_toom32_mul       },
  { "mpn_toom42_mul",    speed_mpn_toom42_mul       },
  { "mpn_toom43_mul",    speed_mpn_toom43_mul       },
  { "mpn_toom63_mul",    speed_mpn_toom63_mul       },
  { "mpn_nussbaumer_mul",    speed_mpn_nussbaumer_mul    },
  { "mpn_nussbaumer_mul_sqr",speed_mpn_nussbaumer_mul_sqr},
#if WANT_OLD_FFT_FULL
  { "mpn_mul_fft_full",      speed_mpn_mul_fft_full      },
  { "mpn_mul_fft_full_sqr",  speed_mpn_mul_fft_full_sqr  },
#endif
  { "mpn_mul_fft",       speed_mpn_mul_fft,     FLAG_R_OPTIONAL },
  { "mpn_mul_fft_sqr",   speed_mpn_mul_fft_sqr, FLAG_R_OPTIONAL },

  { "mpn_mullo_n",        speed_mpn_mullo_n         },
  { "mpn_mullo_basecase", speed_mpn_mullo_basecase  },

  { "mpn_mulmid_basecase",  speed_mpn_mulmid_basecase, FLAG_R_OPTIONAL },
  { "mpn_toom42_mulmid",    speed_mpn_toom42_mulmid },
  { "mpn_mulmid_n",         speed_mpn_mulmid_n },
  { "mpn_mulmid",           speed_mpn_mulmid, FLAG_R_OPTIONAL },

  { "mpn_bc_mulmod_bnm1",      speed_mpn_bc_mulmod_bnm1      },
  { "mpn_mulmod_bnm1",         speed_mpn_mulmod_bnm1         },
  { "mpn_mulmod_bnm1_rounded", speed_mpn_mulmod_bnm1_rounded },
  { "mpn_sqrmod_bnm1",         speed_mpn_sqrmod_bnm1         },

  { "mpn_invert",              speed_mpn_invert              },
  { "mpn_invertappr",          speed_mpn_invertappr          },
  { "mpn_ni_invertappr",       speed_mpn_ni_invertappr       },
  { "mpn_binvert",             speed_mpn_binvert             },

  { "mpn_sbpi1_div_qr",        speed_mpn_sbpi1_div_qr,    FLAG_R_OPTIONAL},
  { "mpn_dcpi1_div_qr",        speed_mpn_dcpi1_div_qr,    FLAG_R_OPTIONAL},
  { "mpn_mu_div_qr",           speed_mpn_mu_div_qr,       FLAG_R_OPTIONAL},
  { "mpn_mupi_div_qr",         speed_mpn_mupi_div_qr,     FLAG_R_OPTIONAL},
  { "mpn_sbpi1_divappr_q",     speed_mpn_sbpi1_divappr_q, FLAG_R_OPTIONAL},
  { "mpn_dcpi1_divappr_q",     speed_mpn_dcpi1_divappr_q, FLAG_R_OPTIONAL},

  { "mpn_sbpi1_bdiv_qr",       speed_mpn_sbpi1_bdiv_qr       },
  { "mpn_dcpi1_bdiv_qr",       speed_mpn_dcpi1_bdiv_qr       },
  { "mpn_sbpi1_bdiv_q",        speed_mpn_sbpi1_bdiv_q        },
  { "mpn_dcpi1_bdiv_q",        speed_mpn_dcpi1_bdiv_q        },

  { "mpn_broot",               speed_mpn_broot,    FLAG_R },
  { "mpn_broot_invm1",         speed_mpn_broot_invm1, FLAG_R },
  { "mpn_brootinv",            speed_mpn_brootinv, FLAG_R },

  { "mpn_get_str",          speed_mpn_get_str,     FLAG_R_OPTIONAL },
  { "mpn_set_str",          speed_mpn_set_str,     FLAG_R_OPTIONAL },
  { "mpn_set_str_basecase", speed_mpn_bc_set_str,  FLAG_R_OPTIONAL },

  { "mpn_sqrtrem",       speed_mpn_sqrtrem          },
  { "mpn_rootrem",       speed_mpn_rootrem, FLAG_R  },

  { "mpn_fib2_ui",       speed_mpn_fib2_ui,    FLAG_NODATA },
  { "mpz_fib_ui",        speed_mpz_fib_ui,     FLAG_NODATA },
  { "mpz_fib2_ui",       speed_mpz_fib2_ui,    FLAG_NODATA },
  { "mpz_lucnum_ui",     speed_mpz_lucnum_ui,  FLAG_NODATA },
  { "mpz_lucnum2_ui",    speed_mpz_lucnum2_ui, FLAG_NODATA },

  { "mpz_add",           speed_mpz_add              },
  { "mpz_bin_uiui",      speed_mpz_bin_uiui, FLAG_NODATA | FLAG_R_OPTIONAL },
  { "mpz_bin_ui",        speed_mpz_bin_ui,   FLAG_NODATA | FLAG_R_OPTIONAL },
  { "mpz_fac_ui",        speed_mpz_fac_ui,   FLAG_NODATA   },
  { "mpz_powm",          speed_mpz_powm             },
  { "mpz_powm_mod",      speed_mpz_powm_mod         },
  { "mpz_powm_redc",     speed_mpz_powm_redc        },
  { "mpz_powm_sec",      speed_mpz_powm_sec        },
  { "mpz_powm_ui",       speed_mpz_powm_ui,  FLAG_R_OPTIONAL },

  { "mpz_mod",           speed_mpz_mod              },
  { "mpn_redc_1",        speed_mpn_redc_1           },
  { "mpn_redc_2",        speed_mpn_redc_2           },
  { "mpn_redc_n",        speed_mpn_redc_n           },

  { "MPN_COPY",          speed_MPN_COPY             },
  { "MPN_COPY_INCR",     speed_MPN_COPY_INCR        },
  { "MPN_COPY_DECR",     speed_MPN_COPY_DECR        },
  { "memcpy",            speed_memcpy               },
#if HAVE_NATIVE_mpn_copyi
  { "mpn_copyi",         speed_mpn_copyi            },
#endif
#if HAVE_NATIVE_mpn_copyd
  { "mpn_copyd",         speed_mpn_copyd            },
#endif
  { "mpn_tabselect",     speed_mpn_tabselect, FLAG_R_OPTIONAL },
#if HAVE_NATIVE_mpn_addlsh1_n
  { "mpn_addlsh1_n",     speed_mpn_addlsh1_n, FLAG_R_OPTIONAL },
#endif
#if HAVE_NATIVE_mpn_sublsh1_n
  { "mpn_sublsh1_n",     speed_mpn_sublsh1_n, FLAG_R_OPTIONAL },
#endif
#if HAVE_NATIVE_mpn_addlsh1_n_ip1
  { "mpn_addlsh1_n_ip1", speed_mpn_addlsh1_n_ip1    },
#endif
#if HAVE_NATIVE_mpn_addlsh1_n_ip2
  { "mpn_addlsh1_n_ip2", speed_mpn_addlsh1_n_ip2    },
#endif
#if HAVE_NATIVE_mpn_sublsh1_n_ip1
  { "mpn_sublsh1_n_ip1", speed_mpn_sublsh1_n_ip1    },
#endif
#if HAVE_NATIVE_mpn_rsblsh1_n
  { "mpn_rsblsh1_n",     speed_mpn_rsblsh1_n, FLAG_R_OPTIONAL },
#endif
#if HAVE_NATIVE_mpn_addlsh2_n
  { "mpn_addlsh2_n",     speed_mpn_addlsh2_n, FLAG_R_OPTIONAL },
#endif
#if HAVE_NATIVE_mpn_sublsh2_n
  { "mpn_sublsh2_n",     speed_mpn_sublsh2_n, FLAG_R_OPTIONAL },
#endif
#if HAVE_NATIVE_mpn_addlsh2_n_ip1
  { "mpn_addlsh2_n_ip1", speed_mpn_addlsh2_n_ip1    },
#endif
#if HAVE_NATIVE_mpn_addlsh2_n_ip2
  { "mpn_addlsh2_n_ip2", speed_mpn_addlsh2_n_ip2    },
#endif
#if HAVE_NATIVE_mpn_sublsh2_n_ip1
  { "mpn_sublsh2_n_ip1", speed_mpn_sublsh2_n_ip1    },
#endif
#if HAVE_NATIVE_mpn_rsblsh2_n
  { "mpn_rsblsh2_n",     speed_mpn_rsblsh2_n, FLAG_R_OPTIONAL },
#endif
#if HAVE_NATIVE_mpn_addlsh_n
  { "mpn_addlsh_n",     speed_mpn_addlsh_n, FLAG_R_OPTIONAL },
#endif
#if HAVE_NATIVE_mpn_sublsh_n
  { "mpn_sublsh_n",     speed_mpn_sublsh_n, FLAG_R_OPTIONAL },
#endif
#if HAVE_NATIVE_mpn_addlsh_n_ip1
  { "mpn_addlsh_n_ip1", speed_mpn_addlsh_n_ip1    },
#endif
#if HAVE_NATIVE_mpn_addlsh_n_ip2
  { "mpn_addlsh_n_ip2", speed_mpn_addlsh_n_ip2    },
#endif
#if HAVE_NATIVE_mpn_sublsh_n_ip1
  { "mpn_sublsh_n_ip1", speed_mpn_sublsh_n_ip1    },
#endif
#if HAVE_NATIVE_mpn_rsblsh_n
  { "mpn_rsblsh_n",     speed_mpn_rsblsh_n, FLAG_R_OPTIONAL },
#endif
#if HAVE_NATIVE_mpn_rsh1add_n
  { "mpn_rsh1add_n",     speed_mpn_rsh1add_n, FLAG_R_OPTIONAL },
#endif
#if HAVE_NATIVE_mpn_rsh1sub_n
  { "mpn_rsh1sub_n",     speed_mpn_rsh1sub_n, FLAG_R_OPTIONAL },
#endif

  { "mpn_addcnd_n",     speed_mpn_addcnd_n, FLAG_R_OPTIONAL },
  { "mpn_subcnd_n",     speed_mpn_subcnd_n, FLAG_R_OPTIONAL },

  { "MPN_ZERO",          speed_MPN_ZERO             },

  { "binvert_limb",       speed_binvert_limb,       FLAG_NODATA },
  { "binvert_limb_mul1",  speed_binvert_limb_mul1,  FLAG_NODATA },
  { "binvert_limb_loop",  speed_binvert_limb_loop,  FLAG_NODATA },
  { "binvert_limb_cond",  speed_binvert_limb_cond,  FLAG_NODATA },
  { "binvert_limb_arith", speed_binvert_limb_arith, FLAG_NODATA },

  { "malloc_free",                  speed_malloc_free                  },
  { "malloc_realloc_free",          speed_malloc_realloc_free          },
  { "gmp_allocate_free",            speed_gmp_allocate_free            },
  { "gmp_allocate_reallocate_free", speed_gmp_allocate_reallocate_free },
  { "mpz_init_clear",               speed_mpz_init_clear               },
  { "mpq_init_clear",               speed_mpq_init_clear               },
  { "mpf_init_clear",               speed_mpf_init_clear               },
  { "mpz_init_realloc_clear",       speed_mpz_init_realloc_clear       },

  { "umul_ppmm",         speed_umul_ppmm,     FLAG_R_OPTIONAL },
#if HAVE_NATIVE_mpn_umul_ppmm
  { "mpn_umul_ppmm",     speed_mpn_umul_ppmm, FLAG_R_OPTIONAL },
#endif
#if HAVE_NATIVE_mpn_umul_ppmm_r
  { "mpn_umul_ppmm_r",   speed_mpn_umul_ppmm_r, FLAG_R_OPTIONAL },
#endif

  { "count_leading_zeros",  speed_count_leading_zeros,  FLAG_NODATA | FLAG_R_OPTIONAL },
  { "count_trailing_zeros", speed_count_trailing_zeros, FLAG_NODATA | FLAG_R_OPTIONAL },

  { "udiv_qrnnd",             speed_udiv_qrnnd,             FLAG_R_OPTIONAL },
  { "udiv_qrnnd_c",           speed_udiv_qrnnd_c,           FLAG_R_OPTIONAL },
#if HAVE_NATIVE_mpn_udiv_qrnnd
  { "mpn_udiv_qrnnd",         speed_mpn_udiv_qrnnd,         FLAG_R_OPTIONAL },
#endif
#if HAVE_NATIVE_mpn_udiv_qrnnd_r
  { "mpn_udiv_qrnnd_r",       speed_mpn_udiv_qrnnd_r,       FLAG_R_OPTIONAL },
#endif
  { "invert_limb",            speed_invert_limb,            FLAG_R_OPTIONAL },

  { "operator_div",           speed_operator_div,           FLAG_R_OPTIONAL },
  { "operator_mod",           speed_operator_mod,           FLAG_R_OPTIONAL },

  { "gmp_randseed",    speed_gmp_randseed,    FLAG_R_OPTIONAL               },
  { "gmp_randseed_ui", speed_gmp_randseed_ui, FLAG_R_OPTIONAL | FLAG_NODATA },
  { "mpz_urandomb",    speed_mpz_urandomb,    FLAG_R_OPTIONAL | FLAG_NODATA },

  /* Hooks through which a build can append further entries.  */
#ifdef SPEED_EXTRA_ROUTINES
  SPEED_EXTRA_ROUTINES
#endif
#ifdef SPEED_EXTRA_ROUTINES2
  SPEED_EXTRA_ROUTINES2
#endif
};
532 
533 
/* One routine selected for measurement, together with the per-size results
   that run_one stores for it.  */
struct choice_t {
  const struct routine_t  *p;         /* routine[] entry being measured */
  mp_limb_t               r;          /* copied into speed_params.r before measuring */
  double                  scale;      /* factor applied to each measured time */
  double                  time;       /* result for the current size, after conversions */
  int                     no_time;    /* non-zero when no result is available ("n/a") */
  double                  prev_time;  /* scaled time at the previous size, or -1.0 */
  const char              *name;      /* used as the plot title by run_gnuplot */
};
/* Array of selected routines and the number of entries in it.  */
struct choice_t  *choice;
int  num_choices = 0;
545 
546 
547 void
548 data_fill (mp_ptr ptr, mp_size_t size)
549 {
550   switch (option_data) {
551   case DATA_RANDOM:
552     mpn_random (ptr, size);
553     break;
554   case DATA_RANDOM2:
555     mpn_random2 (ptr, size);
556     break;
557   case DATA_ZEROS:
558     MPN_ZERO (ptr, size);
559     break;
560   case DATA_AAS:
561     MPN_FILL (ptr, size, GMP_NUMB_0xAA);
562     break;
563   case DATA_FFS:
564     MPN_FILL (ptr, size, GMP_NUMB_MAX);
565     break;
566   case DATA_2FD:
567     MPN_FILL (ptr, size, GMP_NUMB_MAX);
568     ptr[0] -= 2;
569     break;
570   default:
571     abort();
572     /*NOTREACHED*/
573   }
574 }
575 
576 /* The code here handling the various combinations of output options isn't
577    too attractive, but it works and is fairly clean.  */
578 
579 #define SIZE_TO_DIVISOR(n)              \
580   (option_square == 1 ? (n)*(n)         \
581   : option_square == 2 ? (n)*((n)+1)/2  \
582   : (n))
583 
584 void
585 run_one (FILE *fp, struct speed_params *s, mp_size_t prev_size)
586 {
587   const char  *first_open_fastest, *first_open_notfastest, *first_close;
588   int         i, fastest, want_data;
589   double      fastest_time;
590   TMP_DECL;
591 
592   TMP_MARK;
593 
594   /* allocate data, unless all routines are NODATA */
595   want_data = 0;
596   for (i = 0; i < num_choices; i++)
597     want_data |= ((choice[i].p->flag & FLAG_NODATA) == 0);
598 
599   if (want_data)
600     {
601       SPEED_TMP_ALLOC_LIMBS (sp.xp, s->size, s->align_xp);
602       SPEED_TMP_ALLOC_LIMBS (sp.yp, s->size, s->align_yp);
603 
604       data_fill (s->xp, s->size);
605       data_fill (s->yp, s->size);
606     }
607   else
608     {
609       sp.xp = NULL;
610       sp.yp = NULL;
611     }
612 
613   if (prev_size == -1 && option_cmp == CMP_DIFFPREV)
614     {
615       first_open_fastest = "(#";
616       first_open_notfastest = " (";
617       first_close = ")";
618     }
619   else
620     {
621       first_open_fastest = "#";
622       first_open_notfastest = " ";
623       first_close = "";
624     }
625 
626   fastest = -1;
627   fastest_time = -1.0;
628   for (i = 0; i < num_choices; i++)
629     {
630       s->r = choice[i].r;
631       choice[i].time = speed_measure (choice[i].p->fun, s);
632       choice[i].no_time = (choice[i].time == -1.0);
633       if (! choice[i].no_time)
634         choice[i].time *= choice[i].scale;
635 
636       /* Apply the effect of CMP_DIFFPREV, but the new choice[i].prev_time
637          is before any differences.  */
638       {
639         double     t;
640         t = choice[i].time;
641         if (t != -1.0 && option_cmp == CMP_DIFFPREV && prev_size != -1)
642           {
643             if (choice[i].prev_time == -1.0)
644               choice[i].no_time = 1;
645             else
646               choice[i].time = choice[i].time - choice[i].prev_time;
647           }
648         choice[i].prev_time = t;
649       }
650 
651       if (choice[i].no_time)
652         continue;
653 
654       /* Look for the fastest after CMP_DIFFPREV has been applied, but
655          before CMP_RATIO or CMP_DIFFERENCE.  There's only a fastest shown
656          if there's more than one routine.  */
657       if (num_choices > 1 && (fastest == -1 || choice[i].time < fastest_time))
658         {
659           fastest = i;
660           fastest_time = choice[i].time;
661         }
662 
663       if (option_cmp == CMP_DIFFPREV)
664         {
665           /* Conversion for UNIT_CYCLESPERLIMB differs in CMP_DIFFPREV. */
666           if (option_unit == UNIT_CYCLES)
667             choice[i].time /= speed_cycletime;
668           else if (option_unit == UNIT_CYCLESPERLIMB)
669             {
670               if (prev_size == -1)
671                 choice[i].time /= speed_cycletime;
672               else
673                 choice[i].time /=  (speed_cycletime
674                                     * (SIZE_TO_DIVISOR(s->size)
675                                        - SIZE_TO_DIVISOR(prev_size)));
676             }
677         }
678       else
679         {
680           if (option_unit == UNIT_CYCLES)
681             choice[i].time /= speed_cycletime;
682           else if (option_unit == UNIT_CYCLESPERLIMB)
683             choice[i].time /= (speed_cycletime * SIZE_TO_DIVISOR(s->size));
684 
685           if (option_cmp == CMP_RATIO && i > 0)
686             {
687               /* A ratio isn't affected by the units chosen. */
688               if (choice[0].no_time || choice[0].time == 0.0)
689                 choice[i].no_time = 1;
690               else
691                 choice[i].time /= choice[0].time;
692             }
693           else if (option_cmp == CMP_DIFFERENCE && i > 0)
694             {
695               if (choice[0].no_time)
696                 {
697                   choice[i].no_time = 1;
698                   continue;
699                 }
700               choice[i].time -= choice[0].time;
701             }
702         }
703     }
704 
705   if (option_gnuplot)
706     {
707       /* In CMP_DIFFPREV, don't print anything for the first size, start
708          with the second where an actual difference is available.
709 
710          In CMP_RATIO, print the first column as 1.0.
711 
712          The 9 decimals printed is much more than the expected precision of
713          the measurements actually. */
714 
715       if (! (option_cmp == CMP_DIFFPREV && prev_size == -1))
716         {
717           fprintf (fp, "%-6ld ", s->size);
718           for (i = 0; i < num_choices; i++)
719             fprintf (fp, "  %.9e",
720                      choice[i].no_time ? 0.0
721                      : (option_cmp == CMP_RATIO && i == 0) ? 1.0
722                      : choice[i].time);
723           fprintf (fp, "\n");
724         }
725     }
726   else
727     {
728       fprintf (fp, "%-6ld ", s->size);
729       for (i = 0; i < num_choices; i++)
730         {
731           char  buf[128];
732           int   decimals;
733 
734           if (choice[i].no_time)
735             {
736               fprintf (fp, " %*s", COLUMN_WIDTH, "n/a");
737             }
738           else
739             {if (option_unit == UNIT_CYCLESPERLIMB
740                  || (option_cmp == CMP_RATIO && i > 0))
741                 decimals = 4;
742               else if (option_unit == UNIT_CYCLES)
743                 decimals = 2;
744               else
745                 decimals = 9;
746 
747               sprintf (buf, "%s%.*f%s",
748                        i == fastest ? first_open_fastest : first_open_notfastest,
749                        decimals, choice[i].time, first_close);
750               fprintf (fp, " %*s", COLUMN_WIDTH, buf);
751             }
752         }
753       fprintf (fp, "\n");
754     }
755 
756   TMP_FREE;
757 }
758 
759 void
760 run_all (FILE *fp)
761 {
762   mp_size_t  prev_size;
763   int        i;
764   TMP_DECL;
765 
766   TMP_MARK;
767   SPEED_TMP_ALLOC_LIMBS (sp.xp_block, SPEED_BLOCK_SIZE, sp.align_xp);
768   SPEED_TMP_ALLOC_LIMBS (sp.yp_block, SPEED_BLOCK_SIZE, sp.align_yp);
769 
770   data_fill (sp.xp_block, SPEED_BLOCK_SIZE);
771   data_fill (sp.yp_block, SPEED_BLOCK_SIZE);
772 
773   for (i = 0; i < size_num; i++)
774     {
775       sp.size = size_array[i].start;
776       prev_size = -1;
777       for (;;)
778         {
779           mp_size_t  step;
780 
781           if (option_data == DATA_2FD && sp.size >= 2)
782             sp.xp[sp.size-1] = 2;
783 
784           run_one (fp, &sp, prev_size);
785           prev_size = sp.size;
786 
787           if (option_data == DATA_2FD && sp.size >= 2)
788             sp.xp[sp.size-1] = MP_LIMB_T_MAX;
789 
790           if (option_factor != 0.0)
791             {
792               step = (mp_size_t) (sp.size * option_factor - sp.size);
793               if (step < 1)
794                 step = 1;
795             }
796           else
797             step = 1;
798           if (step < option_step)
799             step = option_step;
800 
801           sp.size += step;
802           if (sp.size > size_array[i].end)
803             break;
804         }
805     }
806 
807   TMP_FREE;
808 }
809 
810 
/* Open FILENAME for writing ("w" mode) and return the stream.  On failure
   a message is printed to stderr and the program exits with status 1, so
   the return value is never NULL.  */
FILE *
fopen_for_write (const char *filename)
{
  FILE  *stream = fopen (filename, "w");

  if (stream != NULL)
    return stream;

  fprintf (stderr, "Cannot create %s\n", filename);
  exit(1);
}
822 
/* Close FP, which was being written as FILENAME.  If the stream has its
   error indicator set or the close itself fails, a message is printed to
   stderr and the program exits with status 1.  The stream is closed in
   either case.  */
void
fclose_written (FILE *fp, const char *filename)
{
  /* The error indicator has to be sampled before the stream is closed. */
  int  write_failed = (ferror (fp) != 0);
  int  close_failed = (fclose (fp) != 0);

  if (! write_failed && ! close_failed)
    return;

  fprintf (stderr, "Error writing %s\n", filename);
  exit(1);
}
837 
838 
839 void
840 run_gnuplot (int argc, char *argv[])
841 {
842   char  *plot_filename;
843   char  *data_filename;
844   FILE  *fp;
845   int   i;
846 
847   plot_filename = (char *) (*__gmp_allocate_func)
848     (strlen (option_gnuplot_basename) + 20);
849   data_filename = (char *) (*__gmp_allocate_func)
850     (strlen (option_gnuplot_basename) + 20);
851 
852   sprintf (plot_filename, "%s.gnuplot", option_gnuplot_basename);
853   sprintf (data_filename, "%s.data",    option_gnuplot_basename);
854 
855   fp = fopen_for_write (plot_filename);
856 
857   fprintf (fp, "# Generated with:\n");
858   fprintf (fp, "#");
859   for (i = 0; i < argc; i++)
860     fprintf (fp, " %s", argv[i]);
861   fprintf (fp, "\n");
862   fprintf (fp, "\n");
863 
864   fprintf (fp, "reset\n");
865 
866   /* Putting the key at the top left is usually good, and you can change it
867      interactively if it's not. */
868   fprintf (fp, "set key left\n");
869 
870   /* designed to make it possible to see crossovers easily */
871   fprintf (fp, "set style data lines\n");
872 
873   fprintf (fp, "plot ");
874   for (i = 0; i < num_choices; i++)
875     {
876       fprintf (fp, " \"%s\" using 1:%d", data_filename, i+2);
877       fprintf (fp, " title \"%s\"", choice[i].name);
878 
879       if (i != num_choices-1)
880         fprintf (fp, ", \\");
881       fprintf (fp, "\n");
882     }
883 
884   fprintf (fp, "load \"-\"\n");
885   fclose_written (fp, plot_filename);
886 
887   fp = fopen_for_write (data_filename);
888 
889   /* Unbuffered so you can see where the program was up to if it crashes or
890      you kill it. */
891   setbuf (fp, NULL);
892 
893   run_all (fp);
894   fclose_written (fp, data_filename);
895 }
896 
897 
/* LIMB_ONES(n) is a limb whose n least significant bits are ones and whose
   remaining bits are zeros.  n == 0 gives zero and n == GMP_LIMB_BITS gives
   MP_LIMB_T_MAX; both are handled without shifting (presumably because a
   shift by the full limb width would be undefined).  The argument is
   evaluated more than once, so it must not have side effects.  */

#define LIMB_ONES(n)                            \
  ((n) == 0 ? CNST_LIMB(0)                      \
    : (n) == GMP_LIMB_BITS ? MP_LIMB_T_MAX      \
    : (CNST_LIMB(1) << (n)) - 1)
904 
905 mp_limb_t
906 r_string (const char *s)
907 {
908   const char  *s_orig = s;
909   long        n;
910 
911   if (strcmp (s, "aas") == 0)
912     return GMP_NUMB_0xAA;
913 
914   {
915     mpz_t      z;
916     mp_limb_t  l;
917     int        set, siz;
918 
919     mpz_init (z);
920     set = mpz_set_str (z, s, 0);
921     siz = SIZ(z);
922     l = (siz == 0 ? 0 : siz > 0 ? PTR(z)[0] : -PTR(z)[0]);
923     mpz_clear (z);
924     if (set == 0)
925       {
926         if (siz > 1 || siz < -1)
927           printf ("Warning, r parameter %s truncated to %d bits\n",
928                   s_orig, GMP_LIMB_BITS);
929         return l;
930       }
931   }
932 
933   if (s[0] == '0' && (s[1] == 'x' || s[1] == 'X'))
934     n = strtoul (s+2, (char **) &s, 16);
935   else
936     n = strtol (s, (char **) &s, 10);
937 
938   if (strcmp (s, "bits") == 0)
939     {
940       mp_limb_t  l;
941       if (n > GMP_LIMB_BITS)
942         {
943           fprintf (stderr, "%ld bit parameter invalid (max %d bits)\n",
944                    n, GMP_LIMB_BITS);
945           exit (1);
946         }
947       mpn_random (&l, 1);
948       return (l | (CNST_LIMB(1) << (n-1))) & LIMB_ONES(n);
949     }
950   else  if (strcmp (s, "ones") == 0)
951     {
952       if (n > GMP_LIMB_BITS)
953         {
954           fprintf (stderr, "%ld bit parameter invalid (max %d bits)\n",
955                    n, GMP_LIMB_BITS);
956           exit (1);
957         }
958       return LIMB_ONES (n);
959     }
960   else if (*s != '\0')
961     {
962       fprintf (stderr, "invalid r parameter: %s\n", s_orig);
963       exit (1);
964     }
965 
966   return n;
967 }
968 
969 
970 void
971 routine_find (struct choice_t *c, const char *s_orig)
972 {
973   const char  *s;
974   int     i;
975   size_t  nlen;
976 
977   c->name = s_orig;
978   s = strchr (s_orig, '*');
979   if (s != NULL)
980     {
981       c->scale = atof(s_orig);
982       s++;
983     }
984   else
985     {
986       c->scale = 1.0;
987       s = s_orig;
988     }
989 
990   for (i = 0; i < numberof (routine); i++)
991     {
992       nlen = strlen (routine[i].name);
993       if (memcmp (s, routine[i].name, nlen) != 0)
994         continue;
995 
996       if (s[nlen] == '.')
997         {
998           /* match, with a .r parameter */
999 
1000           if (! (routine[i].flag & (FLAG_R|FLAG_R_OPTIONAL)))
1001             {
1002               fprintf (stderr,
1003                        "Choice %s bad: doesn't take a \".<r>\" parameter\n",
1004                        s_orig);
1005               exit (1);
1006             }
1007 
1008           c->p = &routine[i];
1009           c->r = r_string (s + nlen + 1);
1010           return;
1011         }
1012 
1013       if (s[nlen] == '\0')
1014         {
1015           /* match, with no parameter */
1016 
1017           if (routine[i].flag & FLAG_R)
1018             {
1019               fprintf (stderr,
1020                        "Choice %s bad: needs a \".<r>\" parameter\n",
1021                        s_orig);
1022               exit (1);
1023             }
1024 
1025           c->p = &routine[i];
1026           c->r = 0;
1027           return;
1028         }
1029     }
1030 
1031   fprintf (stderr, "Choice %s unrecognised\n", s_orig);
1032   exit (1);
1033 }
1034 
1035 
1036 void
1037 usage (void)
1038 {
1039   int  i;
1040 
1041   speed_time_init ();
1042 
1043   printf ("Usage: speed [-options] -s size <routine>...\n");
1044   printf ("Measure the speed of some routines.\n");
1045   printf ("Times are in seconds, accuracy is shown.\n");
1046   printf ("\n");
1047   printf ("   -p num     set precision as number of time units each routine must run\n");
1048   printf ("   -s size[-end][,size[-end]]...   sizes to measure\n");
1049   printf ("              single sizes or ranges, sep with comma or use multiple -s\n");
1050   printf ("   -t step    step through sizes by given amount\n");
1051   printf ("   -f factor  step through sizes by given factor (eg. 1.05)\n");
1052   printf ("   -r         show times as ratios of the first routine\n");
1053   printf ("   -d         show times as difference from the first routine\n");
1054   printf ("   -D         show times as difference from previous size shown\n");
1055   printf ("   -c         show times in CPU cycles\n");
1056   printf ("   -C         show times in cycles per limb\n");
1057   printf ("   -u         print resource usage (memory) at end\n");
1058   printf ("   -P name    output plot files \"name.gnuplot\" and \"name.data\"\n");
1059   printf ("   -a <type>  use given data: random(default), random2, zeros, aas, ffs, 2fd\n");
1060   printf ("   -x, -y, -w, -W <align>  specify data alignments, sources and dests\n");
1061   printf ("   -o addrs   print addresses of data blocks\n");
1062   printf ("\n");
1063   printf ("If both -t and -f are used, it means step by the factor or the step, whichever\n");
1064   printf ("is greater.\n");
1065   printf ("If both -C and -D are used, it means cycles per however many limbs between a\n");
1066   printf ("size and the previous size.\n");
1067   printf ("\n");
1068   printf ("After running with -P, plots can be viewed with Gnuplot or Quickplot.\n");
1069   printf ("\"gnuplot name.gnuplot\" (use \"set logscale xy; replot\" at the prompt for\n");
1070   printf ("a log/log plot).\n");
1071   printf ("\"quickplot -s name.data\" (has interactive zooming, and note -s is important\n");
1072   printf ("when viewing more than one routine, it means same axis scales for all data).\n");
1073   printf ("\n");
1074   printf ("The available routines are as follows.\n");
1075   printf ("\n");
1076 
1077   for (i = 0; i < numberof (routine); i++)
1078     {
1079       if (routine[i].flag & FLAG_R)
1080         printf ("\t%s.r\n", routine[i].name);
1081       else if (routine[i].flag & FLAG_R_OPTIONAL)
1082         printf ("\t%s (optional .r)\n", routine[i].name);
1083       else
1084         printf ("\t%s\n", routine[i].name);
1085     }
1086   printf ("\n");
1087   printf ("Routines with a \".r\" need an extra parameter, for example mpn_lshift.6\n");
1088   printf ("r should be in decimal, or use 0xN for hexadecimal.\n");
1089   printf ("\n");
1090   printf ("Special forms for r are \"<N>bits\" for a random N bit number, \"<N>ones\" for\n");
1091   printf ("N one bits, or \"aas\" for 0xAA..AA.\n");
1092   printf ("\n");
1093   printf ("Times for sizes out of the range accepted by a routine are shown as 0.\n");
1094   printf ("The fastest routine at each size is marked with a # (free form output only).\n");
1095   printf ("\n");
1096   printf ("%s", speed_time_string);
1097   printf ("\n");
1098   printf ("Gnuplot home page http://www.gnuplot.info/\n");
1099   printf ("Quickplot home page http://quickplot.sourceforge.net/\n");
1100 }
1101 
1102 void
1103 check_align_option (const char *name, mp_size_t align)
1104 {
1105   if (align < 0 || align > SPEED_TMP_ALLOC_ADJUST_MASK)
1106     {
1107       fprintf (stderr, "Alignment request out of range: %s %ld\n",
1108                name, (long) align);
1109       fprintf (stderr, "  should be 0 to %d (limbs), inclusive\n",
1110                SPEED_TMP_ALLOC_ADJUST_MASK);
1111       exit (1);
1112     }
1113 }
1114 
1115 int
1116 main (int argc, char *argv[])
1117 {
1118   int  i;
1119   int  opt;
1120 
1121   /* Unbuffered so output goes straight out when directed to a pipe or file
1122      and isn't lost on killing the program half way.  */
1123   setbuf (stdout, NULL);
1124 
1125   for (;;)
1126     {
1127       opt = getopt(argc, argv, "a:CcDdEFf:o:p:P:rRs:t:ux:y:w:W:z");
1128       if (opt == EOF)
1129         break;
1130 
1131       switch (opt) {
1132       case 'a':
1133         if (strcmp (optarg, "random") == 0)       option_data = DATA_RANDOM;
1134         else if (strcmp (optarg, "random2") == 0) option_data = DATA_RANDOM2;
1135         else if (strcmp (optarg, "zeros") == 0)   option_data = DATA_ZEROS;
1136         else if (strcmp (optarg, "aas") == 0)     option_data = DATA_AAS;
1137         else if (strcmp (optarg, "ffs") == 0)     option_data = DATA_FFS;
1138         else if (strcmp (optarg, "2fd") == 0)     option_data = DATA_2FD;
1139         else
1140           {
1141             fprintf (stderr, "unrecognised data option: %s\n", optarg);
1142             exit (1);
1143           }
1144         break;
1145       case 'C':
1146         if (option_unit  != UNIT_SECONDS) goto bad_unit;
1147         option_unit = UNIT_CYCLESPERLIMB;
1148         break;
1149       case 'c':
1150         if (option_unit != UNIT_SECONDS)
1151           {
1152           bad_unit:
1153             fprintf (stderr, "cannot use more than one of -c, -C\n");
1154             exit (1);
1155           }
1156         option_unit = UNIT_CYCLES;
1157         break;
1158       case 'D':
1159         if (option_cmp != CMP_ABSOLUTE) goto bad_cmp;
1160         option_cmp = CMP_DIFFPREV;
1161         break;
1162       case 'd':
1163         if (option_cmp != CMP_ABSOLUTE)
1164           {
1165           bad_cmp:
1166             fprintf (stderr, "cannot use more than one of -d, -D, -r\n");
1167             exit (1);
1168           }
1169         option_cmp = CMP_DIFFERENCE;
1170         break;
1171       case 'E':
1172         option_square = 1;
1173         break;
1174       case 'F':
1175         option_square = 2;
1176         break;
1177       case 'f':
1178         option_factor = atof (optarg);
1179         if (option_factor <= 1.0)
1180           {
1181             fprintf (stderr, "-f factor must be > 1.0\n");
1182             exit (1);
1183           }
1184         break;
1185       case 'o':
1186         speed_option_set (optarg);
1187         break;
1188       case 'P':
1189         option_gnuplot = 1;
1190         option_gnuplot_basename = optarg;
1191         break;
1192       case 'p':
1193         speed_precision = atoi (optarg);
1194         break;
1195       case 'R':
1196         option_seed = time (NULL);
1197         break;
1198       case 'r':
1199         if (option_cmp != CMP_ABSOLUTE)
1200           goto bad_cmp;
1201         option_cmp = CMP_RATIO;
1202         break;
1203       case 's':
1204         {
1205           char  *s;
1206           for (s = strtok (optarg, ","); s != NULL; s = strtok (NULL, ","))
1207             {
1208               if (size_num == size_allocnum)
1209                 {
1210                   size_array = (struct size_array_t *)
1211                     __gmp_allocate_or_reallocate
1212                     (size_array,
1213                      size_allocnum * sizeof(size_array[0]),
1214                      (size_allocnum+10) * sizeof(size_array[0]));
1215                   size_allocnum += 10;
1216                 }
1217               if (sscanf (s, "%ld-%ld",
1218                           &size_array[size_num].start,
1219                           &size_array[size_num].end) != 2)
1220                 {
1221                   size_array[size_num].start = size_array[size_num].end
1222                     = atol (s);
1223                 }
1224 
1225               if (size_array[size_num].start < 0
1226                   || size_array[size_num].end < 0
1227                   || size_array[size_num].start > size_array[size_num].end)
1228                 {
1229                   fprintf (stderr, "invalid size parameter: %s\n", s);
1230                   exit (1);
1231                 }
1232 
1233               size_num++;
1234             }
1235         }
1236         break;
1237       case 't':
1238         option_step = atol (optarg);
1239         if (option_step < 1)
1240           {
1241             fprintf (stderr, "-t step must be >= 1\n");
1242             exit (1);
1243           }
1244         break;
1245       case 'u':
1246         option_resource_usage = 1;
1247         break;
1248       case 'z':
1249         sp.cache = 1;
1250         break;
1251       case 'x':
1252         sp.align_xp = atol (optarg);
1253         check_align_option ("-x", sp.align_xp);
1254         break;
1255       case 'y':
1256         sp.align_yp = atol (optarg);
1257         check_align_option ("-y", sp.align_yp);
1258         break;
1259       case 'w':
1260         sp.align_wp = atol (optarg);
1261         check_align_option ("-w", sp.align_wp);
1262         break;
1263       case 'W':
1264         sp.align_wp2 = atol (optarg);
1265         check_align_option ("-W", sp.align_wp2);
1266         break;
1267       case '?':
1268         exit(1);
1269       }
1270     }
1271 
1272   if (optind >= argc)
1273     {
1274       usage ();
1275       exit (1);
1276     }
1277 
1278   if (size_num == 0)
1279     {
1280       fprintf (stderr, "-s <size> must be specified\n");
1281       exit (1);
1282     }
1283 
1284   gmp_randinit_default (__gmp_rands);
1285   __gmp_rands_initialized = 1;
1286   gmp_randseed_ui (__gmp_rands, option_seed);
1287 
1288   choice = (struct choice_t *) (*__gmp_allocate_func)
1289     ((argc - optind) * sizeof(choice[0]));
1290   for ( ; optind < argc; optind++)
1291     {
1292       struct choice_t  c;
1293       routine_find (&c, argv[optind]);
1294       choice[num_choices] = c;
1295       num_choices++;
1296     }
1297 
1298   if ((option_cmp == CMP_RATIO || option_cmp == CMP_DIFFERENCE) &&
1299       num_choices < 2)
1300     {
1301       fprintf (stderr, "WARNING, -d or -r does nothing when only one routine requested\n");
1302     }
1303 
1304   speed_time_init ();
1305   if (option_unit == UNIT_CYCLES || option_unit == UNIT_CYCLESPERLIMB)
1306     speed_cycletime_need_cycles ();
1307   else
1308     speed_cycletime_need_seconds ();
1309 
1310   if (option_gnuplot)
1311     {
1312       run_gnuplot (argc, argv);
1313     }
1314   else
1315     {
1316       if (option_unit == UNIT_SECONDS)
1317         printf ("overhead %.9f secs", speed_measure (speed_noop, NULL));
1318       else
1319         printf ("overhead %.2f cycles",
1320                 speed_measure (speed_noop, NULL) / speed_cycletime);
1321       printf (", precision %d units of %.2e secs",
1322               speed_precision, speed_unittime);
1323 
1324       if (speed_cycletime == 1.0 || speed_cycletime == 0.0)
1325         printf (", CPU freq unknown\n");
1326       else
1327         printf (", CPU freq %.2f MHz\n", 1e-6/speed_cycletime);
1328 
1329       printf ("       ");
1330       for (i = 0; i < num_choices; i++)
1331         printf (" %*s", COLUMN_WIDTH, choice[i].name);
1332       printf ("\n");
1333 
1334       run_all (stdout);
1335     }
1336 
1337   if (option_resource_usage)
1338     {
1339 #if HAVE_GETRUSAGE
1340       {
1341         /* This doesn't give data sizes on linux 2.0.x, only utime. */
1342         struct rusage  r;
1343         if (getrusage (RUSAGE_SELF, &r) != 0)
1344           perror ("getrusage");
1345         else
1346           printf ("getrusage(): utime %ld.%06ld data %ld stack %ld maxresident %ld\n",
1347                   r.ru_utime.tv_sec, r.ru_utime.tv_usec,
1348                   r.ru_idrss, r.ru_isrss, r.ru_ixrss);
1349       }
1350 #else
1351       printf ("getrusage() not available\n");
1352 #endif
1353 
1354       /* Linux kernel. */
1355       {
1356         char  buf[128];
1357         sprintf (buf, "/proc/%d/status", getpid());
1358         if (access (buf, R_OK) == 0)
1359           {
1360             sprintf (buf, "cat /proc/%d/status", getpid());
1361             system (buf);
1362           }
1363 
1364       }
1365     }
1366 
1367   return 0;
1368 }
1369