xref: /netbsd-src/external/lgpl3/gmp/dist/tune/speed.c (revision 154bfe8e089c1a0a4e9ed8414f08d3da90949162)
1 /* Speed measuring program.
2 
3 Copyright 1999-2003, 2005, 2006, 2008-2015 Free Software Foundation, Inc.
4 
5 This file is part of the GNU MP Library.
6 
7 The GNU MP Library is free software; you can redistribute it and/or modify
8 it under the terms of either:
9 
10   * the GNU Lesser General Public License as published by the Free
11     Software Foundation; either version 3 of the License, or (at your
12     option) any later version.
13 
14 or
15 
16   * the GNU General Public License as published by the Free Software
17     Foundation; either version 2 of the License, or (at your option) any
18     later version.
19 
20 or both in parallel, as here.
21 
22 The GNU MP Library is distributed in the hope that it will be useful, but
23 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
24 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
25 for more details.
26 
27 You should have received copies of the GNU General Public License and the
28 GNU Lesser General Public License along with the GNU MP Library.  If not,
29 see https://www.gnu.org/licenses/.  */
30 
31 /* Usage message is in the code below, run with no arguments to print it.
32    See README for interesting applications.
33 
34    To add a new routine foo(), create a speed_foo() function in the style of
35    the existing ones and add an entry in the routine[] array.  Put FLAG_R if
36    speed_foo() wants an "r" parameter.
37 
38    The routines don't have help messages or descriptions, but most have
39    suggestive names.  See the source code for full details.
40 
41 */
42 
43 #include "config.h"
44 
45 #include <limits.h>
46 #include <stdio.h>
47 #include <stdlib.h>
48 #include <string.h>
49 
50 #if HAVE_UNISTD_H
51 #include <unistd.h>  /* for getpid, R_OK */
52 #endif
53 
54 #if TIME_WITH_SYS_TIME
55 # include <sys/time.h>  /* for struct timeval */
56 # include <time.h>
57 #else
58 # if HAVE_SYS_TIME_H
59 #  include <sys/time.h>
60 # else
61 #  include <time.h>
62 # endif
63 #endif
64 
65 #if HAVE_SYS_RESOURCE_H
66 #include <sys/resource.h>  /* for getrusage() */
67 #endif
68 
69 
70 #include "gmp.h"
71 #include "gmp-impl.h"
72 #include "longlong.h"  /* for the benefit of speed-many.c */
73 #include "tests.h"
74 #include "speed.h"
75 
76 
77 #if !HAVE_DECL_OPTARG
78 extern char *optarg;
79 extern int optind, opterr;
80 #endif
81 
82 #if !HAVE_STRTOUL
83 #define strtoul(p,e,b)  (unsigned long) strtol(p,e,b)
84 #endif
85 
86 #ifdef SPEED_EXTRA_PROTOS
87 SPEED_EXTRA_PROTOS
88 #endif
89 #ifdef SPEED_EXTRA_PROTOS2
90 SPEED_EXTRA_PROTOS2
91 #endif
92 
93 
/* GMP_NUMB_0xAA is a limb with the repeating byte pattern 0xAA, masked with
   GMP_NUMB_MASK.  data_fill() uses it as the fill value for DATA_AAS.
   It is only defined here for 32-bit and 64-bit limbs.  */
94 #if GMP_LIMB_BITS == 32
95 #define GMP_NUMB_0xAA  (CNST_LIMB(0xAAAAAAAA) & GMP_NUMB_MASK)
96 #endif
97 #if GMP_LIMB_BITS == 64
98 #define GMP_NUMB_0xAA  (CNST_LIMB(0xAAAAAAAAAAAAAAAA) & GMP_NUMB_MASK)
99 #endif
100 
101 
/* Comparison mode: how run_one() post-processes each routine's time.  */
102 #define CMP_ABSOLUTE     1  /* no comparison adjustment */
103 #define CMP_RATIO        2  /* routines after the first: divided by the first routine's time */
104 #define CMP_DIFFERENCE   3  /* routines after the first: first routine's time subtracted */
105 #define CMP_DIFFPREV     4  /* same routine's time at the previous size subtracted */
106 int  option_cmp = CMP_ABSOLUTE;
107 
/* Output unit applied by run_one().  */
108 #define UNIT_SECONDS        1  /* times left as measured */
109 #define UNIT_CYCLES         2  /* divided by speed_cycletime */
110 #define UNIT_CYCLESPERLIMB  3  /* divided by speed_cycletime and SIZE_TO_DIVISOR(size) */
111 int  option_unit = UNIT_SECONDS;
112 
/* Operand data pattern written by data_fill().  */
113 #define DATA_RANDOM   1  /* mpn_random */
114 #define DATA_RANDOM2  2  /* mpn_random2 */
115 #define DATA_ZEROS    3  /* all limbs zero */
116 #define DATA_AAS      4  /* all limbs GMP_NUMB_0xAA */
117 #define DATA_FFS      5  /* all limbs GMP_NUMB_MAX */
118 #define DATA_2FD      6  /* all limbs GMP_NUMB_MAX, then 2 subtracted from the lowest limb */
119 int  option_data = DATA_RANDOM;
120 
/* option_square selects the per-limb divisor in SIZE_TO_DIVISOR: 1 gives
   n*n, 2 gives n*(n+1)/2, anything else gives n.  */
121 int        option_square = 0;
/* When non-zero, run_all() advances the size by size*option_factor - size
   (at least 1) instead of by 1.  */
122 double     option_factor = 0.0;
/* Lower bound on the step between consecutive sizes in run_all().  */
123 mp_size_t  option_step = 1;
/* Non-zero makes run_one() emit gnuplot data rows instead of the free-form
   columns.  */
124 int        option_gnuplot = 0;
/* run_gnuplot() writes "<basename>.gnuplot" and "<basename>.data".  */
125 char      *option_gnuplot_basename;
/* Size ranges walked by run_all(): each entry is measured from start up to
   and including end.  size_num is the number of entries in use.
   NOTE(review): size_allocnum is presumably the allocated capacity of
   size_array; it is not used in this part of the file -- confirm.  */
126 struct size_array_t {
127   mp_size_t start, end;
128 } *size_array = NULL;
129 mp_size_t  size_num = 0;
130 mp_size_t  size_allocnum = 0;
/* NOTE(review): option_resource_usage and option_seed are not referenced in
   this part of the file; their consumers are elsewhere -- confirm.  */
131 int        option_resource_usage = 0;
132 long       option_seed = 123456789;
133 
/* The measurement parameters shared across the program; run_all() sets
   sp.size and passes &sp to run_one().  */
134 struct speed_params  sp;
135 
136 #define COLUMN_WIDTH  13  /* for the free-form output */
137 
/* Bits for routine_t.flag.  */
138 #define FLAG_R            (1<<0)  /* require ".r" */
139 #define FLAG_R_OPTIONAL   (1<<1)  /* optional ".r" */
140 #define FLAG_RSIZE        (1<<2)
141 #define FLAG_NODATA       (1<<3)  /* don't alloc xp, yp */
142 
/* Table of the routines that can be measured.

   Each entry gives the routine's name, the speed_xxx() function that
   run_one() hands to speed_measure(), and a set of FLAG_* bits.  Entries
   that leave the flag initializer out get flag 0.  Entries inside #if
   blocks exist only when the corresponding macro is non-zero.
   SPEED_EXTRA_ROUTINES and SPEED_EXTRA_ROUTINES2, when defined, are
   expanded at the end of the table to append further entries.

   NOTE(review): the code that looks names up in this table is not in this
   part of the file.  */
143 const struct routine_t {
144   /* constants */
145   const char        *name;  /* routine name */
146   speed_function_t  fun;    /* passed to speed_measure() by run_one() */
147   int               flag;   /* FLAG_* bits; run_one() tests FLAG_NODATA */
148 } routine[] = {
149 
150   { "noop",              speed_noop                 },
151   { "noop_wxs",          speed_noop_wxs             },
152   { "noop_wxys",         speed_noop_wxys            },
153 
154   { "mpn_add_n",         speed_mpn_add_n,     FLAG_R_OPTIONAL },
155   { "mpn_sub_n",         speed_mpn_sub_n,     FLAG_R_OPTIONAL },
156   { "mpn_add_1",         speed_mpn_add_1,     FLAG_R },
157   { "mpn_add_1_inplace", speed_mpn_add_1_inplace, FLAG_R },
158   { "mpn_sub_1",         speed_mpn_sub_1,     FLAG_R },
159   { "mpn_sub_1_inplace", speed_mpn_sub_1_inplace, FLAG_R },
160 
161   { "mpn_add_err1_n",    speed_mpn_add_err1_n    },
162   { "mpn_add_err2_n",    speed_mpn_add_err2_n    },
163   { "mpn_add_err3_n",    speed_mpn_add_err3_n    },
164   { "mpn_sub_err1_n",    speed_mpn_sub_err1_n    },
165   { "mpn_sub_err2_n",    speed_mpn_sub_err2_n    },
166   { "mpn_sub_err3_n",    speed_mpn_sub_err3_n    },
167 
168 #if HAVE_NATIVE_mpn_add_n_sub_n
169   { "mpn_add_n_sub_n",      speed_mpn_add_n_sub_n,     FLAG_R_OPTIONAL },
170 #endif
171 
172   { "mpn_addmul_1",      speed_mpn_addmul_1,  FLAG_R },
173   { "mpn_submul_1",      speed_mpn_submul_1,  FLAG_R },
174 #if HAVE_NATIVE_mpn_addmul_2
175   { "mpn_addmul_2",      speed_mpn_addmul_2,  FLAG_R_OPTIONAL },
176 #endif
177 #if HAVE_NATIVE_mpn_addmul_3
178   { "mpn_addmul_3",      speed_mpn_addmul_3,  FLAG_R_OPTIONAL },
179 #endif
180 #if HAVE_NATIVE_mpn_addmul_4
181   { "mpn_addmul_4",      speed_mpn_addmul_4,  FLAG_R_OPTIONAL },
182 #endif
183 #if HAVE_NATIVE_mpn_addmul_5
184   { "mpn_addmul_5",      speed_mpn_addmul_5,  FLAG_R_OPTIONAL },
185 #endif
186 #if HAVE_NATIVE_mpn_addmul_6
187   { "mpn_addmul_6",      speed_mpn_addmul_6,  FLAG_R_OPTIONAL },
188 #endif
189 #if HAVE_NATIVE_mpn_addmul_7
190   { "mpn_addmul_7",      speed_mpn_addmul_7,  FLAG_R_OPTIONAL },
191 #endif
192 #if HAVE_NATIVE_mpn_addmul_8
193   { "mpn_addmul_8",      speed_mpn_addmul_8,  FLAG_R_OPTIONAL },
194 #endif
195   { "mpn_mul_1",         speed_mpn_mul_1,     FLAG_R },
196   { "mpn_mul_1_inplace", speed_mpn_mul_1_inplace, FLAG_R },
197 #if HAVE_NATIVE_mpn_mul_2
198   { "mpn_mul_2",         speed_mpn_mul_2,     FLAG_R_OPTIONAL },
199 #endif
200 #if HAVE_NATIVE_mpn_mul_3
201   { "mpn_mul_3",         speed_mpn_mul_3,     FLAG_R_OPTIONAL },
202 #endif
203 #if HAVE_NATIVE_mpn_mul_4
204   { "mpn_mul_4",         speed_mpn_mul_4,     FLAG_R_OPTIONAL },
205 #endif
206 #if HAVE_NATIVE_mpn_mul_5
207   { "mpn_mul_5",         speed_mpn_mul_5,     FLAG_R_OPTIONAL },
208 #endif
209 #if HAVE_NATIVE_mpn_mul_6
210   { "mpn_mul_6",         speed_mpn_mul_6,     FLAG_R_OPTIONAL },
211 #endif
212 
213   { "mpn_divrem_1",      speed_mpn_divrem_1,  FLAG_R },
214   { "mpn_divrem_1f",     speed_mpn_divrem_1f, FLAG_R },
215 #if HAVE_NATIVE_mpn_divrem_1c
216   { "mpn_divrem_1c",     speed_mpn_divrem_1c, FLAG_R },
217   { "mpn_divrem_1cf",    speed_mpn_divrem_1cf,FLAG_R },
218 #endif
219   { "mpn_mod_1",         speed_mpn_mod_1,     FLAG_R },
220 #if HAVE_NATIVE_mpn_mod_1c
221   { "mpn_mod_1c",        speed_mpn_mod_1c,    FLAG_R },
222 #endif
223   { "mpn_preinv_divrem_1",  speed_mpn_preinv_divrem_1,  FLAG_R },
224   { "mpn_preinv_divrem_1f", speed_mpn_preinv_divrem_1f, FLAG_R },
225   { "mpn_preinv_mod_1",  speed_mpn_preinv_mod_1, FLAG_R },
226 
227   { "mpn_mod_1_1",       speed_mpn_mod_1_1,       FLAG_R },
228   { "mpn_mod_1_1_1",     speed_mpn_mod_1_1_1,     FLAG_R },
229   { "mpn_mod_1_1_2",     speed_mpn_mod_1_1_2,     FLAG_R },
230   { "mpn_mod_1s_2",      speed_mpn_mod_1_2,       FLAG_R },
231   { "mpn_mod_1s_3",      speed_mpn_mod_1_3,       FLAG_R },
232   { "mpn_mod_1s_4",      speed_mpn_mod_1_4,       FLAG_R },
233 
234   { "mpn_divrem_1_div",  speed_mpn_divrem_1_div,  FLAG_R },
235   { "mpn_divrem_1_inv",  speed_mpn_divrem_1_inv,  FLAG_R },
236   { "mpn_divrem_1f_div", speed_mpn_divrem_1f_div, FLAG_R },
237   { "mpn_divrem_1f_inv", speed_mpn_divrem_1f_inv, FLAG_R },
238   { "mpn_mod_1_div",     speed_mpn_mod_1_div,     FLAG_R },
239   { "mpn_mod_1_inv",     speed_mpn_mod_1_inv,     FLAG_R },
240 
241   { "mpn_divrem_2",      speed_mpn_divrem_2,        },
242   { "mpn_divrem_2_div",  speed_mpn_divrem_2_div,    },
243   { "mpn_divrem_2_inv",  speed_mpn_divrem_2_inv,    },
244 
245   { "mpn_div_qr_1n_pi1", speed_mpn_div_qr_1n_pi1, FLAG_R  },
246   { "mpn_div_qr_1n_pi1_1",speed_mpn_div_qr_1n_pi1_1, FLAG_R  },
247   { "mpn_div_qr_1n_pi1_2",speed_mpn_div_qr_1n_pi1_2, FLAG_R  },
248   { "mpn_div_qr_1",      speed_mpn_div_qr_1,      FLAG_R },
249 
250   { "mpn_div_qr_2n",     speed_mpn_div_qr_2n,       },
251   { "mpn_div_qr_2u",     speed_mpn_div_qr_2u,       },
252 
253   { "mpn_divexact_1",    speed_mpn_divexact_1,    FLAG_R },
254   { "mpn_divexact_by3",  speed_mpn_divexact_by3          },
255 
256   { "mpn_bdiv_q_1",      speed_mpn_bdiv_q_1,      FLAG_R },
257   { "mpn_pi1_bdiv_q_1",  speed_mpn_pi1_bdiv_q_1,  FLAG_R_OPTIONAL },
258   { "mpn_bdiv_dbm1c",    speed_mpn_bdiv_dbm1c,    FLAG_R_OPTIONAL },
259 
260 #if HAVE_NATIVE_mpn_modexact_1_odd
261   { "mpn_modexact_1_odd",  speed_mpn_modexact_1_odd,  FLAG_R },
262 #endif
263   { "mpn_modexact_1c_odd", speed_mpn_modexact_1c_odd, FLAG_R },
264 
265 #if GMP_NUMB_BITS % 4 == 0
266   { "mpn_mod_34lsub1",   speed_mpn_mod_34lsub1 },
267 #endif
268 
269   { "mpn_lshift",        speed_mpn_lshift, FLAG_R   },
270   { "mpn_lshiftc",       speed_mpn_lshiftc, FLAG_R   },
271   { "mpn_rshift",        speed_mpn_rshift, FLAG_R   },
272 
273   { "mpn_and_n",         speed_mpn_and_n,  FLAG_R_OPTIONAL },
274   { "mpn_andn_n",        speed_mpn_andn_n, FLAG_R_OPTIONAL },
275   { "mpn_nand_n",        speed_mpn_nand_n, FLAG_R_OPTIONAL },
276   { "mpn_ior_n",         speed_mpn_ior_n,  FLAG_R_OPTIONAL },
277   { "mpn_iorn_n",        speed_mpn_iorn_n, FLAG_R_OPTIONAL },
278   { "mpn_nior_n",        speed_mpn_nior_n, FLAG_R_OPTIONAL },
279   { "mpn_xor_n",         speed_mpn_xor_n,  FLAG_R_OPTIONAL },
280   { "mpn_xnor_n",        speed_mpn_xnor_n, FLAG_R_OPTIONAL },
281   { "mpn_com",           speed_mpn_com              },
282   { "mpn_neg",           speed_mpn_neg              },
283 
284   { "mpn_popcount",      speed_mpn_popcount         },
285   { "mpn_hamdist",       speed_mpn_hamdist          },
286 
287   { "mpn_matrix22_mul",  speed_mpn_matrix22_mul     },
288 
289   { "mpn_hgcd",          speed_mpn_hgcd             },
290   { "mpn_hgcd_lehmer",   speed_mpn_hgcd_lehmer      },
291   { "mpn_hgcd_appr",     speed_mpn_hgcd_appr        },
292   { "mpn_hgcd_appr_lehmer", speed_mpn_hgcd_appr_lehmer },
293 
294   { "mpn_hgcd_reduce",   speed_mpn_hgcd_reduce      },
295   { "mpn_hgcd_reduce_1", speed_mpn_hgcd_reduce_1    },
296   { "mpn_hgcd_reduce_2", speed_mpn_hgcd_reduce_2    },
297 
298   { "mpn_gcd_1",         speed_mpn_gcd_1,  FLAG_R_OPTIONAL },
299   { "mpn_gcd_1N",        speed_mpn_gcd_1N, FLAG_R_OPTIONAL },
300 
301   { "mpn_gcd",           speed_mpn_gcd                    },
302 
303   { "mpn_gcdext",            speed_mpn_gcdext            },
304   { "mpn_gcdext_single",     speed_mpn_gcdext_single     },
305   { "mpn_gcdext_double",     speed_mpn_gcdext_double     },
306   { "mpn_gcdext_one_single", speed_mpn_gcdext_one_single },
307   { "mpn_gcdext_one_double", speed_mpn_gcdext_one_double },
308 #if 0
309   { "mpn_gcdext_lehmer",     speed_mpn_gcdext_lehmer     },
310 #endif
311   { "mpz_jacobi",        speed_mpz_jacobi           },
312   { "mpn_jacobi_base",   speed_mpn_jacobi_base      },
313   { "mpn_jacobi_base_1", speed_mpn_jacobi_base_1    },
314   { "mpn_jacobi_base_2", speed_mpn_jacobi_base_2    },
315   { "mpn_jacobi_base_3", speed_mpn_jacobi_base_3    },
316   { "mpn_jacobi_base_4", speed_mpn_jacobi_base_4    },
317 
318   { "mpn_mul",           speed_mpn_mul,         FLAG_R_OPTIONAL },
319   { "mpn_mul_basecase",  speed_mpn_mul_basecase,FLAG_R_OPTIONAL },
320   { "mpn_sqr_basecase",  speed_mpn_sqr_basecase     },
321 #if HAVE_NATIVE_mpn_sqr_diagonal
322   { "mpn_sqr_diagonal",  speed_mpn_sqr_diagonal     },
323 #endif
324 #if HAVE_NATIVE_mpn_sqr_diag_addlsh1
325   { "mpn_sqr_diag_addlsh1", speed_mpn_sqr_diag_addlsh1 },
326 #endif
327 
328   { "mpn_mul_n",         speed_mpn_mul_n            },
329   { "mpn_sqr",           speed_mpn_sqr              },
330 
331   { "mpn_toom2_sqr",     speed_mpn_toom2_sqr        },
332   { "mpn_toom3_sqr",     speed_mpn_toom3_sqr        },
333   { "mpn_toom4_sqr",     speed_mpn_toom4_sqr        },
334   { "mpn_toom6_sqr",     speed_mpn_toom6_sqr        },
335   { "mpn_toom8_sqr",     speed_mpn_toom8_sqr        },
336   { "mpn_toom22_mul",    speed_mpn_toom22_mul       },
337   { "mpn_toom33_mul",    speed_mpn_toom33_mul       },
338   { "mpn_toom44_mul",    speed_mpn_toom44_mul       },
339   { "mpn_toom6h_mul",    speed_mpn_toom6h_mul       },
340   { "mpn_toom8h_mul",    speed_mpn_toom8h_mul       },
341   { "mpn_toom32_mul",    speed_mpn_toom32_mul       },
342   { "mpn_toom42_mul",    speed_mpn_toom42_mul       },
343   { "mpn_toom43_mul",    speed_mpn_toom43_mul       },
344   { "mpn_toom63_mul",    speed_mpn_toom63_mul       },
345   { "mpn_nussbaumer_mul",    speed_mpn_nussbaumer_mul    },
346   { "mpn_nussbaumer_mul_sqr",speed_mpn_nussbaumer_mul_sqr},
347 #if WANT_OLD_FFT_FULL
348   { "mpn_mul_fft_full",      speed_mpn_mul_fft_full      },
349   { "mpn_mul_fft_full_sqr",  speed_mpn_mul_fft_full_sqr  },
350 #endif
351   { "mpn_mul_fft",       speed_mpn_mul_fft,     FLAG_R_OPTIONAL },
352   { "mpn_mul_fft_sqr",   speed_mpn_mul_fft_sqr, FLAG_R_OPTIONAL },
353 
354   { "mpn_sqrlo",          speed_mpn_sqrlo           },
355   { "mpn_sqrlo_basecase", speed_mpn_sqrlo_basecase  },
356   { "mpn_mullo_n",        speed_mpn_mullo_n         },
357   { "mpn_mullo_basecase", speed_mpn_mullo_basecase  },
358 
359   { "mpn_mulmid_basecase",  speed_mpn_mulmid_basecase, FLAG_R_OPTIONAL },
360   { "mpn_toom42_mulmid",    speed_mpn_toom42_mulmid },
361   { "mpn_mulmid_n",         speed_mpn_mulmid_n },
362   { "mpn_mulmid",           speed_mpn_mulmid, FLAG_R_OPTIONAL },
363 
364   { "mpn_bc_mulmod_bnm1",      speed_mpn_bc_mulmod_bnm1      },
365   { "mpn_mulmod_bnm1",         speed_mpn_mulmod_bnm1         },
366   { "mpn_mulmod_bnm1_rounded", speed_mpn_mulmod_bnm1_rounded },
367   { "mpn_sqrmod_bnm1",         speed_mpn_sqrmod_bnm1         },
368 
369   { "mpn_invert",              speed_mpn_invert              },
370   { "mpn_invertappr",          speed_mpn_invertappr          },
371   { "mpn_ni_invertappr",       speed_mpn_ni_invertappr       },
372   { "mpn_binvert",             speed_mpn_binvert             },
373   { "mpn_sec_invert",          speed_mpn_sec_invert          },
374 
375   { "mpn_sbpi1_div_qr",        speed_mpn_sbpi1_div_qr,    FLAG_R_OPTIONAL},
376   { "mpn_dcpi1_div_qr",        speed_mpn_dcpi1_div_qr,    FLAG_R_OPTIONAL},
377   { "mpn_mu_div_qr",           speed_mpn_mu_div_qr,       FLAG_R_OPTIONAL},
378   { "mpn_mupi_div_qr",         speed_mpn_mupi_div_qr,     FLAG_R_OPTIONAL},
379   { "mpn_sbpi1_divappr_q",     speed_mpn_sbpi1_divappr_q, FLAG_R_OPTIONAL},
380   { "mpn_dcpi1_divappr_q",     speed_mpn_dcpi1_divappr_q, FLAG_R_OPTIONAL},
381 
382   { "mpn_sbpi1_bdiv_qr",       speed_mpn_sbpi1_bdiv_qr       },
383   { "mpn_dcpi1_bdiv_qr",       speed_mpn_dcpi1_bdiv_qr       },
384   { "mpn_sbpi1_bdiv_q",        speed_mpn_sbpi1_bdiv_q        },
385   { "mpn_dcpi1_bdiv_q",        speed_mpn_dcpi1_bdiv_q        },
386 
387   { "mpn_broot",               speed_mpn_broot,    FLAG_R },
388   { "mpn_broot_invm1",         speed_mpn_broot_invm1, FLAG_R },
389   { "mpn_brootinv",            speed_mpn_brootinv, FLAG_R },
390 
391   { "mpn_get_str",          speed_mpn_get_str,     FLAG_R_OPTIONAL },
392   { "mpn_set_str",          speed_mpn_set_str,     FLAG_R_OPTIONAL },
393   { "mpn_set_str_basecase", speed_mpn_bc_set_str,  FLAG_R_OPTIONAL },
394 
395   { "mpn_sqrtrem",       speed_mpn_sqrtrem          },
396   { "mpn_rootrem",       speed_mpn_rootrem, FLAG_R  },
397   { "mpn_sqrt",          speed_mpn_sqrt             },
398   { "mpn_root",          speed_mpn_root, FLAG_R     },
399 
400   { "mpn_fib2_ui",       speed_mpn_fib2_ui,    FLAG_NODATA },
401   { "mpz_fib_ui",        speed_mpz_fib_ui,     FLAG_NODATA },
402   { "mpz_fib2_ui",       speed_mpz_fib2_ui,    FLAG_NODATA },
403   { "mpz_lucnum_ui",     speed_mpz_lucnum_ui,  FLAG_NODATA },
404   { "mpz_lucnum2_ui",    speed_mpz_lucnum2_ui, FLAG_NODATA },
405 
406   { "mpz_add",           speed_mpz_add              },
407   { "mpz_bin_uiui",      speed_mpz_bin_uiui, FLAG_NODATA | FLAG_R_OPTIONAL },
408   { "mpz_bin_ui",        speed_mpz_bin_ui,   FLAG_NODATA | FLAG_R_OPTIONAL },
409   { "mpz_fac_ui",        speed_mpz_fac_ui,   FLAG_NODATA   },
410   { "mpz_2fac_ui",       speed_mpz_2fac_ui,  FLAG_NODATA   },
411   { "mpz_powm",          speed_mpz_powm             },
412   { "mpz_powm_mod",      speed_mpz_powm_mod         },
413   { "mpz_powm_redc",     speed_mpz_powm_redc        },
414   { "mpz_powm_sec",      speed_mpz_powm_sec        },
415   { "mpz_powm_ui",       speed_mpz_powm_ui,  FLAG_R_OPTIONAL },
416 
417   { "mpz_mod",           speed_mpz_mod              },
418   { "mpn_redc_1",        speed_mpn_redc_1           },
419   { "mpn_redc_2",        speed_mpn_redc_2           },
420   { "mpn_redc_n",        speed_mpn_redc_n           },
421 
422   { "MPN_COPY",          speed_MPN_COPY             },
423   { "MPN_COPY_INCR",     speed_MPN_COPY_INCR        },
424   { "MPN_COPY_DECR",     speed_MPN_COPY_DECR        },
425   { "memcpy",            speed_memcpy               },
426 #if HAVE_NATIVE_mpn_copyi
427   { "mpn_copyi",         speed_mpn_copyi            },
428 #endif
429 #if HAVE_NATIVE_mpn_copyd
430   { "mpn_copyd",         speed_mpn_copyd            },
431 #endif
432   { "mpn_sec_tabselect", speed_mpn_sec_tabselect, FLAG_R_OPTIONAL },
433 #if HAVE_NATIVE_mpn_addlsh1_n == 1
434   { "mpn_addlsh1_n",     speed_mpn_addlsh1_n, FLAG_R_OPTIONAL },
435 #endif
436 #if HAVE_NATIVE_mpn_sublsh1_n == 1
437   { "mpn_sublsh1_n",     speed_mpn_sublsh1_n, FLAG_R_OPTIONAL },
438 #endif
439 #if HAVE_NATIVE_mpn_addlsh1_n_ip1
440   { "mpn_addlsh1_n_ip1", speed_mpn_addlsh1_n_ip1    },
441 #endif
442 #if HAVE_NATIVE_mpn_addlsh1_n_ip2
443   { "mpn_addlsh1_n_ip2", speed_mpn_addlsh1_n_ip2    },
444 #endif
445 #if HAVE_NATIVE_mpn_sublsh1_n_ip1
446   { "mpn_sublsh1_n_ip1", speed_mpn_sublsh1_n_ip1    },
447 #endif
448 #if HAVE_NATIVE_mpn_rsblsh1_n == 1
449   { "mpn_rsblsh1_n",     speed_mpn_rsblsh1_n, FLAG_R_OPTIONAL },
450 #endif
451 #if HAVE_NATIVE_mpn_addlsh2_n == 1
452   { "mpn_addlsh2_n",     speed_mpn_addlsh2_n, FLAG_R_OPTIONAL },
453 #endif
454 #if HAVE_NATIVE_mpn_sublsh2_n == 1
455   { "mpn_sublsh2_n",     speed_mpn_sublsh2_n, FLAG_R_OPTIONAL },
456 #endif
457 #if HAVE_NATIVE_mpn_addlsh2_n_ip1
458   { "mpn_addlsh2_n_ip1", speed_mpn_addlsh2_n_ip1    },
459 #endif
460 #if HAVE_NATIVE_mpn_addlsh2_n_ip2
461   { "mpn_addlsh2_n_ip2", speed_mpn_addlsh2_n_ip2    },
462 #endif
463 #if HAVE_NATIVE_mpn_sublsh2_n_ip1
464   { "mpn_sublsh2_n_ip1", speed_mpn_sublsh2_n_ip1    },
465 #endif
466 #if HAVE_NATIVE_mpn_rsblsh2_n == 1
467   { "mpn_rsblsh2_n",     speed_mpn_rsblsh2_n, FLAG_R_OPTIONAL },
468 #endif
469 #if HAVE_NATIVE_mpn_addlsh_n
470   { "mpn_addlsh_n",     speed_mpn_addlsh_n, FLAG_R_OPTIONAL },
471 #endif
472 #if HAVE_NATIVE_mpn_sublsh_n
473   { "mpn_sublsh_n",     speed_mpn_sublsh_n, FLAG_R_OPTIONAL },
474 #endif
475 #if HAVE_NATIVE_mpn_addlsh_n_ip1
476   { "mpn_addlsh_n_ip1", speed_mpn_addlsh_n_ip1    },
477 #endif
478 #if HAVE_NATIVE_mpn_addlsh_n_ip2
479   { "mpn_addlsh_n_ip2", speed_mpn_addlsh_n_ip2    },
480 #endif
481 #if HAVE_NATIVE_mpn_sublsh_n_ip1
482   { "mpn_sublsh_n_ip1", speed_mpn_sublsh_n_ip1    },
483 #endif
484 #if HAVE_NATIVE_mpn_rsblsh_n
485   { "mpn_rsblsh_n",     speed_mpn_rsblsh_n, FLAG_R_OPTIONAL },
486 #endif
487 #if HAVE_NATIVE_mpn_rsh1add_n
488   { "mpn_rsh1add_n",     speed_mpn_rsh1add_n, FLAG_R_OPTIONAL },
489 #endif
490 #if HAVE_NATIVE_mpn_rsh1sub_n
491   { "mpn_rsh1sub_n",     speed_mpn_rsh1sub_n, FLAG_R_OPTIONAL },
492 #endif
493 
494   { "mpn_cnd_add_n",     speed_mpn_cnd_add_n, FLAG_R_OPTIONAL },
495   { "mpn_cnd_sub_n",     speed_mpn_cnd_sub_n, FLAG_R_OPTIONAL },
496 
497   { "MPN_ZERO",          speed_MPN_ZERO             },
498 
499   { "binvert_limb",       speed_binvert_limb,       FLAG_NODATA },
500   { "binvert_limb_mul1",  speed_binvert_limb_mul1,  FLAG_NODATA },
501   { "binvert_limb_loop",  speed_binvert_limb_loop,  FLAG_NODATA },
502   { "binvert_limb_cond",  speed_binvert_limb_cond,  FLAG_NODATA },
503   { "binvert_limb_arith", speed_binvert_limb_arith, FLAG_NODATA },
504 
505   { "malloc_free",                  speed_malloc_free                  },
506   { "malloc_realloc_free",          speed_malloc_realloc_free          },
507   { "gmp_allocate_free",            speed_gmp_allocate_free            },
508   { "gmp_allocate_reallocate_free", speed_gmp_allocate_reallocate_free },
509   { "mpz_init_clear",               speed_mpz_init_clear               },
510   { "mpq_init_clear",               speed_mpq_init_clear               },
511   { "mpf_init_clear",               speed_mpf_init_clear               },
512   { "mpz_init_realloc_clear",       speed_mpz_init_realloc_clear       },
513 
514   { "umul_ppmm",         speed_umul_ppmm,     FLAG_R_OPTIONAL },
515 #if HAVE_NATIVE_mpn_umul_ppmm
516   { "mpn_umul_ppmm",     speed_mpn_umul_ppmm, FLAG_R_OPTIONAL },
517 #endif
518 #if HAVE_NATIVE_mpn_umul_ppmm_r
519   { "mpn_umul_ppmm_r",   speed_mpn_umul_ppmm_r, FLAG_R_OPTIONAL },
520 #endif
521 
522   { "count_leading_zeros",  speed_count_leading_zeros,  FLAG_NODATA | FLAG_R_OPTIONAL },
523   { "count_trailing_zeros", speed_count_trailing_zeros, FLAG_NODATA | FLAG_R_OPTIONAL },
524 
525   { "udiv_qrnnd",             speed_udiv_qrnnd,             FLAG_R_OPTIONAL },
526   { "udiv_qrnnd_c",           speed_udiv_qrnnd_c,           FLAG_R_OPTIONAL },
527 #if HAVE_NATIVE_mpn_udiv_qrnnd
528   { "mpn_udiv_qrnnd",         speed_mpn_udiv_qrnnd,         FLAG_R_OPTIONAL },
529 #endif
530 #if HAVE_NATIVE_mpn_udiv_qrnnd_r
531   { "mpn_udiv_qrnnd_r",       speed_mpn_udiv_qrnnd_r,       FLAG_R_OPTIONAL },
532 #endif
533   { "invert_limb",            speed_invert_limb,            FLAG_R_OPTIONAL },
534 
535   { "operator_div",           speed_operator_div,           FLAG_R_OPTIONAL },
536   { "operator_mod",           speed_operator_mod,           FLAG_R_OPTIONAL },
537 
538   { "gmp_randseed",    speed_gmp_randseed,    FLAG_R_OPTIONAL               },
539   { "gmp_randseed_ui", speed_gmp_randseed_ui, FLAG_R_OPTIONAL | FLAG_NODATA },
540   { "mpz_urandomb",    speed_mpz_urandomb,    FLAG_R_OPTIONAL | FLAG_NODATA },
541 
542 #ifdef SPEED_EXTRA_ROUTINES
543   SPEED_EXTRA_ROUTINES
544 #endif
545 #ifdef SPEED_EXTRA_ROUTINES2
546   SPEED_EXTRA_ROUTINES2
547 #endif
548 };
549 
550 
/* One routine selected for measurement, together with its most recent
   result.  run_one() fills in time, no_time and prev_time on every call.  */
551 struct choice_t {
552   const struct routine_t  *p;         /* entry describing the routine */
553   mp_limb_t               r;          /* copied to s->r before measuring */
554   double                  scale;      /* multiplier applied to the measured time */
555   double                  time;       /* value to print for the current size */
556   int                     no_time;    /* non-zero: no value available, print "n/a" (0.0 for gnuplot) */
557   double                  prev_time;  /* scaled time from the previous call, or -1.0; used by CMP_DIFFPREV */
558   const char              *name;      /* title used in the gnuplot key */
559 };
/* The selected routines; run_one() walks choice[0] .. choice[num_choices-1].  */
560 struct choice_t  *choice;
561 int  num_choices = 0;
562 
563 
564 void
565 data_fill (mp_ptr ptr, mp_size_t size)
566 {
567   switch (option_data) {
568   case DATA_RANDOM:
569     mpn_random (ptr, size);
570     break;
571   case DATA_RANDOM2:
572     mpn_random2 (ptr, size);
573     break;
574   case DATA_ZEROS:
575     MPN_ZERO (ptr, size);
576     break;
577   case DATA_AAS:
578     MPN_FILL (ptr, size, GMP_NUMB_0xAA);
579     break;
580   case DATA_FFS:
581     MPN_FILL (ptr, size, GMP_NUMB_MAX);
582     break;
583   case DATA_2FD:
584     MPN_FILL (ptr, size, GMP_NUMB_MAX);
585     ptr[0] -= 2;
586     break;
587   default:
588     abort();
589     /*NOTREACHED*/
590   }
591 }
592 
593 /* The code here handling the various combinations of output options isn't
594    too attractive, but it works and is fairly clean.  */
595 
/* Number of "limb operations" a size-n run is divided by when reporting
   cycles per limb: n*n when option_square is 1, the triangular number
   n*(n+1)/2 when it is 2, and plain n otherwise.  */
#define SIZE_TO_DIVISOR(n)                              \
  ((option_square == 1) ? ((n) * (n))                   \
   : ((option_square == 2) ? ((n) * ((n) + 1) / 2)      \
      : (n)))
600 
601 void
602 run_one (FILE *fp, struct speed_params *s, mp_size_t prev_size)
603 {
604   const char  *first_open_fastest, *first_open_notfastest, *first_close;
605   int         i, fastest, want_data;
606   double      fastest_time;
607   TMP_DECL;
608 
609   TMP_MARK;
610 
611   /* allocate data, unless all routines are NODATA */
612   want_data = 0;
613   for (i = 0; i < num_choices; i++)
614     want_data |= ((choice[i].p->flag & FLAG_NODATA) == 0);
615 
616   if (want_data)
617     {
618       SPEED_TMP_ALLOC_LIMBS (sp.xp, s->size, s->align_xp);
619       SPEED_TMP_ALLOC_LIMBS (sp.yp, s->size, s->align_yp);
620 
621       data_fill (s->xp, s->size);
622       data_fill (s->yp, s->size);
623     }
624   else
625     {
626       sp.xp = NULL;
627       sp.yp = NULL;
628     }
629 
630   if (prev_size == -1 && option_cmp == CMP_DIFFPREV)
631     {
632       first_open_fastest = "(#";
633       first_open_notfastest = " (";
634       first_close = ")";
635     }
636   else
637     {
638       first_open_fastest = "#";
639       first_open_notfastest = " ";
640       first_close = "";
641     }
642 
643   fastest = -1;
644   fastest_time = -1.0;
645   for (i = 0; i < num_choices; i++)
646     {
647       s->r = choice[i].r;
648       choice[i].time = speed_measure (choice[i].p->fun, s);
649       choice[i].no_time = (choice[i].time == -1.0);
650       if (! choice[i].no_time)
651         choice[i].time *= choice[i].scale;
652 
653       /* Apply the effect of CMP_DIFFPREV, but the new choice[i].prev_time
654          is before any differences.  */
655       {
656         double     t;
657         t = choice[i].time;
658         if (t != -1.0 && option_cmp == CMP_DIFFPREV && prev_size != -1)
659           {
660             if (choice[i].prev_time == -1.0)
661               choice[i].no_time = 1;
662             else
663               choice[i].time = choice[i].time - choice[i].prev_time;
664           }
665         choice[i].prev_time = t;
666       }
667 
668       if (choice[i].no_time)
669         continue;
670 
671       /* Look for the fastest after CMP_DIFFPREV has been applied, but
672          before CMP_RATIO or CMP_DIFFERENCE.  There's only a fastest shown
673          if there's more than one routine.  */
674       if (num_choices > 1 && (fastest == -1 || choice[i].time < fastest_time))
675         {
676           fastest = i;
677           fastest_time = choice[i].time;
678         }
679 
680       if (option_cmp == CMP_DIFFPREV)
681         {
682           /* Conversion for UNIT_CYCLESPERLIMB differs in CMP_DIFFPREV. */
683           if (option_unit == UNIT_CYCLES)
684             choice[i].time /= speed_cycletime;
685           else if (option_unit == UNIT_CYCLESPERLIMB)
686             {
687               if (prev_size == -1)
688                 choice[i].time /= speed_cycletime;
689               else
690                 choice[i].time /=  (speed_cycletime
691                                     * (SIZE_TO_DIVISOR(s->size)
692                                        - SIZE_TO_DIVISOR(prev_size)));
693             }
694         }
695       else
696         {
697           if (option_unit == UNIT_CYCLES)
698             choice[i].time /= speed_cycletime;
699           else if (option_unit == UNIT_CYCLESPERLIMB)
700             choice[i].time /= (speed_cycletime * SIZE_TO_DIVISOR(s->size));
701 
702           if (option_cmp == CMP_RATIO && i > 0)
703             {
704               /* A ratio isn't affected by the units chosen. */
705               if (choice[0].no_time || choice[0].time == 0.0)
706                 choice[i].no_time = 1;
707               else
708                 choice[i].time /= choice[0].time;
709             }
710           else if (option_cmp == CMP_DIFFERENCE && i > 0)
711             {
712               if (choice[0].no_time)
713                 {
714                   choice[i].no_time = 1;
715                   continue;
716                 }
717               choice[i].time -= choice[0].time;
718             }
719         }
720     }
721 
722   if (option_gnuplot)
723     {
724       /* In CMP_DIFFPREV, don't print anything for the first size, start
725          with the second where an actual difference is available.
726 
727          In CMP_RATIO, print the first column as 1.0.
728 
729          The 9 decimals printed is much more than the expected precision of
730          the measurements actually. */
731 
732       if (! (option_cmp == CMP_DIFFPREV && prev_size == -1))
733         {
734           fprintf (fp, "%-6ld ", s->size);
735           for (i = 0; i < num_choices; i++)
736             fprintf (fp, "  %.9e",
737                      choice[i].no_time ? 0.0
738                      : (option_cmp == CMP_RATIO && i == 0) ? 1.0
739                      : choice[i].time);
740           fprintf (fp, "\n");
741         }
742     }
743   else
744     {
745       fprintf (fp, "%-6ld ", s->size);
746       for (i = 0; i < num_choices; i++)
747         {
748           char  buf[128];
749           int   decimals;
750 
751           if (choice[i].no_time)
752             {
753               fprintf (fp, " %*s", COLUMN_WIDTH, "n/a");
754             }
755           else
756             {if (option_unit == UNIT_CYCLESPERLIMB
757                  || (option_cmp == CMP_RATIO && i > 0))
758                 decimals = 4;
759               else if (option_unit == UNIT_CYCLES)
760                 decimals = 2;
761               else
762                 decimals = 9;
763 
764               sprintf (buf, "%s%.*f%s",
765                        i == fastest ? first_open_fastest : first_open_notfastest,
766                        decimals, choice[i].time, first_close);
767               fprintf (fp, " %*s", COLUMN_WIDTH, buf);
768             }
769         }
770       fprintf (fp, "\n");
771     }
772 
773   TMP_FREE;
774 }
775 
776 void
777 run_all (FILE *fp)
778 {
779   mp_size_t  prev_size;
780   int        i;
781   TMP_DECL;
782 
783   TMP_MARK;
784   SPEED_TMP_ALLOC_LIMBS (sp.xp_block, SPEED_BLOCK_SIZE, sp.align_xp);
785   SPEED_TMP_ALLOC_LIMBS (sp.yp_block, SPEED_BLOCK_SIZE, sp.align_yp);
786 
787   data_fill (sp.xp_block, SPEED_BLOCK_SIZE);
788   data_fill (sp.yp_block, SPEED_BLOCK_SIZE);
789 
790   for (i = 0; i < size_num; i++)
791     {
792       sp.size = size_array[i].start;
793       prev_size = -1;
794       for (;;)
795         {
796           mp_size_t  step;
797 
798           if (option_data == DATA_2FD && sp.size >= 2)
799             sp.xp[sp.size-1] = 2;
800 
801           run_one (fp, &sp, prev_size);
802           prev_size = sp.size;
803 
804           if (option_data == DATA_2FD && sp.size >= 2)
805             sp.xp[sp.size-1] = MP_LIMB_T_MAX;
806 
807           if (option_factor != 0.0)
808             {
809               step = (mp_size_t) (sp.size * option_factor - sp.size);
810               if (step < 1)
811                 step = 1;
812             }
813           else
814             step = 1;
815           if (step < option_step)
816             step = option_step;
817 
818           sp.size += step;
819           if (sp.size > size_array[i].end)
820             break;
821         }
822     }
823 
824   TMP_FREE;
825 }
826 
827 
828 FILE *
829 fopen_for_write (const char *filename)
830 {
831   FILE  *fp;
832   if ((fp = fopen (filename, "w")) == NULL)
833     {
834       fprintf (stderr, "Cannot create %s\n", filename);
835       exit(1);
836     }
837   return fp;
838 }
839 
840 void
841 fclose_written (FILE *fp, const char *filename)
842 {
843   int  err;
844 
845   err = ferror (fp);
846   err |= fclose (fp);
847 
848   if (err)
849     {
850       fprintf (stderr, "Error writing %s\n", filename);
851       exit(1);
852     }
853 }
854 
855 
856 void
857 run_gnuplot (int argc, char *argv[])
858 {
859   char  *plot_filename;
860   char  *data_filename;
861   FILE  *fp;
862   int   i;
863 
864   plot_filename = (char *) (*__gmp_allocate_func)
865     (strlen (option_gnuplot_basename) + 20);
866   data_filename = (char *) (*__gmp_allocate_func)
867     (strlen (option_gnuplot_basename) + 20);
868 
869   sprintf (plot_filename, "%s.gnuplot", option_gnuplot_basename);
870   sprintf (data_filename, "%s.data",    option_gnuplot_basename);
871 
872   fp = fopen_for_write (plot_filename);
873 
874   fprintf (fp, "# Generated with:\n");
875   fprintf (fp, "#");
876   for (i = 0; i < argc; i++)
877     fprintf (fp, " %s", argv[i]);
878   fprintf (fp, "\n");
879   fprintf (fp, "\n");
880 
881   fprintf (fp, "reset\n");
882 
883   /* Putting the key at the top left is usually good, and you can change it
884      interactively if it's not. */
885   fprintf (fp, "set key left\n");
886 
887   /* designed to make it possible to see crossovers easily */
888   fprintf (fp, "set style data lines\n");
889 
890   fprintf (fp, "plot ");
891   for (i = 0; i < num_choices; i++)
892     {
893       fprintf (fp, " \"%s\" using 1:%d", data_filename, i+2);
894       fprintf (fp, " title \"%s\"", choice[i].name);
895 
896       if (i != num_choices-1)
897         fprintf (fp, ", \\");
898       fprintf (fp, "\n");
899     }
900 
901   fprintf (fp, "load \"-\"\n");
902   fclose_written (fp, plot_filename);
903 
904   fp = fopen_for_write (data_filename);
905 
906   /* Unbuffered so you can see where the program was up to if it crashes or
907      you kill it. */
908   setbuf (fp, NULL);
909 
910   run_all (fp);
911   fclose_written (fp, data_filename);
912 }
913 
914 
/* LIMB_ONES(n) is a limb with its n least significant bits set to one and
   all other bits zero, for 0 <= n <= GMP_LIMB_BITS.  The two end values are
   picked off explicitly; in particular n == GMP_LIMB_BITS must not reach
   the shift, since shifting by the full width of the type is undefined.
   Note that n is evaluated more than once.  */

#define LIMB_ONES(n) \
  ((n) == 0 ? CNST_LIMB(0)                      \
    : (n) == GMP_LIMB_BITS ? MP_LIMB_T_MAX      \
    : (CNST_LIMB(1) << (n)) - 1)
921 
922 mp_limb_t
923 r_string (const char *s)
924 {
925   const char  *s_orig = s;
926   long        n;
927 
928   if (strcmp (s, "aas") == 0)
929     return GMP_NUMB_0xAA;
930 
931   {
932     mpz_t      z;
933     mp_limb_t  l;
934     int        set, siz;
935 
936     mpz_init (z);
937     set = mpz_set_str (z, s, 0);
938     siz = SIZ(z);
939     l = (siz == 0 ? 0 : siz > 0 ? PTR(z)[0] : -PTR(z)[0]);
940     mpz_clear (z);
941     if (set == 0)
942       {
943         if (siz > 1 || siz < -1)
944           printf ("Warning, r parameter %s truncated to %d bits\n",
945                   s_orig, GMP_LIMB_BITS);
946         return l;
947       }
948   }
949 
950   if (s[0] == '0' && (s[1] == 'x' || s[1] == 'X'))
951     n = strtoul (s+2, (char **) &s, 16);
952   else
953     n = strtol (s, (char **) &s, 10);
954 
955   if (strcmp (s, "bits") == 0)
956     {
957       mp_limb_t  l;
958       if (n > GMP_LIMB_BITS)
959         {
960           fprintf (stderr, "%ld bit parameter invalid (max %d bits)\n",
961                    n, GMP_LIMB_BITS);
962           exit (1);
963         }
964       mpn_random (&l, 1);
965       return (l | (CNST_LIMB(1) << (n-1))) & LIMB_ONES(n);
966     }
967   else  if (strcmp (s, "ones") == 0)
968     {
969       if (n > GMP_LIMB_BITS)
970         {
971           fprintf (stderr, "%ld bit parameter invalid (max %d bits)\n",
972                    n, GMP_LIMB_BITS);
973           exit (1);
974         }
975       return LIMB_ONES (n);
976     }
977   else if (*s != '\0')
978     {
979       fprintf (stderr, "invalid r parameter: %s\n", s_orig);
980       exit (1);
981     }
982 
983   return n;
984 }
985 
986 
987 void
988 routine_find (struct choice_t *c, const char *s_orig)
989 {
990   const char  *s;
991   int     i;
992   size_t  nlen;
993 
994   c->name = s_orig;
995   s = strchr (s_orig, '*');
996   if (s != NULL)
997     {
998       c->scale = atof(s_orig);
999       s++;
1000     }
1001   else
1002     {
1003       c->scale = 1.0;
1004       s = s_orig;
1005     }
1006 
1007   for (i = 0; i < numberof (routine); i++)
1008     {
1009       nlen = strlen (routine[i].name);
1010       if (memcmp (s, routine[i].name, nlen) != 0)
1011         continue;
1012 
1013       if (s[nlen] == '.')
1014         {
1015           /* match, with a .r parameter */
1016 
1017           if (! (routine[i].flag & (FLAG_R|FLAG_R_OPTIONAL)))
1018             {
1019               fprintf (stderr,
1020                        "Choice %s bad: doesn't take a \".<r>\" parameter\n",
1021                        s_orig);
1022               exit (1);
1023             }
1024 
1025           c->p = &routine[i];
1026           c->r = r_string (s + nlen + 1);
1027           return;
1028         }
1029 
1030       if (s[nlen] == '\0')
1031         {
1032           /* match, with no parameter */
1033 
1034           if (routine[i].flag & FLAG_R)
1035             {
1036               fprintf (stderr,
1037                        "Choice %s bad: needs a \".<r>\" parameter\n",
1038                        s_orig);
1039               exit (1);
1040             }
1041 
1042           c->p = &routine[i];
1043           c->r = 0;
1044           return;
1045         }
1046     }
1047 
1048   fprintf (stderr, "Choice %s unrecognised\n", s_orig);
1049   exit (1);
1050 }
1051 
1052 
1053 void
1054 usage (void)
1055 {
1056   int  i;
1057 
1058   speed_time_init ();
1059 
1060   printf ("Usage: speed [-options] -s size <routine>...\n");
1061   printf ("Measure the speed of some routines.\n");
1062   printf ("Times are in seconds, accuracy is shown.\n");
1063   printf ("\n");
1064   printf ("   -p num     set precision as number of time units each routine must run\n");
1065   printf ("   -s size[-end][,size[-end]]...   sizes to measure\n");
1066   printf ("              single sizes or ranges, sep with comma or use multiple -s\n");
1067   printf ("   -t step    step through sizes by given amount\n");
1068   printf ("   -f factor  step through sizes by given factor (eg. 1.05)\n");
1069   printf ("   -r         show times as ratios of the first routine\n");
1070   printf ("   -d         show times as difference from the first routine\n");
1071   printf ("   -D         show times as difference from previous size shown\n");
1072   printf ("   -c         show times in CPU cycles\n");
1073   printf ("   -C         show times in cycles per limb\n");
1074   printf ("   -u         print resource usage (memory) at end\n");
1075   printf ("   -P name    output plot files \"name.gnuplot\" and \"name.data\"\n");
1076   printf ("   -a <type>  use given data: random(default), random2, zeros, aas, ffs, 2fd\n");
1077   printf ("   -x, -y, -w, -W <align>  specify data alignments, sources and dests\n");
1078   printf ("   -o addrs   print addresses of data blocks\n");
1079   printf ("\n");
1080   printf ("If both -t and -f are used, it means step by the factor or the step, whichever\n");
1081   printf ("is greater.\n");
1082   printf ("If both -C and -D are used, it means cycles per however many limbs between a\n");
1083   printf ("size and the previous size.\n");
1084   printf ("\n");
1085   printf ("After running with -P, plots can be viewed with Gnuplot or Quickplot.\n");
1086   printf ("\"gnuplot name.gnuplot\" (use \"set logscale xy; replot\" at the prompt for\n");
1087   printf ("a log/log plot).\n");
1088   printf ("\"quickplot -s name.data\" (has interactive zooming, and note -s is important\n");
1089   printf ("when viewing more than one routine, it means same axis scales for all data).\n");
1090   printf ("\n");
1091   printf ("The available routines are as follows.\n");
1092   printf ("\n");
1093 
1094   for (i = 0; i < numberof (routine); i++)
1095     {
1096       if (routine[i].flag & FLAG_R)
1097         printf ("\t%s.r\n", routine[i].name);
1098       else if (routine[i].flag & FLAG_R_OPTIONAL)
1099         printf ("\t%s (optional .r)\n", routine[i].name);
1100       else
1101         printf ("\t%s\n", routine[i].name);
1102     }
1103   printf ("\n");
1104   printf ("Routines with a \".r\" need an extra parameter, for example mpn_lshift.6\n");
1105   printf ("r should be in decimal, or use 0xN for hexadecimal.\n");
1106   printf ("\n");
1107   printf ("Special forms for r are \"<N>bits\" for a random N bit number, \"<N>ones\" for\n");
1108   printf ("N one bits, or \"aas\" for 0xAA..AA.\n");
1109   printf ("\n");
1110   printf ("Times for sizes out of the range accepted by a routine are shown as 0.\n");
1111   printf ("The fastest routine at each size is marked with a # (free form output only).\n");
1112   printf ("\n");
1113   printf ("%s", speed_time_string);
1114   printf ("\n");
1115   printf ("Gnuplot home page http://www.gnuplot.info/\n");
1116   printf ("Quickplot home page http://quickplot.sourceforge.net/\n");
1117 }
1118 
1119 void
1120 check_align_option (const char *name, mp_size_t align)
1121 {
1122   if (align < 0 || align > SPEED_TMP_ALLOC_ADJUST_MASK)
1123     {
1124       fprintf (stderr, "Alignment request out of range: %s %ld\n",
1125                name, (long) align);
1126       fprintf (stderr, "  should be 0 to %d (limbs), inclusive\n",
1127                SPEED_TMP_ALLOC_ADJUST_MASK);
1128       exit (1);
1129     }
1130 }
1131 
1132 int
1133 main (int argc, char *argv[])
1134 {
1135   int  i;
1136   int  opt;
1137 
1138   /* Unbuffered so output goes straight out when directed to a pipe or file
1139      and isn't lost on killing the program half way.  */
1140   setbuf (stdout, NULL);
1141 
1142   for (;;)
1143     {
1144       opt = getopt(argc, argv, "a:CcDdEFf:o:p:P:rRs:t:ux:y:w:W:z");
1145       if (opt == EOF)
1146         break;
1147 
1148       switch (opt) {
1149       case 'a':
1150         if (strcmp (optarg, "random") == 0)       option_data = DATA_RANDOM;
1151         else if (strcmp (optarg, "random2") == 0) option_data = DATA_RANDOM2;
1152         else if (strcmp (optarg, "zeros") == 0)   option_data = DATA_ZEROS;
1153         else if (strcmp (optarg, "aas") == 0)     option_data = DATA_AAS;
1154         else if (strcmp (optarg, "ffs") == 0)     option_data = DATA_FFS;
1155         else if (strcmp (optarg, "2fd") == 0)     option_data = DATA_2FD;
1156         else
1157           {
1158             fprintf (stderr, "unrecognised data option: %s\n", optarg);
1159             exit (1);
1160           }
1161         break;
1162       case 'C':
1163         if (option_unit  != UNIT_SECONDS) goto bad_unit;
1164         option_unit = UNIT_CYCLESPERLIMB;
1165         break;
1166       case 'c':
1167         if (option_unit != UNIT_SECONDS)
1168           {
1169           bad_unit:
1170             fprintf (stderr, "cannot use more than one of -c, -C\n");
1171             exit (1);
1172           }
1173         option_unit = UNIT_CYCLES;
1174         break;
1175       case 'D':
1176         if (option_cmp != CMP_ABSOLUTE) goto bad_cmp;
1177         option_cmp = CMP_DIFFPREV;
1178         break;
1179       case 'd':
1180         if (option_cmp != CMP_ABSOLUTE)
1181           {
1182           bad_cmp:
1183             fprintf (stderr, "cannot use more than one of -d, -D, -r\n");
1184             exit (1);
1185           }
1186         option_cmp = CMP_DIFFERENCE;
1187         break;
1188       case 'E':
1189         option_square = 1;
1190         break;
1191       case 'F':
1192         option_square = 2;
1193         break;
1194       case 'f':
1195         option_factor = atof (optarg);
1196         if (option_factor <= 1.0)
1197           {
1198             fprintf (stderr, "-f factor must be > 1.0\n");
1199             exit (1);
1200           }
1201         break;
1202       case 'o':
1203         speed_option_set (optarg);
1204         break;
1205       case 'P':
1206         option_gnuplot = 1;
1207         option_gnuplot_basename = optarg;
1208         break;
1209       case 'p':
1210         speed_precision = atoi (optarg);
1211         break;
1212       case 'R':
1213         option_seed = time (NULL);
1214         break;
1215       case 'r':
1216         if (option_cmp != CMP_ABSOLUTE)
1217           goto bad_cmp;
1218         option_cmp = CMP_RATIO;
1219         break;
1220       case 's':
1221         {
1222           char  *s;
1223           for (s = strtok (optarg, ","); s != NULL; s = strtok (NULL, ","))
1224             {
1225               if (size_num == size_allocnum)
1226                 {
1227                   size_array = (struct size_array_t *)
1228                     __gmp_allocate_or_reallocate
1229                     (size_array,
1230                      size_allocnum * sizeof(size_array[0]),
1231                      (size_allocnum+10) * sizeof(size_array[0]));
1232                   size_allocnum += 10;
1233                 }
1234               if (sscanf (s, "%ld-%ld",
1235                           &size_array[size_num].start,
1236                           &size_array[size_num].end) != 2)
1237                 {
1238                   size_array[size_num].start = size_array[size_num].end
1239                     = atol (s);
1240                 }
1241 
1242               if (size_array[size_num].start < 0
1243                   || size_array[size_num].end < 0
1244                   || size_array[size_num].start > size_array[size_num].end)
1245                 {
1246                   fprintf (stderr, "invalid size parameter: %s\n", s);
1247                   exit (1);
1248                 }
1249 
1250               size_num++;
1251             }
1252         }
1253         break;
1254       case 't':
1255         option_step = atol (optarg);
1256         if (option_step < 1)
1257           {
1258             fprintf (stderr, "-t step must be >= 1\n");
1259             exit (1);
1260           }
1261         break;
1262       case 'u':
1263         option_resource_usage = 1;
1264         break;
1265       case 'z':
1266         sp.cache = 1;
1267         break;
1268       case 'x':
1269         sp.align_xp = atol (optarg);
1270         check_align_option ("-x", sp.align_xp);
1271         break;
1272       case 'y':
1273         sp.align_yp = atol (optarg);
1274         check_align_option ("-y", sp.align_yp);
1275         break;
1276       case 'w':
1277         sp.align_wp = atol (optarg);
1278         check_align_option ("-w", sp.align_wp);
1279         break;
1280       case 'W':
1281         sp.align_wp2 = atol (optarg);
1282         check_align_option ("-W", sp.align_wp2);
1283         break;
1284       case '?':
1285         exit(1);
1286       }
1287     }
1288 
1289   if (optind >= argc)
1290     {
1291       usage ();
1292       exit (1);
1293     }
1294 
1295   if (size_num == 0)
1296     {
1297       fprintf (stderr, "-s <size> must be specified\n");
1298       exit (1);
1299     }
1300 
1301   gmp_randinit_default (__gmp_rands);
1302   __gmp_rands_initialized = 1;
1303   gmp_randseed_ui (__gmp_rands, option_seed);
1304 
1305   choice = (struct choice_t *) (*__gmp_allocate_func)
1306     ((argc - optind) * sizeof(choice[0]));
1307   for ( ; optind < argc; optind++)
1308     {
1309       struct choice_t  c;
1310       routine_find (&c, argv[optind]);
1311       choice[num_choices] = c;
1312       num_choices++;
1313     }
1314 
1315   if ((option_cmp == CMP_RATIO || option_cmp == CMP_DIFFERENCE) &&
1316       num_choices < 2)
1317     {
1318       fprintf (stderr, "WARNING, -d or -r does nothing when only one routine requested\n");
1319     }
1320 
1321   speed_time_init ();
1322   if (option_unit == UNIT_CYCLES || option_unit == UNIT_CYCLESPERLIMB)
1323     speed_cycletime_need_cycles ();
1324   else
1325     speed_cycletime_need_seconds ();
1326 
1327   if (option_gnuplot)
1328     {
1329       run_gnuplot (argc, argv);
1330     }
1331   else
1332     {
1333       if (option_unit == UNIT_SECONDS)
1334         printf ("overhead %.9f secs", speed_measure (speed_noop, NULL));
1335       else
1336         printf ("overhead %.2f cycles",
1337                 speed_measure (speed_noop, NULL) / speed_cycletime);
1338       printf (", precision %d units of %.2e secs",
1339               speed_precision, speed_unittime);
1340 
1341       if (speed_cycletime == 1.0 || speed_cycletime == 0.0)
1342         printf (", CPU freq unknown\n");
1343       else
1344         printf (", CPU freq %.2f MHz\n", 1e-6/speed_cycletime);
1345 
1346       printf ("       ");
1347       for (i = 0; i < num_choices; i++)
1348         printf (" %*s", COLUMN_WIDTH, choice[i].name);
1349       printf ("\n");
1350 
1351       run_all (stdout);
1352     }
1353 
1354   if (option_resource_usage)
1355     {
1356 #if HAVE_GETRUSAGE
1357       {
1358         /* This doesn't give data sizes on linux 2.0.x, only utime. */
1359         struct rusage  r;
1360         if (getrusage (RUSAGE_SELF, &r) != 0)
1361           perror ("getrusage");
1362         else
1363           printf ("getrusage(): utime %ld.%06ld data %ld stack %ld maxresident %ld\n",
1364                   r.ru_utime.tv_sec, r.ru_utime.tv_usec,
1365                   r.ru_idrss, r.ru_isrss, r.ru_ixrss);
1366       }
1367 #else
1368       printf ("getrusage() not available\n");
1369 #endif
1370 
1371       /* Linux kernel. */
1372       {
1373         char  buf[128];
1374         sprintf (buf, "/proc/%d/status", getpid());
1375         if (access (buf, R_OK) == 0)
1376           {
1377             sprintf (buf, "cat /proc/%d/status", getpid());
1378             system (buf);
1379           }
1380 
1381       }
1382     }
1383 
1384   return 0;
1385 }
1386