xref: /netbsd-src/external/lgpl3/gmp/dist/tune/time.c (revision c3ab26950fe8540fb553d1d1dcae454bc98e5a25)
/* Time routines for speed measurements.
2 
3 Copyright 1999, 2000, 2001, 2002, 2003, 2004, 2010, 2011, 2012 Free Software
4 Foundation, Inc.
5 
6 This file is part of the GNU MP Library.
7 
8 The GNU MP Library is free software; you can redistribute it and/or modify
9 it under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or (at your
11 option) any later version.
12 
13 The GNU MP Library is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
16 License for more details.
17 
18 You should have received a copy of the GNU Lesser General Public License
19 along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
20 
21 
22 /* Usage:
23 
24    The code in this file implements the lowest level of time measuring,
25    simple one-time measuring of time between two points.
26 
27    void speed_starttime (void)
28    double speed_endtime (void)
29        Call speed_starttime to start measuring, and then call speed_endtime
30        when done.
31 
32        speed_endtime returns the time taken, in seconds.  Or if the timebase
33        is in CPU cycles and the CPU frequency is unknown then speed_endtime
34        returns cycles.  Applications can identify the cycles return by
35        checking for speed_cycletime (described below) equal to 1.0.
36 
37        If some sort of temporary glitch occurs then speed_endtime returns
38        0.0.  Currently this is for various cases where a negative time has
39        occurred.  This unfortunately occurs with getrusage on some systems,
40        and with the hppa cycle counter on hpux.
41 
42    double speed_cycletime
43        The time in seconds for each CPU cycle.  For example on a 100 MHz CPU
44        this would be 1.0e-8.
45 
46        If the CPU frequency is unknown, then speed_cycletime is either 0.0
47        or 1.0.  It's 0.0 when speed_endtime is returning seconds, or it's
48        1.0 when speed_endtime is returning cycles.
49 
50        It may be noted that "speed_endtime() / speed_cycletime" gives a
51        measured time in cycles, irrespective of whether speed_endtime is
52        returning cycles or seconds.  (Assuming cycles can be had, ie. it's
53        either cycles already or the cpu frequency is known.  See also
54        speed_cycletime_need_cycles below.)
55 
56    double speed_unittime
57        The unit of time measurement accuracy for the timing method in use.
58        This is in seconds or cycles, as per speed_endtime.
59 
60    char speed_time_string[]
61        A null-terminated string describing the time method in use.
62 
63    void speed_time_init (void)
64        Initialize time measuring.  speed_starttime() does this
65        automatically, so it's only needed if an application wants to inspect
66        the above global variables before making a measurement.
67 
68    int speed_precision
69        The intended accuracy of time measurements.  speed_measure() in
70        common.c for instance runs target routines with enough repetitions so
71        it takes at least "speed_unittime * speed_precision" (this expression
72        works for both cycles or seconds from speed_endtime).
73 
       A program can provide an option so the user can set speed_precision.
75        If speed_precision is zero when speed_time_init or speed_starttime
76        first run then it gets a default based on the measuring method
77        chosen.  (More precision for higher accuracy methods.)
78 
79    void speed_cycletime_need_seconds (void)
80        Call this to demand that speed_endtime will return seconds, and not
81        cycles.  If only cycles are available then an error is printed and
82        the program exits.
83 
84    void speed_cycletime_need_cycles (void)
85        Call this to demand that speed_cycletime is non-zero, so that
86        "speed_endtime() / speed_cycletime" will give times in cycles.
87 
88 
89 
90    Notes:
91 
92    Various combinations of cycle counter, read_real_time(), getrusage(),
93    gettimeofday() and times() can arise, according to which are available
94    and their precision.
95 
96 
97    Allowing speed_endtime() to return either seconds or cycles is only a
98    slight complication and makes it possible for the speed program to do
99    some sensible things without demanding the CPU frequency.  If seconds are
100    being measured then it can always print seconds, and if cycles are being
101    measured then it can always print them without needing to know how long
102    they are.  Also the tune program doesn't care at all what the units are.
103 
104    GMP_CPU_FREQUENCY can always be set when the automated methods in freq.c
105    fail.  This will be needed if times in seconds are wanted but a cycle
106    counter is being used, or if times in cycles are wanted but getrusage or
107    another seconds based timer is in use.
108 
109    If the measuring method uses a cycle counter but supplements it with
110    getrusage or the like, then knowing the CPU frequency is mandatory since
111    the code compares values from the two.
112 
113 
114    Not done:
115 
116    Solaris gethrtime() seems no more than a slow way to access the Sparc V9
117    cycle counter.  gethrvtime() seems to be relevant only to light weight
118    processes, it doesn't for instance give nanosecond virtual time.  So
119    neither of these are used.
120 
121 
122    Bugs:
123 
124    getrusage_microseconds_p is fundamentally flawed, getrusage and
125    gettimeofday can have resolutions other than clock ticks or microseconds,
126    for instance IRIX 5 has a tick of 10 ms but a getrusage of 1 ms.
127 
128 
129    Enhancements:
130 
131    The SGI hardware counter has 64 bits on some machines, which could be
132    used when available.  But perhaps 32 bits is enough range, and then rely
133    on the getrusage supplement.
134 
135    Maybe getrusage (or times) should be used as a supplement for any
136    wall-clock measuring method.  Currently a wall clock with a good range
137    (eg. a 64-bit cycle counter) is used without a supplement.
138 
139    On PowerPC the timebase registers could be used, but would have to do
140    something to find out the speed.  On 6xx chips it's normally 1/4 bus
141    speed, on 4xx chips it's either that or an external clock.  Measuring
142    against gettimeofday might be ok.  */
143 
144 
145 #include "config.h"
146 
147 #include <errno.h>
148 #include <setjmp.h>
149 #include <signal.h>
150 #include <stddef.h>
151 #include <stdio.h>
152 #include <string.h>
153 #include <stdlib.h> /* for getenv() */
154 
155 #if HAVE_FCNTL_H
156 #include <fcntl.h>  /* for open() */
157 #endif
158 
159 #if HAVE_STDINT_H
160 #include <stdint.h> /* for uint64_t */
161 #endif
162 
163 #if HAVE_UNISTD_H
164 #include <unistd.h> /* for sysconf() */
165 #endif
166 
167 #include <sys/types.h>
168 
169 #if TIME_WITH_SYS_TIME
170 # include <sys/time.h>  /* for struct timeval */
171 # include <time.h>
172 #else
173 # if HAVE_SYS_TIME_H
174 #  include <sys/time.h>
175 # else
176 #  include <time.h>
177 # endif
178 #endif
179 
180 #if HAVE_SYS_MMAN_H
181 #include <sys/mman.h>      /* for mmap() */
182 #endif
183 
184 #if HAVE_SYS_RESOURCE_H
185 #include <sys/resource.h>  /* for struct rusage */
186 #endif
187 
188 #if HAVE_SYS_SYSSGI_H
189 #include <sys/syssgi.h>    /* for syssgi() */
190 #endif
191 
192 #if HAVE_SYS_SYSTEMCFG_H
193 #include <sys/systemcfg.h> /* for RTC_POWER on AIX */
194 #endif
195 
196 #if HAVE_SYS_TIMES_H
197 #include <sys/times.h>  /* for times() and struct tms */
198 #endif
199 
200 #include "gmp.h"
201 #include "gmp-impl.h"
202 
203 #include "speed.h"
204 
205 
/* strerror is only used for some stuff on newish systems, no need to have a
   proper replacement */
#if ! HAVE_STRERROR
#define strerror(n)  "<strerror not available>"
#endif


/* Public state of the timing layer, see the usage notes at the top of this
   file for the meaning of each variable.  */
char    speed_time_string[256];  /* description of the time method in use */
int     speed_precision = 0;     /* 0 means not chosen yet, a default is
				    applied on initialization */
double  speed_unittime;          /* measurement accuracy, seconds or cycles */
double  speed_cycletime = 0.0;   /* seconds per cycle, or 0.0/1.0 if the CPU
				    frequency is unknown */


/* don't rely on "unsigned" to "double" conversion, it's broken in SunOS 4
   native cc */
/* (INT_MAX+1)*2 as a double.  NOTE(review): presumably meant as
   UINT_MAX+1, which holds when unsigned has exactly one more value bit than
   int -- confirm for unusual targets.  */
#define M_2POWU   (((double) INT_MAX + 1.0) * 2.0)

/* 2^32 and 2^64 as doubles */
#define M_2POW32  4294967296.0
#define M_2POW64  (M_2POW32 * M_2POW32)
225 
226 
/* Conditionals for the time functions available are done with normal C
   code, which is a lot easier than wildly nested preprocessor directives.

   The choice of what to use is partly made at run-time, according to
   whether the cycle counter works and the measured accuracy of getrusage
   and gettimeofday.

   A routine that's not available won't be getting called, but is an abort()
   to be sure it isn't called mistakenly.

   It can be assumed that if a function exists then its data type will, but
   if the function doesn't then the data type might or might not exist, so
   the type can't be used unconditionally.  The "struct_rusage" etc macros
   provide dummies when the respective function doesn't exist. */


/* have_cycles carries the value of HAVE_SPEED_CYCLECOUNTER itself (not just
   a 0/1 flag) when a cycle counter is configured.  Without one,
   speed_cyclecounter() is replaced by an assertion failure.  */
#if HAVE_SPEED_CYCLECOUNTER
static const int have_cycles = HAVE_SPEED_CYCLECOUNTER;
#else
static const int have_cycles = 0;
#define speed_cyclecounter(p)  ASSERT_FAIL (speed_cyclecounter not available)
#endif

/* "stck" returns ticks since 1 Jan 1900 00:00 GMT, where each tick is 2^-12
   microseconds.  Same #ifdefs here as in longlong.h.  */
#if defined (__GNUC__) && ! defined (NO_ASM)                            \
  && (defined (__i370__) || defined (__s390__) || defined (__mvs__))
static const int  have_stck = 1;
static const int  use_stck = 1;  /* always use when available */
typedef uint64_t  stck_t; /* gcc for s390 is quite new, always has uint64_t */
/* Store the current clock value into "timestamp" (an lvalue of type
   stck_t) using the stck instruction.  */
#define STCK(timestamp)                 \
  do {                                  \
    asm ("stck %0" : "=Q" (timestamp)); \
  } while (0)
#else
static const int  have_stck = 0;
static const int  use_stck = 0;
typedef unsigned long  stck_t;   /* dummy */
#define STCK(timestamp)  ASSERT_FAIL (stck instruction not available)
#endif
#define STCK_PERIOD      (1.0 / 4096e6)   /* 2^-12 microseconds */
268 
/* mftb
   Enhancement: On 64-bit chips mftb gives a 64-bit value, no need for mftbu
   and a loop (see powerpc64.asm).

   MFTB(a) stores the low word of the time base in a[0] and the high word in
   a[1].  The gcc asm version reads upper, lower, upper and retries whenever
   the two upper reads differ, so that a carry from the low word into the
   high word between the reads cannot yield an inconsistent pair.  On
   non-powerpc builds MFTB zeros a[] and then fails an assertion.  */
#if HAVE_HOST_CPU_FAMILY_powerpc
static const int  have_mftb = 1;
#if defined (__GNUC__) && ! defined (NO_ASM)
#define MFTB(a)                         \
  do {                                  \
    unsigned  __h1, __l, __h2;          \
    do {                                \
      asm volatile ("mftbu %0\n"        \
		    "mftb  %1\n"        \
		    "mftbu %2"          \
		    : "=r" (__h1),      \
		      "=r" (__l),       \
		      "=r" (__h2));     \
    } while (__h1 != __h2);             \
    a[0] = __l;                         \
    a[1] = __h1;                        \
  } while (0)
#else
/* no gcc inline asm, use an out-of-line routine (defined elsewhere) */
#define MFTB(a)   mftb_function (a)
#endif
#else /* ! powerpc */
static const int  have_mftb = 0;
#define MFTB(a)                         \
  do {                                  \
    a[0] = 0;                           \
    a[1] = 0;                           \
    ASSERT_FAIL (mftb not available);   \
  } while (0)
#endif
301 
/* Unicos 10.X has syssgi(), but not mmap(). */
/* Both are needed here: sgi_works_p() maps the counter found via syssgi(). */
#if HAVE_SYSSGI && HAVE_MMAP
static const int  have_sgi = 1;
#else
static const int  have_sgi = 0;
#endif

/* read_real_time() and friends.  When unavailable, the functions become
   assertion failures and just enough of the constants and of
   timebasestruct_t is provided as dummies for the code using them to
   compile.  */
#if HAVE_READ_REAL_TIME
static const int have_rrt = 1;
#else
static const int have_rrt = 0;
#define read_real_time(t,s)     ASSERT_FAIL (read_real_time not available)
#define time_base_to_time(t,s)  ASSERT_FAIL (time_base_to_time not available)
#define RTC_POWER     1
#define RTC_POWER_PC  2
#define timebasestruct_t   struct timebasestruct_dummy
struct timebasestruct_dummy {
  int             flag;
  unsigned int    tb_high;
  unsigned int    tb_low;
};
#endif
324 
/* For each of clock_gettime, getrusage, gettimeofday and times: a have_xxx
   constant saying whether the function was found, and a struct_xxx macro
   naming either the real data type or a dummy one defined below.  A missing
   function is replaced by a macro that fails an assertion.  */

#if HAVE_CLOCK_GETTIME
static const int have_cgt = 1;
#define struct_timespec  struct timespec
#else
static const int have_cgt = 0;
#define struct_timespec       struct timespec_dummy
/* comma expressions, so the dummies can still be used as "int" values */
#define clock_gettime(id,ts)  (ASSERT_FAIL (clock_gettime not available), -1)
#define clock_getres(id,ts)   (ASSERT_FAIL (clock_getres not available), -1)
#endif

#if HAVE_GETRUSAGE
static const int have_grus = 1;
#define struct_rusage   struct rusage
#else
static const int have_grus = 0;
#define getrusage(n,ru)  ASSERT_FAIL (getrusage not available)
#define struct_rusage    struct rusage_dummy
#endif

#if HAVE_GETTIMEOFDAY
static const int have_gtod = 1;
#define struct_timeval   struct timeval
#else
static const int have_gtod = 0;
#define gettimeofday(tv,tz)  ASSERT_FAIL (gettimeofday not available)
#define struct_timeval   struct timeval_dummy
#endif

#if HAVE_TIMES
static const int have_times = 1;
#define struct_tms   struct tms
#else
static const int have_times = 0;
#define times(tms)   ASSERT_FAIL (times not available)
#define struct_tms   struct tms_dummy
#endif

/* Dummy types, declared unconditionally.  They carry only the fields named
   here, enough for code written against the real types to compile.  */
struct tms_dummy {
  long  tms_utime;
};
struct timeval_dummy {
  long  tv_sec;
  long  tv_usec;
};
/* uses struct_timeval, so ru_utime is whichever timeval type is in effect */
struct rusage_dummy {
  struct_timeval ru_utime;
};
struct timespec_dummy {
  long  tv_sec;
  long  tv_nsec;
};
376 
/* Which measuring methods are in use.  These are plain variables rather
   than constants since the choice is partly made at run time.
   NOTE(review): presumably set by speed_time_init(), which is not fully
   visible here -- confirm.  */
static int  use_cycles;
static int  use_mftb;
static int  use_sgi;
static int  use_rrt;
static int  use_cgt;
static int  use_gtod;
static int  use_grus;
static int  use_times;
static int  use_tick_boundary;

/* One start value per measuring method, each in that method's own data
   type (real or dummy, per the struct_xxx macros).  */
static unsigned         start_cycles[2];
static stck_t           start_stck;
static unsigned         start_mftb[2];
static unsigned         start_sgi;
static timebasestruct_t start_rrt;
static struct_timespec  start_cgt;
static struct_rusage    start_grus;
static struct_timeval   start_gtod;
static struct_tms       start_times;

/* Per-method time units.  mftb_unittime, sgi_unittime and cgt_unittime are
   filled in by the corresponding xxx_works_p() probes below.
   NOTE(review): the use of cycles_limit is not visible in this part of the
   file; 1e100 looks like an "effectively no limit" default -- confirm.  */
static double  cycles_limit = 1e100;
static double  mftb_unittime;
static double  sgi_unittime;
static double  cgt_unittime;
static double  grus_unittime;
static double  gtod_unittime;
static double  times_unittime;

/* for RTC_POWER format, ie. seconds and nanoseconds */
#define TIMEBASESTRUCT_SECS(t)  ((t)->tb_high + (t)->tb_low * 1e-9)
407 
408 
/* Return a string representing a time in seconds, nicely formatted.
   Eg. "10.25ms".

   The value is scaled to ns, us, ms or s and printed with about 4
   significant figures.  The result lives in a static buffer which is
   overwritten by the next call, so it must be used (or copied) before
   calling again.

   Output is bounded to the buffer size: an absurdly large T (which "%f"
   would expand to hundreds of digits) is truncated instead of overrunning
   the buffer.  */
char *
unittime_string (double t)
{
  static char  buf[128];

  const char  *unit;
  int         prec;

  /* choose units and scale */
  if (t < 1e-6)
    t *= 1e9, unit = "ns";
  else if (t < 1e-3)
    t *= 1e6, unit = "us";
  else if (t < 1.0)
    t *= 1e3, unit = "ms";
  else
    unit = "s";

  /* want 4 significant figures */
  if (t < 1.0)
    prec = 4;
  else if (t < 10.0)
    prec = 3;
  else if (t < 100.0)
    prec = 2;
  else
    prec = 1;

  /* snprintf always terminates and never writes past sizeof (buf) */
  snprintf (buf, sizeof (buf), "%.*f%s", prec, t, unit);
  return buf;
}
442 
443 
444 static jmp_buf  cycles_works_buf;
445 
446 static RETSIGTYPE
447 cycles_works_handler (int sig)
448 {
449   longjmp (cycles_works_buf, 1);
450 }
451 
452 int
453 cycles_works_p (void)
454 {
455   static int  result = -1;
456 
457   if (result != -1)
458     goto done;
459 
460   /* FIXME: On linux, the cycle counter is not saved and restored over
461    * context switches, making it almost useless for precise cputime
462    * measurements. When available, it's better to use clock_gettime,
463    * which seems to have reasonable accuracy (tested on x86_32,
464    * linux-2.6.26, glibc-2.7). However, there are also some linux
465    * systems where clock_gettime is broken in one way or the other,
466    * like CLOCK_PROCESS_CPUTIME_ID not implemented (easy case) or
467    * kind-of implemented but broken (needs code to detect that), and
468    * on those systems a wall-clock cycle counter is the least bad
469    * fallback.
470    *
471    * So we need some code to disable the cycle counter on some but not
472    * all linux systems. */
473 #ifdef SIGILL
474   {
475     RETSIGTYPE (*old_handler) (int);
476     unsigned  cycles[2];
477 
478     old_handler = signal (SIGILL, cycles_works_handler);
479     if (old_handler == SIG_ERR)
480       {
481 	if (speed_option_verbose)
482 	  printf ("cycles_works_p(): SIGILL not supported, assuming speed_cyclecounter() works\n");
483 	goto yes;
484       }
485     if (setjmp (cycles_works_buf))
486       {
487 	if (speed_option_verbose)
488 	  printf ("cycles_works_p(): SIGILL during speed_cyclecounter(), so doesn't work\n");
489 	result = 0;
490 	goto done;
491       }
492     speed_cyclecounter (cycles);
493     signal (SIGILL, old_handler);
494     if (speed_option_verbose)
495       printf ("cycles_works_p(): speed_cyclecounter() works\n");
496   }
497 #else
498 
499   if (speed_option_verbose)
500     printf ("cycles_works_p(): SIGILL not defined, assuming speed_cyclecounter() works\n");
501   goto yes;
502 #endif
503 
504  yes:
505   result = 1;
506 
507  done:
508   return result;
509 }
510 
511 
/* Clock ticks per second.  sysconf(_SC_CLK_TCK) is preferred when sysconf
   exists, with the CLK_TCK macro as the fallback.  The value is looked up
   on the first call only and remembered after that.  If neither source is
   usable the program is aborted.  */
long
clk_tck (void)
{
  static long  ticks = -1L;

  if (ticks == -1L)
    {
#if HAVE_SYSCONF
      ticks = sysconf (_SC_CLK_TCK);
      if (ticks != -1L)
	{
	  if (speed_option_verbose)
	    printf ("sysconf(_SC_CLK_TCK) is %ld per second\n", ticks);
	  return ticks;
	}

      fprintf (stderr,
	       "sysconf(_SC_CLK_TCK) not working, using CLK_TCK instead\n");
#endif

#ifdef CLK_TCK
      ticks = CLK_TCK;
      if (speed_option_verbose)
	printf ("CLK_TCK is %ld per second\n", ticks);
#else
      fprintf (stderr, "CLK_TCK not defined, cannot continue\n");
      abort ();
#endif
    }

  return ticks;
}
544 
545 
/* If two times can be observed less than half a clock tick apart, then
   assume "get" is microsecond accurate.

   Two times only 1 microsecond apart are not believed, since some kernels
   take it upon themselves to ensure gettimeofday doesn't return the same
   value twice, for the benefit of applications using it for a timestamp.
   This is obviously very stupid given the speed of CPUs these days.

   Making "reps" many calls to noop_1() is designed to waste some CPU, with
   a view to getting measurements 2 microseconds (or more) apart.  "reps" is
   increased progressively until such a period is seen.

   The outer loop "attempts" are just to allow for any random nonsense or
   system load upsetting the measurements (ie. making two successive calls
   to "get" come out as a longer interval than normal).

   Macro parameters:

   name   string naming the function under test, for verbose messages
   type   data type of the time values "get" fills in
   get    get(t) stores the current time in the lvalue t
   sec    sec(t) gives the seconds field of t
   usec   usec(t) gives the microseconds field of t

   The expansion is a complete function body returning int: 1 for
   microsecond accurate, 0 for only clock tick accurate.  The result is
   worked out on the first call and cached in a static for later calls.

   Bugs:

   The assumption that any interval less than a half tick implies
   microsecond resolution is obviously fairly rash, the true resolution
   could be anything between a microsecond and that half tick.  Perhaps
   something special would have to be done on a system where this is the
   case, since there's no obvious reliable way to detect it
   automatically.  */

#define MICROSECONDS_P(name, type, get, sec, usec)                      \
  {                                                                     \
    static int  result = -1;                                            \
    type      st, et;                                                   \
    long      dt, half_tick;                                            \
    unsigned  attempt, reps, i, j;                                      \
									\
    if (result != -1)                                                   \
      return result;                                                    \
									\
    result = 0;                                                         \
    half_tick = (1000000L / clk_tck ()) / 2;                            \
									\
    for (attempt = 0; attempt < 5; attempt++)                           \
      {                                                                 \
	reps = 0;                                                       \
	for (;;)                                                        \
	  {                                                             \
	    get (st);                                                   \
	    for (i = 0; i < reps; i++)                                  \
	      for (j = 0; j < 100; j++)                                 \
		noop_1 (CNST_LIMB(0));                                  \
	    get (et);                                                   \
									\
	    dt = (sec(et)-sec(st))*1000000L + usec(et)-usec(st);        \
									\
	    if (speed_option_verbose >= 2)                              \
	      printf ("%s attempt=%u, reps=%u, dt=%ld\n",               \
		      name, attempt, reps, dt);                         \
									\
	    if (dt >= 2)                                                \
	      break;                                                    \
									\
	    reps = (reps == 0 ? 1 : 2*reps);                            \
	    if (reps == 0)                                              \
	      break;  /* uint overflow, not normal */                   \
	  }                                                             \
									\
	if (dt < half_tick)                                             \
	  {                                                             \
	    result = 1;                                                 \
	    break;                                                      \
	  }                                                             \
      }                                                                 \
									\
    if (speed_option_verbose)                                           \
      {                                                                 \
	if (result)                                                     \
	  printf ("%s is microsecond accurate\n", name);                \
	else                                                            \
	  printf ("%s is only %s clock tick accurate\n",                \
		  name, unittime_string (1.0/clk_tck()));               \
      }                                                                 \
    return result;                                                      \
  }
626 
627 
/* Return 1 if gettimeofday appears to be microsecond accurate, or 0 if it
   only advances in clock ticks.  The whole body is the MICROSECONDS_P
   expansion, see that macro for the method and the caching.

   The helper macros below are defined again, identically, in
   freq_measure_mftb_one(); the two sets must be kept token-for-token the
   same or the second definition becomes an invalid redefinition.  */
int
gettimeofday_microseconds_p (void)
{
#define call_gettimeofday(t)   gettimeofday (&(t), NULL)
#define timeval_tv_sec(t)      ((t).tv_sec)
#define timeval_tv_usec(t)     ((t).tv_usec)
  MICROSECONDS_P ("gettimeofday", struct_timeval,
		  call_gettimeofday, timeval_tv_sec, timeval_tv_usec);
}
637 
/* Return 1 if getrusage user time (ru_utime) appears to be microsecond
   accurate, or 0 if it only advances in clock ticks.  The whole body is the
   MICROSECONDS_P expansion, see that macro for the method and the caching.
   See also the "Bugs" note at the top of the file about this test.  */
int
getrusage_microseconds_p (void)
{
  /* first argument 0 is passed as the "who" selector */
#define call_getrusage(t)   getrusage (0, &(t))
#define rusage_tv_sec(t)    ((t).ru_utime.tv_sec)
#define rusage_tv_usec(t)   ((t).ru_utime.tv_usec)
  MICROSECONDS_P ("getrusage", struct_rusage,
		  call_getrusage, rusage_tv_sec, rusage_tv_usec);
}
647 
648 /* Test whether getrusage goes backwards, return non-zero if it does
649    (suggesting it's flawed).
650 
651    On a macintosh m68040-unknown-netbsd1.4.1 getrusage looks like it's
652    microsecond accurate, but has been seen remaining unchanged after many
653    microseconds have elapsed.  It also regularly goes backwards by 1000 to
654    5000 usecs, this has been seen after between 500 and 4000 attempts taking
655    perhaps 0.03 seconds.  We consider this too broken for good measuring.
656    We used to have configure pretend getrusage didn't exist on this system,
657    but a runtime test should be more reliable, since we imagine the problem
658    is not confined to just this exact system tuple.  */
659 
660 int
661 getrusage_backwards_p (void)
662 {
663   static int result = -1;
664   struct rusage  start, prev, next;
665   long  d;
666   int   i;
667 
668   if (result != -1)
669     return result;
670 
671   getrusage (0, &start);
672   memcpy (&next, &start, sizeof (next));
673 
674   result = 0;
675   i = 0;
676   for (;;)
677     {
678       memcpy (&prev, &next, sizeof (prev));
679       getrusage (0, &next);
680 
681       if (next.ru_utime.tv_sec < prev.ru_utime.tv_sec
682 	  || (next.ru_utime.tv_sec == prev.ru_utime.tv_sec
683 	      && next.ru_utime.tv_usec < prev.ru_utime.tv_usec))
684 	{
685 	  if (speed_option_verbose)
686 	    printf ("getrusage went backwards (attempt %d: %ld.%06ld -> %ld.%06ld)\n",
687 		    i,
688 		    (long) prev.ru_utime.tv_sec, (long) prev.ru_utime.tv_usec,
689 		    (long) next.ru_utime.tv_sec, (long) next.ru_utime.tv_usec);
690 	  result = 1;
691 	  break;
692 	}
693 
694       /* minimum 1000 attempts, then stop after either 0.1 seconds or 50000
695 	 attempts, whichever comes first */
696       d = 1000000 * (next.ru_utime.tv_sec - start.ru_utime.tv_sec)
697 	+ (next.ru_utime.tv_usec - start.ru_utime.tv_usec);
698       i++;
699       if (i > 50000 || (i > 1000 && d > 100000))
700 	break;
701     }
702 
703   return result;
704 }
705 
/* CLOCK_PROCESS_CPUTIME_ID looks like it's going to be in a future version
   of glibc (some time post 2.2).

   CLOCK_VIRTUAL is process time, available in BSD systems (though sometimes
   defined, but returning -1 for an error).

   CGT_ID is the clock id passed to clock_gettime and clock_getres:
   CLOCK_PROCESS_CPUTIME_ID when defined, else CLOCK_VIRTUAL.  If neither is
   defined, have_cgt_id is 0 and CGT_ID becomes an assertion failure.  */

#ifdef CLOCK_PROCESS_CPUTIME_ID
# define CGT_ID        CLOCK_PROCESS_CPUTIME_ID
#else
# ifdef CLOCK_VIRTUAL
#  define CGT_ID       CLOCK_VIRTUAL
# endif
#endif
#ifdef CGT_ID
const int  have_cgt_id = 1;
#else
const int  have_cgt_id = 0;
# define CGT_ID       (ASSERT_FAIL (CGT_ID not determined), -1)
#endif

/* iterations of the delay loop cgt_works_p() uses as a sanity check */
#define CGT_DELAY_COUNT 1000
727 
728 int
729 cgt_works_p (void)
730 {
731   static int  result = -1;
732   struct_timespec  unit;
733 
734   if (! have_cgt)
735     return 0;
736 
737   if (! have_cgt_id)
738     {
739       if (speed_option_verbose)
740 	printf ("clock_gettime don't know what ID to use\n");
741       result = 0;
742       return result;
743     }
744 
745   if (result != -1)
746     return result;
747 
748   /* trial run to see if it works */
749   if (clock_gettime (CGT_ID, &unit) != 0)
750     {
751       if (speed_option_verbose)
752 	printf ("clock_gettime id=%d error: %s\n", CGT_ID, strerror (errno));
753       result = 0;
754       return result;
755     }
756 
757   /* get the resolution */
758   if (clock_getres (CGT_ID, &unit) != 0)
759     {
760       if (speed_option_verbose)
761 	printf ("clock_getres id=%d error: %s\n", CGT_ID, strerror (errno));
762       result = 0;
763       return result;
764     }
765 
766   cgt_unittime = unit.tv_sec + unit.tv_nsec * 1e-9;
767   printf ("clock_gettime is %s accurate\n",
768 	  unittime_string (cgt_unittime));
769 
770   if (cgt_unittime < 10e-9)
771     {
772       /* Do we believe this? */
773       struct timespec start, end;
774       static volatile int counter;
775       double duration;
776       if (clock_gettime (CGT_ID, &start))
777 	{
778 	  if (speed_option_verbose)
779 	    printf ("clock_gettime id=%d error: %s\n", CGT_ID, strerror (errno));
780 	  result = 0;
781 	  return result;
782 	}
783       /* Loop of at least 1000 memory accesses, ought to take at
784 	 least 100 ns*/
785       for (counter = 0; counter < CGT_DELAY_COUNT; counter++)
786 	;
787       if (clock_gettime (CGT_ID, &end))
788 	{
789 	  if (speed_option_verbose)
790 	    printf ("clock_gettime id=%d error: %s\n", CGT_ID, strerror (errno));
791 	  result = 0;
792 	  return result;
793 	}
794       duration = (end.tv_sec + end.tv_nsec * 1e-9
795 		  - start.tv_sec - start.tv_nsec * 1e-9);
796       if (speed_option_verbose)
797 	printf ("delay loop of %d rounds took %s (according to clock_get_time)\n",
798 		CGT_DELAY_COUNT, unittime_string (duration));
799       if (duration < 100e-9)
800 	{
801 	  if (speed_option_verbose)
802 	    printf ("clock_gettime id=%d not believable\n", CGT_ID);
803 	  result = 0;
804 	  return result;
805 	}
806     }
807   result = 1;
808   return result;
809 }
810 
811 
/* One measurement of the mftb time base against gettimeofday, in the form
   of a callback for freq_measure() (see mftb_works_p below).  The whole
   body is the FREQ_MEASURE_ONE expansion.  NOTE(review): that macro is
   defined outside this file, presumably in speed.h -- see there for the
   method and the return value.

   The helper macros are the same ones gettimeofday_microseconds_p()
   defines; the definitions must stay token-for-token identical or this
   becomes an invalid redefinition.  */
static double
freq_measure_mftb_one (void)
{
#define call_gettimeofday(t)   gettimeofday (&(t), NULL)
#define timeval_tv_sec(t)      ((t).tv_sec)
#define timeval_tv_usec(t)     ((t).tv_usec)
  FREQ_MEASURE_ONE ("mftb", struct_timeval,
		    call_gettimeofday, MFTB,
		    timeval_tv_sec, timeval_tv_usec);
}
822 
823 
824 static jmp_buf  mftb_works_buf;
825 
826 static RETSIGTYPE
827 mftb_works_handler (int sig)
828 {
829   longjmp (mftb_works_buf, 1);
830 }
831 
832 int
833 mftb_works_p (void)
834 {
835   unsigned   a[2];
836   RETSIGTYPE (*old_handler) (int);
837   double     cycletime;
838 
839   /* suppress a warning about a[] unused */
840   a[0] = 0;
841 
842   if (! have_mftb)
843     return 0;
844 
845 #ifdef SIGILL
846   old_handler = signal (SIGILL, mftb_works_handler);
847   if (old_handler == SIG_ERR)
848     {
849       if (speed_option_verbose)
850 	printf ("mftb_works_p(): SIGILL not supported, assuming mftb works\n");
851       return 1;
852     }
853   if (setjmp (mftb_works_buf))
854     {
855       if (speed_option_verbose)
856 	printf ("mftb_works_p(): SIGILL during mftb, so doesn't work\n");
857       return 0;
858     }
859   MFTB (a);
860   signal (SIGILL, old_handler);
861   if (speed_option_verbose)
862     printf ("mftb_works_p(): mftb works\n");
863 #else
864 
865   if (speed_option_verbose)
866     printf ("mftb_works_p(): SIGILL not defined, assuming mftb works\n");
867 #endif
868 
869 #if ! HAVE_GETTIMEOFDAY
870   if (speed_option_verbose)
871     printf ("mftb_works_p(): no gettimeofday available to measure mftb\n");
872   return 0;
873 #endif
874 
875   /* The time base is normally 1/4 of the bus speed on 6xx and 7xx chips, on
876      other chips it can be driven from an external clock. */
877   cycletime = freq_measure ("mftb", freq_measure_mftb_one);
878   if (cycletime == -1.0)
879     {
880       if (speed_option_verbose)
881 	printf ("mftb_works_p(): cannot measure mftb period\n");
882       return 0;
883     }
884 
885   mftb_unittime = cycletime;
886   return 1;
887 }
888 
889 
/* Address of the least significant 32 bits of the memory-mapped SGI
   hardware cycle counter.  Set by sgi_works_p() when it succeeds.  */
volatile unsigned  *sgi_addr;

/* Return 1 if the SGI hardware cycle counter has been located and mapped,
   0 if not (always 0 when syssgi() or mmap() is unavailable).  On success
   sgi_addr points at the counter and sgi_unittime is its period in
   seconds.  The work is done on the first call and the answer cached.

   The /dev/mmem descriptor is only needed to establish the mapping, so it
   is closed as soon as mmap() has returned, whether it succeeded or not.  */
int
sgi_works_p (void)
{
#if HAVE_SYSSGI && HAVE_MMAP
  static int  result = -1;

  size_t          pagesize, offset;
  __psunsigned_t  phys, physpage;
  void            *virtpage;
  unsigned        period_picoseconds;
  int             size, fd, mmap_errno;

  if (result != -1)
    return result;

  phys = syssgi (SGI_QUERY_CYCLECNTR, &period_picoseconds);
  if (phys == (__psunsigned_t) -1)
    {
      /* ENODEV is the error when a counter is not available */
      if (speed_option_verbose)
	printf ("syssgi SGI_QUERY_CYCLECNTR error: %s\n", strerror (errno));
      result = 0;
      return result;
    }
  sgi_unittime = period_picoseconds * 1e-12;

  /* IRIX 5 doesn't have SGI_CYCLECNTR_SIZE, assume 32 bits in that case.
     Challenge/ONYX hardware has a 64 bit byte counter, but there seems no
     obvious way to identify that without SGI_CYCLECNTR_SIZE.  */
#ifdef SGI_CYCLECNTR_SIZE
  size = syssgi (SGI_CYCLECNTR_SIZE);
  if (size == -1)
    {
      if (speed_option_verbose)
	{
	  printf ("syssgi SGI_CYCLECNTR_SIZE error: %s\n", strerror (errno));
	  /* size is counted in bits throughout this function */
	  printf ("    will assume size==32\n");
	}
      size = 32;
    }
#else
  size = 32;
#endif

  if (size < 32)
    {
      printf ("syssgi SGI_CYCLECNTR_SIZE gives %d, expected 32 or 64\n", size);
      result = 0;
      return result;
    }

  /* split the physical address into a page base to map and an offset */
  pagesize = getpagesize();
  offset = (size_t) phys & (pagesize-1);
  physpage = phys - offset;

  /* shouldn't cross over a page boundary */
  ASSERT_ALWAYS (offset + size/8 <= pagesize);

  fd = open("/dev/mmem", O_RDONLY);
  if (fd == -1)
    {
      if (speed_option_verbose)
	printf ("open /dev/mmem: %s\n", strerror (errno));
      result = 0;
      return result;
    }

  virtpage = mmap (0, pagesize, PROT_READ, MAP_PRIVATE, fd, (off_t) physpage);
  mmap_errno = errno;  /* close() below might change errno */

  /* an established mapping doesn't depend on the descriptor staying open */
  close (fd);

  if (virtpage == (void *) -1)
    {
      if (speed_option_verbose)
	printf ("mmap /dev/mmem: %s\n", strerror (mmap_errno));
      result = 0;
      return result;
    }

  /* address of least significant 4 bytes, knowing mips is big endian */
  sgi_addr = (unsigned *) ((char *) virtpage + offset
			   + size/8 - sizeof(unsigned));
  result = 1;
  return result;

#else /* ! (HAVE_SYSSGI && HAVE_MMAP) */
  return 0;
#endif
}
978 
979 
/* Store N in VAR, but only when VAR currently compares equal to zero; a
   VAR already holding a non-zero value is left alone.  */
#define DEFAULT(var,n)          \
  do {                          \
    if ((var) == 0)             \
      (var) = (n);              \
  } while (0)
985 
986 void
987 speed_time_init (void)
988 {
989   double supplement_unittime = 0.0;
990 
991   static int  speed_time_initialized = 0;
992   if (speed_time_initialized)
993     return;
994   speed_time_initialized = 1;
995 
996   speed_cycletime_init ();
997 
998   if (!speed_option_cycles_broken && have_cycles && cycles_works_p ())
999     {
1000       use_cycles = 1;
1001       DEFAULT (speed_cycletime, 1.0);
1002       speed_unittime = speed_cycletime;
1003       DEFAULT (speed_precision, 10000);
1004       strcpy (speed_time_string, "CPU cycle counter");
1005 
1006       /* only used if a supplementary method is chosen below */
1007       cycles_limit = (have_cycles == 1 ? M_2POW32 : M_2POW64) / 2.0
1008 	* speed_cycletime;
1009 
1010       if (have_grus && getrusage_microseconds_p() && ! getrusage_backwards_p())
1011 	{
1012 	  /* this is a good combination */
1013 	  use_grus = 1;
1014 	  supplement_unittime = grus_unittime = 1.0e-6;
1015 	  strcpy (speed_time_string, "CPU cycle counter, supplemented by microsecond getrusage()");
1016 	}
1017       else if (have_cycles == 1)
1018 	{
1019 	  /* When speed_cyclecounter has a limited range, look for something
1020 	     to supplement it. */
1021 	  if (have_gtod && gettimeofday_microseconds_p())
1022 	    {
1023 	      use_gtod = 1;
1024 	      supplement_unittime = gtod_unittime = 1.0e-6;
1025 	      strcpy (speed_time_string, "CPU cycle counter, supplemented by microsecond gettimeofday()");
1026 	    }
1027 	  else if (have_grus)
1028 	    {
1029 	      use_grus = 1;
1030 	      supplement_unittime = grus_unittime = 1.0 / (double) clk_tck ();
1031 	      sprintf (speed_time_string, "CPU cycle counter, supplemented by %s clock tick getrusage()", unittime_string (supplement_unittime));
1032 	    }
1033 	  else if (have_times)
1034 	    {
1035 	      use_times = 1;
1036 	      supplement_unittime = times_unittime = 1.0 / (double) clk_tck ();
1037 	      sprintf (speed_time_string, "CPU cycle counter, supplemented by %s clock tick times()", unittime_string (supplement_unittime));
1038 	    }
1039 	  else if (have_gtod)
1040 	    {
1041 	      use_gtod = 1;
1042 	      supplement_unittime = gtod_unittime = 1.0 / (double) clk_tck ();
1043 	      sprintf (speed_time_string, "CPU cycle counter, supplemented by %s clock tick gettimeofday()", unittime_string (supplement_unittime));
1044 	    }
1045 	  else
1046 	    {
1047 	      fprintf (stderr, "WARNING: cycle counter is 32 bits and there's no other functions.\n");
1048 	      fprintf (stderr, "    Wraparounds may produce bad results on long measurements.\n");
1049 	    }
1050 	}
1051 
1052       if (use_grus || use_times || use_gtod)
1053 	{
1054 	  /* must know cycle period to compare cycles to other measuring
1055 	     (via cycles_limit) */
1056 	  speed_cycletime_need_seconds ();
1057 
1058 	  if (speed_precision * supplement_unittime > cycles_limit)
1059 	    {
1060 	      fprintf (stderr, "WARNING: requested precision can't always be achieved due to limited range\n");
1061 	      fprintf (stderr, "    cycle counter and limited precision supplemental method\n");
1062 	      fprintf (stderr, "    (%s)\n", speed_time_string);
1063 	    }
1064 	}
1065     }
1066   else if (have_stck)
1067     {
1068       strcpy (speed_time_string, "STCK timestamp");
1069       /* stck is in units of 2^-12 microseconds, which is very likely higher
1070 	 resolution than a cpu cycle */
1071       if (speed_cycletime == 0.0)
1072 	speed_cycletime_fail
1073 	  ("Need to know CPU frequency for effective stck unit");
1074       speed_unittime = MAX (speed_cycletime, STCK_PERIOD);
1075       DEFAULT (speed_precision, 10000);
1076     }
1077   else if (have_mftb && mftb_works_p ())
1078     {
1079       use_mftb = 1;
1080       DEFAULT (speed_precision, 10000);
1081       speed_unittime = mftb_unittime;
1082       sprintf (speed_time_string, "mftb counter (%s)",
1083 	       unittime_string (speed_unittime));
1084     }
1085   else if (have_sgi && sgi_works_p ())
1086     {
1087       use_sgi = 1;
1088       DEFAULT (speed_precision, 10000);
1089       speed_unittime = sgi_unittime;
1090       sprintf (speed_time_string, "syssgi() mmap counter (%s), supplemented by millisecond getrusage()",
1091 	       unittime_string (speed_unittime));
1092       /* supplemented with getrusage, which we assume to have 1ms resolution */
1093       use_grus = 1;
1094       supplement_unittime = 1e-3;
1095     }
1096   else if (have_rrt)
1097     {
1098       timebasestruct_t  t;
1099       use_rrt = 1;
1100       DEFAULT (speed_precision, 10000);
1101       read_real_time (&t, sizeof(t));
1102       switch (t.flag) {
1103       case RTC_POWER:
1104 	/* FIXME: What's the actual RTC resolution? */
1105 	speed_unittime = 1e-7;
1106 	strcpy (speed_time_string, "read_real_time() power nanoseconds");
1107 	break;
1108       case RTC_POWER_PC:
1109 	t.tb_high = 1;
1110 	t.tb_low = 0;
1111 	time_base_to_time (&t, sizeof(t));
1112 	speed_unittime = TIMEBASESTRUCT_SECS(&t) / M_2POW32;
1113 	sprintf (speed_time_string, "%s read_real_time() powerpc ticks",
1114 		 unittime_string (speed_unittime));
1115 	break;
1116       default:
1117 	fprintf (stderr, "ERROR: Unrecognised timebasestruct_t flag=%d\n",
1118 		 t.flag);
1119 	abort ();
1120       }
1121     }
1122   else if (have_cgt && cgt_works_p() && cgt_unittime < 1.5e-6)
1123     {
1124       /* use clock_gettime if microsecond or better resolution */
1125     choose_cgt:
1126       use_cgt = 1;
1127       speed_unittime = cgt_unittime;
1128       DEFAULT (speed_precision, (cgt_unittime <= 0.1e-6 ? 10000 : 1000));
1129       strcpy (speed_time_string, "microsecond accurate clock_gettime()");
1130     }
1131   else if (have_times && clk_tck() > 1000000)
1132     {
1133       /* Cray vector systems have times() which is clock cycle resolution
1134 	 (eg. 450 MHz).  */
1135       DEFAULT (speed_precision, 10000);
1136       goto choose_times;
1137     }
1138   else if (have_grus && getrusage_microseconds_p() && ! getrusage_backwards_p())
1139     {
1140       use_grus = 1;
1141       speed_unittime = grus_unittime = 1.0e-6;
1142       DEFAULT (speed_precision, 1000);
1143       strcpy (speed_time_string, "microsecond accurate getrusage()");
1144     }
1145   else if (have_gtod && gettimeofday_microseconds_p())
1146     {
1147       use_gtod = 1;
1148       speed_unittime = gtod_unittime = 1.0e-6;
1149       DEFAULT (speed_precision, 1000);
1150       strcpy (speed_time_string, "microsecond accurate gettimeofday()");
1151     }
1152   else if (have_cgt && cgt_works_p() && cgt_unittime < 1.5/clk_tck())
1153     {
1154       /* use clock_gettime if 1 tick or better resolution */
1155       goto choose_cgt;
1156     }
1157   else if (have_times)
1158     {
1159       use_tick_boundary = 1;
1160       DEFAULT (speed_precision, 200);
1161     choose_times:
1162       use_times = 1;
1163       speed_unittime = times_unittime = 1.0 / (double) clk_tck ();
1164       sprintf (speed_time_string, "%s clock tick times()",
1165 	       unittime_string (speed_unittime));
1166     }
1167   else if (have_grus)
1168     {
1169       use_grus = 1;
1170       use_tick_boundary = 1;
1171       speed_unittime = grus_unittime = 1.0 / (double) clk_tck ();
1172       DEFAULT (speed_precision, 200);
1173       sprintf (speed_time_string, "%s clock tick getrusage()\n",
1174 	       unittime_string (speed_unittime));
1175     }
1176   else if (have_gtod)
1177     {
1178       use_gtod = 1;
1179       use_tick_boundary = 1;
1180       speed_unittime = gtod_unittime = 1.0 / (double) clk_tck ();
1181       DEFAULT (speed_precision, 200);
1182       sprintf (speed_time_string, "%s clock tick gettimeofday()",
1183 	       unittime_string (speed_unittime));
1184     }
1185   else
1186     {
1187       fprintf (stderr, "No time measuring method available\n");
1188       fprintf (stderr, "None of: speed_cyclecounter(), STCK(), getrusage(), gettimeofday(), times()\n");
1189       abort ();
1190     }
1191 
1192   if (speed_option_verbose)
1193     {
1194       printf ("speed_time_init: %s\n", speed_time_string);
1195       printf ("    speed_precision     %d\n", speed_precision);
1196       printf ("    speed_unittime      %.2g\n", speed_unittime);
1197       if (supplement_unittime)
1198 	printf ("    supplement_unittime %.2g\n", supplement_unittime);
1199       printf ("    use_tick_boundary   %d\n", use_tick_boundary);
1200       if (have_cycles)
1201 	printf ("    cycles_limit        %.2g seconds\n", cycles_limit);
1202     }
1203 }
1204 
1205 
1206 
1207 /* Burn up CPU until a clock tick boundary, for greater accuracy.  Set the
1208    corresponding "start_foo" appropriately too. */
1209 
1210 void
1211 grus_tick_boundary (void)
1212 {
1213   struct_rusage  prev;
1214   getrusage (0, &prev);
1215   do {
1216     getrusage (0, &start_grus);
1217   } while (start_grus.ru_utime.tv_usec == prev.ru_utime.tv_usec);
1218 }
1219 
1220 void
1221 gtod_tick_boundary (void)
1222 {
1223   struct_timeval  prev;
1224   gettimeofday (&prev, NULL);
1225   do {
1226     gettimeofday (&start_gtod, NULL);
1227   } while (start_gtod.tv_usec == prev.tv_usec);
1228 }
1229 
1230 void
1231 times_tick_boundary (void)
1232 {
1233   struct_tms  prev;
1234   times (&prev);
1235   do
1236     times (&start_times);
1237   while (start_times.tms_utime == prev.tms_utime);
1238 }
1239 
1240 
1241 /* "have_" values are tested to let unused code go dead.  */
1242 
1243 void
1244 speed_starttime (void)
1245 {
1246   speed_time_init ();
1247 
1248   if (have_grus && use_grus)
1249     {
1250       if (use_tick_boundary)
1251 	grus_tick_boundary ();
1252       else
1253 	getrusage (0, &start_grus);
1254     }
1255 
1256   if (have_gtod && use_gtod)
1257     {
1258       if (use_tick_boundary)
1259 	gtod_tick_boundary ();
1260       else
1261 	gettimeofday (&start_gtod, NULL);
1262     }
1263 
1264   if (have_times && use_times)
1265     {
1266       if (use_tick_boundary)
1267 	times_tick_boundary ();
1268       else
1269 	times (&start_times);
1270     }
1271 
1272   if (have_cgt && use_cgt)
1273     clock_gettime (CGT_ID, &start_cgt);
1274 
1275   if (have_rrt && use_rrt)
1276     read_real_time (&start_rrt, sizeof(start_rrt));
1277 
1278   if (have_sgi && use_sgi)
1279     start_sgi = *sgi_addr;
1280 
1281   if (have_mftb && use_mftb)
1282     MFTB (start_mftb);
1283 
1284   if (have_stck && use_stck)
1285     STCK (start_stck);
1286 
1287   /* Cycles sampled last for maximum accuracy. */
1288   if (have_cycles && use_cycles)
1289     speed_cyclecounter (start_cycles);
1290 }
1291 
1292 
1293 /* Calculate the difference between two cycle counter samples, as a "double"
1294    counter of cycles.
1295 
1296    The start and end values are allowed to cancel in integers in case the
1297    counter values are bigger than the 53 bits that normally fit in a double.
1298 
1299    This works even if speed_cyclecounter() puts a value bigger than 32-bits
1300    in the low word (the high word always gets a 2**32 multiplier though). */
1301 
1302 double
1303 speed_cyclecounter_diff (const unsigned end[2], const unsigned start[2])
1304 {
1305   unsigned  d;
1306   double    t;
1307 
1308   if (have_cycles == 1)
1309     {
1310       t = (end[0] - start[0]);
1311     }
1312   else
1313     {
1314       d = end[0] - start[0];
1315       t = d - (d > end[0] ? M_2POWU : 0.0);
1316       t += (end[1] - start[1]) * M_2POW32;
1317     }
1318   return t;
1319 }
1320 
1321 
1322 double
1323 speed_mftb_diff (const unsigned end[2], const unsigned start[2])
1324 {
1325   unsigned  d;
1326   double    t;
1327 
1328   d = end[0] - start[0];
1329   t = (double) d - (d > end[0] ? M_2POW32 : 0.0);
1330   t += (end[1] - start[1]) * M_2POW32;
1331   return t;
1332 }
1333 
1334 
/* Function body returning "end" minus "start" in seconds, where both are
   pointers to structures with a whole seconds field "sec" and a fractional
   field "psec" counted in units of "punit" seconds.

   The two fields are differenced separately, as integers, and only then
   combined.  Forming "sec+psec*punit" for each sample first could exceed
   the precision of a double.

   The total is formed only as a "double", since an integer count of psecs
   could overflow: 2^32 microseconds is just a bit over an hour, and 2^32
   nanoseconds only about 4 seconds.

   The fields are cast to "long" before subtracting for the sake of
   timebasestruct_t, whose fields are only "unsigned int" but where a
   signed difference is wanted.  */

#define DIFF_SECS_ROUTINE(sec, psec, punit)                             \
  {                                                                     \
    long  whole_delta = (long) end->sec - (long) start->sec;            \
    long  frac_delta = (long) end->psec - (long) start->psec;           \
    return (double) whole_delta + punit * (double) frac_delta;          \
  }
1356 
1357 double
1358 timeval_diff_secs (const struct_timeval *end, const struct_timeval *start)
1359 {
1360   DIFF_SECS_ROUTINE (tv_sec, tv_usec, 1e-6);
1361 }
1362 
1363 double
1364 rusage_diff_secs (const struct_rusage *end, const struct_rusage *start)
1365 {
1366   DIFF_SECS_ROUTINE (ru_utime.tv_sec, ru_utime.tv_usec, 1e-6);
1367 }
1368 
1369 double
1370 timespec_diff_secs (const struct_timespec *end, const struct_timespec *start)
1371 {
1372   DIFF_SECS_ROUTINE (tv_sec, tv_nsec, 1e-9);
1373 }
1374 
1375 /* This is for use after time_base_to_time, ie. for seconds and nanoseconds. */
1376 double
1377 timebasestruct_diff_secs (const timebasestruct_t *end,
1378 			  const timebasestruct_t *start)
1379 {
1380   DIFF_SECS_ROUTINE (tb_high, tb_low, 1e-9);
1381 }
1382 
1383 
1384 double
1385 speed_endtime (void)
1386 {
1387 #define END_USE(name,value)                             \
1388   do {                                                  \
1389     if (speed_option_verbose >= 3)                      \
1390       printf ("speed_endtime(): used %s\n", name);      \
1391     result = value;                                     \
1392     goto done;                                          \
1393   } while (0)
1394 
1395 #define END_ENOUGH(name,value)                                          \
1396   do {                                                                  \
1397     if (speed_option_verbose >= 3)                                      \
1398       printf ("speed_endtime(): %s gives enough precision\n", name);    \
1399     result = value;                                                     \
1400     goto done;                                                          \
1401   } while (0)
1402 
1403 #define END_EXCEED(name,value)                                            \
1404   do {                                                                    \
1405     if (speed_option_verbose >= 3)                                        \
1406       printf ("speed_endtime(): cycle counter limit exceeded, used %s\n", \
1407 	      name);                                                      \
1408     result = value;                                                       \
1409     goto done;                                                            \
1410   } while (0)
1411 
1412   unsigned          end_cycles[2];
1413   stck_t            end_stck;
1414   unsigned          end_mftb[2];
1415   unsigned          end_sgi;
1416   timebasestruct_t  end_rrt;
1417   struct_timespec   end_cgt;
1418   struct_timeval    end_gtod;
1419   struct_rusage     end_grus;
1420   struct_tms        end_times;
1421   double            t_gtod, t_grus, t_times, t_cgt;
1422   double            t_rrt, t_sgi, t_mftb, t_stck, t_cycles;
1423   double            result;
1424 
1425   /* Cycles sampled first for maximum accuracy.
1426      "have_" values tested to let unused code go dead.  */
1427 
1428   if (have_cycles && use_cycles)  speed_cyclecounter (end_cycles);
1429   if (have_stck   && use_stck)    STCK (end_stck);
1430   if (have_mftb   && use_mftb)    MFTB (end_mftb);
1431   if (have_sgi    && use_sgi)     end_sgi = *sgi_addr;
1432   if (have_rrt    && use_rrt)     read_real_time (&end_rrt, sizeof(end_rrt));
1433   if (have_cgt    && use_cgt)     clock_gettime (CGT_ID, &end_cgt);
1434   if (have_gtod   && use_gtod)    gettimeofday (&end_gtod, NULL);
1435   if (have_grus   && use_grus)    getrusage (0, &end_grus);
1436   if (have_times  && use_times)   times (&end_times);
1437 
1438   result = -1.0;
1439 
1440   if (speed_option_verbose >= 4)
1441     {
1442       printf ("speed_endtime():\n");
1443       if (use_cycles)
1444 	printf ("   cycles  0x%X,0x%X -> 0x%X,0x%X\n",
1445 		start_cycles[1], start_cycles[0],
1446 		end_cycles[1], end_cycles[0]);
1447 
1448       if (use_stck)
1449 	printf ("   stck  0x%lX -> 0x%lX\n", start_stck, end_stck);
1450 
1451       if (use_mftb)
1452 	printf ("   mftb  0x%X,%08X -> 0x%X,%08X\n",
1453 		start_mftb[1], start_mftb[0],
1454 		end_mftb[1], end_mftb[0]);
1455 
1456       if (use_sgi)
1457 	printf ("   sgi  0x%X -> 0x%X\n", start_sgi, end_sgi);
1458 
1459       if (use_rrt)
1460 	printf ("   read_real_time  (%d)%u,%u -> (%d)%u,%u\n",
1461 		start_rrt.flag, start_rrt.tb_high, start_rrt.tb_low,
1462 		end_rrt.flag, end_rrt.tb_high, end_rrt.tb_low);
1463 
1464       if (use_cgt)
1465 	printf ("   clock_gettime  %ld.%09ld -> %ld.%09ld\n",
1466 		start_cgt.tv_sec, start_cgt.tv_nsec,
1467 		end_cgt.tv_sec, end_cgt.tv_nsec);
1468 
1469       if (use_gtod)
1470 	printf ("   gettimeofday  %ld.%06ld -> %ld.%06ld\n",
1471 		start_gtod.tv_sec, start_gtod.tv_usec,
1472 		end_gtod.tv_sec, end_gtod.tv_usec);
1473 
1474       if (use_grus)
1475 	printf ("   getrusage  %ld.%06ld -> %ld.%06ld\n",
1476 		start_grus.ru_utime.tv_sec, start_grus.ru_utime.tv_usec,
1477 		end_grus.ru_utime.tv_sec, end_grus.ru_utime.tv_usec);
1478 
1479       if (use_times)
1480 	printf ("   times  %ld -> %ld\n",
1481 		start_times.tms_utime, end_times.tms_utime);
1482     }
1483 
1484   if (use_rrt)
1485     {
1486       time_base_to_time (&start_rrt, sizeof(start_rrt));
1487       time_base_to_time (&end_rrt, sizeof(end_rrt));
1488       t_rrt = timebasestruct_diff_secs (&end_rrt, &start_rrt);
1489       END_USE ("read_real_time()", t_rrt);
1490     }
1491 
1492   if (use_cgt)
1493     {
1494       t_cgt = timespec_diff_secs (&end_cgt, &start_cgt);
1495       END_USE ("clock_gettime()", t_cgt);
1496     }
1497 
1498   if (use_grus)
1499     {
1500       t_grus = rusage_diff_secs (&end_grus, &start_grus);
1501 
1502       /* Use getrusage() if the cycle counter limit would be exceeded, or if
1503 	 it provides enough accuracy already. */
1504       if (use_cycles)
1505 	{
1506 	  if (t_grus >= speed_precision*grus_unittime)
1507 	    END_ENOUGH ("getrusage()", t_grus);
1508 	  if (t_grus >= cycles_limit)
1509 	    END_EXCEED ("getrusage()", t_grus);
1510 	}
1511     }
1512 
1513   if (use_times)
1514     {
1515       t_times = (end_times.tms_utime - start_times.tms_utime) * times_unittime;
1516 
1517       /* Use times() if the cycle counter limit would be exceeded, or if
1518 	 it provides enough accuracy already. */
1519       if (use_cycles)
1520 	{
1521 	  if (t_times >= speed_precision*times_unittime)
1522 	    END_ENOUGH ("times()", t_times);
1523 	  if (t_times >= cycles_limit)
1524 	    END_EXCEED ("times()", t_times);
1525 	}
1526     }
1527 
1528   if (use_gtod)
1529     {
1530       t_gtod = timeval_diff_secs (&end_gtod, &start_gtod);
1531 
1532       /* Use gettimeofday() if it measured a value bigger than the cycle
1533 	 counter can handle.  */
1534       if (use_cycles)
1535 	{
1536 	  if (t_gtod >= cycles_limit)
1537 	    END_EXCEED ("gettimeofday()", t_gtod);
1538 	}
1539     }
1540 
1541   if (use_mftb)
1542     {
1543       t_mftb = speed_mftb_diff (end_mftb, start_mftb) * mftb_unittime;
1544       END_USE ("mftb", t_mftb);
1545     }
1546 
1547   if (use_stck)
1548     {
1549       t_stck = (end_stck - start_stck) * STCK_PERIOD;
1550       END_USE ("stck", t_stck);
1551     }
1552 
1553   if (use_sgi)
1554     {
1555       t_sgi = (end_sgi - start_sgi) * sgi_unittime;
1556       END_USE ("SGI hardware counter", t_sgi);
1557     }
1558 
1559   if (use_cycles)
1560     {
1561       t_cycles = speed_cyclecounter_diff (end_cycles, start_cycles)
1562 	* speed_cycletime;
1563       END_USE ("cycle counter", t_cycles);
1564     }
1565 
1566   if (use_grus && getrusage_microseconds_p())
1567     END_USE ("getrusage()", t_grus);
1568 
1569   if (use_gtod && gettimeofday_microseconds_p())
1570     END_USE ("gettimeofday()", t_gtod);
1571 
1572   if (use_times)  END_USE ("times()",        t_times);
1573   if (use_grus)   END_USE ("getrusage()",    t_grus);
1574   if (use_gtod)   END_USE ("gettimeofday()", t_gtod);
1575 
1576   fprintf (stderr, "speed_endtime(): oops, no time method available\n");
1577   abort ();
1578 
1579  done:
1580   if (result < 0.0)
1581     {
1582       if (speed_option_verbose >= 2)
1583 	fprintf (stderr, "speed_endtime(): warning, treating negative time as zero: %.9f\n", result);
1584       result = 0.0;
1585     }
1586   return result;
1587 }
1588