/* Time routines for speed measurements.

Copyright 1999, 2000, 2001, 2002, 2003, 2004, 2010, 2011, 2012 Free Software
Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 3 of the License, or (at your
option) any later version.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */


/* Usage:

   The code in this file implements the lowest level of time measuring,
   simple one-time measuring of time between two points.

   void speed_starttime (void)
   double speed_endtime (void)
       Call speed_starttime to start measuring, and then call speed_endtime
       when done.

       speed_endtime returns the time taken, in seconds.  Or if the timebase
       is in CPU cycles and the CPU frequency is unknown then speed_endtime
       returns cycles.  Applications can identify the cycles return by
       checking for speed_cycletime (described below) equal to 1.0.

       If some sort of temporary glitch occurs then speed_endtime returns
       0.0.  Currently this is for various cases where a negative time has
       occurred.  This unfortunately occurs with getrusage on some systems,
       and with the hppa cycle counter on hpux.

   double speed_cycletime
       The time in seconds for each CPU cycle.  For example on a 100 MHz CPU
       this would be 1.0e-8.

       If the CPU frequency is unknown, then speed_cycletime is either 0.0
       or 1.0.
  It's 0.0 when speed_endtime is returning seconds, or it's
       1.0 when speed_endtime is returning cycles.

       It may be noted that "speed_endtime() / speed_cycletime" gives a
       measured time in cycles, irrespective of whether speed_endtime is
       returning cycles or seconds.  (Assuming cycles can be had, ie. it's
       either cycles already or the cpu frequency is known.  See also
       speed_cycletime_need_cycles below.)

   double speed_unittime
       The unit of time measurement accuracy for the timing method in use.
       This is in seconds or cycles, as per speed_endtime.

   char speed_time_string[]
       A null-terminated string describing the time method in use.

   void speed_time_init (void)
       Initialize time measuring.  speed_starttime() does this
       automatically, so it's only needed if an application wants to inspect
       the above global variables before making a measurement.

   int speed_precision
       The intended accuracy of time measurements.  speed_measure() in
       common.c for instance runs target routines with enough repetitions so
       it takes at least "speed_unittime * speed_precision" (this expression
       works for both cycles or seconds from speed_endtime).

       A program can provide an option so the user can set speed_precision.
       If speed_precision is zero when speed_time_init or speed_starttime
       first run then it gets a default based on the measuring method
       chosen.  (More precision for higher accuracy methods.)

   void speed_cycletime_need_seconds (void)
       Call this to demand that speed_endtime will return seconds, and not
       cycles.  If only cycles are available then an error is printed and
       the program exits.

   void speed_cycletime_need_cycles (void)
       Call this to demand that speed_cycletime is non-zero, so that
       "speed_endtime() / speed_cycletime" will give times in cycles.
87 88 89 90 Notes: 91 92 Various combinations of cycle counter, read_real_time(), getrusage(), 93 gettimeofday() and times() can arise, according to which are available 94 and their precision. 95 96 97 Allowing speed_endtime() to return either seconds or cycles is only a 98 slight complication and makes it possible for the speed program to do 99 some sensible things without demanding the CPU frequency. If seconds are 100 being measured then it can always print seconds, and if cycles are being 101 measured then it can always print them without needing to know how long 102 they are. Also the tune program doesn't care at all what the units are. 103 104 GMP_CPU_FREQUENCY can always be set when the automated methods in freq.c 105 fail. This will be needed if times in seconds are wanted but a cycle 106 counter is being used, or if times in cycles are wanted but getrusage or 107 another seconds based timer is in use. 108 109 If the measuring method uses a cycle counter but supplements it with 110 getrusage or the like, then knowing the CPU frequency is mandatory since 111 the code compares values from the two. 112 113 114 Not done: 115 116 Solaris gethrtime() seems no more than a slow way to access the Sparc V9 117 cycle counter. gethrvtime() seems to be relevant only to light weight 118 processes, it doesn't for instance give nanosecond virtual time. So 119 neither of these are used. 120 121 122 Bugs: 123 124 getrusage_microseconds_p is fundamentally flawed, getrusage and 125 gettimeofday can have resolutions other than clock ticks or microseconds, 126 for instance IRIX 5 has a tick of 10 ms but a getrusage of 1 ms. 127 128 129 Enhancements: 130 131 The SGI hardware counter has 64 bits on some machines, which could be 132 used when available. But perhaps 32 bits is enough range, and then rely 133 on the getrusage supplement. 134 135 Maybe getrusage (or times) should be used as a supplement for any 136 wall-clock measuring method. 
/* Currently a wall clock with a good range
   (eg. a 64-bit cycle counter) is used without a supplement.

   On PowerPC the timebase registers could be used, but would have to do
   something to find out the speed.  On 6xx chips it's normally 1/4 bus
   speed, on 4xx chips it's either that or an external clock.  Measuring
   against gettimeofday might be ok.  */


#include "config.h"

#include <errno.h>
#include <setjmp.h>
#include <signal.h>
#include <stddef.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h> /* for getenv() */

#if HAVE_FCNTL_H
#include <fcntl.h>  /* for open() */
#endif

#if HAVE_STDINT_H
#include <stdint.h> /* for uint64_t */
#endif

#if HAVE_UNISTD_H
#include <unistd.h> /* for sysconf() */
#endif

#include <sys/types.h>

#if TIME_WITH_SYS_TIME
# include <sys/time.h>  /* for struct timeval */
# include <time.h>
#else
# if HAVE_SYS_TIME_H
#  include <sys/time.h>
# else
#  include <time.h>
# endif
#endif

#if HAVE_SYS_MMAN_H
#include <sys/mman.h>      /* for mmap() */
#endif

#if HAVE_SYS_RESOURCE_H
#include <sys/resource.h>  /* for struct rusage */
#endif

#if HAVE_SYS_SYSSGI_H
#include <sys/syssgi.h>    /* for syssgi() */
#endif

#if HAVE_SYS_SYSTEMCFG_H
#include <sys/systemcfg.h> /* for RTC_POWER on AIX */
#endif

#if HAVE_SYS_TIMES_H
#include <sys/times.h>  /* for times() and struct tms */
#endif

#include "gmp.h"
#include "gmp-impl.h"

#include "speed.h"


/* strerror is only used for some stuff on newish systems, no need to have a
   proper replacement */
#if ! HAVE_STRERROR
#define strerror(n)  "<strerror not available>"
#endif


/* Public state of the timing layer.  speed_time_string describes the
   method in use, speed_precision is the requested accuracy (0 means "pick
   a default"), speed_unittime is the resolution of the method in use and
   speed_cycletime the length of one CPU cycle (0.0 until known).  */
char    speed_time_string[256];
int     speed_precision = 0;
double  speed_unittime;
double  speed_cycletime = 0.0;


/* don't rely on "unsigned" to "double" conversion, it's broken in SunOS 4
   native cc */
#define M_2POWU   (((double) INT_MAX + 1.0) * 2.0)

#define M_2POW32  4294967296.0
#define M_2POW64  (M_2POW32 * M_2POW32)


/* Conditionals for the time functions available are done with normal C
   code, which is a lot easier than wildly nested preprocessor directives.

   The choice of what to use is partly made at run-time, according to
   whether the cycle counter works and the measured accuracy of getrusage
   and gettimeofday.

   A routine that's not available won't be getting called, but is an abort()
   to be sure it isn't called mistakenly.

   It can be assumed that if a function exists then its data type will, but
   if the function doesn't then the data type might or might not exist, so
   the type can't be used unconditionally.  The "struct_rusage" etc macros
   provide dummies when the respective function doesn't exist.  */


/* have_cycles takes the value of HAVE_SPEED_CYCLECOUNTER when that is set,
   otherwise 0 and speed_cyclecounter() becomes an assertion failure.  */
#if HAVE_SPEED_CYCLECOUNTER
static const int have_cycles = HAVE_SPEED_CYCLECOUNTER;
#else
static const int have_cycles = 0;
#define speed_cyclecounter(p)  ASSERT_FAIL (speed_cyclecounter not available)
#endif

/* "stck" returns ticks since 1 Jan 1900 00:00 GMT, where each tick is 2^-12
   microseconds.  Same #ifdefs here as in longlong.h.  */
#if defined (__GNUC__) && ! defined (NO_ASM)                            \
  && (defined (__i370__) || defined (__s390__) || defined (__mvs__))
static const int  have_stck = 1;
static const int  use_stck = 1;  /* always use when available */
typedef uint64_t  stck_t; /* gcc for s390 is quite new, always has uint64_t */
#define STCK(timestamp)                 \
  do {                                  \
    asm ("stck %0" : "=Q" (timestamp)); \
  } while (0)
#else
static const int  have_stck = 0;
static const int  use_stck = 0;
typedef unsigned long  stck_t;   /* dummy */
#define STCK(timestamp)  ASSERT_FAIL (stck instruction not available)
#endif
#define STCK_PERIOD      (1.0 / 4096e6)   /* 2^-12 microseconds */

/* mftb
   Enhancement: On 64-bit chips mftb gives a 64-bit value, no need for mftbu
   and a loop (see powerpc64.asm).  */
#if HAVE_HOST_CPU_FAMILY_powerpc
static const int  have_mftb = 1;
#if defined (__GNUC__) && ! defined (NO_ASM)
/* Read upper, lower, upper and retry until both upper reads agree, then
   store the low word in a[0] and the high word in a[1].  */
#define MFTB(a)                         \
  do {                                  \
    unsigned  __h1, __l, __h2;          \
    do {                                \
      asm volatile ("mftbu %0\n"        \
                    "mftb %1\n"         \
                    "mftbu %2"          \
                    : "=r" (__h1),      \
                      "=r" (__l),       \
                      "=r" (__h2));     \
    } while (__h1 != __h2);             \
    a[0] = __l;                         \
    a[1] = __h1;                        \
  } while (0)
#else
#define MFTB(a)   mftb_function (a)
#endif
#else /* ! powerpc */
static const int  have_mftb = 0;
#define MFTB(a)                         \
  do {                                  \
    a[0] = 0;                           \
    a[1] = 0;                           \
    ASSERT_FAIL (mftb not available);   \
  } while (0)
#endif

/* Unicos 10.X has syssgi(), but not mmap().
/* Unicos 10.X has syssgi(), but not mmap().  */
#if HAVE_SYSSGI && HAVE_MMAP
static const int  have_sgi = 1;
#else
static const int  have_sgi = 0;
#endif

/* Without read_real_time() the calls become assertion failures, and the
   constants and struct that speed_time_init refers to get dummy
   definitions so the code still compiles.  */
#if HAVE_READ_REAL_TIME
static const int have_rrt = 1;
#else
static const int have_rrt = 0;
#define read_real_time(t,s)     ASSERT_FAIL (read_real_time not available)
#define time_base_to_time(t,s)  ASSERT_FAIL (time_base_to_time not available)
#define RTC_POWER     1
#define RTC_POWER_PC  2
#define timebasestruct_t   struct timebasestruct_dummy
struct timebasestruct_dummy {
  int             flag;
  unsigned int    tb_high;
  unsigned int    tb_low;
};
#endif

/* The dummy clock_gettime/clock_getres are comma expressions yielding -1,
   since their callers test the return value.  */
#if HAVE_CLOCK_GETTIME
static const int have_cgt = 1;
#define struct_timespec  struct timespec
#else
static const int have_cgt = 0;
#define struct_timespec       struct timespec_dummy
#define clock_gettime(id,ts)  (ASSERT_FAIL (clock_gettime not available), -1)
#define clock_getres(id,ts)   (ASSERT_FAIL (clock_getres not available), -1)
#endif

#if HAVE_GETRUSAGE
static const int have_grus = 1;
#define struct_rusage   struct rusage
#else
static const int have_grus = 0;
#define getrusage(n,ru)  ASSERT_FAIL (getrusage not available)
#define struct_rusage    struct rusage_dummy
#endif

#if HAVE_GETTIMEOFDAY
static const int have_gtod = 1;
#define struct_timeval   struct timeval
#else
static const int have_gtod = 0;
#define gettimeofday(tv,tz)  ASSERT_FAIL (gettimeofday not available)
#define struct_timeval       struct timeval_dummy
#endif

#if HAVE_TIMES
static const int have_times = 1;
#define struct_tms   struct tms
#else
static const int have_times = 0;
#define times(tms)   ASSERT_FAIL (times not available)
#define struct_tms   struct tms_dummy
#endif

/* Dummy types selected by the struct_xxx macros above when the real
   function is missing.  They are declared unconditionally.  */
struct tms_dummy {
  long  tms_utime;
};
struct timeval_dummy {
  long  tv_sec;
  long  tv_usec;
};
struct rusage_dummy {
  struct_timeval ru_utime;
};
struct timespec_dummy {
  long  tv_sec;
  long  tv_nsec;
};

/* Run-time choice of method(s); the ones visible here are set in
   speed_time_init.  */
static int  use_cycles;
static int  use_mftb;
static int  use_sgi;
static int  use_rrt;
static int  use_cgt;
static int  use_gtod;
static int  use_grus;
static int  use_times;
static int  use_tick_boundary;

/* One start value per method.  NOTE(review): presumably filled in by
   speed_starttime, which is not in this part of the file -- confirm.  */
static unsigned         start_cycles[2];
static stck_t           start_stck;
static unsigned         start_mftb[2];
static unsigned         start_sgi;
static timebasestruct_t start_rrt;
static struct_timespec  start_cgt;
static struct_rusage    start_grus;
static struct_timeval   start_gtod;
static struct_tms       start_times;

/* cycles_limit is recomputed in speed_time_init when the cycle counter is
   chosen; the xxx_unittime values are the resolution of each method.  */
static double  cycles_limit = 1e100;
static double  mftb_unittime;
static double  sgi_unittime;
static double  cgt_unittime;
static double  grus_unittime;
static double  gtod_unittime;
static double  times_unittime;

/* for RTC_POWER format, ie. seconds and nanoseconds */
#define TIMEBASESTRUCT_SECS(t)  ((t)->tb_high + (t)->tb_low * 1e-9)


/* Return a string representing a time in seconds, nicely formatted.
   Eg. "10.25ms".
*/ 411 char * 412 unittime_string (double t) 413 { 414 static char buf[128]; 415 416 const char *unit; 417 int prec; 418 419 /* choose units and scale */ 420 if (t < 1e-6) 421 t *= 1e9, unit = "ns"; 422 else if (t < 1e-3) 423 t *= 1e6, unit = "us"; 424 else if (t < 1.0) 425 t *= 1e3, unit = "ms"; 426 else 427 unit = "s"; 428 429 /* want 4 significant figures */ 430 if (t < 1.0) 431 prec = 4; 432 else if (t < 10.0) 433 prec = 3; 434 else if (t < 100.0) 435 prec = 2; 436 else 437 prec = 1; 438 439 sprintf (buf, "%.*f%s", prec, t, unit); 440 return buf; 441 } 442 443 444 static jmp_buf cycles_works_buf; 445 446 static RETSIGTYPE 447 cycles_works_handler (int sig) 448 { 449 longjmp (cycles_works_buf, 1); 450 } 451 452 int 453 cycles_works_p (void) 454 { 455 static int result = -1; 456 457 if (result != -1) 458 goto done; 459 460 /* FIXME: On linux, the cycle counter is not saved and restored over 461 * context switches, making it almost useless for precise cputime 462 * measurements. When available, it's better to use clock_gettime, 463 * which seems to have reasonable accuracy (tested on x86_32, 464 * linux-2.6.26, glibc-2.7). However, there are also some linux 465 * systems where clock_gettime is broken in one way or the other, 466 * like CLOCK_PROCESS_CPUTIME_ID not implemented (easy case) or 467 * kind-of implemented but broken (needs code to detect that), and 468 * on those systems a wall-clock cycle counter is the least bad 469 * fallback. 470 * 471 * So we need some code to disable the cycle counter on some but not 472 * all linux systems. 
*/ 473 #ifdef SIGILL 474 { 475 RETSIGTYPE (*old_handler) (int); 476 unsigned cycles[2]; 477 478 old_handler = signal (SIGILL, cycles_works_handler); 479 if (old_handler == SIG_ERR) 480 { 481 if (speed_option_verbose) 482 printf ("cycles_works_p(): SIGILL not supported, assuming speed_cyclecounter() works\n"); 483 goto yes; 484 } 485 if (setjmp (cycles_works_buf)) 486 { 487 if (speed_option_verbose) 488 printf ("cycles_works_p(): SIGILL during speed_cyclecounter(), so doesn't work\n"); 489 result = 0; 490 goto done; 491 } 492 speed_cyclecounter (cycles); 493 signal (SIGILL, old_handler); 494 if (speed_option_verbose) 495 printf ("cycles_works_p(): speed_cyclecounter() works\n"); 496 } 497 #else 498 499 if (speed_option_verbose) 500 printf ("cycles_works_p(): SIGILL not defined, assuming speed_cyclecounter() works\n"); 501 goto yes; 502 #endif 503 504 yes: 505 result = 1; 506 507 done: 508 return result; 509 } 510 511 512 /* The number of clock ticks per second, but looking at sysconf rather than 513 just CLK_TCK, where possible. */ 514 long 515 clk_tck (void) 516 { 517 static long result = -1L; 518 if (result != -1L) 519 return result; 520 521 #if HAVE_SYSCONF 522 result = sysconf (_SC_CLK_TCK); 523 if (result != -1L) 524 { 525 if (speed_option_verbose) 526 printf ("sysconf(_SC_CLK_TCK) is %ld per second\n", result); 527 return result; 528 } 529 530 fprintf (stderr, 531 "sysconf(_SC_CLK_TCK) not working, using CLK_TCK instead\n"); 532 #endif 533 534 #ifdef CLK_TCK 535 result = CLK_TCK; 536 if (speed_option_verbose) 537 printf ("CLK_TCK is %ld per second\n", result); 538 return result; 539 #else 540 fprintf (stderr, "CLK_TCK not defined, cannot continue\n"); 541 abort (); 542 #endif 543 } 544 545 546 /* If two times can be observed less than half a clock tick apart, then 547 assume "get" is microsecond accurate. 
/* Two times only 1 microsecond apart are not believed, since some kernels
   take it upon themselves to ensure gettimeofday doesn't return the same
   value twice, for the benefit of applications using it for a timestamp.
   This is obviously very stupid given the speed of CPUs these days.

   Making "reps" many calls to noop_1() is designed to waste some CPU, with
   a view to getting measurements 2 microseconds (or more) apart.  "reps" is
   increased progressively until such a period is seen.

   The outer loop "attempts" are just to allow for any random nonsense or
   system load upsetting the measurements (ie. making two successive calls
   to "get" come out as a longer interval than normal).

   Bugs:

   The assumption that any interval less than a half tick implies
   microsecond resolution is obviously fairly rash, the true resolution
   could be anything between a microsecond and that half tick.  Perhaps
   something special would have to be done on a system where this is the
   case, since there's no obvious reliable way to detect it
   automatically.  */

/* MICROSECONDS_P expands to the complete body of an int function.

   name       string used in the messages
   type       type of the two time readings
   get        macro taking a "type" lvalue and storing the current time
   sec, usec  macros extracting seconds and microseconds from a reading

   The function returns 1 if an interval of at least 2 microseconds but
   under half a clock tick was seen, 0 otherwise, and remembers the answer
   for later calls.  */
#define MICROSECONDS_P(name, type, get, sec, usec)                      \
  {                                                                     \
    static int  result = -1;                                            \
    type      st, et;                                                   \
    long      dt, half_tick;                                            \
    unsigned  attempt, reps, i, j;                                      \
                                                                        \
    if (result != -1)                                                   \
      return result;                                                    \
                                                                        \
    result = 0;                                                         \
    /* half a clock tick, in microseconds */                            \
    half_tick = (1000000L / clk_tck ()) / 2;                            \
                                                                        \
    for (attempt = 0; attempt < 5; attempt++)                           \
      {                                                                 \
        reps = 0;                                                       \
        for (;;)                                                        \
          {                                                             \
            get (st);                                                   \
            for (i = 0; i < reps; i++)                                  \
              for (j = 0; j < 100; j++)                                 \
                noop_1 (CNST_LIMB(0));                                  \
            get (et);                                                   \
                                                                        \
            dt = (sec(et)-sec(st))*1000000L + usec(et)-usec(st);        \
                                                                        \
            if (speed_option_verbose >= 2)                              \
              printf ("%s attempt=%u, reps=%u, dt=%ld\n",               \
                      name, attempt, reps, dt);                         \
                                                                        \
            /* a 1 microsecond step is not trusted, see above */        \
            if (dt >= 2)                                                \
              break;                                                    \
                                                                        \
            reps = (reps == 0 ? 1 : 2*reps);                            \
            if (reps == 0)                                              \
              break;  /* uint overflow, not normal */                   \
          }                                                             \
                                                                        \
        if (dt < half_tick)                                             \
          {                                                             \
            result = 1;                                                 \
            break;                                                      \
          }                                                             \
      }                                                                 \
                                                                        \
    if (speed_option_verbose)                                           \
      {                                                                 \
        if (result)                                                     \
          printf ("%s is microsecond accurate\n", name);                \
        else                                                            \
          printf ("%s is only %s clock tick accurate\n",                \
                  name, unittime_string (1.0/clk_tck()));               \
      }                                                                 \
    return result;                                                      \
  }


/* Return 1 if gettimeofday is judged microsecond accurate by
   MICROSECONDS_P, 0 if not.  */
int
gettimeofday_microseconds_p (void)
{
#define call_gettimeofday(t)   gettimeofday (&(t), NULL)
#define timeval_tv_sec(t)      ((t).tv_sec)
#define timeval_tv_usec(t)     ((t).tv_usec)
  MICROSECONDS_P ("gettimeofday", struct_timeval,
                  call_gettimeofday, timeval_tv_sec, timeval_tv_usec);
}

/* Return 1 if the ru_utime field from getrusage is judged microsecond
   accurate by MICROSECONDS_P, 0 if not.  */
int
getrusage_microseconds_p (void)
{
#define call_getrusage(t)   getrusage (0, &(t))
#define rusage_tv_sec(t)    ((t).ru_utime.tv_sec)
#define rusage_tv_usec(t)   ((t).ru_utime.tv_usec)
  MICROSECONDS_P ("getrusage", struct_rusage,
                  call_getrusage, rusage_tv_sec, rusage_tv_usec);
}

/* Test whether getrusage goes backwards, return non-zero if it does
   (suggesting it's flawed).

   On a macintosh m68040-unknown-netbsd1.4.1 getrusage looks like it's
   microsecond accurate, but has been seen remaining unchanged after many
   microseconds have elapsed.  It also regularly goes backwards by 1000 to
   5000 usecs, this has been seen after between 500 and 4000 attempts taking
   perhaps 0.03 seconds.  We consider this too broken for good measuring.
   We used to have configure pretend getrusage didn't exist on this system,
   but a runtime test should be more reliable, since we imagine the problem
   is not confined to just this exact system tuple.
*/ 659 660 int 661 getrusage_backwards_p (void) 662 { 663 static int result = -1; 664 struct rusage start, prev, next; 665 long d; 666 int i; 667 668 if (result != -1) 669 return result; 670 671 getrusage (0, &start); 672 memcpy (&next, &start, sizeof (next)); 673 674 result = 0; 675 i = 0; 676 for (;;) 677 { 678 memcpy (&prev, &next, sizeof (prev)); 679 getrusage (0, &next); 680 681 if (next.ru_utime.tv_sec < prev.ru_utime.tv_sec 682 || (next.ru_utime.tv_sec == prev.ru_utime.tv_sec 683 && next.ru_utime.tv_usec < prev.ru_utime.tv_usec)) 684 { 685 if (speed_option_verbose) 686 printf ("getrusage went backwards (attempt %d: %ld.%06ld -> %ld.%06ld)\n", 687 i, 688 (long) prev.ru_utime.tv_sec, (long) prev.ru_utime.tv_usec, 689 (long) next.ru_utime.tv_sec, (long) next.ru_utime.tv_usec); 690 result = 1; 691 break; 692 } 693 694 /* minimum 1000 attempts, then stop after either 0.1 seconds or 50000 695 attempts, whichever comes first */ 696 d = 1000000 * (next.ru_utime.tv_sec - start.ru_utime.tv_sec) 697 + (next.ru_utime.tv_usec - start.ru_utime.tv_usec); 698 i++; 699 if (i > 50000 || (i > 1000 && d > 100000)) 700 break; 701 } 702 703 return result; 704 } 705 706 /* CLOCK_PROCESS_CPUTIME_ID looks like it's going to be in a future version 707 of glibc (some time post 2.2). 708 709 CLOCK_VIRTUAL is process time, available in BSD systems (though sometimes 710 defined, but returning -1 for an error). */ 711 712 #ifdef CLOCK_PROCESS_CPUTIME_ID 713 # define CGT_ID CLOCK_PROCESS_CPUTIME_ID 714 #else 715 # ifdef CLOCK_VIRTUAL 716 # define CGT_ID CLOCK_VIRTUAL 717 # endif 718 #endif 719 #ifdef CGT_ID 720 const int have_cgt_id = 1; 721 #else 722 const int have_cgt_id = 0; 723 # define CGT_ID (ASSERT_FAIL (CGT_ID not determined), -1) 724 #endif 725 726 #define CGT_DELAY_COUNT 1000 727 728 int 729 cgt_works_p (void) 730 { 731 static int result = -1; 732 struct_timespec unit; 733 734 if (! have_cgt) 735 return 0; 736 737 if (! 
have_cgt_id) 738 { 739 if (speed_option_verbose) 740 printf ("clock_gettime don't know what ID to use\n"); 741 result = 0; 742 return result; 743 } 744 745 if (result != -1) 746 return result; 747 748 /* trial run to see if it works */ 749 if (clock_gettime (CGT_ID, &unit) != 0) 750 { 751 if (speed_option_verbose) 752 printf ("clock_gettime id=%d error: %s\n", CGT_ID, strerror (errno)); 753 result = 0; 754 return result; 755 } 756 757 /* get the resolution */ 758 if (clock_getres (CGT_ID, &unit) != 0) 759 { 760 if (speed_option_verbose) 761 printf ("clock_getres id=%d error: %s\n", CGT_ID, strerror (errno)); 762 result = 0; 763 return result; 764 } 765 766 cgt_unittime = unit.tv_sec + unit.tv_nsec * 1e-9; 767 printf ("clock_gettime is %s accurate\n", 768 unittime_string (cgt_unittime)); 769 770 if (cgt_unittime < 10e-9) 771 { 772 /* Do we believe this? */ 773 struct timespec start, end; 774 static volatile int counter; 775 double duration; 776 if (clock_gettime (CGT_ID, &start)) 777 { 778 if (speed_option_verbose) 779 printf ("clock_gettime id=%d error: %s\n", CGT_ID, strerror (errno)); 780 result = 0; 781 return result; 782 } 783 /* Loop of at least 1000 memory accesses, ought to take at 784 least 100 ns*/ 785 for (counter = 0; counter < CGT_DELAY_COUNT; counter++) 786 ; 787 if (clock_gettime (CGT_ID, &end)) 788 { 789 if (speed_option_verbose) 790 printf ("clock_gettime id=%d error: %s\n", CGT_ID, strerror (errno)); 791 result = 0; 792 return result; 793 } 794 duration = (end.tv_sec + end.tv_nsec * 1e-9 795 - start.tv_sec - start.tv_nsec * 1e-9); 796 if (speed_option_verbose) 797 printf ("delay loop of %d rounds took %s (according to clock_get_time)\n", 798 CGT_DELAY_COUNT, unittime_string (duration)); 799 if (duration < 100e-9) 800 { 801 if (speed_option_verbose) 802 printf ("clock_gettime id=%d not believable\n", CGT_ID); 803 result = 0; 804 return result; 805 } 806 } 807 result = 1; 808 return result; 809 } 810 811 812 static double 813 freq_measure_mftb_one 
(void) 814 { 815 #define call_gettimeofday(t) gettimeofday (&(t), NULL) 816 #define timeval_tv_sec(t) ((t).tv_sec) 817 #define timeval_tv_usec(t) ((t).tv_usec) 818 FREQ_MEASURE_ONE ("mftb", struct_timeval, 819 call_gettimeofday, MFTB, 820 timeval_tv_sec, timeval_tv_usec); 821 } 822 823 824 static jmp_buf mftb_works_buf; 825 826 static RETSIGTYPE 827 mftb_works_handler (int sig) 828 { 829 longjmp (mftb_works_buf, 1); 830 } 831 832 int 833 mftb_works_p (void) 834 { 835 unsigned a[2]; 836 RETSIGTYPE (*old_handler) (int); 837 double cycletime; 838 839 /* suppress a warning about a[] unused */ 840 a[0] = 0; 841 842 if (! have_mftb) 843 return 0; 844 845 #ifdef SIGILL 846 old_handler = signal (SIGILL, mftb_works_handler); 847 if (old_handler == SIG_ERR) 848 { 849 if (speed_option_verbose) 850 printf ("mftb_works_p(): SIGILL not supported, assuming mftb works\n"); 851 return 1; 852 } 853 if (setjmp (mftb_works_buf)) 854 { 855 if (speed_option_verbose) 856 printf ("mftb_works_p(): SIGILL during mftb, so doesn't work\n"); 857 return 0; 858 } 859 MFTB (a); 860 signal (SIGILL, old_handler); 861 if (speed_option_verbose) 862 printf ("mftb_works_p(): mftb works\n"); 863 #else 864 865 if (speed_option_verbose) 866 printf ("mftb_works_p(): SIGILL not defined, assuming mftb works\n"); 867 #endif 868 869 #if ! HAVE_GETTIMEOFDAY 870 if (speed_option_verbose) 871 printf ("mftb_works_p(): no gettimeofday available to measure mftb\n"); 872 return 0; 873 #endif 874 875 /* The time base is normally 1/4 of the bus speed on 6xx and 7xx chips, on 876 other chips it can be driven from an external clock. 
*/ 877 cycletime = freq_measure ("mftb", freq_measure_mftb_one); 878 if (cycletime == -1.0) 879 { 880 if (speed_option_verbose) 881 printf ("mftb_works_p(): cannot measure mftb period\n"); 882 return 0; 883 } 884 885 mftb_unittime = cycletime; 886 return 1; 887 } 888 889 890 volatile unsigned *sgi_addr; 891 892 int 893 sgi_works_p (void) 894 { 895 #if HAVE_SYSSGI && HAVE_MMAP 896 static int result = -1; 897 898 size_t pagesize, offset; 899 __psunsigned_t phys, physpage; 900 void *virtpage; 901 unsigned period_picoseconds; 902 int size, fd; 903 904 if (result != -1) 905 return result; 906 907 phys = syssgi (SGI_QUERY_CYCLECNTR, &period_picoseconds); 908 if (phys == (__psunsigned_t) -1) 909 { 910 /* ENODEV is the error when a counter is not available */ 911 if (speed_option_verbose) 912 printf ("syssgi SGI_QUERY_CYCLECNTR error: %s\n", strerror (errno)); 913 result = 0; 914 return result; 915 } 916 sgi_unittime = period_picoseconds * 1e-12; 917 918 /* IRIX 5 doesn't have SGI_CYCLECNTR_SIZE, assume 32 bits in that case. 919 Challenge/ONYX hardware has a 64 bit byte counter, but there seems no 920 obvious way to identify that without SGI_CYCLECNTR_SIZE. 
*/ 921 #ifdef SGI_CYCLECNTR_SIZE 922 size = syssgi (SGI_CYCLECNTR_SIZE); 923 if (size == -1) 924 { 925 if (speed_option_verbose) 926 { 927 printf ("syssgi SGI_CYCLECNTR_SIZE error: %s\n", strerror (errno)); 928 printf (" will assume size==4\n"); 929 } 930 size = 32; 931 } 932 #else 933 size = 32; 934 #endif 935 936 if (size < 32) 937 { 938 printf ("syssgi SGI_CYCLECNTR_SIZE gives %d, expected 32 or 64\n", size); 939 result = 0; 940 return result; 941 } 942 943 pagesize = getpagesize(); 944 offset = (size_t) phys & (pagesize-1); 945 physpage = phys - offset; 946 947 /* shouldn't cross over a page boundary */ 948 ASSERT_ALWAYS (offset + size/8 <= pagesize); 949 950 fd = open("/dev/mmem", O_RDONLY); 951 if (fd == -1) 952 { 953 if (speed_option_verbose) 954 printf ("open /dev/mmem: %s\n", strerror (errno)); 955 result = 0; 956 return result; 957 } 958 959 virtpage = mmap (0, pagesize, PROT_READ, MAP_PRIVATE, fd, (off_t) physpage); 960 if (virtpage == (void *) -1) 961 { 962 if (speed_option_verbose) 963 printf ("mmap /dev/mmem: %s\n", strerror (errno)); 964 result = 0; 965 return result; 966 } 967 968 /* address of least significant 4 bytes, knowing mips is big endian */ 969 sgi_addr = (unsigned *) ((char *) virtpage + offset 970 + size/8 - sizeof(unsigned)); 971 result = 1; 972 return result; 973 974 #else /* ! (HAVE_SYSSGI && HAVE_MMAP) */ 975 return 0; 976 #endif 977 } 978 979 980 #define DEFAULT(var,n) \ 981 do { \ 982 if (! 
(var)) \ 983 (var) = (n); \ 984 } while (0) 985 986 void 987 speed_time_init (void) 988 { 989 double supplement_unittime = 0.0; 990 991 static int speed_time_initialized = 0; 992 if (speed_time_initialized) 993 return; 994 speed_time_initialized = 1; 995 996 speed_cycletime_init (); 997 998 if (!speed_option_cycles_broken && have_cycles && cycles_works_p ()) 999 { 1000 use_cycles = 1; 1001 DEFAULT (speed_cycletime, 1.0); 1002 speed_unittime = speed_cycletime; 1003 DEFAULT (speed_precision, 10000); 1004 strcpy (speed_time_string, "CPU cycle counter"); 1005 1006 /* only used if a supplementary method is chosen below */ 1007 cycles_limit = (have_cycles == 1 ? M_2POW32 : M_2POW64) / 2.0 1008 * speed_cycletime; 1009 1010 if (have_grus && getrusage_microseconds_p() && ! getrusage_backwards_p()) 1011 { 1012 /* this is a good combination */ 1013 use_grus = 1; 1014 supplement_unittime = grus_unittime = 1.0e-6; 1015 strcpy (speed_time_string, "CPU cycle counter, supplemented by microsecond getrusage()"); 1016 } 1017 else if (have_cycles == 1) 1018 { 1019 /* When speed_cyclecounter has a limited range, look for something 1020 to supplement it. 
*/ 1021 if (have_gtod && gettimeofday_microseconds_p()) 1022 { 1023 use_gtod = 1; 1024 supplement_unittime = gtod_unittime = 1.0e-6; 1025 strcpy (speed_time_string, "CPU cycle counter, supplemented by microsecond gettimeofday()"); 1026 } 1027 else if (have_grus) 1028 { 1029 use_grus = 1; 1030 supplement_unittime = grus_unittime = 1.0 / (double) clk_tck (); 1031 sprintf (speed_time_string, "CPU cycle counter, supplemented by %s clock tick getrusage()", unittime_string (supplement_unittime)); 1032 } 1033 else if (have_times) 1034 { 1035 use_times = 1; 1036 supplement_unittime = times_unittime = 1.0 / (double) clk_tck (); 1037 sprintf (speed_time_string, "CPU cycle counter, supplemented by %s clock tick times()", unittime_string (supplement_unittime)); 1038 } 1039 else if (have_gtod) 1040 { 1041 use_gtod = 1; 1042 supplement_unittime = gtod_unittime = 1.0 / (double) clk_tck (); 1043 sprintf (speed_time_string, "CPU cycle counter, supplemented by %s clock tick gettimeofday()", unittime_string (supplement_unittime)); 1044 } 1045 else 1046 { 1047 fprintf (stderr, "WARNING: cycle counter is 32 bits and there's no other functions.\n"); 1048 fprintf (stderr, " Wraparounds may produce bad results on long measurements.\n"); 1049 } 1050 } 1051 1052 if (use_grus || use_times || use_gtod) 1053 { 1054 /* must know cycle period to compare cycles to other measuring 1055 (via cycles_limit) */ 1056 speed_cycletime_need_seconds (); 1057 1058 if (speed_precision * supplement_unittime > cycles_limit) 1059 { 1060 fprintf (stderr, "WARNING: requested precision can't always be achieved due to limited range\n"); 1061 fprintf (stderr, " cycle counter and limited precision supplemental method\n"); 1062 fprintf (stderr, " (%s)\n", speed_time_string); 1063 } 1064 } 1065 } 1066 else if (have_stck) 1067 { 1068 strcpy (speed_time_string, "STCK timestamp"); 1069 /* stck is in units of 2^-12 microseconds, which is very likely higher 1070 resolution than a cpu cycle */ 1071 if (speed_cycletime == 
0.0) 1072 speed_cycletime_fail 1073 ("Need to know CPU frequency for effective stck unit"); 1074 speed_unittime = MAX (speed_cycletime, STCK_PERIOD); 1075 DEFAULT (speed_precision, 10000); 1076 } 1077 else if (have_mftb && mftb_works_p ()) 1078 { 1079 use_mftb = 1; 1080 DEFAULT (speed_precision, 10000); 1081 speed_unittime = mftb_unittime; 1082 sprintf (speed_time_string, "mftb counter (%s)", 1083 unittime_string (speed_unittime)); 1084 } 1085 else if (have_sgi && sgi_works_p ()) 1086 { 1087 use_sgi = 1; 1088 DEFAULT (speed_precision, 10000); 1089 speed_unittime = sgi_unittime; 1090 sprintf (speed_time_string, "syssgi() mmap counter (%s), supplemented by millisecond getrusage()", 1091 unittime_string (speed_unittime)); 1092 /* supplemented with getrusage, which we assume to have 1ms resolution */ 1093 use_grus = 1; 1094 supplement_unittime = 1e-3; 1095 } 1096 else if (have_rrt) 1097 { 1098 timebasestruct_t t; 1099 use_rrt = 1; 1100 DEFAULT (speed_precision, 10000); 1101 read_real_time (&t, sizeof(t)); 1102 switch (t.flag) { 1103 case RTC_POWER: 1104 /* FIXME: What's the actual RTC resolution? */ 1105 speed_unittime = 1e-7; 1106 strcpy (speed_time_string, "read_real_time() power nanoseconds"); 1107 break; 1108 case RTC_POWER_PC: 1109 t.tb_high = 1; 1110 t.tb_low = 0; 1111 time_base_to_time (&t, sizeof(t)); 1112 speed_unittime = TIMEBASESTRUCT_SECS(&t) / M_2POW32; 1113 sprintf (speed_time_string, "%s read_real_time() powerpc ticks", 1114 unittime_string (speed_unittime)); 1115 break; 1116 default: 1117 fprintf (stderr, "ERROR: Unrecognised timebasestruct_t flag=%d\n", 1118 t.flag); 1119 abort (); 1120 } 1121 } 1122 else if (have_cgt && cgt_works_p() && cgt_unittime < 1.5e-6) 1123 { 1124 /* use clock_gettime if microsecond or better resolution */ 1125 choose_cgt: 1126 use_cgt = 1; 1127 speed_unittime = cgt_unittime; 1128 DEFAULT (speed_precision, (cgt_unittime <= 0.1e-6 ? 
10000 : 1000)); 1129 strcpy (speed_time_string, "microsecond accurate clock_gettime()"); 1130 } 1131 else if (have_times && clk_tck() > 1000000) 1132 { 1133 /* Cray vector systems have times() which is clock cycle resolution 1134 (eg. 450 MHz). */ 1135 DEFAULT (speed_precision, 10000); 1136 goto choose_times; 1137 } 1138 else if (have_grus && getrusage_microseconds_p() && ! getrusage_backwards_p()) 1139 { 1140 use_grus = 1; 1141 speed_unittime = grus_unittime = 1.0e-6; 1142 DEFAULT (speed_precision, 1000); 1143 strcpy (speed_time_string, "microsecond accurate getrusage()"); 1144 } 1145 else if (have_gtod && gettimeofday_microseconds_p()) 1146 { 1147 use_gtod = 1; 1148 speed_unittime = gtod_unittime = 1.0e-6; 1149 DEFAULT (speed_precision, 1000); 1150 strcpy (speed_time_string, "microsecond accurate gettimeofday()"); 1151 } 1152 else if (have_cgt && cgt_works_p() && cgt_unittime < 1.5/clk_tck()) 1153 { 1154 /* use clock_gettime if 1 tick or better resolution */ 1155 goto choose_cgt; 1156 } 1157 else if (have_times) 1158 { 1159 use_tick_boundary = 1; 1160 DEFAULT (speed_precision, 200); 1161 choose_times: 1162 use_times = 1; 1163 speed_unittime = times_unittime = 1.0 / (double) clk_tck (); 1164 sprintf (speed_time_string, "%s clock tick times()", 1165 unittime_string (speed_unittime)); 1166 } 1167 else if (have_grus) 1168 { 1169 use_grus = 1; 1170 use_tick_boundary = 1; 1171 speed_unittime = grus_unittime = 1.0 / (double) clk_tck (); 1172 DEFAULT (speed_precision, 200); 1173 sprintf (speed_time_string, "%s clock tick getrusage()\n", 1174 unittime_string (speed_unittime)); 1175 } 1176 else if (have_gtod) 1177 { 1178 use_gtod = 1; 1179 use_tick_boundary = 1; 1180 speed_unittime = gtod_unittime = 1.0 / (double) clk_tck (); 1181 DEFAULT (speed_precision, 200); 1182 sprintf (speed_time_string, "%s clock tick gettimeofday()", 1183 unittime_string (speed_unittime)); 1184 } 1185 else 1186 { 1187 fprintf (stderr, "No time measuring method available\n"); 1188 fprintf (stderr, 
"None of: speed_cyclecounter(), STCK(), getrusage(), gettimeofday(), times()\n"); 1189 abort (); 1190 } 1191 1192 if (speed_option_verbose) 1193 { 1194 printf ("speed_time_init: %s\n", speed_time_string); 1195 printf (" speed_precision %d\n", speed_precision); 1196 printf (" speed_unittime %.2g\n", speed_unittime); 1197 if (supplement_unittime) 1198 printf (" supplement_unittime %.2g\n", supplement_unittime); 1199 printf (" use_tick_boundary %d\n", use_tick_boundary); 1200 if (have_cycles) 1201 printf (" cycles_limit %.2g seconds\n", cycles_limit); 1202 } 1203 } 1204 1205 1206 1207 /* Burn up CPU until a clock tick boundary, for greater accuracy. Set the 1208 corresponding "start_foo" appropriately too. */ 1209 1210 void 1211 grus_tick_boundary (void) 1212 { 1213 struct_rusage prev; 1214 getrusage (0, &prev); 1215 do { 1216 getrusage (0, &start_grus); 1217 } while (start_grus.ru_utime.tv_usec == prev.ru_utime.tv_usec); 1218 } 1219 1220 void 1221 gtod_tick_boundary (void) 1222 { 1223 struct_timeval prev; 1224 gettimeofday (&prev, NULL); 1225 do { 1226 gettimeofday (&start_gtod, NULL); 1227 } while (start_gtod.tv_usec == prev.tv_usec); 1228 } 1229 1230 void 1231 times_tick_boundary (void) 1232 { 1233 struct_tms prev; 1234 times (&prev); 1235 do 1236 times (&start_times); 1237 while (start_times.tms_utime == prev.tms_utime); 1238 } 1239 1240 1241 /* "have_" values are tested to let unused code go dead. 
*/ 1242 1243 void 1244 speed_starttime (void) 1245 { 1246 speed_time_init (); 1247 1248 if (have_grus && use_grus) 1249 { 1250 if (use_tick_boundary) 1251 grus_tick_boundary (); 1252 else 1253 getrusage (0, &start_grus); 1254 } 1255 1256 if (have_gtod && use_gtod) 1257 { 1258 if (use_tick_boundary) 1259 gtod_tick_boundary (); 1260 else 1261 gettimeofday (&start_gtod, NULL); 1262 } 1263 1264 if (have_times && use_times) 1265 { 1266 if (use_tick_boundary) 1267 times_tick_boundary (); 1268 else 1269 times (&start_times); 1270 } 1271 1272 if (have_cgt && use_cgt) 1273 clock_gettime (CGT_ID, &start_cgt); 1274 1275 if (have_rrt && use_rrt) 1276 read_real_time (&start_rrt, sizeof(start_rrt)); 1277 1278 if (have_sgi && use_sgi) 1279 start_sgi = *sgi_addr; 1280 1281 if (have_mftb && use_mftb) 1282 MFTB (start_mftb); 1283 1284 if (have_stck && use_stck) 1285 STCK (start_stck); 1286 1287 /* Cycles sampled last for maximum accuracy. */ 1288 if (have_cycles && use_cycles) 1289 speed_cyclecounter (start_cycles); 1290 } 1291 1292 1293 /* Calculate the difference between two cycle counter samples, as a "double" 1294 counter of cycles. 1295 1296 The start and end values are allowed to cancel in integers in case the 1297 counter values are bigger than the 53 bits that normally fit in a double. 1298 1299 This works even if speed_cyclecounter() puts a value bigger than 32-bits 1300 in the low word (the high word always gets a 2**32 multiplier though). */ 1301 1302 double 1303 speed_cyclecounter_diff (const unsigned end[2], const unsigned start[2]) 1304 { 1305 unsigned d; 1306 double t; 1307 1308 if (have_cycles == 1) 1309 { 1310 t = (end[0] - start[0]); 1311 } 1312 else 1313 { 1314 d = end[0] - start[0]; 1315 t = d - (d > end[0] ? 
M_2POWU : 0.0); 1316 t += (end[1] - start[1]) * M_2POW32; 1317 } 1318 return t; 1319 } 1320 1321 1322 double 1323 speed_mftb_diff (const unsigned end[2], const unsigned start[2]) 1324 { 1325 unsigned d; 1326 double t; 1327 1328 d = end[0] - start[0]; 1329 t = (double) d - (d > end[0] ? M_2POW32 : 0.0); 1330 t += (end[1] - start[1]) * M_2POW32; 1331 return t; 1332 } 1333 1334 1335 /* Calculate the difference between "start" and "end" using fields "sec" and 1336 "psec", where each "psec" is a "punit" of a second. 1337 1338 The seconds parts are allowed to cancel before being combined with the 1339 psec parts, in case a simple "sec+psec*punit" exceeds the precision of a 1340 double. 1341 1342 Total time is only calculated in a "double" since an integer count of 1343 psecs might overflow. 2^32 microseconds is only a bit over an hour, or 1344 2^32 nanoseconds only about 4 seconds. 1345 1346 The casts to "long" are for the benefit of timebasestruct_t, where the 1347 fields are only "unsigned int", but we want a signed difference. */ 1348 1349 #define DIFF_SECS_ROUTINE(sec, psec, punit) \ 1350 { \ 1351 long sec_diff, psec_diff; \ 1352 sec_diff = (long) end->sec - (long) start->sec; \ 1353 psec_diff = (long) end->psec - (long) start->psec; \ 1354 return (double) sec_diff + punit * (double) psec_diff; \ 1355 } 1356 1357 double 1358 timeval_diff_secs (const struct_timeval *end, const struct_timeval *start) 1359 { 1360 DIFF_SECS_ROUTINE (tv_sec, tv_usec, 1e-6); 1361 } 1362 1363 double 1364 rusage_diff_secs (const struct_rusage *end, const struct_rusage *start) 1365 { 1366 DIFF_SECS_ROUTINE (ru_utime.tv_sec, ru_utime.tv_usec, 1e-6); 1367 } 1368 1369 double 1370 timespec_diff_secs (const struct_timespec *end, const struct_timespec *start) 1371 { 1372 DIFF_SECS_ROUTINE (tv_sec, tv_nsec, 1e-9); 1373 } 1374 1375 /* This is for use after time_base_to_time, ie. for seconds and nanoseconds. 
*/ 1376 double 1377 timebasestruct_diff_secs (const timebasestruct_t *end, 1378 const timebasestruct_t *start) 1379 { 1380 DIFF_SECS_ROUTINE (tb_high, tb_low, 1e-9); 1381 } 1382 1383 1384 double 1385 speed_endtime (void) 1386 { 1387 #define END_USE(name,value) \ 1388 do { \ 1389 if (speed_option_verbose >= 3) \ 1390 printf ("speed_endtime(): used %s\n", name); \ 1391 result = value; \ 1392 goto done; \ 1393 } while (0) 1394 1395 #define END_ENOUGH(name,value) \ 1396 do { \ 1397 if (speed_option_verbose >= 3) \ 1398 printf ("speed_endtime(): %s gives enough precision\n", name); \ 1399 result = value; \ 1400 goto done; \ 1401 } while (0) 1402 1403 #define END_EXCEED(name,value) \ 1404 do { \ 1405 if (speed_option_verbose >= 3) \ 1406 printf ("speed_endtime(): cycle counter limit exceeded, used %s\n", \ 1407 name); \ 1408 result = value; \ 1409 goto done; \ 1410 } while (0) 1411 1412 unsigned end_cycles[2]; 1413 stck_t end_stck; 1414 unsigned end_mftb[2]; 1415 unsigned end_sgi; 1416 timebasestruct_t end_rrt; 1417 struct_timespec end_cgt; 1418 struct_timeval end_gtod; 1419 struct_rusage end_grus; 1420 struct_tms end_times; 1421 double t_gtod, t_grus, t_times, t_cgt; 1422 double t_rrt, t_sgi, t_mftb, t_stck, t_cycles; 1423 double result; 1424 1425 /* Cycles sampled first for maximum accuracy. 1426 "have_" values tested to let unused code go dead. 
*/ 1427 1428 if (have_cycles && use_cycles) speed_cyclecounter (end_cycles); 1429 if (have_stck && use_stck) STCK (end_stck); 1430 if (have_mftb && use_mftb) MFTB (end_mftb); 1431 if (have_sgi && use_sgi) end_sgi = *sgi_addr; 1432 if (have_rrt && use_rrt) read_real_time (&end_rrt, sizeof(end_rrt)); 1433 if (have_cgt && use_cgt) clock_gettime (CGT_ID, &end_cgt); 1434 if (have_gtod && use_gtod) gettimeofday (&end_gtod, NULL); 1435 if (have_grus && use_grus) getrusage (0, &end_grus); 1436 if (have_times && use_times) times (&end_times); 1437 1438 result = -1.0; 1439 1440 if (speed_option_verbose >= 4) 1441 { 1442 printf ("speed_endtime():\n"); 1443 if (use_cycles) 1444 printf (" cycles 0x%X,0x%X -> 0x%X,0x%X\n", 1445 start_cycles[1], start_cycles[0], 1446 end_cycles[1], end_cycles[0]); 1447 1448 if (use_stck) 1449 printf (" stck 0x%lX -> 0x%lX\n", start_stck, end_stck); 1450 1451 if (use_mftb) 1452 printf (" mftb 0x%X,%08X -> 0x%X,%08X\n", 1453 start_mftb[1], start_mftb[0], 1454 end_mftb[1], end_mftb[0]); 1455 1456 if (use_sgi) 1457 printf (" sgi 0x%X -> 0x%X\n", start_sgi, end_sgi); 1458 1459 if (use_rrt) 1460 printf (" read_real_time (%d)%u,%u -> (%d)%u,%u\n", 1461 start_rrt.flag, start_rrt.tb_high, start_rrt.tb_low, 1462 end_rrt.flag, end_rrt.tb_high, end_rrt.tb_low); 1463 1464 if (use_cgt) 1465 printf (" clock_gettime %ld.%09ld -> %ld.%09ld\n", 1466 start_cgt.tv_sec, start_cgt.tv_nsec, 1467 end_cgt.tv_sec, end_cgt.tv_nsec); 1468 1469 if (use_gtod) 1470 printf (" gettimeofday %ld.%06ld -> %ld.%06ld\n", 1471 start_gtod.tv_sec, start_gtod.tv_usec, 1472 end_gtod.tv_sec, end_gtod.tv_usec); 1473 1474 if (use_grus) 1475 printf (" getrusage %ld.%06ld -> %ld.%06ld\n", 1476 start_grus.ru_utime.tv_sec, start_grus.ru_utime.tv_usec, 1477 end_grus.ru_utime.tv_sec, end_grus.ru_utime.tv_usec); 1478 1479 if (use_times) 1480 printf (" times %ld -> %ld\n", 1481 start_times.tms_utime, end_times.tms_utime); 1482 } 1483 1484 if (use_rrt) 1485 { 1486 time_base_to_time (&start_rrt, 
sizeof(start_rrt)); 1487 time_base_to_time (&end_rrt, sizeof(end_rrt)); 1488 t_rrt = timebasestruct_diff_secs (&end_rrt, &start_rrt); 1489 END_USE ("read_real_time()", t_rrt); 1490 } 1491 1492 if (use_cgt) 1493 { 1494 t_cgt = timespec_diff_secs (&end_cgt, &start_cgt); 1495 END_USE ("clock_gettime()", t_cgt); 1496 } 1497 1498 if (use_grus) 1499 { 1500 t_grus = rusage_diff_secs (&end_grus, &start_grus); 1501 1502 /* Use getrusage() if the cycle counter limit would be exceeded, or if 1503 it provides enough accuracy already. */ 1504 if (use_cycles) 1505 { 1506 if (t_grus >= speed_precision*grus_unittime) 1507 END_ENOUGH ("getrusage()", t_grus); 1508 if (t_grus >= cycles_limit) 1509 END_EXCEED ("getrusage()", t_grus); 1510 } 1511 } 1512 1513 if (use_times) 1514 { 1515 t_times = (end_times.tms_utime - start_times.tms_utime) * times_unittime; 1516 1517 /* Use times() if the cycle counter limit would be exceeded, or if 1518 it provides enough accuracy already. */ 1519 if (use_cycles) 1520 { 1521 if (t_times >= speed_precision*times_unittime) 1522 END_ENOUGH ("times()", t_times); 1523 if (t_times >= cycles_limit) 1524 END_EXCEED ("times()", t_times); 1525 } 1526 } 1527 1528 if (use_gtod) 1529 { 1530 t_gtod = timeval_diff_secs (&end_gtod, &start_gtod); 1531 1532 /* Use gettimeofday() if it measured a value bigger than the cycle 1533 counter can handle. 
*/ 1534 if (use_cycles) 1535 { 1536 if (t_gtod >= cycles_limit) 1537 END_EXCEED ("gettimeofday()", t_gtod); 1538 } 1539 } 1540 1541 if (use_mftb) 1542 { 1543 t_mftb = speed_mftb_diff (end_mftb, start_mftb) * mftb_unittime; 1544 END_USE ("mftb", t_mftb); 1545 } 1546 1547 if (use_stck) 1548 { 1549 t_stck = (end_stck - start_stck) * STCK_PERIOD; 1550 END_USE ("stck", t_stck); 1551 } 1552 1553 if (use_sgi) 1554 { 1555 t_sgi = (end_sgi - start_sgi) * sgi_unittime; 1556 END_USE ("SGI hardware counter", t_sgi); 1557 } 1558 1559 if (use_cycles) 1560 { 1561 t_cycles = speed_cyclecounter_diff (end_cycles, start_cycles) 1562 * speed_cycletime; 1563 END_USE ("cycle counter", t_cycles); 1564 } 1565 1566 if (use_grus && getrusage_microseconds_p()) 1567 END_USE ("getrusage()", t_grus); 1568 1569 if (use_gtod && gettimeofday_microseconds_p()) 1570 END_USE ("gettimeofday()", t_gtod); 1571 1572 if (use_times) END_USE ("times()", t_times); 1573 if (use_grus) END_USE ("getrusage()", t_grus); 1574 if (use_gtod) END_USE ("gettimeofday()", t_gtod); 1575 1576 fprintf (stderr, "speed_endtime(): oops, no time method available\n"); 1577 abort (); 1578 1579 done: 1580 if (result < 0.0) 1581 { 1582 if (speed_option_verbose >= 2) 1583 fprintf (stderr, "speed_endtime(): warning, treating negative time as zero: %.9f\n", result); 1584 result = 0.0; 1585 } 1586 return result; 1587 } 1588