1package Benchmark; 2 3use strict; 4 5 6=head1 NAME 7 8Benchmark - benchmark running times of Perl code 9 10=head1 SYNOPSIS 11 12 use Benchmark qw(:all) ; 13 14 timethis ($count, "code"); 15 16 # Use Perl code in strings... 17 timethese($count, { 18 'Name1' => '...code1...', 19 'Name2' => '...code2...', 20 }); 21 22 # ... or use subroutine references. 23 timethese($count, { 24 'Name1' => sub { ...code1... }, 25 'Name2' => sub { ...code2... }, 26 }); 27 28 # cmpthese can be used both ways as well 29 cmpthese($count, { 30 'Name1' => '...code1...', 31 'Name2' => '...code2...', 32 }); 33 34 cmpthese($count, { 35 'Name1' => sub { ...code1... }, 36 'Name2' => sub { ...code2... }, 37 }); 38 39 # ...or in two stages 40 $results = timethese($count, 41 { 42 'Name1' => sub { ...code1... }, 43 'Name2' => sub { ...code2... }, 44 }, 45 'none' 46 ); 47 cmpthese( $results ) ; 48 49 $t = timeit($count, '...other code...') 50 print "$count loops of other code took:",timestr($t),"\n"; 51 52 $t = countit($time, '...other code...') 53 $count = $t->iters ; 54 print "$count loops of other code took:",timestr($t),"\n"; 55 56 # enable hires wallclock timing if possible 57 use Benchmark ':hireswallclock'; 58 59=head1 DESCRIPTION 60 61The Benchmark module encapsulates a number of routines to help you 62figure out how long it takes to execute some code. 63 64timethis - run a chunk of code several times 65 66timethese - run several chunks of code several times 67 68cmpthese - print results of timethese as a comparison chart 69 70timeit - run a chunk of code and see how long it goes 71 72countit - see how many times a chunk of code runs in a given time 73 74 75=head2 Methods 76 77=over 10 78 79=item new 80 81Returns the current time. Example: 82 83 use Benchmark; 84 $t0 = Benchmark->new; 85 # ... your code here ... 86 $t1 = Benchmark->new; 87 $td = timediff($t1, $t0); 88 print "the code took:",timestr($td),"\n"; 89 90=item debug 91 92Enables or disable debugging by setting the C<$Benchmark::Debug> flag: 93 94 Benchmark->debug(1); 95 $t = timeit(10, ' 5 ** $Global '); 96 Benchmark->debug(0); 97 98=item iters 99 100Returns the number of iterations. 101 102=back 103 104=head2 Standard Exports 105 106The following routines will be exported into your namespace 107if you use the Benchmark module: 108 109=over 10 110 111=item timeit(COUNT, CODE) 112 113Arguments: COUNT is the number of times to run the loop, and CODE is 114the code to run. CODE may be either a code reference or a string to 115be eval'd; either way it will be run in the caller's package. 116 117Returns: a Benchmark object. 118 119=item timethis ( COUNT, CODE, [ TITLE, [ STYLE ]] ) 120 121Time COUNT iterations of CODE. CODE may be a string to eval or a 122code reference; either way the CODE will run in the caller's package. 123Results will be printed to STDOUT as TITLE followed by the times. 124TITLE defaults to "timethis COUNT" if none is provided. STYLE 125determines the format of the output, as described for timestr() below. 126 127The COUNT can be zero or negative: this means the I<minimum number of 128CPU seconds> to run. A zero signifies the default of 3 seconds. For 129example to run at least for 10 seconds: 130 131 timethis(-10, $code) 132 133or to run two pieces of code tests for at least 3 seconds: 134 135 timethese(0, { test1 => '...', test2 => '...'}) 136 137CPU seconds is, in UNIX terms, the user time plus the system time of 138the process itself, as opposed to the real (wallclock) time and the 139time spent by the child processes. Less than 0.1 seconds is not 140accepted (-0.01 as the count, for example, will cause a fatal runtime 141exception). 142 143Note that the CPU seconds is the B<minimum> time: CPU scheduling and 144other operating system factors may complicate the attempt so that a 145little bit more time is spent. The benchmark output will, however, 146also tell the number of C<$code> runs/second, which should be a more 147interesting number than the actually spent seconds. 148 149Returns a Benchmark object. 150 151=item timethese ( COUNT, CODEHASHREF, [ STYLE ] ) 152 153The CODEHASHREF is a reference to a hash containing names as keys 154and either a string to eval or a code reference for each value. 155For each (KEY, VALUE) pair in the CODEHASHREF, this routine will 156call 157 158 timethis(COUNT, VALUE, KEY, STYLE) 159 160The routines are called in string comparison order of KEY. 161 162The COUNT can be zero or negative, see timethis(). 163 164Returns a hash reference of Benchmark objects, keyed by name. 165 166=item timediff ( T1, T2 ) 167 168Returns the difference between two Benchmark times as a Benchmark 169object suitable for passing to timestr(). 170 171=item timestr ( TIMEDIFF, [ STYLE, [ FORMAT ] ] ) 172 173Returns a string that formats the times in the TIMEDIFF object in 174the requested STYLE. TIMEDIFF is expected to be a Benchmark object 175similar to that returned by timediff(). 176 177STYLE can be any of 'all', 'none', 'noc', 'nop' or 'auto'. 'all' shows 178each of the 5 times available ('wallclock' time, user time, system time, 179user time of children, and system time of children). 'noc' shows all 180except the two children times. 'nop' shows only wallclock and the 181two children times. 'auto' (the default) will act as 'all' unless 182the children times are both zero, in which case it acts as 'noc'. 183'none' prevents output. 184 185FORMAT is the L<printf(3)>-style format specifier (without the 186leading '%') to use to print the times. It defaults to '5.2f'. 187 188=back 189 190=head2 Optional Exports 191 192The following routines will be exported into your namespace 193if you specifically ask that they be imported: 194 195=over 10 196 197=item clearcache ( COUNT ) 198 199Clear the cached time for COUNT rounds of the null loop. 200 201=item clearallcache ( ) 202 203Clear all cached times. 204 205=item cmpthese ( COUNT, CODEHASHREF, [ STYLE ] ) 206 207=item cmpthese ( RESULTSHASHREF, [ STYLE ] ) 208 209Optionally calls timethese(), then outputs comparison chart. This: 210 211 cmpthese( -1, { a => "++\$i", b => "\$i *= 2" } ) ; 212 213outputs a chart like: 214 215 Rate b a 216 b 2831802/s -- -61% 217 a 7208959/s 155% -- 218 219This chart is sorted from slowest to fastest, and shows the percent speed 220difference between each pair of tests. 221 222C<cmpthese> can also be passed the data structure that timethese() returns: 223 224 $results = timethese( -1, 225 { a => "++\$i", b => "\$i *= 2" } ) ; 226 cmpthese( $results ); 227 228in case you want to see both sets of results. 229If the first argument is an unblessed hash reference, 230that is RESULTSHASHREF; otherwise that is COUNT. 231 232Returns a reference to an ARRAY of rows, each row is an ARRAY of cells from the 233above chart, including labels. This: 234 235 my $rows = cmpthese( -1, 236 { a => '++$i', b => '$i *= 2' }, "none" ); 237 238returns a data structure like: 239 240 [ 241 [ '', 'Rate', 'b', 'a' ], 242 [ 'b', '2885232/s', '--', '-59%' ], 243 [ 'a', '7099126/s', '146%', '--' ], 244 ] 245 246B<NOTE>: This result value differs from previous versions, which returned 247the C<timethese()> result structure. If you want that, just use the two 248statement C<timethese>...C<cmpthese> idiom shown above. 249 250Incidentally, note the variance in the result values between the two examples; 251this is typical of benchmarking. If this were a real benchmark, you would 252probably want to run a lot more iterations. 253 254=item countit(TIME, CODE) 255 256Arguments: TIME is the minimum length of time to run CODE for, and CODE is 257the code to run. CODE may be either a code reference or a string to 258be eval'd; either way it will be run in the caller's package. 259 260TIME is I<not> negative. countit() will run the loop many times to 261calculate the speed of CODE before running it for TIME. The actual 262time run for will usually be greater than TIME due to system clock 263resolution, so it's best to look at the number of iterations divided 264by the times that you are concerned with, not just the iterations. 265 266Returns: a Benchmark object. 267 268=item disablecache ( ) 269 270Disable caching of timings for the null loop. This will force Benchmark 271to recalculate these timings for each new piece of code timed. 272 273=item enablecache ( ) 274 275Enable caching of timings for the null loop. The time taken for COUNT 276rounds of the null loop will be calculated only once for each 277different COUNT used. 278 279=item timesum ( T1, T2 ) 280 281Returns the sum of two Benchmark times as a Benchmark object suitable 282for passing to timestr(). 283 284=back 285 286=head2 :hireswallclock 287 288If the Time::HiRes module has been installed, you can specify the 289special tag C<:hireswallclock> for Benchmark (if Time::HiRes is not 290available, the tag will be silently ignored). This tag will cause the 291wallclock time to be measured in microseconds, instead of integer 292seconds. Note though that the speed computations are still conducted 293in CPU time, not wallclock time. 294 295=head1 Benchmark Object 296 297Many of the functions in this module return a Benchmark object, 298or in the case of C<timethese()>, a reference to a hash, the values of 299which are Benchmark objects. This is useful if you want to store or 300further process results from Benchmark functions. 301 302Internally the Benchmark object holds timing values, 303described in L</"NOTES"> below. 304The following methods can be used to access them: 305 306=over 4 307 308=item cpu_p 309 310Total CPU (User + System) of the main (parent) process. 311 312=item cpu_c 313 314Total CPU (User + System) of any children processes. 315 316=item cpu_a 317 318Total CPU of parent and any children processes. 319 320=item real 321 322Real elapsed time "wallclock seconds". 323 324=item iters 325 326Number of iterations run. 327 328=back 329 330The following illustrates use of the Benchmark object: 331 332 $result = timethis(100000, sub { ... }); 333 print "total CPU = ", $result->cpu_a, "\n"; 334 335=head1 NOTES 336 337The data is stored as a list of values from the time and times 338functions: 339 340 ($real, $user, $system, $children_user, $children_system, $iters) 341 342in seconds for the whole loop (not divided by the number of rounds). 343 344The timing is done using time(3) and times(3). 345 346Code is executed in the caller's package. 347 348The time of the null loop (a loop with the same 349number of rounds but empty loop body) is subtracted 350from the time of the real loop. 351 352The null loop times can be cached, the key being the 353number of rounds. The caching can be controlled using 354calls like these: 355 356 clearcache($key); 357 clearallcache(); 358 359 disablecache(); 360 enablecache(); 361 362Caching is off by default, as it can (usually slightly) decrease 363accuracy and does not usually noticeably affect runtimes. 364 365=head1 EXAMPLES 366 367For example, 368 369 use Benchmark qw( cmpthese ) ; 370 $x = 3; 371 cmpthese( -5, { 372 a => sub{$x*$x}, 373 b => sub{$x**2}, 374 } ); 375 376outputs something like this: 377 378 Benchmark: running a, b, each for at least 5 CPU seconds... 379 Rate b a 380 b 1559428/s -- -62% 381 a 4152037/s 166% -- 382 383 384while 385 386 use Benchmark qw( timethese cmpthese ) ; 387 $x = 3; 388 $r = timethese( -5, { 389 a => sub{$x*$x}, 390 b => sub{$x**2}, 391 } ); 392 cmpthese $r; 393 394outputs something like this: 395 396 Benchmark: running a, b, each for at least 5 CPU seconds... 397 a: 10 wallclock secs ( 5.14 usr + 0.13 sys = 5.27 CPU) @ 3835055.60/s (n=20210743) 398 b: 5 wallclock secs ( 5.41 usr + 0.00 sys = 5.41 CPU) @ 1574944.92/s (n=8520452) 399 Rate b a 400 b 1574945/s -- -59% 401 a 3835056/s 144% -- 402 403 404=head1 INHERITANCE 405 406Benchmark inherits from no other class, except of course 407for Exporter. 408 409=head1 CAVEATS 410 411Comparing eval'd strings with code references will give you 412inaccurate results: a code reference will show a slightly slower 413execution time than the equivalent eval'd string. 414 415The real time timing is done using time(2) and 416the granularity is therefore only one second. 417 418Short tests may produce negative figures because perl 419can appear to take longer to execute the empty loop 420than a short test; try: 421 422 timethis(100,'1'); 423 424The system time of the null loop might be slightly 425more than the system time of the loop with the actual 426code and therefore the difference might end up being E<lt> 0. 427 428=head1 SEE ALSO 429 430L<Devel::NYTProf> - a Perl code profiler 431 432=head1 AUTHORS 433 434Jarkko Hietaniemi <F<jhi@iki.fi>>, Tim Bunce <F<Tim.Bunce@ig.co.uk>> 435 436=head1 MODIFICATION HISTORY 437 438September 8th, 1994; by Tim Bunce. 439 440March 28th, 1997; by Hugo van der Sanden: added support for code 441references and the already documented 'debug' method; revamped 442documentation. 443 444April 04-07th, 1997: by Jarkko Hietaniemi, added the run-for-some-time 445functionality. 446 447September, 1999; by Barrie Slaymaker: math fixes and accuracy and 448efficiency tweaks. Added cmpthese(). A result is now returned from 449timethese(). Exposed countit() (was runfor()). 450 451December, 2001; by Nicholas Clark: make timestr() recognise the style 'none' 452and return an empty string. If cmpthese is calling timethese, make it pass the 453style in. (so that 'none' will suppress output). Make sub new dump its 454debugging output to STDERR, to be consistent with everything else. 455All bugs found while writing a regression test. 456 457September, 2002; by Jarkko Hietaniemi: add ':hireswallclock' special tag. 458 459February, 2004; by Chia-liang Kao: make cmpthese and timestr use time 460statistics for children instead of parent when the style is 'nop'. 461 462November, 2007; by Christophe Grosjean: make cmpthese and timestr compute 463time consistently with style argument, default is 'all' not 'noc' any more. 464 465=cut 466 467# evaluate something in a clean lexical environment 468sub _doeval { no strict; eval shift } 469 470# 471# put any lexicals at file scope AFTER here 472# 473 474use Carp; 475use Exporter; 476 477our(@ISA, @EXPORT, @EXPORT_OK, %EXPORT_TAGS, $VERSION); 478 479@ISA=qw(Exporter); 480@EXPORT=qw(timeit timethis timethese timediff timestr); 481@EXPORT_OK=qw(timesum cmpthese countit 482 clearcache clearallcache disablecache enablecache); 483%EXPORT_TAGS=( all => [ @EXPORT, @EXPORT_OK ] ) ; 484 485$VERSION = 1.18; 486 487# --- ':hireswallclock' special handling 488 489my $hirestime; 490 491sub mytime () { time } 492 493init(); 494 495sub BEGIN { 496 if (eval 'require Time::HiRes') { 497 import Time::HiRes qw(time); 498 $hirestime = \&Time::HiRes::time; 499 } 500} 501 502sub import { 503 my $class = shift; 504 if (grep { $_ eq ":hireswallclock" } @_) { 505 @_ = grep { $_ ne ":hireswallclock" } @_; 506 local $^W=0; 507 *mytime = $hirestime if defined $hirestime; 508 } 509 Benchmark->export_to_level(1, $class, @_); 510} 511 512our($Debug, $Min_Count, $Min_CPU, $Default_Format, $Default_Style, 513 %_Usage, %Cache, $Do_Cache); 514 515sub init { 516 $Debug = 0; 517 $Min_Count = 4; 518 $Min_CPU = 0.4; 519 $Default_Format = '5.2f'; 520 $Default_Style = 'auto'; 521 # The cache can cause a slight loss of sys time accuracy. If a 522 # user does many tests (>10) with *very* large counts (>10000) 523 # or works on a very slow machine the cache may be useful. 524 disablecache(); 525 clearallcache(); 526} 527 528sub debug { $Debug = ($_[1] != 0); } 529 530sub usage { 531 my $calling_sub = (caller(1))[3]; 532 $calling_sub =~ s/^Benchmark:://; 533 return $_Usage{$calling_sub} || ''; 534} 535 536# The cache needs two branches: 's' for strings and 'c' for code. The 537# empty loop is different in these two cases. 538 539$_Usage{clearcache} = <<'USAGE'; 540usage: clearcache($count); 541USAGE 542 543sub clearcache { 544 die usage unless @_ == 1; 545 delete $Cache{"$_[0]c"}; delete $Cache{"$_[0]s"}; 546} 547 548$_Usage{clearallcache} = <<'USAGE'; 549usage: clearallcache(); 550USAGE 551 552sub clearallcache { 553 die usage if @_; 554 %Cache = (); 555} 556 557$_Usage{enablecache} = <<'USAGE'; 558usage: enablecache(); 559USAGE 560 561sub enablecache { 562 die usage if @_; 563 $Do_Cache = 1; 564} 565 566$_Usage{disablecache} = <<'USAGE'; 567usage: disablecache(); 568USAGE 569 570sub disablecache { 571 die usage if @_; 572 $Do_Cache = 0; 573} 574 575 576# --- Functions to process the 'time' data type 577 578sub new { my @t = (mytime, times, @_ == 2 ? $_[1] : 0); 579 print STDERR "new=@t\n" if $Debug; 580 bless \@t; } 581 582sub cpu_p { my($r,$pu,$ps,$cu,$cs) = @{$_[0]}; $pu+$ps ; } 583sub cpu_c { my($r,$pu,$ps,$cu,$cs) = @{$_[0]}; $cu+$cs ; } 584sub cpu_a { my($r,$pu,$ps,$cu,$cs) = @{$_[0]}; $pu+$ps+$cu+$cs ; } 585sub real { my($r,$pu,$ps,$cu,$cs) = @{$_[0]}; $r ; } 586sub iters { $_[0]->[5] ; } 587 588 589$_Usage{timediff} = <<'USAGE'; 590usage: $result_diff = timediff($result1, $result2); 591USAGE 592 593sub timediff { 594 my($a, $b) = @_; 595 596 die usage unless ref $a and ref $b; 597 598 my @r; 599 for (my $i=0; $i < @$a; ++$i) { 600 push(@r, $a->[$i] - $b->[$i]); 601 } 602 #die "Bad timediff(): ($r[1] + $r[2]) <= 0 (@$a[1,2]|@$b[1,2])\n" 603 # if ($r[1] + $r[2]) < 0; 604 bless \@r; 605} 606 607$_Usage{timesum} = <<'USAGE'; 608usage: $sum = timesum($result1, $result2); 609USAGE 610 611sub timesum { 612 my($a, $b) = @_; 613 614 die usage unless ref $a and ref $b; 615 616 my @r; 617 for (my $i=0; $i < @$a; ++$i) { 618 push(@r, $a->[$i] + $b->[$i]); 619 } 620 bless \@r; 621} 622 623 624$_Usage{timestr} = <<'USAGE'; 625usage: $formatted_result = timestr($result1); 626USAGE 627 628sub timestr { 629 my($tr, $style, $f) = @_; 630 631 die usage unless ref $tr; 632 633 my @t = @$tr; 634 warn "bad time value (@t)" unless @t==6; 635 my($r, $pu, $ps, $cu, $cs, $n) = @t; 636 my($pt, $ct, $tt) = ($tr->cpu_p, $tr->cpu_c, $tr->cpu_a); 637 $f = $Default_Format unless defined $f; 638 # format a time in the required style, other formats may be added here 639 $style ||= $Default_Style; 640 return '' if $style eq 'none'; 641 $style = ($ct>0) ? 'all' : 'noc' if $style eq 'auto'; 642 my $s = "@t $style"; # default for unknown style 643 my $w = $hirestime ? "%2g" : "%2d"; 644 $s = sprintf("$w wallclock secs (%$f usr %$f sys + %$f cusr %$f csys = %$f CPU)", 645 $r,$pu,$ps,$cu,$cs,$tt) if $style eq 'all'; 646 $s = sprintf("$w wallclock secs (%$f usr + %$f sys = %$f CPU)", 647 $r,$pu,$ps,$pt) if $style eq 'noc'; 648 $s = sprintf("$w wallclock secs (%$f cusr + %$f csys = %$f CPU)", 649 $r,$cu,$cs,$ct) if $style eq 'nop'; 650 my $elapsed = do { 651 if ($style eq 'nop') {$cu+$cs} 652 elsif ($style eq 'noc') {$pu+$ps} 653 else {$cu+$cs+$pu+$ps} 654 }; 655 $s .= sprintf(" @ %$f/s (n=$n)",$n/($elapsed)) if $n && $elapsed; 656 $s; 657} 658 659sub timedebug { 660 my($msg, $t) = @_; 661 print STDERR "$msg",timestr($t),"\n" if $Debug; 662} 663 664# --- Functions implementing low-level support for timing loops 665 666$_Usage{runloop} = <<'USAGE'; 667usage: runloop($number, [$string | $coderef]) 668USAGE 669 670sub runloop { 671 my($n, $c) = @_; 672 673 $n+=0; # force numeric now, so garbage won't creep into the eval 674 croak "negative loopcount $n" if $n<0; 675 confess usage unless defined $c; 676 my($t0, $t1, $td); # before, after, difference 677 678 # find package of caller so we can execute code there 679 my($curpack) = caller(0); 680 my($i, $pack)= 0; 681 while (($pack) = caller(++$i)) { 682 last if $pack ne $curpack; 683 } 684 685 my ($subcode, $subref); 686 if (ref $c eq 'CODE') { 687 $subcode = "sub { for (1 .. $n) { local \$_; package $pack; &\$c; } }"; 688 $subref = eval $subcode; 689 } 690 else { 691 $subcode = "sub { for (1 .. $n) { local \$_; package $pack; $c;} }"; 692 $subref = _doeval($subcode); 693 } 694 croak "runloop unable to compile '$c': $@\ncode: $subcode\n" if $@; 695 print STDERR "runloop $n '$subcode'\n" if $Debug; 696 697 # Wait for the user timer to tick. This makes the error range more like 698 # -0.01, +0. If we don't wait, then it's more like -0.01, +0.01. This 699 # may not seem important, but it significantly reduces the chances of 700 # getting a too low initial $n in the initial, 'find the minimum' loop 701 # in &countit. This, in turn, can reduce the number of calls to 702 # &runloop a lot, and thus reduce additive errors. 703 my $tbase = Benchmark->new(0)->[1]; 704 while ( ( $t0 = Benchmark->new(0) )->[1] == $tbase ) {} ; 705 $subref->(); 706 $t1 = Benchmark->new($n); 707 $td = &timediff($t1, $t0); 708 timedebug("runloop:",$td); 709 $td; 710} 711 712$_Usage{timeit} = <<'USAGE'; 713usage: $result = timeit($count, 'code' ); or 714 $result = timeit($count, sub { code } ); 715USAGE 716 717sub timeit { 718 my($n, $code) = @_; 719 my($wn, $wc, $wd); 720 721 die usage unless defined $code and 722 (!ref $code or ref $code eq 'CODE'); 723 724 printf STDERR "timeit $n $code\n" if $Debug; 725 my $cache_key = $n . ( ref( $code ) ? 'c' : 's' ); 726 if ($Do_Cache && exists $Cache{$cache_key} ) { 727 $wn = $Cache{$cache_key}; 728 } else { 729 $wn = &runloop($n, ref( $code ) ? sub { } : '' ); 730 # Can't let our baseline have any iterations, or they get subtracted 731 # out of the result. 732 $wn->[5] = 0; 733 $Cache{$cache_key} = $wn; 734 } 735 736 $wc = &runloop($n, $code); 737 738 $wd = timediff($wc, $wn); 739 timedebug("timeit: ",$wc); 740 timedebug(" - ",$wn); 741 timedebug(" = ",$wd); 742 743 $wd; 744} 745 746 747my $default_for = 3; 748my $min_for = 0.1; 749 750 751$_Usage{countit} = <<'USAGE'; 752usage: $result = countit($time, 'code' ); or 753 $result = countit($time, sub { code } ); 754USAGE 755 756sub countit { 757 my ( $tmax, $code ) = @_; 758 759 die usage unless @_; 760 761 if ( not defined $tmax or $tmax == 0 ) { 762 $tmax = $default_for; 763 } elsif ( $tmax < 0 ) { 764 $tmax = -$tmax; 765 } 766 767 die "countit($tmax, ...): timelimit cannot be less than $min_for.\n" 768 if $tmax < $min_for; 769 770 my ($n, $tc); 771 772 # First find the minimum $n that gives a significant timing. 773 my $zeros=0; 774 for ($n = 1; ; $n *= 2 ) { 775 my $t0 = Benchmark->new(0); 776 my $td = timeit($n, $code); 777 my $t1 = Benchmark->new(0); 778 $tc = $td->[1] + $td->[2]; 779 if ( $tc <= 0 and $n > 1024 ) { 780 my $d = timediff($t1, $t0); 781 # note that $d is the total CPU time taken to call timeit(), 782 # while $tc is is difference in CPU secs between the empty run 783 # and the code run. If the code is trivial, its possible 784 # for $d to get large while $tc is still zero (or slightly 785 # negative). Bail out once timeit() starts taking more than a 786 # few seconds without noticeable difference. 787 if ($d->[1] + $d->[2] > 8 788 || ++$zeros > 16) 789 { 790 die "Timing is consistently zero in estimation loop, cannot benchmark. N=$n\n"; 791 } 792 } else { 793 $zeros = 0; 794 } 795 last if $tc > 0.1; 796 } 797 798 my $nmin = $n; 799 800 # Get $n high enough that we can guess the final $n with some accuracy. 801 my $tpra = 0.1 * $tmax; # Target/time practice. 802 while ( $tc < $tpra ) { 803 # The 5% fudge is to keep us from iterating again all 804 # that often (this speeds overall responsiveness when $tmax is big 805 # and we guess a little low). This does not noticeably affect 806 # accuracy since we're not counting these times. 807 $n = int( $tpra * 1.05 * $n / $tc ); # Linear approximation. 808 my $td = timeit($n, $code); 809 my $new_tc = $td->[1] + $td->[2]; 810 # Make sure we are making progress. 811 $tc = $new_tc > 1.2 * $tc ? $new_tc : 1.2 * $tc; 812 } 813 814 # Now, do the 'for real' timing(s), repeating until we exceed 815 # the max. 816 my $ntot = 0; 817 my $rtot = 0; 818 my $utot = 0.0; 819 my $stot = 0.0; 820 my $cutot = 0.0; 821 my $cstot = 0.0; 822 my $ttot = 0.0; 823 824 # The 5% fudge is because $n is often a few % low even for routines 825 # with stable times and avoiding extra timeit()s is nice for 826 # accuracy's sake. 827 $n = int( $n * ( 1.05 * $tmax / $tc ) ); 828 $zeros=0; 829 while () { 830 my $td = timeit($n, $code); 831 $ntot += $n; 832 $rtot += $td->[0]; 833 $utot += $td->[1]; 834 $stot += $td->[2]; 835 $cutot += $td->[3]; 836 $cstot += $td->[4]; 837 $ttot = $utot + $stot; 838 last if $ttot >= $tmax; 839 if ( $ttot <= 0 ) { 840 ++$zeros > 16 841 and die "Timing is consistently zero, cannot benchmark. N=$n\n"; 842 } else { 843 $zeros = 0; 844 } 845 $ttot = 0.01 if $ttot < 0.01; 846 my $r = $tmax / $ttot - 1; # Linear approximation. 847 $n = int( $r * $ntot ); 848 $n = $nmin if $n < $nmin; 849 } 850 851 return bless [ $rtot, $utot, $stot, $cutot, $cstot, $ntot ]; 852} 853 854# --- Functions implementing high-level time-then-print utilities 855 856sub n_to_for { 857 my $n = shift; 858 return $n == 0 ? $default_for : $n < 0 ? -$n : undef; 859} 860 861$_Usage{timethis} = <<'USAGE'; 862usage: $result = timethis($time, 'code' ); or 863 $result = timethis($time, sub { code } ); 864USAGE 865 866sub timethis{ 867 my($n, $code, $title, $style) = @_; 868 my($t, $forn); 869 870 die usage unless defined $code and 871 (!ref $code or ref $code eq 'CODE'); 872 873 if ( $n > 0 ) { 874 croak "non-integer loopcount $n, stopped" if int($n)<$n; 875 $t = timeit($n, $code); 876 $title = "timethis $n" unless defined $title; 877 } else { 878 my $fort = n_to_for( $n ); 879 $t = countit( $fort, $code ); 880 $title = "timethis for $fort" unless defined $title; 881 $forn = $t->[-1]; 882 } 883 local $| = 1; 884 $style = "" unless defined $style; 885 printf("%10s: ", $title) unless $style eq 'none'; 886 print timestr($t, $style, $Default_Format),"\n" unless $style eq 'none'; 887 888 $n = $forn if defined $forn; 889 890 # A conservative warning to spot very silly tests. 891 # Don't assume that your benchmark is ok simply because 892 # you don't get this warning! 893 print " (warning: too few iterations for a reliable count)\n" 894 if $n < $Min_Count 895 || ($t->real < 1 && $n < 1000) 896 || $t->cpu_a < $Min_CPU; 897 $t; 898} 899 900 901$_Usage{timethese} = <<'USAGE'; 902usage: timethese($count, { Name1 => 'code1', ... }); or 903 timethese($count, { Name1 => sub { code1 }, ... }); 904USAGE 905 906sub timethese{ 907 my($n, $alt, $style) = @_; 908 die usage unless ref $alt eq 'HASH'; 909 910 my @names = sort keys %$alt; 911 $style = "" unless defined $style; 912 print "Benchmark: " unless $style eq 'none'; 913 if ( $n > 0 ) { 914 croak "non-integer loopcount $n, stopped" if int($n)<$n; 915 print "timing $n iterations of" unless $style eq 'none'; 916 } else { 917 print "running" unless $style eq 'none'; 918 } 919 print " ", join(', ',@names) unless $style eq 'none'; 920 unless ( $n > 0 ) { 921 my $for = n_to_for( $n ); 922 print ", each" if $n > 1 && $style ne 'none'; 923 print " for at least $for CPU seconds" unless $style eq 'none'; 924 } 925 print "...\n" unless $style eq 'none'; 926 927 # we could save the results in an array and produce a summary here 928 # sum, min, max, avg etc etc 929 my %results; 930 foreach my $name (@names) { 931 $results{$name} = timethis ($n, $alt -> {$name}, $name, $style); 932 } 933 934 return \%results; 935} 936 937 938$_Usage{cmpthese} = <<'USAGE'; 939usage: cmpthese($count, { Name1 => 'code1', ... }); or 940 cmpthese($count, { Name1 => sub { code1 }, ... }); or 941 cmpthese($result, $style); 942USAGE 943 944sub cmpthese{ 945 my ($results, $style); 946 947 # $count can be a blessed object. 948 if ( ref $_[0] eq 'HASH' ) { 949 ($results, $style) = @_; 950 } 951 else { 952 my($count, $code) = @_[0,1]; 953 $style = $_[2] if defined $_[2]; 954 955 die usage unless ref $code eq 'HASH'; 956 957 $results = timethese($count, $code, ($style || "none")); 958 } 959 960 $style = "" unless defined $style; 961 962 # Flatten in to an array of arrays with the name as the first field 963 my @vals = map{ [ $_, @{$results->{$_}} ] } keys %$results; 964 965 for (@vals) { 966 # The epsilon fudge here is to prevent div by 0. Since clock 967 # resolutions are much larger, it's below the noise floor. 968 my $elapsed = do { 969 if ($style eq 'nop') {$_->[4]+$_->[5]} 970 elsif ($style eq 'noc') {$_->[2]+$_->[3]} 971 else {$_->[2]+$_->[3]+$_->[4]+$_->[5]} 972 }; 973 my $rate = $_->[6]/(($elapsed)+0.000000000000001); 974 $_->[7] = $rate; 975 } 976 977 # Sort by rate 978 @vals = sort { $a->[7] <=> $b->[7] } @vals; 979 980 # If more than half of the rates are greater than one... 981 my $display_as_rate = @vals ? ($vals[$#vals>>1]->[7] > 1) : 0; 982 983 my @rows; 984 my @col_widths; 985 986 my @top_row = ( 987 '', 988 $display_as_rate ? 'Rate' : 's/iter', 989 map { $_->[0] } @vals 990 ); 991 992 push @rows, \@top_row; 993 @col_widths = map { length( $_ ) } @top_row; 994 995 # Build the data rows 996 # We leave the last column in even though it never has any data. Perhaps 997 # it should go away. Also, perhaps a style for a single column of 998 # percentages might be nice. 999 for my $row_val ( @vals ) { 1000 my @row; 1001 1002 # Column 0 = test name 1003 push @row, $row_val->[0]; 1004 $col_widths[0] = length( $row_val->[0] ) 1005 if length( $row_val->[0] ) > $col_widths[0]; 1006 1007 # Column 1 = performance 1008 my $row_rate = $row_val->[7]; 1009 1010 # We assume that we'll never get a 0 rate. 1011 my $rate = $display_as_rate ? $row_rate : 1 / $row_rate; 1012 1013 # Only give a few decimal places before switching to sci. notation, 1014 # since the results aren't usually that accurate anyway. 1015 my $format = 1016 $rate >= 100 ? 1017 "%0.0f" : 1018 $rate >= 10 ? 1019 "%0.1f" : 1020 $rate >= 1 ? 1021 "%0.2f" : 1022 $rate >= 0.1 ? 1023 "%0.3f" : 1024 "%0.2e"; 1025 1026 $format .= "/s" 1027 if $display_as_rate; 1028 1029 my $formatted_rate = sprintf( $format, $rate ); 1030 push @row, $formatted_rate; 1031 $col_widths[1] = length( $formatted_rate ) 1032 if length( $formatted_rate ) > $col_widths[1]; 1033 1034 # Columns 2..N = performance ratios 1035 my $skip_rest = 0; 1036 for ( my $col_num = 0 ; $col_num < @vals ; ++$col_num ) { 1037 my $col_val = $vals[$col_num]; 1038 my $out; 1039 if ( $skip_rest ) { 1040 $out = ''; 1041 } 1042 elsif ( $col_val->[0] eq $row_val->[0] ) { 1043 $out = "--"; 1044 # $skip_rest = 1; 1045 } 1046 else { 1047 my $col_rate = $col_val->[7]; 1048 $out = sprintf( "%.0f%%", 100*$row_rate/$col_rate - 100 ); 1049 } 1050 push @row, $out; 1051 $col_widths[$col_num+2] = length( $out ) 1052 if length( $out ) > $col_widths[$col_num+2]; 1053 1054 # A little weirdness to set the first column width properly 1055 $col_widths[$col_num+2] = length( $col_val->[0] ) 1056 if length( $col_val->[0] ) > $col_widths[$col_num+2]; 1057 } 1058 push @rows, \@row; 1059 } 1060 1061 return \@rows if $style eq "none"; 1062 1063 # Equalize column widths in the chart as much as possible without 1064 # exceeding 80 characters. This does not use or affect cols 0 or 1. 1065 my @sorted_width_refs = 1066 sort { $$a <=> $$b } map { \$_ } @col_widths[2..$#col_widths]; 1067 my $max_width = ${$sorted_width_refs[-1]}; 1068 1069 my $total = @col_widths - 1 ; 1070 for ( @col_widths ) { $total += $_ } 1071 1072 STRETCHER: 1073 while ( $total < 80 ) { 1074 my $min_width = ${$sorted_width_refs[0]}; 1075 last 1076 if $min_width == $max_width; 1077 for ( @sorted_width_refs ) { 1078 last 1079 if $$_ > $min_width; 1080 ++$$_; 1081 ++$total; 1082 last STRETCHER 1083 if $total >= 80; 1084 } 1085 } 1086 1087 # Dump the output 1088 my $format = join( ' ', map { "%${_}s" } @col_widths ) . "\n"; 1089 substr( $format, 1, 0 ) = '-'; 1090 for ( @rows ) { 1091 printf $format, @$_; 1092 } 1093 1094 return \@rows ; 1095} 1096 1097 10981; 1099