#!/usr/bin/perl
#
# A tool for analysing the performance of the code snippets found in
# t/perf/benchmarks or similar


=head1 NAME

bench.pl - Compare the performance of perl code snippets across multiple
perls.

=head1 SYNOPSIS

    # Basic: run the tests in t/perf/benchmarks against two or
    # more perls

    bench.pl [options] perlA[=labelA] perlB[=labelB] ...

    # run the tests against the same perl twice, with varying options

    bench.pl [options] perlA=bigint --args='-Mbigint' perlA=plain

    # Run bench on blead, saving results to file; then modify the blead
    # binary, and benchmark again, comparing against the saved results

    bench.pl [options] --write=blead.time ./perl=blead
    # ... hack hack hack, updating ./perl ...
    bench.pl --read=blead.time ./perl=hacked

    # You can also combine --read with --write and new benchmark runs

    bench.pl --read=blead.time --write=last.time -- ./perl=hacked

=head1 DESCRIPTION

By default, F<bench.pl> will run code snippets found in
F<t/perf/benchmarks> (or similar) under cachegrind, in order to calculate
how many instruction reads, data writes, branches, cache misses, etc.
one execution of the snippet uses. Usually it will run them against two or
more perl executables and show how much each test has gotten better or
worse.

It is modelled on the F<perlbench> tool, but since it measures instruction
reads etc., rather than timings, it is much more precise and reproducible.
It is also considerably faster, and is capable of running tests in
parallel (with C<-j>). Rather than displaying a single relative
percentage per test/perl combination, it displays values for 13 different
measurements, such as instruction reads, conditional branch misses etc.

There are options to write the raw data to a file, and to read it back.
This means that you can view the same run data in different views with
different selection and sort options. You can also use this mechanism
to save the results of timing one perl, and then read it back while timing
a modification, so that you don't have to rerun the same tests on the same
perl over and over, or have two perl executables built at the same time.

The optional C<=label> after each perl executable is used in the display
output. If you are doing a two-step benchmark then you should provide
a label for at least the "base" perl. If a label isn't specified, it
defaults to the name of the perl executable. Labels must be unique across
all current executables, plus any previous ones obtained via --read.

In its most general form, the specification of a perl executable is:

    path/perl=+mylabel --args='-foo -bar' --args='-baz' \
        --env='A=a' --env='B=b'

This defines how to run the executable F<path/perl>. It has a label,
which due to the C<+>, is appended to the executable name to give a label
of C<path/perl+mylabel> (without the C<+>, the label would be just
C<mylabel>).

It can be optionally followed by one or more C<--args> or C<--env>
switches, which specify extra command line arguments or environment
variables to use when invoking that executable. Each C<--env> switch
should be of the form C<--env=VARIABLE=value>. Any C<--args> values are
concatenated to the eventual command line, along with the global
C<--perlargs> value if any. The above would cause a system() call looking
something like:

    PERL_HASH_SEED=0 A=a B=b valgrind --tool=cachegrind \
        path/perl -foo -bar -baz ....
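
The benchmarks file is a perl source file which, when C<do>-ed, returns
an array ref of (name, hash) pairs; the recognised hash keys are C<desc>,
C<setup>, C<code>, and the optional C<pre>, C<post> and C<compile>. A
hypothetical entry (illustrative only, not an excerpt from the real file)
might look like:

    [
        'expr::assign::scalar_lex' => {
            desc  => 'lexical $x = 1',
            setup => 'my $x',
            code  => '$x = 1',
        },
    ];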

=head1 OPTIONS

=head2 General options

=over 4

=item *

--action=I<foo>

What action to perform. The default is I<grind>, which runs the benchmarks
using I<cachegrind> as the back end. The only other action at the moment is
I<selftest>, which runs some basic sanity checks and produces TAP output.

=item *

--debug

Enable debugging output.

=item *

--help

Display basic usage information.

=item *

-v
--verbose

Display progress information.

=back

=head2 Test selection options

=over 4

=item *

--tests=I<FOO>

Specify a subset of tests to run (or in the case of C<--read>, to read).
It may be either a comma-separated list of test names, or a regular
expression. For example

    --tests=expr::assign::scalar_lex,expr::assign::2list_lex
    --tests=/^expr::/


=back

=head2 Input options

=over 4


=item *

-r I<file>
--read=I<file>

Read in saved data from a previous C<--write> run from the specified file.
If C<--tests> is present too, then only tests matching those conditions
are read from the file.

C<--read> may be specified multiple times, in which case the results
across all files are aggregated. The list of test names from each file
(after filtering by C<--tests>) must be identical across all files.

This list of tests is used instead of the one obtained from the normal
benchmark file (or C<--benchfile>) for any benchmarks that are run.

The perl labels must be unique across all read-in test results.

Requires C<JSON::PP> to be available.

=back

=head2 Benchmarking options

Benchmarks will be run for all perls specified on the command line.
These options can be used to modify the benchmarking behavior:

=over 4

=item *

--autolabel

Generate a unique label for every executable which doesn't have an
explicit C<=label>. This works by stripping out common prefixes and
suffixes from the executable names, then, for any names which are still
not unique, appending C<-0>, C<-1>, etc. Text directly surrounding the
unique part which looks like a version number (i.e. which matches
C</[0-9\.]+/>) isn't stripped. For example,

    perl-5.20.0-threaded perl-5.22.0-threaded perl-5.24.0-threaded

stripped to unique parts would be:

    20 22 24

but is actually only stripped down to:

    5.20.0 5.22.0 5.24.0

If the final labels are plain integers, they are prefixed with "p"
to avoid looking like column numbers to switches like C<--norm=2>.


=item *

--benchfile=I<foo>

The path of the file which contains the benchmarks (F<t/perf/benchmarks>
by default).

=item *

--grindargs=I<foo>

Optional command-line arguments to pass to all cachegrind invocations.

=item *

-j I<N>
--jobs=I<N>

Run I<N> jobs in parallel (default 1). This determines how many cachegrind
processes will run at a time, and should generally be set to the number
of CPUs available.

=item *

--perlargs=I<foo>

Optional command-line arguments to pass to every perl executable. This
may optionally be combined with C<--args> switches following individual
perls. For example:

    bench.pl --perlargs='-Ilib -It/lib' .... \
        perlA --args='-Mstrict' \
        perlB --args='-Mwarnings'

would cause the invocations

    perlA -Ilib -It/lib -Mstrict
    perlB -Ilib -It/lib -Mwarnings

=back

=head2 Output options

Any results accumulated via --read or by running benchmarks can be output
in any or all of these three ways:

=over 4

=item *

-w I<file>
--write=I<file>

Save the raw data to the specified file. It can be read back later with
C<--read>. If combined with C<--read>, then the output file will be
the merge of the file read and any additional perls added on the command
line.

Requires C<JSON::PP> to be available.

=item *

--bisect=I<field,minval,maxval>

Exit with a zero status if the named field is in the specified range;
exit with 1 otherwise. It will complain if more than one test or perl has
been specified. It is intended to be called as part of a bisect run, to
determine when something changed. For example,

    bench.pl -j 8 --tests=foo --bisect=Ir,100,105 --perlargs=-Ilib \
        ./miniperl

might be called from bisect to find when the number of instruction reads
for test I<foo> falls outside the range 100..105.

=item *

--show

Display the results to stdout in human-readable form. This is enabled by
default, except with --write and --bisect. The following sub-options alter
how --show behaves.

=over 4

=item *

--average

Only display the overall average, rather than the results for each
individual test.

=item *

--compact=I<perl>

Display the results for a single perl executable in a compact form.
Which perl to display is specified in the same manner as C<--norm>.

=item *

--fields=I<a,b,c>

Display only the specified fields; for example,

    --fields=Ir,Ir_m,Ir_mm

If only one field is selected, the output is in a more compact form.

=item *

--norm=I<foo>

Specify which perl column in the output to treat as the 100% norm.
It may be:

=over

=item *

a column number (0..N-1),

=item *

a negative column number (-1..-N) which counts from the right (so -1 is
the right-most column),

=item *

a perl executable name, or

=item *

a perl executable label.

=back

It defaults to the leftmost column.

=item *

--raw

Display raw data counts rather than percentages in the output. This
allows you to see the exact number of instruction reads, branch misses,
etc. for each test/perl combination. It also causes the C<AVERAGE> display
per field to be calculated based on the average of each test's count
rather than the average of each percentage. This means that tests with
very high counts will dominate.

=item *

--sort=I<field:perl>

Order the tests in the output based on the value of I<field> in the
column I<perl>. The I<perl> value is as per C<--norm>. For example

    bench.pl --sort=Dw:perl-5.20.0 \
        perl-5.16.0 perl-5.18.0 perl-5.20.0

=back

=back

=cut



use 5.010000;
use warnings;
use strict;
use Getopt::Long qw(:config no_auto_abbrev require_order);
use IPC::Open2 ();
use IO::Select;
use IO::File;
use POSIX ":sys_wait_h";

# The version of the file format used to save data. We refuse to process
# the file if the integer component differs.

my $FORMAT_VERSION = 1.0;

# The fields we know about

my %VALID_FIELDS = map { $_ => 1 }
    qw(Ir Ir_m1 Ir_mm Dr Dr_m1 Dr_mm Dw Dw_m1 Dw_mm COND COND_m IND IND_m);

sub usage {
    die <<EOF;
Usage: $0 [options] -- perl[=label] ...

General options:

  --action=foo       What action to perform [default: grind]:
                        grind      run the code under cachegrind
                        selftest   perform a selftest; produce TAP output
  --debug            Enable verbose debugging output.
  --help             Display this help.
  -v|--verbose       Display progress information.


Selection:

  --tests=FOO        Select only the specified tests for reading,
                       benchmarking and display.  FOO may be either a
                       list of tests or a pattern: 'foo,bar,baz' or
                       '/regex/'; [default: all tests].

Input:

  -r|--read=file     Read in previously saved data from the specified file.
                       May be repeated, and be used together with new
                       benchmarking to create combined results.

Benchmarking:
  Benchmarks will be run for any perl specified on the command line.
  These options can be used to modify the benchmarking behavior:

  --autolabel        Generate labels for any executables without one.
  --benchfile=foo    File containing the benchmarks
                       [default: t/perf/benchmarks].
  --grindargs=foo    Optional command-line args to pass to cachegrind.
  -j|--jobs=N        Run N jobs in parallel [default: 1].
  --perlargs=foo     Optional command-line args to pass to each perl to run.

Output:
  Any results accumulated via --read or running benchmarks can be output
  in any or all of these three ways:

  -w|--write=file    Save the raw data to the specified file (may be read
                       back later with --read).

  --bisect=f,min,max Exit with a zero status if the named field f is in
                       the specified min..max range; exit 1 otherwise.
                       Produces no other output.  Only legal if a single
                       benchmark test has been specified.

  --show             Display the results to stdout in human-readable form.
                       This is enabled by default, except with --write and
                       --bisect.  The following sub-options alter how
                       --show behaves.

  --average          Only display average, not individual test results.
  --compact=perl     Display the results of a single perl in compact form.
                       Which perl is specified as per --norm.
  --fields=a,b,c     Display only the specified fields (e.g. Ir,Ir_m,Ir_mm).
  --norm=perl        Which perl column to treat as 100%; may be a column
                       number (0..N-1) or a perl executable name or label
                       [default: 0].
  --raw              Display raw data counts rather than percentages.
  --sort=field:perl  Sort the tests based on the value of 'field' in the
                       column 'perl'.  The perl value is as per --norm.


The command line ends with one or more specified perl executables,
which will be searched for in the current \$PATH. Each binary name may
have an optional =LABEL appended, which will be used rather than the
executable name in output. The labels must be unique across all current
executables and previous runs obtained via --read.
Each executable may
optionally be followed by --args= and --env= switches to specify
per-executable arguments and environment variables:

    perl-5.24.0=strict --args='-Mwarnings -Mstrict' --env='FOO=foo' \
    perl-5.24.0=plain
EOF
}

my %OPTS = (
    action    => 'grind',
    average   => 0,
    benchfile => undef,
    bisect    => undef,
    compact   => undef,
    debug     => 0,
    grindargs => '',
    fields    => undef,
    jobs      => 1,
    norm      => 0,
    perlargs  => '',
    raw       => 0,
    read      => undef,
    show      => undef,
    sort      => undef,
    tests     => undef,
    verbose   => 0,
    write     => undef,
);


# process command-line args and call top-level action

{
    GetOptions(
        'action=s'    => \$OPTS{action},
        'average'     => \$OPTS{average},
        'autolabel'   => \$OPTS{autolabel},
        'benchfile=s' => \$OPTS{benchfile},
        'bisect=s'    => \$OPTS{bisect},
        'compact=s'   => \$OPTS{compact},
        'debug'       => \$OPTS{debug},
        'grindargs=s' => \$OPTS{grindargs},
        'help|h'      => \$OPTS{help},
        'fields=s'    => \$OPTS{fields},
        'jobs|j=i'    => \$OPTS{jobs},
        'norm=s'      => \$OPTS{norm},
        'perlargs=s'  => \$OPTS{perlargs},
        'raw'         => \$OPTS{raw},
        'read|r=s@'   => \$OPTS{read},
        'show'        => \$OPTS{show},
        'sort=s'      => \$OPTS{sort},
        'tests=s'     => \$OPTS{tests},
        'v|verbose'   => \$OPTS{verbose},
        'write|w=s'   => \$OPTS{write},
    ) or die "Use the -h option for usage information.\n";

    usage if $OPTS{help};


    if (defined $OPTS{read} or defined $OPTS{write}) {
        # fail early if it's not present
        require JSON::PP;
    }

    if (defined $OPTS{fields}) {
        my @f = split /,/, $OPTS{fields};
        for (@f) {
            die "Error: --fields: unknown field '$_'\n"
                unless $VALID_FIELDS{$_};
        }
        my %f = map { $_ => 1 } @f;
        $OPTS{fields} = \%f;
    }

    my %valid_actions = qw(grind 1 selftest 1);
    unless ($valid_actions{$OPTS{action}}) {
        die "Error: unrecognised action '$OPTS{action}'\n"
          . "must be one of: " . join(', ', sort keys %valid_actions)."\n";
    }

    if (defined $OPTS{sort}) {
        my @s = split /:/, $OPTS{sort};
        if (@s != 2) {
            die "Error: --sort argument should be of the form field:perl: "
              . "'$OPTS{sort}'\n";
        }
        my ($field, $perl) = @s;
        die "Error: --sort: unknown field '$field'\n"
            unless $VALID_FIELDS{$field};
        # the 'perl' value will be validated later, after we have processed
        # the perls
        $OPTS{'sort-field'} = $field;
        $OPTS{'sort-perl'}  = $perl;
    }

    # show is the default output action
    $OPTS{show} = 1 unless $OPTS{write} || $OPTS{bisect};

    if ($OPTS{action} eq 'grind') {
        do_grind(\@ARGV);
    }
    elsif ($OPTS{action} eq 'selftest') {
        if (@ARGV) {
            die "Error: no perl executables may be specified with selftest\n"
        }
        do_selftest();
    }
}
exit 0;


# Given a hash ref keyed by test names, filter it by deleting unwanted
# tests, based on $OPTS{tests}.
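# For example (illustrative), with $OPTS{tests} = '/^expr::/', a hash
# with the keys qw(expr::assign::scalar_lex call::sub::empty) would be
# reduced to just its 'expr::assign::scalar_lex' entry.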

sub filter_tests {
    my ($tests) = @_;

    my $opt = $OPTS{tests};
    return unless defined $opt;

    my @tests;

    if ($opt =~ m{^/}) {
        $opt =~ s{^/(.+)/$}{$1}
            or die "Error: --tests regex must be of the form /.../\n";
        for (keys %$tests) {
            delete $tests->{$_} unless /$opt/;
        }
    }
    else {
        my %t;
        for (split /,/, $opt) {
            $t{$_} = 1;
            next if exists $tests->{$_};

            my $e = "Error: no such test found: '$_'\n";
            if ($OPTS{verbose}) {
                $e .= "Valid test names are:\n";
                $e .= " $_\n" for sort keys %$tests;
            }
            else {
                $e .= "Re-run with --verbose for a list of valid tests.\n";
            }
            die $e;
        }
        for (keys %$tests) {
            delete $tests->{$_} unless exists $t{$_};
        }
    }
    die "Error: no tests to run\n" unless %$tests;
}


# Read in the test file, and filter out any tests excluded by $OPTS{tests}.
# Return a hash ref { testname => { test }, ... }
# and an array ref of the test names in their original order.

sub read_tests_file {
    my ($file) = @_;

    my $ta;
    {
        local @INC = ('.');
        $ta = do $file;
    }
    unless ($ta) {
        die "Error: can't load '$file': code didn't return a true value\n"
            if defined $ta;
        die "Error: can't parse '$file':\n$@\n" if $@;
        die "Error: can't read '$file': $!\n";
    }

    # validate and process each test

    {
        my %valid = map { $_ => 1 } qw(desc setup code pre post compile);
        my @tests = @$ta;
        if (!@tests || @tests % 2 != 0) {
            die "Error: '$file' does not contain evenly paired test names "
              . "and hashes\n";
        }
        while (@tests) {
            my $name = shift @tests;
            my $hash = shift @tests;

            unless ($name =~ /^[a-zA-Z]\w*(::\w+)*$/) {
                die "Error: '$file': invalid test name: '$name'\n";
            }

            for (sort keys %$hash) {
                die "Error: '$file': invalid key '$_' for test '$name'\n"
                    unless exists $valid{$_};
            }

            # make description default to the code
            $hash->{desc} = $hash->{code} unless exists $hash->{desc};
        }
    }

    my @orig_order;
    for (my $i = 0; $i < @$ta; $i += 2) {
        push @orig_order, $ta->[$i];
    }

    my $t = { @$ta };
    filter_tests($t);
    return $t, \@orig_order;
}


# Process the perl name/label/column argument of options like --norm and
# --sort. Return the index of the matching perl.

sub select_a_perl {
    my ($perl, $perls, $who) = @_;
    $perls ||= [];
    my $n = @$perls;

    if ($perl =~ /^-([0-9]+)$/) {
        my $p = $1;
        die "Error: $who value $perl outside range -1..-$n\n"
            if $p < 1 || $p > $n;
        return $n - $p;
    }

    if ($perl =~ /^[0-9]+$/) {
        die "Error: $who value $perl outside range 0.." . $#$perls . "\n"
            unless $perl < $n;
        return $perl;
    }
    else {
        my @perl = grep    $perls->[$_][0] eq $perl
                        || $perls->[$_][1] eq $perl,
                    0..$#$perls;
        unless (@perl) {
            my $valid = '';
            for (@$perls) {
                $valid .= " $_->[1]";
                $valid .= " $_->[0]" if $_->[0] ne $_->[1];
                $valid .= "\n";
            }
            die "Error: $who: unrecognised perl '$perl'\n"
              . "Valid perl names are:\n$valid";
        }
        die "Error: $who: ambiguous perl '$perl'\n"
            if @perl > 1;
        return $perl[0];
    }
}


# Validate the list of perl executables on the command line.
# The general form is:
#
#     a_perl_exe[=label] [ --args='perl args'] [ --env='FOO=foo' ]
#
# Return a list of [ exe, label, {env}, 'args' ] tuples

sub process_executables_list {
    my ($read_perls, @cmd_line_args) = @_;

    my @results; # returned, each item is [ perlexe, label, {env}, 'args' ]
    my %seen_from_reads = map { $_->[1] => 1 } @$read_perls;
    my %seen;
    my @labels;

    while (@cmd_line_args) {
        my $item = shift @cmd_line_args;

        if ($item =~ /^--(.*)$/) {
            my ($switch, $val) = split /=/, $1, 2;
            die "Error: unrecognised executable switch '--$switch'\n"
                unless $switch =~ /^(args|env)$/;

            die "Error: --$switch without a preceding executable name\n"
                unless @results;

            unless (defined $val) {
                $val = shift @cmd_line_args;
                die "Error: --$switch is missing a value\n"
                    unless defined $val;
            }

            if ($switch eq 'args') {
                $results[-1][3] .= " $val";
            }
            else {
                # --env
                $val =~ /^(\w+)=(.*)$/
                    or die "Error: --env is missing =value\n";
                $results[-1][2]{$1} = $2;
            }

            next;
        }

        # whatever is left must be the name of an executable

        my ($perl, $label) = split /=/, $item, 2;
        push @labels, $label;
        unless ($OPTS{autolabel}) {
            $label //= $perl;
            $label = $perl.$label if $label =~ /^\+/;
        }

        die "Error: duplicate label '$label': "
          . "each executable must have a unique label\n"
            if defined $label && $seen{$label}++;

        die "Error: duplicate label '$label': "
          . "seen both in --read file and on command line\n"
            if defined $label && $seen_from_reads{$label};

        my $r = qx($perl -e 'print qq(ok\n)' 2>&1);
        die "Error: unable to execute '$perl': $r\n" if $r ne "ok\n";

        push @results, [ $perl, $label, { }, '' ];
    }

    # make args '' by default
    for (@results) {
        push @$_, '' unless @$_ > 3;
    }

    if ($OPTS{autolabel}) {

        # create a list of [ 'perl-path', $i ] pairs for all
        # $results[$i] which don't have a label
        my @labels;
        for (0..$#results) {
            push @labels, [ $results[$_][0], $_ ]
                unless defined $results[$_][1];
        }

        if (@labels) {
            # strip off common prefixes
            my $pre = '';
            STRIP_PREFIX:
            while (length $labels[0][0]) {
                my $c = substr($labels[0][0], 0, 1);
                for my $i (1..$#labels) {
                    last STRIP_PREFIX
                        if substr($labels[$i][0], 0, 1) ne $c;
                }
                substr($labels[$_][0], 0, 1) = '' for 0..$#labels;
                $pre .= $c;
            }
            # add back any final "version-ish" prefix
            $pre =~ s/^.*?([0-9\.]*)$/$1/;
            substr($labels[$_][0], 0, 0) = $pre for 0..$#labels;

            # strip off common suffixes
            my $post = '';
            STRIP_SUFFIX:
            while (length $labels[0][0]) {
                my $c = substr($labels[0][0], -1, 1);
                for my $i (1..$#labels) {
                    last STRIP_SUFFIX
                        if substr($labels[$i][0], -1, 1) ne $c;
                }
                chop $labels[$_][0] for 0..$#labels;
                $post = "$c$post";
            }
            # add back any initial "version-ish" suffix
            $post =~ s/^([0-9\.]*).*$/$1/;
            $labels[$_][0] .= $post for 0..$#labels;

            # avoid degenerate empty string for single executable name
            $labels[0][0] = '0' if @labels == 1 && !length $labels[0][0];

            # if the auto-generated labels are plain integers, prefix
            # them with 'p' (for perl) to distinguish them from column
            # indices (otherwise e.g. --norm=2 is ambiguous)

            if ($labels[0][0] =~ /^\d*$/) {
                $labels[$_][0] = "p$labels[$_][0]" for 0..$#labels;
            }

            # now de-duplicate labels

            my (%seen, %index);
            $seen{$read_perls->[$_][1]}++ for 0..$#$read_perls;
            $seen{$labels[$_][0]}++      for 0..$#labels;

            for my $i (0..$#labels) {
                my $label = $labels[$i][0];
                next unless $seen{$label} > 1;
                my $d = length($label) ? '-' : '';
                my $n = $index{$label} // 0;
                $n++ while exists $seen{"$label$d$n"};
                $labels[$i][0] .= "$d$n";
                $index{$label} = $n + 1;
            }

            # finally, store them
            $results[$_->[1]][1] = $_->[0] for @labels;
        }
    }


    return @results;
}



# Return a string containing a perl program which runs the benchmark code
# $ARGV[0] times. If $body is true, include the main body (setup) in
# the loop; otherwise create an empty loop with just pre and post.
# Note that an empty body is handled with '1;' so that a completely empty
# loop has a single nextstate rather than a stub op, so more closely
# matches the active loop; e.g.:
#     {1;}    => nextstate;                       unstack
#     {$x=1;} => nextstate; const; gvsv; sassign; unstack
# Note also that each statement is prefixed with a label; this avoids
# adjacent nextstate ops being optimised away.
#
# A final 1; statement is added so that the code is always in void
# context.
#
# If the compile flag is set for a test, the body of the loop is wrapped in
# eval 'sub { .... }' to measure compile time rather than execution time.

sub make_perl_prog {
    my ($name, $test, $body) = @_;
    my ($desc, $setup, $code, $pre, $post, $compile) =
        @$test{qw(desc setup code pre post compile)};

    $setup //= '';
    $pre  = defined $pre  ? "_PRE_: $pre; "   : "";
    $post = defined $post ? "_POST_: $post; " : "";
    $code = $body ? $code : "1";
    $code = "_CODE_: $code; ";
    my $full = "$pre$code$post _CXT_: 1; ";
    $full = "eval q{sub { $full }};" if $compile;

    return <<EOF;
# $desc
package $name;
BEGIN { srand(0) }
$setup;
for my \$__loop__ (1..\$ARGV[0]) {
    $full
}
EOF
}


# Parse the output from cachegrind. Return a hash ref.
# See do_selftest() for examples of the output format.

sub parse_cachegrind {
    my ($output, $id, $perl) = @_;

    my %res;

    my @lines = split /\n/, $output;
    for (@lines) {
        unless (s/(==\d+==)|(--\d+--) //) {
            die "Error: while executing $id:\n"
"unexpected code or cachegrind output:\n$_\n"; 904 } 905 if (/I\s+refs:\s+([\d,]+)/) { 906 $res{Ir} = $1; 907 } 908 elsif (/I1\s+misses:\s+([\d,]+)/) { 909 $res{Ir_m1} = $1; 910 } 911 elsif (/LLi\s+misses:\s+([\d,]+)/) { 912 $res{Ir_mm} = $1; 913 } 914 elsif (/D\s+refs:\s+.*?([\d,]+) rd .*?([\d,]+) wr/) { 915 @res{qw(Dr Dw)} = ($1,$2); 916 } 917 elsif (/D1\s+misses:\s+.*?([\d,]+) rd .*?([\d,]+) wr/) { 918 @res{qw(Dr_m1 Dw_m1)} = ($1,$2); 919 } 920 elsif (/LLd\s+misses:\s+.*?([\d,]+) rd .*?([\d,]+) wr/) { 921 @res{qw(Dr_mm Dw_mm)} = ($1,$2); 922 } 923 elsif (/Branches:\s+.*?([\d,]+) cond .*?([\d,]+) ind/) { 924 @res{qw(COND IND)} = ($1,$2); 925 } 926 elsif (/Mispredicts:\s+.*?([\d,]+) cond .*?([\d,]+) ind/) { 927 @res{qw(COND_m IND_m)} = ($1,$2); 928 } 929 } 930 931 for my $field (keys %VALID_FIELDS) { 932 die "Error: can't parse '$field' field from cachegrind output:\n$output" 933 unless exists $res{$field}; 934 $res{$field} =~ s/,//g; 935 } 936 937 return \%res; 938} 939 940 941# Handle the 'grind' action 942 943sub do_grind { 944 my ($cmd_line_args) = @_; # the residue of @ARGV after option processing 945 946 my ($loop_counts, $perls, $results, $tests, $order, @run_perls); 947 my ($bisect_field, $bisect_min, $bisect_max); 948 my ($done_read, $processed, $averages, %seen_labels); 949 950 if (defined $OPTS{bisect}) { 951 ($bisect_field, $bisect_min, $bisect_max) = split /,/, $OPTS{bisect}, 3; 952 die "Error: --bisect option must be of form 'field,integer,integer'\n" 953 unless 954 defined $bisect_max 955 and $bisect_min =~ /^[0-9]+$/ 956 and $bisect_max =~ /^[0-9]+$/; 957 958 die "Error: unrecognised field '$bisect_field' in --bisect option\n" 959 unless $VALID_FIELDS{$bisect_field}; 960 961 die "Error: --bisect min ($bisect_min) must be <= max ($bisect_max)\n" 962 if $bisect_min > $bisect_max; 963 } 964 965 # Read in previous benchmark results 966 967 foreach my $file (@{$OPTS{read}}) { 968 open my $in, '<:encoding(UTF-8)', $file 969 or die "Error: can't open '$file' for reading: $!\n"; 970 my $data = do { local $/; <$in> }; 971 close $in; 972 973 my $hash = JSON::PP::decode_json($data); 974 if (int($FORMAT_VERSION) < int($hash->{version})) { 975 die "Error: unsupported version $hash->{version} in file" 976 . 
" '$file' (too new)\n"; 977 } 978 my ($read_loop_counts, $read_perls, $read_results, $read_tests, $read_order) = 979 @$hash{qw(loop_counts perls results tests order)}; 980 981 # check file contents for consistency 982 my $k_o = join ';', sort @$read_order; 983 my $k_r = join ';', sort keys %$read_results; 984 my $k_t = join ';', sort keys %$read_tests; 985 die "File '$file' contains no results\n" unless length $k_r; 986 die "File '$file' contains differing test and results names\n" 987 unless $k_r eq $k_t; 988 die "File '$file' contains differing test and sort order names\n" 989 unless $k_o eq $k_t; 990 991 # delete tests not matching --tests= criteria, if any 992 filter_tests($read_results); 993 filter_tests($read_tests); 994 995 for my $perl (@$read_perls) { 996 my $label = $perl->[1]; 997 die "Error: duplicate label '$label': seen in file '$file'\n" 998 if exists $seen_labels{$label}; 999 $seen_labels{$label}++; 1000 } 1001 1002 if (!$done_read) { 1003 ($loop_counts, $perls, $results, $tests, $order) = 1004 ($read_loop_counts, $read_perls, $read_results, $read_tests, $read_order); 1005 $done_read = 1; 1006 } 1007 else { 1008 # merge results across multiple files 1009 1010 if ( join(';', sort keys %$tests) 1011 ne join(';', sort keys %$read_tests)) 1012 { 1013 my $err = "Can't merge multiple read files: " 1014 . "they contain differing test sets.\n"; 1015 if ($OPTS{verbose}) { 1016 $err .= "Previous tests:\n"; 1017 $err .= " $_\n" for sort keys %$tests; 1018 $err .= "tests from '$file':\n"; 1019 $err .= " $_\n" for sort keys %$read_tests; 1020 } 1021 else { 1022 $err .= "Re-run with --verbose to see the differences.\n"; 1023 } 1024 die $err; 1025 } 1026 1027 if ("@$read_loop_counts" ne "@$loop_counts") { 1028 die "Can't merge multiple read files: differing loop counts:\n" 1029 . " (previous=(@$loop_counts), " 1030 . "'$file'=(@$read_loop_counts))\n"; 1031 } 1032 1033 push @$perls, @{$read_perls}; 1034 foreach my $test (keys %{$read_results}) { 1035 foreach my $label (keys %{$read_results->{$test}}) { 1036 $results->{$test}{$label}= $read_results->{$test}{$label}; 1037 } 1038 } 1039 } 1040 } 1041 die "Error: --benchfile cannot be used when --read is present\n" 1042 if $done_read && defined $OPTS{benchfile}; 1043 1044 # Gather list of perls to benchmark: 1045 1046 if (@$cmd_line_args) { 1047 unless ($done_read) { 1048 # How many times to execute the loop for the two trials. The lower 1049 # value is intended to do the loop enough times that branch 1050 # prediction has taken hold; the higher loop allows us to see the 1051 # branch misses after that 1052 $loop_counts = [10, 20]; 1053 1054 ($tests, $order) = 1055 read_tests_file($OPTS{benchfile} // 't/perf/benchmarks'); 1056 } 1057 1058 @run_perls = process_executables_list($perls, @$cmd_line_args); 1059 push @$perls, @run_perls; 1060 } 1061 1062 # strip @$order to just the actual tests present 1063 $order = [ grep exists $tests->{$_}, @$order ]; 1064 1065 # Now we know what perls and tests we have, do extra option processing 1066 # and checking (done before grinding, so time isn't wasted if we die). 

    if (!$perls or !@$perls) {
        die "Error: nothing to do: no perls to run, no data to read.\n";
    }
    if (@$perls < 2 and $OPTS{show} and !$OPTS{raw}) {
        die "Error: need at least 2 perls for comparison.\n"
    }

    if ($OPTS{bisect}) {
        die "Error: exactly one perl executable must be specified for bisect\n"
            unless @$perls == 1;
        die "Error: only a single test may be specified with --bisect\n"
            unless keys %$tests == 1;
    }

    $OPTS{norm} = select_a_perl($OPTS{norm}, $perls, "--norm");

    if (defined $OPTS{'sort-perl'}) {
        $OPTS{'sort-perl'} =
                select_a_perl($OPTS{'sort-perl'}, $perls, "--sort");
    }

    if (defined $OPTS{'compact'}) {
        $OPTS{'compact'} =
                select_a_perl($OPTS{'compact'}, $perls, "--compact");
    }


    # Run the benchmarks; accumulate the results with any previously
    # read results.

    if (@run_perls) {
        $results = grind_run($tests, $order, \@run_perls, $loop_counts,
                             $results);
    }


    # Handle the 3 forms of output

    if (defined $OPTS{write}) {
        my $json = JSON::PP::encode_json({
            version     => $FORMAT_VERSION,
            loop_counts => $loop_counts,
            perls       => $perls,
            results     => $results,
            tests       => $tests,
            order       => $order,
        });

        open my $out, '>:encoding(UTF-8)', $OPTS{write}
            or die "Error: can't open '$OPTS{write}' for writing: $!\n";
        print $out $json or die "Error: writing to file '$OPTS{write}': $!\n";
        close $out       or die "Error: closing file '$OPTS{write}': $!\n";
    }

    if ($OPTS{show} or $OPTS{bisect}) {
        # numerically process the raw data
        ($processed, $averages) =
            grind_process($results, $perls, $loop_counts);
    }

    if ($OPTS{show}) {
        if (defined $OPTS{compact}) {
            grind_print_compact($processed, $averages, $OPTS{compact},
                                $perls, $tests, $order);
        }
        else {
            grind_print($processed, $averages, $perls, $tests, $order);
        }
    }

    if ($OPTS{bisect}) {
        # these panics shouldn't happen if the bisect checks above are sound
        my @r = values %$results;
        die "Panic: expected exactly one test result in bisect\n"
            if @r != 1;
        @r = values %{$r[0]};
        die "Panic: expected exactly one perl result in bisect\n"
            if @r != 1;
        my $c = $r[0]{$bisect_field};
        die "Panic: no result in bisect for field '$bisect_field'\n"
            unless defined $c;

        print "Bisect: $bisect_field had the value $c\n";

        exit 0 if $bisect_min <= $c and $c <= $bisect_max;
        exit 1;
    }
}


# Run cachegrind for every test/perl combo.
# It may run several processes in parallel when -j is specified.
# Return a hash ref suitable for input to grind_process().

sub grind_run {
    my ($tests, $order, $perls, $counts, $results) = @_;

    # Build a list of all the jobs to run

    my @jobs;

    for my $test (grep $tests->{$_}, @$order) {

        # Create two test progs: one with an empty loop and one with code.
        my @prog = (
            make_perl_prog($test, $tests->{$test}, 0),
            make_perl_prog($test, $tests->{$test}, 1),
        );

        for my $p (@$perls) {
            my ($perl, $label, $env, $args) = @$p;

            # Run both the empty loop and the active loop
            # $counts->[0] and $counts->[1] times.
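            # That is, each test/perl pair expands to four cachegrind
            # jobs: (empty, short), (empty, long), (active, short) and
            # (active, long); grind_process() later combines the four
            # counts to cancel out both startup and loop overhead.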

            for my $i (0,1) {
                for my $j (0,1) {
                    my $envstr = '';
                    if (ref $env) {
                        $envstr .= "$_=$env->{$_} " for sort keys %$env;
                    }
                    my $cmd = "PERL_HASH_SEED=0 $envstr"
                        . "valgrind --tool=cachegrind --branch-sim=yes --cache-sim=yes "
                        . "--cachegrind-out-file=/dev/null "
                        . "$OPTS{grindargs} "
                        . "$perl $OPTS{perlargs} $args - $counts->[$j] 2>&1";
                    # for debugging and error messages
                    my $id = "$test/$label "
                        . ($i ? "active" : "empty") . "/"
                        . ($j ? "long" : "short") . " loop";

                    push @jobs, {
                        test   => $test,
                        perl   => $perl,
                        plabel => $label,
                        cmd    => $cmd,
                        prog   => $prog[$i],
                        active => $i,
                        loopix => $j,
                        id     => $id,
                    };
                }
            }
        }
    }

    # Execute each cachegrind and store the results in %results.

    local $SIG{PIPE} = 'IGNORE';

    my $max_jobs = $OPTS{jobs};
    my $running  = 0;   # count of executing jobs
    my %pids;           # map pids to jobs
    my %fds;            # map fds to jobs
    my $select = IO::Select->new();

    my $njobs     = scalar @jobs;
    my $donejobs  = 0;
    my $starttime = time();

    while (@jobs or $running) {

        if ($OPTS{debug}) {
            printf "Main loop: pending=%d running=%d\n",
                scalar(@jobs), $running;
        }

        # Start new jobs

        while (@jobs && $running < $max_jobs) {
            my $job = shift @jobs;
            my ($id, $cmd) = @$job{qw(id cmd)};

            my ($in, $out, $pid);
            $donejobs++;
            if ($OPTS{verbose}) {
                my $donefrac = $donejobs / $njobs;
                my $eta = "";
                # Once we've done at least 20% we'll have a good estimate
                # of the total runtime, hence the ETA
                if ($donefrac >= 0.2) {
                    my $now = time();
                    my $duration  = ($now - $starttime) / $donefrac;
                    my $remaining = ($starttime + $duration) - $now;
                    $eta = sprintf ", remaining %d:%02d",
                        $remaining / 60, $remaining % 60;
                }
                warn sprintf "Starting %s (%d of %d, %.2f%%%s)\n",
                    $id, $donejobs, $njobs, 100 * $donefrac, $eta;
            }
            eval { $pid = IPC::Open2::open2($out, $in, $cmd); 1; }
                or die "Error: while starting cachegrind subprocess"
                     . " for $id:\n$@";
            $running++;
            $pids{$pid}    = $job;
            $fds{"$out"}   = $job;
            $job->{out_fd} = $out;
            $job->{output} = '';
            $job->{pid}    = $pid;

            $out->blocking(0);
            $select->add($out);

            if ($OPTS{debug}) {
                print "Started pid $pid for $id\n";
            }

            # Note:
            # In principle we should write to $in in the main select loop,
            # since it may block. In reality,
            #  a) the code we write to the perl process's stdin is likely
            #     to be less than the OS's pipe buffer size;
            #  b) by the time the perl process has read in all its stdin,
            #     the only output it should have generated is a few lines
            #     of cachegrind output preamble.
            # If these assumptions change, then perform the following print
            # in the select loop instead.

            print $in $job->{prog};
            close $in;
        }

        # Get output of running jobs

        if ($OPTS{debug}) {
            printf "Select: waiting on (%s)\n",
                join ', ', sort { $a <=> $b } map $fds{$_}{pid},
                                                  $select->handles;
        }

        my @ready = $select->can_read;

        if ($OPTS{debug}) {
            printf "Select: pids (%s) ready\n",
                join ', ', sort { $a <=> $b } map $fds{$_}{pid}, @ready;
        }

        unless (@ready) {
            die "Panic: select returned no file handles\n";
        }

        for my $fd (@ready) {
            my $j = $fds{"$fd"};
            my $r = sysread $fd, $j->{output}, 8192, length($j->{output});
            unless (defined $r) {
                die "Panic: Read from process running $j->{id} gave:\n$!";
            }
            next if $r;

            # EOF

            if ($OPTS{debug}) {
                print "Got eof for pid $fds{$fd}{pid} ($j->{id})\n";
            }

            $select->remove($j->{out_fd});
            close($j->{out_fd})
                or die "Panic: closing output fh on $j->{id} gave:\n$!\n";
            $running--;
            delete $fds{"$j->{out_fd}"};
            my $output = $j->{output};

            if ($OPTS{debug}) {
                my $p = $j->{prog};
                $p =~ s/^/ : /mg;
                my $o = $output;
                $o =~ s/^/ : /mg;

                print "\n$j->{id}/\nCommand: $j->{cmd}\n"
                    . "Input:\n$p"
                    . "Output:\n$o";
            }

            $results->{$j->{test}}{$j->{plabel}}[$j->{active}][$j->{loopix}]
                = parse_cachegrind($output, $j->{id}, $j->{perl});
        }

        # Reap finished jobs

        while (1) {
            my $kid = waitpid(-1, WNOHANG);
            my $ret = $?;
            last if $kid <= 0;

            unless (exists $pids{$kid}) {
                die "Panic: reaped unexpected child $kid";
            }
            my $j = $pids{$kid};
            if ($ret) {
                die sprintf("Error: $j->{id} gave return status 0x%04x\n",
                            $ret)
                  . "with the following output:\n$j->{output}\n";
            }
            delete $pids{$kid};
        }
    }

    return $results;
}




# grind_process(): process the data that has been extracted from
# cachegrind's output.
#
# $res is of the form
#     ->{benchmark_name}{perl_label}[active][count]{field_name},
# where active is 0 or 1, indicating an empty or active loop, and
# count is 0 or 1, indicating a short or long loop. E.g.
#
#     $res->{'expr::assign::scalar_lex'}{'perl-5.21.1'}[0][1]{Dw_mm}
#
# The $res data structure is modified in-place by this sub.
#
# $perls is [ [ perl-exe, perl-label ], .... ].
#
# $counts is [ N, M ], indicating the counts for the short and long loops.
#
#
# return \%output, \%averages, where
#
#     $output{benchmark_name}{perl_label}{field_name} = N
#     $averages{perl_label}{field_name} = M
#
# where N is the raw count ($OPTS{raw}), or count_perl0/count_perlI
# otherwise; M is the average raw count over all tests ($OPTS{raw}), or
# 1/(sum(count_perlI/count_perl0)/num_tests) otherwise.
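#
# For example (illustrative numbers): if the norm perl takes 1000
# instruction reads per iteration on each of two tests, and another perl
# takes 800 on both, then sum(count_perlI/count_perl0)/num_tests
# = (0.8 + 0.8)/2 = 0.8, and the displayed average is the reciprocal,
# 1/0.8 = 1.25, i.e. 125%.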

sub grind_process {
    my ($res, $perls, $counts) = @_;

    # Process the four results for each test/perl combo:
    # Convert
    #     $res->{benchmark_name}{perl_label}[active][count]{field_name} = n
    # to
    #     $res->{benchmark_name}{perl_label}{field_name} = averaged_n
    #
    # $r[0][1] - $r[0][0] is the time to do ($counts->[1]-$counts->[0])
    #     empty loops, eliminating startup time;
    # $r[1][1] - $r[1][0] is the time to do ($counts->[1]-$counts->[0])
    #     active loops, eliminating startup time
    # (the two startup times may be different because different code
    # is being compiled); the difference of the two results above
    # divided by the count difference is the time to execute the
    # active code once, eliminating both startup and loop overhead.

    for my $tests (values %$res) {
        for my $r (values %$tests) {
            my $r2;
            for (keys %{$r->[0][0]}) {
                my $n = (   ($r->[1][1]{$_} - $r->[1][0]{$_})
                          - ($r->[0][1]{$_} - $r->[0][0]{$_})
                        ) / ($counts->[1] - $counts->[0]);
                $r2->{$_} = $n;
            }
            $r = $r2;
        }
    }

    my %totals;
    my %counts;
    my %data;

    my $perl_norm = $perls->[$OPTS{norm}][1]; # label of the reference perl

    for my $test_name (keys %$res) {
        my $res1 = $res->{$test_name};
        my $res2_norm = $res1->{$perl_norm};
        for my $perl (keys %$res1) {
            my $res2 = $res1->{$perl};
            for my $field (keys %$res2) {
                my ($p, $q) = ($res2_norm->{$field}, $res2->{$field});

                if ($OPTS{raw}) {
                    # Avoid annoying '-0.0' displays. Ideally this number
                    # should never be negative, but fluctuations in
                    # startup etc can theoretically make this happen
                    $q = 0 if ($q <= 0 && $q > -0.1);
                    $totals{$perl}{$field} += $q;
                    $counts{$perl}{$field}++;
                    $data{$test_name}{$perl}{$field} = $q;
                    next;
                }

                # $p and $q are notionally integer counts, but
                # due to variations in startup etc, it's possible for a
                # count which is supposedly zero to be calculated as a
                # small positive or negative value.
                # In this case, set it to zero. Further below we
                # special-case zeros to avoid division by zero errors etc.

                $p = 0.0 if $p < 0.01;
                $q = 0.0 if $q < 0.01;

                if ($p == 0.0 && $q == 0.0) {
                    # Both perls gave a count of zero, so no change:
                    # treat as 100%
                    $totals{$perl}{$field} += 1;
                    $counts{$perl}{$field}++;
                    $data{$test_name}{$perl}{$field} = 1;
                }
                elsif ($p == 0.0 || $q == 0.0) {
                    # If either count is zero, there were too few events
                    # to give a meaningful ratio (and we will end up with
                    # division by zero if we try). Mark the result undef,
                    # indicating that it shouldn't be displayed; and skip
                    # adding to the average
                    $data{$test_name}{$perl}{$field} = undef;
                }
                else {
                    # For averages, we record q/p rather than p/q.
                    # Consider a test where perl_norm took 1000 cycles
                    # and perlN took 800 cycles. For the individual
                    # results we display p/q, or 1.25; i.e. a quarter
                    # quicker. For the averages, we instead sum all
                    # the 0.8's, which gives the total cycles required to
                    # execute all tests, with all tests given equal
                    # weight. Later we reciprocate the final result,
                    # i.e. 1/(sum(qi/pi)/n)

                    $totals{$perl}{$field} += $q/$p;
                    $counts{$perl}{$field}++;
                    $data{$test_name}{$perl}{$field} = $p/$q;
                }
            }
        }
    }

    # Calculate averages based on %totals and %counts accumulated earlier.

    my %averages;
    for my $perl (keys %totals) {
        my $t = $totals{$perl};
        for my $field (keys %$t) {
            $averages{$perl}{$field} = $OPTS{raw}
                ? $t->{$field} / $counts{$perl}{$field}
                # reciprocal - see comments above
                : $counts{$perl}{$field} / $t->{$field};
        }
    }

    return \%data, \%averages;
}



# print a standard blurb at the start of the grind display

sub grind_blurb {
    my ($perls) = @_;

    print <<EOF;
Key:
    Ir   Instruction read
    Dr   Data read
    Dw   Data write
    COND conditional branches
    IND  indirect branches
    _m   branch predict miss
    _m1  level 1 cache miss
    _mm  last cache (e.g. L3) miss
    -    indeterminate percentage (e.g. 1/0)

EOF

    if ($OPTS{raw}) {
        print "The numbers represent raw counts per loop iteration.\n";
    }
    else {
        print <<EOF;
The numbers represent relative counts per loop iteration, compared to
$perls->[$OPTS{norm}][1] at 100.0%.
Higher is better: for example, using half as many instructions gives 200%,
while using twice as many gives 50%.
EOF
    }
}


# return a sorted list of the test names, plus 'AVERAGE'

sub sorted_test_names {
    my ($results, $order, $perls) = @_;

    my @names;
    unless ($OPTS{average}) {
        if (defined $OPTS{'sort-field'}) {
            my ($field, $perlix) = @OPTS{'sort-field', 'sort-perl'};
            my $perl = $perls->[$perlix][1];
            @names = sort
                {
                        $results->{$a}{$perl}{$field}
                    <=> $results->{$b}{$perl}{$field}
                }
                keys %$results;
        }
        else {
            @names = grep $results->{$_}, @$order;
        }
    }

    # No point in displaying the average for only one test.
    push @names, 'AVERAGE' unless @names == 1;
    @names;
}


# format one cell data item

sub grind_format_cell {
    my ($val, $width) = @_;
    my $s;
    if (!defined $val) {
        return sprintf "%*s", $width, '-';
    }
    elsif (abs($val) >= 1_000_000) {
        # avoid displaying very large numbers (which might be the
        # result of e.g. 1 / 0.000001)
        return sprintf "%*s", $width, 'Inf';
    }
    elsif ($OPTS{raw}) {
        return sprintf "%*.1f", $width, $val;
    }
    else {
        return sprintf "%*.2f", $width, $val * 100;
    }
}

# grind_print(): display the tabulated results of all the cachegrinds.
#
# Arguments are of the form:
#     $results->{benchmark_name}{perl_label}{field_name} = N
#     $averages->{perl_label}{field_name} = M
#     $perls = [ [ perl-exe, perl-label ], ... ]
#     $tests->{test_name}{desc => ..., ...}
#     $order = [ 'foo::bar1', ... ]  # order in which to display tests

sub grind_print {
    my ($results, $averages, $perls, $tests, $order) = @_;

    my @perl_names  = map $_->[0], @$perls;
    my @perl_labels = map $_->[1], @$perls;
    my %perl_labels;
    $perl_labels{$_->[0]} = $_->[1] for @$perls;

    # Print standard header.
    grind_blurb($perls);

    my @test_names = sorted_test_names($results, $order, $perls);

    my @fields = qw(Ir Dr Dw COND IND
                    COND_m IND_m
                    Ir_m1 Dr_m1 Dw_m1
                    Ir_mm Dr_mm Dw_mm
                   );

    if ($OPTS{fields}) {
        @fields = grep exists $OPTS{fields}{$_}, @fields;
    }

    # If only a single field is to be displayed, use a more compact
    # format with only a single line of output per test.

    my $one_field = @fields == 1;

    # The width of column 0: this holds either field names or,
    # for $one_field, test names

    my $width0 = 0;
    for ($one_field ? @test_names : @fields) {
        $width0 = length if length > $width0;
    }

    # Calculate the widths of the data columns

    my @widths = map length, @perl_labels;

    for my $test (@test_names) {
        my $res = ($test eq 'AVERAGE') ? $averages : $results->{$test};
        for my $field (@fields) {
            for my $i (0..$#widths) {
                my $l = length grind_format_cell(
                                    $res->{$perl_labels[$i]}{$field}, 1);
                $widths[$i] = $l if $l > $widths[$i];
            }
        }
    }

    # Print the results for each test

    for my $test (0..$#test_names) {
        my $test_name = $test_names[$test];
        my $doing_ave = ($test_name eq 'AVERAGE');
        my $res = $doing_ave ? $averages : $results->{$test_name};

        # print per-test header

        if ($one_field) {
            print "\nResults for field $fields[0]\n\n" if $test == 0;
        }
        else {
            print "\n$test_name";
            print "\n$tests->{$test_name}{desc}" unless $doing_ave;
            print "\n\n";
        }

        # Print the perl executable names header.

        if (!$one_field || $test == 0) {
            for my $i (0,1) {
                print " " x $width0;
                for (0..$#widths) {
                    printf " %*s", $widths[$_],
                        $i ? ('-' x $widths[$_]) : $perl_labels[$_];
                }
                print "\n";
            }
        }

        my $field_suffix = '';

        # print a line of data

        for my $field (@fields) {
            if ($one_field) {
                printf "%-*s", $width0, $test_name;
            }
            else {
                # If there are enough fields, print a blank line
                # between groups of fields that have the same suffix
                if (@fields > 4) {
                    my $s = '';
                    $s = $1 if $field =~ /(_\w+)$/;
                    print "\n" if $s ne $field_suffix;
                    $field_suffix = $s;
                }
                printf "%*s", $width0, $field;
            }

            for my $i (0..$#widths) {
                print " ",
                    grind_format_cell($res->{$perl_labels[$i]}{$field},
                                      $widths[$i]);
            }
            print "\n";
        }
    }
}



# grind_print_compact(): like grind_print(), but display a single perl
# in a compact form. Has an additional arg, $which_perl, which specifies
# which perl to display.
#
# Arguments are of the form:
#     $results->{benchmark_name}{perl_label}{field_name} = N
#     $averages->{perl_label}{field_name} = M
#     $perls = [ [ perl-exe, perl-label ], ... ]
#     $tests->{test_name}{desc => ..., ...}
#     $order = [ 'foo::bar1', ... ]  # order in which to display tests

sub grind_print_compact {
    my ($results, $averages, $which_perl, $perls, $tests, $order) = @_;

    # Print standard header.
    grind_blurb($perls);

    print "\nResults for $perls->[$which_perl][1]\n\n";

    my @test_names = sorted_test_names($results, $order, $perls);

    # Dump the results for each test.
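    # The layout is one row per test: one column per field, followed by
    # the test description; e.g. (illustrative only):
    #
    #         Ir      Dr      Dw ...
    #     ------  ------  ------
    #     123.45  102.30   99.87 ... lexical $x = 1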

    my @fields = qw( Ir Dr Dw
                     COND IND
                     COND_m IND_m
                     Ir_m1 Dr_m1 Dw_m1
                     Ir_mm Dr_mm Dw_mm
                   );
    if ($OPTS{fields}) {
        @fields = grep exists $OPTS{fields}{$_}, @fields;
    }

    # calculate the max width of the test names

    my $name_width = 0;
    for (@test_names) {
        $name_width = length if length > $name_width;
    }

    # Calculate the widths of the data columns

    my @widths = map length, @fields;

    for my $test (@test_names) {
        my $res = ($test eq 'AVERAGE') ? $averages : $results->{$test};
        $res = $res->{$perls->[$which_perl][1]};
        for my $i (0..$#fields) {
            my $l = length grind_format_cell($res->{$fields[$i]}, 1);
            $widths[$i] = $l if $l > $widths[$i];
        }
    }

    # Print header

    printf " %*s", $widths[$_], $fields[$_] for 0..$#fields;
    print "\n";
    printf " %*s", $_, ('-' x $_) for @widths;
    print "\n";

    # Print the results for each test

    for my $test_name (@test_names) {
        my $doing_ave = ($test_name eq 'AVERAGE');
        my $res = $doing_ave ? $averages : $results->{$test_name};
        $res = $res->{$perls->[$which_perl][1]};
        my $desc = $doing_ave
            ? $test_name
            : sprintf "%-*s %s", $name_width, $test_name,
                                 $tests->{$test_name}{desc};

        for my $i (0..$#fields) {
            print " ", grind_format_cell($res->{$fields[$i]}, $widths[$i]);
        }
        print " $desc\n";
    }
}


# do_selftest(): check that we can parse known cachegrind
# output formats. If the output of cachegrind changes, add a *new*
# test here; keep the old tests to make sure we continue to parse
# the output of old cachegrinds.

sub do_selftest {

    my @tests = (
        'standard',
        <<'EOF',
==32350== Cachegrind, a cache and branch-prediction profiler
==32350== Copyright (C) 2002-2013, and GNU GPL'd, by Nicholas Nethercote et al.
==32350== Using Valgrind-3.9.0 and LibVEX; rerun with -h for copyright info
==32350== Command: perl5211o /tmp/uiS2gjdqe5 1
==32350==
--32350-- warning: L3 cache found, using its data for the LL simulation.
1810==32350== 1811==32350== I refs: 1,124,055 1812==32350== I1 misses: 5,573 1813==32350== LLi misses: 3,338 1814==32350== I1 miss rate: 0.49% 1815==32350== LLi miss rate: 0.29% 1816==32350== 1817==32350== D refs: 404,275 (259,191 rd + 145,084 wr) 1818==32350== D1 misses: 9,608 ( 6,098 rd + 3,510 wr) 1819==32350== LLd misses: 5,794 ( 2,781 rd + 3,013 wr) 1820==32350== D1 miss rate: 2.3% ( 2.3% + 2.4% ) 1821==32350== LLd miss rate: 1.4% ( 1.0% + 2.0% ) 1822==32350== 1823==32350== LL refs: 15,181 ( 11,671 rd + 3,510 wr) 1824==32350== LL misses: 9,132 ( 6,119 rd + 3,013 wr) 1825==32350== LL miss rate: 0.5% ( 0.4% + 2.0% ) 1826==32350== 1827==32350== Branches: 202,372 (197,050 cond + 5,322 ind) 1828==32350== Mispredicts: 19,153 ( 17,742 cond + 1,411 ind) 1829==32350== Mispred rate: 9.4% ( 9.0% + 26.5% ) 1830EOF 1831 { 1832 COND => 197050, 1833 COND_m => 17742, 1834 Dr => 259191, 1835 Dr_m1 => 6098, 1836 Dr_mm => 2781, 1837 Dw => 145084, 1838 Dw_m1 => 3510, 1839 Dw_mm => 3013, 1840 IND => 5322, 1841 IND_m => 1411, 1842 Ir => 1124055, 1843 Ir_m1 => 5573, 1844 Ir_mm => 3338, 1845 }, 1846 ); 1847 1848 for ('./t', '.') { 1849 my $t = "$_/test.pl"; 1850 next unless -f $t; 1851 require $t; 1852 } 1853 plan(@tests / 3 * keys %VALID_FIELDS); 1854 1855 while (@tests) { 1856 my $desc = shift @tests; 1857 my $output = shift @tests; 1858 my $expected = shift @tests; 1859 my $p = parse_cachegrind($output); 1860 for (sort keys %VALID_FIELDS) { 1861 is($p->{$_}, $expected->{$_}, "$desc, $_"); 1862 } 1863 } 1864} 1865